diff mbox series

insane.bbclass: introduce SIGILL finder

Message ID 20230831091618.3715995-1-bbara93@gmail.com
State New
Headers show
Series insane.bbclass: introduce SIGILL finder | expand

Commit Message

Benjamin Bara Aug. 31, 2023, 9:16 a.m. UTC
From: Benjamin Bara <benjamin.bara@skidata.com>

This commit should look for unsupported instructions depending on the
active tune features. For now, it checks for vfpv3d16 and other non-neon
machines, but it can be easily extended for other architectures/checks.

Reason for this check is that a couple of packages assume neon support
for armv7, but it is actually optional.

Signed-off-by: Benjamin Bara <benjamin.bara@skidata.com>
---
Hi,

as I lately played a little bit around with a vfpv3d16 machine and some
multimedia packages, I stumbled across a couple of illegal instructions
during runtime. Therefore I decided to hack a QA job which should find
these during package time. Not sure if this is the correct location to
do such a check and if this is something needed at all...

Regards,
Benjamin
---
 meta/classes-global/insane.bbclass | 78 +++++++++++++++++++++++++++++-
 meta/lib/oe/qa.py                  | 34 +++++++++++++
 2 files changed, 111 insertions(+), 1 deletion(-)

Comments

Alexander Kanavin Aug. 31, 2023, 9:24 a.m. UTC | #1
objdump -d is a heavy operation, and the problem is both arm-specific
and rare. Also SIGILL is a very clear indicator of what the problem
is, even if it's annoying to see it at runtime.

I don't think this should happen across all builds and components.

Alex

On Thu, 31 Aug 2023 at 11:16, Benjamin Bara <bbara93@gmail.com> wrote:
>
> From: Benjamin Bara <benjamin.bara@skidata.com>
>
> This commit should look for unsupported instructions depending on the
> active tune features. For now, it checks for vfpv3d16 and other non-neon
> machines, but it can be easily extended for other architectures/checks.
>
> Reason for this check is that a couple of packages assume neon support
> for armv7, but it is actually optional.
>
> Signed-off-by: Benjamin Bara <benjamin.bara@skidata.com>
> ---
> Hi,
>
> as I lately played a little bit around with a vfpv3d16 machine and some
> multimedia packages, I stumbled across a couple of illegal instructions
> during runtime. Therefore I decided to hack a QA job which should find
> these during package time. Not sure if this is the correct location to
> do such a check and if this is something needed at all...
>
> Regards,
> Benjamin
> ---
>  meta/classes-global/insane.bbclass | 78 +++++++++++++++++++++++++++++-
>  meta/lib/oe/qa.py                  | 34 +++++++++++++
>  2 files changed, 111 insertions(+), 1 deletion(-)
>
> diff --git a/meta/classes-global/insane.bbclass b/meta/classes-global/insane.bbclass
> index 2e53778934..5b9112d05c 100644
> --- a/meta/classes-global/insane.bbclass
> +++ b/meta/classes-global/insane.bbclass
> @@ -44,7 +44,7 @@ ERROR_QA ?= "dev-so debug-deps dev-deps debug-files arch pkgconfig la \
>              configure-gettext perllocalpod shebang-size \
>              already-stripped installed-vs-shipped ldflags compile-host-path \
>              install-host-path pn-overrides unknown-configure-option \
> -            useless-rpaths rpaths staticdev empty-dirs \
> +            useless-rpaths rpaths staticdev empty-dirs sigill \
>              patch-fuzz \
>              "
>  # Add usrmerge QA check based on distro feature
> @@ -635,6 +635,82 @@ def check_32bit_symbols(path, packagename, d, elf, messages):
>                      'Suppress with INSANE_SKIP = "32bit-time"'
>                  )
>
> +QAPATHTEST[sigill] = "package_qa_check_sigill"
> +def package_qa_check_sigill(path, name, d, elf, messages):
> +    """
> +    Check that the package doesn't contain unsupported instructions.
> +    """
> +    import re
> +
> +    if not elf:
> +        return
> +
> +    if os.path.islink(path):
> +        return
> +
> +    def test_skeleton(grep, test):
> +        dump = elf.run_filtered_objdump_unstripped("-d", grep, d, name)
> +        if dump == 'stripped':
> +            # stripped binaries might give false positives
> +            return
> +
> +        # get all instructions and registers from the disassembled binary
> +        instr_list = []
> +        regs_list = []
> +        for line in dump.split("\\n"):
> +            splitted = dump.split("\\t")
> +            if len(splitted) < 3:
> +                continue
> +            # 0 is just empty, as line starts with \t
> +            instr_list.append(splitted[1])
> +            regs_list.append(splitted[2])
> +
> +        # loop through instr+regs list and apply the given test function
> +        uniques = set()
> +        for index, regs in enumerate(regs_list):
> +            instr = instr_list[index]
> +            affected = test(instr, regs)
> +            if affected:
> +                uniques.add(f"{instr} {regs}")
> +
> +        for instr_regs in uniques:
> +            oe.qa.add_message(messages, "sigill", "%s contains %s" % (path, instr_regs))
> +
> +    features = d.getVar('TUNE_FEATURES')
> +    if "vfpv3d16" in features:
> +        # grep for d16-d31, d0-d15 are valid for f64 instructions
> +        vfpv3d16_grep = "f64\s+(d1|d2|d3)"
> +
> +        def vfpv3d16_test(instr, regs):
> +            for reg in re.findall(r'd(\d+)', regs):
> +                return int(reg) >= 16
> +
> +        test_skeleton(vfpv3d16_grep, vfpv3d16_test)
> +
> +    if "armv7a" in features and "neon" not in features:
> +        # https://developer.arm.com/documentation/den0018/a/NEON-and-VFP-Instruction-Summary/List-of-all-NEON-and-VFP-instructions
> +        neon_instrs = ["vq?r?shl", "vq?abs", "vq?add", "vq?movn", "vq?sub",
> +                       "vr?addhn", "vr?hadd", "vr?shrn?", "vr?sra", "vr?subhn",
> +                       "vabal?", "vabdl?", "vacge", "vacgt", "vacle", "vaclt",
> +                       "vaddl", "vaddw", "vand", "vbic", "vbif", "vbit", "vceq",
> +                       "vcge", "vcgt", "vcle", "vcls", "vclt", "vclz", "vcnt",
> +                       "vdup", "veor", "vext", "vhsub", "vmax", "vmin", "vmlal",
> +                       "vmlsl", "vmov2", "vmovl", "vmull", "vmvn", "vqneg",
> +                       "vorn", "vorr", "vpadal", "vpaddl?", "vpmax", "vpmin",
> +                       "vqr?dmulh", "vqr?shru?n", "vqdmlal", "vqdmlsl",
> +                       "vqdmull", "vqmovun", "vqshl", "vqshlu", "vcrecpe",
> +                       "vcrecps", "vrev", "vrsqrte", "vrsqrts", "vshl", "vshll",
> +                       "vsli", "vsri", "vsubl", "vsubw", "vswp", "vtbl", "vtbx",
> +                       "vtrn", "vtst", "vuzp", "vzip"]
> +        # most of them are only NEON when the used data type isn't floating-point
> +        neon_grep = "\s(" + "|".join(neon_instrs) + ")\.(s|u|p)"
> +
> +        def neon_test(instr, regs):
> +            # if something is found by the grep, the package is affected
> +            return True
> +
> +        test_skeleton(neon_grep, neon_test)
> +
>  # Check license variables
>  do_populate_lic[postfuncs] += "populate_lic_qa_checksum"
>  python populate_lic_qa_checksum() {
> diff --git a/meta/lib/oe/qa.py b/meta/lib/oe/qa.py
> index de980638c4..075622c98f 100644
> --- a/meta/lib/oe/qa.py
> +++ b/meta/lib/oe/qa.py
> @@ -157,6 +157,40 @@ class ELFFile:
>              bb.note("%s %s %s failed: %s" % (objdump, cmd, self.name, e))
>              return ""
>
> +    def run_filtered_objdump_unstripped(self, cmd, filter, d, pn):
> +        import bb.process
> +        import sys
> +
> +        objdump = d.getVar('OBJDUMP')
> +
> +        # we require the unstripped binary here, as objdump might have problems
> +        # detecting the correct instruction format, which leads to false positives
> +        name = self.name.replace(d.getVar('PKGDEST') + '/' + pn, d.getVar('D'))
> +
> +        env = os.environ.copy()
> +        env["LC_ALL"] = "C"
> +        env["PATH"] = d.getVar('PATH')
> +
> +        # ensure that binary is unstripped
> +        try:
> +            bb.note("file %s" % (name))
> +            output = bb.process.run(["file", name], env=env, shell=False)[0]
> +            if ", stripped" in output:
> +                return "stripped"
> +        except Exception as e:
> +            bb.note("file %s failed: %s" % (name, e))
> +            return ""
> +
> +        try:
> +            bb.note("%s %s %s | grep -E %s" % (objdump, cmd, name, filter))
> +            objdump_proc = bb.process.Popen([objdump, cmd, "--no-addresses", "--no-show-raw-insn", name], env=env, shell=False)
> +            grep_proc = bb.process.Popen(["grep", "-E", filter], env=env, shell=False, stdin=objdump_proc.stdout)
> +            objdump_proc.stdout.close()
> +            return str(grep_proc.communicate()[0])
> +        except Exception as e:
> +            bb.note("%s %s %s failed: %s" % (objdump, cmd, name, e))
> +            return ""
> +
>  def elf_machine_to_string(machine):
>      """
>      Return the name of a given ELF e_machine field or the hex value as a string
> --
> 2.34.1
>
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#186965): https://lists.openembedded.org/g/openembedded-core/message/186965
> Mute This Topic: https://lists.openembedded.org/mt/101070322/1686489
> Group Owner: openembedded-core+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub [alex.kanavin@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
Benjamin Bara Aug. 31, 2023, 10:22 a.m. UTC | #2
Hi Alex,

thanks for the feedback!

On Thu, 31 Aug 2023 at 11:24, Alexander Kanavin <alex.kanavin@gmail.com> wrote:
> objdump -d is a heavy operation, and the problem is both arm-specific
> and rare. Also SIGILL is a very clear indicator of what the problem
> is, even if it's annoying to see it at runtime.

As the grep is part of the call, at least the processing is quite fast. It
could also be extended to other architectures which might not support the
latest and greatest SIMD extensions.

> I don't think this should happen across all builds and components.

It will just be executed if the "filters" apply to your machine, currently:
- "vfpv3d16" in features
- "armv7a" in features and "neon" not in features

But I understand that this probably doesn't make sense for a "everyday build".

Regards,
Benjamin
Ross Burton Sept. 1, 2023, 10:19 a.m. UTC | #3
On 31 Aug 2023, at 10:16, Benjamin Bara via lists.openembedded.org <bbara93=gmail.com@lists.openembedded.org> wrote:
> 
> From: Benjamin Bara <benjamin.bara@skidata.com>
> 
> This commit should look for unsupported instructions depending on the
> active tune features. For now, it checks for vfpv3d16 and other non-neon
> machines, but it can be easily extended for other architectures/checks.
> 
> Reason for this check is that a couple of packages assume neon support
> for armv7, but it is actually optional.

Presumably this will trigger on recipes which generate the code for all the extended instructions (neon, sve, etc) and pick and runtime what functions to run?

Ross
Benjamin Bara Sept. 4, 2023, 7:39 a.m. UTC | #4
Hi Ross,

On Fri, 1 Sept 2023 at 12:19, Ross Burton <Ross.Burton@arm.com> wrote:
> On 31 Aug 2023, at 10:16, Benjamin Bara via lists.openembedded.org <bbara93=gmail.com@lists.openembedded.org> wrote:
> >
> > From: Benjamin Bara <benjamin.bara@skidata.com>
> >
> > This commit should look for unsupported instructions depending on the
> > active tune features. For now, it checks for vfpv3d16 and other non-neon
> > machines, but it can be easily extended for other architectures/checks.
> >
> > Reason for this check is that a couple of packages assume neon support
> > for armv7, but it is actually optional.
>
> Presumably this will trigger on recipes which generate the code for all the extended instructions (neon, sve, etc) and pick and runtime what functions to run?

Yep, that's true. So far, I found openssl[1] and pulseaudio[2], but I am still
testing. I would insane_skip these in a V2 if it is considered useful.

Regards,
Benjamin

[1] https://github.com/openssl/openssl/blob/openssl-3.1.2/crypto/armcap.c#L71
[2] https://gitlab.freedesktop.org/pulseaudio/pulseaudio/-/blob/v16.1/src/pulsecore/cpu-arm.c?ref_type=tags#L115
diff mbox series

Patch

diff --git a/meta/classes-global/insane.bbclass b/meta/classes-global/insane.bbclass
index 2e53778934..5b9112d05c 100644
--- a/meta/classes-global/insane.bbclass
+++ b/meta/classes-global/insane.bbclass
@@ -44,7 +44,7 @@  ERROR_QA ?= "dev-so debug-deps dev-deps debug-files arch pkgconfig la \
             configure-gettext perllocalpod shebang-size \
             already-stripped installed-vs-shipped ldflags compile-host-path \
             install-host-path pn-overrides unknown-configure-option \
-            useless-rpaths rpaths staticdev empty-dirs \
+            useless-rpaths rpaths staticdev empty-dirs sigill \
             patch-fuzz \
             "
 # Add usrmerge QA check based on distro feature
@@ -635,6 +635,82 @@  def check_32bit_symbols(path, packagename, d, elf, messages):
                     'Suppress with INSANE_SKIP = "32bit-time"'
                 )
 
+QAPATHTEST[sigill] = "package_qa_check_sigill"
+def package_qa_check_sigill(path, name, d, elf, messages):
+    """
+    Check that the package doesn't contain unsupported instructions.
+    """
+    import re
+
+    if not elf:
+        return
+
+    if os.path.islink(path):
+        return
+
+    def test_skeleton(grep, test):
+        dump = elf.run_filtered_objdump_unstripped("-d", grep, d, name)
+        if dump == 'stripped':
+            # stripped binaries might give false positives
+            return
+
+        # get all instructions and registers from the disassembled binary
+        instr_list = []
+        regs_list = []
+        for line in dump.split("\\n"):
+            splitted = dump.split("\\t")
+            if len(splitted) < 3:
+                continue
+            # 0 is just empty, as line starts with \t
+            instr_list.append(splitted[1])
+            regs_list.append(splitted[2])
+
+        # loop through instr+regs list and apply the given test function
+        uniques = set()
+        for index, regs in enumerate(regs_list):
+            instr = instr_list[index]
+            affected = test(instr, regs)
+            if affected:
+                uniques.add(f"{instr} {regs}")
+
+        for instr_regs in uniques:
+            oe.qa.add_message(messages, "sigill", "%s contains %s" % (path, instr_regs))
+
+    features = d.getVar('TUNE_FEATURES')
+    if "vfpv3d16" in features:
+        # grep for d16-d31, d0-d15 are valid for f64 instructions
+        vfpv3d16_grep = "f64\s+(d1|d2|d3)"
+
+        def vfpv3d16_test(instr, regs):
+            for reg in re.findall(r'd(\d+)', regs):
+                return int(reg) >= 16
+
+        test_skeleton(vfpv3d16_grep, vfpv3d16_test)
+
+    if "armv7a" in features and "neon" not in features:
+        # https://developer.arm.com/documentation/den0018/a/NEON-and-VFP-Instruction-Summary/List-of-all-NEON-and-VFP-instructions
+        neon_instrs = ["vq?r?shl", "vq?abs", "vq?add", "vq?movn", "vq?sub",
+                       "vr?addhn", "vr?hadd", "vr?shrn?", "vr?sra", "vr?subhn",
+                       "vabal?", "vabdl?", "vacge", "vacgt", "vacle", "vaclt",
+                       "vaddl", "vaddw", "vand", "vbic", "vbif", "vbit", "vceq",
+                       "vcge", "vcgt", "vcle", "vcls", "vclt", "vclz", "vcnt",
+                       "vdup", "veor", "vext", "vhsub", "vmax", "vmin", "vmlal",
+                       "vmlsl", "vmov2", "vmovl", "vmull", "vmvn", "vqneg",
+                       "vorn", "vorr", "vpadal", "vpaddl?", "vpmax", "vpmin",
+                       "vqr?dmulh", "vqr?shru?n", "vqdmlal", "vqdmlsl",
+                       "vqdmull", "vqmovun", "vqshl", "vqshlu", "vcrecpe",
+                       "vcrecps", "vrev", "vrsqrte", "vrsqrts", "vshl", "vshll",
+                       "vsli", "vsri", "vsubl", "vsubw", "vswp", "vtbl", "vtbx",
+                       "vtrn", "vtst", "vuzp", "vzip"]
+        # most of them are only NEON when the used data type isn't floating-point
+        neon_grep = "\s(" + "|".join(neon_instrs) + ")\.(s|u|p)"
+
+        def neon_test(instr, regs):
+            # if something is found by the grep, the package is affected
+            return True
+
+        test_skeleton(neon_grep, neon_test)
+
 # Check license variables
 do_populate_lic[postfuncs] += "populate_lic_qa_checksum"
 python populate_lic_qa_checksum() {
diff --git a/meta/lib/oe/qa.py b/meta/lib/oe/qa.py
index de980638c4..075622c98f 100644
--- a/meta/lib/oe/qa.py
+++ b/meta/lib/oe/qa.py
@@ -157,6 +157,40 @@  class ELFFile:
             bb.note("%s %s %s failed: %s" % (objdump, cmd, self.name, e))
             return ""
 
+    def run_filtered_objdump_unstripped(self, cmd, filter, d, pn):
+        import bb.process
+        import sys
+
+        objdump = d.getVar('OBJDUMP')
+
+        # we require the unstripped binary here, as objdump might have problems
+        # detecting the correct instruction format, which leads to false positives
+        name = self.name.replace(d.getVar('PKGDEST') + '/' + pn, d.getVar('D'))
+
+        env = os.environ.copy()
+        env["LC_ALL"] = "C"
+        env["PATH"] = d.getVar('PATH')
+
+        # ensure that binary is unstripped
+        try:
+            bb.note("file %s" % (name))
+            output = bb.process.run(["file", name], env=env, shell=False)[0]
+            if ", stripped" in output:
+                return "stripped"
+        except Exception as e:
+            bb.note("file %s failed: %s" % (name, e))
+            return ""
+
+        try:
+            bb.note("%s %s %s | grep -E %s" % (objdump, cmd, name, filter))
+            objdump_proc = bb.process.Popen([objdump, cmd, "--no-addresses", "--no-show-raw-insn", name], env=env, shell=False)
+            grep_proc = bb.process.Popen(["grep", "-E", filter], env=env, shell=False, stdin=objdump_proc.stdout)
+            objdump_proc.stdout.close()
+            return str(grep_proc.communicate()[0])
+        except Exception as e:
+            bb.note("%s %s %s failed: %s" % (objdump, cmd, name, e))
+            return ""
+
 def elf_machine_to_string(machine):
     """
     Return the name of a given ELF e_machine field or the hex value as a string