From patchwork Tue Nov 21 11:33:46 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ross Burton X-Patchwork-Id: 34933 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from aws-us-west-2-korg-lkml-1.web.codeaurora.org (localhost.localdomain [127.0.0.1]) by smtp.lore.kernel.org (Postfix) with ESMTP id 46022C61D92 for ; Tue, 21 Nov 2023 11:33:59 +0000 (UTC) Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by mx.groups.io with SMTP id smtpd.web11.37781.1700566437762411594 for ; Tue, 21 Nov 2023 03:33:58 -0800 Authentication-Results: mx.groups.io; dkim=none (message not signed); spf=pass (domain: arm.com, ip: 217.140.110.172, mailfrom: ross.burton@arm.com) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id ECE05FEC; Tue, 21 Nov 2023 03:34:43 -0800 (PST) Received: from oss-tx204.lab.cambridge.arm.com (usa-sjc-imap-foss1.foss.arm.com [10.121.207.14]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id CDAC03F7A6; Tue, 21 Nov 2023 03:33:56 -0800 (PST) From: ross.burton@arm.com To: openembedded-core@lists.openembedded.org Cc: Steve Sakoman Subject: [PATCH] oeqa/selftest/debuginfod: improve selftest Date: Tue, 21 Nov 2023 11:33:46 +0000 Message-Id: <20231121113346.2655150-1-ross.burton@arm.com> X-Mailer: git-send-email 2.34.1 MIME-Version: 1.0 List-Id: X-Webhook-Received: from li982-79.members.linode.com [45.33.32.79] by aws-us-west-2-korg-lkml-1.web.codeaurora.org with HTTPS for ; Tue, 21 Nov 2023 11:33:59 -0000 X-Groupsio-URL: https://lists.openembedded.org/g/openembedded-core/message/190967 From: Ross Burton This test was occasionally failing for no obvious reason, so refactor and improve: - While waiting for the daemon, check that it is still running and explicitly timeout after 10s when making the HTTP call. 
- While waiting for the daemon to be ready, log the current state of the daemon so we can tell if we're timing out as it is still scanning. - This was in fact the cause of the intermittent failures, because the TMPDIR is reused between tests and may contain a large number of packages. Do the tests in an isolated TMPDIR to hopefully mitigate this issue. - Decorate the test using runqemu as such so that it can be skipped in environments without runqemu - Add a second test that doesn't use runqemu or images, which is faster but less realistic. Signed-off-by: Ross Burton --- meta/lib/oeqa/selftest/cases/debuginfod.py | 122 +++++++++++++++------ 1 file changed, 87 insertions(+), 35 deletions(-) diff --git a/meta/lib/oeqa/selftest/cases/debuginfod.py b/meta/lib/oeqa/selftest/cases/debuginfod.py index 37f51760fbc..d0a8941aa06 100644 --- a/meta/lib/oeqa/selftest/cases/debuginfod.py +++ b/meta/lib/oeqa/selftest/cases/debuginfod.py @@ -6,7 +6,11 @@ import os import socketserver import subprocess +import time +import urllib +import pathlib +from oeqa.core.decorator import OETestTag from oeqa.selftest.case import OESelftestTestCase from oeqa.utils.commands import bitbake, get_bb_var, runqemu @@ -21,39 +25,54 @@ class Debuginfod(OESelftestTestCase): Request the metrics endpoint periodically and wait for there to be no busy scanning threads. - Returns True if debuginfod is ready, False if we timed out + Returns if debuginfod is ready, raises an exception if not within the + timeout. 
""" - import time, urllib # Wait a minute - countdown = 6 - delay = 10 + countdown = 12 + delay = 5 + latest = None while countdown: + self.logger.info("waiting...") time.sleep(delay) + + self.logger.info("polling server") + if self.debuginfod.poll(): + self.logger.info("server dead") + self.debuginfod.communicate() + self.fail("debuginfod terminated unexpectedly") + self.logger.info("server alive") + try: - with urllib.request.urlopen("http://localhost:%d/metrics" % port) as f: - lines = f.read().decode("ascii").splitlines() - if "thread_busy{role=\"scan\"} 0" in lines: - return True + with urllib.request.urlopen("http://localhost:%d/metrics" % port, timeout=10) as f: + for line in f.read().decode("ascii").splitlines(): + key, value = line.rsplit(" ", 1) + if key == "thread_busy{role=\"scan\"}": + latest = int(value) + self.logger.info("Waiting for %d scan jobs to finish" % latest) + if latest == 0: + return except urllib.error.URLError as e: + # TODO: how to catch just timeouts? self.logger.error(e) + countdown -= 1 - return False + raise TimeoutError("Cannot connect debuginfod, still %d scan jobs running" % latest) - def test_debuginfod(self): - self.write_config( - """ -DISTRO_FEATURES:append = " debuginfod" -CORE_IMAGE_EXTRA_INSTALL += "elfutils" - """ - ) - bitbake("core-image-minimal elfutils-native:do_addto_recipe_sysroot") + def start_debuginfod(self): + # We assume that the caller has already bitbake'd elfutils-native:do_addto_recipe_sysroot + + # Save some useful paths for later + native_sysroot = pathlib.Path(get_bb_var("RECIPE_SYSROOT_NATIVE", "elfutils-native")) + native_bindir = native_sysroot / "usr" / "bin" + self.debuginfod = native_bindir / "debuginfod" + self.debuginfod_find = native_bindir / "debuginfod-find" - native_sysroot = get_bb_var("RECIPE_SYSROOT_NATIVE", "elfutils-native") cmd = [ - os.path.join(native_sysroot, "usr", "bin", "debuginfod"), + self.debuginfod, "--verbose", # In-memory database, this is a one-shot test 
"--database=:memory:", @@ -76,31 +95,64 @@ CORE_IMAGE_EXTRA_INSTALL += "elfutils" else: self.fail("Unknown package class %s" % format) - # Find a free port + # Find a free port. Racey but the window is small. with socketserver.TCPServer(("localhost", 0), None) as s: - port = s.server_address[1] - cmd.append("--port=%d" % port) + self.port = s.server_address[1] + cmd.append("--port=%d" % self.port) + + self.logger.info(f"Starting server {cmd}") + self.debuginfod = subprocess.Popen(cmd, env={}) + self.wait_for_debuginfod(self.port) + + + def test_debuginfod_native(self): + """ + Test debuginfod outside of qemu, by building a package and looking up a + binary's debuginfo using elfutils-native. + """ + + self.write_config(""" +TMPDIR = "${TOPDIR}/tmp-debuginfod" +DISTRO_FEATURES:append = " debuginfod" +""") + bitbake("elfutils-native:do_addto_recipe_sysroot xz xz:do_package") try: - # Remove DEBUGINFOD_URLS from the environment so we don't try - # looking in the distro debuginfod + self.start_debuginfod() + env = os.environ.copy() - if "DEBUGINFOD_URLS" in env: - del env["DEBUGINFOD_URLS"] + env["DEBUGINFOD_URLS"] = "http://localhost:%d/" % self.port + + pkgs = pathlib.Path(get_bb_var("PKGDEST", "xz")) + cmd = (self.debuginfod_find, "debuginfo", pkgs / "xz" / "usr" / "bin" / "xz.xz") + self.logger.info(f"Starting client {cmd}") + output = subprocess.check_output(cmd, env=env, text=True) + # This should be more comprehensive + self.assertIn("/.cache/debuginfod_client/", output) + finally: + self.debuginfod.kill() + + @OETestTag("runqemu") + def test_debuginfod_qemu(self): + """ + Test debuginfod-find inside a qemu, talking to a debuginfod on the host. 
+ """ + + self.write_config(""" +TMPDIR = "${TOPDIR}/tmp-debuginfod" +DISTRO_FEATURES:append = " debuginfod" +CORE_IMAGE_EXTRA_INSTALL += "elfutils xz" + """) + bitbake("core-image-minimal elfutils-native:do_addto_recipe_sysroot") - self.logger.info(f"Starting server {cmd}") - debuginfod = subprocess.Popen(cmd, env=env) + try: + self.start_debuginfod() with runqemu("core-image-minimal", runqemuparams="nographic") as qemu: - self.assertTrue(self.wait_for_debuginfod(port)) - - cmd = ( - "DEBUGINFOD_URLS=http://%s:%d/ debuginfod-find debuginfo /usr/bin/debuginfod" - % (qemu.server_ip, port) - ) + cmd = "DEBUGINFOD_URLS=http://%s:%d/ debuginfod-find debuginfo /usr/bin/xz" % (qemu.server_ip, self.port) self.logger.info(f"Starting client {cmd}") status, output = qemu.run_serial(cmd) # This should be more comprehensive self.assertIn("/.cache/debuginfod_client/", output) finally: - debuginfod.kill() + self.debuginfod.kill()