diff mbox series

[v2,1/1] archiver.bbclass: Drop tarfile module to improve performance

Message ID c8789ebbf118dc635ca8ecb0f4a16c0f7b74c732.1703230207.git.liezhi.yang@windriver.com
State Accepted, archived
Commit 6548354f049b173e8d443bc547d35c9d9fc05259
Headers show
Series [v2,1/1] archiver.bbclass: Drop tarfile module to improve performance | expand

Commit Message

Robert Yang Dec. 22, 2023, 7:31 a.m. UTC
From: Robert Yang <liezhi.yang@windriver.com>

* The tarfile module doesn't support xz options or environment varible
  XZ_DEFAULTS, this makes do_ar_patched incrediblely slow when the file is
  large, for example, chromium-x11 is about 3GB:
  - "bitbake chromium-x11 -car_patched" hasn't been done after 3 hours on my
     host, I checked the partial tar.xz file is only 1.5GB, so maybe more than 6
     hours is required to complete the task.

  - Now only less than 4 minutes is needed on the same host.

* Need add xz to HOSTTOOLS when archiver.bbclass is enabled and compression is xz.

Signed-off-by: Robert Yang <liezhi.yang@windriver.com>
---
 meta/classes/archiver.bbclass | 28 ++++++++++++++++------------
 meta/conf/bitbake.conf        |  3 +++
 2 files changed, 19 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/meta/classes/archiver.bbclass b/meta/classes/archiver.bbclass
index 80a69cf31db..2d0bbfbd422 100644
--- a/meta/classes/archiver.bbclass
+++ b/meta/classes/archiver.bbclass
@@ -401,19 +401,11 @@  python do_ar_mirror() {
         subprocess.check_call(cmd, shell=True)
 }
 
-def exclude_useless_paths(tarinfo):
-    if tarinfo.isdir():
-        if tarinfo.name.endswith('/temp') or tarinfo.name.endswith('/patches') or tarinfo.name.endswith('/.pc'):
-            return None
-        elif tarinfo.name == 'temp' or tarinfo.name == 'patches' or tarinfo.name == '.pc':
-            return None
-    return tarinfo
-
 def create_tarball(d, srcdir, suffix, ar_outdir):
     """
     create the tarball from srcdir
     """
-    import tarfile
+    import subprocess
 
     # Make sure we are only creating a single tarball for gcc sources
     if (d.getVar('SRC_URI') == ""):
@@ -425,6 +417,16 @@  def create_tarball(d, srcdir, suffix, ar_outdir):
     srcdir = os.path.realpath(srcdir)
 
     compression_method = d.getVarFlag('ARCHIVER_MODE', 'compression')
+    if compression_method == "xz":
+        compression_cmd = "xz %s" % d.getVar('XZ_DEFAULTS')
+    # To keep compatibility with ARCHIVER_MODE[compression]
+    elif compression_method == "gz":
+        compression_cmd = "gzip"
+    elif compression_method == "bz2":
+        compression_cmd = "bzip2"
+    else:
+        bb.fatal("Unsupported compression_method: %s" % compression_method)
+
     bb.utils.mkdirhier(ar_outdir)
     if suffix:
         filename = '%s-%s.tar.%s' % (d.getVar('PF'), suffix, compression_method)
@@ -433,9 +435,11 @@  def create_tarball(d, srcdir, suffix, ar_outdir):
     tarname = os.path.join(ar_outdir, filename)
 
     bb.note('Creating %s' % tarname)
-    tar = tarfile.open(tarname, 'w:%s' % compression_method)
-    tar.add(srcdir, arcname=os.path.basename(srcdir), filter=exclude_useless_paths)
-    tar.close()
+    dirname = os.path.dirname(srcdir)
+    basename = os.path.basename(srcdir)
+    exclude = "--exclude=temp --exclude=patches --exclude='.pc'"
+    tar_cmd = "tar %s -cf - %s | %s > %s" % (exclude, basename, compression_cmd, tarname)
+    subprocess.check_call(tar_cmd, cwd=dirname, shell=True)
 
 # creating .diff.gz between source.orig and source
 def create_diff_gz(d, src_orig, src, ar_outdir):
diff --git a/meta/conf/bitbake.conf b/meta/conf/bitbake.conf
index 83b12cbc158..f838444af03 100644
--- a/meta/conf/bitbake.conf
+++ b/meta/conf/bitbake.conf
@@ -529,6 +529,9 @@  HOSTTOOLS += " \
 # Tools needed to run testimage runtime image testing
 HOSTTOOLS += "${@'ip ping ps scp ssh stty' if (bb.utils.contains_any('IMAGE_CLASSES', 'testimage testsdk', True, False, d) or any(x in (d.getVar("BBINCLUDED") or "") for x in ["testimage.bbclass", "testsdk.bbclass"])) else ''}"
 
+# Used by archiver.bbclass when compression is xz
+HOSTTOOLS += "${@'xz' if (('archiver.bbclass' in (d.getVar('BBINCLUDED') or '')) and (d.getVarFlag('ARCHIVER_MODE', 'compression') == 'xz')) else ''}"
+
 # Link to these if present
 HOSTTOOLS_NONFATAL += "aws gcc-ar gpg gpg-agent ld.bfd ld.gold nc pigz sftp socat ssh sudo"