diff mbox series

[1/1] archiver.bbclass: Drop tarfile module to improve performance

Message ID 692606bf032e2ffc29bd3283156449a7747fe793.1703011843.git.liezhi.yang@windriver.com
State Accepted, archived
Commit 6548354f049b173e8d443bc547d35c9d9fc05259
Headers show
Series [1/1] archiver.bbclass: Drop tarfile module to improve performance | expand

Commit Message

Robert Yang Dec. 19, 2023, 6:52 p.m. UTC
From: Robert Yang <liezhi.yang@windriver.com>

* The tarfile module doesn't support xz options or the environment variable
  XZ_DEFAULTS, which makes do_ar_patched incredibly slow when the file is
  large, for example, chromium-x11 is about 3GB:
  - "bitbake chromium-x11 -car_patched" hasn't been done after 3 hours on my
     host, I checked the partial tar.xz file is only 1.5GB, so maybe more than 6
     hours is required to complete the task.

  - Now only less than 4 minutes is needed on the same host.

* xz needs to be added to HOSTTOOLS when archiver.bbclass is enabled and compression is xz.

Signed-off-by: Robert Yang <liezhi.yang@windriver.com>
---
 meta/classes/archiver.bbclass | 28 ++++++++++++++++------------
 meta/conf/bitbake.conf        |  3 +++
 2 files changed, 19 insertions(+), 12 deletions(-)

Comments

Robert Yang Dec. 22, 2023, 3:48 a.m. UTC | #1
On 12/20/23 02:52, Robert Yang via lists.openembedded.org wrote:
> From: Robert Yang <liezhi.yang@windriver.com>
> 
> * The tarfile module doesn't support xz options or the environment variable
>    XZ_DEFAULTS, which makes do_ar_patched incredibly slow when the file is
>    large, for example, chromium-x11 is about 3GB:
>    - "bitbake chromium-x11 -car_patched" hasn't been done after 3 hours on my
>       host, I checked the partial tar.xz file is only 1.5GB, so maybe more than 6
>       hours is required to complete the task.
> 
>    - Now only less than 4 minutes is needed on the same host.
> 
> * xz needs to be added to HOSTTOOLS when archiver.bbclass is enabled and compression is xz.
> 
> Signed-off-by: Robert Yang <liezhi.yang@windriver.com>
> ---
>   meta/classes/archiver.bbclass | 28 ++++++++++++++++------------
>   meta/conf/bitbake.conf        |  3 +++
>   2 files changed, 19 insertions(+), 12 deletions(-)
> 
> diff --git a/meta/classes/archiver.bbclass b/meta/classes/archiver.bbclass
> index 80a69cf31db..2d0bbfbd422 100644
> --- a/meta/classes/archiver.bbclass
> +++ b/meta/classes/archiver.bbclass
> @@ -401,19 +401,11 @@ python do_ar_mirror() {
>           subprocess.check_call(cmd, shell=True)
>   }
>   
> -def exclude_useless_paths(tarinfo):
> -    if tarinfo.isdir():
> -        if tarinfo.name.endswith('/temp') or tarinfo.name.endswith('/patches') or tarinfo.name.endswith('/.pc'):
> -            return None
> -        elif tarinfo.name == 'temp' or tarinfo.name == 'patches' or tarinfo.name == '.pc':
> -            return None
> -    return tarinfo
> -
>   def create_tarball(d, srcdir, suffix, ar_outdir):
>       """
>       create the tarball from srcdir
>       """
> -    import tarfile
> +    import subprocess
>   
>       # Make sure we are only creating a single tarball for gcc sources
>       if (d.getVar('SRC_URI') == ""):
> @@ -425,6 +417,16 @@ def create_tarball(d, srcdir, suffix, ar_outdir):
>       srcdir = os.path.realpath(srcdir)
>   
>       compression_method = d.getVarFlag('ARCHIVER_MODE', 'compression')
> +    if compression_method == "xz":
> +        compression_cmd = "xz %s" % d.getVar('XZ_DEFAULTS')
> +    # To keep compatibility with ARCHIVER_MODE[compression]
> +    elif compression_method == "gz":
> +        compression_cmd = "gzip"
> +    elif compression_method == "bz2":
> +        compression_cmd = "bzip2"
> +    else:
> +        bb.fatal("Unsupported compression_method: %s" % compression_method)
> +
>       bb.utils.mkdirhier(ar_outdir)
>       if suffix:
>           filename = '%s-%s.tar.%s' % (d.getVar('PF'), suffix, compression_method)
> @@ -433,9 +435,11 @@ def create_tarball(d, srcdir, suffix, ar_outdir):
>       tarname = os.path.join(ar_outdir, filename)
>   
>       bb.note('Creating %s' % tarname)
> -    tar = tarfile.open(tarname, 'w:%s' % compression_method)
> -    tar.add(srcdir, arcname=os.path.basename(srcdir), filter=exclude_useless_paths)
> -    tar.close()
> +    dirname = os.path.dirname(srcdir)
> +    basename = os.path.basename(srcdir)
> +    exclude = "--exclude=temp --exclude=patches --exclude='.pc'"
> +    tar_cmd = "tar %s -cf - %s | %s > %s" % (exclude, basename, compression_cmd, tarname)
> +    subprocess.check_call(tar_cmd, cwd=dirname, shell=True)
>   
>   # creating .diff.gz between source.orig and source
>   def create_diff_gz(d, src_orig, src, ar_outdir):
> diff --git a/meta/conf/bitbake.conf b/meta/conf/bitbake.conf
> index e7826e7af96..681af512bfa 100644
> --- a/meta/conf/bitbake.conf
> +++ b/meta/conf/bitbake.conf
> @@ -528,6 +528,9 @@ HOSTTOOLS += " \
>   # Tools needed to run testimage runtime image testing
>   HOSTTOOLS += "${@'ip ping ps scp ssh stty' if (bb.utils.contains_any('IMAGE_CLASSES', 'testimage testsdk', True, False, d) or any(x in (d.getVar("BBINCLUDED") or "") for x in ["testimage.bbclass", "testsdk.bbclass"])) else ''}"
>   
> +# Used by archiver.bbclass when compression is xz
> +HOSTTOOLS += "${@'xz' if (('archiver.bbclass' in d.getVar('BBINCLUDED')) and (d.getVarFlag('ARCHIVER_MODE', 'compression') == 'xz')) else ''}"

Sorry, please ignore this patch; I will send a V2. BBINCLUDED may be None in
layerindex, which makes layerindex fail.

// Robert


> +
>   # Link to these if present
>   HOSTTOOLS_NONFATAL += "aws gcc-ar gpg gpg-agent ld.bfd ld.gold nc pigz sftp socat ssh sudo"
>   
> 
> 
> 
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#192756): https://lists.openembedded.org/g/openembedded-core/message/192756
> Mute This Topic: https://lists.openembedded.org/mt/103268096/7304958
> Group Owner: openembedded-core+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub [liezhi.yang@eng.windriver.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
diff mbox series

Patch

diff --git a/meta/classes/archiver.bbclass b/meta/classes/archiver.bbclass
index 80a69cf31db..2d0bbfbd422 100644
--- a/meta/classes/archiver.bbclass
+++ b/meta/classes/archiver.bbclass
@@ -401,19 +401,11 @@  python do_ar_mirror() {
         subprocess.check_call(cmd, shell=True)
 }
 
-def exclude_useless_paths(tarinfo):
-    if tarinfo.isdir():
-        if tarinfo.name.endswith('/temp') or tarinfo.name.endswith('/patches') or tarinfo.name.endswith('/.pc'):
-            return None
-        elif tarinfo.name == 'temp' or tarinfo.name == 'patches' or tarinfo.name == '.pc':
-            return None
-    return tarinfo
-
 def create_tarball(d, srcdir, suffix, ar_outdir):
     """
     create the tarball from srcdir
     """
-    import tarfile
+    import subprocess
 
     # Make sure we are only creating a single tarball for gcc sources
     if (d.getVar('SRC_URI') == ""):
@@ -425,6 +417,16 @@  def create_tarball(d, srcdir, suffix, ar_outdir):
     srcdir = os.path.realpath(srcdir)
 
     compression_method = d.getVarFlag('ARCHIVER_MODE', 'compression')
+    if compression_method == "xz":
+        compression_cmd = "xz %s" % d.getVar('XZ_DEFAULTS')
+    # To keep compatibility with ARCHIVER_MODE[compression]
+    elif compression_method == "gz":
+        compression_cmd = "gzip"
+    elif compression_method == "bz2":
+        compression_cmd = "bzip2"
+    else:
+        bb.fatal("Unsupported compression_method: %s" % compression_method)
+
     bb.utils.mkdirhier(ar_outdir)
     if suffix:
         filename = '%s-%s.tar.%s' % (d.getVar('PF'), suffix, compression_method)
@@ -433,9 +435,11 @@  def create_tarball(d, srcdir, suffix, ar_outdir):
     tarname = os.path.join(ar_outdir, filename)
 
     bb.note('Creating %s' % tarname)
-    tar = tarfile.open(tarname, 'w:%s' % compression_method)
-    tar.add(srcdir, arcname=os.path.basename(srcdir), filter=exclude_useless_paths)
-    tar.close()
+    dirname = os.path.dirname(srcdir)
+    basename = os.path.basename(srcdir)
+    exclude = "--exclude=temp --exclude=patches --exclude='.pc'"
+    tar_cmd = "tar %s -cf - %s | %s > %s" % (exclude, basename, compression_cmd, tarname)
+    subprocess.check_call(tar_cmd, cwd=dirname, shell=True)
 
 # creating .diff.gz between source.orig and source
 def create_diff_gz(d, src_orig, src, ar_outdir):
diff --git a/meta/conf/bitbake.conf b/meta/conf/bitbake.conf
index e7826e7af96..681af512bfa 100644
--- a/meta/conf/bitbake.conf
+++ b/meta/conf/bitbake.conf
@@ -528,6 +528,9 @@  HOSTTOOLS += " \
 # Tools needed to run testimage runtime image testing
 HOSTTOOLS += "${@'ip ping ps scp ssh stty' if (bb.utils.contains_any('IMAGE_CLASSES', 'testimage testsdk', True, False, d) or any(x in (d.getVar("BBINCLUDED") or "") for x in ["testimage.bbclass", "testsdk.bbclass"])) else ''}"
 
+# Used by archiver.bbclass when compression is xz
+HOSTTOOLS += "${@'xz' if (('archiver.bbclass' in d.getVar('BBINCLUDED')) and (d.getVarFlag('ARCHIVER_MODE', 'compression') == 'xz')) else ''}"
+
 # Link to these if present
 HOSTTOOLS_NONFATAL += "aws gcc-ar gpg gpg-agent ld.bfd ld.gold nc pigz sftp socat ssh sudo"