[yocto-autobuilder-helper,hardknott,3/3] shared-repos: Use tar instead of rsync for speed

Message ID 20220228141629.7688-3-anuj.mittal@intel.com
State New
Headers show
Series [yocto-autobuilder-helper,hardknott,1/3] scripts/prepare-shared-repos: Use tmpfs for speed | expand

Commit Message

Mittal, Anuj Feb. 28, 2022, 2:16 p.m. UTC
From: Richard Purdie <richard.purdie@linuxfoundation.org>

The rsync of 20,000 files (650MB) onto the NAS is slow, taking ~3 minutes
at idle and worse under load. This is due to the number of files, which
is a pain point for NFS. This piece of the build is also a bottleneck
since the rest of a build depends on it happening.

If we switch to zstd compressed tar, it takes 2.49s. Other compression
methods were much slower but zstd seems 'acceptable' and speeds things
up too.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
(cherry picked from commit aff49e938ee34e1fc5a2954e3e22a4ca1ae9ac7b)
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
---
 scripts/prepare-shared-repos | 4 ++--
 scripts/send-qa-email        | 6 ++++--
 scripts/shared-repo-unpack   | 9 ++++++---
 3 files changed, 12 insertions(+), 7 deletions(-)

Patch

diff --git a/scripts/prepare-shared-repos b/scripts/prepare-shared-repos
index c221e69..a5bc0da 100755
--- a/scripts/prepare-shared-repos
+++ b/scripts/prepare-shared-repos
@@ -39,5 +39,5 @@  with tempfile.TemporaryDirectory(prefix="shared-repo-temp-", dir="/home/pokybuil
         if args.publish_dir:
             utils.publishrepo(tempdir, repo, args.publish_dir)
 
-    utils.printheader("Running rsync")
-    subprocess.check_call("rsync -a " + tempdir + "/* " + args.sharedsrcdir, shell=True)
+    utils.printheader("Creating shared src tarball")
+    subprocess.check_call("tar -I zstd -cf " + args.sharedsrcdir.rstrip("/") + ".tar.zst ./*", shell=True, cwd=tempdir)
diff --git a/scripts/send-qa-email b/scripts/send-qa-email
index 1b69307..bc594df 100755
--- a/scripts/send-qa-email
+++ b/scripts/send-qa-email
@@ -45,9 +45,11 @@  buildtoolsdir = os.path.dirname(args.repojson) + "/build/buildtools"
 if os.path.exists(buildtoolsdir):
     utils.enable_buildtools_tarball(buildtoolsdir)
 
+repodir = os.path.dirname(args.repojson) + "/build/repos"
+
 if 'poky' in repos and os.path.exists(resulttool) and args.results_dir:
     # Need the finalised revisions (not 'HEAD')
-    targetrepodir = "%s/poky" % (args.sharedrepodir)
+    targetrepodir = "%s/poky" % (repodir)
     revision = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=targetrepodir).decode('utf-8').strip()
     branch = repos['poky']['branch']
     repo = repos['poky']['url']
@@ -116,7 +118,7 @@  if args.send.lower() != 'true' or not args.publish_dir or not args.release:
 buildhashes = ""
 for repo in sorted(repos.keys()):
     # Need the finalised revisions (not 'HEAD')
-    targetrepodir = "%s/%s" % (args.sharedrepodir, repo)
+    targetrepodir = "%s/%s" % (repodir, repo)
     revision = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=targetrepodir).decode('utf-8').strip()
     buildhashes += "%s: %s\n" % (repo, revision)
 
diff --git a/scripts/shared-repo-unpack b/scripts/shared-repo-unpack
index f08efa8..f7f87af 100755
--- a/scripts/shared-repo-unpack
+++ b/scripts/shared-repo-unpack
@@ -50,11 +50,14 @@  needrepos_baseddirs = [r.split('/')[0] for r in needrepos]
 for repo in sorted(repos.keys()):
     if repo not in needrepos_baseddirs:
         continue
-    targetrepodir = "%s/%s" % (targetsubdir, repo)
     if args.cache_dir:
         utils.printheader("Copying in repo %s" % repo)
-        utils.mkdir(targetrepodir)
-        subprocess.check_call(["rsync", "-a", "%s/%s" % (args.cache_dir, repo), targetsubdir])
+        utils.mkdir(targetsubdir)
+        if args.target in ["a-full", "a-quick"]:
+            # full/quick need all repo data due to send-qa-email
+            subprocess.check_call(["tar", "-I", "zstd", "-C", targetsubdir, "-xf", "%s.tar.zst" % args.cache_dir])
+        else:
+            subprocess.check_call(["tar", "-I", "zstd", "-C", targetsubdir, "-xf", "%s.tar.zst" % args.cache_dir, "./" + repo])
     else:
         utils.printheader("Fetching repo %s" % repo)
         utils.fetchgitrepo(targetsubdir, repo, repos[repo], stashdir)