diff mbox series

[RFC] bitbake: fetch2/git: Use git fetch to shallow clone revisions

Message ID 20220826121330.192914-1-liezhi.yang@windriver.com
State New
Headers show
Series [RFC] bitbake: fetch2/git: Use git fetch to shallow clone revisions | expand

Commit Message

Robert Yang Aug. 26, 2022, 12:13 p.m. UTC
The "git clone --depth" only works for refs, doesn't support revisions, but
"git fetch --depth" supports revisions, so use it to do the shallow clone.
The idea is from "git clone --recurse-submodules --shallow-submodules".

The workflow is (Only enable when BB_GIT_SHALLOW = "1"):
$ git init --bare <clonedir>
$ git remote add origin <url>
$ git fetch origin --depth <depth> <revision>
$ git branch <branchname> FETCH_HEAD
$ git tag v<branchname> FETCH_HEAD

Here is the testing data based on poky, the testing server has a very good
network bandwidth:

Add 'BB_GIT_SHALLOW = "1"' to conf/local.conf
$ rm -fr tmp downloads # Fresh download for each build
$ time bitbake world --runall=fetch

       Full        Shallow      Saved
--------------------------------------
Time:  15m59        2m31         84% (13m28s)
Size:  12G          1.2G         90% (10.8G)

* Size is the size of downloads/git2/, the tarballs are not counted.

We can see that it saves a lot of download time and disk space, for example:

linux-yocto: 2.8G -> 228M
llvm: 2.5G -> 171M
cryptography: 1.5G -> 35M

And "$ bitbake world" works well.

This is an RFC patch, feel free to give your comments.

Signed-off-by: Robert Yang <liezhi.yang@windriver.com>
---
 bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++--------
 1 file changed, 66 insertions(+), 17 deletions(-)
diff mbox series

Patch

diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py
index 4534bd75800..57bb61d5ee1 100644
--- a/bitbake/lib/bb/fetch2/git.py
+++ b/bitbake/lib/bb/fetch2/git.py
@@ -244,6 +244,7 @@  class Git(FetchMethod):
                 ud.unresolvedrev[name] = 'HEAD'
 
         ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat"
+        ud.basecmd = "LANG=C %s" % ud.basecmd
 
         write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0"
         ud.write_tarballs = write_tarballs != "0" or ud.rebaseable
@@ -344,6 +345,49 @@  class Git(FetchMethod):
             return False
         return True
 
+    def shallow_clone_by_fetch(self, ud, repourl, d):
+        """
+        Use "git fetch --depth <depth> revision" to implement shallow clone
+        since git can't clone a revision, a better solution should be:
+        "git fetch --depth <depth> revision:<branchname>" but it doesn't work
+        when revision is a tag, e.g.:
+        error: cannot update ref 'refs/heads/master': trying to write
+                non-commit object <revision> to branch 'refs/heads/master'
+        """
+
+        import datetime
+
+        depth = ud.shallow_depths[ud.names[0]]
+        revision = ud.revisions[ud.names[0]]
+        branchname = ud.branches[ud.names[0]]
+        if not branchname:
+            branchname = "master"
+
+        # Rename branchname if it exists which can:
+        # - Avoid conflicts during update
+        # - Keep the revision on a branch so that "git submodule update --recursive"
+        #    can work since it requires the revision on a branch.
+        branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname)
+        if os.path.exists(branch_path):
+            os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S")))
+
+        init_cmd = "%s init --bare -q" % ud.basecmd
+        add_remote_cmd = "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl))
+        fetch_cmd = "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision)
+        # Create both branch and tag for the revision
+        branch_cmd = "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname)
+        tag_cmd = "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname)
+
+        if ud.proto.lower() != 'file':
+            bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
+
+        if not os.path.exists(ud.clonedir):
+            bb.utils.mkdirhier(ud.clonedir)
+
+        progresshandler = GitProgressHandler(d)
+        for cmd in (init_cmd, add_remote_cmd, fetch_cmd, branch_cmd, tag_cmd):
+            runfetchcmd(cmd, d, log=progresshandler, workdir=ud.clonedir)
+
     def download(self, ud, d):
         """Fetch url"""
 
@@ -360,7 +404,7 @@  class Git(FetchMethod):
             else:
                 tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
                 runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir)
-                fetch_cmd = "LANG=C %s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir))
+                fetch_cmd = "%s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir))
                 runfetchcmd(fetch_cmd, d, workdir=ud.clonedir)
         repourl = self._get_repo_url(ud)
 
@@ -369,27 +413,32 @@  class Git(FetchMethod):
             # We do this since git will use a "-l" option automatically for local urls where possible
             if repourl.startswith("file://"):
                 repourl = repourl[7:]
-            clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir)
-            if ud.proto.lower() != 'file':
-                bb.fetch2.check_network_access(d, clone_cmd, ud.url)
-            progresshandler = GitProgressHandler(d)
-            runfetchcmd(clone_cmd, d, log=progresshandler)
+            if ud.shallow:
+                self.shallow_clone_by_fetch(ud, repourl, d)
+            else:
+                clone_cmd = "%s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir)
+                progresshandler = GitProgressHandler(d)
+                if ud.proto.lower() != 'file':
+                    bb.fetch2.check_network_access(d, clone_cmd, ud.url)
+                runfetchcmd(clone_cmd, d, log=progresshandler)
 
         # Update the checkout if needed
         if self.clonedir_need_update(ud, d):
             output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir)
             if "origin" in output:
-              runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir)
-
-            runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir)
-            fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl))
-            if ud.proto.lower() != 'file':
-                bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
-            progresshandler = GitProgressHandler(d)
-            runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir)
-            runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir)
-            runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir)
-            runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir)
+                runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir)
+            if ud.shallow:
+                self.shallow_clone_by_fetch(ud, repourl, d)
+            else:
+                runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir)
+                fetch_cmd = "%s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl))
+                if ud.proto.lower() != 'file':
+                    bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
+                progresshandler = GitProgressHandler(d)
+                runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir)
+                runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir)
+                runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir)
+                runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir)
             try:
                 os.unlink(ud.fullmirror)
             except OSError as exc: