[bitbake-devel] gitsm: Check all submodules for updates

Submitted by Nicolas Cornu via bitbake-devel on March 27, 2019, 6:03 a.m. | Patch ID: 159809

Details

Message ID 20190327060316.136092-1-wak@google.com
State New
Headers show

Commit Message

Nicolas Cornu via bitbake-devel March 27, 2019, 6:03 a.m.
Currently, our gitsm fetcher is inheriting the need_update function from
the git fetcher. This results in our do_fetch checking for updates only
if the SRCREV is missing from the top-level project.

Unfortunately, this doesn't take into account the dependent SRCREVs
missing from submodules due to a lack of the submodule existing
during the first checkout, or arbitrary download cache purges of
submodule data.

The most trivial way to encounter this is to have gitsm fetch a software
project before and after adding submodules. When it goes to use the
update that has submodules it will think that it has all of the needed
revisions since it checked the top-level repo. However none of those
submodules will exist in the downloads cache since they weren't included
in the original commit.

Tested:
    Ran the unit test suite with just the new unit test to make sure it
    fails repeatedly. Then applied the fix and ran it some more to
    ensure the fix works.

Signed-off-by: William A. Kennington III <wak@google.com>
---
 lib/bb/fetch2/gitsm.py | 24 ++++++++++++++++++++++++
 lib/bb/tests/fetch.py  | 19 +++++++++++++++++++
 2 files changed, 43 insertions(+)

Patch hide | download patch | download mbox

diff --git a/lib/bb/fetch2/gitsm.py b/lib/bb/fetch2/gitsm.py
index b21fed26..f1ecd170 100644
--- a/lib/bb/fetch2/gitsm.py
+++ b/lib/bb/fetch2/gitsm.py
@@ -36,6 +36,7 @@  from   bb.fetch2.git import Git
 from   bb.fetch2 import runfetchcmd
 from   bb.fetch2 import logger
 from   bb.fetch2 import Fetch
+from   bb.fetch2 import FetchData
 from   bb.fetch2 import BBFetchException
 
 class GitSM(Git):
@@ -147,6 +148,29 @@  class GitSM(Git):
 
         return submodules != []
 
+    def need_update(self, ud, d):
+        if Git.need_update(self, ud, d):
+            return True
+        class NeedUpdateException(Exception):
+            pass
+        def need_update_submodule(ud, url, module, modpath, d):
+            url += ";bareclone=1;nobranch=1"
+            lud = FetchData(url, d, False)
+            lud.setup_localpath(d)
+            if lud.lockfile:
+                lf = bb.utils.lockfile(lud.lockfile)
+            try:
+                if lud.method.need_update(lud, d):
+                    raise NeedUpdateException
+            finally:
+                if lud.lockfile:
+                    bb.utils.unlockfile(lf)
+        try:
+            self.process_submodules(ud, ud.clonedir, need_update_submodule, d)
+            return False
+        except NeedUpdateException:
+            return True
+
     def download(self, ud, d):
         def download_submodule(ud, url, module, modpath, d):
             url += ";bareclone=1;nobranch=1"
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index 25732627..429998b3 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -942,6 +942,25 @@  class FetcherNetworkTest(FetcherTest):
         self.assertTrue(os.path.exists(os.path.join(repo_path, '.git/modules/extern/json/config')), msg='Missing submodule config "extern/json"')
         self.assertTrue(os.path.exists(os.path.join(repo_path, '.git/modules/extern/sanitizers/config')), msg='Missing submodule config "extern/sanitizers"')
 
+    def test_git_submodule_update_CLI11(self):
+        """ Prevent regression on update detection not finding missing submodule, or modules without needed commits """
+        url = "gitsm://github.com/CLIUtils/CLI11;protocol=git;rev=cf6a99fa69aaefe477cc52e3ef4a7d2d7fa40714"
+        fetcher = bb.fetch.Fetch([url], self.d)
+        fetcher.download()
+
+        # CLI11 that pulls in a newer nlohmann-json
+        url = "gitsm://github.com/CLIUtils/CLI11;protocol=git;rev=49ac989a9527ee9bb496de9ded7b4872c2e0e5ca"
+        fetcher = bb.fetch.Fetch([url], self.d)
+        fetcher.download()
+        # Previous cwd has been deleted
+        os.chdir(os.path.dirname(self.unpackdir))
+        fetcher.unpack(self.unpackdir)
+
+        repo_path = os.path.join(self.tempdir, 'unpacked', 'git')
+        self.assertTrue(os.path.exists(os.path.join(repo_path, '.git/modules/extern/googletest/config')), msg='Missing submodule config "extern/googletest"')
+        self.assertTrue(os.path.exists(os.path.join(repo_path, '.git/modules/extern/json/config')), msg='Missing submodule config "extern/json"')
+        self.assertTrue(os.path.exists(os.path.join(repo_path, '.git/modules/extern/sanitizers/config')), msg='Missing submodule config "extern/sanitizers"')
+
     def test_git_submodule_aktualizr(self):
         url = "gitsm://github.com/advancedtelematic/aktualizr;branch=master;protocol=git;rev=d00d1a04cc2366d1a5f143b84b9f507f8bd32c44"
         fetcher = bb.fetch.Fetch([url], self.d)

Comments

Mark Hatle March 27, 2019, 4:01 p.m.
On 3/27/19 1:03 AM, William A. Kennington III via bitbake-devel wrote:
> Currently, our gitsm fetcher is inheriting the need_update function from
> the git fetcher. This results in our do_fetch checking for updates only
> if the SRCREV is missing from the top-level project.

The theory here is that if the original (fetched) repository exists, and nothing
has changed, then there is no reason to further iterate into the submodules --
since by definitely they could not have changed, in a meaningful way, since the
main git didn't change it's references to them.

> Unfortunately, this doesn't take into account the dependent SRCREVs
> missing from submodules due to a lack of the submodule existing
> during the first checkout, or arbitrary download cache purges of
> submodule data.

Looking at what is happening via the test case included, the system fetches the
upstream repository once.. and sets up everything.

It then changes the SRCREV (emulating the condition of say two package versions
that point to the same repo, but different commits.)  The system says, well I
have already downloaded a repo with that srcrev -- no reason to iterate and do
anymore work, then when it unpacks and iterates it fails due to the new items
not being available as expected.

So really, we need to resync/update the fetch if the SRCREV has changed from the
last fetch to the new fetch..  this should then trigger the new fetch/update
behavior automatically.

...but I'm not sure we can know if the SRCREV has actually changed (the
reference to it) from the last time we fetched...

> The most trivial way to encounter this is to have gitsm fetch a software
> project before and after adding submodules. When it goes to use the
> update that has submodules it will think that it has all of the needed
> revisions since it checked the top-level repo. However none of those
> submodules will exist in the downloads cache since they weren't included
> in the original commit.

What it was intended to do was, if the main repository was updated (for any
reason), then it was supposed to iterate over all of the submodules, and then
update each one independently.  Since the main repository is not being updated
(we already have the commit, we just were not using it), it never updated so it
never iterated.

I trust what your change below does fixes the issue, but I'm still wondering if
there is some more fundamental issue that has to be addressed first.

(BTW I really appreciate the test case being added.  It will help prevent this
type of problem in the future -- and also gives me a chance to replicate what is
happening with and without the rest of the change.)

I'm trying to figure out if there might be a simpler way to check if the SRCREV
has changed from the initial download, even if we have the new SRCREV in the
repository.

--Mark

> Tested:
>     Ran the unit test suite with just the new unit test to make sure it
>     fails repeatedly. Then applied the fix and ran it some more to
>     ensure the fix works.
> 
> Signed-off-by: William A. Kennington III <wak@google.com>
> ---
>  lib/bb/fetch2/gitsm.py | 24 ++++++++++++++++++++++++
>  lib/bb/tests/fetch.py  | 19 +++++++++++++++++++
>  2 files changed, 43 insertions(+)
> 
> diff --git a/lib/bb/fetch2/gitsm.py b/lib/bb/fetch2/gitsm.py
> index b21fed26..f1ecd170 100644
> --- a/lib/bb/fetch2/gitsm.py
> +++ b/lib/bb/fetch2/gitsm.py
> @@ -36,6 +36,7 @@ from   bb.fetch2.git import Git
>  from   bb.fetch2 import runfetchcmd
>  from   bb.fetch2 import logger
>  from   bb.fetch2 import Fetch
> +from   bb.fetch2 import FetchData
>  from   bb.fetch2 import BBFetchException
>  
>  class GitSM(Git):
> @@ -147,6 +148,29 @@ class GitSM(Git):
>  
>          return submodules != []
>  
> +    def need_update(self, ud, d):
> +        if Git.need_update(self, ud, d):
> +            return True
> +        class NeedUpdateException(Exception):
> +            pass
> +        def need_update_submodule(ud, url, module, modpath, d):
> +            url += ";bareclone=1;nobranch=1"
> +            lud = FetchData(url, d, False)
> +            lud.setup_localpath(d)
> +            if lud.lockfile:
> +                lf = bb.utils.lockfile(lud.lockfile)
> +            try:
> +                if lud.method.need_update(lud, d):
> +                    raise NeedUpdateException
> +            finally:
> +                if lud.lockfile:
> +                    bb.utils.unlockfile(lf)
> +        try:
> +            self.process_submodules(ud, ud.clonedir, need_update_submodule, d)
> +            return False
> +        except NeedUpdateException:
> +            return True
> +
>      def download(self, ud, d):
>          def download_submodule(ud, url, module, modpath, d):
>              url += ";bareclone=1;nobranch=1"
> diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
> index 25732627..429998b3 100644
> --- a/lib/bb/tests/fetch.py
> +++ b/lib/bb/tests/fetch.py
> @@ -942,6 +942,25 @@ class FetcherNetworkTest(FetcherTest):
>          self.assertTrue(os.path.exists(os.path.join(repo_path, '.git/modules/extern/json/config')), msg='Missing submodule config "extern/json"')
>          self.assertTrue(os.path.exists(os.path.join(repo_path, '.git/modules/extern/sanitizers/config')), msg='Missing submodule config "extern/sanitizers"')
>  
> +    def test_git_submodule_update_CLI11(self):
> +        """ Prevent regression on update detection not finding missing submodule, or modules without needed commits """
> +        url = "gitsm://github.com/CLIUtils/CLI11;protocol=git;rev=cf6a99fa69aaefe477cc52e3ef4a7d2d7fa40714"
> +        fetcher = bb.fetch.Fetch([url], self.d)
> +        fetcher.download()
> +
> +        # CLI11 that pulls in a newer nlohmann-json
> +        url = "gitsm://github.com/CLIUtils/CLI11;protocol=git;rev=49ac989a9527ee9bb496de9ded7b4872c2e0e5ca"
> +        fetcher = bb.fetch.Fetch([url], self.d)
> +        fetcher.download()
> +        # Previous cwd has been deleted
> +        os.chdir(os.path.dirname(self.unpackdir))
> +        fetcher.unpack(self.unpackdir)
> +
> +        repo_path = os.path.join(self.tempdir, 'unpacked', 'git')
> +        self.assertTrue(os.path.exists(os.path.join(repo_path, '.git/modules/extern/googletest/config')), msg='Missing submodule config "extern/googletest"')
> +        self.assertTrue(os.path.exists(os.path.join(repo_path, '.git/modules/extern/json/config')), msg='Missing submodule config "extern/json"')
> +        self.assertTrue(os.path.exists(os.path.join(repo_path, '.git/modules/extern/sanitizers/config')), msg='Missing submodule config "extern/sanitizers"')
> +
>      def test_git_submodule_aktualizr(self):
>          url = "gitsm://github.com/advancedtelematic/aktualizr;branch=master;protocol=git;rev=d00d1a04cc2366d1a5f143b84b9f507f8bd32c44"
>          fetcher = bb.fetch.Fetch([url], self.d)
>