[bitbake-devel] hashserv: Merge divergent output hashes

Submitted by Joshua Watt on Nov. 26, 2019, 3:50 p.m. | Patch ID: 167407

Details

Message ID 20191126155036.1541537-1-JPEWhacker@gmail.com
State New
Headers show

Commit Message

Joshua Watt Nov. 26, 2019, 3:50 p.m.
Instructs the hash equivalence server to merge diverging output hashes
to the same unihash when one is reported that unifies them. The primary
use for this is -cross and -native recipes where the outputs will never
match on different host architectures, but the unihashes need to
converge.

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
 bitbake/lib/hashserv/__init__.py |  1 +
 bitbake/lib/hashserv/server.py   | 31 +++++++++++++++++++++++--
 bitbake/lib/hashserv/tests.py    | 39 ++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/bitbake/lib/hashserv/__init__.py b/bitbake/lib/hashserv/__init__.py
index c3318620f54..218a54a04d8 100644
--- a/bitbake/lib/hashserv/__init__.py
+++ b/bitbake/lib/hashserv/__init__.py
@@ -48,6 +48,7 @@  def setup_database(database, sync=True):
         # Create new indexes
         cursor.execute('CREATE INDEX IF NOT EXISTS taskhash_lookup_v2 ON tasks_v2 (method, taskhash, created)')
         cursor.execute('CREATE INDEX IF NOT EXISTS outhash_lookup_v2 ON tasks_v2 (method, outhash)')
+        cursor.execute('CREATE INDEX IF NOT EXISTS unihash_lookup_v2 ON tasks_v2 (method, unihash)')
 
     return db
 
diff --git a/bitbake/lib/hashserv/server.py b/bitbake/lib/hashserv/server.py
index 0aff77688e4..0a294b4b1a0 100644
--- a/bitbake/lib/hashserv/server.py
+++ b/bitbake/lib/hashserv/server.py
@@ -267,9 +267,36 @@  class ServerClient(object):
                 # If a row matching the outhash was found, the unihash for
                 # the new taskhash should be the same as that one.
                 # Otherwise the caller provided unihash is used.
-                unihash = data['unihash']
-                if row is not None:
+                if row is not None and data['unihash'] != row['unihash']:
+                    # Update unihashes to ensure all branches are converging on
+                    # the same unihash. This is generally a bad thing because
+                    # it means that builds are not reproducible, but it
+                    # occasionally is unavoidable such as in the case of -cross
+                    # and -native tasks for different build host architectures.
+                    # For example, take the following taskhashes that produce
+                    # the given outhashes and unihashes:
+                    #
+                    #  taskhash outhash unihash
+                    #  A        Z       1
+                    #  B        Y       2
+                    #  C        Y       3 -> 2
+                    #  B        Z       2 -> 1
+                    #
+                    # In this case, the B task isn't being built reproducibly,
+                    # but for the second B the server sees it matches the
+                    # outhash of A, and reports that the unihash should be
+                    # changed from 2 to 1.
+                    #
+                    # The inconsistency is that there are still entries that
+                    # refer to unihash 2. These should be remapped to unihash
+                    # 1, since the latest entry shows that these are
+                    # equivalent.
+                    cursor.execute('''UPDATE tasks_v2 SET unihash=:new_unihash WHERE method=:method AND unihash=:old_unihash''',
+                            {'method': data['method'], 'new_unihash': row['unihash'], 'old_unihash': data['unihash']})
+
                     unihash = row['unihash']
+                else:
+                    unihash = data['unihash']
 
                 insert_data = {
                     'method': data['method'],
diff --git a/bitbake/lib/hashserv/tests.py b/bitbake/lib/hashserv/tests.py
index a5472a996d2..ee9107a9aa8 100644
--- a/bitbake/lib/hashserv/tests.py
+++ b/bitbake/lib/hashserv/tests.py
@@ -99,6 +99,45 @@  class TestHashEquivalenceServer(object):
         result = self.client.get_unihash(self.METHOD, taskhash)
         self.assertEqual(result, unihash)
 
+    def test_hash_merging(self):
+        # Verify that unihashes are correctly merged together when diverging
+        # hashes are found. Uses the following table which describes how each
+        # task is reported:
+        #
+        #  taskhash outhash unihash
+        #  A        Z       A
+        #  B        Y       B
+        #  C        Y       C -> B
+        #  B        Z       B -> A
+        #
+        A_taskhash = "A"
+        B_taskhash = "B"
+        C_taskhash = "C"
+        Z_outhash = "Z"
+        Y_outhash = "Y"
+
+        result = self.client.report_unihash(A_taskhash, self.METHOD, Z_outhash, A_taskhash)
+        self.assertEqual(result['unihash'], A_taskhash, 'Server reported bad unihash change for task A')
+
+        result = self.client.report_unihash(B_taskhash, self.METHOD, Y_outhash, B_taskhash)
+        self.assertEqual(result['unihash'], B_taskhash, 'Server reported bad unihash change for task B')
+
+        result = self.client.report_unihash(C_taskhash, self.METHOD, Y_outhash, C_taskhash)
+        self.assertEqual(result['unihash'], B_taskhash, 'Server reported bad unihash change for task C')
+
+        # Report a second B with the Z outhash. It should be changed to A's unihash
+        result = self.client.report_unihash(B_taskhash, self.METHOD, Z_outhash, B_taskhash)
+        self.assertEqual(result['unihash'], A_taskhash, 'Server reported bad unihash change for task B')
+
+        # The unihash for C should also be A's unihash
+        result = self.client.get_unihash(self.METHOD, C_taskhash)
+        self.assertEqual(result, A_taskhash, 'Server returned bad unihash for task C')
+
+        # The reported unihash for B should also be A. NOTE: this *should*
+        # return the first B reported because it is older
+        result = self.client.get_unihash(self.METHOD, B_taskhash)
+        self.assertEqual(result, A_taskhash, 'Server returned bad unihash for task B')
+
     def test_stress(self):
         def query_server(failures):
             client = Client(self.server.address)