diff mbox series

[bitbake-devel,4/5] hashserv: Add API to clean unused entries

Message ID 20231006153645.1609760-5-JPEWhacker@gmail.com
State New
Headers show
Series Add cleanup commands for hash equivalence | expand

Commit Message

Joshua Watt Oct. 6, 2023, 3:36 p.m. UTC
Adds an API to remove unused entries from the outhash database. An entry is
removed when it is older than a given age and not referenced by any unihash

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
 bitbake/lib/hashserv/client.py |  5 +++++
 bitbake/lib/hashserv/server.py | 20 +++++++++++++++++++-
 bitbake/lib/hashserv/tests.py  | 19 +++++++++++++++++++
 3 files changed, 43 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/bitbake/lib/hashserv/client.py b/bitbake/lib/hashserv/client.py
index eeafeabda05..d5c981864a2 100644
--- a/bitbake/lib/hashserv/client.py
+++ b/bitbake/lib/hashserv/client.py
@@ -105,6 +105,10 @@  class AsyncClient(bb.asyncrpc.AsyncClient):
         await self._set_mode(self.MODE_NORMAL)
         return await self.send_message({"remove": {"where": where}})
 
+    async def clean_unused(self, max_age):  # max_age is sent as "max_age_seconds"
+        await self._set_mode(self.MODE_NORMAL)
+        return await self.send_message({"clean_unused": {"max_age_seconds": max_age}})
+
 
 class Client(bb.asyncrpc.Client):
     def __init__(self):
@@ -120,6 +124,7 @@  class Client(bb.asyncrpc.Client):
             "reset_stats",
             "backfill_wait",
             "remove",
+            "clean_unused",
         )
 
     def _get_async_client(self):
diff --git a/bitbake/lib/hashserv/server.py b/bitbake/lib/hashserv/server.py
index d52e1d46df5..b2ca357b2b1 100644
--- a/bitbake/lib/hashserv/server.py
+++ b/bitbake/lib/hashserv/server.py
@@ -4,7 +4,7 @@ 
 #
 
 from contextlib import closing, contextmanager
-from datetime import datetime
+from datetime import datetime, timedelta
 import enum
 import asyncio
 import logging
@@ -187,6 +187,7 @@  class ServerClient(bb.asyncrpc.AsyncServerConnection):
                 'reset-stats': self.handle_reset_stats,
                 'backfill-wait': self.handle_backfill_wait,
                 'remove': self.handle_remove,
+                'clean_unused': self.handle_clean_unused,
             })
 
     def validate_proto_version(self):
@@ -542,6 +543,23 @@  class ServerClient(bb.asyncrpc.AsyncServerConnection):
 
         self.write_message({"count": count})
 
+    async def handle_clean_unused(self, request):
+        """Delete unreferenced outhashes older than max_age_seconds; reply with the count."""
+        # Subtract a positive age so the cutoff lies in the past, not the future
+        oldest = datetime.now() - timedelta(seconds=request["max_age_seconds"])
+        with closing(self.db.cursor()) as cursor:
+            cursor.execute(
+                """
+                DELETE FROM outhashes_v2 WHERE created<:oldest AND NOT EXISTS (
+                    SELECT unihashes_v2.id FROM unihashes_v2 WHERE unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash LIMIT 1
+                )
+                """,
+                {"oldest": oldest},
+            )
+            count = cursor.rowcount
+
+        self.write_message({"count": count})
+
     def query_equivalent(self, cursor, method, taskhash):
         # This is part of the inner loop and must be as fast as possible
         cursor.execute(
diff --git a/bitbake/lib/hashserv/tests.py b/bitbake/lib/hashserv/tests.py
index a3e066406e3..f343c586b5d 100644
--- a/bitbake/lib/hashserv/tests.py
+++ b/bitbake/lib/hashserv/tests.py
@@ -158,6 +158,25 @@  class HashEquivalenceCommonTests(object):
         result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
         self.assertIsNone(result_outhash)
 
+    def test_clean_unused(self):
+        taskhash, outhash, unihash = self.test_create_hash()
+
+        # Clean the database, which should not remove anything because all hashes are in-use
+        result = self.client.clean_unused(0)
+        self.assertEqual(result["count"], 0)
+        self.assertClientGetHash(self.client, taskhash, unihash)
+
+        # Remove the unihash. The row in the outhash table should still be present
+        self.client.remove({"unihash": unihash})
+        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash, False)
+        self.assertIsNotNone(result_outhash)
+
+        # Now clean with a max age of 0, which will remove the no-longer-referenced outhash
+        result = self.client.clean_unused(0)
+        self.assertEqual(result["count"], 1)
+        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash, False)
+        self.assertIsNone(result_outhash)
+
     def test_huge_message(self):
         # Simple test that hashes can be created
         taskhash = 'c665584ee6817aa99edfc77a44dd853828279370'