From patchwork Wed Nov 1 15:42:02 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Joshua Watt X-Patchwork-Id: 33371 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from aws-us-west-2-korg-lkml-1.web.codeaurora.org (localhost.localdomain [127.0.0.1]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0C45FC0018C for ; Wed, 1 Nov 2023 15:42:38 +0000 (UTC) Received: from mail-oi1-f172.google.com (mail-oi1-f172.google.com [209.85.167.172]) by mx.groups.io with SMTP id smtpd.web10.10804.1698853354603050901 for ; Wed, 01 Nov 2023 08:42:34 -0700 Authentication-Results: mx.groups.io; dkim=pass header.i=@gmail.com header.s=20230601 header.b=Ep4B5VSo; spf=pass (domain: gmail.com, ip: 209.85.167.172, mailfrom: jpewhacker@gmail.com) Received: by mail-oi1-f172.google.com with SMTP id 5614622812f47-3b2e4107f47so4710480b6e.2 for ; Wed, 01 Nov 2023 08:42:34 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1698853352; x=1699458152; darn=lists.openembedded.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=D8Xwg4jcVAbaTF/RS5lq0ebYMPWd8462CpU7mMbsAzo=; b=Ep4B5VSo8ZUnjC3LC4eP9F4PYn+xvftPDxKRzaKDAkc5CZvyU20o4kL+Urx3rQacF8 H0j2TO6Y2OzpmW8UXH/4faOcK3vBbU2YoWdwi9YCY9zDhwrEtsDR5UEi+z9Cc9IwfWLe 9rv9uLd/laXjpLe217kr2fPBscQlIDkAlyyV0lgbEw11HvUaJckh9nu9dLrHkj3m8FyB 8oajXlUQsi24yHzsw5q5wBcqZgJBukdLmrp7AmK0vKcW8tcbnwGe2Z+oqJBux/iAxMIT GeyOIUCoxmds+VJZtxXE7qF/M6inafP1R3wIaz/luwvG87k8lqxSyAbp0K1dU7yBbyOD 4Xiw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1698853352; x=1699458152; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=D8Xwg4jcVAbaTF/RS5lq0ebYMPWd8462CpU7mMbsAzo=; b=HdooC6t/rG75Bvtz8EDWH2EfVOlnopwMOCj5165sxbq4khwQ+2xQ04QpqgB+eyXWTC KjPaULRXkhodwEkUs0HGjWsG3iL8c6w2Ldszzs4VqXYdFcG4/AcfINeMm38jEl/UP1n5 +jwte98eiFcu9v3mBIDgUuRhTeTQMtQa9hyfgTHIvSmKUYU/iKppDJU4OJVdgChvCydU 0swB93G7uPEC75CsYTqkWikLM6z96zS3soJhGjyoH+kOBAdQfy1aG+E+oHNEjTLbYpCK 0x6vl+WTrDIvPj+xyb5j4xJEVT04RpBT+Pj0DKPgn6LK8/YCaJAo3dz4Ju17+tHH5s1J qeag== X-Gm-Message-State: AOJu0YzKBNHYjVkZFAGXeTBrdDbUIrAjiiRcV+Rhbmr9bRXXvfk2OgFS 9JkbruJ3CXAwzowl/9+YZrWkiSEYQCM= X-Google-Smtp-Source: AGHT+IHSx84ZIKf0qAMV1HrqGuAKzR+jHCUT33LRwYAxEFR8KKtCYofeX98i7iK3W6txiVf5hIrtOA== X-Received: by 2002:aca:1a09:0:b0:3b2:f50c:1ce5 with SMTP id a9-20020aca1a09000000b003b2f50c1ce5mr16398384oia.31.1698853352648; Wed, 01 Nov 2023 08:42:32 -0700 (PDT) Received: from localhost.localdomain ([2601:282:4300:19e0::6aa6]) by smtp.gmail.com with ESMTPSA id l9-20020aca1909000000b003ae36d664a9sm249651oii.39.2023.11.01.08.42.31 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 01 Nov 2023 08:42:32 -0700 (PDT) From: Joshua Watt X-Google-Original-From: Joshua Watt To: bitbake-devel@lists.openembedded.org Cc: Joshua Watt Subject: [bitbake-devel][PATCH v5 08/22] hashserv: Add SQLalchemy backend Date: Wed, 1 Nov 2023 09:42:02 -0600 Message-Id: <20231101154216.2758185-9-JPEWhacker@gmail.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20231101154216.2758185-1-JPEWhacker@gmail.com> References: <20231031172138.3577199-1-JPEWhacker@gmail.com> <20231101154216.2758185-1-JPEWhacker@gmail.com> MIME-Version: 1.0 List-Id: X-Webhook-Received: from li982-79.members.linode.com [45.33.32.79] by aws-us-west-2-korg-lkml-1.web.codeaurora.org with HTTPS for ; Wed, 01 Nov 2023 15:42:38 -0000 X-Groupsio-URL: https://lists.openembedded.org/g/bitbake-devel/message/15390 Adds an SQLAlchemy backend to the server. While this database backend is slower than the more direct sqlite backend, it easily supports just about any SQL server, which is useful for large scale deployments. Signed-off-by: Joshua Watt --- bin/bitbake-hashserv | 12 ++ lib/bb/asyncrpc/connection.py | 11 +- lib/hashserv/__init__.py | 21 ++- lib/hashserv/sqlalchemy.py | 304 ++++++++++++++++++++++++++++++++++ lib/hashserv/tests.py | 19 ++- 5 files changed, 362 insertions(+), 5 deletions(-) create mode 100644 lib/hashserv/sqlalchemy.py diff --git a/bin/bitbake-hashserv b/bin/bitbake-hashserv index a916a90c..59b8b07f 100755 --- a/bin/bitbake-hashserv +++ b/bin/bitbake-hashserv @@ -69,6 +69,16 @@ To bind to all addresses, leave the ADDRESS empty, e.g. "--bind :8686" or action="store_true", help="Disallow write operations from clients ($HASHSERVER_READ_ONLY)", ) + parser.add_argument( + "--db-username", + default=os.environ.get("HASHSERVER_DB_USERNAME", None), + help="Database username ($HASHSERVER_DB_USERNAME)", + ) + parser.add_argument( + "--db-password", + default=os.environ.get("HASHSERVER_DB_PASSWORD", None), + help="Database password ($HASHSERVER_DB_PASSWORD)", + ) args = parser.parse_args() @@ -90,6 +100,8 @@ To bind to all addresses, leave the ADDRESS empty, e.g. "--bind :8686" or args.database, upstream=args.upstream, read_only=read_only, + db_username=args.db_username, + db_password=args.db_password, ) server.serve_forever() return 0 diff --git a/lib/bb/asyncrpc/connection.py b/lib/bb/asyncrpc/connection.py index a10628f7..7f0cf6ba 100644 --- a/lib/bb/asyncrpc/connection.py +++ b/lib/bb/asyncrpc/connection.py @@ -7,6 +7,7 @@ import asyncio import itertools import json +from datetime import datetime from .exceptions import ClientError, ConnectionClosedError @@ -30,6 +31,12 @@ def chunkify(msg, max_chunk): yield "\n" +def json_serialize(obj): + if isinstance(obj, datetime): + return obj.isoformat() + raise TypeError("Type %s not serializeable" % type(obj)) + + class StreamConnection(object): def __init__(self, reader, writer, timeout, max_chunk=DEFAULT_MAX_CHUNK): self.reader = reader @@ -42,7 +49,7 @@ class StreamConnection(object): return self.writer.get_extra_info("peername") async def send_message(self, msg): - for c in chunkify(json.dumps(msg), self.max_chunk): + for c in chunkify(json.dumps(msg, default=json_serialize), self.max_chunk): self.writer.write(c.encode("utf-8")) await self.writer.drain() @@ -105,7 +112,7 @@ class WebsocketConnection(object): return ":".join(str(s) for s in self.socket.remote_address) async def send_message(self, msg): - await self.send(json.dumps(msg)) + await self.send(json.dumps(msg, default=json_serialize)) async def recv_message(self): m = await self.recv() diff --git a/lib/hashserv/__init__.py b/lib/hashserv/__init__.py index 90d8cff1..9a8ee4e8 100644 --- a/lib/hashserv/__init__.py +++ b/lib/hashserv/__init__.py @@ -35,15 +35,32 @@ def parse_address(addr): return (ADDR_TYPE_TCP, (host, int(port))) -def create_server(addr, dbname, *, sync=True, upstream=None, read_only=False): +def create_server( + addr, + dbname, + *, + sync=True, + upstream=None, + read_only=False, + db_username=None, + db_password=None +): def sqlite_engine(): from .sqlite import DatabaseEngine return DatabaseEngine(dbname, sync) + def sqlalchemy_engine(): + from .sqlalchemy import DatabaseEngine + + return DatabaseEngine(dbname, db_username, db_password) + from . import server - db_engine = sqlite_engine() + if "://" in dbname: + db_engine = sqlalchemy_engine() + else: + db_engine = sqlite_engine() s = server.Server(db_engine, upstream=upstream, read_only=read_only) diff --git a/lib/hashserv/sqlalchemy.py b/lib/hashserv/sqlalchemy.py new file mode 100644 index 00000000..3216621f --- /dev/null +++ b/lib/hashserv/sqlalchemy.py @@ -0,0 +1,304 @@ +#! /usr/bin/env python3 +# +# Copyright (C) 2023 Garmin Ltd. +# +# SPDX-License-Identifier: GPL-2.0-only +# + +import logging +from datetime import datetime + +from sqlalchemy.ext.asyncio import create_async_engine +from sqlalchemy.pool import NullPool +from sqlalchemy import ( + MetaData, + Column, + Table, + Text, + Integer, + UniqueConstraint, + DateTime, + Index, + select, + insert, + exists, + literal, + and_, + delete, +) +import sqlalchemy.engine +from sqlalchemy.orm import declarative_base +from sqlalchemy.exc import IntegrityError + +logger = logging.getLogger("hashserv.sqlalchemy") + +Base = declarative_base() + + +class UnihashesV2(Base): + __tablename__ = "unihashes_v2" + id = Column(Integer, primary_key=True, autoincrement=True) + method = Column(Text, nullable=False) + taskhash = Column(Text, nullable=False) + unihash = Column(Text, nullable=False) + + __table_args__ = ( + UniqueConstraint("method", "taskhash"), + Index("taskhash_lookup_v3", "method", "taskhash"), + ) + + +class OuthashesV2(Base): + __tablename__ = "outhashes_v2" + id = Column(Integer, primary_key=True, autoincrement=True) + method = Column(Text, nullable=False) + taskhash = Column(Text, nullable=False) + outhash = Column(Text, nullable=False) + created = Column(DateTime) + owner = Column(Text) + PN = Column(Text) + PV = Column(Text) + PR = Column(Text) + task = Column(Text) + outhash_siginfo = Column(Text) + + __table_args__ = ( + UniqueConstraint("method", "taskhash", "outhash"), + Index("outhash_lookup_v3", "method", "outhash"), + ) + + +class DatabaseEngine(object): + def __init__(self, url, username=None, password=None): + self.logger = logger + self.url = sqlalchemy.engine.make_url(url) + + if username is not None: + self.url = self.url.set(username=username) + + if password is not None: + self.url = self.url.set(password=password) + + async def create(self): + self.logger.info("Using database %s", self.url) + self.engine = create_async_engine(self.url, poolclass=NullPool) + + async with self.engine.begin() as conn: + # Create tables + logger.info("Creating tables...") + await conn.run_sync(Base.metadata.create_all) + + def connect(self, logger): + return Database(self.engine, logger) + + +def map_row(row): + if row is None: + return None + return dict(**row._mapping) + + +class Database(object): + def __init__(self, engine, logger): + self.engine = engine + self.db = None + self.logger = logger + + async def __aenter__(self): + self.db = await self.engine.connect() + return self + + async def __aexit__(self, exc_type, exc_value, traceback): + await self.close() + + async def close(self): + await self.db.close() + self.db = None + + async def get_unihash_by_taskhash_full(self, method, taskhash): + statement = ( + select( + OuthashesV2, + UnihashesV2.unihash.label("unihash"), + ) + .join( + UnihashesV2, + and_( + UnihashesV2.method == OuthashesV2.method, + UnihashesV2.taskhash == OuthashesV2.taskhash, + ), + ) + .where( + OuthashesV2.method == method, + OuthashesV2.taskhash == taskhash, + ) + .order_by( + OuthashesV2.created.asc(), + ) + .limit(1) + ) + self.logger.debug("%s", statement) + async with self.db.begin(): + result = await self.db.execute(statement) + return map_row(result.first()) + + async def get_unihash_by_outhash(self, method, outhash): + statement = ( + select(OuthashesV2, UnihashesV2.unihash.label("unihash")) + .join( + UnihashesV2, + and_( + UnihashesV2.method == OuthashesV2.method, + UnihashesV2.taskhash == OuthashesV2.taskhash, + ), + ) + .where( + OuthashesV2.method == method, + OuthashesV2.outhash == outhash, + ) + .order_by( + OuthashesV2.created.asc(), + ) + .limit(1) + ) + self.logger.debug("%s", statement) + async with self.db.begin(): + result = await self.db.execute(statement) + return map_row(result.first()) + + async def get_outhash(self, method, outhash): + statement = ( + select(OuthashesV2) + .where( + OuthashesV2.method == method, + OuthashesV2.outhash == outhash, + ) + .order_by( + OuthashesV2.created.asc(), + ) + .limit(1) + ) + + self.logger.debug("%s", statement) + async with self.db.begin(): + result = await self.db.execute(statement) + return map_row(result.first()) + + async def get_equivalent_for_outhash(self, method, outhash, taskhash): + statement = ( + select( + OuthashesV2.taskhash.label("taskhash"), + UnihashesV2.unihash.label("unihash"), + ) + .join( + UnihashesV2, + and_( + UnihashesV2.method == OuthashesV2.method, + UnihashesV2.taskhash == OuthashesV2.taskhash, + ), + ) + .where( + OuthashesV2.method == method, + OuthashesV2.outhash == outhash, + OuthashesV2.taskhash != taskhash, + ) + .order_by( + OuthashesV2.created.asc(), + ) + .limit(1) + ) + self.logger.debug("%s", statement) + async with self.db.begin(): + result = await self.db.execute(statement) + return map_row(result.first()) + + async def get_equivalent(self, method, taskhash): + statement = select( + UnihashesV2.unihash, + UnihashesV2.method, + UnihashesV2.taskhash, + ).where( + UnihashesV2.method == method, + UnihashesV2.taskhash == taskhash, + ) + self.logger.debug("%s", statement) + async with self.db.begin(): + result = await self.db.execute(statement) + return map_row(result.first()) + + async def remove(self, condition): + async def do_remove(table): + where = {} + for c in table.__table__.columns: + if c.key in condition and condition[c.key] is not None: + where[c] = condition[c.key] + + if where: + statement = delete(table).where(*[(k == v) for k, v in where.items()]) + self.logger.debug("%s", statement) + async with self.db.begin(): + result = await self.db.execute(statement) + return result.rowcount + + return 0 + + count = 0 + count += await do_remove(UnihashesV2) + count += await do_remove(OuthashesV2) + + return count + + async def clean_unused(self, oldest): + statement = delete(OuthashesV2).where( + OuthashesV2.created < oldest, + ~( + select(UnihashesV2.id) + .where( + UnihashesV2.method == OuthashesV2.method, + UnihashesV2.taskhash == OuthashesV2.taskhash, + ) + .limit(1) + .exists() + ), + ) + self.logger.debug("%s", statement) + async with self.db.begin(): + result = await self.db.execute(statement) + return result.rowcount + + async def insert_unihash(self, method, taskhash, unihash): + statement = insert(UnihashesV2).values( + method=method, + taskhash=taskhash, + unihash=unihash, + ) + self.logger.debug("%s", statement) + try: + async with self.db.begin(): + await self.db.execute(statement) + return True + except IntegrityError: + logger.debug( + "%s, %s, %s already in unihash database", method, taskhash, unihash + ) + return False + + async def insert_outhash(self, data): + outhash_columns = set(c.key for c in OuthashesV2.__table__.columns) + + data = {k: v for k, v in data.items() if k in outhash_columns} + + if "created" in data and not isinstance(data["created"], datetime): + data["created"] = datetime.fromisoformat(data["created"]) + + statement = insert(OuthashesV2).values(**data) + self.logger.debug("%s", statement) + try: + async with self.db.begin(): + await self.db.execute(statement) + return True + except IntegrityError: + logger.debug( + "%s, %s already in outhash database", data["method"], data["outhash"] + ) + return False diff --git a/lib/hashserv/tests.py b/lib/hashserv/tests.py index 4c98a280..268b2700 100644 --- a/lib/hashserv/tests.py +++ b/lib/hashserv/tests.py @@ -33,7 +33,7 @@ class HashEquivalenceTestSetup(object): def start_server(self, dbpath=None, upstream=None, read_only=False, prefunc=server_prefunc): self.server_index += 1 if dbpath is None: - dbpath = os.path.join(self.temp_dir.name, "db%d.sqlite" % self.server_index) + dbpath = self.make_dbpath() def cleanup_server(server): if server.process.exitcode is not None: @@ -53,6 +53,9 @@ class HashEquivalenceTestSetup(object): return server + def make_dbpath(self): + return os.path.join(self.temp_dir.name, "db%d.sqlite" % self.server_index) + def start_client(self, server_address): def cleanup_client(client): client.close() @@ -517,6 +520,20 @@ class TestHashEquivalenceWebsocketServer(HashEquivalenceTestSetup, HashEquivalen return "ws://%s:0" % host +class TestHashEquivalenceWebsocketsSQLAlchemyServer(TestHashEquivalenceWebsocketServer): + def setUp(self): + try: + import sqlalchemy + import aiosqlite + except ImportError as e: + self.skipTest(str(e)) + + super().setUp() + + def make_dbpath(self): + return "sqlite+aiosqlite:///%s" % os.path.join(self.temp_dir.name, "db%d.sqlite" % self.server_index) + + class TestHashEquivalenceExternalServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase): def start_test_server(self): if 'BB_TEST_HASHSERV' not in os.environ: