diff mbox series

[RFC,v2,05/12] oe/sbom: search into json

Message ID 20231031224733.367227-6-louis.rannou@syslinbit.com
State New
Headers show
Series SPDX3 Proof-of-Concept | expand

Commit Message

Louis Rannou Oct. 31, 2023, 10:47 p.m. UTC
Create a function that searches a JSON-LD file instead of loading it completely.

Signed-off-by: Louis Rannou <louis.rannou@syslinbit.com>
---
 meta/lib/oe/sbom.py  | 32 ++++++++++++++++++++++++++++++++
 meta/lib/oe/spdx3.py | 13 +++++++------
 2 files changed, 39 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/meta/lib/oe/sbom.py b/meta/lib/oe/sbom.py
index ec543fa43d..c99ae1a228 100644
--- a/meta/lib/oe/sbom.py
+++ b/meta/lib/oe/sbom.py
@@ -120,3 +120,35 @@  def read_doc(fn):
         doc = oe.spdx.SPDXDocument.from_json(f)
 
     return (doc, sha1.hexdigest())
+
+
+def search_doc(fn, attr_types=None):
+    """
+    Search the given file for objects of the given types. Return the document
+    element, a dictionary of the matching objects and the sha1 of the file.
+    """
+    import hashlib
+    import oe.spdx3
+    import io
+    import contextlib
+
+    @contextlib.contextmanager
+    def get_file():
+        if isinstance(fn, io.IOBase):
+            yield fn
+        else:
+            with fn.open("rb") as f:
+                yield f
+
+    with get_file() as f:
+        sha1 = hashlib.sha1()
+        while True:
+            chunk = f.read(4096)
+            if not chunk:
+                break
+            sha1.update(chunk)
+
+        f.seek(0)
+        doc, attributes = oe.spdx3.SPDX3SpdxDocument.from_json(f, attr_types or [])
+
+    return (doc, attributes, sha1.hexdigest())
diff --git a/meta/lib/oe/spdx3.py b/meta/lib/oe/spdx3.py
index a027c0ee5b..36ba7aa1c3 100644
--- a/meta/lib/oe/spdx3.py
+++ b/meta/lib/oe/spdx3.py
@@ -286,17 +286,16 @@  class SPDX3SpdxDocument(SPDX3Bundle):
     @classmethod
     def from_json(cls, f, attributes=[]):
         """
-        Look into a json file for all objects of given type. Return the document
-        element and a dictionary of required objects.
+        Look into a json file. This will return a dictionary that represents
+        the SpdxDocument and, if attributes is specified, the representations
+        of those attributes grouped by type.
         """
+
         class Decoder(json.JSONDecoder):
             def __init__(self, *args, **kwargs):
                 super().__init__(object_hook=self.object_hook, *args, **kwargs)
 
             def object_hook(self, d):
-                if 'type' in d.keys():
-                    if d['type'] in attributes or d['type'] == 'SpdxDocument':
-                        return d
                 if '@graph' in d.keys():
                     spdxDocument = None
                     attr = {a: [] for a in attributes}
@@ -304,9 +303,11 @@  class SPDX3SpdxDocument(SPDX3Bundle):
                         if p is not None:
                             if p['type'] == 'SpdxDocument':
                                 spdxDocument = p
-                            else:
+                            elif p['type'] in attributes:
                                 attr[p['type']].append(p)
                     return (spdxDocument, attr)
+                else:
+                    return d
 
         return json.load(f, cls=Decoder)