@@ -688,6 +688,40 @@ Here is an example URL::
It can also be used when setting mirrors definitions using the :term:`PREMIRRORS` variable.
+.. _gcp-fetcher:
+
+GCP Fetcher (``gs://``)
+--------------------------
+
+This submodule fetches data from a
+`Google Cloud Storage Bucket <https://cloud.google.com/storage/docs/buckets>`__.
+It uses the `Google Cloud Storage Python Client <https://cloud.google.com/python/docs/reference/storage/latest>`__
+to check the status of objects in the bucket and download them.
+The use of the Python client makes it substantially faster than using command
+line tools such as gsutil.
+
+The fetcher requires the Google Cloud Storage Python Client to be installed, along
+with the gsutil tool.
+
+The fetcher requires that the machine has valid credentials for accessing the
+chosen bucket. Instructions for authentication can be found in the
+`Google Cloud documentation <https://cloud.google.com/docs/authentication/provide-credentials-adc#local-dev>`__.
+
+The fetcher can be used for fetching sstate artifacts from a GCS bucket by
+specifying the :term:`SSTATE_MIRRORS` variable as shown below::
+
+ SSTATE_MIRRORS ?= "\
+ file://.* gs://<bucket name>/PATH \
+ "
+
+The fetcher can also be used in recipes::
+
+ SRC_URI = "gs://<bucket name>/<foo_container>/<bar_file>"
+
+However, the checksum of the file should also be provided::
+
+ SRC_URI[sha256sum] = "<sha256 string>"
+
.. _crate-fetcher:
Crate Fetcher (``crate://``)
@@ -791,6 +825,8 @@ Fetch submodules also exist for the following:
- OSC (``osc://``)
+- S3 (``s3://``)
+
- Secure FTP (``sftp://``)
- Secure Shell (``ssh://``)
@@ -1290,7 +1290,7 @@ class FetchData(object):
if checksum_name in self.parm:
checksum_expected = self.parm[checksum_name]
- elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3", "az", "crate"]:
+ elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3", "az", "crate", "gs"]:
checksum_expected = None
else:
checksum_expected = d.getVarFlag("SRC_URI", checksum_name)
@@ -1973,6 +1973,7 @@ from . import npm
from . import npmsw
from . import az
from . import crate
+from . import gcp
methods.append(local.Local())
methods.append(wget.Wget())
@@ -1994,3 +1995,4 @@ methods.append(npm.Npm())
methods.append(npmsw.NpmShrinkWrap())
methods.append(az.Az())
methods.append(crate.Crate())
+methods.append(gcp.GCP())
new file mode 100644
@@ -0,0 +1,98 @@
+"""
+BitBake 'Fetch' implementation for Google Cloud Platform Storage.
+
+Class for fetching files from Google Cloud Storage using the
+Google Cloud Storage Python Client. The GCS Python Client must
+be correctly installed, configured and authenticated prior to use.
+Additionally, gsutil must also be installed.
+
+"""
+
+# Copyright (C) 2023, Snap Inc.
+#
+# Based in part on bb.fetch2.s3:
+# Copyright (C) 2017 Andre McCurdy
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Based on functions from the base bb module, Copyright 2003 Holger Schurig
+
+import os
+import bb
+import urllib.parse, urllib.error
+from bb.fetch2 import FetchMethod
+from bb.fetch2 import FetchError
+from bb.fetch2 import logger
+
+class GCP(FetchMethod):
+    """
+    Class to fetch ``gs://`` urls via the Google Cloud Storage Python client.
+    """
+    def __init__(self):
+        # Created on first use by get_gcp_client() and then reused.
+        self.gcp_client = None
+
+    def supports(self, ud, d):
+        """Return True if the url type is 'gs', i.e. it can be fetched with GCP."""
+        return ud.type in ['gs']
+
+    def recommends_checksum(self, urldata):
+        """Checksums are always recommended for this fetcher."""
+        return True
+
+    def urldata_init(self, ud, d):
+        """Set ud.basename (honouring 'downloadfilename') and ud.localfile."""
+        if 'downloadfilename' in ud.parm:
+            ud.basename = ud.parm['downloadfilename']
+        else:
+            ud.basename = os.path.basename(ud.path)
+
+        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
+
+    def get_gcp_client(self):
+        """Create the storage client and cache it on self.gcp_client."""
+        # Imported lazily, presumably so this module loads without the client
+        from google.cloud import storage
+        self.gcp_client = storage.Client(project=None)
+
+    def download(self, ud, d):
+        """
+        Fetch urls using the GCP API.
+        Assumes localpath was called first.
+        Returns True on success; raises FetchError on a missing or empty file.
+        """
+        logger.debug2(f"Trying to download gs://{ud.host}{ud.path} to {ud.localpath}")
+        # Check the network policy first, before any client is created
+        bb.fetch2.check_network_access(d, "gsutil stat", ud.url)
+        if self.gcp_client is None:
+            self.get_gcp_client()
+
+        # Path sometimes has leading slash, so strip it
+        path = ud.path.lstrip("/")
+        blob = self.gcp_client.bucket(ud.host).blob(path)
+        blob.download_to_filename(ud.localpath)
+
+        # Additional sanity checks copied from the wget class (although there
+        # are no known issues which mean these are required, treat the GCP API
+        # tool with a little healthy suspicion).
+        if not os.path.exists(ud.localpath):
+            raise FetchError(f"The GCP API returned success for gs://{ud.host}{ud.path} but {ud.localpath} doesn't exist?!")
+
+        if os.path.getsize(ud.localpath) == 0:
+            os.remove(ud.localpath)
+            raise FetchError(f"The downloaded file for gs://{ud.host}{ud.path} resulted in a zero size file?! Deleting and failing since this isn't right.")
+
+        return True
+
+    def checkstatus(self, fetch, ud, d):
+        """Return True if the object exists in the bucket; raise FetchError otherwise."""
+        logger.debug2(f"Checking status of gs://{ud.host}{ud.path}")
+        bb.fetch2.check_network_access(d, "gsutil stat", ud.url)
+        if self.gcp_client is None:
+            self.get_gcp_client()
+
+        # Path sometimes has leading slash, so strip it
+        path = ud.path.lstrip("/")
+        if not self.gcp_client.bucket(ud.host).blob(path).exists():
+            raise FetchError(f"The GCP API reported that gs://{ud.host}{ud.path} does not exist")
+        return True