[bitbake-devel,2/2] utils: also use mmap for SHA256 and SHA1, for performance

Submitted by Ross Burton on Nov. 7, 2019, 11:57 p.m. | Patch ID: 166764

Details

Message ID 20191107235733.25853-2-ross.burton@intel.com
State New
Headers show

Commit Message

Ross Burton Nov. 7, 2019, 11:57 p.m.
md5_file() uses an mmap() window to improve performance when hashing files, so
refactor the code into a shared helper and do the same for SHA1 and SHA256.

Signed-off-by: Ross Burton <ross.burton@intel.com>
---
 bitbake/lib/bb/utils.py | 34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

Patch hide | download patch | download mbox

diff --git a/bitbake/lib/bb/utils.py b/bitbake/lib/bb/utils.py
index d035949b3d6..8d40bcdf836 100644
--- a/bitbake/lib/bb/utils.py
+++ b/bitbake/lib/bb/utils.py
@@ -520,22 +520,26 @@  def unlockfile(lf):
     fcntl.flock(lf.fileno(), fcntl.LOCK_UN)
     lf.close()
 
-def md5_file(filename):
-    """
-    Return the hex string representation of the MD5 checksum of filename.
-    """
-    import hashlib, mmap
+def _hasher(method, filename):
+    import mmap
 
     with open(filename, "rb") as f:
-        m = hashlib.md5()
         try:
             with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                 for chunk in iter(lambda: mm.read(8192), b''):
-                    m.update(chunk)
+                    method.update(chunk)
         except ValueError:
             # You can't mmap() an empty file so silence this exception
             pass
-    return m.hexdigest()
+    return method.hexdigest()
+
+
+def md5_file(filename):
+    """
+    Return the hex string representation of the MD5 checksum of filename.
+    """
+    import hashlib
+    return _hasher(hashlib.md5(), filename)
 
 def sha256_file(filename):
     """
@@ -543,24 +547,14 @@  def sha256_file(filename):
     filename.
     """
     import hashlib
-
-    s = hashlib.sha256()
-    with open(filename, "rb") as f:
-        for line in f:
-            s.update(line)
-    return s.hexdigest()
+    return _hasher(hashlib.sha256(), filename)
 
 def sha1_file(filename):
     """
     Return the hex string representation of the SHA1 checksum of the filename
     """
     import hashlib
-
-    s = hashlib.sha1()
-    with open(filename, "rb") as f:
-        for line in f:
-            s.update(line)
-    return s.hexdigest()
+    return _hasher(hashlib.sha1(), filename)
 
 def preserved_envvars_exported():
     """Variables which are taken from the environment and placed in and exported