Patchwork [bitbake-devel,1/9] cache: Use configuration's hash value to validate cache

login
register
mail settings
Submitter Dongxiao Xu
Date Feb. 23, 2012, 1:47 p.m.
Message ID <b3f25e2bde001abac9a13737105b03874667c6c8.1330003017.git.dongxiao.xu@intel.com>
Download mbox | patch
Permalink /patch/21689/
State Accepted
Commit 1c1df03a6c4717bfd5faab144c4f8bbfcbae0b57
Headers show

Comments

Dongxiao Xu - Feb. 23, 2012, 1:47 p.m.
Previously we used the file timestamp to judge whether a cache is valid.
This commit introduces a new method, which calculates the total
hash value of a given configuration's key/value pairs and tags
it onto the cache filename, for example, bb_cache.dat.xxxyyyzzz.

This mechanism also ensures the cache's correctness if the user
dynamically sets variables from some frontend GUI, like HOB.

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 lib/bb/cache.py      |   32 ++++++++++++--------------------
 lib/bb/cooker.py     |    4 +++-
 lib/bb/data_smart.py |   21 +++++++++++++++++++++
 3 files changed, 36 insertions(+), 21 deletions(-)

Patch

diff --git a/lib/bb/cache.py b/lib/bb/cache.py
index 99e0f34..3d89435 100644
--- a/lib/bb/cache.py
+++ b/lib/bb/cache.py
@@ -42,10 +42,10 @@  except ImportError:
     logger.info("Importing cPickle failed. "
                 "Falling back to a very slow implementation.")
 
-__cache_version__ = "142"
+__cache_version__ = "143"
 
-def getCacheFile(path, filename):
-    return os.path.join(path, filename)
+def getCacheFile(path, filename, data_hash):
+    return os.path.join(path, filename + "." + data_hash)
 
 # RecipeInfoCommon defines common data retrieving methods
 # from meta data for caches. CoreRecipeInfo as well as other
@@ -245,7 +245,7 @@  class Cache(object):
     BitBake Cache implementation
     """
 
-    def __init__(self, data, caches_array):
+    def __init__(self, data, data_hash, caches_array):
         # Pass caches_array information into Cache Constructor
         # It will be used in later for deciding whether we 
         # need extra cache file dump/load support 
@@ -257,6 +257,7 @@  class Cache(object):
         self.data = None
         self.data_fn = None
         self.cacheclean = True
+        self.data_hash = data_hash
 
         if self.cachedir in [None, '']:
             self.has_cache = False
@@ -265,26 +266,17 @@  class Cache(object):
             return
 
         self.has_cache = True
-        self.cachefile = getCacheFile(self.cachedir, "bb_cache.dat")
+        self.cachefile = getCacheFile(self.cachedir, "bb_cache.dat", self.data_hash)
 
         logger.debug(1, "Using cache in '%s'", self.cachedir)
         bb.utils.mkdirhier(self.cachedir)
 
-        # If any of configuration.data's dependencies are newer than the
-        # cache there isn't even any point in loading it...
-        newest_mtime = 0
-        deps = data.getVar("__base_depends")
-
-        old_mtimes = [old_mtime for _, old_mtime in deps]
-        old_mtimes.append(newest_mtime)
-        newest_mtime = max(old_mtimes)
-
         cache_ok = True
         if self.caches_array:
             for cache_class in self.caches_array:
                 if type(cache_class) is type and issubclass(cache_class, RecipeInfoCommon):
-                    cachefile = getCacheFile(self.cachedir, cache_class.cachefile)
-                    cache_ok = cache_ok and (bb.parse.cached_mtime_noerror(cachefile) >= newest_mtime)
+                    cachefile = getCacheFile(self.cachedir, cache_class.cachefile, self.data_hash)
+                    cache_ok = cache_ok and os.path.exists(cachefile)
                     cache_class.init_cacheData(self)
         if cache_ok:
             self.load_cachefile()
@@ -318,7 +310,7 @@  class Cache(object):
         # Calculate the correct cachesize of all those cache files
         for cache_class in self.caches_array:
             if type(cache_class) is type and issubclass(cache_class, RecipeInfoCommon):
-                cachefile = getCacheFile(self.cachedir, cache_class.cachefile)
+                cachefile = getCacheFile(self.cachedir, cache_class.cachefile, self.data_hash)
                 with open(cachefile, "rb") as cachefile:
                     cachesize += os.fstat(cachefile.fileno()).st_size
 
@@ -326,7 +318,7 @@  class Cache(object):
         
         for cache_class in self.caches_array:
             if type(cache_class) is type and issubclass(cache_class, RecipeInfoCommon):
-                cachefile = getCacheFile(self.cachedir, cache_class.cachefile)
+                cachefile = getCacheFile(self.cachedir, cache_class.cachefile, self.data_hash)
                 with open(cachefile, "rb") as cachefile:
                     pickled = pickle.Unpickler(cachefile)                    
                     while cachefile:
@@ -579,7 +571,7 @@  class Cache(object):
         for cache_class in self.caches_array:
             if type(cache_class) is type and issubclass(cache_class, RecipeInfoCommon):
                 cache_class_name = cache_class.__name__
-                cachefile = getCacheFile(self.cachedir, cache_class.cachefile)
+                cachefile = getCacheFile(self.cachedir, cache_class.cachefile, self.data_hash)
                 file_dict[cache_class_name] = open(cachefile, "wb")
                 pickler_dict[cache_class_name] =  pickle.Pickler(file_dict[cache_class_name], pickle.HIGHEST_PROTOCOL)
                    
@@ -684,7 +676,7 @@  def init(cooker):
     Files causing parsing errors are evicted from the cache.
 
     """
-    return Cache(cooker.configuration.data)
+    return Cache(cooker.configuration.data, cooker.configuration.data_hash)
 
 
 class CacheData(object):
diff --git a/lib/bb/cooker.py b/lib/bb/cooker.py
index f0778e5..af91178 100644
--- a/lib/bb/cooker.py
+++ b/lib/bb/cooker.py
@@ -849,6 +849,7 @@  class BBCooker:
         bb.event.fire(bb.event.ConfigParsed(), data)
         bb.parse.init_parser(data)
         self.configuration.data = data
+        self.configuration.data_hash = data.get_hash()
 
     def handleCollections( self, collections ):
         """Handle collections"""
@@ -1494,6 +1495,7 @@  class CookerParser(object):
         self.filelist = filelist
         self.cooker = cooker
         self.cfgdata = cooker.configuration.data
+        self.cfghash = cooker.configuration.data_hash
 
         # Accounting statistics
         self.parsed = 0
@@ -1509,7 +1511,7 @@  class CookerParser(object):
         self.num_processes = int(self.cfgdata.getVar("BB_NUMBER_PARSE_THREADS", True) or
                                  multiprocessing.cpu_count())
 
-        self.bb_cache = bb.cache.Cache(self.cfgdata, cooker.caches_array)
+        self.bb_cache = bb.cache.Cache(self.cfgdata, self.cfghash, cooker.caches_array)
         self.fromcache = []
         self.willparse = []
         for filename in self.filelist:
diff --git a/lib/bb/data_smart.py b/lib/bb/data_smart.py
index ea13478..24c7a8f 100644
--- a/lib/bb/data_smart.py
+++ b/lib/bb/data_smart.py
@@ -31,6 +31,7 @@  BitBake build tools.
 import copy, re
 from collections import MutableMapping
 import logging
+import hashlib
 import bb, bb.codeparser
 from bb   import utils
 from bb.COW  import COWDictBase
@@ -459,3 +460,23 @@  class DataSmart(MutableMapping):
 
     def __delitem__(self, var):
         self.delVar(var)
+
+    def get_hash(self):
+        data = ""
+        keys = iter(self)
+        for key in keys:
+            if key in ["TIME", "DATE"]:
+                continue
+            if key == "__depends":
+                deps = list(self.getVar(key, False))
+                deps.sort()
+                value = [deps[i][0] for i in range(len(deps))]
+            elif key == "PATH":
+                path = list(set(self.getVar(key, False).split(':')))
+                path.sort()
+                value = " ".join(path)
+            else:
+                value = self.getVar(key, False) or ""
+            data = data + key + ': ' + str(value) + '\n'
+
+        return hashlib.md5(data).hexdigest()