Bron bekijken

Misc changes to speed up serialization of intermediate trees

Yentl Van Tendeloo 8 jaar geleden
bovenliggende
commit
3787799cd7
2 gewijzigde bestanden met 25 toevoegingen en 6 verwijderingen
  1. 15 2
      interface/HUTN/hutn_compiler/compiler.py
  2. 10 4
      interface/HUTN/hutn_compiler/hutnparser.py

+ 15 - 2
interface/HUTN/hutn_compiler/compiler.py

@@ -23,16 +23,26 @@ def md5digest(data):
     hasher.update(data)
     return hasher.hexdigest()
 
+fetch_caches = {}
+
 def fetch_cached(data, mode=None):
+    global fetch_caches
+
     try:
         md5 = md5digest(data)
+
+        if md5 in fetch_caches:
+            return fetch_caches[md5]
+
         cache_folder = os.path.abspath("%s/../caches/" % (os.path.dirname(os.path.abspath(__file__))))
         if mode is None:
             picklefile = cache_folder + "/%s.pickle" % md5
         else:
             picklefile = cache_folder + "/%s_%s.pickle" % (mode, md5)
         with open(picklefile, "rb") as f:
-            return pickle.load(f)
+            d = pickle.load(f)
+            fetch_caches[md5] = d
+            return d
     except:
         return None
 
@@ -151,7 +161,10 @@ def do_compile(inputfile, grammarfile, visitors=[], include_paths = [], mode="")
             else:
                 # The outer for finally finished, so there were no includes remaining, thus terminate the infinite while loop
                 break
-    tree_data = pickle.dumps(result["tree"], pickle.HIGHEST_PROTOCOL)
+
+    pruned = result["tree"].prune()
+    import json
+    tree_data = json.dumps(pruned)
     new_result = fetch_cached(tree_data, mode)
     if new_result is None:
         result["tree"].fix_tracability(inputfile)

+ 10 - 4
interface/HUTN/hutn_compiler/hutnparser.py

@@ -28,16 +28,21 @@ from copy import deepcopy
 
 from position import Position
 
+tail_cache = {}
+line_cache = {}
+
 class Tree(object):
     def __init__(self, head, tail, startpos, endpos, inputfile = None):
         self.head = head
         self.tail = tail
         self.startpos = startpos
         self.endpos = endpos
-        self._tail = None
         self.inputfile = inputfile
         # IMPORTANT: self.replaced: replace_child defines self.replaced
 
+    def prune(self):
+        return (self.head, [i.prune() if isinstance(i, Tree) else i for i in self.tail])
+
     def is_rule(self):
         return self.head.islower()
 
@@ -45,11 +50,12 @@ class Tree(object):
         return not self.is_rule()
 
     def get_tail(self):
+        global tail_cache
         if self.is_rule():
-            if not self._tail:
-                self._tail = [t for t in self.get_raw_tail()
+            if self not in tail_cache:
+                tail_cache[self] = [t for t in self.get_raw_tail()
                               if not t.head.startswith("implicit_autogenerated_")]
-            return self._tail
+            return tail_cache[self]
         else:
             return self.get_raw_tail()