import sys from collections import defaultdict import os import gzip import time import cPickle as pickle # Work around Python 2 where a 'big integer' automatically becomes a long if sys.version > '3': # pragma: no cover integer_types = (int,) primitive_types = (int, float, str, bool) else: # pragma: no cover integer_types = (int, long) primitive_types = (int, long, float, str, bool, unicode) complex_primitives = frozenset(["if", "while", "assign", "call", "break", "continue", "return","resolve","access", "constant", "input", "output", "declare", "global"]) def instance_to_string(value): return value["value"] def string_to_instance(value): return {'value': value} class ModelverseState(object): def __init__(self, bootfile = None): self.free_id = 0 self.edges = {} self.outgoing = {} self.incoming = {} self.values = {} self.nodes = set() self.GC = True self.to_delete = set() self.cache = {} self.cache_node = {} if bootfile is not None: self.root = self.parse(bootfile) else: self.root = self.create_node() def dump_modelverse(self): with open("/tmp/modelverse.out", "w") as f: f.write("digraph main {\n") for n in self.nodes: if n in self.values: f.write("a_%s [label=\"a_%s (%s)\"];\n" % (n, n, self.values[n])) else: f.write("a_%s [label=\"a_%s\"];\n" % (n, n)) for i, e in self.edges.iteritems(): f.write("%s -> %s [label=\"%s\"];\n" % (e[0], e[1], i)) f.write("}") return self.root def parse(self, filename): picklefile = filename + ".pickle" try: if os.path.getmtime(picklefile) > os.path.getmtime(filename): # Pickle is more recent than bootstrap file, so we can use it self.root, self.free_id, self.nodes, self.edges, self.values, self.cache, self.cache_node = pickle.load(open(picklefile, 'rb')) for name in self.edges: source, destination = self.edges[name] self.outgoing.setdefault(source, set()).add(name) self.incoming.setdefault(destination, set()).add(name) return self.root else: raise Exception("Invalid pickle") except Exception as e: # We have to parse the file and create the pickle symbols = {} def resolve(symb): try: return int(symb) except: if symb[0] == "?": derefs = symb[1:].split("/") v = self.read_dict(symbols["root"], "__hierarchy") for deref in derefs: v = self.read_dict(v, deref) return v else: return symbols[symb] with gzip.open(filename, 'rb') as f: for line in f: element_type, constructor = line.split(None, 1) name, values = constructor.split("(", 1) values, _ = values.rsplit(")", 1) if element_type == "Node": name = name.split()[0] if values == "": symbols[name] = self.create_node() else: value = values if value in complex_primitives: value = string_to_instance(value) else: value = eval(value) symbols[name] = self.create_nodevalue(value) elif element_type == "Edge": name = name.split()[0] values = [v.split()[0] for v in values.split(",")] symbols[name] = self.create_edge(resolve(values[0]), resolve(values[1])) elif element_type == "Dict": values = [v.split()[0] for v in values.split(",")] if values[1] in complex_primitives: values[1] = string_to_instance(values[1]) else: values[1] = eval(values[1]) self.create_dict(resolve(values[0]), values[1], resolve(values[2])) else: raise Exception("Unknown element type: %s" % element_type) # Creation successful, now also create a pickle with open(picklefile, 'wb') as f: pickle.dump((symbols["root"], self.free_id, self.nodes, self.edges, self.values, self.cache, self.cache_node), f, pickle.HIGHEST_PROTOCOL) return symbols["root"] def read_root(self): return self.root def create_node(self): self.nodes.add(self.free_id) self.free_id += 1 return self.free_id - 1 def create_edge(self, source, target): if source not in self.edges and source not in self.nodes: return None elif target not in self.edges and target not in self.nodes: return None else: self.outgoing.setdefault(source, set()).add(self.free_id) self.incoming.setdefault(target, set()).add(self.free_id) self.edges[self.free_id] = (source, target) self.free_id += 1 if source in self.edges: # We are creating something dict_readable # Fill in the cache already! dict_source, dict_target = self.edges[source] if target in self.values: self.cache.setdefault(dict_source, {})[self.values[target]] = source self.cache_node.setdefault(dict_source, {})[target] = source return self.free_id - 1 def is_valid_datavalue(self, value): if isinstance(value, dict): if "value" in value and value["value"] in complex_primitives: return True else: return False elif not isinstance(value, primitive_types): return False elif isinstance(value, integer_types) and not (-2**63 <= value <= 2**64 - 1): return False return True def create_nodevalue(self, value): if not self.is_valid_datavalue(value): return None self.values[self.free_id] = value self.nodes.add(self.free_id) self.free_id += 1 return self.free_id - 1 def create_dict(self, source, data, destination): if source not in self.nodes and source not in self.edges: return None elif destination not in self.nodes and destination not in self.edges: return None elif not self.is_valid_datavalue(data): return None else: n = self.create_nodevalue(data) e = self.create_edge(source, destination) self.create_edge(e, n) self.cache.setdefault(source, {})[data] = e self.cache_node.setdefault(source, {})[n] = e def read_value(self, node): if node in self.values: return self.values[node] else: return None def read_outgoing(self, elem): if elem in self.edges or elem in self.nodes: if elem in self.outgoing: return list(self.outgoing[elem]) else: return [] return None def read_incoming(self, elem): if elem in self.edges or elem in self.nodes: if elem in self.incoming: return list(self.incoming[elem]) else: return [] else: return None def read_edge(self, edge): if edge in self.edges: return [self.edges[edge][0], self.edges[edge][1]] else: return [None, None] def read_dict(self, node, value): try: first = self.cache[node][value] # Got hit, so validate if (self.edges[first][0] == node) and \ (value in [self.values[self.edges[i][1]] for i in self.outgoing[first]]): return self.edges[first][1] # Hit but invalid now del self.cache[node][value] except KeyError: # Didn't exist pass return None def read_dict_keys(self, node): if node not in self.nodes and node not in self.edges: return None result = [] if node in self.outgoing: for e1 in self.outgoing[node]: if e1 in self.outgoing: for e2 in self.outgoing[e1]: result.append(self.edges[e2][1]) #NOTE cannot just use the cache here, as some keys in the cache might not actually exist; we would have to check all of them anyway return result def read_dict_edge(self, node, value): try: first = self.cache[node][value] # Got hit, so validate if (self.edges[first][0] == node) and \ (value in [self.values[self.edges[i][1]] for i in self.outgoing[first]]): return first # Hit but invalid now del self.cache[node][value] except KeyError: # Didn't exist pass return None def read_dict_node(self, node, value_node): e = self.read_dict_node_edge(node, value_node) if e is None: return None else: self.cache_node.setdefault(node, {})[value_node] = e return self.edges[e][1] def read_dict_node_edge(self, node, value_node): try: first = self.cache_node[node][value_node] # Got hit, so validate if (self.edges[first][0] == node) and \ (value_node in [self.edges[i][1] for i in self.outgoing[first]]): return first # Hit but invalid now del self.cache_node[node][value_node] except KeyError: # Didn't exist pass return None def read_reverse_dict(self, node, value): if node not in self.nodes and node not in self.edges: return None elif not self.is_valid_datavalue(value): return None # Get all outgoing links matches = [] if node in self.incoming: for e1 in self.incoming[node]: # For each link, we read the links that might link to a data value if e1 in self.outgoing: for e2 in self.outgoing[e1]: # Now read out the target of the link target = self.edges[e2][1] # And access its value if target in self.values and self.values[target] == value: # Found a match if len(self.outgoing[e1]) > 1: return None else: matches.append(e1) if len(matches) == 0: return None else: return [self.edges[e][0] for e in matches] def delete_node(self, node): if node == self.root: return None elif node not in self.nodes: return None self.nodes.remove(node) if node in self.values: del self.values[node] s = set() if node in self.outgoing: for e in self.outgoing[node]: s.add(e) del self.outgoing[node] if node in self.incoming: for e in self.incoming[node]: s.add(e) del self.incoming[node] for e in s: self.delete_edge(e) if node in self.outgoing: del self.outgoing[node] if node in self.incoming: del self.incoming[node] return None def delete_edge(self, edge): if edge not in self.edges: return None s, t = self.edges[edge] if t in self.incoming: self.incoming[t].remove(edge) if s in self.outgoing: self.outgoing[s].remove(edge) del self.edges[edge] s = set() if edge in self.outgoing: for e in self.outgoing[edge]: s.add(e) if edge in self.incoming: for e in self.incoming[edge]: s.add(e) for e in s: self.delete_edge(e) if edge in self.outgoing: del self.outgoing[edge] if edge in self.incoming: del self.incoming[edge] if (self.GC) and (t in self.incoming and not self.incoming[t]) and (t not in self.edges): # Remove this node as well # Edges aren't deleted like this, as they might have a reachable target and source! # If they haven't, they will be removed because the source was removed. self.to_delete.add(t) def garbage_collect(self): while self.to_delete: t = self.to_delete.pop() if t in self.incoming and not self.incoming[t]: self.delete_node(t) def purge(self): self.garbage_collect() values = set(self.edges) values.update(self.nodes) visit_list = [self.root] while visit_list: elem = visit_list.pop() if elem in values: # Remove it from the leftover values values.remove(elem) if elem in self.edges: visit_list.extend(self.edges[elem]) if elem in self.outgoing: visit_list.extend(self.outgoing[elem]) if elem in self.incoming: visit_list.extend(self.incoming[elem]) dset = set() for key in self.cache: if key not in self.nodes and key not in self.edges: dset.add(key) for key in dset: del self.cache[key] dset = set() for key in self.cache_node: if key not in self.nodes and key not in self.edges: dset.add(key) for key in dset: del self.cache_node[key] # All remaining elements are to be purged if len(values) > 0: while values: v = values.pop() if v in self.nodes: self.delete_node(v)