from modelverse_state import status import sys from collections import defaultdict import os import gzip import cPickle as pickle # Work around Python 2 where a 'big integer' automatically becomes a long if sys.version > '3': # pragma: no cover integer_types = (int,) primitive_types = (int, float, str, bool) else: # pragma: no cover integer_types = (int, long) primitive_types = (int, long, float, str, bool, unicode) complex_primitives = frozenset(["if", "while", "assign", "call", "break", "continue", "return","resolve","access", "constant", "input", "output", "declare", "global"]) def instance_to_string(value): return value["value"] def string_to_instance(value): return {'value': value} class ModelverseState(object): def __init__(self, bootfile = None): self.free_id = 0 self.edges = {} self.outgoing = defaultdict(set) self.incoming = defaultdict(set) self.values = {} self.nodes = set() self.GC = True self.to_delete = set() self.cache = {} if bootfile is not None: self.root = self.parse(bootfile) else: self.root, _ = self.create_node() def dump_modelverse(self): with open("/tmp/modelverse.out", "w") as f: f.write("digraph main {\n") for n in self.nodes: if n in self.values: f.write("a_%s [label=\"a_%s (%s)\"];\n" % (n, n, self.values[n])) else: f.write("a_%s [label=\"a_%s\"];\n" % (n, n)) for i, e in self.edges.iteritems(): f.write("%s -> %s [label=\"%s\"];\n" % (e[0], e[1], i)) f.write("}") return (self.root, status.SUCCESS) def parse(self, filename): picklefile = filename + ".pickle" try: if os.path.getmtime(picklefile) > os.path.getmtime(filename): # Pickle is more recent than grammarfile, so we can use it self.root, self.free_id, self.nodes, self.edges, self.values = pickle.load(open(picklefile, 'rb')) for name in self.edges: source, destination = self.edges[name] self.outgoing[source].add(name) self.incoming[destination].add(name) return self.root else: raise Exception("Invalid pickle") except Exception as e: # We have to parse the file and create the pickle symbols = {} def resolve(symb): try: return int(symb) except: if symb[0] == "?": derefs = symb[1:].split("/") v, _ = self.read_dict(symbols["root"], "__hierarchy") for deref in derefs: v, _ = self.read_dict(v, deref) return v else: return symbols[symb] with gzip.open(filename, 'rb') as f: for line in f: element_type, constructor = line.split(None, 1) name, values = constructor.split("(", 1) name = name.split()[0] values, _ = values.rsplit(")", 1) if element_type == "Node": if values == "": symbols[name], status = self.create_node() else: value = values if value in complex_primitives: value = string_to_instance(value) else: value = eval(value) symbols[name], status = self.create_nodevalue(value) elif element_type == "Edge": values = [v.split()[0] for v in values.split(",")] symbols[name], status = self.create_edge(resolve(values[0]), resolve(values[1])) else: raise Exception("Unknown element type: %s" % element_type) if status != 100: raise Exception("Failed to process line for reason %s: %s" % (status, line)) # Creation successful, now also create a pickle with open(picklefile, 'wb') as f: pickle.dump((symbols["root"], self.free_id, self.nodes, self.edges, self.values), f, pickle.HIGHEST_PROTOCOL) return symbols["root"] def read_root(self): return (self.root, status.SUCCESS) def create_node(self): self.nodes.add(self.free_id) self.free_id += 1 return (self.free_id - 1, status.SUCCESS) def create_edge(self, source, target): if source not in self.edges and source not in self.nodes: return (None, status.FAIL_CE_SOURCE) elif target not in self.edges and target not in self.nodes: return (None, status.FAIL_CE_TARGET) else: self.outgoing[source].add(self.free_id) self.incoming[target].add(self.free_id) self.edges[self.free_id] = (source, target) self.free_id += 1 return (self.free_id - 1, status.SUCCESS) def is_valid_datavalue(self, value): if isinstance(value, dict): if "value" in value and value["value"] in complex_primitives: return True else: return False elif not isinstance(value, primitive_types): return False elif isinstance(value, integer_types) and not (-2**63 <= value <= 2**64 - 1): return False return True def create_nodevalue(self, value): if not self.is_valid_datavalue(value): print("Not correct: " + str(value)) return (None, status.FAIL_CNV_OOB) self.values[self.free_id] = value self.nodes.add(self.free_id) self.free_id += 1 return (self.free_id - 1, status.SUCCESS) def create_dict(self, source, data, destination): if source not in self.nodes and source not in self.edges: return (None, status.FAIL_CDICT_SOURCE) if destination not in self.nodes and destination not in self.edges: return (None, status.FAIL_CDICT_TARGET) if not self.is_valid_datavalue(data): return (None, status.FAIL_CDICT_OOB) if (type(data) == float and data == 0.0): print("Got dictionary with value 0.0") print(locals()) print(self.values.get(destination, destination)) raise Exception() n = self.create_nodevalue(data)[0] e = self.create_edge(source, destination)[0] self.create_edge(e, n) self.cache.setdefault(source, {})[data] = e return (None, status.SUCCESS) def read_value(self, node): if node not in self.nodes: return (None, status.FAIL_RV_UNKNOWN) v = self.values.get(node, None) if v is None: return (None, status.FAIL_RV_NO_VALUE) else: return (v, status.SUCCESS) def read_outgoing(self, elem): if elem in self.edges or elem in self.nodes: return (list(self.outgoing[elem]), status.SUCCESS) else: return (None, status.FAIL_RO_UNKNOWN) def read_incoming(self, elem): if elem in self.edges or elem in self.nodes: return (list(self.incoming[elem]), status.SUCCESS) else: return (None, status.FAIL_RI_UNKNOWN) def read_edge(self, edge): v = self.edges.get(edge, None) if v is None: return ([None, None], status.FAIL_RE_UNKNOWN) else: s, t = v return ([s, t], status.SUCCESS) def read_dict(self, node, value): e, s = self.read_dict_edge(node, value) if s != status.SUCCESS: return (None, {status.FAIL_RDICTE_UNKNOWN: status.FAIL_RDICT_UNKNOWN, status.FAIL_RDICTE_UNCERTAIN: status.FAIL_RDICT_UNCERTAIN, status.FAIL_RDICTE_OOB: status.FAIL_RDICT_OOB, status.FAIL_RDICTE_NOT_FOUND: status.FAIL_RDICT_NOT_FOUND, status.FAIL_RDICTE_AMBIGUOUS: status.FAIL_RDICT_AMBIGUOUS}[s]) return (self.edges[e][1], status.SUCCESS) def read_dict_keys(self, node): if node not in self.nodes and node not in self.edges: return (None, status.FAIL_RDICTKEYS_UNKNOWN) result = [] for e1 in self.outgoing.get(node, set()): data_links = self.outgoing.get(e1, set()) for e2 in data_links: result.append(self.edges[e2][1]) return (result, status.SUCCESS) def read_dict_edge(self, node, value): try: first = self.cache[node][str(value)] # Got hit, so validate if (self.edges[first][0] == node) and \ (len(self.outgoing[first]) == 1) and \ (self.values[self.edges[list(self.outgoing[first])[0]][1]] == value): return (first, status.SUCCESS) del self.cache[node][value] except KeyError: # Didn't exist pass if node not in self.nodes and node not in self.edges: return (None, status.FAIL_RDICTE_UNKNOWN) if not self.is_valid_datavalue(value): return (None, status.FAIL_RDICTE_OOB) # Get all outgoing links found = None for e1 in self.outgoing.get(node, set()): data_links = self.outgoing.get(e1, set()) # For each link, we read the links that might link to a data value for e2 in data_links: # Now read out the target of the link target = self.edges[e2][1] # And access its value v = self.values.get(target, None) if v == value: # Found a match # Now get the target of the original link if found is not None: print("Duplicate key on value: %s : %s (%s <-> %s)!" % (v, type(v), found, e1)) return (None, status.FAIL_RDICTE_AMBIGUOUS) found = e1 self.cache.setdefault(node, {})[value] = e1 if found is not None: return (found, status.SUCCESS) else: return (None, status.FAIL_RDICTE_NOT_FOUND) def read_dict_node(self, node, value_node): e, s = self.read_dict_node_edge(node, value_node) if s != status.SUCCESS: return (None, {status.FAIL_RDICTNE_UNKNOWN: status.FAIL_RDICTN_UNKNOWN, status.FAIL_RDICTNE_UNCERTAIN: status.FAIL_RDICTN_UNCERTAIN, status.FAIL_RDICTNE_AMBIGUOUS: status.FAIL_RDICTN_AMBIGUOUS, status.FAIL_RDICTNE_OOB: status.FAIL_RDICTN_OOB, status.FAIL_RDICTNE_NOT_FOUND: status.FAIL_RDICTN_NOT_FOUND}[s]) return (self.edges[e][1], status.SUCCESS) def read_dict_node_edge(self, node, value_node): if node not in self.nodes and node not in self.edges: return (None, status.FAIL_RDICTNE_UNKNOWN) # Get all outgoing links found = None for e1 in self.outgoing.get(node, set()): data_links = self.outgoing.get(e1, set()) # For each link, we read the links that might link to a data value for e2 in data_links: # Now read out the target of the link target = self.edges[e2][1] # And access its value if target == value_node: # Found a match # Now get the target of the original link if found is not None: print("Duplicate key on node: %s (%s <-> %s)!" % (value_node, found, e1)) return (None, status.FAIL_RDICTNE_AMBIGUOUS) found = e1 if found is not None: return (found, status.SUCCESS) else: return (None, status.FAIL_RDICTNE_NOT_FOUND) def read_reverse_dict(self, node, value): if node not in self.nodes and node not in self.edges: return (None, status.FAIL_RRDICT_UNKNOWN) elif not self.is_valid_datavalue(value): return (None, status.FAIL_RRDICT_OOB) # Get all outgoing links matches = [] for e1 in self.incoming.get(node, set()): data_links = self.outgoing.get(e1, set()) # For each link, we read the links that might link to a data value for e2 in data_links: # Now read out the target of the link target = self.edges[e2][1] # And access its value v = self.values.get(target, None) if v == value: # Found a match if len(data_links) > 1: return (None, status.FAIL_RRDICT_UNCERTAIN) else: matches.append(e1) if len(matches) == 0: return (None, status.FAIL_RRDICT_NOT_FOUND) else: return ([self.edges[e][0] for e in matches], status.SUCCESS) def delete_node(self, node): if node == self.root: return (None, status.FAIL_DN_UNKNOWN) if node not in self.nodes: return (None, status.FAIL_DN_UNKNOWN) self.nodes.remove(node) if node in self.cache: del self.cache[node] if node in self.values: del self.values[node] s = set() for e in self.outgoing[node]: s.add(e) for e in self.incoming[node]: s.add(e) for e in s: self.delete_edge(e) del self.outgoing[node] del self.incoming[node] return (None, status.SUCCESS) def delete_edge(self, edge): if edge not in self.edges: return (None, status.FAIL_DE_UNKNOWN) s, t = self.edges[edge] self.incoming[t].remove(edge) self.outgoing[s].remove(edge) del self.edges[edge] s = set() for e in self.outgoing[edge]: s.add(e) for e in self.incoming[edge]: s.add(e) for e in s: self.delete_edge(e) del self.outgoing[edge] del self.incoming[edge] if (self.GC) and (not self.incoming[t]) and (t not in self.edges): # Remove this node as well # Edges aren't deleted like this, as they might have a reachable target and source! # If they haven't, they will be removed because the source was removed. self.to_delete.add(t) return (None, status.SUCCESS) def garbage_collect(self): while self.to_delete: t = self.to_delete.pop() if not self.incoming[t]: self.delete_node(t) def purge(self): self.garbage_collect() values = set(self.nodes) | set(self.edges) visit_list = [self.root] while visit_list: elem = visit_list.pop() if elem in values: # Remove it from the leftover values values.remove(elem) if elem in self.edges: visit_list.extend(self.edges[elem]) visit_list.extend(self.outgoing[elem]) visit_list.extend(self.incoming[elem]) # All remaining elements are to be purged if len(values) > 0: while values: v = values.pop() if v in self.nodes: self.delete_node(v) #print("Remaining nodes: " + str(len(self.nodes)))