123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418 |
- import sys
- from collections import defaultdict
- import os
- import gzip
- import time
- import marshal
- # Work around Python 2 where a 'big integer' automatically becomes a long
- if sys.version > '3': # pragma: no cover
- integer_types = (int,)
- primitive_types = (int, float, str, bool)
- else: # pragma: no cover
- integer_types = (int, long)
- primitive_types = (int, long, float, str, bool, unicode)
- complex_primitives = frozenset(["if", "while", "assign", "call", "break", "continue", "return","resolve","access", "constant", "input", "output", "declare", "global", "none"])
- def instance_to_string(value):
- return value["value"]
- def string_to_instance(value):
- return {'value': value}
- class ModelverseState(object):
- def __init__(self, bootfile = None):
- self.free_id = 0
- self.edges = {}
- self.outgoing = {}
- self.incoming = {}
- self.values = {}
- self.nodes = set()
- self.GC = True
- self.to_delete = set()
- self.cache = {}
- self.cache_node = {}
- if bootfile is not None:
- self.root = self.parse(bootfile)
- else:
- self.root = self.create_node()
- def dump_modelverse(self):
- with open("/tmp/modelverse.out", "w") as f:
- f.write("digraph main {\n")
- for n in self.nodes:
- if n in self.values:
- f.write("a_%s [label=\"a_%s (%s)\"];\n" % (n, n, self.values[n]))
- else:
- f.write("a_%s [label=\"a_%s\"];\n" % (n, n))
- for i, e in list(self.edges.items()):
- f.write("%s -> %s [label=\"%s\"];\n" % (e[0], e[1], i))
- f.write("}")
- return self.root
- def parse(self, filename):
- picklefile = filename + ".pickle"
- try:
- if os.path.getmtime(picklefile) > os.path.getmtime(filename):
- # Pickle is more recent than bootstrap file, so we can use it
- self.root, self.free_id, self.nodes, self.edges, self.values, self.cache, self.cache_node = marshal.load(open(picklefile, 'rb'))
- for name in self.edges:
- source, destination = self.edges[name]
- self.outgoing.setdefault(source, set()).add(name)
- self.incoming.setdefault(destination, set()).add(name)
- return self.root
- else:
- raise Exception("Invalid pickle")
- except Exception as e:
- # We have to parse the file and create the pickle
- symbols = {}
- def resolve(symb):
- try:
- return int(symb)
- except:
- if symb[0] == "?":
- derefs = symb[1:].split("/")
- v = self.read_dict(symbols["root"], "__hierarchy")
- for deref in derefs:
- v = self.read_dict(v, deref)
- return v
- else:
- return symbols[symb]
- with gzip.open(filename, 'rb') as f:
- for line in f:
- #handle str to bytes
- if sys.version_info[0] > 2:
- line = line.decode()
- element_type, constructor = line.split(None, 1)
- name, values = constructor.split("(", 1)
- values, _ = values.rsplit(")", 1)
- if element_type == "Node":
- name = name.split()[0]
- if values == "":
- symbols[name] = self.create_node()
- else:
- value = values
- if value in complex_primitives:
- value = string_to_instance(value)
- else:
- value = eval(value)
- symbols[name] = self.create_nodevalue(value)
- elif element_type == "Edge":
- name = name.split()[0]
- values = [v.split()[0] for v in values.split(",")]
- symbols[name] = self.create_edge(resolve(values[0]), resolve(values[1]))
- elif element_type == "Dict":
- values = [v.split()[0] for v in values.split(",")]
- if values[1] in complex_primitives:
- values[1] = string_to_instance(values[1])
- else:
- values[1] = eval(values[1])
- self.create_dict(resolve(values[0]), values[1], resolve(values[2]))
- else:
- raise Exception("Unknown element type: %s" % element_type)
- # Creation successful, now also create a pickle
- with open(picklefile, 'wb') as f:
- marshal.dump((symbols["root"], self.free_id, self.nodes, self.edges, self.values, self.cache, self.cache_node), f)
- return symbols["root"]
- def read_root(self):
- return self.root
- def create_node(self):
- self.nodes.add(self.free_id)
- self.free_id += 1
- return self.free_id - 1
- def create_edge(self, source, target):
- if source not in self.edges and source not in self.nodes:
- return None
- elif target not in self.edges and target not in self.nodes:
- return None
- else:
- self.outgoing.setdefault(source, set()).add(self.free_id)
- self.incoming.setdefault(target, set()).add(self.free_id)
- self.edges[self.free_id] = (source, target)
- self.free_id += 1
- if source in self.edges:
- # We are creating something dict_readable
- # Fill in the cache already!
- dict_source, dict_target = self.edges[source]
- if target in self.values:
- self.cache.setdefault(dict_source, {})[self.values[target]] = source
- self.cache_node.setdefault(dict_source, {})[target] = source
- return self.free_id - 1
- def is_valid_datavalue(self, value):
- if isinstance(value, dict):
- if "value" in value and value["value"] in complex_primitives:
- return True
- else:
- return False
- elif not isinstance(value, primitive_types):
- return False
- elif isinstance(value, integer_types) and not (-2**63 <= value <= 2**63 - 1):
- return False
- return True
- def create_nodevalue(self, value):
- if not self.is_valid_datavalue(value):
- return None
- self.values[self.free_id] = value
- self.nodes.add(self.free_id)
- self.free_id += 1
- return self.free_id - 1
- def create_dict(self, source, data, destination):
- if isinstance(destination, dict):
- print("ERROR IN CD: " + str(locals()))
- if source not in self.nodes and source not in self.edges:
- return None
- elif destination not in self.nodes and destination not in self.edges:
- return None
- elif not self.is_valid_datavalue(data):
- return None
- else:
- n = self.create_nodevalue(data)
- e = self.create_edge(source, destination)
- self.create_edge(e, n)
- self.cache.setdefault(source, {})[data] = e
- self.cache_node.setdefault(source, {})[n] = e
- def read_value(self, node):
- if node in self.values:
- return self.values[node]
- else:
- return None
- def read_outgoing(self, elem):
- if elem in self.edges or elem in self.nodes:
- if elem in self.outgoing:
- return list(self.outgoing[elem])
- else:
- return []
- return None
- def read_incoming(self, elem):
- if elem in self.edges or elem in self.nodes:
- if elem in self.incoming:
- return list(self.incoming[elem])
- else:
- return []
- else:
- return None
- def read_edge(self, edge):
- if edge in self.edges:
- return [self.edges[edge][0], self.edges[edge][1]]
- else:
- return [None, None]
- def read_dict(self, node, value):
- #print("RD -- " + str(value))
- try:
- first = self.cache[node][value]
- # Got hit, so validate
- if (self.edges[first][0] == node) and \
- (value in [self.values[self.edges[i][1]] for i in self.outgoing[first] if self.edges[i][1] in self.values]):
- return self.edges[first][1]
- # Hit but invalid now
- del self.cache[node][value]
- except KeyError:
- # Didn't exist
- pass
- except:
- raise
- return None
- def read_dict_keys(self, node):
- if node not in self.nodes and node not in self.edges:
- return None
- result = []
- #NOTE cannot just use the cache here, as some keys in the cache might not actually exist; we would have to check all of them anyway
- if node in self.outgoing:
- for e1 in self.outgoing[node]:
- if e1 in self.outgoing:
- for e2 in self.outgoing[e1]:
- result.append(self.edges[e2][1])
- return result
- def read_dict_edge(self, node, value):
- try:
- first = self.cache[node][value]
- # Got hit, so validate
- if (self.edges[first][0] == node) and \
- (value in [self.values[self.edges[i][1]] for i in self.outgoing[first] if self.edges[i][1] in self.values]):
- return first
- # Hit but invalid now
- del self.cache[node][value]
- except KeyError:
- # Didn't exist
- pass
- return None
- def read_dict_node(self, node, value_node):
- e = self.read_dict_node_edge(node, value_node)
- if e is None:
- return None
- else:
- self.cache_node.setdefault(node, {})[value_node] = e
- return self.edges[e][1]
- def read_dict_node_edge(self, node, value_node):
- try:
- first = self.cache_node[node][value_node]
- # Got hit, so validate
- if (self.edges[first][0] == node) and \
- (value_node in [self.edges[i][1] for i in self.outgoing[first]]):
- return first
- # Hit but invalid now
- del self.cache_node[node][value_node]
- except KeyError:
- # Didn't exist
- pass
- return None
- def read_reverse_dict(self, node, value):
- if node not in self.nodes and node not in self.edges:
- return None
- elif not self.is_valid_datavalue(value):
- return None
- # Get all outgoing links
- matches = []
- if node in self.incoming:
- for e1 in self.incoming[node]:
- # For each link, we read the links that might link to a data value
- if e1 in self.outgoing:
- for e2 in self.outgoing[e1]:
- # Now read out the target of the link
- target = self.edges[e2][1]
- # And access its value
- if target in self.values and self.values[target] == value:
- # Found a match
- matches.append(e1)
- return [self.edges[e][0] for e in matches]
- def delete_node(self, node):
- if node == self.root:
- return None
- elif node not in self.nodes:
- return None
- self.nodes.remove(node)
- if node in self.values:
- del self.values[node]
- s = set()
- if node in self.outgoing:
- for e in self.outgoing[node]:
- s.add(e)
- del self.outgoing[node]
- if node in self.incoming:
- for e in self.incoming[node]:
- s.add(e)
- del self.incoming[node]
- for e in s:
- self.delete_edge(e)
- if node in self.outgoing:
- del self.outgoing[node]
- if node in self.incoming:
- del self.incoming[node]
- return None
- def delete_edge(self, edge):
- if edge not in self.edges:
- return None
- s, t = self.edges[edge]
- if t in self.incoming:
- self.incoming[t].remove(edge)
- if s in self.outgoing:
- self.outgoing[s].remove(edge)
- del self.edges[edge]
- s = set()
- if edge in self.outgoing:
- for e in self.outgoing[edge]:
- s.add(e)
- if edge in self.incoming:
- for e in self.incoming[edge]:
- s.add(e)
- for e in s:
- self.delete_edge(e)
- if edge in self.outgoing:
- del self.outgoing[edge]
- if edge in self.incoming:
- del self.incoming[edge]
- if (self.GC) and (t in self.incoming and not self.incoming[t]) and (t not in self.edges):
- # Remove this node as well
- # Edges aren't deleted like this, as they might have a reachable target and source!
- # If they haven't, they will be removed because the source was removed.
- self.to_delete.add(t)
- def purge(self):
- initial_nodes = len(self.nodes)
- initial_edges = len(self.edges)
- while self.to_delete:
- t = self.to_delete.pop()
- if t in self.incoming and not self.incoming[t]:
- self.delete_node(t)
- values = set(self.edges)
- values.update(self.nodes)
- visit_list = [self.root]
- while visit_list:
- elem = visit_list.pop()
- if elem in values:
- # Remove it from the leftover values
- values.remove(elem)
- if elem in self.edges:
- visit_list.extend(self.edges[elem])
- if elem in self.outgoing:
- visit_list.extend(self.outgoing[elem])
- if elem in self.incoming:
- visit_list.extend(self.incoming[elem])
- dset = set()
- for key in self.cache:
- if key not in self.nodes and key not in self.edges:
- dset.add(key)
- for key in dset:
- del self.cache[key]
- dset = set()
- for key in self.cache_node:
- if key not in self.nodes and key not in self.edges:
- dset.add(key)
- for key in dset:
- del self.cache_node[key]
- # All remaining elements are to be purged
- if len(values) > 0:
- while values:
- v = values.pop()
- if v in self.nodes:
- self.delete_node(v)
|