main.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. import sys
  2. from collections import defaultdict
  3. import os
  4. import gzip
  5. import time
  6. # Work around Python 2 where a 'big integer' automatically becomes a long
  7. if sys.version > '3': # pragma: no cover
  8. import pickle as pickle
  9. integer_types = (int,)
  10. primitive_types = (int, float, str, bool)
  11. else: # pragma: no cover
  12. import cPickle as pickle
  13. integer_types = (int, long)
  14. primitive_types = (int, long, float, str, bool, unicode)
  15. complex_primitives = frozenset(["if", "while", "assign", "call", "break", "continue", "return","resolve","access", "constant", "input", "output", "declare", "global", "none"])
  16. def instance_to_string(value):
  17. return value["value"]
  18. def string_to_instance(value):
  19. return {'value': value}
  20. class ModelverseState(object):
  21. def __init__(self, bootfile = None):
  22. self.free_id = 0
  23. self.edges = {}
  24. self.outgoing = {}
  25. self.incoming = {}
  26. self.values = {}
  27. self.nodes = set()
  28. self.GC = True
  29. self.to_delete = set()
  30. self.cache = {}
  31. self.cache_node = {}
  32. if bootfile is not None:
  33. self.root = self.parse(bootfile)
  34. else:
  35. self.root = self.create_node()
  36. def dump_modelverse(self):
  37. with open("/tmp/modelverse.out", "w") as f:
  38. f.write("digraph main {\n")
  39. for n in self.nodes:
  40. if n in self.values:
  41. f.write("a_%s [label=\"a_%s (%s)\"];\n" % (n, n, self.values[n]))
  42. else:
  43. f.write("a_%s [label=\"a_%s\"];\n" % (n, n))
  44. for i, e in list(self.edges.items()):
  45. f.write("%s -> %s [label=\"%s\"];\n" % (e[0], e[1], i))
  46. f.write("}")
  47. return self.root
  48. def parse(self, filename):
  49. picklefile = filename + ".pickle"
  50. try:
  51. if os.path.getmtime(picklefile) > os.path.getmtime(filename):
  52. # Pickle is more recent than bootstrap file, so we can use it
  53. self.root, self.free_id, self.nodes, self.edges, self.values, self.cache, self.cache_node = pickle.load(open(picklefile, 'rb'))
  54. for name in self.edges:
  55. source, destination = self.edges[name]
  56. self.outgoing.setdefault(source, set()).add(name)
  57. self.incoming.setdefault(destination, set()).add(name)
  58. return self.root
  59. else:
  60. raise Exception("Invalid pickle")
  61. except Exception as e:
  62. # We have to parse the file and create the pickle
  63. symbols = {}
  64. def resolve(symb):
  65. try:
  66. return int(symb)
  67. except:
  68. if symb[0] == "?":
  69. derefs = symb[1:].split("/")
  70. v = self.read_dict(symbols["root"], "__hierarchy")
  71. for deref in derefs:
  72. v = self.read_dict(v, deref)
  73. return v
  74. else:
  75. return symbols[symb]
  76. with gzip.open(filename, 'rb') as f:
  77. for line in f:
  78. #handle str to bytes
  79. if sys.version_info[0] > 2:
  80. line = line.decode()
  81. element_type, constructor = line.split(None, 1)
  82. name, values = constructor.split("(", 1)
  83. values, _ = values.rsplit(")", 1)
  84. if element_type == "Node":
  85. name = name.split()[0]
  86. if values == "":
  87. symbols[name] = self.create_node()
  88. else:
  89. value = values
  90. if value in complex_primitives:
  91. value = string_to_instance(value)
  92. else:
  93. value = eval(value)
  94. symbols[name] = self.create_nodevalue(value)
  95. elif element_type == "Edge":
  96. name = name.split()[0]
  97. values = [v.split()[0] for v in values.split(",")]
  98. symbols[name] = self.create_edge(resolve(values[0]), resolve(values[1]))
  99. elif element_type == "Dict":
  100. values = [v.split()[0] for v in values.split(",")]
  101. if values[1] in complex_primitives:
  102. values[1] = string_to_instance(values[1])
  103. else:
  104. values[1] = eval(values[1])
  105. self.create_dict(resolve(values[0]), values[1], resolve(values[2]))
  106. else:
  107. raise Exception("Unknown element type: %s" % element_type)
  108. # Creation successful, now also create a pickle
  109. with open(picklefile, 'wb') as f:
  110. pickle.dump((symbols["root"], self.free_id, self.nodes, self.edges, self.values, self.cache, self.cache_node), f, pickle.HIGHEST_PROTOCOL)
  111. return symbols["root"]
  112. def read_root(self):
  113. return self.root
  114. def create_node(self):
  115. self.nodes.add(self.free_id)
  116. self.free_id += 1
  117. return self.free_id - 1
  118. def create_edge(self, source, target):
  119. if source not in self.edges and source not in self.nodes:
  120. return None
  121. elif target not in self.edges and target not in self.nodes:
  122. return None
  123. else:
  124. self.outgoing.setdefault(source, set()).add(self.free_id)
  125. self.incoming.setdefault(target, set()).add(self.free_id)
  126. self.edges[self.free_id] = (source, target)
  127. self.free_id += 1
  128. if source in self.edges:
  129. # We are creating something dict_readable
  130. # Fill in the cache already!
  131. dict_source, dict_target = self.edges[source]
  132. if target in self.values:
  133. self.cache.setdefault(dict_source, {})[self.values[target]] = source
  134. self.cache_node.setdefault(dict_source, {})[target] = source
  135. return self.free_id - 1
  136. def is_valid_datavalue(self, value):
  137. if isinstance(value, dict):
  138. if "value" in value and value["value"] in complex_primitives:
  139. return True
  140. else:
  141. return False
  142. elif not isinstance(value, primitive_types):
  143. return False
  144. elif isinstance(value, integer_types) and not (-2**63 <= value <= 2**63 - 1):
  145. return False
  146. return True
  147. def create_nodevalue(self, value):
  148. if not self.is_valid_datavalue(value):
  149. return None
  150. self.values[self.free_id] = value
  151. self.nodes.add(self.free_id)
  152. self.free_id += 1
  153. return self.free_id - 1
  154. def create_dict(self, source, data, destination):
  155. if source not in self.nodes and source not in self.edges:
  156. return None
  157. elif destination not in self.nodes and destination not in self.edges:
  158. return None
  159. elif not self.is_valid_datavalue(data):
  160. return None
  161. else:
  162. n = self.create_nodevalue(data)
  163. e = self.create_edge(source, destination)
  164. self.create_edge(e, n)
  165. self.cache.setdefault(source, {})[data] = e
  166. self.cache_node.setdefault(source, {})[n] = e
  167. def read_value(self, node):
  168. if node in self.values:
  169. return self.values[node]
  170. else:
  171. return None
  172. def read_outgoing(self, elem):
  173. if elem in self.edges or elem in self.nodes:
  174. if elem in self.outgoing:
  175. return list(self.outgoing[elem])
  176. else:
  177. return []
  178. return None
  179. def read_incoming(self, elem):
  180. if elem in self.edges or elem in self.nodes:
  181. if elem in self.incoming:
  182. return list(self.incoming[elem])
  183. else:
  184. return []
  185. else:
  186. return None
  187. def read_edge(self, edge):
  188. if edge in self.edges:
  189. return [self.edges[edge][0], self.edges[edge][1]]
  190. else:
  191. return [None, None]
  192. def read_dict(self, node, value):
  193. try:
  194. first = self.cache[node][value]
  195. # Got hit, so validate
  196. if (self.edges[first][0] == node) and \
  197. (value in [self.values[self.edges[i][1]] for i in self.outgoing[first] if self.edges[i][1] in self.values]):
  198. return self.edges[first][1]
  199. # Hit but invalid now
  200. del self.cache[node][value]
  201. except KeyError:
  202. # Didn't exist
  203. #print("No such entry: " + str(locals()))
  204. pass
  205. except:
  206. raise
  207. return None
  208. def read_dict_keys(self, node):
  209. if node not in self.nodes and node not in self.edges:
  210. return None
  211. result = []
  212. #NOTE cannot just use the cache here, as some keys in the cache might not actually exist; we would have to check all of them anyway
  213. if node in self.outgoing:
  214. for e1 in self.outgoing[node]:
  215. if e1 in self.outgoing:
  216. for e2 in self.outgoing[e1]:
  217. result.append(self.edges[e2][1])
  218. return result
  219. def read_dict_edge(self, node, value):
  220. try:
  221. first = self.cache[node][value]
  222. # Got hit, so validate
  223. if (self.edges[first][0] == node) and \
  224. (value in [self.values[self.edges[i][1]] for i in self.outgoing[first] if self.edges[i][1] in self.values]):
  225. return first
  226. # Hit but invalid now
  227. del self.cache[node][value]
  228. except KeyError:
  229. # Didn't exist
  230. pass
  231. return None
  232. def read_dict_node(self, node, value_node):
  233. e = self.read_dict_node_edge(node, value_node)
  234. if e is None:
  235. return None
  236. else:
  237. self.cache_node.setdefault(node, {})[value_node] = e
  238. return self.edges[e][1]
  239. def read_dict_node_edge(self, node, value_node):
  240. try:
  241. first = self.cache_node[node][value_node]
  242. # Got hit, so validate
  243. if (self.edges[first][0] == node) and \
  244. (value_node in [self.edges[i][1] for i in self.outgoing[first]]):
  245. return first
  246. # Hit but invalid now
  247. del self.cache_node[node][value_node]
  248. except KeyError:
  249. # Didn't exist
  250. pass
  251. return None
  252. def read_reverse_dict(self, node, value):
  253. if node not in self.nodes and node not in self.edges:
  254. return None
  255. elif not self.is_valid_datavalue(value):
  256. return None
  257. # Get all outgoing links
  258. matches = []
  259. if node in self.incoming:
  260. for e1 in self.incoming[node]:
  261. # For each link, we read the links that might link to a data value
  262. if e1 in self.outgoing:
  263. for e2 in self.outgoing[e1]:
  264. # Now read out the target of the link
  265. target = self.edges[e2][1]
  266. # And access its value
  267. if target in self.values and self.values[target] == value:
  268. # Found a match
  269. matches.append(e1)
  270. return [self.edges[e][0] for e in matches]
  271. def delete_node(self, node):
  272. if node == self.root:
  273. return None
  274. elif node not in self.nodes:
  275. return None
  276. self.nodes.remove(node)
  277. if node in self.values:
  278. del self.values[node]
  279. s = set()
  280. if node in self.outgoing:
  281. for e in self.outgoing[node]:
  282. s.add(e)
  283. del self.outgoing[node]
  284. if node in self.incoming:
  285. for e in self.incoming[node]:
  286. s.add(e)
  287. del self.incoming[node]
  288. for e in s:
  289. self.delete_edge(e)
  290. if node in self.outgoing:
  291. del self.outgoing[node]
  292. if node in self.incoming:
  293. del self.incoming[node]
  294. return None
  295. def delete_edge(self, edge):
  296. if edge not in self.edges:
  297. return None
  298. s, t = self.edges[edge]
  299. if t in self.incoming:
  300. self.incoming[t].remove(edge)
  301. if s in self.outgoing:
  302. self.outgoing[s].remove(edge)
  303. del self.edges[edge]
  304. s = set()
  305. if edge in self.outgoing:
  306. for e in self.outgoing[edge]:
  307. s.add(e)
  308. if edge in self.incoming:
  309. for e in self.incoming[edge]:
  310. s.add(e)
  311. for e in s:
  312. self.delete_edge(e)
  313. if edge in self.outgoing:
  314. del self.outgoing[edge]
  315. if edge in self.incoming:
  316. del self.incoming[edge]
  317. if (self.GC) and (t in self.incoming and not self.incoming[t]) and (t not in self.edges):
  318. # Remove this node as well
  319. # Edges aren't deleted like this, as they might have a reachable target and source!
  320. # If they haven't, they will be removed because the source was removed.
  321. self.to_delete.add(t)
  322. def garbage_collect(self):
  323. while self.to_delete:
  324. t = self.to_delete.pop()
  325. if t in self.incoming and not self.incoming[t]:
  326. self.delete_node(t)
  327. def purge(self):
  328. self.garbage_collect()
  329. values = set(self.edges)
  330. values.update(self.nodes)
  331. visit_list = [self.root]
  332. while visit_list:
  333. elem = visit_list.pop()
  334. if elem in values:
  335. # Remove it from the leftover values
  336. values.remove(elem)
  337. if elem in self.edges:
  338. visit_list.extend(self.edges[elem])
  339. if elem in self.outgoing:
  340. visit_list.extend(self.outgoing[elem])
  341. if elem in self.incoming:
  342. visit_list.extend(self.incoming[elem])
  343. dset = set()
  344. for key in self.cache:
  345. if key not in self.nodes and key not in self.edges:
  346. dset.add(key)
  347. for key in dset:
  348. del self.cache[key]
  349. dset = set()
  350. for key in self.cache_node:
  351. if key not in self.nodes and key not in self.edges:
  352. dset.add(key)
  353. for key in dset:
  354. del self.cache_node[key]
  355. # All remaining elements are to be purged
  356. if len(values) > 0:
  357. while values:
  358. v = values.pop()
  359. if v in self.nodes:
  360. self.delete_node(v)