main.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. import sys
  2. from collections import defaultdict
  3. import os
  4. import gzip
  5. import time
  6. import cPickle as pickle
  7. # Work around Python 2 where a 'big integer' automatically becomes a long
  8. if sys.version > '3': # pragma: no cover
  9. integer_types = (int,)
  10. primitive_types = (int, float, str, bool)
  11. else: # pragma: no cover
  12. integer_types = (int, long)
  13. primitive_types = (int, long, float, str, bool, unicode)
  14. complex_primitives = frozenset(["if", "while", "assign", "call", "break", "continue", "return","resolve","access", "constant", "input", "output", "declare", "global"])
  15. def instance_to_string(value):
  16. return value["value"]
  17. def string_to_instance(value):
  18. return {'value': value}
  19. class ModelverseState(object):
  20. def __init__(self, bootfile = None):
  21. self.free_id = 0
  22. self.edges = {}
  23. self.outgoing = {}
  24. self.incoming = {}
  25. self.values = {}
  26. self.nodes = set()
  27. self.GC = True
  28. self.to_delete = set()
  29. self.cache = {}
  30. self.cache_node = {}
  31. if bootfile is not None:
  32. self.root = self.parse(bootfile)
  33. else:
  34. self.root, _ = self.create_node()
  35. def dump_modelverse(self):
  36. with open("/tmp/modelverse.out", "w") as f:
  37. f.write("digraph main {\n")
  38. for n in self.nodes:
  39. if n in self.values:
  40. f.write("a_%s [label=\"a_%s (%s)\"];\n" % (n, n, self.values[n]))
  41. else:
  42. f.write("a_%s [label=\"a_%s\"];\n" % (n, n))
  43. for i, e in self.edges.iteritems():
  44. f.write("%s -> %s [label=\"%s\"];\n" % (e[0], e[1], i))
  45. f.write("}")
  46. return self.root
  47. def parse(self, filename):
  48. picklefile = filename + ".pickle"
  49. try:
  50. if os.path.getmtime(picklefile) > os.path.getmtime(filename):
  51. # Pickle is more recent than grammarfile, so we can use it
  52. self.root, self.free_id, self.nodes, self.edges, self.values, self.cache, self.cache_node = pickle.load(open(picklefile, 'rb'))
  53. for name in self.edges:
  54. source, destination = self.edges[name]
  55. self.outgoing.setdefault(source, set()).add(name)
  56. self.incoming.setdefault(destination, set()).add(name)
  57. return self.root
  58. else:
  59. raise Exception("Invalid pickle")
  60. except Exception as e:
  61. # We have to parse the file and create the pickle
  62. symbols = {}
  63. def resolve(symb):
  64. try:
  65. return int(symb)
  66. except:
  67. if symb[0] == "?":
  68. derefs = symb[1:].split("/")
  69. v, _ = self.read_dict(symbols["root"], "__hierarchy")
  70. for deref in derefs:
  71. v, _ = self.read_dict(v, deref)
  72. return v
  73. else:
  74. return symbols[symb]
  75. with gzip.open(filename, 'rb') as f:
  76. for line in f:
  77. element_type, constructor = line.split(None, 1)
  78. name, values = constructor.split("(", 1)
  79. values, _ = values.rsplit(")", 1)
  80. if element_type == "Node":
  81. name = name.split()[0]
  82. if values == "":
  83. symbols[name] = self.create_node()
  84. else:
  85. value = values
  86. if value in complex_primitives:
  87. value = string_to_instance(value)
  88. else:
  89. value = eval(value)
  90. symbols[name] = self.create_nodevalue(value)
  91. elif element_type == "Edge":
  92. name = name.split()[0]
  93. values = [v.split()[0] for v in values.split(",")]
  94. symbols[name] = self.create_edge(resolve(values[0]), resolve(values[1]))
  95. elif element_type == "Dict":
  96. values = [v.split()[0] for v in values.split(",")]
  97. if values[1] in complex_primitives:
  98. values[1] = string_to_instance(values[1])
  99. else:
  100. values[1] = eval(values[1])
  101. self.create_dict(resolve(values[0]), values[1], resolve(values[2]))
  102. else:
  103. raise Exception("Unknown element type: %s" % element_type)
  104. # Creation successful, now also create a pickle
  105. with open(picklefile, 'wb') as f:
  106. pickle.dump((symbols["root"], self.free_id, self.nodes, self.edges, self.values, self.cache, self.cache_node), f, pickle.HIGHEST_PROTOCOL)
  107. return symbols["root"]
  108. def read_root(self):
  109. return self.root
  110. def create_node(self):
  111. self.nodes.add(self.free_id)
  112. self.free_id += 1
  113. return self.free_id - 1
  114. def create_edge(self, source, target):
  115. if source not in self.edges and source not in self.nodes:
  116. return None
  117. elif target not in self.edges and target not in self.nodes:
  118. return None
  119. else:
  120. self.outgoing.setdefault(source, set()).add(self.free_id)
  121. self.incoming.setdefault(target, set()).add(self.free_id)
  122. self.edges[self.free_id] = (source, target)
  123. self.free_id += 1
  124. if source in self.edges:
  125. # We are creating something dict_readable
  126. # Fill in the cache already!
  127. dict_source, dict_target = self.edges[source]
  128. if target in self.values:
  129. self.cache.setdefault(dict_source, {})[self.values[target]] = source
  130. self.cache_node.setdefault(dict_source, {})[target] = source
  131. return self.free_id - 1
  132. def is_valid_datavalue(self, value):
  133. if isinstance(value, dict):
  134. if "value" in value and value["value"] in complex_primitives:
  135. return True
  136. else:
  137. return False
  138. elif not isinstance(value, primitive_types):
  139. return False
  140. elif isinstance(value, integer_types) and not (-2**63 <= value <= 2**64 - 1):
  141. return False
  142. return True
  143. def create_nodevalue(self, value):
  144. if not self.is_valid_datavalue(value):
  145. return None
  146. self.values[self.free_id] = value
  147. self.nodes.add(self.free_id)
  148. self.free_id += 1
  149. return self.free_id - 1
  150. def create_dict(self, source, data, destination):
  151. if source not in self.nodes and source not in self.edges:
  152. return None
  153. elif destination not in self.nodes and destination not in self.edges:
  154. return None
  155. elif not self.is_valid_datavalue(data):
  156. return None
  157. else:
  158. n = self.create_nodevalue(data)
  159. e = self.create_edge(source, destination)
  160. self.create_edge(e, n)
  161. self.cache.setdefault(source, {})[data] = e
  162. self.cache_node.setdefault(source, {})[n] = e
  163. def read_value(self, node):
  164. if node in self.values:
  165. return self.values[node]
  166. else:
  167. return None
  168. def read_outgoing(self, elem):
  169. if elem in self.edges or elem in self.nodes:
  170. if elem in self.outgoing:
  171. return list(self.outgoing[elem])
  172. else:
  173. return []
  174. return None
  175. def read_incoming(self, elem):
  176. if elem in self.edges or elem in self.nodes:
  177. if elem in self.incoming:
  178. return list(self.incoming[elem])
  179. else:
  180. return []
  181. else:
  182. return None
  183. def read_edge(self, edge):
  184. if edge in self.edges:
  185. return [self.edges[edge][0], self.edges[edge][1]]
  186. else:
  187. return [None, None]
  188. def read_dict(self, node, value):
  189. try:
  190. first = self.cache[node][value]
  191. # Got hit, so validate
  192. if (self.edges[first][0] == node) and \
  193. (value in [self.values[self.edges[i][1]] for i in self.outgoing[first]]):
  194. return self.edges[first][1]
  195. # Hit but invalid now
  196. del self.cache[node][value]
  197. except KeyError:
  198. # Didn't exist
  199. pass
  200. return None
  201. def read_dict_keys(self, node):
  202. if node not in self.nodes and node not in self.edges:
  203. return None
  204. result = []
  205. if node in self.outgoing:
  206. for e1 in self.outgoing[node]:
  207. if e1 in self.outgoing:
  208. for e2 in self.outgoing[e1]:
  209. result.append(self.edges[e2][1])
  210. #NOTE cannot just use the cache here, as some keys in the cache might not actually exist; we would have to check all of them anyway
  211. return result
  212. def read_dict_edge(self, node, value):
  213. try:
  214. first = self.cache[node][value]
  215. # Got hit, so validate
  216. if (self.edges[first][0] == node) and \
  217. (value in [self.values[self.edges[i][1]] for i in self.outgoing[first]]):
  218. return first
  219. # Hit but invalid now
  220. del self.cache[node][value]
  221. except KeyError:
  222. # Didn't exist
  223. pass
  224. return None
  225. def read_dict_node(self, node, value_node):
  226. e = self.read_dict_node_edge(node, value_node)
  227. if e is None:
  228. return None
  229. else:
  230. self.cache_node.setdefault(node, {})[value_node] = e
  231. return self.edges[e][1]
  232. def read_dict_node_edge(self, node, value_node):
  233. try:
  234. first = self.cache_node[node][value_node]
  235. # Got hit, so validate
  236. if (self.edges[first][0] == node) and \
  237. (value_node in [self.edges[i][1] for i in self.outgoing[first]]):
  238. return first
  239. # Hit but invalid now
  240. del self.cache_node[node][value_node]
  241. except KeyError:
  242. # Didn't exist
  243. pass
  244. return None
  245. def read_reverse_dict(self, node, value):
  246. if node not in self.nodes and node not in self.edges:
  247. return None
  248. elif not self.is_valid_datavalue(value):
  249. return None
  250. # Get all outgoing links
  251. matches = []
  252. if node in self.incoming:
  253. for e1 in self.incoming[node]:
  254. # For each link, we read the links that might link to a data value
  255. if e1 in self.outgoing:
  256. for e2 in self.outgoing[e1]:
  257. # Now read out the target of the link
  258. target = self.edges[e2][1]
  259. # And access its value
  260. if target in self.values and self.values[target] == value:
  261. # Found a match
  262. if len(self.outgoing[e1]) > 1:
  263. return None
  264. else:
  265. matches.append(e1)
  266. if len(matches) == 0:
  267. return None
  268. else:
  269. return [self.edges[e][0] for e in matches]
  270. def delete_node(self, node):
  271. if node == self.root:
  272. return None
  273. elif node not in self.nodes:
  274. return None
  275. self.nodes.remove(node)
  276. if node in self.values:
  277. del self.values[node]
  278. s = set()
  279. if node in self.outgoing:
  280. for e in self.outgoing[node]:
  281. s.add(e)
  282. del self.outgoing[node]
  283. if node in self.incoming:
  284. for e in self.incoming[node]:
  285. s.add(e)
  286. del self.incoming[node]
  287. for e in s:
  288. self.delete_edge(e)
  289. if node in self.outgoing:
  290. del self.outgoing[node]
  291. if node in self.incoming:
  292. del self.incoming[node]
  293. return None
  294. def delete_edge(self, edge):
  295. if edge not in self.edges:
  296. return None
  297. s, t = self.edges[edge]
  298. if t in self.incoming:
  299. self.incoming[t].remove(edge)
  300. if s in self.outgoing:
  301. self.outgoing[s].remove(edge)
  302. del self.edges[edge]
  303. s = set()
  304. if edge in self.outgoing:
  305. for e in self.outgoing[edge]:
  306. s.add(e)
  307. if edge in self.incoming:
  308. for e in self.incoming[edge]:
  309. s.add(e)
  310. for e in s:
  311. self.delete_edge(e)
  312. if edge in self.outgoing:
  313. del self.outgoing[edge]
  314. if edge in self.incoming:
  315. del self.incoming[edge]
  316. if (self.GC) and (t in self.incoming and not self.incoming[t]) and (t not in self.edges):
  317. # Remove this node as well
  318. # Edges aren't deleted like this, as they might have a reachable target and source!
  319. # If they haven't, they will be removed because the source was removed.
  320. self.to_delete.add(t)
  321. def garbage_collect(self):
  322. while self.to_delete:
  323. t = self.to_delete.pop()
  324. if t in self.incoming and not self.incoming[t]:
  325. self.delete_node(t)
  326. def purge(self):
  327. self.garbage_collect()
  328. values = set(self.edges)
  329. values.update(self.nodes)
  330. visit_list = [self.root]
  331. while visit_list:
  332. elem = visit_list.pop()
  333. if elem in values:
  334. # Remove it from the leftover values
  335. values.remove(elem)
  336. if elem in self.edges:
  337. visit_list.extend(self.edges[elem])
  338. if elem in self.outgoing:
  339. visit_list.extend(self.outgoing[elem])
  340. if elem in self.incoming:
  341. visit_list.extend(self.incoming[elem])
  342. dset = set()
  343. for key in self.cache:
  344. if key not in self.nodes and key not in self.edges:
  345. dset.add(key)
  346. for key in dset:
  347. del self.cache[key]
  348. dset = set()
  349. for key in self.cache_node:
  350. if key not in self.nodes and key not in self.edges:
  351. dset.add(key)
  352. for key in dset:
  353. del self.cache_node[key]
  354. # All remaining elements are to be purged
  355. if len(values) > 0:
  356. while values:
  357. v = values.pop()
  358. if v in self.nodes:
  359. self.delete_node(v)