main.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. from modelverse_state import status
  2. import sys
  3. from collections import defaultdict
  4. import os
  5. import gzip
  6. import time
  7. import cPickle as pickle
  8. # Work around Python 2 where a 'big integer' automatically becomes a long
  9. if sys.version > '3': # pragma: no cover
  10. integer_types = (int,)
  11. primitive_types = (int, float, str, bool)
  12. else: # pragma: no cover
  13. integer_types = (int, long)
  14. primitive_types = (int, long, float, str, bool, unicode)
  15. complex_primitives = frozenset(["if", "while", "assign", "call", "break", "continue", "return","resolve","access", "constant", "input", "output", "declare", "global"])
  16. def instance_to_string(value):
  17. return value["value"]
  18. def string_to_instance(value):
  19. return {'value': value}
  20. class ModelverseState(object):
  21. def __init__(self, bootfile = None):
  22. self.free_id = 0
  23. self.edges = {}
  24. self.outgoing = {}
  25. self.incoming = {}
  26. self.values = {}
  27. self.nodes = set()
  28. self.GC = True
  29. self.to_delete = set()
  30. self.cache = {}
  31. self.cache_node = {}
  32. if bootfile is not None:
  33. self.root = self.parse(bootfile)
  34. else:
  35. self.root, _ = self.create_node()
  36. def dump_modelverse(self):
  37. with open("/tmp/modelverse.out", "w") as f:
  38. f.write("digraph main {\n")
  39. for n in self.nodes:
  40. if n in self.values:
  41. f.write("a_%s [label=\"a_%s (%s)\"];\n" % (n, n, self.values[n]))
  42. else:
  43. f.write("a_%s [label=\"a_%s\"];\n" % (n, n))
  44. for i, e in self.edges.iteritems():
  45. f.write("%s -> %s [label=\"%s\"];\n" % (e[0], e[1], i))
  46. f.write("}")
  47. return (self.root, status.SUCCESS)
  48. def parse(self, filename):
  49. picklefile = filename + ".pickle"
  50. try:
  51. if os.path.getmtime(picklefile) > os.path.getmtime(filename):
  52. # Pickle is more recent than grammarfile, so we can use it
  53. self.root, self.free_id, self.nodes, self.edges, self.values, self.cache, self.cache_node = pickle.load(open(picklefile, 'rb'))
  54. for name in self.edges:
  55. source, destination = self.edges[name]
  56. self.outgoing.setdefault(source, set()).add(name)
  57. self.incoming.setdefault(destination, set()).add(name)
  58. return self.root
  59. else:
  60. raise Exception("Invalid pickle")
  61. except Exception as e:
  62. # We have to parse the file and create the pickle
  63. symbols = {}
  64. def resolve(symb):
  65. try:
  66. return int(symb)
  67. except:
  68. if symb[0] == "?":
  69. derefs = symb[1:].split("/")
  70. v, _ = self.read_dict(symbols["root"], "__hierarchy")
  71. for deref in derefs:
  72. v, _ = self.read_dict(v, deref)
  73. return v
  74. else:
  75. return symbols[symb]
  76. with gzip.open(filename, 'rb') as f:
  77. for line in f:
  78. element_type, constructor = line.split(None, 1)
  79. name, values = constructor.split("(", 1)
  80. values, _ = values.rsplit(")", 1)
  81. if element_type == "Node":
  82. name = name.split()[0]
  83. if values == "":
  84. symbols[name], status = self.create_node()
  85. else:
  86. value = values
  87. if value in complex_primitives:
  88. value = string_to_instance(value)
  89. else:
  90. value = eval(value)
  91. symbols[name], status = self.create_nodevalue(value)
  92. elif element_type == "Edge":
  93. name = name.split()[0]
  94. values = [v.split()[0] for v in values.split(",")]
  95. symbols[name], status = self.create_edge(resolve(values[0]), resolve(values[1]))
  96. elif element_type == "Dict":
  97. values = [v.split()[0] for v in values.split(",")]
  98. if values[1] in complex_primitives:
  99. values[1] = string_to_instance(values[1])
  100. else:
  101. values[1] = eval(values[1])
  102. self.create_dict(resolve(values[0]), values[1], resolve(values[2]))
  103. else:
  104. raise Exception("Unknown element type: %s" % element_type)
  105. if status != 100:
  106. raise Exception("Failed to process line for reason %s: %s" % (status, line))
  107. # Creation successful, now also create a pickle
  108. with open(picklefile, 'wb') as f:
  109. pickle.dump((symbols["root"], self.free_id, self.nodes, self.edges, self.values, self.cache, self.cache_node), f, pickle.HIGHEST_PROTOCOL)
  110. return symbols["root"]
  111. def read_root(self):
  112. return (self.root, status.SUCCESS)
  113. def create_node(self):
  114. self.nodes.add(self.free_id)
  115. self.free_id += 1
  116. return (self.free_id - 1, status.SUCCESS)
  117. def create_edge(self, source, target):
  118. if source not in self.edges and source not in self.nodes:
  119. return (None, status.FAIL_CE_SOURCE)
  120. elif target not in self.edges and target not in self.nodes:
  121. return (None, status.FAIL_CE_TARGET)
  122. else:
  123. self.outgoing.setdefault(source, set()).add(self.free_id)
  124. self.incoming.setdefault(target, set()).add(self.free_id)
  125. self.edges[self.free_id] = (source, target)
  126. self.free_id += 1
  127. if source in self.edges:
  128. # We are creating something dict_readable
  129. # Fill in the cache already!
  130. dict_source, dict_target = self.edges[source]
  131. if target in self.values:
  132. self.cache.setdefault(dict_source, {})[self.values[target]] = source
  133. self.cache_node.setdefault(dict_source, {})[target] = source
  134. return (self.free_id - 1, status.SUCCESS)
  135. def is_valid_datavalue(self, value):
  136. if isinstance(value, dict):
  137. if "value" in value and value["value"] in complex_primitives:
  138. return True
  139. else:
  140. return False
  141. elif not isinstance(value, primitive_types):
  142. return False
  143. elif isinstance(value, integer_types) and not (-2**63 <= value <= 2**64 - 1):
  144. return False
  145. return True
  146. def create_nodevalue(self, value):
  147. if not self.is_valid_datavalue(value):
  148. return (None, status.FAIL_CNV_OOB)
  149. self.values[self.free_id] = value
  150. self.nodes.add(self.free_id)
  151. self.free_id += 1
  152. return (self.free_id - 1, status.SUCCESS)
  153. def create_dict(self, source, data, destination):
  154. if source not in self.nodes and source not in self.edges:
  155. return (None, status.FAIL_CDICT_SOURCE)
  156. if destination not in self.nodes and destination not in self.edges:
  157. return (None, status.FAIL_CDICT_TARGET)
  158. if not self.is_valid_datavalue(data):
  159. return (None, status.FAIL_CDICT_OOB)
  160. n = self.create_nodevalue(data)[0]
  161. e = self.create_edge(source, destination)[0]
  162. self.create_edge(e, n)
  163. self.cache.setdefault(source, {})[data] = e
  164. self.cache_node.setdefault(source, {})[n] = e
  165. return (None, status.SUCCESS)
  166. def read_value(self, node):
  167. if node in self.values:
  168. return (self.values[node], status.SUCCESS)
  169. elif node not in self.nodes:
  170. return (None, status.FAIL_RV_UNKNOWN)
  171. else:
  172. return (None, status.FAIL_RV_NO_VALUE)
  173. def read_outgoing(self, elem):
  174. if elem in self.edges or elem in self.nodes:
  175. if elem in self.outgoing:
  176. return (list(self.outgoing[elem]), status.SUCCESS)
  177. else:
  178. return ([], status.SUCCESS)
  179. else:
  180. return (None, status.FAIL_RO_UNKNOWN)
  181. def read_incoming(self, elem):
  182. if elem in self.edges or elem in self.nodes:
  183. if elem in self.incoming:
  184. return (list(self.incoming[elem]), status.SUCCESS)
  185. else:
  186. return ([], status.SUCCESS)
  187. else:
  188. return (None, status.FAIL_RI_UNKNOWN)
  189. def read_edge(self, edge):
  190. if edge in self.edges:
  191. return (list(self.edges[edge]), status.SUCCESS)
  192. else:
  193. return ([None, None], status.FAIL_RE_UNKNOWN)
  194. def read_dict(self, node, value):
  195. try:
  196. first = self.cache[node][value]
  197. # Got hit, so validate
  198. if (self.edges[first][0] == node) and \
  199. (len(self.outgoing[first]) == 1) and \
  200. (self.values[self.edges[list(self.outgoing[first])[0]][1]] == value):
  201. return (self.edges[first][1], status.SUCCESS)
  202. # Hit but invalid now
  203. del self.cache[node][value]
  204. except KeyError:
  205. # Didn't exist
  206. pass
  207. # Get all outgoing links
  208. if node in self.outgoing:
  209. for e1 in self.outgoing[node]:
  210. # For each link, we read the links that might link to a data value
  211. if e1 in self.outgoing:
  212. for e2 in self.outgoing[e1]:
  213. # Now read out the target of the link
  214. target = self.edges[e2][1]
  215. # And access its value
  216. if target in self.values and self.values[target] == value:
  217. # Found a match
  218. # Now get the target of the original link
  219. self.cache.setdefault(node, {})[value] = e1
  220. print("Cache did not find, but still entry: %s at %s" % (value, node))
  221. return (self.edges[e1][1], status.SUCCESS)
  222. if node not in self.nodes and node not in self.edges:
  223. return (None, status.FAIL_RDICT_UNKNOWN)
  224. elif not self.is_valid_datavalue(value):
  225. return (None, status.FAIL_RDICT_OOB)
  226. else:
  227. return (None, status.FAIL_RDICT_NOT_FOUND)
  228. def read_dict_keys(self, node):
  229. if node not in self.nodes and node not in self.edges:
  230. return (None, status.FAIL_RDICTKEYS_UNKNOWN)
  231. result = []
  232. if node in self.outgoing:
  233. for e1 in self.outgoing[node]:
  234. if e1 in self.outgoing:
  235. for e2 in self.outgoing[e1]:
  236. result.append(self.edges[e2][1])
  237. return (result, status.SUCCESS)
  238. def read_dict_edge(self, node, value):
  239. try:
  240. first = self.cache[node][value]
  241. # Got hit, so validate
  242. if (self.edges[first][0] == node) and \
  243. (len(self.outgoing[first]) == 1) and \
  244. (self.values[self.edges[list(self.outgoing[first])[0]][1]] == value):
  245. return (first, status.SUCCESS)
  246. # Hit but invalid now
  247. del self.cache[node][value]
  248. except KeyError:
  249. # Didn't exist
  250. pass
  251. # Get all outgoing links
  252. found = None
  253. if node in self.outgoing:
  254. for e1 in self.outgoing[node]:
  255. # For each link, we read the links that might link to a data value
  256. if e1 in self.outgoing:
  257. for e2 in self.outgoing[e1]:
  258. # Now read out the target of the link
  259. target = self.edges[e2][1]
  260. # And access its value
  261. if target in self.values and self.values[target] == value:
  262. # Found a match
  263. # Now get the target of the original link
  264. if found is not None:
  265. print("Duplicate key on value: %s : %s (%s <-> %s)!" % (self.values[target], type(v), found, e1))
  266. return (None, status.FAIL_RDICTE_AMBIGUOUS)
  267. found = e1
  268. self.cache.setdefault(node, {})[value] = e1
  269. if found is not None:
  270. return (found, status.SUCCESS)
  271. elif node not in self.nodes and node not in self.edges:
  272. return (None, status.FAIL_RDICTE_UNKNOWN)
  273. elif not self.is_valid_datavalue(value):
  274. return (None, status.FAIL_RDICTE_OOB)
  275. else:
  276. return (None, status.FAIL_RDICTE_NOT_FOUND)
  277. def read_dict_node(self, node, value_node):
  278. try:
  279. first = self.cache_node[node][value_node]
  280. # Got hit, so validate
  281. if (self.edges[first][0] == node) and \
  282. (len(self.outgoing[first]) == 1) and \
  283. (self.edges[list(self.outgoing[first])[0]][1] == value_node):
  284. return (self.edges[first][1], status.SUCCESS)
  285. # Hit but invalid now
  286. del self.cache_node[node][value_node]
  287. except KeyError:
  288. # Didn't exist
  289. pass
  290. e, s = self.read_dict_node_edge(node, value_node)
  291. if s != status.SUCCESS:
  292. return (None, {status.FAIL_RDICTNE_UNKNOWN: status.FAIL_RDICTN_UNKNOWN,
  293. status.FAIL_RDICTNE_UNCERTAIN: status.FAIL_RDICTN_UNCERTAIN,
  294. status.FAIL_RDICTNE_AMBIGUOUS: status.FAIL_RDICTN_AMBIGUOUS,
  295. status.FAIL_RDICTNE_OOB: status.FAIL_RDICTN_OOB,
  296. status.FAIL_RDICTNE_NOT_FOUND: status.FAIL_RDICTN_NOT_FOUND}[s])
  297. self.cache_node.setdefault(node, {})[value_node] = e
  298. return (self.edges[e][1], status.SUCCESS)
  299. def read_dict_node_edge(self, node, value_node):
  300. # Get all outgoing links
  301. found = None
  302. if node in self.outgoing:
  303. for e1 in self.outgoing[node]:
  304. # For each link, we read the links that might link to a data value
  305. if e1 in self.outgoing:
  306. for e2 in self.outgoing[e1]:
  307. # And access its value
  308. if self.edges[e2][1] == value_node:
  309. # Found a match
  310. # Now get the target of the original link
  311. if found is not None:
  312. print("Duplicate key on node: %s (%s <-> %s)!" % (value_node, found, e1))
  313. return (None, status.FAIL_RDICTNE_AMBIGUOUS)
  314. found = e1
  315. if found is not None:
  316. return (found, status.SUCCESS)
  317. elif node not in self.nodes and node not in self.edges:
  318. return (None, status.FAIL_RDICTNE_UNKNOWN)
  319. else:
  320. return (None, status.FAIL_RDICTNE_NOT_FOUND)
  321. def read_reverse_dict(self, node, value):
  322. if node not in self.nodes and node not in self.edges:
  323. return (None, status.FAIL_RRDICT_UNKNOWN)
  324. elif not self.is_valid_datavalue(value):
  325. return (None, status.FAIL_RRDICT_OOB)
  326. # Get all outgoing links
  327. matches = []
  328. if node in self.incoming:
  329. for e1 in self.incoming[node]:
  330. # For each link, we read the links that might link to a data value
  331. if e1 in self.outgoing:
  332. for e2 in self.outgoing[e1]:
  333. # Now read out the target of the link
  334. target = self.edges[e2][1]
  335. # And access its value
  336. if target in self.values and self.values[target] == value:
  337. # Found a match
  338. if len(self.outgoing[e1]) > 1:
  339. return (None, status.FAIL_RRDICT_UNCERTAIN)
  340. else:
  341. matches.append(e1)
  342. if len(matches) == 0:
  343. return (None, status.FAIL_RRDICT_NOT_FOUND)
  344. else:
  345. return ([self.edges[e][0] for e in matches], status.SUCCESS)
  346. def delete_node(self, node):
  347. if node == self.root:
  348. return (None, status.FAIL_DN_UNKNOWN)
  349. if node not in self.nodes:
  350. return (None, status.FAIL_DN_UNKNOWN)
  351. self.nodes.remove(node)
  352. if node in self.values:
  353. del self.values[node]
  354. s = set()
  355. if node in self.outgoing:
  356. for e in self.outgoing[node]:
  357. s.add(e)
  358. del self.outgoing[node]
  359. if node in self.incoming:
  360. for e in self.incoming[node]:
  361. s.add(e)
  362. del self.incoming[node]
  363. for e in s:
  364. self.delete_edge(e)
  365. if node in self.outgoing:
  366. del self.outgoing[node]
  367. if node in self.incoming:
  368. del self.incoming[node]
  369. return (None, status.SUCCESS)
  370. def delete_edge(self, edge):
  371. if edge not in self.edges:
  372. return (None, status.FAIL_DE_UNKNOWN)
  373. s, t = self.edges[edge]
  374. if t in self.incoming:
  375. self.incoming[t].remove(edge)
  376. if s in self.outgoing:
  377. self.outgoing[s].remove(edge)
  378. del self.edges[edge]
  379. s = set()
  380. if edge in self.outgoing:
  381. for e in self.outgoing[edge]:
  382. s.add(e)
  383. if edge in self.incoming:
  384. for e in self.incoming[edge]:
  385. s.add(e)
  386. for e in s:
  387. self.delete_edge(e)
  388. if edge in self.outgoing:
  389. del self.outgoing[edge]
  390. if edge in self.incoming:
  391. del self.incoming[edge]
  392. if (self.GC) and (t in self.incoming and not self.incoming[t]) and (t not in self.edges):
  393. # Remove this node as well
  394. # Edges aren't deleted like this, as they might have a reachable target and source!
  395. # If they haven't, they will be removed because the source was removed.
  396. self.to_delete.add(t)
  397. return (None, status.SUCCESS)
  398. def garbage_collect(self):
  399. while self.to_delete:
  400. t = self.to_delete.pop()
  401. if t in self.incoming and not self.incoming[t]:
  402. self.delete_node(t)
  403. def purge(self):
  404. self.garbage_collect()
  405. values = set(self.edges)
  406. values.update(self.nodes)
  407. visit_list = [self.root]
  408. while visit_list:
  409. elem = visit_list.pop()
  410. if elem in values:
  411. # Remove it from the leftover values
  412. values.remove(elem)
  413. if elem in self.edges:
  414. visit_list.extend(self.edges[elem])
  415. if elem in self.outgoing:
  416. visit_list.extend(self.outgoing[elem])
  417. if elem in self.incoming:
  418. visit_list.extend(self.incoming[elem])
  419. dset = set()
  420. for key in self.cache:
  421. if key not in self.nodes and key not in self.edges:
  422. dset.add(key)
  423. for key in dset:
  424. del self.cache[key]
  425. dset = set()
  426. for key in self.cache_node:
  427. if key not in self.nodes and key not in self.edges:
  428. dset.add(key)
  429. for key in dset:
  430. del self.cache_node[key]
  431. # All remaining elements are to be purged
  432. if len(values) > 0:
  433. while values:
  434. v = values.pop()
  435. if v in self.nodes:
  436. self.delete_node(v)