main.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. from modelverse_state import status
  2. import sys
  3. from collections import defaultdict
  4. import os
  5. import gzip
  6. import time
  7. import cPickle as pickle
  8. # Work around Python 2 where a 'big integer' automatically becomes a long
  9. if sys.version > '3': # pragma: no cover
  10. integer_types = (int,)
  11. primitive_types = (int, float, str, bool)
  12. else: # pragma: no cover
  13. integer_types = (int, long)
  14. primitive_types = (int, long, float, str, bool, unicode)
  15. complex_primitives = frozenset(["if", "while", "assign", "call", "break", "continue", "return","resolve","access", "constant", "input", "output", "declare", "global"])
  16. def instance_to_string(value):
  17. return value["value"]
  18. def string_to_instance(value):
  19. return {'value': value}
  20. class ModelverseState(object):
  21. def __init__(self, bootfile = None):
  22. self.free_id = 0
  23. self.edges = {}
  24. self.outgoing = defaultdict(set)
  25. self.incoming = defaultdict(set)
  26. self.values = {}
  27. self.nodes = set()
  28. self.GC = True
  29. self.to_delete = set()
  30. self.cache = {}
  31. if bootfile is not None:
  32. self.root = self.parse(bootfile)
  33. else:
  34. self.root, _ = self.create_node()
  35. def dump_modelverse(self):
  36. with open("/tmp/modelverse.out", "w") as f:
  37. f.write("digraph main {\n")
  38. for n in self.nodes:
  39. if n in self.values:
  40. f.write("a_%s [label=\"a_%s (%s)\"];\n" % (n, n, self.values[n]))
  41. else:
  42. f.write("a_%s [label=\"a_%s\"];\n" % (n, n))
  43. for i, e in self.edges.iteritems():
  44. f.write("%s -> %s [label=\"%s\"];\n" % (e[0], e[1], i))
  45. f.write("}")
  46. return (self.root, status.SUCCESS)
  47. def parse(self, filename):
  48. picklefile = filename + ".pickle"
  49. try:
  50. if os.path.getmtime(picklefile) > os.path.getmtime(filename):
  51. # Pickle is more recent than grammarfile, so we can use it
  52. self.root, self.free_id, self.nodes, self.edges, self.values = pickle.load(open(picklefile, 'rb'))
  53. for name in self.edges:
  54. source, destination = self.edges[name]
  55. self.outgoing[source].add(name)
  56. self.incoming[destination].add(name)
  57. return self.root
  58. else:
  59. raise Exception("Invalid pickle")
  60. except Exception as e:
  61. # We have to parse the file and create the pickle
  62. symbols = {}
  63. def resolve(symb):
  64. try:
  65. return int(symb)
  66. except:
  67. if symb[0] == "?":
  68. derefs = symb[1:].split("/")
  69. v, _ = self.read_dict(symbols["root"], "__hierarchy")
  70. for deref in derefs:
  71. v, _ = self.read_dict(v, deref)
  72. return v
  73. else:
  74. return symbols[symb]
  75. with gzip.open(filename, 'rb') as f:
  76. for line in f:
  77. element_type, constructor = line.split(None, 1)
  78. name, values = constructor.split("(", 1)
  79. name = name.split()[0]
  80. values, _ = values.rsplit(")", 1)
  81. if element_type == "Node":
  82. if values == "":
  83. symbols[name], status = self.create_node()
  84. else:
  85. value = values
  86. if value in complex_primitives:
  87. value = string_to_instance(value)
  88. else:
  89. value = eval(value)
  90. symbols[name], status = self.create_nodevalue(value)
  91. elif element_type == "Edge":
  92. values = [v.split()[0] for v in values.split(",")]
  93. symbols[name], status = self.create_edge(resolve(values[0]), resolve(values[1]))
  94. elif element_type == "Dict":
  95. values = [v.split()[0] for v in values.split(",")]
  96. if values[1] in complex_primitives:
  97. values[1] = string_to_instance(values[1])
  98. else:
  99. values[1] = eval(values[1])
  100. symbols[name], status = self.create_dict(resolve(values[0]), values[1], resolve(values[2]))
  101. else:
  102. raise Exception("Unknown element type: %s" % element_type)
  103. if status != 100:
  104. raise Exception("Failed to process line for reason %s: %s" % (status, line))
  105. # Creation successful, now also create a pickle
  106. with open(picklefile, 'wb') as f:
  107. pickle.dump((symbols["root"], self.free_id, self.nodes, self.edges, self.values), f, pickle.HIGHEST_PROTOCOL)
  108. return symbols["root"]
  109. def read_root(self):
  110. return (self.root, status.SUCCESS)
  111. def create_node(self):
  112. self.nodes.add(self.free_id)
  113. self.free_id += 1
  114. return (self.free_id - 1, status.SUCCESS)
  115. def create_edge(self, source, target):
  116. if source not in self.edges and source not in self.nodes:
  117. return (None, status.FAIL_CE_SOURCE)
  118. elif target not in self.edges and target not in self.nodes:
  119. return (None, status.FAIL_CE_TARGET)
  120. else:
  121. self.outgoing[source].add(self.free_id)
  122. self.incoming[target].add(self.free_id)
  123. self.edges[self.free_id] = (source, target)
  124. self.free_id += 1
  125. return (self.free_id - 1, status.SUCCESS)
  126. def is_valid_datavalue(self, value):
  127. if isinstance(value, dict):
  128. if "value" in value and value["value"] in complex_primitives:
  129. return True
  130. else:
  131. return False
  132. elif not isinstance(value, primitive_types):
  133. return False
  134. elif isinstance(value, integer_types) and not (-2**63 <= value <= 2**64 - 1):
  135. return False
  136. return True
  137. def create_nodevalue(self, value):
  138. if not self.is_valid_datavalue(value):
  139. raise Exception()
  140. return (None, status.FAIL_CNV_OOB)
  141. self.values[self.free_id] = value
  142. self.nodes.add(self.free_id)
  143. self.free_id += 1
  144. return (self.free_id - 1, status.SUCCESS)
  145. def create_dict(self, source, data, destination):
  146. if source not in self.nodes and source not in self.edges:
  147. return (None, status.FAIL_CDICT_SOURCE)
  148. if destination not in self.nodes and destination not in self.edges:
  149. return (None, status.FAIL_CDICT_TARGET)
  150. if not self.is_valid_datavalue(data):
  151. return (None, status.FAIL_CDICT_OOB)
  152. n = self.create_nodevalue(data)[0]
  153. e = self.create_edge(source, destination)[0]
  154. self.create_edge(e, n)
  155. self.cache.setdefault(source, {})[data] = e
  156. return (None, status.SUCCESS)
  157. def read_value(self, node):
  158. if node not in self.nodes:
  159. return (None, status.FAIL_RV_UNKNOWN)
  160. v = self.values.get(node, None)
  161. if v is None:
  162. return (None, status.FAIL_RV_NO_VALUE)
  163. else:
  164. return (v, status.SUCCESS)
  165. def read_outgoing(self, elem):
  166. if elem in self.edges or elem in self.nodes:
  167. return (list(self.outgoing[elem]), status.SUCCESS)
  168. else:
  169. return (None, status.FAIL_RO_UNKNOWN)
  170. def read_incoming(self, elem):
  171. if elem in self.edges or elem in self.nodes:
  172. return (list(self.incoming[elem]), status.SUCCESS)
  173. else:
  174. return (None, status.FAIL_RI_UNKNOWN)
  175. def read_edge(self, edge):
  176. v = self.edges.get(edge, None)
  177. if v is None:
  178. return ([None, None], status.FAIL_RE_UNKNOWN)
  179. else:
  180. s, t = v
  181. return ([s, t], status.SUCCESS)
  182. def read_dict_old(self, node, value):
  183. e, s = self.read_dict_edge(node, value)
  184. if s != status.SUCCESS:
  185. return (None, {status.FAIL_RDICTE_UNKNOWN: status.FAIL_RDICT_UNKNOWN,
  186. status.FAIL_RDICTE_UNCERTAIN: status.FAIL_RDICT_UNCERTAIN,
  187. status.FAIL_RDICTE_OOB: status.FAIL_RDICT_OOB,
  188. status.FAIL_RDICTE_NOT_FOUND: status.FAIL_RDICT_NOT_FOUND,
  189. status.FAIL_RDICTE_AMBIGUOUS: status.FAIL_RDICT_AMBIGUOUS}[s])
  190. return (self.edges[e][1], status.SUCCESS)
  191. def read_dict(self, node, value):
  192. try:
  193. first = self.cache[node][value]
  194. # Got hit, so validate
  195. if (self.edges[first][0] == node) and \
  196. (len(self.outgoing[first]) == 1) and \
  197. (self.values[self.edges[list(self.outgoing[first])[0]][1]] == value):
  198. return (self.edges[first][1], status.SUCCESS)
  199. # Hit but invalid now
  200. del self.cache[node][value]
  201. except KeyError:
  202. # Didn't exist
  203. pass
  204. if node not in self.nodes and node not in self.edges:
  205. return (None, status.FAIL_RDICT_UNKNOWN)
  206. if not self.is_valid_datavalue(value):
  207. return (None, status.FAIL_RDICT_OOB)
  208. # Get all outgoing links
  209. for e1 in self.outgoing.get(node, set()):
  210. data_links = self.outgoing.get(e1, set())
  211. # For each link, we read the links that might link to a data value
  212. for e2 in data_links:
  213. # Now read out the target of the link
  214. target = self.edges[e2][1]
  215. # And access its value
  216. v = self.values.get(target, None)
  217. if v == value:
  218. # Found a match
  219. # Now get the target of the original link
  220. self.cache.setdefault(node, {})[value] = e1
  221. return (self.edges[e1][1], status.SUCCESS)
  222. return (None, status.FAIL_RDICT_NOT_FOUND)
  223. def read_dict_keys(self, node):
  224. if node not in self.nodes and node not in self.edges:
  225. return (None, status.FAIL_RDICTKEYS_UNKNOWN)
  226. result = []
  227. for e1 in self.outgoing.get(node, set()):
  228. data_links = self.outgoing.get(e1, set())
  229. for e2 in data_links:
  230. result.append(self.edges[e2][1])
  231. return (result, status.SUCCESS)
  232. def read_dict_edge(self, node, value):
  233. try:
  234. first = self.cache[node][value]
  235. # Got hit, so validate
  236. if (self.edges[first][0] == node) and \
  237. (len(self.outgoing[first]) == 1) and \
  238. (self.values[self.edges[list(self.outgoing[first])[0]][1]] == value):
  239. return (first, status.SUCCESS)
  240. # Hit but invalid now
  241. del self.cache[node][value]
  242. except KeyError:
  243. # Didn't exist
  244. pass
  245. if node not in self.nodes and node not in self.edges:
  246. return (None, status.FAIL_RDICTE_UNKNOWN)
  247. if not self.is_valid_datavalue(value):
  248. return (None, status.FAIL_RDICTE_OOB)
  249. # Get all outgoing links
  250. found = None
  251. for e1 in self.outgoing.get(node, set()):
  252. data_links = self.outgoing.get(e1, set())
  253. # For each link, we read the links that might link to a data value
  254. for e2 in data_links:
  255. # Now read out the target of the link
  256. target = self.edges[e2][1]
  257. # And access its value
  258. v = self.values.get(target, None)
  259. if v == value:
  260. # Found a match
  261. # Now get the target of the original link
  262. if found is not None:
  263. print("Duplicate key on value: %s : %s (%s <-> %s)!" % (v, type(v), found, e1))
  264. return (None, status.FAIL_RDICTE_AMBIGUOUS)
  265. found = e1
  266. self.cache.setdefault(node, {})[value] = e1
  267. if found is not None:
  268. return (found, status.SUCCESS)
  269. else:
  270. return (None, status.FAIL_RDICTE_NOT_FOUND)
  271. def read_dict_node(self, node, value_node):
  272. e, s = self.read_dict_node_edge(node, value_node)
  273. if s != status.SUCCESS:
  274. return (None, {status.FAIL_RDICTNE_UNKNOWN: status.FAIL_RDICTN_UNKNOWN,
  275. status.FAIL_RDICTNE_UNCERTAIN: status.FAIL_RDICTN_UNCERTAIN,
  276. status.FAIL_RDICTNE_AMBIGUOUS: status.FAIL_RDICTN_AMBIGUOUS,
  277. status.FAIL_RDICTNE_OOB: status.FAIL_RDICTN_OOB,
  278. status.FAIL_RDICTNE_NOT_FOUND: status.FAIL_RDICTN_NOT_FOUND}[s])
  279. return (self.edges[e][1], status.SUCCESS)
  280. def read_dict_node_edge(self, node, value_node):
  281. if node not in self.nodes and node not in self.edges:
  282. return (None, status.FAIL_RDICTNE_UNKNOWN)
  283. # Get all outgoing links
  284. found = None
  285. for e1 in self.outgoing.get(node, set()):
  286. data_links = self.outgoing.get(e1, set())
  287. # For each link, we read the links that might link to a data value
  288. for e2 in data_links:
  289. # Now read out the target of the link
  290. target = self.edges[e2][1]
  291. # And access its value
  292. if target == value_node:
  293. # Found a match
  294. # Now get the target of the original link
  295. if found is not None:
  296. print("Duplicate key on node: %s (%s <-> %s)!" % (value_node, found, e1))
  297. return (None, status.FAIL_RDICTNE_AMBIGUOUS)
  298. found = e1
  299. if found is not None:
  300. return (found, status.SUCCESS)
  301. else:
  302. return (None, status.FAIL_RDICTNE_NOT_FOUND)
  303. def read_reverse_dict(self, node, value):
  304. if node not in self.nodes and node not in self.edges:
  305. return (None, status.FAIL_RRDICT_UNKNOWN)
  306. elif not self.is_valid_datavalue(value):
  307. return (None, status.FAIL_RRDICT_OOB)
  308. # Get all outgoing links
  309. matches = []
  310. for e1 in self.incoming.get(node, set()):
  311. data_links = self.outgoing.get(e1, set())
  312. # For each link, we read the links that might link to a data value
  313. for e2 in data_links:
  314. # Now read out the target of the link
  315. target = self.edges[e2][1]
  316. # And access its value
  317. v = self.values.get(target, None)
  318. if v == value:
  319. # Found a match
  320. if len(data_links) > 1:
  321. return (None, status.FAIL_RRDICT_UNCERTAIN)
  322. else:
  323. matches.append(e1)
  324. if len(matches) == 0:
  325. return (None, status.FAIL_RRDICT_NOT_FOUND)
  326. else:
  327. return ([self.edges[e][0] for e in matches], status.SUCCESS)
  328. def delete_node(self, node):
  329. if node == self.root:
  330. return (None, status.FAIL_DN_UNKNOWN)
  331. if node not in self.nodes:
  332. return (None, status.FAIL_DN_UNKNOWN)
  333. self.nodes.remove(node)
  334. if node in self.cache:
  335. del self.cache[node]
  336. if node in self.values:
  337. del self.values[node]
  338. s = set()
  339. for e in self.outgoing[node]:
  340. s.add(e)
  341. for e in self.incoming[node]:
  342. s.add(e)
  343. for e in s:
  344. self.delete_edge(e)
  345. del self.outgoing[node]
  346. del self.incoming[node]
  347. return (None, status.SUCCESS)
  348. def delete_edge(self, edge):
  349. if edge not in self.edges:
  350. return (None, status.FAIL_DE_UNKNOWN)
  351. s, t = self.edges[edge]
  352. self.incoming[t].remove(edge)
  353. self.outgoing[s].remove(edge)
  354. del self.edges[edge]
  355. s = set()
  356. for e in self.outgoing[edge]:
  357. s.add(e)
  358. for e in self.incoming[edge]:
  359. s.add(e)
  360. for e in s:
  361. self.delete_edge(e)
  362. del self.outgoing[edge]
  363. del self.incoming[edge]
  364. if (self.GC) and (not self.incoming[t]) and (t not in self.edges):
  365. # Remove this node as well
  366. # Edges aren't deleted like this, as they might have a reachable target and source!
  367. # If they haven't, they will be removed because the source was removed.
  368. self.to_delete.add(t)
  369. return (None, status.SUCCESS)
  370. def garbage_collect(self):
  371. while self.to_delete:
  372. t = self.to_delete.pop()
  373. if not self.incoming[t]:
  374. self.delete_node(t)
  375. def purge(self):
  376. self.garbage_collect()
  377. values = set(self.nodes) | set(self.edges)
  378. visit_list = [self.root]
  379. while visit_list:
  380. elem = visit_list.pop()
  381. if elem in values:
  382. # Remove it from the leftover values
  383. values.remove(elem)
  384. if elem in self.edges:
  385. visit_list.extend(self.edges[elem])
  386. visit_list.extend(self.outgoing[elem])
  387. visit_list.extend(self.incoming[elem])
  388. # All remaining elements are to be purged
  389. if len(values) > 0:
  390. while values:
  391. v = values.pop()
  392. if v in self.nodes:
  393. self.delete_node(v)
  394. #print("Remaining nodes: " + str(len(self.nodes)))