main.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. from modelverse_state import status
  2. import sys
  3. from collections import defaultdict
  4. import os
  5. import gzip
  6. import cPickle as pickle
  7. # Work around Python 2 where a 'big integer' automatically becomes a long
  8. if sys.version > '3': # pragma: no cover
  9. integer_types = (int,)
  10. primitive_types = (int, float, str, bool)
  11. else: # pragma: no cover
  12. integer_types = (int, long)
  13. primitive_types = (int, long, float, str, bool, unicode)
  14. complex_primitives = frozenset(["if", "while", "assign", "call", "break", "continue", "return","resolve","access", "constant", "input", "output", "declare", "global"])
  15. def instance_to_string(value):
  16. return value["value"]
  17. def string_to_instance(value):
  18. return {'value': value}
  19. class ModelverseState(object):
  20. def __init__(self, bootfile = None):
  21. self.free_id = 0
  22. self.edges = {}
  23. self.outgoing = defaultdict(set)
  24. self.incoming = defaultdict(set)
  25. self.values = {}
  26. self.nodes = set()
  27. self.GC = True
  28. self.to_delete = set()
  29. self.cache = {}
  30. if bootfile is not None:
  31. self.root = self.parse(bootfile)
  32. else:
  33. self.root, _ = self.create_node()
  34. def dump_modelverse(self):
  35. with open("/tmp/modelverse.out", "w") as f:
  36. f.write("digraph main {\n")
  37. for n in self.nodes:
  38. if n in self.values:
  39. f.write("a_%s [label=\"a_%s (%s)\"];\n" % (n, n, self.values[n]))
  40. else:
  41. f.write("a_%s [label=\"a_%s\"];\n" % (n, n))
  42. for i, e in self.edges.iteritems():
  43. f.write("%s -> %s [label=\"%s\"];\n" % (e[0], e[1], i))
  44. f.write("}")
  45. return (self.root, status.SUCCESS)
  46. def parse(self, filename):
  47. picklefile = filename + ".pickle"
  48. try:
  49. if os.path.getmtime(picklefile) > os.path.getmtime(filename):
  50. # Pickle is more recent than grammarfile, so we can use it
  51. self.root, self.free_id, self.nodes, self.edges, self.values = pickle.load(open(picklefile, 'rb'))
  52. for name in self.edges:
  53. source, destination = self.edges[name]
  54. self.outgoing[source].add(name)
  55. self.incoming[destination].add(name)
  56. return self.root
  57. else:
  58. raise Exception("Invalid pickle")
  59. except Exception as e:
  60. # We have to parse the file and create the pickle
  61. symbols = {}
  62. def resolve(symb):
  63. try:
  64. return int(symb)
  65. except:
  66. if symb[0] == "?":
  67. derefs = symb[1:].split("/")
  68. v, _ = self.read_dict(symbols["root"], "__hierarchy")
  69. for deref in derefs:
  70. v, _ = self.read_dict(v, deref)
  71. return v
  72. else:
  73. return symbols[symb]
  74. with gzip.open(filename, 'rb') as f:
  75. for line in f:
  76. element_type, constructor = line.split(None, 1)
  77. name, values = constructor.split("(", 1)
  78. name = name.split()[0]
  79. values, _ = values.rsplit(")", 1)
  80. if element_type == "Node":
  81. if values == "":
  82. symbols[name], status = self.create_node()
  83. else:
  84. value = values
  85. if value in complex_primitives:
  86. value = string_to_instance(value)
  87. else:
  88. value = eval(value)
  89. symbols[name], status = self.create_nodevalue(value)
  90. elif element_type == "Edge":
  91. values = [v.split()[0] for v in values.split(",")]
  92. symbols[name], status = self.create_edge(resolve(values[0]), resolve(values[1]))
  93. else:
  94. raise Exception("Unknown element type: %s" % element_type)
  95. if status != 100:
  96. raise Exception("Failed to process line for reason %s: %s" % (status, line))
  97. # Creation successful, now also create a pickle
  98. with open(picklefile, 'wb') as f:
  99. pickle.dump((symbols["root"], self.free_id, self.nodes, self.edges, self.values), f, pickle.HIGHEST_PROTOCOL)
  100. return symbols["root"]
  101. def read_root(self):
  102. return (self.root, status.SUCCESS)
  103. def create_node(self):
  104. self.nodes.add(self.free_id)
  105. self.free_id += 1
  106. return (self.free_id - 1, status.SUCCESS)
  107. def create_edge(self, source, target):
  108. if source not in self.edges and source not in self.nodes:
  109. return (None, status.FAIL_CE_SOURCE)
  110. elif target not in self.edges and target not in self.nodes:
  111. return (None, status.FAIL_CE_TARGET)
  112. else:
  113. self.outgoing[source].add(self.free_id)
  114. self.incoming[target].add(self.free_id)
  115. self.edges[self.free_id] = (source, target)
  116. self.free_id += 1
  117. return (self.free_id - 1, status.SUCCESS)
  118. def is_valid_datavalue(self, value):
  119. if isinstance(value, dict):
  120. if "value" in value and value["value"] in complex_primitives:
  121. return True
  122. else:
  123. return False
  124. elif not isinstance(value, primitive_types):
  125. return False
  126. elif isinstance(value, integer_types) and not (-2**63 <= value <= 2**64 - 1):
  127. return False
  128. return True
  129. def create_nodevalue(self, value):
  130. if not self.is_valid_datavalue(value):
  131. print("Not correct: " + str(value))
  132. return (None, status.FAIL_CNV_OOB)
  133. self.values[self.free_id] = value
  134. self.nodes.add(self.free_id)
  135. self.free_id += 1
  136. return (self.free_id - 1, status.SUCCESS)
  137. def create_dict(self, source, data, destination):
  138. if source not in self.nodes and source not in self.edges:
  139. return (None, status.FAIL_CDICT_SOURCE)
  140. if destination not in self.nodes and destination not in self.edges:
  141. return (None, status.FAIL_CDICT_TARGET)
  142. if not self.is_valid_datavalue(data):
  143. return (None, status.FAIL_CDICT_OOB)
  144. if (type(data) == float and data == 0.0):
  145. print("Got dictionary with value 0.0")
  146. print(locals())
  147. print(self.values.get(destination, destination))
  148. raise Exception()
  149. n = self.create_nodevalue(data)[0]
  150. e = self.create_edge(source, destination)[0]
  151. self.create_edge(e, n)
  152. self.cache.setdefault(source, {})[data] = e
  153. return (None, status.SUCCESS)
  154. def read_value(self, node):
  155. if node not in self.nodes:
  156. return (None, status.FAIL_RV_UNKNOWN)
  157. v = self.values.get(node, None)
  158. if v is None:
  159. return (None, status.FAIL_RV_NO_VALUE)
  160. else:
  161. return (v, status.SUCCESS)
  162. def read_outgoing(self, elem):
  163. if elem in self.edges or elem in self.nodes:
  164. return (list(self.outgoing[elem]), status.SUCCESS)
  165. else:
  166. return (None, status.FAIL_RO_UNKNOWN)
  167. def read_incoming(self, elem):
  168. if elem in self.edges or elem in self.nodes:
  169. return (list(self.incoming[elem]), status.SUCCESS)
  170. else:
  171. return (None, status.FAIL_RI_UNKNOWN)
  172. def read_edge(self, edge):
  173. v = self.edges.get(edge, None)
  174. if v is None:
  175. return ([None, None], status.FAIL_RE_UNKNOWN)
  176. else:
  177. s, t = v
  178. return ([s, t], status.SUCCESS)
  179. def read_dict(self, node, value):
  180. e, s = self.read_dict_edge(node, value)
  181. if s != status.SUCCESS:
  182. return (None, {status.FAIL_RDICTE_UNKNOWN: status.FAIL_RDICT_UNKNOWN,
  183. status.FAIL_RDICTE_UNCERTAIN: status.FAIL_RDICT_UNCERTAIN,
  184. status.FAIL_RDICTE_OOB: status.FAIL_RDICT_OOB,
  185. status.FAIL_RDICTE_NOT_FOUND: status.FAIL_RDICT_NOT_FOUND,
  186. status.FAIL_RDICTE_AMBIGUOUS: status.FAIL_RDICT_AMBIGUOUS}[s])
  187. return (self.edges[e][1], status.SUCCESS)
  188. def read_dict_keys(self, node):
  189. if node not in self.nodes and node not in self.edges:
  190. return (None, status.FAIL_RDICTKEYS_UNKNOWN)
  191. result = []
  192. for e1 in self.outgoing.get(node, set()):
  193. data_links = self.outgoing.get(e1, set())
  194. for e2 in data_links:
  195. result.append(self.edges[e2][1])
  196. return (result, status.SUCCESS)
  197. def read_dict_edge(self, node, value):
  198. try:
  199. first = self.cache[node][str(value)]
  200. # Got hit, so validate
  201. if (self.edges[first][0] == node) and \
  202. (len(self.outgoing[first]) == 1) and \
  203. (self.values[self.edges[list(self.outgoing[first])[0]][1]] == value):
  204. return (first, status.SUCCESS)
  205. del self.cache[node][value]
  206. except KeyError:
  207. # Didn't exist
  208. pass
  209. if node not in self.nodes and node not in self.edges:
  210. return (None, status.FAIL_RDICTE_UNKNOWN)
  211. if not self.is_valid_datavalue(value):
  212. return (None, status.FAIL_RDICTE_OOB)
  213. # Get all outgoing links
  214. found = None
  215. for e1 in self.outgoing.get(node, set()):
  216. data_links = self.outgoing.get(e1, set())
  217. # For each link, we read the links that might link to a data value
  218. for e2 in data_links:
  219. # Now read out the target of the link
  220. target = self.edges[e2][1]
  221. # And access its value
  222. v = self.values.get(target, None)
  223. if v == value:
  224. # Found a match
  225. # Now get the target of the original link
  226. if found is not None:
  227. print("Duplicate key on value: %s : %s (%s <-> %s)!" % (v, type(v), found, e1))
  228. return (None, status.FAIL_RDICTE_AMBIGUOUS)
  229. found = e1
  230. self.cache.setdefault(node, {})[value] = e1
  231. if found is not None:
  232. return (found, status.SUCCESS)
  233. else:
  234. return (None, status.FAIL_RDICTE_NOT_FOUND)
  235. def read_dict_node(self, node, value_node):
  236. e, s = self.read_dict_node_edge(node, value_node)
  237. if s != status.SUCCESS:
  238. return (None, {status.FAIL_RDICTNE_UNKNOWN: status.FAIL_RDICTN_UNKNOWN,
  239. status.FAIL_RDICTNE_UNCERTAIN: status.FAIL_RDICTN_UNCERTAIN,
  240. status.FAIL_RDICTNE_AMBIGUOUS: status.FAIL_RDICTN_AMBIGUOUS,
  241. status.FAIL_RDICTNE_OOB: status.FAIL_RDICTN_OOB,
  242. status.FAIL_RDICTNE_NOT_FOUND: status.FAIL_RDICTN_NOT_FOUND}[s])
  243. return (self.edges[e][1], status.SUCCESS)
  244. def read_dict_node_edge(self, node, value_node):
  245. if node not in self.nodes and node not in self.edges:
  246. return (None, status.FAIL_RDICTNE_UNKNOWN)
  247. # Get all outgoing links
  248. found = None
  249. for e1 in self.outgoing.get(node, set()):
  250. data_links = self.outgoing.get(e1, set())
  251. # For each link, we read the links that might link to a data value
  252. for e2 in data_links:
  253. # Now read out the target of the link
  254. target = self.edges[e2][1]
  255. # And access its value
  256. if target == value_node:
  257. # Found a match
  258. # Now get the target of the original link
  259. if found is not None:
  260. print("Duplicate key on node: %s (%s <-> %s)!" % (value_node, found, e1))
  261. return (None, status.FAIL_RDICTNE_AMBIGUOUS)
  262. found = e1
  263. if found is not None:
  264. return (found, status.SUCCESS)
  265. else:
  266. return (None, status.FAIL_RDICTNE_NOT_FOUND)
  267. def read_reverse_dict(self, node, value):
  268. if node not in self.nodes and node not in self.edges:
  269. return (None, status.FAIL_RRDICT_UNKNOWN)
  270. elif not self.is_valid_datavalue(value):
  271. return (None, status.FAIL_RRDICT_OOB)
  272. # Get all outgoing links
  273. matches = []
  274. for e1 in self.incoming.get(node, set()):
  275. data_links = self.outgoing.get(e1, set())
  276. # For each link, we read the links that might link to a data value
  277. for e2 in data_links:
  278. # Now read out the target of the link
  279. target = self.edges[e2][1]
  280. # And access its value
  281. v = self.values.get(target, None)
  282. if v == value:
  283. # Found a match
  284. if len(data_links) > 1:
  285. return (None, status.FAIL_RRDICT_UNCERTAIN)
  286. else:
  287. matches.append(e1)
  288. if len(matches) == 0:
  289. return (None, status.FAIL_RRDICT_NOT_FOUND)
  290. else:
  291. return ([self.edges[e][0] for e in matches], status.SUCCESS)
  292. def delete_node(self, node):
  293. if node == self.root:
  294. return (None, status.FAIL_DN_UNKNOWN)
  295. if node not in self.nodes:
  296. return (None, status.FAIL_DN_UNKNOWN)
  297. self.nodes.remove(node)
  298. if node in self.cache:
  299. del self.cache[node]
  300. if node in self.values:
  301. del self.values[node]
  302. s = set()
  303. for e in self.outgoing[node]:
  304. s.add(e)
  305. for e in self.incoming[node]:
  306. s.add(e)
  307. for e in s:
  308. self.delete_edge(e)
  309. del self.outgoing[node]
  310. del self.incoming[node]
  311. return (None, status.SUCCESS)
  312. def delete_edge(self, edge):
  313. if edge not in self.edges:
  314. return (None, status.FAIL_DE_UNKNOWN)
  315. s, t = self.edges[edge]
  316. self.incoming[t].remove(edge)
  317. self.outgoing[s].remove(edge)
  318. del self.edges[edge]
  319. s = set()
  320. for e in self.outgoing[edge]:
  321. s.add(e)
  322. for e in self.incoming[edge]:
  323. s.add(e)
  324. for e in s:
  325. self.delete_edge(e)
  326. del self.outgoing[edge]
  327. del self.incoming[edge]
  328. if (self.GC) and (not self.incoming[t]) and (t not in self.edges):
  329. # Remove this node as well
  330. # Edges aren't deleted like this, as they might have a reachable target and source!
  331. # If they haven't, they will be removed because the source was removed.
  332. self.to_delete.add(t)
  333. return (None, status.SUCCESS)
  334. def garbage_collect(self):
  335. while self.to_delete:
  336. t = self.to_delete.pop()
  337. if not self.incoming[t]:
  338. self.delete_node(t)
  339. def purge(self):
  340. self.garbage_collect()
  341. values = set(self.nodes) | set(self.edges)
  342. visit_list = [self.root]
  343. while visit_list:
  344. elem = visit_list.pop()
  345. if elem in values:
  346. # Remove it from the leftover values
  347. values.remove(elem)
  348. if elem in self.edges:
  349. visit_list.extend(self.edges[elem])
  350. visit_list.extend(self.outgoing[elem])
  351. visit_list.extend(self.incoming[elem])
  352. # All remaining elements are to be purged
  353. if len(values) > 0:
  354. while values:
  355. v = values.pop()
  356. if v in self.nodes:
  357. self.delete_node(v)
  358. print("Remaining nodes: " + str(len(self.nodes)))