# rdf.py (12 KB)
  1. from modelverse_state import status
  2. import sys
  3. from collections import defaultdict
  4. import os
  5. import rdflib
  6. import json
  7. import cPickle as pickle
  # Work around Python 2, where a 'big integer' automatically becomes a long.
  # The major version is compared numerically: the string test
  # ``sys.version > '3'`` orders versions lexicographically and would put a
  # version such as '10.0' before '3'.
  if sys.version_info[0] >= 3:  # pragma: no cover
      integer_types = (int,)
      primitive_types = (int, float, str, bool)
  else:  # pragma: no cover
      integer_types = (int, long)  # noqa: F821 - Python 2 builtin
      primitive_types = (int, long, float, str, bool, unicode)  # noqa: F821

  # Names that are accepted as a data value when wrapped as {"value": name}.
  complex_primitives = frozenset([
      "if", "while", "assign", "call", "break", "continue", "return",
      "resolve", "access", "constant", "input", "output", "declare", "global",
  ])
  def instance_to_string(value):
      """Return the name stored under the "value" key of a wrapped instance."""
      return value["value"]
  def string_to_instance(value):
      """Wrap *value* in a one-entry dict under the "value" key."""
      return {'value': value}
  class ModelverseState(object):
      """Graph of nodes, edges and values, stored as RDF triples in rdflib.

      Every element (node or edge) is an ``rdflib.BNode``.  An edge carries one
      ``MV:hasSource`` and one ``MV:hasTarget`` triple, and its endpoints may
      themselves be edges.  A node value is kept as a JSON-encoded literal
      under ``MV:hasValue``.  A dictionary entry ``key -> target`` on a node is
      a main edge from the node to ``target`` plus a second edge from that main
      edge to a node holding ``key``.

      Every public operation returns a ``(result, status_code)`` pair.

      The graph is accessed through rdflib's triple-pattern API rather than
      SPARQL text: a ``_:label`` term inside a SPARQL pattern acts as a
      variable, so it cannot be used to address one specific blank node.
      """

      def __init__(self, bootfile=None):
          """Create the state, optionally populated from *bootfile*.

          Without a bootfile the state consists of a single fresh root node.
          """
          self.mv = rdflib.Namespace("http://modelverse.mv/#")
          self._reset_graph()
          # Neither attribute is consulted by the methods of this class yet.
          self.GC = True
          self.to_delete = set()
          if bootfile is None:
              self.root = self.create_node()[0]
          else:
              # parse() hands back the element the bootfile names "root".
              self.root = self.parse(bootfile)

      # ------------------------------------------------------------------
      # Internal helpers
      # ------------------------------------------------------------------
      def _reset_graph(self):
          """Start from an empty graph with the MV prefix bound."""
          self.graph = rdflib.Graph()
          self.graph.bind("MV", self.mv)

      def _literal(self, value):
          """Return the literal under which *value* is stored in the graph."""
          return rdflib.Literal(json.dumps(value))

      def _attached_edges(self, elem):
          """Return the set of edges that start or end at *elem*."""
          edges = set(self.graph.subjects(self.mv.hasSource, elem))
          edges.update(self.graph.subjects(self.mv.hasTarget, elem))
          return edges

      def _key_nodes(self, main_edge):
          """Yield the targets of all edges that start at *main_edge*."""
          for attr_edge in self.graph.subjects(self.mv.hasSource, main_edge):
              key_node = self.graph.value(attr_edge, self.mv.hasTarget)
              # A None subject would act as a wildcard in later lookups.
              if key_node is not None:
                  yield key_node

      def _has_key(self, main_edge, wanted):
          """Tell whether *main_edge* is labelled with the literal *wanted*."""
          has_value = self.mv.hasValue
          return any((key_node, has_value, wanted) in self.graph
                     for key_node in self._key_nodes(main_edge))

      def _load_cache(self, filename, triplestore):
          """Return the root stored in an up-to-date cache file, else None."""
          try:
              if os.path.getmtime(triplestore) <= os.path.getmtime(filename):
                  return None
              self.graph.parse(triplestore, format="nt")
          except Exception:
              # The cache is only an optimisation: a missing or unreadable
              # file simply means the bootfile gets parsed again.
              return None
          return self.graph.value(self.mv["state"], self.mv["hasRoot"])

      # ------------------------------------------------------------------
      # Loading
      # ------------------------------------------------------------------
      def parse(self, filename):
          """Load the state from *filename* and return its root element.

          A serialized copy is cached next to the bootfile as
          ``<filename>.nt`` and reused while it is newer than the bootfile.
          Otherwise the bootfile is read line by line; every non-empty line is
          either ``Node name(value)`` (empty parentheses create a node without
          a value) or ``Edge name(source, target)``.  Edge endpoints are names
          defined on earlier lines, or ``?a/b/c`` paths that are followed as
          nested dictionary keys below the root's ``__hierarchy`` entry.

          Raises:
              Exception: on an unknown element type or a failed creation.
          """
          triplestore = filename + ".nt"
          root = self._load_cache(filename, triplestore)
          if root is not None:
              return root

          # The cache was missing, stale or unusable: rebuild from scratch so
          # that a partially loaded cache cannot leave triples behind.
          self._reset_graph()
          symbols = {}

          def resolve(symb):
              # Numeric identifiers are passed through unchanged.
              try:
                  return int(symb)
              except ValueError:
                  pass
              if symb[0] == "?":
                  v, _ = self.read_dict(symbols["root"], "__hierarchy")
                  for deref in symb[1:].split("/"):
                      v, _ = self.read_dict(v, deref)
                  return v
              return symbols[symb]

          with open(filename, 'r') as f:
              for line in f:
                  if not line.strip():
                      continue
                  element_type, constructor = line.split(None, 1)
                  name, values = constructor.split("(", 1)
                  name = name.split()[0]
                  values, _ = values.rsplit(")", 1)
                  if element_type == "Node":
                      if values == "":
                          element, code = self.create_node()
                      else:
                          if values in complex_primitives:
                              value = string_to_instance(values)
                          else:
                              # NOTE(review): eval() executes arbitrary code;
                              # only load bootfiles from a trusted source.
                              value = eval(values)
                          element, code = self.create_nodevalue(value)
                  elif element_type == "Edge":
                      ends = [v.split()[0] for v in values.split(",")]
                      element, code = self.create_edge(resolve(ends[0]),
                                                       resolve(ends[1]))
                  else:
                      raise Exception("Unknown element type: %s" % element_type)
                  if code != status.SUCCESS:
                      raise Exception("Failed to process line for reason %s: %s"
                                      % (code, line))
                  symbols[name] = element

          root = symbols["root"]
          # Record which element is the root inside the graph itself; the
          # cache file would otherwise carry no trace of it.
          self.graph.add((self.mv["state"], self.mv["hasRoot"], root))
          self.graph.serialize(triplestore, format="nt")
          return root

      # ------------------------------------------------------------------
      # Create
      # ------------------------------------------------------------------
      def read_root(self):
          """Return the root element."""
          return (self.root, status.SUCCESS)

      def create_node(self):
          """Return a new node without a value."""
          return (rdflib.BNode(), status.SUCCESS)

      def create_edge(self, source, target):
          """Create an edge from *source* to *target* and return it."""
          if not isinstance(source, rdflib.BNode):
              return (None, status.FAIL_CE_SOURCE)
          if not isinstance(target, rdflib.BNode):
              return (None, status.FAIL_CE_TARGET)
          edge = rdflib.BNode()
          self.graph.add((edge, self.mv.hasSource, source))
          self.graph.add((edge, self.mv.hasTarget, target))
          return (edge, status.SUCCESS)

      def is_valid_datavalue(self, value):
          """Tell whether *value* may be stored as a node value.

          Accepted are ``{"value": name}`` dicts whose name is a complex
          primitive, and primitive values; integers must additionally lie in
          ``[-2**63, 2**64 - 1]``.
          """
          if isinstance(value, dict):
              return "value" in value and value["value"] in complex_primitives
          if not isinstance(value, primitive_types):
              return False
          if isinstance(value, integer_types):
              return -2**63 <= value <= 2**64 - 1
          return True

      def create_nodevalue(self, value):
          """Create a node holding *value* and return it."""
          if not self.is_valid_datavalue(value):
              return (None, status.FAIL_CNV_OOB)
          node = rdflib.BNode()
          self.graph.add((node, self.mv.hasValue, self._literal(value)))
          return (node, status.SUCCESS)

      def create_dict(self, source, data, destination):
          """Add the dictionary entry ``data -> destination`` to *source*."""
          if not isinstance(source, rdflib.BNode):
              return (None, status.FAIL_CDICT_SOURCE)
          if not isinstance(destination, rdflib.BNode):
              return (None, status.FAIL_CDICT_TARGET)
          if not self.is_valid_datavalue(data):
              return (None, status.FAIL_CDICT_OOB)
          key_node = self.create_nodevalue(data)[0]
          main_edge = self.create_edge(source, destination)[0]
          self.create_edge(main_edge, key_node)
          return (None, status.SUCCESS)

      # ------------------------------------------------------------------
      # Read
      # ------------------------------------------------------------------
      def read_value(self, node):
          """Return the decoded value stored on *node*."""
          if not isinstance(node, rdflib.BNode):
              return (None, status.FAIL_RV_UNKNOWN)
          literal = self.graph.value(node, self.mv.hasValue)
          if literal is None:
              return (None, status.FAIL_RV_NO_VALUE)
          # Values are written as JSON text by create_nodevalue().
          return (json.loads(str(literal)), status.SUCCESS)

      def read_outgoing(self, elem):
          """Return the edges whose source is *elem*."""
          if not isinstance(elem, rdflib.BNode):
              return (None, status.FAIL_RO_UNKNOWN)
          links = list(self.graph.subjects(self.mv.hasSource, elem))
          return (links, status.SUCCESS)

      def read_incoming(self, elem):
          """Return the edges whose target is *elem*."""
          if not isinstance(elem, rdflib.BNode):
              return (None, status.FAIL_RI_UNKNOWN)
          links = list(self.graph.subjects(self.mv.hasTarget, elem))
          return (links, status.SUCCESS)

      def read_edge(self, edge):
          """Return ``[source, target]`` of *edge*."""
          if not isinstance(edge, rdflib.BNode):
              return ([None, None], status.FAIL_RE_UNKNOWN)
          source = self.graph.value(edge, self.mv.hasSource)
          target = self.graph.value(edge, self.mv.hasTarget)
          if source is None or target is None:
              return ([None, None], status.FAIL_RE_UNKNOWN)
          return ([source, target], status.SUCCESS)

      def read_dict(self, node, value):
          """Return the target of the entry of *node* whose key is *value*."""
          if not isinstance(node, rdflib.BNode):
              return (None, status.FAIL_RDICT_UNKNOWN)
          if not self.is_valid_datavalue(value):
              return (None, status.FAIL_RDICT_OOB)
          wanted = self._literal(value)
          for main_edge in self.graph.subjects(self.mv.hasSource, node):
              if self._has_key(main_edge, wanted):
                  target = self.graph.value(main_edge, self.mv.hasTarget)
                  return (target, status.SUCCESS)
          return (None, status.FAIL_RDICT_NOT_FOUND)

      def read_dict_keys(self, node):
          """Return the key nodes of all dictionary entries of *node*."""
          if not isinstance(node, rdflib.BNode):
              return (None, status.FAIL_RDICT_UNKNOWN)
          keys = [key_node
                  for main_edge in self.graph.subjects(self.mv.hasSource, node)
                  for key_node in self._key_nodes(main_edge)]
          return (keys, status.SUCCESS)

      def read_dict_edge(self, node, value):
          """Return the main edge of the entry of *node* keyed by *value*."""
          if not isinstance(node, rdflib.BNode):
              return (None, status.FAIL_RDICTE_UNKNOWN)
          if not self.is_valid_datavalue(value):
              return (None, status.FAIL_RDICTE_OOB)
          wanted = self._literal(value)
          for main_edge in self.graph.subjects(self.mv.hasSource, node):
              if self._has_key(main_edge, wanted):
                  return (main_edge, status.SUCCESS)
          return (None, status.FAIL_RDICTE_NOT_FOUND)

      def read_dict_node(self, node, value_node):
          """Return the target of the entry of *node* keyed by *value_node*.

          Counterpart of read_dict() in which the key is identified by its
          node instead of by its value.
          """
          if not isinstance(node, rdflib.BNode):
              return (None, status.FAIL_RDICTN_UNKNOWN)
          for main_edge in self.graph.subjects(self.mv.hasSource, node):
              if value_node in self._key_nodes(main_edge):
                  target = self.graph.value(main_edge, self.mv.hasTarget)
                  return (target, status.SUCCESS)
          return (None, status.FAIL_RDICTN_NOT_FOUND)

      def read_dict_node_edge(self, node, value_node):
          """Return the main edge of the entry of *node* keyed by *value_node*."""
          if not isinstance(node, rdflib.BNode):
              return (None, status.FAIL_RDICTNE_UNKNOWN)
          for main_edge in self.graph.subjects(self.mv.hasSource, node):
              if value_node in self._key_nodes(main_edge):
                  return (main_edge, status.SUCCESS)
          return (None, status.FAIL_RDICTNE_NOT_FOUND)

      def read_reverse_dict(self, node, value):
          """Return the main edges keyed by *value* that point at *node*."""
          if not isinstance(node, rdflib.BNode):
              return (None, status.FAIL_RRDICT_UNKNOWN)
          if not self.is_valid_datavalue(value):
              return (None, status.FAIL_RRDICT_OOB)
          wanted = self._literal(value)
          matches = [main_edge
                     for main_edge in self.graph.subjects(self.mv.hasTarget, node)
                     if self._has_key(main_edge, wanted)]
          return (matches, status.SUCCESS)

      # ------------------------------------------------------------------
      # Delete
      # ------------------------------------------------------------------
      def delete_node(self, node):
          """Remove *node*, its value and every edge attached to it.

          The root cannot be deleted.
          """
          if node == self.root:
              return (None, status.FAIL_DN_UNKNOWN)
          if not isinstance(node, rdflib.BNode):
              return (None, status.FAIL_DN_UNKNOWN)
          attached = self._attached_edges(node)
          # Remove its value if it exists
          self.graph.remove((node, None, None))
          # ... and remove all edges connecting this
          for edge in attached:
              self.delete_edge(edge)
          return (None, status.SUCCESS)

      def delete_edge(self, edge):
          """Remove *edge* and, recursively, every edge attached to it."""
          # NOTE(review): the node status code is reused here, as in the
          # original; switch to an edge-specific code if the status module
          # defines one.
          if not isinstance(edge, rdflib.BNode):
              return (None, status.FAIL_DN_UNKNOWN)
          if (edge, self.mv.hasSource, None) not in self.graph:
              # Not an edge, or already removed by an earlier cascade.
              return (None, status.FAIL_DN_UNKNOWN)
          attached = self._attached_edges(edge)
          # Remove its links
          self.graph.remove((edge, None, None))
          # ... and remove all edges connecting this
          for other in attached:
              self.delete_edge(other)
          return (None, status.SUCCESS)

      def garbage_collect(self):
          """Do nothing; garbage collection is not implemented."""
          pass

      def purge(self):
          """Do nothing; purging is not implemented."""
          pass
  308. def purge(self):
  309. pass