Browse Source

Added an initial RDF implementation using rdflib

Yentl Van Tendeloo 8 years ago
parent
commit
22fd8edfa4
1 changed files with 346 additions and 0 deletions
  1. 346 0
      state/modelverse_state/rdf.py

+ 346 - 0
state/modelverse_state/rdf.py

@@ -0,0 +1,346 @@
+from modelverse_state import status
+import sys
+from collections import defaultdict
+import os
+import rdflib
+
+import cPickle as pickle
+
+# Python 2 silently promotes a 'big integer' to a long and has a separate
+# unicode text type, so the accepted types depend on the major version.
+# The numeric major version is compared: comparing the sys.version string is
+# a lexicographic test that would misclassify a two-digit major version.
+if sys.version_info[0] >= 3: # pragma: no cover
+    integer_types = (int,)
+    primitive_types = (int, float, str, bool)
+else: # pragma: no cover
+    integer_types = (int, long)
+    primitive_types = (int, long, float, str, bool, unicode)
+# Names accepted as the "value" entry of a dict-wrapped data value.
+complex_primitives = frozenset(["if", "while", "assign", "call", "break", "continue", "return","resolve","access", "constant", "input", "output", "declare", "global"])
+
+def instance_to_string(value):
+    """Return the entry stored under the "value" key of the mapping *value*."""
+    wrapped = value["value"]
+    return wrapped
+
+def string_to_instance(value):
+    """Wrap *value* in a one-entry dict under the "value" key."""
+    return dict(value=value)
+
+class ModelverseState(object):
+    """Modelverse state kept as triples in an rdflib graph.
+
+    Nodes and edges are rdflib blank nodes.  An edge is described by a
+    "hasSource" and a "hasTarget" triple, a node's value by a "hasValue"
+    triple.  The create/read/delete operations return a
+    ``(result, status_code)`` tuple.
+
+    NOTE(review): predicates are handed to rdflib as plain Python strings,
+    the SPARQL text is assembled with ``%`` formatting, and query results are
+    indexed with ``[0]``.  Whether rdflib accepts each of these is not
+    visible from this file -- confirm against the rdflib documentation
+    before relying on the query results.
+    """
+
+    def __init__(self, bootfile = None):
+        """Create an empty graph and optionally load a bootstrap file.
+
+        bootfile: path handed to parse(); when None nothing is loaded.
+        """
+        self.graph = rdflib.Graph()
+        # parse() builds a path from the filename, so it cannot take None.
+        if bootfile is not None:
+            self.parse(bootfile)
+        # NOTE(review): parse() returns the element named "root", but that
+        # result is not used here; the root stays the placeholder 0.
+        self.root = 0
+
+        self.GC = True
+        self.to_delete = set()
+
+    def parse(self, filename):
+        """Load the state described by the bootstrap file *filename*.
+
+        A serialized copy of the graph is kept next to the bootstrap file as
+        ``filename + ".n3"``.  When that copy is newer than the bootstrap
+        file it is loaded directly; otherwise, or when loading fails for any
+        reason, the bootstrap file is processed line by line and the copy is
+        rewritten.
+
+        Returns the element registered under the name "root" when the
+        bootstrap file was processed, and None when the serialized copy was
+        loaded.
+
+        Raises an Exception for an unknown element type, or when creating an
+        element reports a status other than 100.
+        """
+        triplestore = filename + ".n3"
+        try:
+            if os.path.getmtime(triplestore) > os.path.getmtime(filename):
+                # Load the serialized graph, not the bootstrap file itself.
+                self.graph.parse(triplestore, format="n3")
+            else:
+                raise Exception("Invalid triplestore")
+        except Exception:
+            # Deliberate catch-all: a missing, stale or unreadable
+            # triplestore all mean the bootstrap file has to be processed
+            # and the triplestore regenerated.
+            symbols = {}
+
+            def resolve(symb):
+                # A symbol is a number, a "?a/b/c" path that is followed
+                # from the root's "__hierarchy" entry, or a name defined on
+                # an earlier line.
+                try:
+                    return int(symb)
+                except ValueError:
+                    if symb[0] == "?":
+                        derefs = symb[1:].split("/")
+                        v, _ = self.read_dict(symbols["root"], "__hierarchy")
+                        for deref in derefs:
+                            v, _ = self.read_dict(v, deref)
+                        return v
+                    else:
+                        return symbols[symb]
+
+            with open(filename, 'r') as f:
+                for line in f:
+                    if not line.strip():
+                        # Blank lines carry no element; splitting them
+                        # would fail.
+                        continue
+                    element_type, constructor = line.split(None, 1)
+                    name, values = constructor.split("(", 1)
+                    name = name.split()[0]
+                    values, _ = values.rsplit(")", 1)
+
+                    # 'code' is used instead of 'status' so the imported
+                    # status module is not shadowed inside this method.
+                    if element_type == "Node":
+                        if values == "":
+                            symbols[name], code = self.create_node()
+                        else:
+                            value = values
+                            if value in complex_primitives:
+                                value = string_to_instance(value)
+                            else:
+                                # NOTE(review): eval() executes whatever the
+                                # bootstrap file contains; only use trusted
+                                # bootstrap files.
+                                value = eval(value)
+                            symbols[name], code = self.create_nodevalue(value)
+                    elif element_type == "Edge":
+                        values = [v.split()[0] for v in values.split(",")]
+                        symbols[name], code = self.create_edge(resolve(values[0]), resolve(values[1]))
+                    else:
+                        raise Exception("Unknown element type: %s" % element_type)
+
+                    # presumably 100 is status.SUCCESS -- TODO confirm
+                    if code != 100:
+                        raise Exception("Failed to process line for reason %s: %s" % (code, line))
+
+            # Creation successful, now also store the triplestore
+            self.graph.serialize(triplestore, format="n3")
+            #TODO this loses information about the root!
+            return symbols["root"]
+
+    def read_root(self):
+        """Return ``(self.root, status.SUCCESS)``."""
+        return (self.root, status.SUCCESS)
+
+    def create_node(self):
+        """Return a fresh blank node; nothing is added to the graph."""
+        return (rdflib.BNode(), status.SUCCESS)
+
+    def create_edge(self, source, target):
+        """Create an edge from *source* to *target*.
+
+        Both ends must be blank nodes, otherwise FAIL_CE_SOURCE or
+        FAIL_CE_TARGET is returned with a None result.
+        """
+        if not isinstance(source, rdflib.BNode):
+            return (None, status.FAIL_CE_SOURCE)
+        if not isinstance(target, rdflib.BNode):
+            return (None, status.FAIL_CE_TARGET)
+        edge = rdflib.BNode()
+        self.graph.add((edge, "hasSource", source))
+        self.graph.add((edge, "hasTarget", target))
+        return (edge, status.SUCCESS)
+
+    def is_valid_datavalue(self, value):
+        """Tell whether *value* may be stored as a node value.
+
+        A dict is valid when its "value" entry is one of complex_primitives.
+        Anything else must be an instance of primitive_types, and integers
+        must lie in the range -2**63 .. 2**64 - 1.
+        """
+        if isinstance(value, dict):
+            return "value" in value and value["value"] in complex_primitives
+        if not isinstance(value, primitive_types):
+            return False
+        if isinstance(value, integer_types) and not (-2**63 <= value <= 2**64 - 1):
+            return False
+        return True
+
+    def create_nodevalue(self, value):
+        """Create a node holding *value*; FAIL_CNV_OOB for an invalid value."""
+        if not self.is_valid_datavalue(value):
+            return (None, status.FAIL_CNV_OOB)
+        node = rdflib.BNode()
+        self.graph.add((node, "hasValue", rdflib.Literal(value)))
+        return (node, status.SUCCESS)
+
+    def create_dict(self, source, data, destination):
+        """Link *source* to *destination* under the key *data*.
+
+        Creates a value node for *data*, an edge from *source* to
+        *destination*, and an edge from that edge to the value node.
+        The result is always None; only the status is meaningful.
+        """
+        if not isinstance(source, rdflib.BNode):
+            return (None, status.FAIL_CDICT_SOURCE)
+        # The parameter is called 'destination'; there is no 'target' here.
+        if not isinstance(destination, rdflib.BNode):
+            return (None, status.FAIL_CDICT_TARGET)
+        if not self.is_valid_datavalue(data):
+            return (None, status.FAIL_CDICT_OOB)
+
+        n = self.create_nodevalue(data)[0]
+        e = self.create_edge(source, destination)[0]
+        self.create_edge(e, n)
+        return (None, status.SUCCESS)
+
+    def read_value(self, node):
+        """Query the "hasValue" entry of *node*.
+
+        Returns FAIL_RV_UNKNOWN when *node* is not a blank node and
+        FAIL_RV_NO_VALUE when the query yields nothing.
+        """
+        if not isinstance(node, rdflib.BNode):
+            return (None, status.FAIL_RV_UNKNOWN)
+        result = self.graph.query(
+            """
+            SELECT ?value
+            WHERE {
+                %s "hasValue" ?value.
+            }
+            """ % node)
+        if len(result) == 0:
+            return (None, status.FAIL_RV_NO_VALUE)
+        return (result[0], status.SUCCESS)
+
+    def read_outgoing(self, elem):
+        """Return the list of "hasTarget" query matches for *elem*."""
+        if not isinstance(elem, rdflib.BNode):
+            return (None, status.FAIL_RO_UNKNOWN)
+        result = self.graph.query(
+            """
+            SELECT ?link
+            WHERE {
+                %s "hasTarget" ?link.
+            }
+            """ % elem)
+        return (list(result), status.SUCCESS)
+
+    def read_incoming(self, elem):
+        """Return the list of "hasSource" query matches for *elem*."""
+        if not isinstance(elem, rdflib.BNode):
+            return (None, status.FAIL_RI_UNKNOWN)
+        result = self.graph.query(
+            """
+            SELECT ?link
+            WHERE {
+                %s "hasSource" ?link.
+            }
+            """ % elem)
+        return (list(result), status.SUCCESS)
+
+    def read_edge(self, edge):
+        """Query the source and target of *edge*.
+
+        Returns ``([None, None], FAIL_RE_UNKNOWN)`` when nothing matches.
+        """
+        result = self.graph.query(
+            """
+            SELECT ?source, ?target
+            WHERE {
+                %s "hasSource" ?source;
+                   "hasTarget" ?target.
+            }
+            """ % edge)
+        if len(result) == 0:
+            return ([None, None], status.FAIL_RE_UNKNOWN)
+        else:
+            return (list(result), status.SUCCESS)
+
+    def read_dict(self, node, value):
+        """Look up the entry of *node* whose key node holds *value*.
+
+        Returns FAIL_RDICT_UNKNWON for a non-blank-node *node*,
+        FAIL_RDICT_OOB for an invalid *value* and FAIL_RDICT_NOT_FOUND when
+        the query yields nothing.
+        """
+        if not isinstance(node, rdflib.BNode):
+            return (None, status.FAIL_RDICT_UNKNWON)
+        if not self.is_valid_datavalue(value):
+            return (None, status.FAIL_RDICT_OOB)
+        result = self.graph.query(
+            """
+            SELECT ?value
+            WHERE {
+                ?main_edge "hasSource" %s;
+                           "hasTarget" ?value_node.
+                ?attr_edge "hasSource" ?main_edge;
+                           "hasTarget" ?attr_node.
+                ?attr_node "hasValue" %s.
+                ?value_node "hasValue" ?value.
+            }
+            """ % (node, value))
+        if len(result) == 0:
+            return (None, status.FAIL_RDICT_NOT_FOUND)
+        return (result[0], status.SUCCESS)
+
+    def read_dict_keys(self, node):
+        """Return the key elements of all dictionary entries of *node*."""
+        if not isinstance(node, rdflib.BNode):
+            return (None, status.FAIL_RDICT_UNKNWON)
+
+        # The query has one placeholder, filled with the node itself.
+        result = self.graph.query(
+            """
+            SELECT ?key
+            WHERE {
+                ?main_edge "hasSource" %s.
+                ?attr_edge "hasSource" ?main_edge;
+                           "hasTarget" ?key.
+            }
+            """ % node)
+        return (list(result), status.SUCCESS)
+
+    def read_dict_edge(self, node, value):
+        """Look up the edge of the entry of *node* keyed by *value*.
+
+        Returns FAIL_RDICTE_UNKNOWN, FAIL_RDICTE_OOB or
+        FAIL_RDICTE_NOT_FOUND on failure.
+        """
+        if not isinstance(node, rdflib.BNode):
+            return (None, status.FAIL_RDICTE_UNKNOWN)
+        if not self.is_valid_datavalue(value):
+            return (None, status.FAIL_RDICTE_OOB)
+        result = self.graph.query(
+            """
+            SELECT ?main_edge
+            WHERE {
+                ?main_edge "hasSource" %s;
+                           "hasTarget" ?value_node.
+                ?attr_edge "hasSource" ?main_edge;
+                           "hasTarget" ?attr_node.
+                ?attr_node "hasValue" %s.
+            }
+            """ % (node, value))
+        if len(result) == 0:
+            return (None, status.FAIL_RDICTE_NOT_FOUND)
+        return (result[0], status.SUCCESS)
+
+    def read_dict_node(self, node, value_node):
+        """Look up the entry of *node* whose key element is *value_node*.
+
+        Returns FAIL_RDICTN_UNKNOWN or FAIL_RDICTN_NOT_FOUND on failure.
+        """
+        if not isinstance(node, rdflib.BNode):
+            return (None, status.FAIL_RDICTN_UNKNOWN)
+        # The key element is the 'value_node' parameter.
+        result = self.graph.query(
+            """
+            SELECT ?value
+            WHERE {
+                ?main_edge "hasSource" %s;
+                           "hasTarget" ?value_node.
+                ?attr_edge "hasSource" ?main_edge;
+                           "hasTarget" %s.
+                ?value_node "hasValue" ?value.
+            }
+            """ % (node, value_node))
+        if len(result) == 0:
+            return (None, status.FAIL_RDICTN_NOT_FOUND)
+        return (result[0], status.SUCCESS)
+
+    def read_dict_node_edge(self, node, value_node):
+        """Look up the edge of the entry of *node* keyed by *value_node*.
+
+        Returns FAIL_RDICTNE_UNKNOWN or FAIL_RDICTNE_NOT_FOUND on failure.
+        """
+        if not isinstance(node, rdflib.BNode):
+            return (None, status.FAIL_RDICTNE_UNKNOWN)
+        # The key element is the 'value_node' parameter.
+        result = self.graph.query(
+            """
+            SELECT ?main_edge
+            WHERE {
+                ?main_edge "hasSource" %s.
+                ?attr_edge "hasSource" ?main_edge;
+                           "hasTarget" %s.
+            }
+            """ % (node, value_node))
+        if len(result) == 0:
+            return (None, status.FAIL_RDICTNE_NOT_FOUND)
+        return (result[0], status.SUCCESS)
+
+    def read_reverse_dict(self, node, value):
+        """Return the edges targeting *node* that are keyed by *value*.
+
+        Returns FAIL_RRDICT_UNKNOWN for a non-blank-node *node* and
+        FAIL_RRDICT_OOB for an invalid *value*.
+        """
+        if not isinstance(node, rdflib.BNode):
+            return (None, status.FAIL_RRDICT_UNKNOWN)
+        if not self.is_valid_datavalue(value):
+            return (None, status.FAIL_RRDICT_OOB)
+        result = self.graph.query(
+            """
+            SELECT ?main_edge
+            WHERE {
+                ?main_edge "hasTarget" %s.
+                ?attr_edge "hasSource" ?main_edge;
+                           "hasTarget" ?value_node.
+                ?value_node "hasValue" %s.
+            }
+            """ % (node, value))
+
+        return (list(result), status.SUCCESS)
+
+    def delete_node(self, node):
+        """Delete *node* together with the edges connected to it.
+
+        The root and anything that is not a blank node are refused with
+        FAIL_DN_UNKNOWN.
+        """
+        if node == self.root:
+            return (None, status.FAIL_DN_UNKNOWN)
+        if not isinstance(node, rdflib.BNode):
+            return (None, status.FAIL_DN_UNKNOWN)
+        # Remove its value if it exists
+        self.graph.remove((node, None, None))
+
+        # Get all edges connecting this; both placeholders are the node.
+        result = self.graph.query(
+            """
+            SELECT ?edge
+            WHERE {
+                { ?edge "hasTarget" %s. }
+                UNION
+                { ?edge "hasSource" %s. }
+            }
+            """ % (node, node))
+        # ... and remove them
+        for e in result:
+            self.delete_edge(e)
+
+        return (None, status.SUCCESS)
+
+    def delete_edge(self, edge):
+        """Delete *edge* together with the edges connected to it.
+
+        NOTE(review): the failure status is FAIL_DN_UNKNOWN, the same code
+        delete_node uses; confirm whether an edge-specific code was meant.
+        """
+        if not isinstance(edge, rdflib.BNode):
+            return (None, status.FAIL_DN_UNKNOWN)
+        # Remove its links
+        self.graph.remove((edge, None, None))
+
+        # Get all edges connecting this; both placeholders are the edge.
+        result = self.graph.query(
+            """
+            SELECT ?edge
+            WHERE {
+                { ?edge "hasTarget" %s. }
+                UNION
+                { ?edge "hasSource" %s. }
+            }
+            """ % (edge, edge))
+        # ... and remove them
+        for e in result:
+            self.delete_edge(e)
+
+        return (None, status.SUCCESS)
+
+    def garbage_collect(self):
+        """Do nothing; garbage collection is not implemented."""
+        pass
+
+    def purge(self):
+        """Do nothing; purging is not implemented."""
+        pass