Forráskód Böngészése

Add endpoint example

Arkadiusz Ryś 2 éve
szülő
commit
5425244d1d
3 módosított fájl, 251 hozzáadás és 0 törlés
  1. 0 0
      spendpoint/__init__.py
  2. 105 0
      spendpoint/__main__.py
  3. 146 0
      spendpoint/wrapper.py

+ 0 - 0
spendpoint/__init__.py


+ 105 - 0
spendpoint/__main__.py

@@ -0,0 +1,105 @@
+import rdflib
+from rdflib import RDF, RDFS, ConjunctiveGraph, Literal, URIRef
+from rdflib.plugins.sparql.evalutils import _eval
+
+from rdflib_endpoint import SparqlEndpoint
+
+
def custom_concat(query_results, ctx, part, eval_part):
    """
    Concatenate two strings and expose the length of each result as an
    additional ``<var>Length`` variable.
    \f
    :param query_results:   An array with the query results objects
    :param ctx:             <class 'rdflib.plugins.sparql.sparql.QueryContext'>
    :param part:            Part of the query processed (e.g. Extend or BGP) <class 'rdflib.plugins.sparql.parserutils.CompValue'>
    :param eval_part:       Part currently evaluated
    :return:                the same query_results provided in input param, with additional results
    """
    # Evaluate both call arguments against the current solution mapping.
    first = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
    second = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
    # One row per ordering of the concatenation, paired with its length score.
    forward = first + second
    backward = second + first
    rows = [(forward, len(forward)), (backward, len(backward))]
    length_var = rdflib.term.Variable(part.var + "Length")
    # Merge each (string, length) pair into the running query results.
    for text, size in rows:
        query_results.append(
            eval_part.merge({part.var: Literal(text), length_var: Literal(size)})
        )
    return query_results, ctx, part, eval_part
+
+
def most_similar(query_results, ctx, part, eval_part):
    """
    Get most similar entities for a given entity.

    Example SPARQL usage:

    PREFIX openpredict: <https://w3id.org/um/openpredict/>
    SELECT ?drugOrDisease ?mostSimilar ?mostSimilarScore WHERE {
        BIND("OMIM:246300" AS ?drugOrDisease)
        BIND(openpredict:most_similar(?drugOrDisease) AS ?mostSimilar)
    """
    # NOTE(review): argument extraction is intentionally disabled — the
    # function currently ignores its SPARQL arguments and serves stub data.
    # argumentEntity = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
    # try:
    #     argumentLimit = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
    # except:
    #     argumentLimit = None

    # Stub data standing in for a real similarity computation.
    similarity_results = [{"mostSimilar": "DRUGBANK:DB00001", "score": 0.42}]

    # Bind each stub hit to the target variable plus a parallel <var>Score variable.
    score_var = rdflib.term.Variable(part.var + "Score")
    for hit in similarity_results:
        query_results.append(
            eval_part.merge(
                {part.var: Literal(hit["mostSimilar"]), score_var: Literal(hit["score"])}
            )
        )
    return query_results, ctx, part, eval_part
+
+
# Default query shown by the endpoint UI; presumably pre-filled in the query
# editor — TODO confirm against rdflib_endpoint's SparqlEndpoint behavior.
example_query = """PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
SELECT ?concat ?concatLength WHERE {
    BIND("First" AS ?first)
    BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
}"""

# Use ConjunctiveGraph to support nquads and graphs in SPARQL queries
# identifier is the default graph
g = ConjunctiveGraph(
    # store="Oxigraph",
    identifier=URIRef("https://w3id.org/um/sparql-functions/graph/default"),
)

# Example to add a nquad to the exposed graph
g.add((URIRef("http://subject"), RDF.type, URIRef("http://object"), URIRef("http://graph")))
g.add((URIRef("http://subject"), RDFS.label, Literal("foo"), URIRef("http://graph")))

# Start the SPARQL endpoint based on the RDFLib Graph.
# `functions` maps each SPARQL function IRI to the Python callable that
# evaluates it (see custom_concat / most_similar above).
app = SparqlEndpoint(
    graph=g,
    functions={
        "https://w3id.org/um/openpredict/most_similar": most_similar,
        "https://w3id.org/um/sparql-functions/custom_concat": custom_concat,
    },
    title="SPARQL endpoint for RDFLib graph",
    description="A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python. \n[Source code](https://github.com/vemonet/rdflib-endpoint)",
    version="0.1.0",
    public_url="https://service.openpredict.137.120.31.102.nip.io/sparql",
    cors_enabled=True,
    example_query=example_query,
)

## Uncomment to run it directly with python app/main.py
# if __name__ == "__main__":
#     import uvicorn
#     uvicorn.run(app, host="0.0.0.0", port=8000)

+ 146 - 0
spendpoint/wrapper.py

@@ -0,0 +1,146 @@
+"""
+This example shows how a custom evaluation function can be added to
+handle certain SPARQL Algebra elements.
+
+A custom function is added that adds ``rdfs:subClassOf`` "inference" when
+asking for ``rdf:type`` triples.
+
+Here the custom eval function is added manually, normally you would use
+setuptools and entry_points to do it:
+i.e. in your setup.py::
+
+    entry_points = {
+        'rdf.plugins.sparqleval': [
+            'myfunc =     mypackage:MyFunction',
+            ],
+    }
+"""
+
+# EvalBGP https://rdflib.readthedocs.io/en/stable/_modules/rdflib/plugins/sparql/evaluate.html
+# Custom fct for rdf:type with auto infer super-classes: https://github.com/RDFLib/rdflib/blob/master/examples/custom_eval.py
+# BGP = Basic Graph Pattern
+# Docs rdflib custom fct: https://rdflib.readthedocs.io/en/stable/intro_to_sparql.html
+# StackOverflow: https://stackoverflow.com/questions/43976691/custom-sparql-functions-in-rdflib/66988421#66988421
+
+# Another project: https://github.com/bas-stringer/scry/blob/master/query_handler.py
+# https://www.w3.org/TR/sparql11-service-description/#example-turtle
+# Federated query: https://www.w3.org/TR/2013/REC-sparql11-federated-query-20130321/#defn_service
+# XML method: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.plugins.sparql.results.html#module-rdflib.plugins.sparql.results.xmlresults
+
+import rdflib
+from rdflib import Literal, URIRef
+from rdflib.plugins.sparql import parser
+from rdflib.plugins.sparql.algebra import pprintAlgebra, translateQuery
+from rdflib.plugins.sparql.evaluate import evalBGP
+
# inferredSubClass = rdflib.RDFS.subClassOf * "*"  # any number of rdfs.subClassOf
# Base IRI of the Biolink vocabulary; term names are concatenated onto it below.
biolink = URIRef("https://w3id.org/biolink/vocab/")
+
+
class Result:
    """Plain attribute bag for one prediction row; ``get_triples`` sets
    ``drug``, ``disease`` and ``score`` on each instance."""

    pass
+
+
def add_to_graph(ctx, drug, disease, score):
    """
    Add one reified ``biolink:treats`` statement (drug -> disease, with a
    confidence score) to the graph currently being queried.

    :param ctx:     rdflib QueryContext whose ``graph`` receives the triples
    :param drug:    URIRef of the drug (rdf:subject of the statement)
    :param disease: URIRef of the disease (rdf:object of the statement)
    :param score:   Literal confidence value
    """
    bnode = rdflib.BNode()
    ctx.graph.add((bnode, rdflib.RDF.type, rdflib.RDF.Statement))
    ctx.graph.add((bnode, rdflib.RDF.subject, drug))
    # Fix: `biolink + "treats"` concatenates a URIRef with a str, which yields
    # a plain Python str — not an rdflib term — and Graph.add() requires every
    # triple component to be an rdflib Node. Wrap each concatenation in URIRef.
    ctx.graph.add((bnode, rdflib.RDF.predicate, URIRef(biolink + "treats")))
    ctx.graph.add((bnode, rdflib.RDF.object, disease))
    ctx.graph.add(
        (
            bnode,
            URIRef(biolink + "category"),
            URIRef(biolink + "ChemicalToDiseaseOrPhenotypicFeatureAssociation"),
        )
    )
    ctx.graph.add((bnode, URIRef(biolink + "has_confidence_level"), score))
+
+
def get_triples(disease):
    """
    Return stub prediction results for *disease*: a single hard-coded drug
    (DrugBank DB00001) with a fixed confidence score of "1.0".
    """
    prediction = Result()
    prediction.drug = URIRef("http://bio2rdf.org/drugbank:DB00001")
    prediction.disease = disease
    prediction.score = Literal("1.0")
    return [prediction]
+
+
+# def parseRelationalExpr(expr):
+
+
def custom_eval(ctx, part):
    """Custom SPARQL evaluation hook (registered via rdflib CUSTOM_EVALS).

    Collects candidate disease terms from Extend (BIND), Filter and BGP
    algebra parts into ``ctx.myvars``, injects stub result triples into the
    queried graph via ``add_to_graph``, then delegates to the default BGP
    evaluator. Raising NotImplementedError for any other algebra part makes
    rdflib fall back to its built-in evaluation for that part.
    """
    # print (part.name)

    # A Project part marks the (sub)query's projection: reset collected terms.
    if part.name == "Project":
        ctx.myvars = []

    # search extend for variable binding
    # e.g. BIND(omim:1 AS ?disease) — record the bound expression.
    if part.name == "Extend" and hasattr(part, "expr") and not isinstance(part.expr, list):
        ctx.myvars.append(part.expr)

    # search for filter
    # e.g. FILTER(?disease = omim:2) — record the right-hand side of "=".
    if part.name == "Filter" and hasattr(part, "expr"):
        if hasattr(part.expr, "expr"):
            if part.expr.expr["op"] == "=":
                part.expr.expr["expr"]  # NOTE(review): no-op statement — the left-hand side is read and discarded
                d = part.expr.expr["other"]
                ctx.myvars.append(d)
        else:
            if part.expr["op"] == "=":
                part.expr["expr"]  # NOTE(review): no-op statement — the left-hand side is read and discarded
                d = part.expr["other"]
                ctx.myvars.append(d)

    # search the BGP for the variable of interest
    if part.name == "BGP":
        triples = []
        for t in part.triples:
            # Only triples with predicate rdf:object carry the disease term.
            if t[1] == rdflib.RDF.object:
                disease = t[2]
                # check first if the disease term is specified in the bgp triple
                if isinstance(disease, rdflib.term.URIRef):
                    ctx.myvars.append(disease)

                # fetch instances
                # NOTE(review): runs once per matching triple, so the same
                # ctx.myvars entries may be fetched repeatedly — confirm intended.
                for d in ctx.myvars:
                    results = get_triples(d)
                    for r in results:
                        add_to_graph(ctx, r.drug, r.disease, r.score)

            triples.append(t)
        return evalBGP(ctx, triples)
    # Not a BGP: let rdflib's default evaluators handle this algebra part.
    raise NotImplementedError()
+
+
if __name__ == "__main__":
    # add function directly, normally we would use setuptools and entry_points
    rdflib.plugins.sparql.CUSTOM_EVALS["exampleEval"] = custom_eval

    # The graph starts empty; custom_eval injects stub triples while the
    # query below is being evaluated.
    g = rdflib.Graph()

    q = """PREFIX openpredict: <https://w3id.org/um/openpredict/>
        PREFIX biolink: <https://w3id.org/biolink/vocab/>
        PREFIX omim: <http://bio2rdf.org/omim:>
        SELECT ?disease ?drug ?score
        {
            ?association a rdf:Statement ;
                rdf:subject ?drug ;
                rdf:predicate ?predicate ;
                #rdf:object omim:246300 ;
                rdf:object ?disease ;
                biolink:category biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation ;
                biolink:has_confidence_level ?score .
            #?disease dcat:identifier "OMIM:246300" .
            BIND(omim:1 AS ?disease)
            #FILTER(?disease = omim:2 || ?disease = omim:3)
            #VALUES ?disease { omim:5 omim:6 omim:7 }
        }"""

    # Parse/translate the query and print its algebra tree for inspection.
    pq = parser.parseQuery(q)
    tq = translateQuery(pq)
    pprintAlgebra(tq)

    # Run the query; rows come from the reified drug/disease/score
    # statements that custom_eval adds during evaluation.
    for x in g.query(q):
        print(x)