Forráskód Böngészése

Add endpoint example

Arkadiusz Ryś 2 éve
szülő
commit
5425244d1d
3 módosított fájl, 251 hozzáadás és 0 törlés
  1. 0 0
      spendpoint/__init__.py
  2. 105 0
      spendpoint/__main__.py
  3. 146 0
      spendpoint/wrapper.py

+ 0 - 0
spendpoint/__init__.py


+ 105 - 0
spendpoint/__main__.py

@@ -0,0 +1,105 @@
+import rdflib
+from rdflib import RDF, RDFS, ConjunctiveGraph, Literal, URIRef
+from rdflib.plugins.sparql.evalutils import _eval
+
+from rdflib_endpoint import SparqlEndpoint
+
+
def custom_concat(query_results, ctx, part, eval_part):
    """
    Concatenate two strings and expose the length of each result as an
    additional ``<var>Length`` variable.
    \f
    :param query_results:   An array with the query results objects
    :param ctx:             <class 'rdflib.plugins.sparql.sparql.QueryContext'>
    :param part:            Part of the query processed (e.g. Extend or BGP) <class 'rdflib.plugins.sparql.parserutils.CompValue'>
    :param eval_part:       Part currently evaluated
    :return:                the same query_results provided in input param, with additional results
    """
    # Evaluate both call arguments against the current solution mapping.
    first = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
    second = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
    # One row per ordering of the concatenation, paired with its length score.
    forward = first + second
    backward = second + first
    rows = [(forward, len(forward)), (backward, len(backward))]
    length_var = rdflib.term.Variable(part.var + "Length")
    # Merge each (string, length) pair into the running query results.
    for text, size in rows:
        query_results.append(
            eval_part.merge({part.var: Literal(text), length_var: Literal(size)})
        )
    return query_results, ctx, part, eval_part
+
+
def most_similar(query_results, ctx, part, eval_part):
    """
    Get most similar entities for a given entity.

    Example SPARQL usage:

    PREFIX openpredict: <https://w3id.org/um/openpredict/>
    SELECT ?drugOrDisease ?mostSimilar ?mostSimilarScore WHERE {
        BIND("OMIM:246300" AS ?drugOrDisease)
        BIND(openpredict:most_similar(?drugOrDisease) AS ?mostSimilar)
    """
    # NOTE(review): argument extraction is intentionally disabled — the
    # function currently ignores its SPARQL arguments and serves stub data.
    # argumentEntity = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
    # try:
    #     argumentLimit = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
    # except:
    #     argumentLimit = None

    # Stub data standing in for a real similarity computation.
    similarity_results = [{"mostSimilar": "DRUGBANK:DB00001", "score": 0.42}]

    # Bind each stub hit to the target variable plus a parallel <var>Score variable.
    score_var = rdflib.term.Variable(part.var + "Score")
    for hit in similarity_results:
        query_results.append(
            eval_part.merge(
                {part.var: Literal(hit["mostSimilar"]), score_var: Literal(hit["score"])}
            )
        )
    return query_results, ctx, part, eval_part
+
+
# Default query shown by the endpoint UI; presumably pre-filled in the query
# editor — TODO confirm against rdflib_endpoint's SparqlEndpoint behavior.
example_query = """PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
SELECT ?concat ?concatLength WHERE {
    BIND("First" AS ?first)
    BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
}"""

# Use ConjunctiveGraph to support nquads and graphs in SPARQL queries
# identifier is the default graph
g = ConjunctiveGraph(
    # store="Oxigraph",
    identifier=URIRef("https://w3id.org/um/sparql-functions/graph/default"),
)

# Example to add a nquad to the exposed graph
g.add((URIRef("http://subject"), RDF.type, URIRef("http://object"), URIRef("http://graph")))
g.add((URIRef("http://subject"), RDFS.label, Literal("foo"), URIRef("http://graph")))

# Start the SPARQL endpoint based on the RDFLib Graph.
# `functions` maps each SPARQL function IRI to the Python callable that
# evaluates it (see custom_concat / most_similar above).
app = SparqlEndpoint(
    graph=g,
    functions={
        "https://w3id.org/um/openpredict/most_similar": most_similar,
        "https://w3id.org/um/sparql-functions/custom_concat": custom_concat,
    },
    title="SPARQL endpoint for RDFLib graph",
    description="A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python. \n[Source code](https://github.com/vemonet/rdflib-endpoint)",
    version="0.1.0",
    public_url="https://service.openpredict.137.120.31.102.nip.io/sparql",
    cors_enabled=True,
    example_query=example_query,
)

## Uncomment to run it directly with python app/main.py
# if __name__ == "__main__":
#     import uvicorn
#     uvicorn.run(app, host="0.0.0.0", port=8000)

+ 146 - 0
spendpoint/wrapper.py

@@ -0,0 +1,146 @@
+"""
+This example shows how a custom evaluation function can be added to
+handle certain SPARQL Algebra elements.
+
+A custom function is added that adds ``rdfs:subClassOf`` "inference" when
+asking for ``rdf:type`` triples.
+
+Here the custom eval function is added manually, normally you would use
+setuptools and entry_points to do it:
+i.e. in your setup.py::
+
+    entry_points = {
+        'rdf.plugins.sparqleval': [
+            'myfunc =     mypackage:MyFunction',
+            ],
+    }
+"""
+
+# EvalBGP https://rdflib.readthedocs.io/en/stable/_modules/rdflib/plugins/sparql/evaluate.html
+# Custom fct for rdf:type with auto infer super-classes: https://github.com/RDFLib/rdflib/blob/master/examples/custom_eval.py
+# BGP = Basic Graph Pattern
+# Docs rdflib custom fct: https://rdflib.readthedocs.io/en/stable/intro_to_sparql.html
+# StackOverflow: https://stackoverflow.com/questions/43976691/custom-sparql-functions-in-rdflib/66988421#66988421
+
+# Another project: https://github.com/bas-stringer/scry/blob/master/query_handler.py
+# https://www.w3.org/TR/sparql11-service-description/#example-turtle
+# Federated query: https://www.w3.org/TR/2013/REC-sparql11-federated-query-20130321/#defn_service
+# XML method: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.plugins.sparql.results.html#module-rdflib.plugins.sparql.results.xmlresults
+
+import rdflib
+from rdflib import Literal, URIRef
+from rdflib.plugins.sparql import parser
+from rdflib.plugins.sparql.algebra import pprintAlgebra, translateQuery
+from rdflib.plugins.sparql.evaluate import evalBGP
+
# inferredSubClass = rdflib.RDFS.subClassOf * "*"  # any number of rdfs.subClassOf
# Base IRI of the Biolink vocabulary; term names are concatenated onto it below.
biolink = URIRef("https://w3id.org/biolink/vocab/")
+
+
class Result:
    """Plain attribute bag for one prediction row; ``get_triples`` sets
    ``drug``, ``disease`` and ``score`` on each instance."""

    pass
+
+
def add_to_graph(ctx, drug, disease, score):
    """
    Add one reified ``biolink:treats`` statement (drug -> disease, with a
    confidence score) to the graph currently being queried.

    :param ctx:     rdflib QueryContext whose ``graph`` receives the triples
    :param drug:    URIRef of the drug (rdf:subject of the statement)
    :param disease: URIRef of the disease (rdf:object of the statement)
    :param score:   Literal confidence value
    """
    bnode = rdflib.BNode()
    ctx.graph.add((bnode, rdflib.RDF.type, rdflib.RDF.Statement))
    ctx.graph.add((bnode, rdflib.RDF.subject, drug))
    # Fix: `biolink + "treats"` concatenates a URIRef with a str, which yields
    # a plain Python str — not an rdflib term — and Graph.add() requires every
    # triple component to be an rdflib Node. Wrap each concatenation in URIRef.
    ctx.graph.add((bnode, rdflib.RDF.predicate, URIRef(biolink + "treats")))
    ctx.graph.add((bnode, rdflib.RDF.object, disease))
    ctx.graph.add(
        (
            bnode,
            URIRef(biolink + "category"),
            URIRef(biolink + "ChemicalToDiseaseOrPhenotypicFeatureAssociation"),
        )
    )
    ctx.graph.add((bnode, URIRef(biolink + "has_confidence_level"), score))
+
+
def get_triples(disease):
    """
    Return stub prediction results for *disease*: a single hard-coded drug
    (DrugBank DB00001) with a fixed confidence score of "1.0".
    """
    prediction = Result()
    prediction.drug = URIRef("http://bio2rdf.org/drugbank:DB00001")
    prediction.disease = disease
    prediction.score = Literal("1.0")
    return [prediction]
+
+
+# def parseRelationalExpr(expr):
+
+
def custom_eval(ctx, part):
    """Custom SPARQL evaluation hook (registered via rdflib CUSTOM_EVALS).

    Collects candidate disease terms from Extend (BIND), Filter and BGP
    algebra parts into ``ctx.myvars``, injects stub result triples into the
    queried graph via ``add_to_graph``, then delegates to the default BGP
    evaluator. Raising NotImplementedError for any other algebra part makes
    rdflib fall back to its built-in evaluation for that part.
    """
    # print (part.name)

    # A Project part marks the (sub)query's projection: reset collected terms.
    if part.name == "Project":
        ctx.myvars = []

    # search extend for variable binding
    # e.g. BIND(omim:1 AS ?disease) — record the bound expression.
    if part.name == "Extend" and hasattr(part, "expr") and not isinstance(part.expr, list):
        ctx.myvars.append(part.expr)

    # search for filter
    # e.g. FILTER(?disease = omim:2) — record the right-hand side of "=".
    if part.name == "Filter" and hasattr(part, "expr"):
        if hasattr(part.expr, "expr"):
            if part.expr.expr["op"] == "=":
                part.expr.expr["expr"]  # NOTE(review): no-op statement — the left-hand side is read and discarded
                d = part.expr.expr["other"]
                ctx.myvars.append(d)
        else:
            if part.expr["op"] == "=":
                part.expr["expr"]  # NOTE(review): no-op statement — the left-hand side is read and discarded
                d = part.expr["other"]
                ctx.myvars.append(d)

    # search the BGP for the variable of interest
    if part.name == "BGP":
        triples = []
        for t in part.triples:
            # Only triples with predicate rdf:object carry the disease term.
            if t[1] == rdflib.RDF.object:
                disease = t[2]
                # check first if the disease term is specified in the bgp triple
                if isinstance(disease, rdflib.term.URIRef):
                    ctx.myvars.append(disease)

                # fetch instances
                # NOTE(review): runs once per matching triple, so the same
                # ctx.myvars entries may be fetched repeatedly — confirm intended.
                for d in ctx.myvars:
                    results = get_triples(d)
                    for r in results:
                        add_to_graph(ctx, r.drug, r.disease, r.score)

            triples.append(t)
        return evalBGP(ctx, triples)
    # Not a BGP: let rdflib's default evaluators handle this algebra part.
    raise NotImplementedError()
+
+
if __name__ == "__main__":
    # add function directly, normally we would use setuptools and entry_points
    rdflib.plugins.sparql.CUSTOM_EVALS["exampleEval"] = custom_eval

    # The graph starts empty; custom_eval injects stub triples while the
    # query below is being evaluated.
    g = rdflib.Graph()

    q = """PREFIX openpredict: <https://w3id.org/um/openpredict/>
        PREFIX biolink: <https://w3id.org/biolink/vocab/>
        PREFIX omim: <http://bio2rdf.org/omim:>
        SELECT ?disease ?drug ?score
        {
            ?association a rdf:Statement ;
                rdf:subject ?drug ;
                rdf:predicate ?predicate ;
                #rdf:object omim:246300 ;
                rdf:object ?disease ;
                biolink:category biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation ;
                biolink:has_confidence_level ?score .
            #?disease dcat:identifier "OMIM:246300" .
            BIND(omim:1 AS ?disease)
            #FILTER(?disease = omim:2 || ?disease = omim:3)
            #VALUES ?disease { omim:5 omim:6 omim:7 }
        }"""

    # Parse/translate the query and print its algebra tree for inspection.
    pq = parser.parseQuery(q)
    tq = translateQuery(pq)
    pprintAlgebra(tq)

    # Run the query; rows come from the reified drug/disease/score
    # statements that custom_eval adds during evaluation.
    for x in g.query(q):
        print(x)