
Hack around Fuseki service request error

Arkadiusz Ryś 2 years ago
parent
commit
8a5f37fde0

+ 4 - 0
README.rst

@@ -1,6 +1,10 @@
+##########
 SpEndPoint
 ##########
 
+Creates a SPARQL endpoint supporting custom services.
+Default access at `http://127.0.0.1:8000/gui`.
+
 Installation
 ------------
 

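A hedged usage sketch, not part of the commit: the GET handler added to spendpoint/endpoint.py below accepts the query as a `query` parameter and inspects the Accept header, so the endpoint the README describes can be exercised with any HTTP client. Host and port follow the README default; the query itself and the client library are illustrative.

    import requests

    # Any SPARQL query works; this one just lists a few triples.
    query = "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10"

    response = requests.get(
        "http://127.0.0.1:8000/",                # default host/port per the README
        params={"query": query},                 # matches the handler's Query(None) parameter
        headers={"accept": "application/json"},  # selects the JSON serialisation branch
    )
    print(response.json())
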
+ 1 - 1
requirements.txt

@@ -1,5 +1,5 @@
 # SpEndPoint
-arklog            ~= 0.5.0
+arklog            ~= 0.5.1
 rdflib            ~= 6.2.0
 fastapi           ~= 0.92
 starlette         ~= 0.25.0

+ 44 - 36
spendpoint/endpoint.py

@@ -2,11 +2,11 @@
 
 import logging
 import re
-from typing import Any, Callable, Dict, List, Optional, Union
-from urllib import parse
-
+import arklog
 import pkg_resources
 import rdflib
+from typing import Any, Callable, Dict, List, Optional, Union
+from urllib import parse
 from fastapi import FastAPI, Query, Request, Response
 from fastapi.responses import JSONResponse
 from rdflib import ConjunctiveGraph, Dataset, Graph, Literal, URIRef
@@ -16,6 +16,8 @@ from rdflib.plugins.sparql.evalutils import _eval
 from rdflib.plugins.sparql.parserutils import CompValue
 from rdflib.plugins.sparql.sparql import QueryContext, SPARQLError
 
+arklog.set_config_logging()
+
 
 class SparqlEndpoint(FastAPI):
     """SPARQL endpoint for services and storage of heterogeneous data."""
@@ -40,24 +42,31 @@ class SparqlEndpoint(FastAPI):
         """"""
         return mime.split(",")[0] in ("text/turtle",)
 
-
-    def __init__(
-        self,
-        *args: Any,
-        title: str,
-        description: str,
-        version: str,
-        functions: Dict[str, Callable[..., Any]],
-        graph: Union[Graph, ConjunctiveGraph, Dataset] = ConjunctiveGraph(),
-        **kwargs: Any,
-    ):
+    async def requested_result_type(self, request: Request, operation: str) -> str:
+        logging.debug("Getting mime type.")
+        output_mime_type = request.headers["accept"]
+        # TODO Ugly hack, fix later (Fuseki sends options)
+        output_mime_type = output_mime_type.split(",")[0]
+        if isinstance(output_mime_type, list):
+            return output_mime_type[0]
+
+        # TODO Use match or dict for this
+        if not output_mime_type:
+            logging.warning("No mime type provided. Setting mimetype to 'application/xml'.")
+            return "application/xml"
+        if operation == "Construct Query" and (self.is_json_mime_type(output_mime_type) or self.is_csv_mime_type(output_mime_type)):
+            return "text/turtle"
+        if operation == "Construct Query" and output_mime_type == "application/xml":
+            return "application/rdf+xml"
+        return output_mime_type
+
+    def __init__(self, *args: Any, title: str, description: str, version: str, functions: Dict[str, Callable[..., Any]], graph: Union[Graph, ConjunctiveGraph, Dataset] = ConjunctiveGraph(), **kwargs: Any):
         """"""
         self.graph = graph
         self.functions = functions
         self.title = title
         self.description = description
         self.version = version
-
         super().__init__(*args, title=title, description=description, version=version, **kwargs)
         rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
         api_responses: Optional[Dict[Union[int, str], Dict[str, Any]]] = {
@@ -95,7 +104,9 @@ class SparqlEndpoint(FastAPI):
 
         @self.get("/", name="SPARQL endpoint", description="", responses=api_responses)
         async def sparql_endpoint_get(request: Request, query: Optional[str] = Query(None)) -> Response:
+            logging.debug("Received GET request.")
             if not query:
+                logging.warning("No query provided in GET request!")
                 return JSONResponse({"error": "No query provided."})
 
             graph_ns = {}
@@ -121,30 +132,26 @@ class SparqlEndpoint(FastAPI):
                     content={"message": "Error executing the SPARQL query on the RDFLib Graph"},
                 )
 
-            # Format and return results depending on Accept mime type in request header
-            output_mime_type = request.headers["accept"]
-            if not output_mime_type:
-                output_mime_type = "application/xml"
-            # Handle mime type for construct queries
-            if query_operation == "Construct Query" and (self.is_json_mime_type(output_mime_type) or self.is_csv_mime_type(output_mime_type)):
-                output_mime_type = "text/turtle"
-                # TODO: support JSON-LD for construct query?
-                # g.serialize(format='json-ld', indent=4)
-            if query_operation == "Construct Query" and output_mime_type == "application/xml":
-                output_mime_type = "application/rdf+xml"
-
-            if self.is_csv_mime_type(output_mime_type):
-                return Response(query_results.serialize(format="csv"), media_type=output_mime_type)
-            elif self.is_json_mime_type(output_mime_type):
-                return Response(query_results.serialize(format="json"), media_type=output_mime_type)
-            elif self.is_xml_mime_type(output_mime_type):
-                return Response(query_results.serialize(format="xml"), media_type=output_mime_type)
-            elif self.is_turtle_mime_type(output_mime_type):
-                return Response(query_results.serialize(format="turtle"), media_type=output_mime_type)
-            return Response(query_results.serialize(format="xml"), media_type="application/sparql-results+xml")
+            logging.debug(f"{type(query_results)=}")
+            output_mime_type = await self.requested_result_type(request, query_operation)
+            logging.debug(f"Returning {output_mime_type}.")
+            try:
+                if self.is_csv_mime_type(output_mime_type):
+                    return Response(query_results.serialize(format="csv"), media_type=output_mime_type)
+                elif self.is_json_mime_type(output_mime_type):
+                    return Response(query_results.serialize(format="json"), media_type=output_mime_type)
+                elif self.is_xml_mime_type(output_mime_type):
+                    return Response(query_results.serialize(format="xml"), media_type=output_mime_type)
+                elif self.is_turtle_mime_type(output_mime_type):
+                    return Response(query_results.serialize(format="turtle"), media_type=output_mime_type)
+                return Response(query_results.serialize(format="xml"), media_type="application/sparql-results+xml")
+            except Exception as e:
+                logging.exception(e)
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph"})
 
         @self.post("/", name="SPARQL endpoint", description="", responses=api_responses)
         async def sparql_endpoint_post(request: Request, query: Optional[str] = Query(None)) -> Response:
+            logging.debug("Received POST request.")
             if not query:
                 # Handle federated query services which provide the query in the body
                 query_body = await request.body()
@@ -168,6 +175,7 @@ class SparqlEndpoint(FastAPI):
             raise NotImplementedError()
 
         query_results = []
+        logging.debug("Custom evaluation.")
         for eval_part in evalPart(ctx, part.p):
             # Checks if the function is a URI (custom function)
             if hasattr(part.expr, "iri"):

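The heart of the Fuseki workaround is the requested_result_type method above: Fuseki sends a comma-separated list of mime types in its Accept header, and the hack keeps only the first entry. Note that after str.split the isinstance(..., list) branch can never trigger, since indexing the split result always yields a str; the "fix later" TODO presumably covers this. A more standards-aware follow-up would weigh q-values instead of trusting header order. A minimal sketch of such a helper, with an invented name and not part of this commit:

    def preferred_mime_type(accept_header: str, default: str = "application/xml") -> str:
        """Pick the highest q-value mime type from an Accept header."""
        if not accept_header:
            return default
        candidates = []
        for entry in accept_header.split(","):
            parts = entry.strip().split(";")
            mime = parts[0].strip()
            quality = 1.0  # an absent q-value means 1.0 per RFC 9110
            for param in parts[1:]:
                key, _, value = param.strip().partition("=")
                if key == "q":
                    try:
                        quality = float(value)
                    except ValueError:
                        quality = 0.0
            candidates.append((quality, mime))
        # Stable sort: among equal q-values the header order is preserved.
        candidates.sort(key=lambda pair: pair[0], reverse=True)
        return candidates[0][1]

    # The first entry no longer wins automatically once q-values are honoured.
    assert preferred_mime_type("text/turtle;q=0.8, application/xml") == "application/xml"
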
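The serialisation chain in sparql_endpoint_get and the "TODO Use match or dict for this" comment both point at the same refactor: a lookup table from mime type to rdflib serialisation format. A sketch under the assumption that the is_*_mime_type helpers match roughly these keys (their definitions are not shown in this diff):

    # Hypothetical mapping; keys mirror the branches in sparql_endpoint_get.
    SERIALIZE_FORMATS = {
        "text/csv": "csv",
        "application/sparql-results+json": "json",
        "application/json": "json",
        "application/sparql-results+xml": "xml",
        "application/xml": "xml",
        "text/turtle": "turtle",
    }

    def serialize_results(query_results, output_mime_type: str):
        # Fall back to SPARQL XML results, mirroring the handler's final return.
        fmt = SERIALIZE_FORMATS.get(output_mime_type, "xml")
        return query_results.serialize(format=fmt)
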
+ 0 - 145
spendpoint/wrapper.py

@@ -1,145 +0,0 @@
-# Copied from https://pypi.org/project/rdflib-endpoint/
-
-"""
-This example shows how a custom evaluation function can be added to
-handle certain SPARQL Algebra elements.
-
-A custom function is added that adds ``rdfs:subClassOf`` "inference" when
-asking for ``rdf:type`` triples.
-
-Here the custom eval function is added manually, normally you would use
-setuptools and entry_points to do it:
-i.e. in your setup.py::
-
-    entry_points = {
-        'rdf.plugins.sparqleval': [
-            'myfunc =     mypackage:MyFunction',
-            ],
-    }
-"""
-
-# EvalBGP https://rdflib.readthedocs.io/en/stable/_modules/rdflib/plugins/sparql/evaluate.html
-# Custom fct for rdf:type with auto infer super-classes: https://github.com/RDFLib/rdflib/blob/master/examples/custom_eval.py
-# BGP = Basic Graph Pattern
-# Docs rdflib custom fct: https://rdflib.readthedocs.io/en/stable/intro_to_sparql.html
-# StackOverflow: https://stackoverflow.com/questions/43976691/custom-sparql-functions-in-rdflib/66988421#66988421
-
-# Another project: https://github.com/bas-stringer/scry/blob/master/query_handler.py
-# https://www.w3.org/TR/sparql11-service-description/#example-turtle
-# Federated query: https://www.w3.org/TR/2013/REC-sparql11-federated-query-20130321/#defn_service
-# XML method: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.plugins.sparql.results.html#module-rdflib.plugins.sparql.results.xmlresults
-
-import rdflib
-from rdflib import Literal, URIRef
-from rdflib.plugins.sparql import parser
-from rdflib.plugins.sparql.algebra import pprintAlgebra, translateQuery
-from rdflib.plugins.sparql.evaluate import evalBGP
-
-# inferredSubClass = rdflib.RDFS.subClassOf * "*"  # any number of rdfs.subClassOf
-biolink = URIRef("https://w3id.org/biolink/vocab/")
-
-
-class Result:
-    pass
-
-
-def add_to_graph(ctx, drug, disease, score):
-    bnode = rdflib.BNode()
-    ctx.graph.add((bnode, rdflib.RDF.type, rdflib.RDF.Statement))
-    ctx.graph.add((bnode, rdflib.RDF.subject, drug))
-    ctx.graph.add((bnode, rdflib.RDF.predicate, biolink + "treats"))
-    ctx.graph.add((bnode, rdflib.RDF.object, disease))
-    ctx.graph.add((bnode, biolink + "category", biolink + "ChemicalToDiseaseOrPhenotypicFeatureAssociation"))
-    ctx.graph.add((bnode, biolink + "has_confidence_level", score))
-
-
-def get_triples(disease):
-    drug = URIRef("http://bio2rdf.org/drugbank:DB00001")
-    score = Literal("1.0")
-
-    r = Result()
-    r.drug = drug
-    r.disease = disease
-    r.score = score
-
-    results = []
-    results.append(r)
-    return results
-
-
-def custom_eval(ctx, part):
-    """ """
-    # print (part.name)
-
-    if part.name == "Project":
-        ctx.myvars = []
-
-    # search extend for variable binding
-    if part.name == "Extend" and hasattr(part, "expr") and not isinstance(part.expr, list):
-        ctx.myvars.append(part.expr)
-
-    # search for filter
-    if part.name == "Filter" and hasattr(part, "expr"):
-        if hasattr(part.expr, "expr"):
-            if part.expr.expr["op"] == "=":
-                part.expr.expr["expr"]
-                d = part.expr.expr["other"]
-                ctx.myvars.append(d)
-        else:
-            if part.expr["op"] == "=":
-                part.expr["expr"]
-                d = part.expr["other"]
-                ctx.myvars.append(d)
-
-    # search the BGP for the variable of interest
-    if part.name == "BGP":
-        triples = []
-        for t in part.triples:
-            if t[1] == rdflib.RDF.object:
-                disease = t[2]
-                # check first if the disease term is specified in the bgp triple
-                if isinstance(disease, rdflib.term.URIRef):
-                    ctx.myvars.append(disease)
-
-                # fetch instances
-                for d in ctx.myvars:
-                    results = get_triples(d)
-                    for r in results:
-                        add_to_graph(ctx, r.drug, r.disease, r.score)
-
-            triples.append(t)
-        return evalBGP(ctx, triples)
-    raise NotImplementedError()
-
-
-if __name__ == "__main__":
-    # add function directly, normally we would use setuptools and entry_points
-    rdflib.plugins.sparql.CUSTOM_EVALS["exampleEval"] = custom_eval
-
-    g = rdflib.Graph()
-
-    q = """PREFIX openpredict: <https://w3id.org/um/openpredict/>
-        PREFIX biolink: <https://w3id.org/biolink/vocab/>
-        PREFIX omim: <http://bio2rdf.org/omim:>
-        SELECT ?disease ?drug ?score
-        {
-            ?association a rdf:Statement ;
-                rdf:subject ?drug ;
-                rdf:predicate ?predicate ;
-                #rdf:object omim:246300 ;
-                rdf:object ?disease ;
-                biolink:category biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation ;
-                biolink:has_confidence_level ?score .
-            #?disease dcat:identifier "OMIM:246300" .
-            BIND(omim:1 AS ?disease)
-            #FILTER(?disease = omim:2 || ?disease = omim:3)
-            #VALUES ?disease { omim:5 omim:6 omim:7 }
-        }"""
-
-    pq = parser.parseQuery(q)
-    tq = translateQuery(pq)
-    pprintAlgebra(tq)
-
-    # Find all FOAF Agents
-    for x in g.query(q):
-        print(x)

+ 20 - 20
spendpoint/yasgui.html

@@ -2,27 +2,27 @@
 <html lang="en">
 
 <head>
-    <meta charset="utf-8">
-    <title>RDFLib endpoint</title>
-    <link href="https://unpkg.com/@triply/yasgui@4/build/yasgui.min.css" rel="stylesheet" type="text/css" />
-    <script src="https://unpkg.com/@triply/yasgui@4/build/yasgui.min.js"></script>
+  <meta charset="utf-8">
+  <title>RDFLib endpoint</title>
+  <link href="https://unpkg.com/@triply/yasgui@4/build/yasgui.min.css" rel="stylesheet" type="text/css"/>
+  <script src="https://unpkg.com/@triply/yasgui@4/build/yasgui.min.js"></script>
 </head>
 
 <body>
-    <div id="yasgui"></div>
-    <script>
-        Yasqe.defaults.value = `$EXAMPLE_QUERY`
-        const url = window.location.href.endsWith('/') ? window.location.href.slice(0, -1) : window.location.href;
-        const yasgui = new Yasgui(document.getElementById("yasgui"), {
-            requestConfig: { endpoint: url + "/" },
-            endpointCatalogueOptions: {
-                getData: function () {
-                    return [
-                        { endpoint: url + "/" },
-                    ];
-                },
-                keys: [],
-            },
-        });
-    </script>
+<div id="yasgui"></div>
+<script>
+  Yasqe.defaults.value = `$EXAMPLE_QUERY`
+  const url = window.location.href.endsWith('/') ? window.location.href.slice(0, -1) : window.location.href;
+  const yasgui = new Yasgui(document.getElementById("yasgui"), {
+    requestConfig: {endpoint: url + "/"},
+    endpointCatalogueOptions: {
+      getData: function () {
+        return [
+          {endpoint: url + "/"},
+        ];
+      },
+      keys: [],
+    },
+  });
+</script>
 </body>

+ 0 - 30
tests/test_query_endpoint.py

@@ -1,30 +0,0 @@
-import logging
-import arklog
-import pytest
-# arklog.set_config_logging()
-from SPARQLWrapper import SPARQLWrapper, JSON
-
-logging.basicConfig(encoding="utf-8", level=logging.DEBUG)
-
-# TODO Convert to test
-def query_0():
-    """"""
-    sparql = SPARQLWrapper("http://localhost:8000/")
-    sparql.setReturnFormat(JSON)
-    sparql.setQuery(
-        """
-        PREFIX dtf: <https://ontology.rys.app/dt/function/>
-        SELECT ?outlier ?outlier_value WHERE {
-            BIND(dtf:outlier("data.csv", "2") AS ?outlier)
-        }
-        """
-    )
-    ret = sparql.query().convert()
-    if not ret:
-        logging.info("No outliers!")
-    for r in ret["results"]["bindings"]:
-        logging.info(r)
-
-
-if __name__ == "__main__":
-    query_0()
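
For context on the custom-function path that remains in spendpoint/endpoint.py: the removed test above bound dtf:outlier("data.csv", "2") in a query, which is the kind of IRI-addressed callable the functions argument of SparqlEndpoint carries. A hedged registration sketch; keying the mapping by full IRI string is an assumption read off the Dict[str, Callable[..., Any]] annotation, and since the diff does not show how eval_custom_functions invokes these callables, both the signature and body below are guesses:

    from rdflib import Literal
    from spendpoint.endpoint import SparqlEndpoint

    def outlier(file_name: Literal, column: Literal) -> Literal:
        # Placeholder; the real outlier service is not shown in this commit.
        return Literal(f"checked {file_name}, column {column}")

    app = SparqlEndpoint(
        title="SpEndPoint",
        description="SPARQL endpoint supporting custom services.",
        version="0.1.0",
        functions={"https://ontology.rys.app/dt/function/outlier": outlier},
    )

Being a FastAPI subclass, the resulting app would be served in the usual way, e.g. with uvicorn.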