
Merge branch 'release-0.5.0'

Arkadiusz Ryś, 2 years ago
commit
424ad5d59f
13 changed files with 390 additions and 225 deletions
  1. .editorconfig  +1 -1
  2. .gitignore  +2 -2
  3. HISTORY.rst  +8 -0
  4. README.rst  +30 -1
  5. data/configuration.toml  +5 -0
  6. default.nix  +0 -22
  7. pyproject.toml  +14 -14
  8. requirements.txt  +14 -14
  9. spendpoint/__init__.py  +1 -1
  10. spendpoint/endpoint.py  +25 -166
  11. spendpoint/router.py  +259 -0
  12. spendpoint/service.py  +29 -2
  13. tasks.py  +2 -2

+ 1 - 1
.editorconfig

@@ -8,7 +8,7 @@ indent_style = space
 insert_final_newline = true
 trim_trailing_whitespace = true
 
-[*.{css,html,yml,yaml,js,xml}]
+[*.{css,html,yml,yaml,js,xml,nix}]
 indent_size = 2
 
 [{*.log,LICENSE}]

+ 2 - 2
.gitignore

@@ -8,5 +8,5 @@ dist/
 .idea
 
 # Generated example files
-data/example.csv
-data/example.parquet
+data/example*.csv
+data/example*.parquet

+ 8 - 0
HISTORY.rst

@@ -2,6 +2,14 @@
 History
 =======
 
+0.5.0 (2023-10-11)
+------------------
+* Cells now use a more transparent system.
+
+0.4.0 (2023-07-12)
+------------------
+* Docker containers now use configuration file.
+
 0.3.0 (2023-05-16)
 ------------------
 * Add configuration options.

+ 30 - 1
README.rst

@@ -7,16 +7,45 @@ The default access point is at `http://127.0.0.1:8000`.
 This endpoint can be configured in the `configuration.toml <data/configuration.toml>`_ file.
 The docker image created uses uvicorn to host the application at `0.0.0.0:80`. Feel free to map this to any port of your liking.
 
-We currently support 3 services out of the box:
+Bound services
+--------------
+
+We currently support 4 bind services out of the box:
 
 .. code-block::
 
    dtf:outlier
    dtf:example
    dtf:conversion
+   dtf:cell
 
 The outlier service relies on `another endpoint <https://msdl.uantwerpen.be/git/lucasalbertins/DTDesign/src/main/tools/typeOperations>`_ which needs to be set up and accessible.
 
+.. code-block:: sparql
+
+   PREFIX dtf: <https://ontology.rys.app/dt/function/>
+   SELECT ?cell ?cell_value WHERE {
+     SERVICE <http://localhost:8000/> {BIND(dtf:cell("data/example.csv", 0, 0) AS ?cell)}
+   }
+
+SPARQL query showing a bind-based cell service call.
+
+URI based services
+------------------
+
+A second, more versatile way to access a service is provided in the form of a URI.
+It is possible to query an individual cell by specifying its location in the URI of the service call.
+
+.. code-block:: sparql
+
+   SELECT ?s ?p ?o WHERE {
+     BIND(ENCODE_FOR_URI("http://ua.be/sdo2l/description/artifacts/artifacts#random-artefact") as ?e)
+     BIND(uri(concat("http://localhost:8000/cell/?iri=", ?e ,"&row=2&column=2&file_name=example.csv")) as ?c)
+     SERVICE ?c {?s ?p ?o}
+   }
+
+SPARQL query showing a URI-based cell service call.
+
 Installation
 ------------
 

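The two README examples above can also be exercised from Python. A minimal client sketch, assuming the endpoint is already running locally on port 8000 and that `data/example.csv` exists; SPARQLWrapper comes from the project's test dependencies, and the BIND is sent straight to the endpoint rather than going through a federating store.

.. code-block:: python

   # Send the dtf:cell bind directly to the local endpoint and print the bindings.
   # Hypothetical standalone script; the endpoint URL and CSV path are assumptions.
   from SPARQLWrapper import SPARQLWrapper, JSON

   query = """
   PREFIX dtf: <https://ontology.rys.app/dt/function/>
   SELECT ?cell ?cell_value WHERE {
     BIND(dtf:cell("data/example.csv", 0, 0) AS ?cell)
   }
   """

   sparql = SPARQLWrapper("http://localhost:8000/")
   sparql.setQuery(query)
   sparql.setReturnFormat(JSON)
   results = sparql.query().convert()
   for binding in results["results"]["bindings"]:
       print(binding)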
+ 5 - 0
data/configuration.toml

@@ -19,3 +19,8 @@ call = "example_service"
 name = "conversion"
 namespace = "https://ontology.rys.app/dt/function/conversion"
 call = "conversion_service"
+
+[[services]]
+name = "cell"
+namespace = "https://ontology.rys.app/dt/function/cell"
+call = "cell_service"

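For context, the new `[[services]]` entry is resolved at query time by the custom evaluation hook: the function IRI used in `BIND(...)` is compared against each configured `namespace`, and the matching `call` name is looked up on the `spendpoint.service` module. A rough sketch of that dispatch, assuming the configuration is read directly with `toml` here rather than through the project's dataclass loader.

.. code-block:: python

   # Look up the service callable registered for a custom function IRI.
   # Simplified: the real code iterates configuration.services dataclass instances.
   import toml
   from rdflib import URIRef
   from spendpoint import service

   configuration = toml.load("data/configuration.toml")

   def resolve_service(function_iri: URIRef):
       """Return the callable configured for this IRI, or None if unregistered."""
       for entry in configuration["services"]:
           if function_iri == URIRef(entry["namespace"]):
               return getattr(service, entry["call"])
       return None

   print(resolve_service(URIRef("https://ontology.rys.app/dt/function/cell")))  # cell_service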
+ 0 - 22
default.nix

@@ -1,22 +0,0 @@
-{
-  pkgs ? import <nixpkgs> {}
-}:
-
-pkgs.python3Packages.buildPythonPackage rec {
-  pname = "spendpoint";
-  version = "0.2.0";
-  format = "pyproject";
-  src = ./.;
-
-  nativeBuildInputs = [
-    #flit-core
-  ];
-
-  buildInputs = [
-    #flit-core
-  ];
-
-  propagatedBuildInputs = [
-    pkgs.python3Packages.jinja2
-  ];
-}

+ 14 - 14
pyproject.toml

@@ -22,35 +22,35 @@ keywords = ["spendpoint"]
 dependencies = [
     "toml~=0.10.2",
     "arklog~=0.5.1",
-    "rdflib~=6.3.2",
+    "rdflib~=7.0.0",
     "pandas~=2.0.1",
     "pandas~=2.0.1",
     "dacite~=1.8.1",
     "dacite~=1.8.1",
-    "fastapi~=0.95.2",
-    "pyarrow~=12.0.0",
-    "requests~=2.30.0",
-    "starlette~=0.27",
+    "fastapi~=0.101.0",
+    "pyarrow~=12.0.1",
+    "requests~=2.31.0",
+    "starlette~=0.31.0",
     "python-magic~=0.4.27",
     "python-magic~=0.4.27",
-    "uvicorn[standard]~=0.22.0",
+    "uvicorn[standard]~=0.23.2",
 ]
 
 [project.optional-dependencies]
 test = [
-    "pytest~=7.3.1",
+    "pytest~=7.4.0",
     "sparqlwrapper~=2.0.0",
     "sparqlwrapper~=2.0.0",
 ]
 ]
 doc = [
 doc = [
-    "sphinx~=7.0.1",
+    "sphinx~=7.1.2",
 ]
 dev = [
-    "tox~=4.5.1",
-    "pip~=23.1.2",
+    "tox~=4.6.4",
+    "pip~=23.2.1",
     "flit~=3.9.0",
     "flit~=3.9.0",
     "twine~=4.0.2",
     "twine~=4.0.2",
-    "numpy~=1.24.3",
-    "invoke~=2.1.2",
+    "numpy~=1.25.2",
+    "invoke~=2.2.0",
     "jinja2~=3.1.2",
     "jinja2~=3.1.2",
-    "flake8~=6.0.0",
-    "coverage~=7.2.5",
+    "flake8~=6.1.0",
+    "coverage~=7.2.7",
 ]
 
 [project.urls]

+ 14 - 14
requirements.txt

@@ -1,27 +1,27 @@
 # SpEndPoint
 toml              ~= 0.10.2
 arklog            ~= 0.5.1
-rdflib            ~= 6.3.2
+rdflib            ~= 7.0.0
 pandas            ~= 2.0.1
 dacite            ~= 1.8.1
-fastapi           ~= 0.95.2
-pyarrow           ~= 12.0.0
-requests          ~= 2.30.0
-starlette         ~= 0.27
+fastapi           ~= 0.101.0
+pyarrow           ~= 12.0.1
+requests          ~= 2.31.0
+starlette         ~= 0.31.0
 python-magic      ~= 0.4.27
-uvicorn[standard] ~= 0.22.0
+uvicorn[standard] ~= 0.23.2
 # Test
-pytest        ~= 7.3.1
+pytest        ~= 7.4.0
 sparqlwrapper ~= 2.0.0
 # Doc
-sphinx ~= 7.0.1
+sphinx ~= 7.1.2
 # Dev
-tox      ~= 4.5.1
-pip      ~= 23.1.2
+tox      ~= 4.6.4
+pip      ~= 23.2.1
 flit     ~= 3.9.0
 twine    ~= 4.0.2
-numpy    ~= 1.24.3
-invoke   ~= 2.1.2
+numpy    ~= 1.25.2
+invoke   ~= 2.2.0
 jinja2   ~= 3.1.2
-flake8   ~= 6.0.0
-coverage ~= 7.2.5
+flake8   ~= 6.1.0
+coverage ~= 7.2.7

+ 1 - 1
spendpoint/__init__.py

@@ -1,3 +1,3 @@
 """SPARQL endpoint for ontologies."""
 """SPARQL endpoint for ontologies."""
-__version__ = "0.4.0"
+__version__ = "0.5.0"
 __version_info__ = tuple((int(num) if num.isdigit() else num for num in __version__.replace("-", ".", 1).split(".")))

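For reference, the unchanged helper on the last line of this file parses the bumped string into a version tuple. A quick interpreter check of what it yields for the new value:

.. code-block:: python

   # What the unchanged __version_info__ expression evaluates to after the bump.
   __version__ = "0.5.0"
   __version_info__ = tuple((int(num) if num.isdigit() else num for num in __version__.replace("-", ".", 1).split(".")))
   print(__version_info__)  # (0, 5, 0)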
+ 25 - 166
spendpoint/endpoint.py

@@ -1,183 +1,42 @@
 # Copied and modified from https://pypi.org/project/rdflib-endpoint/
+# https://fastapi.tiangolo.com/
 
 import logging
-import re
 import arklog
-import rdflib
-from typing import Any, Dict, List, Optional, Union
-from urllib import parse
-from fastapi import FastAPI, Query, Request, Response
-from fastapi.responses import JSONResponse
-from rdflib import ConjunctiveGraph, Dataset, Graph, Literal, URIRef
-from rdflib.plugins.sparql import prepareQuery
-from rdflib.plugins.sparql.evaluate import evalPart
-from rdflib.plugins.sparql.evalutils import _eval
-from rdflib.plugins.sparql.parserutils import CompValue
-from rdflib.plugins.sparql.sparql import QueryContext, SPARQLError
-
-from spendpoint import service
+import time
+from typing import Any
+from fastapi import FastAPI, Request, Response
+from fastapi.middleware.cors import CORSMiddleware
+from spendpoint.router import SparqlRouter
 
 arklog.set_config_logging()
 
-
 class SparqlEndpoint(FastAPI):
     """SPARQL endpoint for services and storage of heterogeneous data."""
 
-    @staticmethod
-    def is_json_mime_type(mime: str) -> bool:
-        """"""
-        return mime.split(",")[0] in ("application/sparql-results+json","application/json","text/javascript","application/javascript")
-
-    @staticmethod
-    def is_csv_mime_type(mime: str) -> bool:
-        """"""
-        return mime.split(",")[0] in ("text/csv", "application/sparql-results+csv")
-
-    @staticmethod
-    def is_xml_mime_type(mime: str) -> bool:
-        """"""
-        return mime.split(",")[0] in ("application/xml", "application/sparql-results+xml")
-
-    @staticmethod
-    def is_turtle_mime_type(mime: str) -> bool:
+    def __init__(self, *args: Any, title: str, description: str, version: str, configuration, **kwargs: Any):
         """"""
         """"""
-        return mime.split(",")[0] in ("text/turtle",)
-
-    async def requested_result_type(self, request: Request, operation: str) -> str:
-        output_mime_type = request.headers["accept"]
-        # TODO Ugly hack, fix later (Fuseki sends options)
-        output_mime_type = output_mime_type.split(",")[0]
-        if isinstance(output_mime_type, list):
-            return output_mime_type[0]
-        # TODO Use match or dict for this
-        if not output_mime_type:
-            logging.warning("No mime type provided. Setting mimetype to 'application/xml'.")
-            return "application/xml"
-        if operation == "Construct Query" and (self.is_json_mime_type(output_mime_type) or self.is_csv_mime_type(output_mime_type)):
-            return "text/turtle"
-        if operation == "Construct Query" and output_mime_type == "application/xml":
-            return "application/rdf+xml"
-        return output_mime_type
-
-    def __init__(self, *args: Any, title: str, description: str, version: str, configuration, graph: Union[Graph, ConjunctiveGraph, Dataset] = ConjunctiveGraph(), **kwargs: Any):
-        """"""
-        self.graph = graph
         self.title = title
         self.description = description
         self.version = version
         self.configuration = configuration
         super().__init__(*args, title=title, description=description, version=version, **kwargs)
         logging.debug(self.description)
-        rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
-        api_responses: Optional[Dict[Union[int, str], Dict[str, Any]]] = {
-            200: {
-                "description": "SPARQL query results",
-                "content": {
-                    "application/sparql-results+json": {
-                        "results": {"bindings": []},
-                        "head": {"vars": []},
-                    },
-                    "application/json": {
-                        "results": {"bindings": []},
-                        "head": {"vars": []},
-                    },
-                    "text/csv": {"example": "s,p,o"},
-                    "application/sparql-results+csv": {"example": "s,p,o"},
-                    "text/turtle": {"example": "service description"},
-                    "application/sparql-results+xml": {"example": "<root></root>"},
-                    "application/xml": {"example": "<root></root>"},
-                },
-            },
-            400: {
-                "description": "Bad Request",
-            },
-            403: {
-                "description": "Forbidden",
-            },
-            422: {
-                "description": "Unprocessable Entity",
-            },
-        }
-
-        @self.get("/", name="SPARQL endpoint", description="", responses=api_responses)
-        async def sparql_endpoint_get(request: Request, query: Optional[str] = Query(None)) -> Response:
-            logging.debug("Received GET request.")
-            if not query:
-                logging.warning("No query provided in GET request!")
-                return JSONResponse({"error": "No query provided."})
-
-            graph_ns = {}
-            for prefix, ns_uri in self.graph.namespaces():
-                graph_ns[prefix] = ns_uri
-
-            try:
-                parsed_query = prepareQuery(query, initNs=graph_ns)
-                query_operation = re.sub(r"(\w)([A-Z])", r"\1 \2", parsed_query.algebra.name)
-            except Exception as e:
-                logging.error("Error parsing the SPARQL query: " + str(e))
-                return JSONResponse(
-                    status_code=400,
-                    content={"message": "Error parsing the SPARQL query"},
-                )
-
-            try:
-                query_results = self.graph.query(query, initNs=graph_ns)
-            except Exception as e:
-                logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
-                # TODO Send better error which can be parsed as a SPARQL response or check it client side
-                return JSONResponse(
-                    status_code=400,
-                    content={"message": "Error executing the SPARQL query on the RDFLib Graph"},
-                )
-            output_mime_type = await self.requested_result_type(request, query_operation)
-            logging.debug(f"Returning {output_mime_type}.")
-            try:
-                if self.is_csv_mime_type(output_mime_type):
-                    return Response(query_results.serialize(format="csv"), media_type=output_mime_type)
-                elif self.is_json_mime_type(output_mime_type):
-                    return Response(query_results.serialize(format="json"), media_type=output_mime_type)
-                elif self.is_xml_mime_type(output_mime_type):
-                    return Response(query_results.serialize(format="xml"), media_type=output_mime_type)
-                elif self.is_turtle_mime_type(output_mime_type):
-                    return Response(query_results.serialize(format="turtle"), media_type=output_mime_type)
-                return Response(query_results.serialize(format="xml"), media_type="application/sparql-results+xml")
-            except Exception as e:
-                logging.exception(e)
-                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph"})
-
-        @self.post("/", name="SPARQL endpoint", description="", responses=api_responses)
-        async def sparql_endpoint_post(request: Request, query: Optional[str] = Query(None)) -> Response:
-            logging.debug("Received POST request.")
-            if not query:
-                # Handle federated query services which provide the query in the body
-                query_body = await request.body()
-                body = query_body.decode("utf-8")
-                parsed_query = parse.parse_qsl(body)
-                for params in parsed_query:
-                    if params[0] == "query":
-                        query = parse.unquote(params[1])
-            return await sparql_endpoint_get(request, query)
-
-
-    def eval_custom_functions(self, ctx: QueryContext, part: CompValue) -> List[Any]:
-        if part.name != "Extend":
-            raise NotImplementedError()
-
-        query_results = []
-        logging.debug("Custom evaluation.")
-        for eval_part in evalPart(ctx, part.p):
-            # Checks if the function is a URI (custom function)
-            if hasattr(part.expr, "iri"):
-                for conf_service in self.configuration.services:
-                    # Check if URI correspond to a registered custom function
-                    if part.expr.iri == URIRef(conf_service.namespace):
-                        query_results, ctx, part, eval_part = getattr(service, conf_service.call)(query_results, ctx, part, eval_part, conf_service)
-            else:
-                # For built-in SPARQL functions (that are not URIs)
-                evaluation: List[Any] = [_eval(part.expr, eval_part.forget(ctx, _except=part._vars))]
-                if isinstance(evaluation[0], SPARQLError):
-                    raise evaluation[0]
-                # Append results for built-in SPARQL functions
-                for result in evaluation:
-                    query_results.append(eval_part.merge({part.var: Literal(result)}))
-        return query_results
+        sparql_router = SparqlRouter(title=title, description=description, version=version, configuration=configuration)
+        self.include_router(sparql_router)
+        self.add_middleware(
+            CORSMiddleware,
+            allow_origins=["*"],
+            allow_credentials=True,
+            allow_methods=["*"],
+            allow_headers=["*"],
+        )
+
+        @self.middleware("http")
+        async def add_process_time_header(request: Request, call_next: Any) -> Response:
+            start_time = time.time()
+            response: Response = await call_next(request)
+            duration = str(time.time() - start_time)
+            response.headers["X-Process-Time"] = duration
+            logging.debug(f"X-Process-Time = {duration}")
+            return response

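The rewritten endpoint now only wires up the new `SparqlRouter`, CORS, and a timing middleware. A quick way to observe the new `X-Process-Time` header, assuming the application is already running on `localhost:8000` (the startup module itself is not part of this diff):

.. code-block:: python

   # Any response from the running endpoint should now carry an X-Process-Time header.
   import requests

   response = requests.get(
       "http://localhost:8000/",
       params={"query": "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 1"},
   )
   print(response.status_code)
   print(response.headers.get("X-Process-Time"))  # e.g. "0.004..."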
+ 259 - 0
spendpoint/router.py

@@ -0,0 +1,259 @@
+import logging
+import rdflib
+import pandas as pd
+from typing import Any, List, Optional
+from urllib import parse
+from rdflib.plugins.sparql import prepareQuery
+from rdflib.plugins.sparql.processor import SPARQLResult
+from spendpoint import service
+from fastapi import APIRouter, Query, Request, Response
+from fastapi.responses import JSONResponse
+from rdflib import ConjunctiveGraph, Literal, URIRef
+from rdflib.plugins.sparql.evaluate import evalPart
+from rdflib.plugins.sparql.evalutils import _eval
+from rdflib.plugins.sparql.parserutils import CompValue
+from rdflib.plugins.sparql.sparql import QueryContext, SPARQLError
+from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, VOID, XMLNS, XSD
+
+
+CONTENT_TYPE_TO_RDFLIB_FORMAT = {
+    # https://www.w3.org/TR/sparql11-results-json/
+    "application/sparql-results+json": "json",
+    "application/json": "json",
+    "text/json": "json",
+    # https://www.w3.org/TR/rdf-sparql-XMLres/
+    "application/sparql-results+xml": "xml",
+    "application/xml": "xml",  # for compatibility
+    "application/rdf+xml": "xml",  # for compatibility
+    "text/xml": "xml",  # not standard
+    # https://www.w3.org/TR/sparql11-results-csv-tsv/
+    "application/sparql-results+csv": "csv",
+    "text/csv": "csv",  # for compatibility
+    # Extras
+    "text/turtle": "ttl",
+}
+DEFAULT_CONTENT_TYPE = "application/json"
+
+
+def parse_accept_header(accept: str) -> List[str]:
+    """
+    Given an accept header string, return a list of media types in order of preference.
+
+    :param accept: Accept header value
+    :return: Ordered list of media type preferences
+    """
+
+    def _parse_preference(qpref: str) -> float:
+        qparts = qpref.split("=")
+        try:
+            return float(qparts[1].strip())
+        except ValueError:
+            pass
+        except IndexError:
+            pass
+        return 1.0
+
+    preferences = []
+    types = accept.split(",")
+    dpref = 2.0
+    for mtype in types:
+        parts = mtype.split(";")
+        parts = [part.strip() for part in parts]
+        pref = dpref
+        try:
+            for part in parts[1:]:
+                if part.startswith("q="):
+                    pref = _parse_preference(part)
+                    break
+        except IndexError:
+            pass
+        # preserve order of appearance in the list
+        dpref = dpref - 0.01
+        preferences.append((parts[0], pref))
+    preferences.sort(key=lambda x: -x[1])
+    return [pref[0] for pref in preferences]
+
+
+def add_cell_to_graph(graph: ConjunctiveGraph, iri: str, row: int, column: int, cell_value: Any, verbose: bool) -> None:
+    """Adds a Cell as known in the SDO2L ontology to the graph."""
+    tabular_prefix = "http://ua.be/sdo2l/vocabulary/formalism/tabular"
+    cell = URIRef(f"{iri}-cell-{row}-{column}")
+    # Store the triples in a temporary graph. This allows us to use the rdflib query engine for the sub-query instead of finding the matching pairs manually.
+    graph.add((cell, URIRef(f"{tabular_prefix}#holdsContent"), Literal(cell_value)))
+    graph.add((cell, URIRef(f"{tabular_prefix}#hasRowPosition"), Literal(int(row))))
+    graph.add((cell, URIRef(f"{tabular_prefix}#hasColumnPosition"), Literal(int(column))))
+
+    if verbose:
+        graph.add((cell, URIRef(f"{tabular_prefix}#isCellOfTabularData"), URIRef(f"{iri}")))
+        graph.add((cell, URIRef(f"{tabular_prefix}#isInCollection"), URIRef(f"{iri}-column-{int(column)}")))
+        graph.add((cell, URIRef(f"{tabular_prefix}#isInCollection"), URIRef(f"{iri}-row-{int(row)}")))
+
+        graph.add((cell, RDF.type, OWL.Thing))
+        graph.add((cell, RDF.type, URIRef("http://ua.be/sdo2l/vocabulary/formalism/tabular#Cell")))
+        graph.add((cell, RDF.type, URIRef("http://ua.be/sdo2l/vocabulary/formalism/file#Data")))
+        graph.add((cell, OWL.sameAs, URIRef(f"{iri}-cell-{int(row)}-{int(column)}")))
+
+
+class SparqlRouter(APIRouter):
+    """Class to deploy a SPARQL endpoint using a RDFLib Graph."""
+
+    def __init__(self, *args: Any, title: str, description: str, version: str, configuration, **kwargs: Any):
+        self.title = title
+        self.description = description
+        self.version = version
+        self.configuration = configuration
+        super().__init__(*args, **kwargs)
+        rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
+
+        async def encode_graph_query_results(request, query_results):
+            """Serialize the query results according to the request's Accept header."""
+            mime_types = parse_accept_header(request.headers.get("accept", DEFAULT_CONTENT_TYPE))
+            output_mime_type = DEFAULT_CONTENT_TYPE
+            for mime_type in mime_types:
+                if mime_type in CONTENT_TYPE_TO_RDFLIB_FORMAT:
+                    output_mime_type = mime_type
+                    break
+            logging.debug(f"Returning {output_mime_type}.")
+            try:
+                rdflib_format = CONTENT_TYPE_TO_RDFLIB_FORMAT[output_mime_type]
+                response = Response(query_results.serialize(format=rdflib_format), media_type=output_mime_type)
+            except Exception as e:
+                logging.error(f"Error serializing the SPARQL query results with RDFLib: {e}")
+                return JSONResponse(status_code=422, content={"message": "Error serializing the SPARQL query results."})
+            else:
+                return response
+
+        @self.get("/")
+        async def sparql_endpoint_get(request: Request, query: Optional[str] = Query(None)) -> Response:
+            """Returns an empty result."""
+            # The graph is empty, so you would expect this to never return any pairs.
+            # But we inject pairs in the custom functions!
+            logging.debug("Received GET request.")
+            if not query:
+                logging.warning("No query provided in GET request!")
+                return JSONResponse({"message": "No query provided."})
+
+            graph = ConjunctiveGraph()
+            try:
+                query_results = graph.query(query)
+            except Exception as e:
+                logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+
+            return await encode_graph_query_results(request, query_results)
+
+        @self.post("/")
+        async def sparql_endpoint_post(request: Request, query: Optional[str] = Query(None)) -> Response:
+            """Returns an empty result."""
+            logging.debug("Received POST request.")
+            if not query:
+                query_body = await request.body()
+                body = query_body.decode("utf-8")
+                parsed_query = parse.parse_qsl(body)
+                for params in parsed_query:
+                    if params[0] == "query":
+                        query = parse.unquote(params[1])
+            return await sparql_endpoint_get(request, query)
+
+        @self.get("/cell/")
+        async def sparql_cell_endpoint_get(request: Request, iri, file_name, row, column, verbose: bool = True, query: Optional[str] = Query(None)) -> Response:
+            """
+            SELECT ?s ?p ?o WHERE {
+              BIND(ENCODE_FOR_URI("http://ua.be/sdo2l/description/artifacts/artifacts#random-artefact") as ?e)
+              BIND(uri(concat("http://localhost:8000/cell/?iri=", ?e ,"&row=2&column=2&file_name=example.csv")) as ?c)
+              SERVICE ?c {?s ?p ?o}
+            }
+            """
+            logging.debug(f"Received cell GET request [{iri}:{file_name}->{row}:{column}].")
+            graph = ConjunctiveGraph()
+            graph_ns = dict(graph.namespaces())
+            # graph_ns["tabular"] = "http://ua.be/sdo2l/vocabulary/formalisms/tabular#"
+            df = pd.read_csv(f"data/{file_name}", index_col=None, header=None)
+            cell_value = df.iat[int(row), int(column)]
+            add_cell_to_graph(graph, iri, int(row), int(column), cell_value, verbose)
+            logging.debug(f"{cell_value=}")
+
+            try:
+                query_results = graph.query(query, initNs=graph_ns)
+            except Exception as e:
+                logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+            return await encode_graph_query_results(request, query_results)
+
+        @self.get("/cell/{iri}/{file_name}/")
+        async def sparql_sheet_endpoint_get(request: Request, iri, file_name, query: Optional[str] = Query(None), verbose: bool = True) -> Response:
+            """Return all cells in SDO2L notation for the given file."""
+            logging.debug(f"Received sheet GET request [{file_name}].")
+            graph = ConjunctiveGraph()
+            graph_ns = dict(graph.namespaces())
+            # graph_ns["tabular"] = "http://ua.be/sdo2l/vocabulary/formalisms/tabular#"
+            df = pd.read_csv(f"data/{file_name}", index_col=None, header=None)
+            df.reset_index()
+
+            # Please forgive me pandas gods
+            for row in range(df.shape[0]):
+                for column in range(df.shape[1]):
+                    cell_value = df.at[row, column]
+                    add_cell_to_graph(graph, iri, int(row), int(column), cell_value, verbose)
+
+            try:
+                query_results = graph.query(query, initNs=graph_ns)
+            except Exception as e:
+                logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+            return await encode_graph_query_results(request, query_results)
+
+        @self.get("/cell/{iri}/{file_name}/{row}/{column}/")
+        async def sparql_cell_endpoint_get(request: Request, iri, file_name, row, column, query: Optional[str] = Query(None), verbose: bool = True) -> Response:
+            """
+            Create an ephemeral graph store based on the call parameters and perform the requested query.
+            SELECT ?s ?p ?o WHERE {
+              bind(str('http://localhost:8000') as ?base)
+              bind(str('iri') as ?iri)
+              bind(str('cell') as ?operation)
+              bind(str('example.csv') as ?file)
+              bind(str(2) as ?row)
+              bind(str(2) as ?column)
+              bind(iri(concat(?base, "/", ?operation, "/", ?file, "/", ?row, "/", ?column, "/")) as ?call)
+              SERVICE ?call {?s ?p ?o}
+            }
+            """
+            logging.debug(f"Received cell GET request [{file_name}->{row}:{column}].")
+            graph = ConjunctiveGraph()
+            graph_ns = dict(graph.namespaces())
+            # graph_ns["tabular"] = "http://ua.be/sdo2l/vocabulary/formalisms/tabular#"
+            df = pd.read_csv(f"data/{file_name}", index_col=None, header=None)
+            cell_value = df.iat[int(row), int(column)]
+            add_cell_to_graph(graph, iri, int(row), int(column), cell_value, verbose)
+            logging.debug(f"{cell_value=}")
+
+            try:
+                query_results = graph.query(query, initNs=graph_ns)
+            except Exception as e:
+                logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+            return await encode_graph_query_results(request, query_results)
+
+
+    def eval_custom_functions(self, ctx: QueryContext, part: CompValue) -> List[SPARQLResult]:
+        if part.name != "Extend":
+            raise NotImplementedError()
+
+        query_results = []
+        logging.debug("Custom evaluation.")
+        for eval_part in evalPart(ctx, part.p):
+            # Checks if the function is a URI (custom function)
+            if hasattr(part.expr, "iri"):
+                for conf_service in self.configuration.services:
+                    # Check if the URI corresponds to a registered custom function
+                    if part.expr.iri == URIRef(conf_service.namespace):
+                        query_results, ctx, part, eval_part = getattr(service, conf_service.call)(query_results, ctx, part, eval_part, conf_service)
+            else:
+                # For built-in SPARQL functions (that are not URIs)
+                evaluation: List[Any] = [_eval(part.expr, eval_part.forget(ctx, _except=part._vars))]
+                if isinstance(evaluation[0], SPARQLError):
+                    raise evaluation[0]
+                # Append results for built-in SPARQL functions
+                for result in evaluation:
+                    query_results.append(eval_part.merge({part.var: Literal(result)}))
+        return query_results

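Besides the SERVICE-based calls shown in the README, the new `/cell/` route can be hit directly over HTTP: the handler builds an ephemeral graph for the requested cell and runs the supplied query against it. A small sketch using `requests`, assuming a running endpoint and an existing `data/example.csv`:

.. code-block:: python

   # Fetch the triples materialised for one cell of data/example.csv.
   import requests

   params = {
       "iri": "http://ua.be/sdo2l/description/artifacts/artifacts#random-artefact",
       "file_name": "example.csv",
       "row": 2,
       "column": 2,
       "query": "SELECT ?s ?p ?o WHERE { ?s ?p ?o }",
   }
   response = requests.get("http://localhost:8000/cell/", params=params)
   print(response.json())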
+ 29 - 2
spendpoint/service.py

@@ -19,7 +19,6 @@ class Outlier:
 
 def outlier_service(query_results, ctx, part, eval_part, service_configuration):
     """
-
     Example query:
     PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
     PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
@@ -83,7 +82,6 @@ def conversion_service(query_results, ctx, part, eval_part, service_configuratio
     return query_results, ctx, part, eval_part
 
 
-
 def example_service(query_results, ctx, part, eval_part, service_configuration):
     """"""
     logging.debug(f"{query_results=}")
@@ -104,3 +102,32 @@ def example_service(query_results, ctx, part, eval_part, service_configuration):
     for outlier in outliers:
         query_results.append(eval_part.merge({part.var: Literal(outlier.iri), rdflib.term.Variable(part.var + "_value"): Literal(outlier.value)}))
     return query_results, ctx, part, eval_part
+
+
+# TODO maybe return a 'cell' type
+def cell_service(query_results, ctx, part, eval_part, service_configuration):
+    """"""
+    logging.debug(f"{query_results=}")
+    logging.debug(f"{ctx=}")
+    logging.debug(f"{part=}")
+    logging.debug(f"{eval_part=}")
+
+    file_name = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
+    row = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
+    column = str(_eval(part.expr.expr[2], eval_part.forget(ctx, _except=part.expr._vars)))
+
+    # TODO Should probably grab some setting from the KG, like header etc, maybe do that in query
+    logging.info(f"Looking for cell {row}:{column} in '{file_name}'.")
+    try:
+        df = pd.read_csv(file_name, index_col=None, header=None)
+        cell_value = df.iat[int(row), int(column)]
+        query_results.append(eval_part.merge({
+            part.var: Literal(cell_value),
+            rdflib.term.Variable(part.var + "_value"): Literal(cell_value)
+        }))
+        logging.debug(f"{cell_value=}")
+    except Exception as e:
+        # TODO Expand error info
+        logging.error(f"Failed to read cell {row}:{column} from '{file_name}': {e}")
+
+    return query_results, ctx, part, eval_part

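The new `cell_service` reads the CSV without a header row, so `df.iat[row, column]` is purely positional indexing. A tiny standalone illustration of that lookup, using an in-memory CSV made up for the example:

.. code-block:: python

   # Positional cell lookup, mirroring what cell_service does with its file argument.
   import pandas as pd
   from io import StringIO

   csv_text = "10,11,12\n20,21,22\n30,31,32\n"
   df = pd.read_csv(StringIO(csv_text), index_col=None, header=None)
   print(df.iat[0, 0])  # 10
   print(df.iat[2, 1])  # 31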
+ 2 - 2
tasks.py

@@ -74,12 +74,12 @@ def release(c, version):
 
 
 @task(name="generate", aliases=("gen", "csv"))
-def generate_random_data_csv(c, rows=200000, columns=50):
+def generate_random_data_csv(c, rows=200000, columns=50, name="example"):
     """"""
     """"""
     import numpy as np
     import numpy as np
     import uuid
     import uuid
     data_dir = Path(__file__).resolve().parent / Path("data")
     data_dir = Path(__file__).resolve().parent / Path("data")
-    out_file_path = data_dir / Path("example.csv")
+    out_file_path = data_dir / Path(f"{name}.csv")
     chunk = 1000
     current_row = 0
     with out_file_path.open("w", encoding="utf-8", buffering=chunk) as csv_file:
     with out_file_path.open("w", encoding="utf-8", buffering=chunk) as csv_file: