@@ -0,0 +1,259 @@
+import logging
+from typing import Any, List, Optional
+from urllib import parse
+
+import pandas as pd
+import rdflib
+from fastapi import APIRouter, Query, Request, Response
+from fastapi.responses import JSONResponse
+from rdflib import ConjunctiveGraph, Literal, URIRef
+from rdflib.namespace import OWL, RDF
+from rdflib.plugins.sparql.evaluate import evalPart
+from rdflib.plugins.sparql.evalutils import _eval
+from rdflib.plugins.sparql.parserutils import CompValue
+from rdflib.plugins.sparql.processor import SPARQLResult
+from rdflib.plugins.sparql.sparql import QueryContext, SPARQLError
+
+from spendpoint import service
+
+
+CONTENT_TYPE_TO_RDFLIB_FORMAT = {
+    # https://www.w3.org/TR/sparql11-results-json/
+    "application/sparql-results+json": "json",
+    "application/json": "json",
+    "text/json": "json",
+    # https://www.w3.org/TR/rdf-sparql-XMLres/
+    "application/sparql-results+xml": "xml",
+    "application/xml": "xml",  # for compatibility
+    "application/rdf+xml": "xml",  # for compatibility
+    "text/xml": "xml",  # not standard
+    # https://www.w3.org/TR/sparql11-results-csv-tsv/
+    "application/sparql-results+csv": "csv",
+    "text/csv": "csv",  # for compatibility
+    # Extras
+    "text/turtle": "ttl",
+}
+DEFAULT_CONTENT_TYPE = "application/json"
+
+
+def parse_accept_header(accept: str) -> List[str]:
+    """
+    Given an accept header string, return a list of media types in order of preference.
+
+    :param accept: Accept header value
+    :return: Ordered list of media type preferences
+    """
+
+    def _parse_preference(qpref: str) -> float:
+        qparts = qpref.split("=")
+        try:
+            return float(qparts[1].strip())
+        except (ValueError, IndexError):
+            pass
+        return 1.0
+
+    preferences = []
+    types = accept.split(",")
+    # Default preferences decrease with position so that, absent an explicit
+    # q value, order of appearance in the header is preserved.
+    dpref = 2.0
+    for mtype in types:
+        parts = [part.strip() for part in mtype.split(";")]
+        pref = dpref
+        for part in parts[1:]:
+            if part.startswith("q="):
+                pref = _parse_preference(part)
+                break
+        dpref = dpref - 0.01
+        preferences.append((parts[0], pref))
+    preferences.sort(key=lambda x: -x[1])
+    return [pref[0] for pref in preferences]
+
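+# A minimal usage sketch (hypothetical header value, not part of the module):
+#
+#   parse_accept_header("text/csv;q=0.5, application/xml;q=0.9, text/html")
+#   # -> ["text/html", "application/xml", "text/csv"]
+#
+# Types without an explicit q keep their order of appearance and outrank any
+# type whose q is below their implicit (descending) default.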
+
+def add_cell_to_graph(graph: ConjunctiveGraph, iri: str, row: int, column: int, cell_value: Any, verbose: bool) -> None:
+    """Add a Cell as known in the SDO2L ontology to the graph."""
+    tabular_prefix = "http://ua.be/sdo2l/vocabulary/formalism/tabular"
+    cell = URIRef(f"{iri}-cell-{row}-{column}")
+    # Store the triples in a temporary graph. This allows us to use the rdflib
+    # query engine for the sub-query instead of finding the matching pairs manually.
+    graph.add((cell, URIRef(f"{tabular_prefix}#holdsContent"), Literal(cell_value)))
+    graph.add((cell, URIRef(f"{tabular_prefix}#hasRowPosition"), Literal(int(row))))
+    graph.add((cell, URIRef(f"{tabular_prefix}#hasColumnPosition"), Literal(int(column))))
+
+    if verbose:
+        graph.add((cell, URIRef(f"{tabular_prefix}#isCellOfTabularData"), URIRef(f"{iri}")))
+        graph.add((cell, URIRef(f"{tabular_prefix}#isInCollection"), URIRef(f"{iri}-column-{int(column)}")))
+        graph.add((cell, URIRef(f"{tabular_prefix}#isInCollection"), URIRef(f"{iri}-row-{int(row)}")))
+
+    graph.add((cell, RDF.type, OWL.Thing))
+    graph.add((cell, RDF.type, URIRef("http://ua.be/sdo2l/vocabulary/formalism/tabular#Cell")))
+    graph.add((cell, RDF.type, URIRef("http://ua.be/sdo2l/vocabulary/formalism/file#Data")))
+    graph.add((cell, OWL.sameAs, URIRef(f"{iri}-cell-{int(row)}-{int(column)}")))
+
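+# For illustration (hypothetical IRI, not part of the module), a call such as
+#   add_cell_to_graph(graph, "http://example.org/file", 2, 3, "abc", verbose=False)
+# adds, besides the typing triples, content along the lines of:
+#   <http://example.org/file-cell-2-3> tabular:holdsContent "abc" ;
+#       tabular:hasRowPosition 2 ;
+#       tabular:hasColumnPosition 3 .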
+
+class SparqlRouter(APIRouter):
+    """Class to deploy a SPARQL endpoint using an RDFLib Graph."""
+
+    def __init__(self, *args: Any, title: str, description: str, version: str, configuration: Any, **kwargs: Any):
+        self.title = title
+        self.description = description
+        self.version = version
+        self.configuration = configuration
+        super().__init__(*args, **kwargs)
+        # Register the hook that lets rdflib delegate function evaluation to this router.
+        rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
+
+        async def encode_graph_query_results(request: Request, query_results: SPARQLResult) -> Response:
+            """Serialize the query results according to the request's Accept header."""
+            mime_types = parse_accept_header(request.headers.get("accept", DEFAULT_CONTENT_TYPE))
+            output_mime_type = DEFAULT_CONTENT_TYPE
+            for mime_type in mime_types:
+                if mime_type in CONTENT_TYPE_TO_RDFLIB_FORMAT:
+                    output_mime_type = mime_type
+                    break
+            logging.debug(f"Returning {output_mime_type}.")
+            try:
+                rdflib_format = CONTENT_TYPE_TO_RDFLIB_FORMAT[output_mime_type]
+                response = Response(query_results.serialize(format=rdflib_format), media_type=output_mime_type)
+            except Exception as e:
+                logging.error(f"Error serializing the SPARQL query results with RDFLib: {e}")
+                return JSONResponse(status_code=422, content={"message": "Error serializing the SPARQL query results."})
+            else:
+                return response
+
+        @self.get("/")
+        async def sparql_endpoint_get(request: Request, query: Optional[str] = Query(None)) -> Response:
+            """Execute the query against an empty graph; any results come from the custom-function hooks."""
+            # The graph is empty, so you would expect this to never return any pairs.
+            # But we inject pairs in the custom functions!
+            logging.debug("Received GET request.")
+            if not query:
+                logging.warning("No query provided in GET request!")
+                return JSONResponse({"message": "No query provided."})
+
+            graph = ConjunctiveGraph()
+            try:
+                query_results = graph.query(query)
+            except Exception as e:
+                logging.error(f"Error executing the SPARQL query on the RDFLib Graph: {e}")
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+
+            return await encode_graph_query_results(request, query_results)
+
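+        # Hypothetical client call (assuming the router is mounted at "/"):
+        #   curl -G "http://localhost:8000/" \
+        #        --data-urlencode "query=SELECT ?s ?p ?o WHERE { ?s ?p ?o }" \
+        #        -H "Accept: application/sparql-results+json"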
+        @self.post("/")
+        async def sparql_endpoint_post(request: Request, query: Optional[str] = Query(None)) -> Response:
+            """Extract the query from a form-encoded body if needed, then delegate to the GET handler."""
+            logging.debug("Received POST request.")
+            if not query:
+                query_body = await request.body()
+                body = query_body.decode("utf-8")
+                parsed_query = parse.parse_qsl(body)
+                for params in parsed_query:
+                    if params[0] == "query":
+                        query = parse.unquote(params[1])
+            return await sparql_endpoint_get(request, query)
+
+        @self.get("/cell/")
+        async def sparql_cell_endpoint_get(request: Request, iri, file_name, row, column, verbose: bool = True, query: Optional[str] = Query(None)) -> Response:
+            """
+            Resolve a single cell of a CSV file and query it, e.g. via a federated query:
+
+            SELECT ?s ?p ?o WHERE {
+                BIND(ENCODE_FOR_URI("http://ua.be/sdo2l/description/artifacts/artifacts#random-artefact") as ?e)
+                BIND(uri(concat("http://localhost:8000/cell/?iri=", ?e, "&row=2&column=2&file_name=example.csv")) as ?c)
+                SERVICE ?c {?s ?p ?o}
+            }
+            """
+            logging.debug(f"Received cell GET request [{iri}:{file_name}->{row}:{column}].")
+            graph = ConjunctiveGraph()
+            graph_ns = dict(graph.namespaces())
+            # graph_ns["tabular"] = "http://ua.be/sdo2l/vocabulary/formalisms/tabular#"
+            df = pd.read_csv(f"data/{file_name}", index_col=None, header=None)
+            cell_value = df.iat[int(row), int(column)]
+            add_cell_to_graph(graph, iri, int(row), int(column), cell_value, verbose)
+            logging.debug(f"{cell_value=}")
+
+            try:
+                query_results = graph.query(query, initNs=graph_ns)
+            except Exception as e:
+                logging.error(f"Error executing the SPARQL query on the RDFLib Graph: {e}")
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+            return await encode_graph_query_results(request, query_results)
+
+        @self.get("/cell/{iri}/{file_name}/")
+        async def sparql_sheet_endpoint_get(request: Request, iri, file_name, query: Optional[str] = Query(None), verbose: bool = True) -> Response:
+            """Return all cells in SDO2L notation for a file."""
+            logging.debug(f"Received sheet GET request [{file_name}].")
+            graph = ConjunctiveGraph()
+            graph_ns = dict(graph.namespaces())
+            # graph_ns["tabular"] = "http://ua.be/sdo2l/vocabulary/formalisms/tabular#"
+            df = pd.read_csv(f"data/{file_name}", index_col=None, header=None)
+
+            # Please forgive me pandas gods
+            for row in range(df.shape[0]):
+                for column in range(df.shape[1]):
+                    cell_value = df.iat[row, column]
+                    add_cell_to_graph(graph, iri, int(row), int(column), cell_value, verbose)
+
+            try:
+                query_results = graph.query(query, initNs=graph_ns)
+            except Exception as e:
+                logging.error(f"Error executing the SPARQL query on the RDFLib Graph: {e}")
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+            return await encode_graph_query_results(request, query_results)
+
+        @self.get("/cell/{iri}/{file_name}/{row}/{column}/")
+        async def sparql_cell_path_endpoint_get(request: Request, iri, file_name, row, column, query: Optional[str] = Query(None), verbose: bool = True) -> Response:
+            """
+            Create an ephemeral graph store based on the call parameters and perform the requested query.
+
+            SELECT ?s ?p ?o WHERE {
+                bind(str('http://localhost:8000') as ?base)
+                bind(str('iri') as ?iri)
+                bind(str('cell') as ?operation)
+                bind(str('example.csv') as ?file)
+                bind(str(2) as ?row)
+                bind(str(2) as ?column)
+                bind(iri(concat(?base, "/", ?operation, "/", ?iri, "/", ?file, "/", ?row, "/", ?column, "/")) as ?call)
+                SERVICE ?call {?s ?p ?o}
+            }
+            """
+            logging.debug(f"Received cell GET request [{file_name}->{row}:{column}].")
+            graph = ConjunctiveGraph()
+            graph_ns = dict(graph.namespaces())
+            # graph_ns["tabular"] = "http://ua.be/sdo2l/vocabulary/formalisms/tabular#"
+            df = pd.read_csv(f"data/{file_name}", index_col=None, header=None)
+            cell_value = df.iat[int(row), int(column)]
+            add_cell_to_graph(graph, iri, int(row), int(column), cell_value, verbose)
+            logging.debug(f"{cell_value=}")
+
+            try:
+                query_results = graph.query(query, initNs=graph_ns)
+            except Exception as e:
+                logging.error(f"Error executing the SPARQL query on the RDFLib Graph: {e}")
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+            return await encode_graph_query_results(request, query_results)
+
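+    # Sketch of the contract assumed for configured service calls below (the
+    # actual functions live in spendpoint.service; the name is illustrative only):
+    #
+    #   def some_call(query_results, ctx, part, eval_part, conf_service):
+    #       ...
+    #       return query_results, ctx, part, eval_part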
+
+    def eval_custom_functions(self, ctx: QueryContext, part: CompValue) -> List[SPARQLResult]:
+        """Handle ``Extend`` algebra parts so that configured custom functions can inject results."""
+        if part.name != "Extend":
+            # Tell rdflib that this hook does not handle other algebra parts.
+            raise NotImplementedError()
+
+        query_results = []
+        logging.debug("Custom evaluation.")
+        for eval_part in evalPart(ctx, part.p):
+            # Check if the function is a URI (custom function).
+            if hasattr(part.expr, "iri"):
+                for conf_service in self.configuration.services:
+                    # Check if the URI corresponds to a registered custom function.
+                    if part.expr.iri == URIRef(conf_service.namespace):
+                        query_results, ctx, part, eval_part = getattr(service, conf_service.call)(query_results, ctx, part, eval_part, conf_service)
+            else:
+                # For built-in SPARQL functions (that are not URIs).
+                evaluation: List[Any] = [_eval(part.expr, eval_part.forget(ctx, _except=part._vars))]
+                if isinstance(evaluation[0], SPARQLError):
+                    raise evaluation[0]
+                # Append results for built-in SPARQL functions.
+                for result in evaluation:
+                    query_results.append(eval_part.merge({part.var: Literal(result)}))
+        return query_results
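+
+
+# A minimal sketch of mounting the router (the configuration object is assumed,
+# not part of this module):
+#
+#   from fastapi import FastAPI
+#
+#   app = FastAPI()
+#   router = SparqlRouter(title="SPARQL endpoint", description="...", version="0.1.0", configuration=configuration)
+#   app.include_router(router)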