
Merge branch 'release-0.5.0'

Arkadiusz Ryś, 2 years ago
commit
424ad5d59f
13 changed files with 390 additions and 225 deletions
  1. .editorconfig  +1 -1
  2. .gitignore  +2 -2
  3. HISTORY.rst  +8 -0
  4. README.rst  +30 -1
  5. data/configuration.toml  +5 -0
  6. default.nix  +0 -22
  7. pyproject.toml  +14 -14
  8. requirements.txt  +14 -14
  9. spendpoint/__init__.py  +1 -1
  10. spendpoint/endpoint.py  +25 -166
  11. spendpoint/router.py  +259 -0
  12. spendpoint/service.py  +29 -2
  13. tasks.py  +2 -2

+ 1 - 1
.editorconfig

@@ -8,7 +8,7 @@ indent_style = space
 insert_final_newline = true
 trim_trailing_whitespace = true
 
-[*.{css,html,yml,yaml,js,xml}]
+[*.{css,html,yml,yaml,js,xml,nix}]
 indent_size = 2
 
 [{*.log,LICENSE}]

+ 2 - 2
.gitignore

@@ -8,5 +8,5 @@ dist/
 .idea
 
 # Generated example files
-data/example.csv
-data/example.parquet
+data/example*.csv
+data/example*.parquet

+ 8 - 0
HISTORY.rst

@@ -2,6 +2,14 @@
 History
 =======
 
+0.5.0 (2023-10-11)
+------------------
+* Cells now use a more transparent system.
+
+0.4.0 (2023-07-12)
+------------------
+* Docker containers now use configuration file.
+
 0.3.0 (2023-05-16)
 ------------------
 * Add configuration options.

+ 30 - 1
README.rst

@@ -7,16 +7,45 @@ The default access point is at `http://127.0.0.1:8000`.
 This endpoint can be configured in the `configuration.toml <data/configuration.toml>`_ file.
 The docker image created uses uvicorn to host the application at `0.0.0.0:80`. Feel free to map this to any port of your liking.
 
-We currently support 3 services out of the box:
+Bound services
+--------------
+
+We currently support 4 bind services out of the box:
 
 .. code-block::
 
    dtf:outlier
    dtf:example
    dtf:conversion
+   dtf:cell
 
 The outlier service relies on `another endpoint <https://msdl.uantwerpen.be/git/lucasalbertins/DTDesign/src/main/tools/typeOperations>`_ which needs to be set up and accessible.
 
+.. code-block:: sparql
+
+   PREFIX dtf: <https://ontology.rys.app/dt/function/>
+   SELECT ?cell ?cell_value WHERE {
+     SERVICE <http://localhost:8000/> {BIND(dtf:cell("data/example.csv", 0, 0) AS ?cell)}
+   }
+
+SPARQL query showing a bind-based cell service call.
+
+URI based services
+------------------
+
+A second, more versatile way to access a service is provided in the form of a URI.
+It is possible to query an individual cell by specifying its location in the URI of the service call.
+
+.. code-block:: sparql
+
+   SELECT ?s ?p ?o WHERE {
+     BIND(ENCODE_FOR_URI("http://ua.be/sdo2l/description/artifacts/artifacts#random-artefact") as ?e)
+     BIND(uri(concat("http://localhost:8000/cell/?iri=", ?e ,"&row=2&column=2&file_name=example.csv")) as ?c)
+     SERVICE ?c {?s ?p ?o}
+   }
+
+SPARQL query showing a URI-based cell service call.
+
 Installation
 ------------
 

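The two README examples above can also be exercised from Python. A minimal client sketch, assuming the endpoint is already running locally on port 8000 and that `data/example.csv` exists; SPARQLWrapper comes from the project's test dependencies, and the BIND is sent straight to the endpoint rather than going through a federating store.

.. code-block:: python

   # Send the dtf:cell bind directly to the local endpoint and print the bindings.
   # Hypothetical standalone script; the endpoint URL and CSV path are assumptions.
   from SPARQLWrapper import SPARQLWrapper, JSON

   query = """
   PREFIX dtf: <https://ontology.rys.app/dt/function/>
   SELECT ?cell ?cell_value WHERE {
     BIND(dtf:cell("data/example.csv", 0, 0) AS ?cell)
   }
   """

   sparql = SPARQLWrapper("http://localhost:8000/")
   sparql.setQuery(query)
   sparql.setReturnFormat(JSON)
   results = sparql.query().convert()
   for binding in results["results"]["bindings"]:
       print(binding)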
+ 5 - 0
data/configuration.toml

@@ -19,3 +19,8 @@ call = "example_service"
 name = "conversion"
 namespace = "https://ontology.rys.app/dt/function/conversion"
 call = "conversion_service"
+
+[[services]]
+name = "cell"
+namespace = "https://ontology.rys.app/dt/function/cell"
+call = "cell_service"

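For context, the new `[[services]]` entry is resolved at query time by the custom evaluation hook: the function IRI used in `BIND(...)` is compared against each configured `namespace`, and the matching `call` name is looked up on the `spendpoint.service` module. A rough sketch of that dispatch, assuming the configuration is read directly with `toml` here rather than through the project's dataclass loader.

.. code-block:: python

   # Look up the service callable registered for a custom function IRI.
   # Simplified: the real code iterates configuration.services dataclass instances.
   import toml
   from rdflib import URIRef
   from spendpoint import service

   configuration = toml.load("data/configuration.toml")

   def resolve_service(function_iri: URIRef):
       """Return the callable configured for this IRI, or None if unregistered."""
       for entry in configuration["services"]:
           if function_iri == URIRef(entry["namespace"]):
               return getattr(service, entry["call"])
       return None

   print(resolve_service(URIRef("https://ontology.rys.app/dt/function/cell")))  # cell_service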
+ 0 - 22
default.nix

@@ -1,22 +0,0 @@
-{
-  pkgs ? import <nixpkgs> {}
-}:
-
-pkgs.python3Packages.buildPythonPackage rec {
-  pname = "spendpoint";
-  version = "0.2.0";
-  format = "pyproject";
-  src = ./.;
-
-  nativeBuildInputs = [
-    #flit-core
-  ];
-
-  buildInputs = [
-    #flit-core
-  ];
-
-  propagatedBuildInputs = [
-    pkgs.python3Packages.jinja2
-  ];
-}

+ 14 - 14
pyproject.toml

@@ -22,35 +22,35 @@ keywords = ["spendpoint"]
 dependencies = [
     "toml~=0.10.2",
     "arklog~=0.5.1",
-    "rdflib~=6.3.2",
+    "rdflib~=7.0.0",
     "pandas~=2.0.1",
     "pandas~=2.0.1",
     "dacite~=1.8.1",
     "dacite~=1.8.1",
-    "fastapi~=0.95.2",
-    "pyarrow~=12.0.0",
-    "requests~=2.30.0",
-    "starlette~=0.27",
+    "fastapi~=0.101.0",
+    "pyarrow~=12.0.1",
+    "requests~=2.31.0",
+    "starlette~=0.31.0",
     "python-magic~=0.4.27",
     "python-magic~=0.4.27",
-    "uvicorn[standard]~=0.22.0",
+    "uvicorn[standard]~=0.23.2",
 ]
 
 [project.optional-dependencies]
 test = [
-    "pytest~=7.3.1",
+    "pytest~=7.4.0",
     "sparqlwrapper~=2.0.0",
     "sparqlwrapper~=2.0.0",
 ]
 ]
 doc = [
 doc = [
-    "sphinx~=7.0.1",
+    "sphinx~=7.1.2",
 ]
 dev = [
-    "tox~=4.5.1",
-    "pip~=23.1.2",
+    "tox~=4.6.4",
+    "pip~=23.2.1",
     "flit~=3.9.0",
     "flit~=3.9.0",
     "twine~=4.0.2",
     "twine~=4.0.2",
-    "numpy~=1.24.3",
-    "invoke~=2.1.2",
+    "numpy~=1.25.2",
+    "invoke~=2.2.0",
     "jinja2~=3.1.2",
     "jinja2~=3.1.2",
-    "flake8~=6.0.0",
-    "coverage~=7.2.5",
+    "flake8~=6.1.0",
+    "coverage~=7.2.7",
 ]
 
 [project.urls]

+ 14 - 14
requirements.txt

@@ -1,27 +1,27 @@
 # SpEndPoint
 toml              ~= 0.10.2
 arklog            ~= 0.5.1
-rdflib            ~= 6.3.2
+rdflib            ~= 7.0.0
 pandas            ~= 2.0.1
 dacite            ~= 1.8.1
-fastapi           ~= 0.95.2
-pyarrow           ~= 12.0.0
-requests          ~= 2.30.0
-starlette         ~= 0.27
+fastapi           ~= 0.101.0
+pyarrow           ~= 12.0.1
+requests          ~= 2.31.0
+starlette         ~= 0.31.0
 python-magic      ~= 0.4.27
-uvicorn[standard] ~= 0.22.0
+uvicorn[standard] ~= 0.23.2
 # Test
-pytest        ~= 7.3.1
+pytest        ~= 7.4.0
 sparqlwrapper ~= 2.0.0
 # Doc
-sphinx ~= 7.0.1
+sphinx ~= 7.1.2
 # Dev
-tox      ~= 4.5.1
-pip      ~= 23.1.2
+tox      ~= 4.6.4
+pip      ~= 23.2.1
 flit     ~= 3.9.0
 twine    ~= 4.0.2
-numpy    ~= 1.24.3
-invoke   ~= 2.1.2
+numpy    ~= 1.25.2
+invoke   ~= 2.2.0
 jinja2   ~= 3.1.2
-flake8   ~= 6.0.0
-coverage ~= 7.2.5
+flake8   ~= 6.1.0
+coverage ~= 7.2.7

+ 1 - 1
spendpoint/__init__.py

@@ -1,3 +1,3 @@
 """SPARQL endpoint for ontologies."""
 """SPARQL endpoint for ontologies."""
-__version__ = "0.4.0"
+__version__ = "0.5.0"
 __version_info__ = tuple((int(num) if num.isdigit() else num for num in __version__.replace("-", ".", 1).split(".")))

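For reference, the unchanged helper on the last line of this file parses the bumped string into a version tuple. A quick interpreter check of what it yields for the new value:

.. code-block:: python

   # What the unchanged __version_info__ expression evaluates to after the bump.
   __version__ = "0.5.0"
   __version_info__ = tuple((int(num) if num.isdigit() else num for num in __version__.replace("-", ".", 1).split(".")))
   print(__version_info__)  # (0, 5, 0)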
+ 25 - 166
spendpoint/endpoint.py

@@ -1,183 +1,42 @@
 # Copied and modified from https://pypi.org/project/rdflib-endpoint/
+# https://fastapi.tiangolo.com/
 
 import logging
-import re
 import arklog
-import rdflib
-from typing import Any, Dict, List, Optional, Union
-from urllib import parse
-from fastapi import FastAPI, Query, Request, Response
-from fastapi.responses import JSONResponse
-from rdflib import ConjunctiveGraph, Dataset, Graph, Literal, URIRef
-from rdflib.plugins.sparql import prepareQuery
-from rdflib.plugins.sparql.evaluate import evalPart
-from rdflib.plugins.sparql.evalutils import _eval
-from rdflib.plugins.sparql.parserutils import CompValue
-from rdflib.plugins.sparql.sparql import QueryContext, SPARQLError
-
-from spendpoint import service
+import time
+from typing import Any
+from fastapi import FastAPI, Request, Response
+from fastapi.middleware.cors import CORSMiddleware
+from spendpoint.router import SparqlRouter
 
 arklog.set_config_logging()
 
-
 class SparqlEndpoint(FastAPI):
     """SPARQL endpoint for services and storage of heterogeneous data."""
 
-    @staticmethod
-    def is_json_mime_type(mime: str) -> bool:
-        """"""
-        return mime.split(",")[0] in ("application/sparql-results+json","application/json","text/javascript","application/javascript")
-
-    @staticmethod
-    def is_csv_mime_type(mime: str) -> bool:
-        """"""
-        return mime.split(",")[0] in ("text/csv", "application/sparql-results+csv")
-
-    @staticmethod
-    def is_xml_mime_type(mime: str) -> bool:
-        """"""
-        return mime.split(",")[0] in ("application/xml", "application/sparql-results+xml")
-
-    @staticmethod
-    def is_turtle_mime_type(mime: str) -> bool:
+    def __init__(self, *args: Any, title: str, description: str, version: str, configuration, **kwargs: Any):
         """"""
         """"""
-        return mime.split(",")[0] in ("text/turtle",)
-
-    async def requested_result_type(self, request: Request, operation: str) -> str:
-        output_mime_type = request.headers["accept"]
-        # TODO Ugly hack, fix later (Fuseki sends options)
-        output_mime_type = output_mime_type.split(",")[0]
-        if isinstance(output_mime_type, list):
-            return output_mime_type[0]
-        # TODO Use match or dict for this
-        if not output_mime_type:
-            logging.warning("No mime type provided. Setting mimetype to 'application/xml'.")
-            return "application/xml"
-        if operation == "Construct Query" and (self.is_json_mime_type(output_mime_type) or self.is_csv_mime_type(output_mime_type)):
-            return "text/turtle"
-        if operation == "Construct Query" and output_mime_type == "application/xml":
-            return "application/rdf+xml"
-        return output_mime_type
-
-    def __init__(self, *args: Any, title: str, description: str, version: str, configuration, graph: Union[Graph, ConjunctiveGraph, Dataset] = ConjunctiveGraph(), **kwargs: Any):
-        """"""
-        self.graph = graph
         self.title = title
         self.description = description
         self.version = version
         self.configuration = configuration
         super().__init__(*args, title=title, description=description, version=version, **kwargs)
         logging.debug(self.description)
-        rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
-        api_responses: Optional[Dict[Union[int, str], Dict[str, Any]]] = {
-            200: {
-                "description": "SPARQL query results",
-                "content": {
-                    "application/sparql-results+json": {
-                        "results": {"bindings": []},
-                        "head": {"vars": []},
-                    },
-                    "application/json": {
-                        "results": {"bindings": []},
-                        "head": {"vars": []},
-                    },
-                    "text/csv": {"example": "s,p,o"},
-                    "application/sparql-results+csv": {"example": "s,p,o"},
-                    "text/turtle": {"example": "service description"},
-                    "application/sparql-results+xml": {"example": "<root></root>"},
-                    "application/xml": {"example": "<root></root>"},
-                },
-            },
-            400: {
-                "description": "Bad Request",
-            },
-            403: {
-                "description": "Forbidden",
-            },
-            422: {
-                "description": "Unprocessable Entity",
-            },
-        }
-
-        @self.get("/", name="SPARQL endpoint", description="", responses=api_responses)
-        async def sparql_endpoint_get(request: Request, query: Optional[str] = Query(None)) -> Response:
-            logging.debug("Received GET request.")
-            if not query:
-                logging.warning("No query provided in GET request!")
-                return JSONResponse({"error": "No query provided."})
-
-            graph_ns = {}
-            for prefix, ns_uri in self.graph.namespaces():
-                graph_ns[prefix] = ns_uri
-
-            try:
-                parsed_query = prepareQuery(query, initNs=graph_ns)
-                query_operation = re.sub(r"(\w)([A-Z])", r"\1 \2", parsed_query.algebra.name)
-            except Exception as e:
-                logging.error("Error parsing the SPARQL query: " + str(e))
-                return JSONResponse(
-                    status_code=400,
-                    content={"message": "Error parsing the SPARQL query"},
-                )
-
-            try:
-                query_results = self.graph.query(query, initNs=graph_ns)
-            except Exception as e:
-                logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
-                # TODO Send better error which can be parsed as a SPARQL response or check it client side
-                return JSONResponse(
-                    status_code=400,
-                    content={"message": "Error executing the SPARQL query on the RDFLib Graph"},
-                )
-            output_mime_type = await self.requested_result_type(request, query_operation)
-            logging.debug(f"Returning {output_mime_type}.")
-            try:
-                if self.is_csv_mime_type(output_mime_type):
-                    return Response(query_results.serialize(format="csv"), media_type=output_mime_type)
-                elif self.is_json_mime_type(output_mime_type):
-                    return Response(query_results.serialize(format="json"), media_type=output_mime_type)
-                elif self.is_xml_mime_type(output_mime_type):
-                    return Response(query_results.serialize(format="xml"), media_type=output_mime_type)
-                elif self.is_turtle_mime_type(output_mime_type):
-                    return Response(query_results.serialize(format="turtle"), media_type=output_mime_type)
-                return Response(query_results.serialize(format="xml"), media_type="application/sparql-results+xml")
-            except Exception as e:
-                logging.exception(e)
-                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph"})
-
-        @self.post("/", name="SPARQL endpoint", description="", responses=api_responses)
-        async def sparql_endpoint_post(request: Request, query: Optional[str] = Query(None)) -> Response:
-            logging.debug("Received POST request.")
-            if not query:
-                # Handle federated query services which provide the query in the body
-                query_body = await request.body()
-                body = query_body.decode("utf-8")
-                parsed_query = parse.parse_qsl(body)
-                for params in parsed_query:
-                    if params[0] == "query":
-                        query = parse.unquote(params[1])
-            return await sparql_endpoint_get(request, query)
-
-
-    def eval_custom_functions(self, ctx: QueryContext, part: CompValue) -> List[Any]:
-        if part.name != "Extend":
-            raise NotImplementedError()
-
-        query_results = []
-        logging.debug("Custom evaluation.")
-        for eval_part in evalPart(ctx, part.p):
-            # Checks if the function is a URI (custom function)
-            if hasattr(part.expr, "iri"):
-                for conf_service in self.configuration.services:
-                    # Check if URI correspond to a registered custom function
-                    if part.expr.iri == URIRef(conf_service.namespace):
-                        query_results, ctx, part, eval_part = getattr(service, conf_service.call)(query_results, ctx, part, eval_part, conf_service)
-            else:
-                # For built-in SPARQL functions (that are not URIs)
-                evaluation: List[Any] = [_eval(part.expr, eval_part.forget(ctx, _except=part._vars))]
-                if isinstance(evaluation[0], SPARQLError):
-                    raise evaluation[0]
-                # Append results for built-in SPARQL functions
-                for result in evaluation:
-                    query_results.append(eval_part.merge({part.var: Literal(result)}))
-        return query_results
+        sparql_router = SparqlRouter(title=title, description=description, version=version, configuration=configuration)
+        self.include_router(sparql_router)
+        self.add_middleware(
+            CORSMiddleware,
+            allow_origins=["*"],
+            allow_credentials=True,
+            allow_methods=["*"],
+            allow_headers=["*"],
+        )
+
+        @self.middleware("http")
+        async def add_process_time_header(request: Request, call_next: Any) -> Response:
+            start_time = time.time()
+            response: Response = await call_next(request)
+            duration = str(time.time() - start_time)
+            response.headers["X-Process-Time"] = duration
+            logging.debug(f"X-Process-Time = {duration}")
+            return response

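The rewritten endpoint now only wires up the new `SparqlRouter`, CORS, and a timing middleware. A quick way to observe the new `X-Process-Time` header, assuming the application is already running on `localhost:8000` (the startup module itself is not part of this diff):

.. code-block:: python

   # Any response from the running endpoint should now carry an X-Process-Time header.
   import requests

   response = requests.get(
       "http://localhost:8000/",
       params={"query": "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 1"},
   )
   print(response.status_code)
   print(response.headers.get("X-Process-Time"))  # e.g. "0.004..."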
+ 259 - 0
spendpoint/router.py

@@ -0,0 +1,259 @@
+import logging
+import rdflib
+import pandas as pd
+from typing import Any, List, Optional
+from urllib import parse
+from rdflib.plugins.sparql import prepareQuery
+from rdflib.plugins.sparql.processor import SPARQLResult
+from spendpoint import service
+from fastapi import APIRouter, Query, Request, Response
+from fastapi.responses import JSONResponse
+from rdflib import ConjunctiveGraph, Literal, URIRef
+from rdflib.plugins.sparql.evaluate import evalPart
+from rdflib.plugins.sparql.evalutils import _eval
+from rdflib.plugins.sparql.parserutils import CompValue
+from rdflib.plugins.sparql.sparql import QueryContext, SPARQLError
+from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, VOID, XMLNS, XSD
+
+
+CONTENT_TYPE_TO_RDFLIB_FORMAT = {
+    # https://www.w3.org/TR/sparql11-results-json/
+    "application/sparql-results+json": "json",
+    "application/json": "json",
+    "text/json": "json",
+    # https://www.w3.org/TR/rdf-sparql-XMLres/
+    "application/sparql-results+xml": "xml",
+    "application/xml": "xml",  # for compatibility
+    "application/rdf+xml": "xml",  # for compatibility
+    "text/xml": "xml",  # not standard
+    # https://www.w3.org/TR/sparql11-results-csv-tsv/
+    "application/sparql-results+csv": "csv",
+    "text/csv": "csv",  # for compatibility
+    # Extras
+    "text/turtle": "ttl",
+}
+DEFAULT_CONTENT_TYPE = "application/json"
+
+
+def parse_accept_header(accept: str) -> List[str]:
+    """
+    Given an accept header string, return a list of media types in order of preference.
+
+    :param accept: Accept header value
+    :return: Ordered list of media type preferences
+    """
+
+    def _parse_preference(qpref: str) -> float:
+        qparts = qpref.split("=")
+        try:
+            return float(qparts[1].strip())
+        except ValueError:
+            pass
+        except IndexError:
+            pass
+        return 1.0
+
+    preferences = []
+    types = accept.split(",")
+    dpref = 2.0
+    for mtype in types:
+        parts = mtype.split(";")
+        parts = [part.strip() for part in parts]
+        pref = dpref
+        try:
+            for part in parts[1:]:
+                if part.startswith("q="):
+                    pref = _parse_preference(part)
+                    break
+        except IndexError:
+            pass
+        # preserve order of appearance in the list
+        dpref = dpref - 0.01
+        preferences.append((parts[0], pref))
+    preferences.sort(key=lambda x: -x[1])
+    return [pref[0] for pref in preferences]
+
+
+def add_cell_to_graph(graph: ConjunctiveGraph, iri: str, row: int, column: int, cell_value: Any, verbose: bool) -> None:
+    """Adds a Cell as known in the SDO2L ontology to the graph."""
+    tabular_prefix = "http://ua.be/sdo2l/vocabulary/formalism/tabular"
+    cell = URIRef(f"{iri}-cell-{row}-{column}")
+    # Store the triples in a temporary graph. This allows us to use the rdflib query engine for the sub-query instead of finding the matching pairs manually.
+    graph.add((cell, URIRef(f"{tabular_prefix}#holdsContent"), Literal(cell_value)))
+    graph.add((cell, URIRef(f"{tabular_prefix}#hasRowPosition"), Literal(int(row))))
+    graph.add((cell, URIRef(f"{tabular_prefix}#hasColumnPosition"), Literal(int(column))))
+
+    if verbose:
+        graph.add((cell, URIRef(f"{tabular_prefix}#isCellOfTabularData"), URIRef(f"{iri}")))
+        graph.add((cell, URIRef(f"{tabular_prefix}#isInCollection"), URIRef(f"{iri}-column-{int(column)}")))
+        graph.add((cell, URIRef(f"{tabular_prefix}#isInCollection"), URIRef(f"{iri}-row-{int(row)}")))
+
+        graph.add((cell, RDF.type, OWL.Thing))
+        graph.add((cell, RDF.type, URIRef("http://ua.be/sdo2l/vocabulary/formalism/tabular#Cell")))
+        graph.add((cell, RDF.type, URIRef("http://ua.be/sdo2l/vocabulary/formalism/file#Data")))
+        graph.add((cell, OWL.sameAs, URIRef(f"{iri}-cell-{int(row)}-{int(column)}")))
+
+
+class SparqlRouter(APIRouter):
+    """Class to deploy a SPARQL endpoint using a RDFLib Graph."""
+
+    def __init__(self, *args: Any, title: str, description: str, version: str, configuration, **kwargs: Any):
+        self.title = title
+        self.description = description
+        self.version = version
+        self.configuration = configuration
+        super().__init__(*args, **kwargs)
+        rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
+
+        async def encode_graph_query_results(request, query_results):
+            """Serialize the query results according to the request's Accept header."""
+            mime_types = parse_accept_header(request.headers.get("accept", DEFAULT_CONTENT_TYPE))
+            output_mime_type = DEFAULT_CONTENT_TYPE
+            for mime_type in mime_types:
+                if mime_type in CONTENT_TYPE_TO_RDFLIB_FORMAT:
+                    output_mime_type = mime_type
+                    break
+            logging.debug(f"Returning {output_mime_type}.")
+            try:
+                rdflib_format = CONTENT_TYPE_TO_RDFLIB_FORMAT[output_mime_type]
+                response = Response(query_results.serialize(format=rdflib_format), media_type=output_mime_type)
+            except Exception as e:
+                logging.error(f"Error serializing the SPARQL query results with RDFLib: {e}")
+                return JSONResponse(status_code=422, content={"message": "Error serializing the SPARQL query results."})
+            else:
+                return response
+
+        @self.get("/")
+        async def sparql_endpoint_get(request: Request, query: Optional[str] = Query(None)) -> Response:
+            """Returns an empty result."""
+            # The graph is empty, so you would expect this to never return any pairs.
+            # But we inject pairs in the custom functions!
+            logging.debug("Received GET request.")
+            if not query:
+                logging.warning("No query provided in GET request!")
+                return JSONResponse({"message": "No query provided."})
+
+            graph = ConjunctiveGraph()
+            try:
+                query_results = graph.query(query)
+            except Exception as e:
+                logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+
+            return await encode_graph_query_results(request, query_results)
+
+        @self.post("/")
+        async def sparql_endpoint_post(request: Request, query: Optional[str] = Query(None)) -> Response:
+            """Returns an empty result."""
+            logging.debug("Received POST request.")
+            if not query:
+                query_body = await request.body()
+                body = query_body.decode("utf-8")
+                parsed_query = parse.parse_qsl(body)
+                for params in parsed_query:
+                    if params[0] == "query":
+                        query = parse.unquote(params[1])
+            return await sparql_endpoint_get(request, query)
+
+        @self.get("/cell/")
+        async def sparql_cell_endpoint_get(request: Request, iri, file_name, row, column, verbose: bool = True, query: Optional[str] = Query(None)) -> Response:
+            """
+            SELECT ?s ?p ?o WHERE {
+              BIND(ENCODE_FOR_URI("http://ua.be/sdo2l/description/artifacts/artifacts#random-artefact") as ?e)
+              BIND(uri(concat("http://localhost:8000/cell/?iri=", ?e ,"&row=2&column=2&file_name=example.csv")) as ?c)
+              SERVICE ?c {?s ?p ?o}
+            }
+            """
+            logging.debug(f"Received cell GET request [{iri}:{file_name}->{row}:{column}].")
+            graph = ConjunctiveGraph()
+            graph_ns = dict(graph.namespaces())
+            # graph_ns["tabular"] = "http://ua.be/sdo2l/vocabulary/formalisms/tabular#"
+            df = pd.read_csv(f"data/{file_name}", index_col=None, header=None)
+            cell_value = df.iat[int(row), int(column)]
+            add_cell_to_graph(graph, iri, int(row), int(column), cell_value, verbose)
+            logging.debug(f"{cell_value=}")
+
+            try:
+                query_results = graph.query(query, initNs=graph_ns)
+            except Exception as e:
+                logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+            return await encode_graph_query_results(request, query_results)
+
+        @self.get("/cell/{iri}/{file_name}/")
+        async def sparql_sheet_endpoint_get(request: Request, iri, file_name, query: Optional[str] = Query(None), verbose: bool = True) -> Response:
+            """Return all cells in SDO2L notation for the given file."""
+            logging.debug(f"Received sheet GET request [{file_name}].")
+            graph = ConjunctiveGraph()
+            graph_ns = dict(graph.namespaces())
+            # graph_ns["tabular"] = "http://ua.be/sdo2l/vocabulary/formalisms/tabular#"
+            df = pd.read_csv(f"data/{file_name}", index_col=None, header=None)
+            df.reset_index()
+
+            # Please forgive me pandas gods
+            for row in range(df.shape[0]):
+                for column in range(df.shape[1]):
+                    cell_value = df.at[row, column]
+                    add_cell_to_graph(graph, iri, int(row), int(column), cell_value, verbose)
+
+            try:
+                query_results = graph.query(query, initNs=graph_ns)
+            except Exception as e:
+                logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+            return await encode_graph_query_results(request, query_results)
+
+        @self.get("/cell/{iri}/{file_name}/{row}/{column}/")
+        async def sparql_cell_endpoint_get(request: Request, iri, file_name, row, column, query: Optional[str] = Query(None), verbose: bool = True) -> Response:
+            """
+            Create an ephemeral graph store based on the call parameters and perform the requested query.
+            SELECT ?s ?p ?o WHERE {
+              bind(str('http://localhost:8000') as ?base)
+              bind(str('iri') as ?iri)
+              bind(str('cell') as ?operation)
+              bind(str('example.csv') as ?file)
+              bind(str(2) as ?row)
+              bind(str(2) as ?column)
+              bind(iri(concat(?base, "/", ?operation, "/", ?file, "/", ?row, "/", ?column, "/")) as ?call)
+              SERVICE ?call {?s ?p ?o}
+            }
+            """
+            logging.debug(f"Received cell GET request [{file_name}->{row}:{column}].")
+            graph = ConjunctiveGraph()
+            graph_ns = dict(graph.namespaces())
+            # graph_ns["tabular"] = "http://ua.be/sdo2l/vocabulary/formalisms/tabular#"
+            df = pd.read_csv(f"data/{file_name}", index_col=None, header=None)
+            cell_value = df.iat[int(row), int(column)]
+            add_cell_to_graph(graph, iri, int(row), int(column), cell_value, verbose)
+            logging.debug(f"{cell_value=}")
+
+            try:
+                query_results = graph.query(query, initNs=graph_ns)
+            except Exception as e:
+                logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
+                return JSONResponse(status_code=400, content={"message": "Error executing the SPARQL query on the RDFLib Graph."})
+            return await encode_graph_query_results(request, query_results)
+
+
+    def eval_custom_functions(self, ctx: QueryContext, part: CompValue) -> List[SPARQLResult]:
+        if part.name != "Extend":
+            raise NotImplementedError()
+
+        query_results = []
+        logging.debug("Custom evaluation.")
+        for eval_part in evalPart(ctx, part.p):
+            # Checks if the function is a URI (custom function)
+            if hasattr(part.expr, "iri"):
+                for conf_service in self.configuration.services:
+                    # Check if the URI corresponds to a registered custom function
+                    if part.expr.iri == URIRef(conf_service.namespace):
+                        query_results, ctx, part, eval_part = getattr(service, conf_service.call)(query_results, ctx, part, eval_part, conf_service)
+            else:
+                # For built-in SPARQL functions (that are not URIs)
+                evaluation: List[Any] = [_eval(part.expr, eval_part.forget(ctx, _except=part._vars))]
+                if isinstance(evaluation[0], SPARQLError):
+                    raise evaluation[0]
+                # Append results for built-in SPARQL functions
+                for result in evaluation:
+                    query_results.append(eval_part.merge({part.var: Literal(result)}))
+        return query_results

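Besides the SERVICE-based calls shown in the README, the new `/cell/` route can be hit directly over HTTP: the handler builds an ephemeral graph for the requested cell and runs the supplied query against it. A small sketch using `requests`, assuming a running endpoint and an existing `data/example.csv`:

.. code-block:: python

   # Fetch the triples materialised for one cell of data/example.csv.
   import requests

   params = {
       "iri": "http://ua.be/sdo2l/description/artifacts/artifacts#random-artefact",
       "file_name": "example.csv",
       "row": 2,
       "column": 2,
       "query": "SELECT ?s ?p ?o WHERE { ?s ?p ?o }",
   }
   response = requests.get("http://localhost:8000/cell/", params=params)
   print(response.json())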
+ 29 - 2
spendpoint/service.py

@@ -19,7 +19,6 @@ class Outlier:
 
 def outlier_service(query_results, ctx, part, eval_part, service_configuration):
     """
-
     Example query:
     PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
     PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
@@ -83,7 +82,6 @@ def conversion_service(query_results, ctx, part, eval_part, service_configuratio
     return query_results, ctx, part, eval_part
 
 
-
 def example_service(query_results, ctx, part, eval_part, service_configuration):
     """"""
     logging.debug(f"{query_results=}")
@@ -104,3 +102,32 @@ def example_service(query_results, ctx, part, eval_part, service_configuration):
     for outlier in outliers:
         query_results.append(eval_part.merge({part.var: Literal(outlier.iri), rdflib.term.Variable(part.var + "_value"): Literal(outlier.value)}))
     return query_results, ctx, part, eval_part
+
+
+# TODO maybe return a 'cell' type
+def cell_service(query_results, ctx, part, eval_part, service_configuration):
+    """"""
+    logging.debug(f"{query_results=}")
+    logging.debug(f"{ctx=}")
+    logging.debug(f"{part=}")
+    logging.debug(f"{eval_part=}")
+
+    file_name = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
+    row = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
+    column = str(_eval(part.expr.expr[2], eval_part.forget(ctx, _except=part.expr._vars)))
+
+    # TODO Should probably grab some setting from the KG, like header etc, maybe do that in query
+    logging.info(f"Looking for cell {row}:{column} in '{file_name}'.")
+    try:
+        df = pd.read_csv(file_name, index_col=None, header=None)
+        cell_value = df.iat[int(row), int(column)]
+        query_results.append(eval_part.merge({
+            part.var: Literal(cell_value),
+            rdflib.term.Variable(part.var + "_value"): Literal(cell_value)
+        }))
+        logging.debug(f"{cell_value=}")
+    except Exception as e:
+        # TODO Expand error info
+        logging.error(f"Failed to read cell {row}:{column} from '{file_name}': {e}")
+
+    return query_results, ctx, part, eval_part

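The new `cell_service` reads the CSV without a header row, so `df.iat[row, column]` is purely positional indexing. A tiny standalone illustration of that lookup, using an in-memory CSV made up for the example:

.. code-block:: python

   # Positional cell lookup, mirroring what cell_service does with its file argument.
   import pandas as pd
   from io import StringIO

   csv_text = "10,11,12\n20,21,22\n30,31,32\n"
   df = pd.read_csv(StringIO(csv_text), index_col=None, header=None)
   print(df.iat[0, 0])  # 10
   print(df.iat[2, 1])  # 31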
+ 2 - 2
tasks.py

@@ -74,12 +74,12 @@ def release(c, version):
 
 
 @task(name="generate", aliases=("gen", "csv"))
-def generate_random_data_csv(c, rows=200000, columns=50):
+def generate_random_data_csv(c, rows=200000, columns=50, name="example"):
     """"""
     """"""
     import numpy as np
     import numpy as np
     import uuid
     import uuid
     data_dir = Path(__file__).resolve().parent / Path("data")
     data_dir = Path(__file__).resolve().parent / Path("data")
-    out_file_path = data_dir / Path("example.csv")
+    out_file_path = data_dir / Path(f"{name}.csv")
     chunk = 1000
     current_row = 0
     with out_file_path.open("w", encoding="utf-8", buffering=chunk) as csv_file:
     with out_file_path.open("w", encoding="utf-8", buffering=chunk) as csv_file: