Browse Source

Merge branch 'release-0.2.0'

Arkadiusz Ryś 2 years ago
parent
commit
a1e7a04cc3
12 changed files with 107 additions and 69 deletions
  1. 4 1
      .gitignore
  2. 8 0
      HISTORY.rst
  3. 13 4
      README.rst
  4. 10 6
      pyproject.toml
  5. 7 4
      requirements.txt
  6. 1 1
      spendpoint/__init__.py
  7. 2 15
      spendpoint/endpoint.py
  8. 13 6
      spendpoint/main.py
  9. 31 2
      spendpoint/service.py
  10. 0 28
      spendpoint/yasgui.html
  11. 16 0
      tasks.py
  12. 2 2
      tests/test_query_endpoint.py

+ 4 - 1
.gitignore

@@ -3,5 +3,8 @@ venv/
 build/
 dist/
 *.egg-info
-
 *.log
+
+# Generated example files
+data/example.csv
+data/example.parquet

+ 8 - 0
HISTORY.rst

@@ -2,6 +2,14 @@
 History
 =======
 
+0.0.0 (yyyy-mm-dd)
+------------------
+* Add conversion service.
+
+0.2.0 (2023-04-04)
+------------------
+* Prototype conversion service.
+
 0.1.0 (2023-03-06)
 ------------------
 * Create outlier service bridge.

+ 13 - 4
README.rst

@@ -3,16 +3,25 @@ SpEndPoint
 ##########
 
 Creates a SPARQL endpoint supporting custom services.
-Default access at `http://127.0.0.1:8000/gui`.
+Default access at `http://127.0.0.1:8000`.
+
+Currently supports 3 services:
+
+.. code-block::
+
+   dtf:outlier
+   dtf:example
+   dtf:conversion
 
 Installation
 ------------
 
-.. code-block:: shell
+..
+   .. code-block:: shell
 
-   pip install spendpoint
+      pip install spendpoint
 
-or
+   or
 
 .. code-block:: shell
 

+ 10 - 6
pyproject.toml

@@ -20,27 +20,31 @@ dynamic = ["version", "description"]
 license = {file = "LICENSE"}
 keywords = ["spendpoint"]
 dependencies = [
-    "arklog~=0.5.0",
+    "arklog~=0.5.1",
     "rdflib~=6.2.0",
-    "fastapi~=0.92",
-    "starlette~=0.25.0",
+    "pandas~=1.5.3",
+    "fastapi~=0.94.0",
+    "pyarrow~=11.0.0",
+    "requests~=2.28.2",
+    "starlette~=0.26.0.post1",
     "python-magic~=0.4.27",
-    "uvicorn[standard]~=0.20.0",
+    "uvicorn[standard]~=0.21.0",
 ]
 
 [project.optional-dependencies]
 test = [
-    "pytest~=7.2.1",
+    "pytest~=7.2.2",
     "sparqlwrapper~=2.0.0",
 ]
 doc = [
     "sphinx~=6.1.3",
 ]
 dev = [
-    "tox~=4.4.6",
+    "tox~=4.4.7",
     "pip~=23.0.1",
     "flit~=3.8.0",
     "twine~=4.0.2",
+    "numpy~=1.24.2",
     "invoke~=2.0.0",
     "jinja2~=3.1.2",
     "flake8~=6.0.0",

+ 7 - 4
requirements.txt

@@ -1,21 +1,24 @@
 # SpEndPoint
 arklog            ~= 0.5.1
 rdflib            ~= 6.2.0
-fastapi           ~= 0.92
+pandas            ~= 1.5.3
+fastapi           ~= 0.94.0
+pyarrow           ~= 11.0.0
 requests          ~= 2.28.2
-starlette         ~= 0.25.0
+starlette         ~= 0.26.0.post1
 python-magic      ~= 0.4.27
-uvicorn[standard] ~= 0.20.0
+uvicorn[standard] ~= 0.21.0
 # Test
 pytest        ~= 7.2.2
 sparqlwrapper ~= 2.0.0
 # Doc
 sphinx ~= 6.1.3
 # Dev
-tox      ~= 4.4.6
+tox      ~= 4.4.7
 pip      ~= 23.0.1
 flit     ~= 3.8.0
 twine    ~= 4.0.2
+numpy    ~= 1.24.2
 invoke   ~= 2.0.0
 jinja2   ~= 3.1.2
 flake8   ~= 6.0.0

+ 1 - 1
spendpoint/__init__.py

@@ -1,3 +1,3 @@
 """SPARQL endpoint for ontologies."""
-__version__ = "0.1.0"
+__version__ = "0.2.0"
 __version_info__ = tuple((int(num) if num.isdigit() else num for num in __version__.replace("-", ".", 1).split(".")))

+ 2 - 15
spendpoint/endpoint.py

@@ -3,7 +3,6 @@
 import logging
 import re
 import arklog
-import pkg_resources
 import rdflib
 from typing import Any, Callable, Dict, List, Optional, Union
 from urllib import parse
@@ -43,13 +42,11 @@ class SparqlEndpoint(FastAPI):
         return mime.split(",")[0] in ("text/turtle",)
 
     async def requested_result_type(self, request: Request, operation: str) -> str:
-        logging.debug("Getting mime type.")
         output_mime_type = request.headers["accept"]
         # TODO Ugly hack, fix later (Fuseki sends options)
         output_mime_type = output_mime_type.split(",")[0]
         if isinstance(output_mime_type, list):
             return output_mime_type[0]
-
         # TODO Use match or dict for this
         if not output_mime_type:
             logging.warning("No mime type provided. Setting mimetype to 'application/xml'.")
@@ -68,6 +65,7 @@ class SparqlEndpoint(FastAPI):
         self.description = description
         self.version = version
         super().__init__(*args, title=title, description=description, version=version, **kwargs)
+        logging.debug(self.description)
         rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
         api_responses: Optional[Dict[Union[int, str], Dict[str, Any]]] = {
             200: {
@@ -86,9 +84,6 @@ class SparqlEndpoint(FastAPI):
                     "text/turtle": {"example": "service description"},
                     "application/sparql-results+xml": {"example": "<root></root>"},
                     "application/xml": {"example": "<root></root>"},
-                    # "application/rdf+xml": {
-                    #     "example": '<?xml version="1.0" encoding="UTF-8"?> <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>'
-                    # },
                 },
             },
             400: {
@@ -127,12 +122,11 @@ class SparqlEndpoint(FastAPI):
                 query_results = self.graph.query(query, initNs=graph_ns)
             except Exception as e:
                 logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
+                # TODO Send better error which can be parsed as a SPARQL response or check it client side
                 return JSONResponse(
                     status_code=400,
                     content={"message": "Error executing the SPARQL query on the RDFLib Graph"},
                 )
-
-            logging.debug(f"{type(query_results)=}")
             output_mime_type = await self.requested_result_type(request, query_operation)
             logging.debug(f"Returning {output_mime_type}.")
             try:
@@ -162,13 +156,6 @@ class SparqlEndpoint(FastAPI):
                         query = parse.unquote(params[1])
             return await sparql_endpoint_get(request, query)
 
-        @self.get("/gui", include_in_schema=False)
-        async def serve_yasgui() -> Response:
-            """Serve YASGUI interface"""
-            with open(pkg_resources.resource_filename("spendpoint", "yasgui.html")) as f:
-                html_str = f.read()
-            html_str = html_str.replace("$EXAMPLE_QUERY", "")
-            return Response(content=html_str, media_type="text/html")
 
     def eval_custom_functions(self, ctx: QueryContext, part: CompValue) -> List[Any]:
         if part.name != "Extend":

+ 13 - 6
spendpoint/main.py

@@ -1,16 +1,23 @@
 import arklog
 from spendpoint.endpoint import SparqlEndpoint
 from spendpoint import __version__
-from spendpoint.service import outlier_service, example_service
+from spendpoint.service import outlier_service, example_service, conversion_service
 
 arklog.set_config_logging()
 
+functions = {
+    "https://ontology.rys.app/dt/function/outlier": outlier_service,
+    "https://ontology.rys.app/dt/function/example": example_service,
+    "https://ontology.rys.app/dt/function/conversion": conversion_service,
+}
+
 app = SparqlEndpoint(
     version = __version__,
-    functions = {
-        "https://ontology.rys.app/dt/function/outlier": outlier_service,
-        "https://ontology.rys.app/dt/function/example": example_service,
-    },
+    functions = functions,
     title = "SPARQL endpoint for storage and services",
-    description = "/n".join(("SPARQL endpoint.",))
+    description = "\n".join((
+        "SPARQL endpoint.",
+        f"Supports {len(functions)} custom services:",
+        *(f" - {service_uri}" for service_uri, fc in functions.items()))
+    )
 )

+ 31 - 2
spendpoint/service.py

@@ -1,10 +1,13 @@
 import logging
+from pathlib import Path
+
 import arklog
 import rdflib
-from rdflib import Literal
+import pandas as pd
+from rdflib import Literal, XSD
 from rdflib.plugins.sparql.evalutils import _eval
 from dataclasses import dataclass
-
+from timeit import default_timer as timer
 from spendpoint.bridge import fetch_outliers
 arklog.set_config_logging()
 
@@ -53,6 +56,32 @@ def outlier_service(query_results, ctx, part, eval_part):
     return query_results, ctx, part, eval_part
 
 
+def conversion_service(query_results, ctx, part, eval_part):
+    """"""
+    logging.debug(f"Conversion service.")
+    input_file_name = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
+    output_file_name = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
+    data_dir = Path(__file__).resolve().parent.parent / Path("data")
+    input_file_path = data_dir / Path(input_file_name)
+    output_file_path = data_dir / Path(output_file_name)
+    success = False
+    start_time = timer()
+    if input_file_path.suffix.endswith("csv") and output_file_path.suffix.endswith("parquet"):
+        df = pd.read_csv(input_file_path)
+        df.to_parquet(output_file_path)
+        success = True
+    end_time = timer()
+    query_results.append(eval_part.merge({
+        part.var: Literal(""),
+        rdflib.term.Variable(part.var + "_input") : Literal(input_file_name),
+        rdflib.term.Variable(part.var + "_output") : Literal(output_file_name),
+        rdflib.term.Variable(part.var + "_duration") : Literal(end_time - start_time, datatype=XSD.duration),
+        rdflib.term.Variable(part.var + "_success") : Literal(success),
+    }))
+    return query_results, ctx, part, eval_part
+
+
+
 def example_service(query_results, ctx, part, eval_part):
     """"""
     logging.debug(f"{query_results=}")

+ 0 - 28
spendpoint/yasgui.html

@@ -1,28 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-
-<head>
-  <meta charset="utf-8">
-  <title>RDFLib endpoint</title>
-  <link href="https://unpkg.com/@triply/yasgui@4/build/yasgui.min.css" rel="stylesheet" type="text/css"/>
-  <script src="https://unpkg.com/@triply/yasgui@4/build/yasgui.min.js"></script>
-</head>
-
-<body>
-<div id="yasgui"></div>
-<script>
-  Yasqe.defaults.value = `$EXAMPLE_QUERY`
-  const url = window.location.href.endsWith('/') ? window.location.href.slice(0, -1) : window.location.href;
-  const yasgui = new Yasgui(document.getElementById("yasgui"), {
-    requestConfig: {endpoint: url + "/"},
-    endpointCatalogueOptions: {
-      getData: function () {
-        return [
-          {endpoint: url + "/"},
-        ];
-      },
-      keys: [],
-    },
-  });
-</script>
-</body>

+ 16 - 0
tasks.py

@@ -71,3 +71,19 @@ def release(c, version):
     c.run(f"git push")
     c.run(f"git branch -d release-{_major}.{_minor}.{_patch}")
     c.run(f"git push origin --tags")
+
+
+@task(name="generate", aliases=("gen", "csv"))
+def generate_random_data_csv(c, rows=200000, columns=50):
+    """"""
+    import numpy as np
+    import uuid
+    data_dir = Path(__file__).resolve().parent / Path("data")
+    out_file_path = data_dir / Path("example.csv")
+    chunk = 1000
+    current_row = 0
+    with out_file_path.open("w", encoding="utf-8", buffering=chunk) as csv_file:
+        while current_row < rows:
+            data = [[uuid.uuid4() for i in range(chunk)], np.random.random(chunk) * 100, np.random.random(chunk) * 50, *[np.random.randint(1000, size=(chunk,)) for x in range(columns - 3)]]
+            csv_file.writelines([('%s,%.6f,%.6f,%i' + (',%i' * (columns - 4)) + '\n') % row for row in zip(*data)])
+            current_row += chunk

+ 2 - 2
tests/test_query_endpoint.py

@@ -8,8 +8,8 @@ arklog.set_config_logging()
 prefixes = "\n".join((
     "PREFIX dtf:  <https://ontology.rys.app/dt/function/>",
     "PREFIX owl:  <http://www.w3.org/2002/07/owl#>",
-    "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
-    "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
+    "PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
+    "PREFIX xsd:  <http://www.w3.org/2001/XMLSchema#>",
     "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
 ))