Преглед изворног кода

Merge branch 'release-0.2.0'

Arkadiusz Ryś пре 2 година
родитељ
комит
a1e7a04cc3
12 измењених фајлова са 107 додатих и 69 уклоњених редова
  1. 4 1
      .gitignore
  2. 8 0
      HISTORY.rst
  3. 13 4
      README.rst
  4. 10 6
      pyproject.toml
  5. 7 4
      requirements.txt
  6. 1 1
      spendpoint/__init__.py
  7. 2 15
      spendpoint/endpoint.py
  8. 13 6
      spendpoint/main.py
  9. 31 2
      spendpoint/service.py
  10. 0 28
      spendpoint/yasgui.html
  11. 16 0
      tasks.py
  12. 2 2
      tests/test_query_endpoint.py

+ 4 - 1
.gitignore

@@ -3,5 +3,8 @@ venv/
 build/
 dist/
 *.egg-info
-
 *.log
+
+# Generated example files
+data/example.csv
+data/example.parquet

+ 8 - 0
HISTORY.rst

@@ -2,6 +2,14 @@
 History
 =======
 
+0.0.0 (yyyy-mm-dd)
+------------------
+* Add conversion service.
+
+0.2.0 (2023-04-04)
+------------------
+* Prototype conversion service.
+
 0.1.0 (2023-03-06)
 ------------------
 * Create outlier service bridge.

+ 13 - 4
README.rst

@@ -3,16 +3,25 @@ SpEndPoint
 ##########
 
 Creates a SPARQL endpoint supporting custom services.
-Default access at `http://127.0.0.1:8000/gui`.
+Default access at `http://127.0.0.1:8000`.
+
+Currently supports 3 services:
+
+.. code-block::
+
+   dtf:outlier
+   dtf:example
+   dtf:conversion
 
 Installation
 ------------
 
-.. code-block:: shell
+..
+   .. code-block:: shell
 
-   pip install spendpoint
+      pip install spendpoint
 
-or
+   or
 
 .. code-block:: shell
 

+ 10 - 6
pyproject.toml

@@ -20,27 +20,31 @@ dynamic = ["version", "description"]
 license = {file = "LICENSE"}
 keywords = ["spendpoint"]
 dependencies = [
-    "arklog~=0.5.0",
+    "arklog~=0.5.1",
     "rdflib~=6.2.0",
-    "fastapi~=0.92",
-    "starlette~=0.25.0",
+    "pandas~=1.5.3",
+    "fastapi~=0.94.0",
+    "pyarrow~=11.0.0",
+    "requests~=2.28.2",
+    "starlette~=0.26.0.post1",
     "python-magic~=0.4.27",
-    "uvicorn[standard]~=0.20.0",
+    "uvicorn[standard]~=0.21.0",
 ]
 
 [project.optional-dependencies]
 test = [
-    "pytest~=7.2.1",
+    "pytest~=7.2.2",
     "sparqlwrapper~=2.0.0",
 ]
 doc = [
     "sphinx~=6.1.3",
 ]
 dev = [
-    "tox~=4.4.6",
+    "tox~=4.4.7",
     "pip~=23.0.1",
     "flit~=3.8.0",
     "twine~=4.0.2",
+    "numpy~=1.24.2",
     "invoke~=2.0.0",
     "jinja2~=3.1.2",
     "flake8~=6.0.0",

+ 7 - 4
requirements.txt

@@ -1,21 +1,24 @@
 # SpEndPoint
 arklog            ~= 0.5.1
 rdflib            ~= 6.2.0
-fastapi           ~= 0.92
+pandas            ~= 1.5.3
+fastapi           ~= 0.94.0
+pyarrow           ~= 11.0.0
 requests          ~= 2.28.2
-starlette         ~= 0.25.0
+starlette         ~= 0.26.0.post1
 python-magic      ~= 0.4.27
-uvicorn[standard] ~= 0.20.0
+uvicorn[standard] ~= 0.21.0
 # Test
 pytest        ~= 7.2.2
 sparqlwrapper ~= 2.0.0
 # Doc
 sphinx ~= 6.1.3
 # Dev
-tox      ~= 4.4.6
+tox      ~= 4.4.7
 pip      ~= 23.0.1
 flit     ~= 3.8.0
 twine    ~= 4.0.2
+numpy    ~= 1.24.2
 invoke   ~= 2.0.0
 jinja2   ~= 3.1.2
 flake8   ~= 6.0.0

+ 1 - 1
spendpoint/__init__.py

@@ -1,3 +1,3 @@
 """SPARQL endpoint for ontologies."""
-__version__ = "0.1.0"
+__version__ = "0.2.0"
 __version_info__ = tuple((int(num) if num.isdigit() else num for num in __version__.replace("-", ".", 1).split(".")))

+ 2 - 15
spendpoint/endpoint.py

@@ -3,7 +3,6 @@
 import logging
 import re
 import arklog
-import pkg_resources
 import rdflib
 from typing import Any, Callable, Dict, List, Optional, Union
 from urllib import parse
@@ -43,13 +42,11 @@ class SparqlEndpoint(FastAPI):
         return mime.split(",")[0] in ("text/turtle",)
 
     async def requested_result_type(self, request: Request, operation: str) -> str:
-        logging.debug("Getting mime type.")
         output_mime_type = request.headers["accept"]
         # TODO Ugly hack, fix later (Fuseki sends options)
         output_mime_type = output_mime_type.split(",")[0]
         if isinstance(output_mime_type, list):
             return output_mime_type[0]
-
         # TODO Use match or dict for this
         if not output_mime_type:
             logging.warning("No mime type provided. Setting mimetype to 'application/xml'.")
@@ -68,6 +65,7 @@ class SparqlEndpoint(FastAPI):
         self.description = description
         self.version = version
         super().__init__(*args, title=title, description=description, version=version, **kwargs)
+        logging.debug(self.description)
         rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
         api_responses: Optional[Dict[Union[int, str], Dict[str, Any]]] = {
             200: {
@@ -86,9 +84,6 @@ class SparqlEndpoint(FastAPI):
                     "text/turtle": {"example": "service description"},
                     "application/sparql-results+xml": {"example": "<root></root>"},
                     "application/xml": {"example": "<root></root>"},
-                    # "application/rdf+xml": {
-                    #     "example": '<?xml version="1.0" encoding="UTF-8"?> <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>'
-                    # },
                 },
             },
             400: {
@@ -127,12 +122,11 @@ class SparqlEndpoint(FastAPI):
                 query_results = self.graph.query(query, initNs=graph_ns)
             except Exception as e:
                 logging.error("Error executing the SPARQL query on the RDFLib Graph: " + str(e))
+                # TODO Send better error which can be parsed as a SPARQL response or check it client side
                 return JSONResponse(
                     status_code=400,
                     content={"message": "Error executing the SPARQL query on the RDFLib Graph"},
                 )
-
-            logging.debug(f"{type(query_results)=}")
             output_mime_type = await self.requested_result_type(request, query_operation)
             logging.debug(f"Returning {output_mime_type}.")
             try:
@@ -162,13 +156,6 @@ class SparqlEndpoint(FastAPI):
                         query = parse.unquote(params[1])
             return await sparql_endpoint_get(request, query)
 
-        @self.get("/gui", include_in_schema=False)
-        async def serve_yasgui() -> Response:
-            """Serve YASGUI interface"""
-            with open(pkg_resources.resource_filename("spendpoint", "yasgui.html")) as f:
-                html_str = f.read()
-            html_str = html_str.replace("$EXAMPLE_QUERY", "")
-            return Response(content=html_str, media_type="text/html")
 
     def eval_custom_functions(self, ctx: QueryContext, part: CompValue) -> List[Any]:
         if part.name != "Extend":

+ 13 - 6
spendpoint/main.py

@@ -1,16 +1,23 @@
 import arklog
 from spendpoint.endpoint import SparqlEndpoint
 from spendpoint import __version__
-from spendpoint.service import outlier_service, example_service
+from spendpoint.service import outlier_service, example_service, conversion_service
 
 arklog.set_config_logging()
 
+functions = {
+    "https://ontology.rys.app/dt/function/outlier": outlier_service,
+    "https://ontology.rys.app/dt/function/example": example_service,
+    "https://ontology.rys.app/dt/function/conversion": conversion_service,
+}
+
 app = SparqlEndpoint(
     version = __version__,
-    functions = {
-        "https://ontology.rys.app/dt/function/outlier": outlier_service,
-        "https://ontology.rys.app/dt/function/example": example_service,
-    },
+    functions = functions,
     title = "SPARQL endpoint for storage and services",
-    description = "/n".join(("SPARQL endpoint.",))
+    description = "\n".join((
+        "SPARQL endpoint.",
+        f"Supports {len(functions)} custom services:",
+        *(f" - {service_uri}" for service_uri, fc in functions.items()))
+    )
 )

+ 31 - 2
spendpoint/service.py

@@ -1,10 +1,13 @@
 import logging
+from pathlib import Path
+
 import arklog
 import rdflib
-from rdflib import Literal
+import pandas as pd
+from rdflib import Literal, XSD
 from rdflib.plugins.sparql.evalutils import _eval
 from dataclasses import dataclass
-
+from timeit import default_timer as timer
 from spendpoint.bridge import fetch_outliers
 arklog.set_config_logging()
 
@@ -53,6 +56,32 @@ def outlier_service(query_results, ctx, part, eval_part):
     return query_results, ctx, part, eval_part
 
 
+def conversion_service(query_results, ctx, part, eval_part):
+    """"""
+    logging.debug(f"Conversion service.")
+    input_file_name = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
+    output_file_name = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
+    data_dir = Path(__file__).resolve().parent.parent / Path("data")
+    input_file_path = data_dir / Path(input_file_name)
+    output_file_path = data_dir / Path(output_file_name)
+    success = False
+    start_time = timer()
+    if input_file_path.suffix.endswith("csv") and output_file_path.suffix.endswith("parquet"):
+        df = pd.read_csv(input_file_path)
+        df.to_parquet(output_file_path)
+        success = True
+    end_time = timer()
+    query_results.append(eval_part.merge({
+        part.var: Literal(""),
+        rdflib.term.Variable(part.var + "_input") : Literal(input_file_name),
+        rdflib.term.Variable(part.var + "_output") : Literal(output_file_name),
+        rdflib.term.Variable(part.var + "_duration") : Literal(end_time - start_time, datatype=XSD.duration),
+        rdflib.term.Variable(part.var + "_success") : Literal(success),
+    }))
+    return query_results, ctx, part, eval_part
+
+
+
 def example_service(query_results, ctx, part, eval_part):
     """"""
     logging.debug(f"{query_results=}")

+ 0 - 28
spendpoint/yasgui.html

@@ -1,28 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-
-<head>
-  <meta charset="utf-8">
-  <title>RDFLib endpoint</title>
-  <link href="https://unpkg.com/@triply/yasgui@4/build/yasgui.min.css" rel="stylesheet" type="text/css"/>
-  <script src="https://unpkg.com/@triply/yasgui@4/build/yasgui.min.js"></script>
-</head>
-
-<body>
-<div id="yasgui"></div>
-<script>
-  Yasqe.defaults.value = `$EXAMPLE_QUERY`
-  const url = window.location.href.endsWith('/') ? window.location.href.slice(0, -1) : window.location.href;
-  const yasgui = new Yasgui(document.getElementById("yasgui"), {
-    requestConfig: {endpoint: url + "/"},
-    endpointCatalogueOptions: {
-      getData: function () {
-        return [
-          {endpoint: url + "/"},
-        ];
-      },
-      keys: [],
-    },
-  });
-</script>
-</body>

+ 16 - 0
tasks.py

@@ -71,3 +71,19 @@ def release(c, version):
     c.run(f"git push")
     c.run(f"git branch -d release-{_major}.{_minor}.{_patch}")
     c.run(f"git push origin --tags")
+
+
+@task(name="generate", aliases=("gen", "csv"))
+def generate_random_data_csv(c, rows=200000, columns=50):
+    """"""
+    import numpy as np
+    import uuid
+    data_dir = Path(__file__).resolve().parent / Path("data")
+    out_file_path = data_dir / Path("example.csv")
+    chunk = 1000
+    current_row = 0
+    with out_file_path.open("w", encoding="utf-8", buffering=chunk) as csv_file:
+        while current_row < rows:
+            data = [[uuid.uuid4() for i in range(chunk)], np.random.random(chunk) * 100, np.random.random(chunk) * 50, *[np.random.randint(1000, size=(chunk,)) for x in range(columns - 3)]]
+            csv_file.writelines([('%s,%.6f,%.6f,%i' + (',%i' * (columns - 4)) + '\n') % row for row in zip(*data)])
+            current_row += chunk

+ 2 - 2
tests/test_query_endpoint.py

@@ -8,8 +8,8 @@ arklog.set_config_logging()
 prefixes = "\n".join((
     "PREFIX dtf:  <https://ontology.rys.app/dt/function/>",
     "PREFIX owl:  <http://www.w3.org/2002/07/owl#>",
-    "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
-    "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
+    "PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
+    "PREFIX xsd:  <http://www.w3.org/2001/XMLSchema#>",
     "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
 ))