Browse Source

Make application configurable

Arkadiusz Ryś 2 years ago
parent
commit
f174a25a26

+ 8 - 0
README.rst

@@ -28,3 +28,11 @@ Installation
 .. code-block:: shell
 .. code-block:: shell
 
 
   pip install --index-url https://pip:<YOUR_ACCESS_TOKEN>@git.rys.one/api/v4/projects/262/packages/pypi/simple --no-deps spendpoint
   pip install --index-url https://pip:<YOUR_ACCESS_TOKEN>@git.rys.one/api/v4/projects/262/packages/pypi/simple --no-deps spendpoint
+
+Configuration
+-------------
+
+A configuration file at `data/configuration.toml` holds all user-configurable settings.
+You can set the `host` and `port` the server will listen on.
+A more advanced use is to register extra services.
+Each such service must also be defined in the `service.py` file.

+ 1 - 1
data/configuration.toml

@@ -7,7 +7,7 @@ name = "outliers"
 namespace = "https://ontology.rys.app/dt/function/outlier"
 namespace = "https://ontology.rys.app/dt/function/outlier"
 call = "outlier_service"
 call = "outlier_service"
 endpoint = "http://127.0.0.1:9090/api/csv/outlier"
 endpoint = "http://127.0.0.1:9090/api/csv/outlier"
-timeout = 5 # How many seconds we wait for a result
+timeout = 60 # How many seconds we wait for a result
 
 
 [[services]]
 [[services]]
 name = "example"
 name = "example"

+ 2 - 0
pyproject.toml

@@ -20,9 +20,11 @@ dynamic = ["version", "description"]
 license = {file = "LICENSE"}
 license = {file = "LICENSE"}
 keywords = ["spendpoint"]
 keywords = ["spendpoint"]
 dependencies = [
 dependencies = [
+    "toml~=0.10.2",
     "arklog~=0.5.1",
     "arklog~=0.5.1",
     "rdflib~=6.3.2",
     "rdflib~=6.3.2",
     "pandas~=2.0.1",
     "pandas~=2.0.1",
+    "dacite~=1.8.1",
     "fastapi~=0.95.2",
     "fastapi~=0.95.2",
     "pyarrow~=12.0.0",
     "pyarrow~=12.0.0",
     "requests~=2.30.0",
     "requests~=2.30.0",

+ 2 - 0
requirements.txt

@@ -1,7 +1,9 @@
 # SpEndPoint
 # SpEndPoint
+toml              ~= 0.10.2
 arklog            ~= 0.5.1
 arklog            ~= 0.5.1
 rdflib            ~= 6.3.2
 rdflib            ~= 6.3.2
 pandas            ~= 2.0.1
 pandas            ~= 2.0.1
+dacite            ~= 1.8.1
 fastapi           ~= 0.95.2
 fastapi           ~= 0.95.2
 pyarrow           ~= 12.0.0
 pyarrow           ~= 12.0.0
 requests          ~= 2.30.0
 requests          ~= 2.30.0

+ 21 - 2
spendpoint/__main__.py

@@ -1,4 +1,23 @@
+import logging
+import sys
+from pathlib import Path
+
+import arklog
+import dacite
+import toml
 import uvicorn
 import uvicorn
-from spendpoint.main import app
 
 
-uvicorn.run(app, host="0.0.0.0", port=8000)
+from spendpoint.configuration import Configuration
+from spendpoint.main import get_application
+
+arklog.set_config_logging()
+
+data_dir = Path(__file__).resolve().parent.parent / Path("data")
+logging.debug(f"Looking for configuration in '{data_dir}'.")
+try:
+    configuration = toml.loads((data_dir / Path("configuration.toml")).read_text(encoding="utf-8"))
+    configuration = dacite.from_dict(data_class=Configuration, data=configuration, )
+except FileNotFoundError as e:
+    logging.error(f"Configuration 'configuration.toml' not found. {e}")
+    sys.exit(8)
+uvicorn.run(get_application(configuration), host=configuration.server.host, port=configuration.server.port)

+ 1 - 2
spendpoint/bridge.py

@@ -7,9 +7,8 @@ from typing import Union
 arklog.set_config_logging()
 arklog.set_config_logging()
 
 
 
 
-def fetch_outliers(file_name: str, column: Union[str, int], iri: str) -> Graph:
+def fetch_outliers(file_name: str, column: Union[str, int], iri: str, outlier_service_url: str) -> Graph:
     """"""
     """"""
-    outlier_service_url = "http://127.0.0.1:9090/api/csv/outlier"
     try:
     try:
         column = column if isinstance(column, int) else int(column)
         column = column if isinstance(column, int) else int(column)
     except ValueError as e:
     except ValueError as e:

+ 22 - 0
spendpoint/configuration.py

@@ -0,0 +1,22 @@
+from collections import Counter
+from dataclasses import dataclass, field
+from typing import Iterable, List, Optional
+
+@dataclass(init=True, repr=True, order=False, frozen=True)
+class Server:
+    host: str
+    port: int
+
+@dataclass(init=True, repr=True, order=False, frozen=True)
+class Service:
+    name: str
+    namespace: str
+    call: str
+    endpoint: Optional[str] = None
+    timeout: Optional[int] = None
+
+
+@dataclass(init=True, repr=True, order=False, frozen=True)
+class Configuration:
+    server: Server
+    services: List[Service] = field(default_factory=list)

+ 8 - 8
spendpoint/endpoint.py

@@ -4,7 +4,7 @@ import logging
 import re
 import re
 import arklog
 import arklog
 import rdflib
 import rdflib
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 from urllib import parse
 from urllib import parse
 from fastapi import FastAPI, Query, Request, Response
 from fastapi import FastAPI, Query, Request, Response
 from fastapi.responses import JSONResponse
 from fastapi.responses import JSONResponse
@@ -15,6 +15,8 @@ from rdflib.plugins.sparql.evalutils import _eval
 from rdflib.plugins.sparql.parserutils import CompValue
 from rdflib.plugins.sparql.parserutils import CompValue
 from rdflib.plugins.sparql.sparql import QueryContext, SPARQLError
 from rdflib.plugins.sparql.sparql import QueryContext, SPARQLError
 
 
+from spendpoint import service
+
 arklog.set_config_logging()
 arklog.set_config_logging()
 
 
 
 
@@ -57,13 +59,13 @@ class SparqlEndpoint(FastAPI):
             return "application/rdf+xml"
             return "application/rdf+xml"
         return output_mime_type
         return output_mime_type
 
 
-    def __init__(self, *args: Any, title: str, description: str, version: str, functions: Dict[str, Callable[..., Any]], graph: Union[Graph, ConjunctiveGraph, Dataset] = ConjunctiveGraph(), **kwargs: Any):
+    def __init__(self, *args: Any, title: str, description: str, version: str, configuration, graph: Union[Graph, ConjunctiveGraph, Dataset] = ConjunctiveGraph(), **kwargs: Any):
         """"""
         """"""
         self.graph = graph
         self.graph = graph
-        self.functions = functions
         self.title = title
         self.title = title
         self.description = description
         self.description = description
         self.version = version
         self.version = version
+        self.configuration = configuration
         super().__init__(*args, title=title, description=description, version=version, **kwargs)
         super().__init__(*args, title=title, description=description, version=version, **kwargs)
         logging.debug(self.description)
         logging.debug(self.description)
         rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
         rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
@@ -166,12 +168,10 @@ class SparqlEndpoint(FastAPI):
         for eval_part in evalPart(ctx, part.p):
         for eval_part in evalPart(ctx, part.p):
             # Checks if the function is a URI (custom function)
             # Checks if the function is a URI (custom function)
             if hasattr(part.expr, "iri"):
             if hasattr(part.expr, "iri"):
-                # Iterate through the custom functions passed in the constructor
-                for function_uri, custom_function in self.functions.items():
+                for conf_service in self.configuration.services:
                     # Check if URI correspond to a registered custom function
                     # Check if URI correspond to a registered custom function
-                    if part.expr.iri == URIRef(function_uri):
-                        # Execute each function
-                        query_results, ctx, part, eval_part = custom_function(query_results, ctx, part, eval_part)
+                    if part.expr.iri == URIRef(conf_service.namespace):
+                        query_results, ctx, part, eval_part = getattr(service, conf_service.call)(query_results, ctx, part, eval_part, conf_service)
             else:
             else:
                 # For built-in SPARQL functions (that are not URIs)
                 # For built-in SPARQL functions (that are not URIs)
                 evaluation: List[Any] = [_eval(part.expr, eval_part.forget(ctx, _except=part._vars))]
                 evaluation: List[Any] = [_eval(part.expr, eval_part.forget(ctx, _except=part._vars))]

+ 15 - 18
spendpoint/main.py

@@ -1,23 +1,20 @@
 import arklog
 import arklog
+
+from spendpoint.configuration import Configuration
 from spendpoint.endpoint import SparqlEndpoint
 from spendpoint.endpoint import SparqlEndpoint
 from spendpoint import __version__
 from spendpoint import __version__
-from spendpoint.service import outlier_service, example_service, conversion_service
-
-arklog.set_config_logging()
-
-functions = {
-    "https://ontology.rys.app/dt/function/outlier": outlier_service,
-    "https://ontology.rys.app/dt/function/example": example_service,
-    "https://ontology.rys.app/dt/function/conversion": conversion_service,
-}
 
 
-app = SparqlEndpoint(
-    version = __version__,
-    functions = functions,
-    title = "SPARQL endpoint for storage and services",
-    description = "\n".join((
-        "SPARQL endpoint.",
-        f"Supports {len(functions)} custom services:",
-        *(f" - {service_uri}" for service_uri, fc in functions.items()))
+def get_application(configuration: Configuration):
+    arklog.set_config_logging()
+    functions = [conf_service.namespace for conf_service in configuration.services]
+    app = SparqlEndpoint(
+        version = __version__,
+        title = "SPARQL endpoint for storage and services",
+        description = "\n".join((
+            "SPARQL endpoint.",
+            f"Supports {len(functions)} custom services:",
+            *(f" - {service_uri}" for service_uri in functions))
+        ),
+        configuration=configuration
     )
     )
-)
+    return app

+ 7 - 6
spendpoint/service.py

@@ -17,7 +17,7 @@ class Outlier:
     iri: str
     iri: str
     value: str
     value: str
 
 
-def outlier_service(query_results, ctx, part, eval_part):
+def outlier_service(query_results, ctx, part, eval_part, service_configuration):
     """
     """
 
 
     Example query:
     Example query:
@@ -39,14 +39,15 @@ def outlier_service(query_results, ctx, part, eval_part):
     :param ctx:
     :param ctx:
     :param part:
     :param part:
     :param eval_part:
     :param eval_part:
+    :param service_configuration:
     :return:
     :return:
     """
     """
-    logging.debug(f"Outlier service.")
+    logging.debug(f"Outlier service '{service_configuration.namespace}'.")
     file_name = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
     file_name = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
     column = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
     column = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
     iri = str(_eval(part.expr.expr[2], eval_part.forget(ctx, _except=part.expr._vars)))
     iri = str(_eval(part.expr.expr[2], eval_part.forget(ctx, _except=part.expr._vars)))
     logging.info(f"Looking for outlier in '{file_name}' at column '{column}' for '{iri}'.")
     logging.info(f"Looking for outlier in '{file_name}' at column '{column}' for '{iri}'.")
-    outlier_graph = fetch_outliers(file_name, column, iri)
+    outlier_graph = fetch_outliers(file_name, column, iri, service_configuration.endpoint)
     for stmt in outlier_graph:
     for stmt in outlier_graph:
         query_results.append(eval_part.merge({
         query_results.append(eval_part.merge({
             part.var: stmt[0],
             part.var: stmt[0],
@@ -57,9 +58,9 @@ def outlier_service(query_results, ctx, part, eval_part):
     return query_results, ctx, part, eval_part
     return query_results, ctx, part, eval_part
 
 
 
 
-def conversion_service(query_results, ctx, part, eval_part):
+def conversion_service(query_results, ctx, part, eval_part, service_configuration):
     """"""
     """"""
-    logging.debug(f"Conversion service.")
+    logging.debug(f"Conversion service '{service_configuration.namespace}'.")
     input_file_name = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
     input_file_name = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
     output_file_name = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
     output_file_name = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
     data_dir = Path(__file__).resolve().parent.parent / Path("data")
     data_dir = Path(__file__).resolve().parent.parent / Path("data")
@@ -83,7 +84,7 @@ def conversion_service(query_results, ctx, part, eval_part):
 
 
 
 
 
 
-def example_service(query_results, ctx, part, eval_part):
+def example_service(query_results, ctx, part, eval_part, service_configuration):
     """"""
     """"""
     logging.debug(f"{query_results=}")
     logging.debug(f"{query_results=}")
     logging.debug(f"{ctx=}")
     logging.debug(f"{ctx=}")