Sfoglia il codice sorgente

Make application configurable

Arkadiusz Ryś 2 anni fa
parent
commit
f174a25a26

+ 8 - 0
README.rst

@@ -28,3 +28,11 @@ Installation
 .. code-block:: shell
 
   pip install --index-url https://pip:${PIP_ACCESS_TOKEN}@git.rys.one/api/v4/projects/262/packages/pypi/simple --no-deps spendpoint
+
+Configuration
+-------------
+
+A configuration file at ``data/configuration.toml`` holds all user-configurable data.
+You can set the ``host`` and ``port`` the server will listen on.
+A more advanced use is to import extra services.
+These services need to be defined in the ``service.py`` file as well.

+ 1 - 1
data/configuration.toml

@@ -7,7 +7,7 @@ name = "outliers"
 namespace = "https://ontology.rys.app/dt/function/outlier"
 call = "outlier_service"
 endpoint = "http://127.0.0.1:9090/api/csv/outlier"
-timeout = 5 # How many seconds we wait for a result
+timeout = 60 # How many seconds we wait for a result
 
 [[services]]
 name = "example"

+ 2 - 0
pyproject.toml

@@ -20,9 +20,11 @@ dynamic = ["version", "description"]
 license = {file = "LICENSE"}
 keywords = ["spendpoint"]
 dependencies = [
+    "toml~=0.10.2",
     "arklog~=0.5.1",
     "rdflib~=6.3.2",
     "pandas~=2.0.1",
+    "dacite~=1.8.1",
     "fastapi~=0.95.2",
     "pyarrow~=12.0.0",
     "requests~=2.30.0",

+ 2 - 0
requirements.txt

@@ -1,7 +1,9 @@
 # SpEndPoint
+toml              ~= 0.10.2
 arklog            ~= 0.5.1
 rdflib            ~= 6.3.2
 pandas            ~= 2.0.1
+dacite            ~= 1.8.1
 fastapi           ~= 0.95.2
 pyarrow           ~= 12.0.0
 requests          ~= 2.30.0

+ 21 - 2
spendpoint/__main__.py

@@ -1,4 +1,23 @@
+import logging
+import sys
+from pathlib import Path
+
+import arklog
+import dacite
+import toml
 import uvicorn
-from spendpoint.main import app
 
-uvicorn.run(app, host="0.0.0.0", port=8000)
+from spendpoint.configuration import Configuration
+from spendpoint.main import get_application
+
+arklog.set_config_logging()
+
+data_dir = Path(__file__).resolve().parent.parent / Path("data")
+logging.debug(f"Looking for configuration in '{data_dir}'.")
+try:
+    configuration = toml.loads((data_dir / Path("configuration.toml")).read_text(encoding="utf-8"))
+    configuration = dacite.from_dict(data_class=Configuration, data=configuration, )
+except FileNotFoundError as e:
+    logging.error(f"Configuration 'configuration.toml' not found. {e}")
+    sys.exit(8)
+uvicorn.run(get_application(configuration), host=configuration.server.host, port=configuration.server.port)

+ 1 - 2
spendpoint/bridge.py

@@ -7,9 +7,8 @@ from typing import Union
 arklog.set_config_logging()
 
 
-def fetch_outliers(file_name: str, column: Union[str, int], iri: str) -> Graph:
+def fetch_outliers(file_name: str, column: Union[str, int], iri: str, outlier_service_url: str) -> Graph:
     """"""
-    outlier_service_url = "http://127.0.0.1:9090/api/csv/outlier"
     try:
         column = column if isinstance(column, int) else int(column)
     except ValueError as e:

+ 22 - 0
spendpoint/configuration.py

@@ -0,0 +1,22 @@
+from collections import Counter
+from dataclasses import dataclass, field
+from typing import Iterable, List, Optional
+
+@dataclass(init=True, repr=True, order=False, frozen=True)
+class Server:
+    host: str
+    port: int
+
+@dataclass(init=True, repr=True, order=False, frozen=True)
+class Service:
+    name: str
+    namespace: str
+    call: str
+    endpoint: Optional[str] = None
+    timeout: Optional[int] = None
+
+
+@dataclass(init=True, repr=True, order=False, frozen=True)
+class Configuration:
+    server: Server
+    services: List[Service] = field(default_factory=list)

+ 8 - 8
spendpoint/endpoint.py

@@ -4,7 +4,7 @@ import logging
 import re
 import arklog
 import rdflib
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 from urllib import parse
 from fastapi import FastAPI, Query, Request, Response
 from fastapi.responses import JSONResponse
@@ -15,6 +15,8 @@ from rdflib.plugins.sparql.evalutils import _eval
 from rdflib.plugins.sparql.parserutils import CompValue
 from rdflib.plugins.sparql.sparql import QueryContext, SPARQLError
 
+from spendpoint import service
+
 arklog.set_config_logging()
 
 
@@ -57,13 +59,13 @@ class SparqlEndpoint(FastAPI):
             return "application/rdf+xml"
         return output_mime_type
 
-    def __init__(self, *args: Any, title: str, description: str, version: str, functions: Dict[str, Callable[..., Any]], graph: Union[Graph, ConjunctiveGraph, Dataset] = ConjunctiveGraph(), **kwargs: Any):
+    def __init__(self, *args: Any, title: str, description: str, version: str, configuration, graph: Union[Graph, ConjunctiveGraph, Dataset] = ConjunctiveGraph(), **kwargs: Any):
         """"""
         self.graph = graph
-        self.functions = functions
         self.title = title
         self.description = description
         self.version = version
+        self.configuration = configuration
         super().__init__(*args, title=title, description=description, version=version, **kwargs)
         logging.debug(self.description)
         rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions
@@ -166,12 +168,10 @@ class SparqlEndpoint(FastAPI):
         for eval_part in evalPart(ctx, part.p):
             # Checks if the function is a URI (custom function)
             if hasattr(part.expr, "iri"):
-                # Iterate through the custom functions passed in the constructor
-                for function_uri, custom_function in self.functions.items():
+                for conf_service in self.configuration.services:
                     # Check if URI correspond to a registered custom function
-                    if part.expr.iri == URIRef(function_uri):
-                        # Execute each function
-                        query_results, ctx, part, eval_part = custom_function(query_results, ctx, part, eval_part)
+                    if part.expr.iri == URIRef(conf_service.namespace):
+                        query_results, ctx, part, eval_part = getattr(service, conf_service.call)(query_results, ctx, part, eval_part, conf_service)
             else:
                 # For built-in SPARQL functions (that are not URIs)
                 evaluation: List[Any] = [_eval(part.expr, eval_part.forget(ctx, _except=part._vars))]

+ 15 - 18
spendpoint/main.py

@@ -1,23 +1,20 @@
 import arklog
+
+from spendpoint.configuration import Configuration
 from spendpoint.endpoint import SparqlEndpoint
 from spendpoint import __version__
-from spendpoint.service import outlier_service, example_service, conversion_service
-
-arklog.set_config_logging()
-
-functions = {
-    "https://ontology.rys.app/dt/function/outlier": outlier_service,
-    "https://ontology.rys.app/dt/function/example": example_service,
-    "https://ontology.rys.app/dt/function/conversion": conversion_service,
-}
 
-app = SparqlEndpoint(
-    version = __version__,
-    functions = functions,
-    title = "SPARQL endpoint for storage and services",
-    description = "\n".join((
-        "SPARQL endpoint.",
-        f"Supports {len(functions)} custom services:",
-        *(f" - {service_uri}" for service_uri, fc in functions.items()))
+def get_application(configuration: Configuration):
+    arklog.set_config_logging()
+    functions = [conf_service.namespace for conf_service in configuration.services]
+    app = SparqlEndpoint(
+        version = __version__,
+        title = "SPARQL endpoint for storage and services",
+        description = "\n".join((
+            "SPARQL endpoint.",
+            f"Supports {len(functions)} custom services:",
+            *(f" - {service_uri}" for service_uri in functions))
+        ),
+        configuration=configuration
     )
-)
+    return app

+ 7 - 6
spendpoint/service.py

@@ -17,7 +17,7 @@ class Outlier:
     iri: str
     value: str
 
-def outlier_service(query_results, ctx, part, eval_part):
+def outlier_service(query_results, ctx, part, eval_part, service_configuration):
     """
 
     Example query:
@@ -39,14 +39,15 @@ def outlier_service(query_results, ctx, part, eval_part):
     :param ctx:
     :param part:
     :param eval_part:
+    :param service_configuration:
     :return:
     """
-    logging.debug(f"Outlier service.")
+    logging.debug(f"Outlier service '{service_configuration.namespace}'.")
     file_name = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
     column = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
     iri = str(_eval(part.expr.expr[2], eval_part.forget(ctx, _except=part.expr._vars)))
     logging.info(f"Looking for outlier in '{file_name}' at column '{column}' for '{iri}'.")
-    outlier_graph = fetch_outliers(file_name, column, iri)
+    outlier_graph = fetch_outliers(file_name, column, iri, service_configuration.endpoint)
     for stmt in outlier_graph:
         query_results.append(eval_part.merge({
             part.var: stmt[0],
@@ -57,9 +58,9 @@ def outlier_service(query_results, ctx, part, eval_part):
     return query_results, ctx, part, eval_part
 
 
-def conversion_service(query_results, ctx, part, eval_part):
+def conversion_service(query_results, ctx, part, eval_part, service_configuration):
     """"""
-    logging.debug(f"Conversion service.")
+    logging.debug(f"Conversion service '{service_configuration.namespace}'.")
     input_file_name = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
     output_file_name = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
     data_dir = Path(__file__).resolve().parent.parent / Path("data")
@@ -83,7 +84,7 @@ def conversion_service(query_results, ctx, part, eval_part):
 
 
 
-def example_service(query_results, ctx, part, eval_part):
+def example_service(query_results, ctx, part, eval_part, service_configuration):
     """"""
     logging.debug(f"{query_results=}")
     logging.debug(f"{ctx=}")