|
@@ -1,10 +1,13 @@
|
|
import logging
|
|
import logging
|
|
|
|
+from pathlib import Path
|
|
|
|
+
|
|
import arklog
|
|
import arklog
|
|
import rdflib
|
|
import rdflib
|
|
-from rdflib import Literal
|
|
|
|
|
|
+import pandas as pd
|
|
|
|
+from rdflib import Literal, XSD
|
|
from rdflib.plugins.sparql.evalutils import _eval
|
|
from rdflib.plugins.sparql.evalutils import _eval
|
|
from dataclasses import dataclass
|
|
from dataclasses import dataclass
|
|
-
|
|
|
|
|
|
+from timeit import default_timer as timer
|
|
from spendpoint.bridge import fetch_outliers
|
|
from spendpoint.bridge import fetch_outliers
|
|
arklog.set_config_logging()
|
|
arklog.set_config_logging()
|
|
|
|
|
|
@@ -53,6 +56,32 @@ def outlier_service(query_results, ctx, part, eval_part):
|
|
return query_results, ctx, part, eval_part
|
|
return query_results, ctx, part, eval_part
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_in_data_dir(data_dir, file_name):
    """Return ``data_dir / file_name`` resolved, or ``None`` if it escapes ``data_dir``."""
    candidate = (data_dir / file_name).resolve()
    # Absolute names and ".." segments would otherwise let a query read or
    # write anywhere on the filesystem; an empty name resolves to data_dir
    # itself and is rejected as well.
    if data_dir not in candidate.parents:
        return None
    return candidate


def conversion_service(query_results, ctx, part, eval_part):
    """Convert a CSV file from the data directory to Parquet and report the outcome.

    The input and output file names are obtained by evaluating
    ``part.expr.expr[0]`` and ``part.expr.expr[1]``. Both are interpreted
    relative to the ``data`` directory that sits next to this module's parent
    directory, and must resolve to a location inside it. The conversion only
    runs for a ``.csv`` input and a ``.parquet`` output.

    One solution is appended to ``query_results`` with these bindings:

    * ``part.var`` -- an empty literal
    * ``<var>_input`` / ``<var>_output`` -- the file names as given
    * ``<var>_duration`` -- elapsed wall-clock time as an ``xsd:duration``
    * ``<var>_success`` -- whether the Parquet file was written

    Conversion failures (missing file, unparsable CSV, missing Parquet engine)
    are logged and reported through ``<var>_success`` instead of being raised.

    Returns:
        The ``(query_results, ctx, part, eval_part)`` tuple, with
        ``query_results`` extended in place.
    """
    logging.debug("Conversion service.")
    scoped_ctx = eval_part.forget(ctx, _except=part.expr._vars)
    input_file_name = str(_eval(part.expr.expr[0], scoped_ctx))
    output_file_name = str(_eval(part.expr.expr[1], scoped_ctx))

    data_dir = (Path(__file__).resolve().parent.parent / "data").resolve()
    input_file_path = _resolve_in_data_dir(data_dir, input_file_name)
    output_file_path = _resolve_in_data_dir(data_dir, output_file_name)

    success = False
    start_time = timer()
    if input_file_path is None or output_file_path is None:
        logging.warning(
            "Refusing conversion outside the data directory: %r -> %r",
            input_file_name,
            output_file_name,
        )
    elif input_file_path.suffix == ".csv" and output_file_path.suffix == ".parquet":
        try:
            pd.read_csv(input_file_path).to_parquet(output_file_path)
        except (OSError, ValueError, ImportError):
            # OSError: missing/unreadable file; ValueError: pandas parse
            # errors; ImportError: no Parquet engine installed.
            logging.exception(
                "Conversion of %r to %r failed.", input_file_name, output_file_name
            )
        else:
            success = True
    elapsed_seconds = end_time_minus_start = timer() - start_time

    # xsd:duration requires an ISO 8601 lexical form ("PT<seconds>S"); fixed
    # point formatting avoids exponent notation for very short runs.
    duration_literal = Literal(f"PT{elapsed_seconds:.6f}S", datatype=XSD.duration)

    query_results.append(eval_part.merge({
        part.var: Literal(""),
        rdflib.term.Variable(part.var + "_input"): Literal(input_file_name),
        rdflib.term.Variable(part.var + "_output"): Literal(output_file_name),
        rdflib.term.Variable(part.var + "_duration"): duration_literal,
        rdflib.term.Variable(part.var + "_success"): Literal(success),
    }))
    return query_results, ctx, part, eval_part
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
def example_service(query_results, ctx, part, eval_part):
|
|
def example_service(query_results, ctx, part, eval_part):
|
|
""""""
|
|
""""""
|
|
logging.debug(f"{query_results=}")
|
|
logging.debug(f"{query_results=}")
|