Просмотр исходного кода

Merge remote-tracking branch 'msdl/main'

Joeri Exelmans 3 года назад
Родитель
Commit
48ccaaff3f

BIN
docs/virtual data.xopp


+ 47 - 0
tools/oml-generators/src/main/java/csv/CSVOperationManager.java

@@ -0,0 +1,47 @@
+package csv;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+
+import csv.operations.OutlierDetectionOperation;
+import util.Operation;
+
+public class CSVOperationManager {
+
+	private Operation operation;
+	
+	
+	public CSVOperationManager() {
+		
+	
+	}
+		
+	public String executeCSVOperation(File csv) throws IOException {
+		if (csv != null && csv.exists()) {
+			operation.setFile(csv);
+			return operation.execute();
+		}
+		throw new FileNotFoundException("The csv file does not exist!");
+		
+	}
+	
+	public void setOperation(Operation op) {
+		operation = op;
+	}
+	
+	public static void main(String[] args) throws IOException {
+		CSVOperationManager opm = new CSVOperationManager();		
+		opm.setOperation(new OutlierDetectionOperation(2,"http://ua.be/drivetrain/description/artifacts/artifacts#", "drivetrain-sensor-data"));
+		System.out.println(opm.executeCSVOperation(new File("src/main/resources/csv/rotation.csv")));
+	}
+	
+	
+	
+}

+ 120 - 0
tools/oml-generators/src/main/java/csv/operations/OutlierDetectionOperation.java

@@ -0,0 +1,120 @@
+package csv.operations;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+
+import util.Operation;
+import util.Pair;
+import util.Prefixes;
+
+/**
+ * {@link Operation} that looks for outliers in one column of a CSV file and describes every
+ * flagged cell as a block of RDF statements (Turtle-like text).
+ *
+ * <p>The first record of the file is treated as a header and skipped. A value is flagged when it
+ * lies at least {@link #OUTLIER_Z_SCORE} population standard deviations <em>above</em> the column
+ * average.
+ */
+public class OutlierDetectionOperation implements Operation {
+
+	/** Number of standard deviations above the average from which a value counts as an outlier. */
+	private static final double OUTLIER_Z_SCORE = 2;
+
+	/** Zero-based index of the column that is analysed. */
+	private final int column;
+
+	/** File to analyse; may be supplied later through {@link #setFile(File)}. */
+	private File csv;
+
+	/** IRI prefix that is put in front of {@link #instanceName} in every generated IRI. */
+	private final String artifactPrefix;
+
+	/** Name of the tabular-data instance the generated cells belong to. */
+	private final String instanceName;
+
+	/**
+	 * Creates an operation without a file; call {@link #setFile(File)} before {@link #execute()}.
+	 *
+	 * @param column zero-based index of the column to analyse
+	 * @param prefix IRI prefix used for all generated resources
+	 * @param instanceName name of the tabular-data instance
+	 */
+	public OutlierDetectionOperation(int column, String prefix, String instanceName) {
+		this(column, prefix, instanceName, null);
+	}
+
+	/**
+	 * Creates an operation for the given file.
+	 *
+	 * @param column zero-based index of the column to analyse
+	 * @param prefix IRI prefix used for all generated resources
+	 * @param instanceName name of the tabular-data instance
+	 * @param csv the file to analyse
+	 */
+	public OutlierDetectionOperation(int column, String prefix, String instanceName, File csv) {
+		this.column = column;
+		this.csv = csv;
+		this.artifactPrefix = prefix;
+		this.instanceName = instanceName;
+	}
+
+	/**
+	 * Reads the file, computes average and standard deviation of the configured column and
+	 * returns the RDF description of all outlier cells.
+	 *
+	 * @return the generated statements, or an empty string when there are no data rows or no outliers
+	 * @throws FileNotFoundException if no file has been set or the file cannot be opened
+	 * @throws IOException if the file cannot be read, a row lacks the configured column, or a
+	 *         cell in that column is not a number
+	 */
+	@Override
+	public String execute() throws FileNotFoundException, IOException {
+		if (csv == null) {
+			throw new FileNotFoundException("The csv file does not exist!");
+		}
+		List<CSVRecord> records = readCSV();
+		// Record 0 is the header; with fewer than two records there is nothing to analyse.
+		if (records.size() < 2) {
+			return "";
+		}
+		double[] values = parseColumn(records.subList(1, records.size()));
+		Pair<Double,Double> sdAndAvg = calculateStandardDeviation(values);
+		return searchOutliers(values, sdAndAvg.getFst(), sdAndAvg.getSnd());
+	}
+
+	/** Parses the configured column of every data row into a double, reporting malformed rows. */
+	private double[] parseColumn(List<CSVRecord> rows) throws IOException {
+		double[] values = new double[rows.size()];
+		for (int i = 0; i < values.length; i++) {
+			try {
+				values[i] = Double.parseDouble(rows.get(i).get(column));
+			} catch (ArrayIndexOutOfBoundsException e) {
+				throw new IOException("Your CSV file may not be well-formed. Check if all rows have the same number of columns.", e);
+			} catch (NumberFormatException e) {
+				throw new IOException("Your CSV file contains a non-numeric value in column " + column + " of data row " + (i + 1) + ".", e);
+			}
+		}
+		return values;
+	}
+
+	/** Returns the population standard deviation (first) and the average (second) of the values. */
+	private Pair<Double,Double> calculateStandardDeviation(double[] values) {
+		double sum = 0;
+		for (double value : values) {
+			sum += value;
+		}
+		double average = sum / values.length;
+
+		double squaredDiffs = 0.0;
+		for (double value : values) {
+			squaredDiffs += Math.pow(value - average, 2);
+		}
+		double sd = Math.sqrt(squaredDiffs / values.length);
+
+		return new Pair<Double,Double>(sd, average);
+	}
+
+	/** Builds the RDF statements for every value whose z-score reaches {@link #OUTLIER_Z_SCORE}. */
+	private String searchOutliers(double[] values, double sd, double average) {
+		StringBuilder sb = new StringBuilder();
+		String table = artifactPrefix + instanceName;
+		for (int i = 0; i < values.length; i++) {
+			double value = values[i];
+			// Row positions are 1-based and count data rows only (the header is not counted).
+			int rowN = i + 1;
+			// NOTE(review): only values above the average are flagged; low outliers are ignored.
+			// With sd == 0 the quotient is NaN or infinite, so equal values are never flagged.
+			if ((value - average) / sd >= OUTLIER_Z_SCORE) {
+				// NOTE(review): row and column are concatenated without a separator, so e.g.
+				// row 1/column 12 and row 11/column 2 share an IRI. Kept as-is because other
+				// code may rely on this naming scheme - confirm before changing.
+				String cell = "<" + table + "-cell-" + rowN + column + ">";
+				sb.append(cell).append("\n");
+				sb.append("\trdf:type ").append(Prefixes.CELL_VOCABULARY_IRI).append(" , ");
+				sb.append(Prefixes.DATA_VOCABULARY_IRI).append(" , owl:Thing ;\n");
+				sb.append("\t").append(tabular("hasColumnPosition")).append(" ").append(column).append(" ;\n");
+				sb.append("\t").append(tabular("hasRowPosition")).append(" ").append(rowN).append(" ;\n");
+				sb.append("\t").append(tabular("holdsContent")).append(" \"").append(value).append("\" ;\n");
+				sb.append("\t").append(tabular("isCellOfTabularData")).append(" <").append(table).append("> ;\n");
+				sb.append("\t").append(tabular("isInCollection")).append(" <").append(table).append("-column-").append(column).append("> , ");
+				sb.append("<").append(table).append("-row-").append(rowN).append("> ;\n");
+				sb.append("\towl:sameAs ").append(cell).append(" .\n\n");
+			}
+		}
+		return sb.toString();
+	}
+
+	/**
+	 * Returns the full, bracketed IRI of a term in the tabular vocabulary.
+	 * {@link Prefixes#TABULAR_VOCABULARY_IRI} opens with {@code <} but is not terminated, so the
+	 * closing {@code >} has to be appended after the local name to obtain a well-formed IRI.
+	 */
+	private static String tabular(String localName) {
+		return Prefixes.TABULAR_VOCABULARY_IRI + localName + ">";
+	}
+
+	/** Reads all records of the file, closing the reader and parser afterwards. */
+	private List<CSVRecord> readCSV() throws FileNotFoundException, IOException {
+		try (FileReader reader = new FileReader(csv);
+				CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT)) {
+			return parser.getRecords();
+		}
+	}
+
+	/**
+	 * Sets the file that the next call to {@link #execute()} analyses.
+	 *
+	 * @param csv the file to analyse
+	 */
+	@Override
+	public void setFile(File csv) {
+		this.csv = csv;
+	}
+
+}

+ 13 - 0
tools/oml-generators/src/main/java/util/Operation.java

@@ -0,0 +1,13 @@
+package util;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+/**
+ * Contract for an operation that is given a file and produces a textual result.
+ */
+public interface Operation {
+
+	/**
+	 * Runs the operation.
+	 *
+	 * @return the textual result of the operation
+	 * @throws FileNotFoundException if the implementation cannot find the file it needs
+	 * @throws IOException if the implementation fails with any other I/O error
+	 */
+	String execute() throws FileNotFoundException, IOException;
+
+	/**
+	 * Supplies the file the operation works on.
+	 *
+	 * @param csv the file to use
+	 */
+	void setFile(File csv);
+
+}

+ 13 - 0
tools/oml-generators/src/main/java/util/Prefixes.java

@@ -0,0 +1,13 @@
+package util;
+
+public final class Prefixes {
+
+	
+	public static final String DATA_VOCABULARY_IRI = "<http://ua.be/sdo2l/vocabulary/base/file#Data>";
+	
+	public static final String CELL_VOCABULARY_IRI = "<http://ua.be/sdo2l/vocabulary/base/tabular#Cell>";
+	
+	public static final String TABULAR_VOCABULARY_IRI = "<http://ua.be/sdo2l/vocabulary/base/tabular#";
+	
+	
+}