|
|
@ -10,6 +10,7 @@ import java.util.concurrent.ConcurrentMap;
|
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
|
|
|
|
|
|
|
|
import data.Filter;
|
|
|
|
import data.Filter;
|
|
|
|
|
|
|
|
import data.MultipleHMKeys;
|
|
|
|
import org.apache.commons.csv.CSVFormat;
|
|
|
|
import org.apache.commons.csv.CSVFormat;
|
|
|
|
import org.apache.commons.csv.CSVPrinter;
|
|
|
|
import org.apache.commons.csv.CSVPrinter;
|
|
|
|
import org.apache.commons.lang3.tuple.Pair;
|
|
|
|
import org.apache.commons.lang3.tuple.Pair;
|
|
|
@ -20,22 +21,22 @@ import data.Enums.WordLevelType;
|
|
|
|
|
|
|
|
|
|
|
|
@SuppressWarnings("unchecked")
|
|
|
|
@SuppressWarnings("unchecked")
|
|
|
|
public class Export {
|
|
|
|
public class Export {
|
|
|
|
public static void SetToJSON(Set<Pair<String, Map<String, Long>>> set) {
|
|
|
|
public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
|
|
|
|
JSONArray wrapper = new JSONArray();
|
|
|
|
JSONArray wrapper = new JSONArray();
|
|
|
|
|
|
|
|
|
|
|
|
for (Pair<String, Map<String, Long>> p : set) {
|
|
|
|
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
|
|
|
JSONArray data_wrapper = new JSONArray();
|
|
|
|
JSONArray data_wrapper = new JSONArray();
|
|
|
|
JSONObject metric = new JSONObject();
|
|
|
|
JSONObject metric = new JSONObject();
|
|
|
|
|
|
|
|
|
|
|
|
String title = p.getLeft();
|
|
|
|
String title = p.getLeft();
|
|
|
|
Map<String, Long> map = p.getRight();
|
|
|
|
Map<MultipleHMKeys, Long> map = p.getRight();
|
|
|
|
|
|
|
|
|
|
|
|
if (map.isEmpty())
|
|
|
|
if (map.isEmpty())
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
|
|
long total = Util.mapSumFrequencies(map);
|
|
|
|
long total = Util.mapSumFrequencies(map);
|
|
|
|
|
|
|
|
|
|
|
|
for (Map.Entry<String, Long> e : map.entrySet()) {
|
|
|
|
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
|
|
|
JSONObject data_entry = new JSONObject();
|
|
|
|
JSONObject data_entry = new JSONObject();
|
|
|
|
data_entry.put("word", e.getKey());
|
|
|
|
data_entry.put("word", e.getKey());
|
|
|
|
data_entry.put("frequency", e.getValue());
|
|
|
|
data_entry.put("frequency", e.getValue());
|
|
|
@ -56,8 +57,8 @@ public class Export {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
|
|
|
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
|
|
|
Map<String, Map<String, AtomicLong>> taxonomyResults) {
|
|
|
|
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults) {
|
|
|
|
//Delimiter used in CSV file
|
|
|
|
//Delimiter used in CSV file
|
|
|
|
String NEW_LINE_SEPARATOR = "\n";
|
|
|
|
String NEW_LINE_SEPARATOR = "\n";
|
|
|
|
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
|
|
|
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
|
|
@ -65,8 +66,8 @@ public class Export {
|
|
|
|
|
|
|
|
|
|
|
|
//Count frequencies
|
|
|
|
//Count frequencies
|
|
|
|
long num_frequencies = 0;
|
|
|
|
long num_frequencies = 0;
|
|
|
|
for (Pair<String, Map<String, Long>> p : set) {
|
|
|
|
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
|
|
|
Map<String, Long> map = p.getRight();
|
|
|
|
Map<MultipleHMKeys, Long> map = p.getRight();
|
|
|
|
if (map.isEmpty())
|
|
|
|
if (map.isEmpty())
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
num_frequencies = Util.mapSumFrequencies(map);
|
|
|
|
num_frequencies = Util.mapSumFrequencies(map);
|
|
|
@ -88,21 +89,48 @@ public class Export {
|
|
|
|
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
|
|
|
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
|
|
|
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
|
|
|
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
|
|
|
FILE_HEADER_AL.add("Različnica");
|
|
|
|
FILE_HEADER_AL.add("Različnica");
|
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Delež glede na vse različnice");
|
|
|
|
|
|
|
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
|
|
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
|
|
|
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
|
|
|
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
|
|
|
FILE_HEADER_AL.add("Lema");
|
|
|
|
FILE_HEADER_AL.add("Lema");
|
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Delež glede na vse leme");
|
|
|
|
|
|
|
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
|
|
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
|
|
|
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
|
|
|
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
|
|
|
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
|
|
|
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
|
|
|
|
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
|
|
|
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
|
|
|
FILE_HEADER_AL.add("Lema");
|
|
|
|
FILE_HEADER_AL.add("Lema");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
|
|
|
|
|
|
|
|
for (MultipleHMKeys key : value.keySet()){
|
|
|
|
|
|
|
|
if(!key.getLemma().equals("")){
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Lema");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if(!key.getWordType().equals("")){
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Besedna vrsta");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if(!key.getMsd().equals("")){
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Delež glede na vse različnice");
|
|
|
|
|
|
|
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Delež glede na vse leme");
|
|
|
|
|
|
|
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
|
|
|
|
|
|
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
|
|
|
|
|
|
|
|
} else {
|
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
|
|
FILE_HEADER_AL.add("Delež glede na vse leme");
|
|
|
|
FILE_HEADER_AL.add("Delež glede na vse leme");
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -122,14 +150,14 @@ public class Export {
|
|
|
|
|
|
|
|
|
|
|
|
String fileName = "";
|
|
|
|
String fileName = "";
|
|
|
|
|
|
|
|
|
|
|
|
for (Pair<String, Map<String, Long>> p : set) {
|
|
|
|
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
|
|
|
String title = p.getLeft();
|
|
|
|
String title = p.getLeft();
|
|
|
|
fileName = title.replace(": ", "-");
|
|
|
|
fileName = title.replace(": ", "-");
|
|
|
|
fileName = fileName.replace(" ", "_").concat(".csv");
|
|
|
|
fileName = fileName.replace(" ", "_").concat(".csv");
|
|
|
|
|
|
|
|
|
|
|
|
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
|
|
|
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
|
|
|
|
|
|
|
|
|
|
|
Map<String, Long> map = p.getRight();
|
|
|
|
Map<MultipleHMKeys, Long> map = p.getRight();
|
|
|
|
|
|
|
|
|
|
|
|
if (map.isEmpty())
|
|
|
|
if (map.isEmpty())
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
@ -155,9 +183,18 @@ public class Export {
|
|
|
|
//Create CSV file header
|
|
|
|
//Create CSV file header
|
|
|
|
csvFilePrinter.printRecord(FILE_HEADER);
|
|
|
|
csvFilePrinter.printRecord(FILE_HEADER);
|
|
|
|
|
|
|
|
|
|
|
|
for (Map.Entry<String, Long> e : map.entrySet()) {
|
|
|
|
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
|
|
|
List dataEntry = new ArrayList<>();
|
|
|
|
List dataEntry = new ArrayList<>();
|
|
|
|
dataEntry.add(e.getKey());
|
|
|
|
dataEntry.add(e.getKey().getKey());
|
|
|
|
|
|
|
|
if(!e.getKey().getLemma().equals("")){
|
|
|
|
|
|
|
|
dataEntry.add(e.getKey().getLemma());
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if(!e.getKey().getWordType().equals("")){
|
|
|
|
|
|
|
|
dataEntry.add(e.getKey().getWordType());
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if(!e.getKey().getMsd().equals("")){
|
|
|
|
|
|
|
|
dataEntry.add(e.getKey().getMsd());
|
|
|
|
|
|
|
|
}
|
|
|
|
dataEntry.add(e.getValue().toString());
|
|
|
|
dataEntry.add(e.getValue().toString());
|
|
|
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
|
|
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
|
|
|
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
|
|
|
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
|
|
|