Added taxonomy presentation in results
This commit is contained in:
@@ -5,7 +5,11 @@ import static util.Util.*;
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import data.Filter;
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVPrinter;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
@@ -52,17 +56,29 @@ public class Export {
|
||||
}
|
||||
}
|
||||
|
||||
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||
Map<String, Map<String, AtomicLong>> taxonomyResults) {
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
||||
Object[] FILE_HEADER;
|
||||
|
||||
//Count frequencies
|
||||
int num_frequencies = 0;
|
||||
long num_frequencies = 0;
|
||||
for (Pair<String, Map<String, Long>> p : set) {
|
||||
Map<String, Long> map = p.getRight();
|
||||
for (Map.Entry<String, Long> e : map.entrySet()) {
|
||||
num_frequencies += e.getValue();
|
||||
if (map.isEmpty())
|
||||
continue;
|
||||
num_frequencies = Util.mapSumFrequencies(map);
|
||||
}
|
||||
|
||||
Map<String, Long> num_taxonomy_frequencies = new ConcurrentHashMap<>();
|
||||
for (String taxonomyKey : taxonomyResults.keySet()) {
|
||||
num_taxonomy_frequencies.put(taxonomyKey, (long) 0);
|
||||
for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){
|
||||
long val = num_taxonomy_frequencies.get(taxonomyKey);
|
||||
val += value.get();
|
||||
num_taxonomy_frequencies.put(taxonomyKey, val);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,19 +87,36 @@ public class Export {
|
||||
if (headerInfoBlock.containsKey("Analiza") && headerInfoBlock.get("Analiza").equals("Besede")) {
|
||||
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
||||
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||
FILE_HEADER = new Object[]{"Različnica", "Skupna absolutna pogostost", "Delež glede na vse različnice"};
|
||||
FILE_HEADER_AL.add("Različnica");
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse različnice");
|
||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
||||
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
||||
FILE_HEADER = new Object[]{"Lema", "Skupna absolutna pogostost", "Delež glede na vse leme"};
|
||||
FILE_HEADER_AL.add("Lema");
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
||||
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
||||
FILE_HEADER = new Object[]{"Oblikoskladenjska oznaka", "Skupna absolutna pogostost", "Delež glede na vse oblikoskladenjske oznake"};
|
||||
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
|
||||
} else {
|
||||
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||
FILE_HEADER = new Object[]{"Lema", "Skupna pogostost", "Delež glede na leme"};
|
||||
FILE_HEADER_AL.add("Lema");
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||
}
|
||||
} else
|
||||
FILE_HEADER_AL.add("Skupna relativna pogostost");
|
||||
for (String key : taxonomyResults.keySet()) {
|
||||
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
|
||||
FILE_HEADER_AL.add("Delež [" + key + "]");
|
||||
FILE_HEADER_AL.add("Relativna pogostost [" + key + "]");
|
||||
}
|
||||
FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
|
||||
FILE_HEADER_AL.toArray(FILE_HEADER);
|
||||
} else {
|
||||
FILE_HEADER = new Object[]{"word", "frequency", "percent"};
|
||||
}
|
||||
|
||||
String fileName = "";
|
||||
|
||||
@@ -99,7 +132,7 @@ public class Export {
|
||||
if (map.isEmpty())
|
||||
continue;
|
||||
|
||||
long total = Util.mapSumFrequencies(map);
|
||||
// long total = Util.mapSumFrequencies(map);
|
||||
|
||||
OutputStreamWriter fileWriter = null;
|
||||
CSVPrinter csvFilePrinter = null;
|
||||
@@ -124,7 +157,16 @@ public class Export {
|
||||
List dataEntry = new ArrayList<>();
|
||||
dataEntry.add(e.getKey());
|
||||
dataEntry.add(e.getValue().toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / total));
|
||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
||||
for (String key : taxonomyResults.keySet()){
|
||||
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
|
||||
dataEntry.add(frequency.toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
|
||||
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key)));
|
||||
|
||||
}
|
||||
|
||||
csvFilePrinter.printRecord(dataEntry);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
|
||||
Reference in New Issue
Block a user