|
|
@ -5,7 +5,11 @@ import static util.Util.*; |
|
|
|
import java.io.*; |
|
|
|
import java.nio.charset.StandardCharsets; |
|
|
|
import java.util.*; |
|
|
|
import java.util.concurrent.ConcurrentHashMap; |
|
|
|
import java.util.concurrent.ConcurrentMap; |
|
|
|
import java.util.concurrent.atomic.AtomicLong; |
|
|
|
|
|
|
|
import data.Filter; |
|
|
|
import org.apache.commons.csv.CSVFormat; |
|
|
|
import org.apache.commons.csv.CSVPrinter; |
|
|
|
import org.apache.commons.lang3.tuple.Pair; |
|
|
@ -52,17 +56,29 @@ public class Export { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) { |
|
|
|
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock, |
|
|
|
Map<String, Map<String, AtomicLong>> taxonomyResults) { |
|
|
|
//Delimiter used in CSV file |
|
|
|
String NEW_LINE_SEPARATOR = "\n"; |
|
|
|
List<Object> FILE_HEADER_AL = new ArrayList<Object>(); |
|
|
|
Object[] FILE_HEADER; |
|
|
|
|
|
|
|
//Count frequencies |
|
|
|
int num_frequencies = 0; |
|
|
|
long num_frequencies = 0; |
|
|
|
for (Pair<String, Map<String, Long>> p : set) { |
|
|
|
Map<String, Long> map = p.getRight(); |
|
|
|
for (Map.Entry<String, Long> e : map.entrySet()) { |
|
|
|
num_frequencies += e.getValue(); |
|
|
|
if (map.isEmpty()) |
|
|
|
continue; |
|
|
|
num_frequencies = Util.mapSumFrequencies(map); |
|
|
|
} |
|
|
|
|
|
|
|
Map<String, Long> num_taxonomy_frequencies = new ConcurrentHashMap<>(); |
|
|
|
for (String taxonomyKey : taxonomyResults.keySet()) { |
|
|
|
num_taxonomy_frequencies.put(taxonomyKey, (long) 0); |
|
|
|
for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){ |
|
|
|
long val = num_taxonomy_frequencies.get(taxonomyKey); |
|
|
|
val += value.get(); |
|
|
|
num_taxonomy_frequencies.put(taxonomyKey, val); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
@ -71,19 +87,36 @@ public class Export { |
|
|
|
if (headerInfoBlock.containsKey("Analiza") && headerInfoBlock.get("Analiza").equals("Besede")) { |
|
|
|
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) { |
|
|
|
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies)); |
|
|
|
FILE_HEADER = new Object[]{"Različnica", "Skupna absolutna pogostost", "Delež glede na vse različnice"}; |
|
|
|
FILE_HEADER_AL.add("Različnica"); |
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost"); |
|
|
|
FILE_HEADER_AL.add("Delež glede na vse različnice"); |
|
|
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) { |
|
|
|
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies)); |
|
|
|
FILE_HEADER = new Object[]{"Lema", "Skupna absolutna pogostost", "Delež glede na vse leme"}; |
|
|
|
FILE_HEADER_AL.add("Lema"); |
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost"); |
|
|
|
FILE_HEADER_AL.add("Delež glede na vse leme"); |
|
|
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) { |
|
|
|
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies)); |
|
|
|
FILE_HEADER = new Object[]{"Oblikoskladenjska oznaka", "Skupna absolutna pogostost", "Delež glede na vse oblikoskladenjske oznake"}; |
|
|
|
FILE_HEADER_AL.add("Oblikoskladenjska oznaka"); |
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost"); |
|
|
|
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake"); |
|
|
|
} else { |
|
|
|
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies)); |
|
|
|
FILE_HEADER = new Object[]{"Lema", "Skupna pogostost", "Delež glede na leme"}; |
|
|
|
FILE_HEADER_AL.add("Lema"); |
|
|
|
FILE_HEADER_AL.add("Skupna absolutna pogostost"); |
|
|
|
FILE_HEADER_AL.add("Delež glede na vse leme"); |
|
|
|
} |
|
|
|
FILE_HEADER_AL.add("Skupna relativna pogostost"); |
|
|
|
for (String key : taxonomyResults.keySet()) { |
|
|
|
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]"); |
|
|
|
FILE_HEADER_AL.add("Delež [" + key + "]"); |
|
|
|
FILE_HEADER_AL.add("Relativna pogostost [" + key + "]"); |
|
|
|
} |
|
|
|
} else |
|
|
|
FILE_HEADER = new String[ FILE_HEADER_AL.size() ]; |
|
|
|
FILE_HEADER_AL.toArray(FILE_HEADER); |
|
|
|
} else { |
|
|
|
FILE_HEADER = new Object[]{"word", "frequency", "percent"}; |
|
|
|
} |
|
|
|
|
|
|
|
String fileName = ""; |
|
|
|
|
|
|
@ -99,7 +132,7 @@ public class Export { |
|
|
|
if (map.isEmpty()) |
|
|
|
continue; |
|
|
|
|
|
|
|
long total = Util.mapSumFrequencies(map); |
|
|
|
// long total = Util.mapSumFrequencies(map); |
|
|
|
|
|
|
|
OutputStreamWriter fileWriter = null; |
|
|
|
CSVPrinter csvFilePrinter = null; |
|
|
@ -124,7 +157,16 @@ public class Export { |
|
|
|
List dataEntry = new ArrayList<>(); |
|
|
|
dataEntry.add(e.getKey()); |
|
|
|
dataEntry.add(e.getValue().toString()); |
|
|
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / total)); |
|
|
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies)); |
|
|
|
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies)); |
|
|
|
for (String key : taxonomyResults.keySet()){ |
|
|
|
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey()); |
|
|
|
dataEntry.add(frequency.toString()); |
|
|
|
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key))); |
|
|
|
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key))); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
csvFilePrinter.printRecord(dataEntry); |
|
|
|
} |
|
|
|
} catch (Exception e) { |
|
|
|