Made the additional combinational filters for words functional
This commit is contained in:
@@ -10,6 +10,7 @@ import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import data.Filter;
|
||||
import data.MultipleHMKeys;
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVPrinter;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
@@ -20,22 +21,22 @@ import data.Enums.WordLevelType;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public class Export {
|
||||
public static void SetToJSON(Set<Pair<String, Map<String, Long>>> set) {
|
||||
public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
|
||||
JSONArray wrapper = new JSONArray();
|
||||
|
||||
for (Pair<String, Map<String, Long>> p : set) {
|
||||
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||
JSONArray data_wrapper = new JSONArray();
|
||||
JSONObject metric = new JSONObject();
|
||||
|
||||
String title = p.getLeft();
|
||||
Map<String, Long> map = p.getRight();
|
||||
Map<MultipleHMKeys, Long> map = p.getRight();
|
||||
|
||||
if (map.isEmpty())
|
||||
continue;
|
||||
|
||||
long total = Util.mapSumFrequencies(map);
|
||||
|
||||
for (Map.Entry<String, Long> e : map.entrySet()) {
|
||||
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
||||
JSONObject data_entry = new JSONObject();
|
||||
data_entry.put("word", e.getKey());
|
||||
data_entry.put("frequency", e.getValue());
|
||||
@@ -56,8 +57,8 @@ public class Export {
|
||||
}
|
||||
}
|
||||
|
||||
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||
Map<String, Map<String, AtomicLong>> taxonomyResults) {
|
||||
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults) {
|
||||
//Line separator used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
||||
@@ -65,8 +66,8 @@ public class Export {
|
||||
|
||||
//Count frequencies
|
||||
long num_frequencies = 0;
|
||||
for (Pair<String, Map<String, Long>> p : set) {
|
||||
Map<String, Long> map = p.getRight();
|
||||
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||
Map<MultipleHMKeys, Long> map = p.getRight();
|
||||
if (map.isEmpty())
|
||||
continue;
|
||||
num_frequencies = Util.mapSumFrequencies(map);
|
||||
@@ -88,21 +89,48 @@ public class Export {
|
||||
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
||||
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||
FILE_HEADER_AL.add("Različnica");
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse različnice");
|
||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
||||
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
||||
FILE_HEADER_AL.add("Lema");
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
||||
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
||||
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
|
||||
} else {
|
||||
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||
FILE_HEADER_AL.add("Lema");
|
||||
}
|
||||
|
||||
|
||||
for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
|
||||
for (MultipleHMKeys key : value.keySet()){
|
||||
if(!key.getLemma().equals("")){
|
||||
FILE_HEADER_AL.add("Lema");
|
||||
}
|
||||
if(!key.getWordType().equals("")){
|
||||
FILE_HEADER_AL.add("Besedna vrsta");
|
||||
}
|
||||
if(!key.getMsd().equals("")){
|
||||
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse različnice");
|
||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
|
||||
} else {
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||
}
|
||||
@@ -122,14 +150,14 @@ public class Export {
|
||||
|
||||
String fileName = "";
|
||||
|
||||
for (Pair<String, Map<String, Long>> p : set) {
|
||||
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||
String title = p.getLeft();
|
||||
fileName = title.replace(": ", "-");
|
||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
||||
|
||||
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
||||
|
||||
Map<String, Long> map = p.getRight();
|
||||
Map<MultipleHMKeys, Long> map = p.getRight();
|
||||
|
||||
if (map.isEmpty())
|
||||
continue;
|
||||
@@ -155,9 +183,18 @@ public class Export {
|
||||
//Create CSV file header
|
||||
csvFilePrinter.printRecord(FILE_HEADER);
|
||||
|
||||
for (Map.Entry<String, Long> e : map.entrySet()) {
|
||||
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
||||
List dataEntry = new ArrayList<>();
|
||||
dataEntry.add(e.getKey());
|
||||
dataEntry.add(e.getKey().getKey());
|
||||
if(!e.getKey().getLemma().equals("")){
|
||||
dataEntry.add(e.getKey().getLemma());
|
||||
}
|
||||
if(!e.getKey().getWordType().equals("")){
|
||||
dataEntry.add(e.getKey().getWordType());
|
||||
}
|
||||
if(!e.getKey().getMsd().equals("")){
|
||||
dataEntry.add(e.getKey().getMsd());
|
||||
}
|
||||
dataEntry.add(e.getValue().toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
||||
|
||||
@@ -9,6 +9,7 @@ import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import data.MultipleHMKeys;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
@@ -85,11 +86,11 @@ public class Util {
|
||||
* Generic map converter, needed because AtomicLong values are not directly comparable (AtomicLong does not implement Comparable).
|
||||
* Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
|
||||
*/
|
||||
public static <K, V> Map<String, Long> atomicInt2StringAndInt(Map<K, V> map) {
|
||||
Map m = new HashMap<String, Long>();
|
||||
public static <K, V> Map<MultipleHMKeys, Long> atomicInt2StringAndInt(Map<K, V> map) {
|
||||
Map m = new HashMap<MultipleHMKeys, Long>();
|
||||
|
||||
for (Map.Entry<K, V> e : map.entrySet()) {
|
||||
m.put(e.getKey().toString(), ((AtomicLong) e.getValue()).longValue());
|
||||
m.put(e.getKey(), ((AtomicLong) e.getValue()).longValue());
|
||||
}
|
||||
|
||||
return m;
|
||||
@@ -148,7 +149,7 @@ public class Util {
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
static long mapSumFrequencies(Map<String, Long> map) {
|
||||
static long mapSumFrequencies(Map<MultipleHMKeys, Long> map) {
|
||||
long sum = 0;
|
||||
|
||||
for (long value : map.values()) {
|
||||
|
||||
Reference in New Issue
Block a user