Added functional additional combinational filters for words

master
Luka 6 years ago
parent e2ce656fc5
commit c073e12f55

@ -6,14 +6,11 @@ import java.util.List;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import data.*;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import data.CalculateFor;
import data.Sentence;
import data.StatisticsNew;
import data.Word;
import gui.ValidationUtil; import gui.ValidationUtil;
public class Ngrams { public class Ngrams {
@ -45,9 +42,26 @@ public class Ngrams {
continue; continue;
} }
// generate proper MultipleHMKeys depending on filter data
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
String lemma = "";
String wordType = "";
String msd = "";
for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
if(otherKey.toString().equals("lema")){
lemma = wordToString(ngramCandidate, otherKey);
} else if(otherKey.toString().equals("besedna vrsta")){
wordType = wordToString(ngramCandidate, otherKey).substring(0, 1);
} else if(otherKey.toString().equals("oblikoskladenjska oznaka")){
msd = wordToString(ngramCandidate, otherKey);
}
}
MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
// UPDATE TAXONOMY HERE!!! // UPDATE TAXONOMY HERE!!!
stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate.get(0).getTaxonomy()); stats.updateTaxonomyResults(multipleKeys, ngramCandidate.get(0).getTaxonomy());
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor())); // stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
} }
} }
} }
@ -129,7 +143,9 @@ public class Ngrams {
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) { for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
// TODO: locila? // TODO: locila?
stats.updateTaxonomyResults(word.substring(i, i + stats.getFilter().getStringLength()), taxonomy);
MultipleHMKeys multipleKeys = new MultipleHMKeys(word.substring(i, i + stats.getFilter().getStringLength()));
stats.updateTaxonomyResults(multipleKeys, taxonomy);
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor())); // stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));

@ -24,7 +24,8 @@ public class Filter {
TAXONOMY, TAXONOMY,
MSD, MSD,
HAS_MSD, HAS_MSD,
SOLAR_FILTERS SOLAR_FILTERS,
MULTIPLE_KEYS
} }
public Filter() { public Filter() {
@ -141,4 +142,23 @@ public class Filter {
public HashMap<String, HashSet<String>> getSolarFilters() { public HashMap<String, HashSet<String>> getSolarFilters() {
return (HashMap<String, HashSet<String>>) filter.get(SOLAR_FILTERS); return (HashMap<String, HashSet<String>>) filter.get(SOLAR_FILTERS);
} }
/**
 * Stores the additional keys under the {@code MULTIPLE_KEYS} filter entry.
 *
 * Each name is converted with {@code CalculateFor.factory}; a {@code null}
 * argument results in an empty list being stored.
 *
 * @param keys names of the additional keys, may be {@code null}
 */
public void setMultipleKeys(ArrayList<String> keys) {
    ArrayList<CalculateFor> converted = new ArrayList<>();
    if (keys != null) {
        keys.forEach(name -> converted.add(CalculateFor.factory(name)));
    }
    filter.put(MULTIPLE_KEYS, converted);
}
/**
 * Returns the list stored under the {@code MULTIPLE_KEYS} filter entry.
 *
 * @return the stored list, or a new empty list when the entry is missing or {@code null}
 */
public ArrayList<CalculateFor> getMultipleKeys() {
    // A single lookup is enough: a missing entry and a null entry both yield null here.
    Object stored = filter.get(MULTIPLE_KEYS);
    if (stored == null) {
        return new ArrayList<>();
    }
    return (ArrayList<CalculateFor>) stored;
}
} }

@ -2,48 +2,54 @@ package data;
/* /*
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously. Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
*/ */
final class MultipleHMKeys { public final class MultipleHMKeys {
private final String key1, key2, key3; private final String key, lemma, wordType, msd;
public MultipleHMKeys(String key1) { public MultipleHMKeys(String key) {
this.key1 = key1; this.key = key;
this.key2 = null; this.lemma = "";
this.key3 = null; this.wordType = "";
this.msd = "";
} }
public MultipleHMKeys(String key1, String key2) { public MultipleHMKeys(String key, String lemma, String wordType, String msd) {
this.key1 = key1; this.key = key;
this.key2 = key2; this.lemma = lemma;
this.key3 = null; this.wordType = wordType;
this.msd = msd;
} }
public MultipleHMKeys(String key1, String key2, String key3) { public String getKey() {
this.key1 = key1; return key;
this.key2 = key2;
this.key3 = key3;
} }
public String getKey1() { public String getLemma() {
return key1; return lemma;
} }
public String getKey2() { public String getWordType() {
return key2; return wordType;
} }
public String getKey3() { public String getMsd() {
return key3; return msd;
} }
@Override @Override
public int hashCode() { public int hashCode() {
return key1.hashCode() ^ key2.hashCode() ^ key3.hashCode(); // if(key2 == null){
// return key1.hashCode();
// } else if (key3 == null){
// return key1.hashCode() ^ key2.hashCode();
// }
return key.hashCode() ^ lemma.hashCode() ^ wordType.hashCode() ^ msd.hashCode();
} }
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key1.equals(key1) return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key)
&& ((MultipleHMKeys) obj).key2.equals(key2) && ((MultipleHMKeys) obj).lemma.equals(lemma)
&& ((MultipleHMKeys) obj).key3.equals(key3); && ((MultipleHMKeys) obj).wordType.equals(wordType)
&& ((MultipleHMKeys) obj).msd.equals(msd);
} }
} }

@ -222,7 +222,7 @@ public class Statistics {
// return sortedM; // return sortedM;
// } // }
private Map<String, Long> getSortedResult(Map<String, AtomicLong> map, int limit) { private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit); return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
} }

@ -32,10 +32,10 @@ public class StatisticsNew {
private String resultTitle; private String resultTitle;
private Map<String, AtomicLong> result; private Map<String, AtomicLong> result;
private Map<String, Map<String, AtomicLong>> taxonomyResult; private Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;
private Object[][] resultCustom; // for when calculating percentages that don't add up to 100% private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedSuffix; private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedSuffix;
private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedPrefix; private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedPrefix;
private boolean useDB; private boolean useDB;
private RDB db; private RDB db;
private boolean analysisProducedResults; private boolean analysisProducedResults;
@ -194,7 +194,7 @@ public class StatisticsNew {
} }
public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException { public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException {
Set<Pair<String, Map<String, Long>>> stats = new HashSet<>(); Set<Pair<String, Map<MultipleHMKeys, Long>>> stats = new HashSet<>();
if (useDB) { if (useDB) {
result = db.getDump(); result = db.getDump();
@ -223,13 +223,14 @@ public class StatisticsNew {
} }
Map<WordLevelType, Map<String, Map<String, Long>>> results = new HashMap<>(); Map<WordLevelType, Map<String, Map<String, Long>>> results = new HashMap<>();
if (!isEmpty(resultNestedSuffix)) { // UNCOMMENT!!!!!!
results.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit))); // if (!isEmpty(resultNestedSuffix)) {
} // results.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit)));
// }
if (!isEmpty(resultNestedPrefix)) { //
results.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit))); // if (!isEmpty(resultNestedPrefix)) {
} // results.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit)));
// }
// if no results and nothing to save, return false // if no results and nothing to save, return false
if (!(results.size() > 0)) { if (!(results.size() > 0)) {
@ -266,8 +267,8 @@ public class StatisticsNew {
return true; return true;
} }
private Map<String, Map<String, Long>> sortNestedMap(Map<String, ConcurrentHashMap<String, AtomicLong>> nestedMap, int limit) { private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) {
Map<String, Map<String, Long>> sorted = new HashMap<>(); Map<String, Map<MultipleHMKeys, Long>> sorted = new HashMap<>();
for (String s : nestedMap.keySet()) { for (String s : nestedMap.keySet()) {
sorted.put(s, getSortedResult(nestedMap.get(s), Util.getValidInt(limit))); sorted.put(s, getSortedResult(nestedMap.get(s), Util.getValidInt(limit)));
@ -277,11 +278,11 @@ public class StatisticsNew {
} }
private Map<String, Long> getSortedResult(Map<String, AtomicLong> map, int limit) { private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit); return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
} }
public void updateTaxonomyResults(String o, List<String> taxonomy) { public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
for (String key : taxonomyResult.keySet()) { for (String key : taxonomyResult.keySet()) {
// first word should have the same taxonomy as others // first word should have the same taxonomy as others
if (taxonomy.contains(key) || key.equals("Total")) { if (taxonomy.contains(key) || key.equals("Total")) {
@ -335,9 +336,11 @@ public class StatisticsNew {
} }
public void updateResultsNestedSuffix(String key, String stringValue) { public void updateResultsNestedSuffix(String key, String stringValue) {
MultipleHMKeys mkStringValue = new MultipleHMKeys(stringValue);
if (resultNestedSuffix.containsKey(key)) { if (resultNestedSuffix.containsKey(key)) {
// if not in map // if not in map
AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(stringValue, new AtomicLong(1)); AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
// else // else
if (r != null) { if (r != null) {
@ -345,7 +348,7 @@ public class StatisticsNew {
} }
} else { } else {
resultNestedSuffix.putIfAbsent(key, new ConcurrentHashMap<>()); resultNestedSuffix.putIfAbsent(key, new ConcurrentHashMap<>());
AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(stringValue, new AtomicLong(1)); AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
if (r != null) { if (r != null) {
resultNestedSuffix.get(key).get(stringValue).incrementAndGet(); resultNestedSuffix.get(key).get(stringValue).incrementAndGet();
@ -354,9 +357,11 @@ public class StatisticsNew {
} }
public void updateResultsNestedPrefix(String key, String stringValue) { public void updateResultsNestedPrefix(String key, String stringValue) {
MultipleHMKeys mkStringValue = new MultipleHMKeys(stringValue);
if (resultNestedPrefix.containsKey(key)) { if (resultNestedPrefix.containsKey(key)) {
// if not in map // if not in map
AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(stringValue, new AtomicLong(1)); AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
// else // else
if (r != null) { if (r != null) {
@ -364,7 +369,7 @@ public class StatisticsNew {
} }
} else { } else {
resultNestedPrefix.putIfAbsent(key, new ConcurrentHashMap<>()); resultNestedPrefix.putIfAbsent(key, new ConcurrentHashMap<>());
AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(stringValue, new AtomicLong(1)); AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
if (r != null) { if (r != null) {
resultNestedPrefix.get(key).get(stringValue).incrementAndGet(); resultNestedPrefix.get(key).get(stringValue).incrementAndGet();

@ -82,6 +82,7 @@ public class OneWordAnalysisTab {
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsDifferential = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList<String> alsoVisualizeItemsDifferential = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// TODO: pass observables for taxonomy based on header scan // TODO: pass observables for taxonomy based on header scan
// after header scan // after header scan
@ -95,6 +96,37 @@ public class OneWordAnalysisTab {
// calculateForCB // calculateForCB
// Keep alsoVisualize in sync with the checked items. This listener is registered
// once here rather than inside the calculateForCB listener below: registering it
// on every value change stacks up duplicate listeners on the same checked-items
// list, each of which rebuilds the selection and logs it again.
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
    alsoVisualize = new ArrayList<>();
    ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
    alsoVisualize.addAll(checkedItems);
    logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
});

// When the "calculate for" choice changes, offer only the additional keys that
// make sense for that choice and drop any previous selection.
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
    calculateFor = CalculateFor.factory(newValue);

    // Constant-first equals so a null value falls through to the empty list.
    ObservableList<String> availableItems;
    if ("lema".equals(newValue)) {
        availableItems = alsoVisualizeItemsLemma;
    } else if ("različnica".equals(newValue)) {
        availableItems = alsoVisualizeItemsDifferential;
    } else {
        availableItems = alsoVisualizeItemsEmpty;
    }

    // setAll replaces the current content, so no separate clearing call is needed
    // (the previous no-argument removeAll() removed nothing).
    alsoVisualizeCCB.getItems().setAll(availableItems);
    alsoVisualizeCCB.getCheckModel().clearChecks();

    // The message needs a {} placeholder, otherwise the argument is silently dropped.
    logger.info("calculateForCB: {}", calculateFor);
});
@ -294,6 +326,7 @@ public class OneWordAnalysisTab {
filter.setIsCvv(false); filter.setIsCvv(false);
filter.setSolarFilters(solarFiltersMap); filter.setSolarFilters(solarFiltersMap);
filter.setStringLength(1); filter.setStringLength(1);
filter.setMultipleKeys(alsoVisualize);
String message = Validation.validateForStringLevel(filter); String message = Validation.validateForStringLevel(filter);
if (message == null) { if (message == null) {

@ -10,6 +10,7 @@ import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import data.Filter; import data.Filter;
import data.MultipleHMKeys;
import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter; import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.tuple.Pair;
@ -20,22 +21,22 @@ import data.Enums.WordLevelType;
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public class Export { public class Export {
public static void SetToJSON(Set<Pair<String, Map<String, Long>>> set) { public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
JSONArray wrapper = new JSONArray(); JSONArray wrapper = new JSONArray();
for (Pair<String, Map<String, Long>> p : set) { for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
JSONArray data_wrapper = new JSONArray(); JSONArray data_wrapper = new JSONArray();
JSONObject metric = new JSONObject(); JSONObject metric = new JSONObject();
String title = p.getLeft(); String title = p.getLeft();
Map<String, Long> map = p.getRight(); Map<MultipleHMKeys, Long> map = p.getRight();
if (map.isEmpty()) if (map.isEmpty())
continue; continue;
long total = Util.mapSumFrequencies(map); long total = Util.mapSumFrequencies(map);
for (Map.Entry<String, Long> e : map.entrySet()) { for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
JSONObject data_entry = new JSONObject(); JSONObject data_entry = new JSONObject();
data_entry.put("word", e.getKey()); data_entry.put("word", e.getKey());
data_entry.put("frequency", e.getValue()); data_entry.put("frequency", e.getValue());
@ -56,8 +57,8 @@ public class Export {
} }
} }
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock, public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
Map<String, Map<String, AtomicLong>> taxonomyResults) { Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults) {
//Delimiter used in CSV file //Delimiter used in CSV file
String NEW_LINE_SEPARATOR = "\n"; String NEW_LINE_SEPARATOR = "\n";
List<Object> FILE_HEADER_AL = new ArrayList<Object>(); List<Object> FILE_HEADER_AL = new ArrayList<Object>();
@ -65,8 +66,8 @@ public class Export {
//Count frequencies //Count frequencies
long num_frequencies = 0; long num_frequencies = 0;
for (Pair<String, Map<String, Long>> p : set) { for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
Map<String, Long> map = p.getRight(); Map<MultipleHMKeys, Long> map = p.getRight();
if (map.isEmpty()) if (map.isEmpty())
continue; continue;
num_frequencies = Util.mapSumFrequencies(map); num_frequencies = Util.mapSumFrequencies(map);
@ -88,21 +89,48 @@ public class Export {
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) { if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies)); headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
FILE_HEADER_AL.add("Različnica"); FILE_HEADER_AL.add("Različnica");
FILE_HEADER_AL.add("Skupna absolutna pogostost");
FILE_HEADER_AL.add("Delež glede na vse različnice");
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) { } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies)); headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
FILE_HEADER_AL.add("Lema"); FILE_HEADER_AL.add("Lema");
FILE_HEADER_AL.add("Skupna absolutna pogostost");
FILE_HEADER_AL.add("Delež glede na vse leme");
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) { } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies)); headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
FILE_HEADER_AL.add("Oblikoskladenjska oznaka"); FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
FILE_HEADER_AL.add("Skupna absolutna pogostost");
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
} else { } else {
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies)); headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
FILE_HEADER_AL.add("Lema"); FILE_HEADER_AL.add("Lema");
}
for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
for (MultipleHMKeys key : value.keySet()){
if(!key.getLemma().equals("")){
FILE_HEADER_AL.add("Lema");
}
if(!key.getWordType().equals("")){
FILE_HEADER_AL.add("Besedna vrsta");
}
if(!key.getMsd().equals("")){
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
}
break;
}
break;
}
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
FILE_HEADER_AL.add("Skupna absolutna pogostost");
FILE_HEADER_AL.add("Delež glede na vse različnice");
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
FILE_HEADER_AL.add("Skupna absolutna pogostost");
FILE_HEADER_AL.add("Delež glede na vse leme");
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
FILE_HEADER_AL.add("Skupna absolutna pogostost");
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
} else {
FILE_HEADER_AL.add("Skupna absolutna pogostost"); FILE_HEADER_AL.add("Skupna absolutna pogostost");
FILE_HEADER_AL.add("Delež glede na vse leme"); FILE_HEADER_AL.add("Delež glede na vse leme");
} }
@ -122,14 +150,14 @@ public class Export {
String fileName = ""; String fileName = "";
for (Pair<String, Map<String, Long>> p : set) { for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
String title = p.getLeft(); String title = p.getLeft();
fileName = title.replace(": ", "-"); fileName = title.replace(": ", "-");
fileName = fileName.replace(" ", "_").concat(".csv"); fileName = fileName.replace(" ", "_").concat(".csv");
fileName = resultsPath.toString().concat(File.separator).concat(fileName); fileName = resultsPath.toString().concat(File.separator).concat(fileName);
Map<String, Long> map = p.getRight(); Map<MultipleHMKeys, Long> map = p.getRight();
if (map.isEmpty()) if (map.isEmpty())
continue; continue;
@ -155,9 +183,18 @@ public class Export {
//Create CSV file header //Create CSV file header
csvFilePrinter.printRecord(FILE_HEADER); csvFilePrinter.printRecord(FILE_HEADER);
for (Map.Entry<String, Long> e : map.entrySet()) { for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
List dataEntry = new ArrayList<>(); List dataEntry = new ArrayList<>();
dataEntry.add(e.getKey()); dataEntry.add(e.getKey().getKey());
if(!e.getKey().getLemma().equals("")){
dataEntry.add(e.getKey().getLemma());
}
if(!e.getKey().getWordType().equals("")){
dataEntry.add(e.getKey().getWordType());
}
if(!e.getKey().getMsd().equals("")){
dataEntry.add(e.getKey().getMsd());
}
dataEntry.add(e.getValue().toString()); dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies)); dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies)); dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));

@ -9,6 +9,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream; import java.util.stream.Stream;
import data.MultipleHMKeys;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
@ -85,11 +86,11 @@ public class Util {
* Generic map converter -> since AtomicLongs aren't as comparable. * Generic map converter -> since AtomicLongs aren't as comparable.
* Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long> * Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
*/ */
public static <K, V> Map<String, Long> atomicInt2StringAndInt(Map<K, V> map) { public static <K, V> Map<MultipleHMKeys, Long> atomicInt2StringAndInt(Map<K, V> map) {
Map m = new HashMap<String, Long>(); Map m = new HashMap<MultipleHMKeys, Long>();
for (Map.Entry<K, V> e : map.entrySet()) { for (Map.Entry<K, V> e : map.entrySet()) {
m.put(e.getKey().toString(), ((AtomicLong) e.getValue()).longValue()); m.put(e.getKey(), ((AtomicLong) e.getValue()).longValue());
} }
return m; return m;
@ -148,7 +149,7 @@ public class Util {
System.out.println(); System.out.println();
} }
static long mapSumFrequencies(Map<String, Long> map) { static long mapSumFrequencies(Map<MultipleHMKeys, Long> map) {
long sum = 0; long sum = 0;
for (long value : map.values()) { for (long value : map.values()) {

Loading…
Cancel
Save