Added functional additional combinational filters for words
This commit is contained in:
parent
e2ce656fc5
commit
c073e12f55
|
@ -6,14 +6,11 @@ import java.util.List;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import data.*;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.logging.log4j.LogManager;
|
import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
|
|
||||||
import data.CalculateFor;
|
|
||||||
import data.Sentence;
|
|
||||||
import data.StatisticsNew;
|
|
||||||
import data.Word;
|
|
||||||
import gui.ValidationUtil;
|
import gui.ValidationUtil;
|
||||||
|
|
||||||
public class Ngrams {
|
public class Ngrams {
|
||||||
|
@ -45,9 +42,26 @@ public class Ngrams {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// generate proper MultipleHMKeys depending on filter data
|
||||||
|
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
|
||||||
|
String lemma = "";
|
||||||
|
String wordType = "";
|
||||||
|
String msd = "";
|
||||||
|
for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
|
||||||
|
if(otherKey.toString().equals("lema")){
|
||||||
|
lemma = wordToString(ngramCandidate, otherKey);
|
||||||
|
} else if(otherKey.toString().equals("besedna vrsta")){
|
||||||
|
wordType = wordToString(ngramCandidate, otherKey).substring(0, 1);
|
||||||
|
} else if(otherKey.toString().equals("oblikoskladenjska oznaka")){
|
||||||
|
msd = wordToString(ngramCandidate, otherKey);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
|
||||||
|
|
||||||
// UPDATE TAXONOMY HERE!!!
|
// UPDATE TAXONOMY HERE!!!
|
||||||
stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate.get(0).getTaxonomy());
|
stats.updateTaxonomyResults(multipleKeys, ngramCandidate.get(0).getTaxonomy());
|
||||||
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -129,7 +143,9 @@ public class Ngrams {
|
||||||
|
|
||||||
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
|
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
|
||||||
// TODO: locila?
|
// TODO: locila?
|
||||||
stats.updateTaxonomyResults(word.substring(i, i + stats.getFilter().getStringLength()), taxonomy);
|
|
||||||
|
MultipleHMKeys multipleKeys = new MultipleHMKeys(word.substring(i, i + stats.getFilter().getStringLength()));
|
||||||
|
stats.updateTaxonomyResults(multipleKeys, taxonomy);
|
||||||
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,8 @@ public class Filter {
|
||||||
TAXONOMY,
|
TAXONOMY,
|
||||||
MSD,
|
MSD,
|
||||||
HAS_MSD,
|
HAS_MSD,
|
||||||
SOLAR_FILTERS
|
SOLAR_FILTERS,
|
||||||
|
MULTIPLE_KEYS
|
||||||
}
|
}
|
||||||
|
|
||||||
public Filter() {
|
public Filter() {
|
||||||
|
@ -141,4 +142,23 @@ public class Filter {
|
||||||
public HashMap<String, HashSet<String>> getSolarFilters() {
|
public HashMap<String, HashSet<String>> getSolarFilters() {
|
||||||
return (HashMap<String, HashSet<String>>) filter.get(SOLAR_FILTERS);
|
return (HashMap<String, HashSet<String>>) filter.get(SOLAR_FILTERS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setMultipleKeys(ArrayList<String> keys) {
|
||||||
|
ArrayList<CalculateFor> newKeys = new ArrayList<>();
|
||||||
|
if (keys != null) {
|
||||||
|
for (String key : keys) {
|
||||||
|
newKeys.add(CalculateFor.factory(key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
filter.put(MULTIPLE_KEYS, newKeys);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<CalculateFor> getMultipleKeys() {
|
||||||
|
if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) {
|
||||||
|
return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS);
|
||||||
|
} else {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,48 +2,54 @@ package data;
|
||||||
/*
|
/*
|
||||||
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
|
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
|
||||||
*/
|
*/
|
||||||
final class MultipleHMKeys {
|
public final class MultipleHMKeys {
|
||||||
private final String key1, key2, key3;
|
private final String key, lemma, wordType, msd;
|
||||||
|
|
||||||
public MultipleHMKeys(String key1) {
|
public MultipleHMKeys(String key) {
|
||||||
this.key1 = key1;
|
this.key = key;
|
||||||
this.key2 = null;
|
this.lemma = "";
|
||||||
this.key3 = null;
|
this.wordType = "";
|
||||||
|
this.msd = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
public MultipleHMKeys(String key1, String key2) {
|
public MultipleHMKeys(String key, String lemma, String wordType, String msd) {
|
||||||
this.key1 = key1;
|
this.key = key;
|
||||||
this.key2 = key2;
|
this.lemma = lemma;
|
||||||
this.key3 = null;
|
this.wordType = wordType;
|
||||||
|
this.msd = msd;
|
||||||
}
|
}
|
||||||
|
|
||||||
public MultipleHMKeys(String key1, String key2, String key3) {
|
public String getKey() {
|
||||||
this.key1 = key1;
|
return key;
|
||||||
this.key2 = key2;
|
|
||||||
this.key3 = key3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getKey1() {
|
public String getLemma() {
|
||||||
return key1;
|
return lemma;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getKey2() {
|
public String getWordType() {
|
||||||
return key2;
|
return wordType;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getKey3() {
|
public String getMsd() {
|
||||||
return key3;
|
return msd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return key1.hashCode() ^ key2.hashCode() ^ key3.hashCode();
|
// if(key2 == null){
|
||||||
|
// return key1.hashCode();
|
||||||
|
// } else if (key3 == null){
|
||||||
|
// return key1.hashCode() ^ key2.hashCode();
|
||||||
|
// }
|
||||||
|
return key.hashCode() ^ lemma.hashCode() ^ wordType.hashCode() ^ msd.hashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key1.equals(key1)
|
return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key)
|
||||||
&& ((MultipleHMKeys) obj).key2.equals(key2)
|
&& ((MultipleHMKeys) obj).lemma.equals(lemma)
|
||||||
&& ((MultipleHMKeys) obj).key3.equals(key3);
|
&& ((MultipleHMKeys) obj).wordType.equals(wordType)
|
||||||
|
&& ((MultipleHMKeys) obj).msd.equals(msd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -222,7 +222,7 @@ public class Statistics {
|
||||||
// return sortedM;
|
// return sortedM;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
private Map<String, Long> getSortedResult(Map<String, AtomicLong> map, int limit) {
|
private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
|
||||||
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,10 +32,10 @@ public class StatisticsNew {
|
||||||
|
|
||||||
private String resultTitle;
|
private String resultTitle;
|
||||||
private Map<String, AtomicLong> result;
|
private Map<String, AtomicLong> result;
|
||||||
private Map<String, Map<String, AtomicLong>> taxonomyResult;
|
private Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;
|
||||||
private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
|
private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
|
||||||
private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedSuffix;
|
private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedSuffix;
|
||||||
private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedPrefix;
|
private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedPrefix;
|
||||||
private boolean useDB;
|
private boolean useDB;
|
||||||
private RDB db;
|
private RDB db;
|
||||||
private boolean analysisProducedResults;
|
private boolean analysisProducedResults;
|
||||||
|
@ -194,7 +194,7 @@ public class StatisticsNew {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException {
|
public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException {
|
||||||
Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
|
Set<Pair<String, Map<MultipleHMKeys, Long>>> stats = new HashSet<>();
|
||||||
|
|
||||||
if (useDB) {
|
if (useDB) {
|
||||||
result = db.getDump();
|
result = db.getDump();
|
||||||
|
@ -223,13 +223,14 @@ public class StatisticsNew {
|
||||||
}
|
}
|
||||||
Map<WordLevelType, Map<String, Map<String, Long>>> results = new HashMap<>();
|
Map<WordLevelType, Map<String, Map<String, Long>>> results = new HashMap<>();
|
||||||
|
|
||||||
if (!isEmpty(resultNestedSuffix)) {
|
// UNCOMMENT!!!!!!
|
||||||
results.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit)));
|
// if (!isEmpty(resultNestedSuffix)) {
|
||||||
}
|
// results.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit)));
|
||||||
|
// }
|
||||||
if (!isEmpty(resultNestedPrefix)) {
|
//
|
||||||
results.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit)));
|
// if (!isEmpty(resultNestedPrefix)) {
|
||||||
}
|
// results.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit)));
|
||||||
|
// }
|
||||||
|
|
||||||
// if no results and nothing to save, return false
|
// if no results and nothing to save, return false
|
||||||
if (!(results.size() > 0)) {
|
if (!(results.size() > 0)) {
|
||||||
|
@ -266,8 +267,8 @@ public class StatisticsNew {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Map<String, Map<String, Long>> sortNestedMap(Map<String, ConcurrentHashMap<String, AtomicLong>> nestedMap, int limit) {
|
private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) {
|
||||||
Map<String, Map<String, Long>> sorted = new HashMap<>();
|
Map<String, Map<MultipleHMKeys, Long>> sorted = new HashMap<>();
|
||||||
|
|
||||||
for (String s : nestedMap.keySet()) {
|
for (String s : nestedMap.keySet()) {
|
||||||
sorted.put(s, getSortedResult(nestedMap.get(s), Util.getValidInt(limit)));
|
sorted.put(s, getSortedResult(nestedMap.get(s), Util.getValidInt(limit)));
|
||||||
|
@ -277,11 +278,11 @@ public class StatisticsNew {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Map<String, Long> getSortedResult(Map<String, AtomicLong> map, int limit) {
|
private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
|
||||||
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void updateTaxonomyResults(String o, List<String> taxonomy) {
|
public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
|
||||||
for (String key : taxonomyResult.keySet()) {
|
for (String key : taxonomyResult.keySet()) {
|
||||||
// first word should have the same taxonomy as others
|
// first word should have the same taxonomy as others
|
||||||
if (taxonomy.contains(key) || key.equals("Total")) {
|
if (taxonomy.contains(key) || key.equals("Total")) {
|
||||||
|
@ -335,9 +336,11 @@ public class StatisticsNew {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void updateResultsNestedSuffix(String key, String stringValue) {
|
public void updateResultsNestedSuffix(String key, String stringValue) {
|
||||||
|
MultipleHMKeys mkStringValue = new MultipleHMKeys(stringValue);
|
||||||
|
|
||||||
if (resultNestedSuffix.containsKey(key)) {
|
if (resultNestedSuffix.containsKey(key)) {
|
||||||
// if not in map
|
// if not in map
|
||||||
AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(stringValue, new AtomicLong(1));
|
AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
|
||||||
|
|
||||||
// else
|
// else
|
||||||
if (r != null) {
|
if (r != null) {
|
||||||
|
@ -345,7 +348,7 @@ public class StatisticsNew {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
resultNestedSuffix.putIfAbsent(key, new ConcurrentHashMap<>());
|
resultNestedSuffix.putIfAbsent(key, new ConcurrentHashMap<>());
|
||||||
AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(stringValue, new AtomicLong(1));
|
AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
|
||||||
|
|
||||||
if (r != null) {
|
if (r != null) {
|
||||||
resultNestedSuffix.get(key).get(stringValue).incrementAndGet();
|
resultNestedSuffix.get(key).get(stringValue).incrementAndGet();
|
||||||
|
@ -354,9 +357,11 @@ public class StatisticsNew {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void updateResultsNestedPrefix(String key, String stringValue) {
|
public void updateResultsNestedPrefix(String key, String stringValue) {
|
||||||
|
MultipleHMKeys mkStringValue = new MultipleHMKeys(stringValue);
|
||||||
|
|
||||||
if (resultNestedPrefix.containsKey(key)) {
|
if (resultNestedPrefix.containsKey(key)) {
|
||||||
// if not in map
|
// if not in map
|
||||||
AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(stringValue, new AtomicLong(1));
|
AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
|
||||||
|
|
||||||
// else
|
// else
|
||||||
if (r != null) {
|
if (r != null) {
|
||||||
|
@ -364,7 +369,7 @@ public class StatisticsNew {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
resultNestedPrefix.putIfAbsent(key, new ConcurrentHashMap<>());
|
resultNestedPrefix.putIfAbsent(key, new ConcurrentHashMap<>());
|
||||||
AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(stringValue, new AtomicLong(1));
|
AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
|
||||||
|
|
||||||
if (r != null) {
|
if (r != null) {
|
||||||
resultNestedPrefix.get(key).get(stringValue).incrementAndGet();
|
resultNestedPrefix.get(key).get(stringValue).incrementAndGet();
|
||||||
|
|
|
@ -82,6 +82,7 @@ public class OneWordAnalysisTab {
|
||||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||||
private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
||||||
private static final ObservableList<String> alsoVisualizeItemsDifferential = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
private static final ObservableList<String> alsoVisualizeItemsDifferential = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||||
|
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||||
|
|
||||||
// TODO: pass observables for taxonomy based on header scan
|
// TODO: pass observables for taxonomy based on header scan
|
||||||
// after header scan
|
// after header scan
|
||||||
|
@ -95,6 +96,37 @@ public class OneWordAnalysisTab {
|
||||||
// calculateForCB
|
// calculateForCB
|
||||||
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
calculateFor = CalculateFor.factory(newValue);
|
calculateFor = CalculateFor.factory(newValue);
|
||||||
|
if(newValue.equals("lema")){
|
||||||
|
alsoVisualizeCCB.getItems().removeAll();
|
||||||
|
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
|
||||||
|
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||||
|
alsoVisualize = new ArrayList<>();
|
||||||
|
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
|
||||||
|
alsoVisualize.addAll(checkedItems);
|
||||||
|
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||||
|
});
|
||||||
|
alsoVisualizeCCB.getCheckModel().clearChecks();
|
||||||
|
} else if(newValue.equals("različnica")){
|
||||||
|
alsoVisualizeCCB.getItems().removeAll();
|
||||||
|
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsDifferential);
|
||||||
|
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||||
|
alsoVisualize = new ArrayList<>();
|
||||||
|
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
|
||||||
|
alsoVisualize.addAll(checkedItems);
|
||||||
|
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||||
|
});
|
||||||
|
alsoVisualizeCCB.getCheckModel().clearChecks();
|
||||||
|
} else {
|
||||||
|
alsoVisualizeCCB.getItems().removeAll();
|
||||||
|
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
|
||||||
|
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||||
|
alsoVisualize = new ArrayList<>();
|
||||||
|
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
|
||||||
|
alsoVisualize.addAll(checkedItems);
|
||||||
|
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||||
|
});
|
||||||
|
alsoVisualizeCCB.getCheckModel().clearChecks();
|
||||||
|
}
|
||||||
logger.info("calculateForCB:", calculateFor.toString());
|
logger.info("calculateForCB:", calculateFor.toString());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -294,6 +326,7 @@ public class OneWordAnalysisTab {
|
||||||
filter.setIsCvv(false);
|
filter.setIsCvv(false);
|
||||||
filter.setSolarFilters(solarFiltersMap);
|
filter.setSolarFilters(solarFiltersMap);
|
||||||
filter.setStringLength(1);
|
filter.setStringLength(1);
|
||||||
|
filter.setMultipleKeys(alsoVisualize);
|
||||||
|
|
||||||
String message = Validation.validateForStringLevel(filter);
|
String message = Validation.validateForStringLevel(filter);
|
||||||
if (message == null) {
|
if (message == null) {
|
||||||
|
|
|
@ -10,6 +10,7 @@ import java.util.concurrent.ConcurrentMap;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
import data.Filter;
|
import data.Filter;
|
||||||
|
import data.MultipleHMKeys;
|
||||||
import org.apache.commons.csv.CSVFormat;
|
import org.apache.commons.csv.CSVFormat;
|
||||||
import org.apache.commons.csv.CSVPrinter;
|
import org.apache.commons.csv.CSVPrinter;
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
@ -20,22 +21,22 @@ import data.Enums.WordLevelType;
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public class Export {
|
public class Export {
|
||||||
public static void SetToJSON(Set<Pair<String, Map<String, Long>>> set) {
|
public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
|
||||||
JSONArray wrapper = new JSONArray();
|
JSONArray wrapper = new JSONArray();
|
||||||
|
|
||||||
for (Pair<String, Map<String, Long>> p : set) {
|
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||||
JSONArray data_wrapper = new JSONArray();
|
JSONArray data_wrapper = new JSONArray();
|
||||||
JSONObject metric = new JSONObject();
|
JSONObject metric = new JSONObject();
|
||||||
|
|
||||||
String title = p.getLeft();
|
String title = p.getLeft();
|
||||||
Map<String, Long> map = p.getRight();
|
Map<MultipleHMKeys, Long> map = p.getRight();
|
||||||
|
|
||||||
if (map.isEmpty())
|
if (map.isEmpty())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
long total = Util.mapSumFrequencies(map);
|
long total = Util.mapSumFrequencies(map);
|
||||||
|
|
||||||
for (Map.Entry<String, Long> e : map.entrySet()) {
|
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
||||||
JSONObject data_entry = new JSONObject();
|
JSONObject data_entry = new JSONObject();
|
||||||
data_entry.put("word", e.getKey());
|
data_entry.put("word", e.getKey());
|
||||||
data_entry.put("frequency", e.getValue());
|
data_entry.put("frequency", e.getValue());
|
||||||
|
@ -56,8 +57,8 @@ public class Export {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||||
Map<String, Map<String, AtomicLong>> taxonomyResults) {
|
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults) {
|
||||||
//Delimiter used in CSV file
|
//Delimiter used in CSV file
|
||||||
String NEW_LINE_SEPARATOR = "\n";
|
String NEW_LINE_SEPARATOR = "\n";
|
||||||
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
||||||
|
@ -65,8 +66,8 @@ public class Export {
|
||||||
|
|
||||||
//Count frequencies
|
//Count frequencies
|
||||||
long num_frequencies = 0;
|
long num_frequencies = 0;
|
||||||
for (Pair<String, Map<String, Long>> p : set) {
|
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||||
Map<String, Long> map = p.getRight();
|
Map<MultipleHMKeys, Long> map = p.getRight();
|
||||||
if (map.isEmpty())
|
if (map.isEmpty())
|
||||||
continue;
|
continue;
|
||||||
num_frequencies = Util.mapSumFrequencies(map);
|
num_frequencies = Util.mapSumFrequencies(map);
|
||||||
|
@ -88,21 +89,48 @@ public class Export {
|
||||||
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
||||||
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||||
FILE_HEADER_AL.add("Različnica");
|
FILE_HEADER_AL.add("Različnica");
|
||||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
||||||
FILE_HEADER_AL.add("Delež glede na vse različnice");
|
|
||||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
||||||
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
||||||
FILE_HEADER_AL.add("Lema");
|
FILE_HEADER_AL.add("Lema");
|
||||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
||||||
FILE_HEADER_AL.add("Delež glede na vse leme");
|
|
||||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
||||||
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
||||||
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
||||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
|
||||||
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
|
|
||||||
} else {
|
} else {
|
||||||
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||||
FILE_HEADER_AL.add("Lema");
|
FILE_HEADER_AL.add("Lema");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
|
||||||
|
for (MultipleHMKeys key : value.keySet()){
|
||||||
|
if(!key.getLemma().equals("")){
|
||||||
|
FILE_HEADER_AL.add("Lema");
|
||||||
|
}
|
||||||
|
if(!key.getWordType().equals("")){
|
||||||
|
FILE_HEADER_AL.add("Besedna vrsta");
|
||||||
|
}
|
||||||
|
if(!key.getMsd().equals("")){
|
||||||
|
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
||||||
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||||
|
FILE_HEADER_AL.add("Delež glede na vse različnice");
|
||||||
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
||||||
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||||
|
FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||||
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
||||||
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||||
|
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
|
||||||
|
} else {
|
||||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||||
FILE_HEADER_AL.add("Delež glede na vse leme");
|
FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||||
}
|
}
|
||||||
|
@ -122,14 +150,14 @@ public class Export {
|
||||||
|
|
||||||
String fileName = "";
|
String fileName = "";
|
||||||
|
|
||||||
for (Pair<String, Map<String, Long>> p : set) {
|
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||||
String title = p.getLeft();
|
String title = p.getLeft();
|
||||||
fileName = title.replace(": ", "-");
|
fileName = title.replace(": ", "-");
|
||||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
fileName = fileName.replace(" ", "_").concat(".csv");
|
||||||
|
|
||||||
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
||||||
|
|
||||||
Map<String, Long> map = p.getRight();
|
Map<MultipleHMKeys, Long> map = p.getRight();
|
||||||
|
|
||||||
if (map.isEmpty())
|
if (map.isEmpty())
|
||||||
continue;
|
continue;
|
||||||
|
@ -155,9 +183,18 @@ public class Export {
|
||||||
//Create CSV file header
|
//Create CSV file header
|
||||||
csvFilePrinter.printRecord(FILE_HEADER);
|
csvFilePrinter.printRecord(FILE_HEADER);
|
||||||
|
|
||||||
for (Map.Entry<String, Long> e : map.entrySet()) {
|
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
||||||
List dataEntry = new ArrayList<>();
|
List dataEntry = new ArrayList<>();
|
||||||
dataEntry.add(e.getKey());
|
dataEntry.add(e.getKey().getKey());
|
||||||
|
if(!e.getKey().getLemma().equals("")){
|
||||||
|
dataEntry.add(e.getKey().getLemma());
|
||||||
|
}
|
||||||
|
if(!e.getKey().getWordType().equals("")){
|
||||||
|
dataEntry.add(e.getKey().getWordType());
|
||||||
|
}
|
||||||
|
if(!e.getKey().getMsd().equals("")){
|
||||||
|
dataEntry.add(e.getKey().getMsd());
|
||||||
|
}
|
||||||
dataEntry.add(e.getValue().toString());
|
dataEntry.add(e.getValue().toString());
|
||||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
||||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
||||||
|
|
|
@ -9,6 +9,7 @@ import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import data.MultipleHMKeys;
|
||||||
import org.apache.logging.log4j.LogManager;
|
import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
|
|
||||||
|
@ -85,11 +86,11 @@ public class Util {
|
||||||
* Generic map converter -> since AtomicLongs aren't as comparable.
|
* Generic map converter -> since AtomicLongs aren't as comparable.
|
||||||
* Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
|
* Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
|
||||||
*/
|
*/
|
||||||
public static <K, V> Map<String, Long> atomicInt2StringAndInt(Map<K, V> map) {
|
public static <K, V> Map<MultipleHMKeys, Long> atomicInt2StringAndInt(Map<K, V> map) {
|
||||||
Map m = new HashMap<String, Long>();
|
Map m = new HashMap<MultipleHMKeys, Long>();
|
||||||
|
|
||||||
for (Map.Entry<K, V> e : map.entrySet()) {
|
for (Map.Entry<K, V> e : map.entrySet()) {
|
||||||
m.put(e.getKey().toString(), ((AtomicLong) e.getValue()).longValue());
|
m.put(e.getKey(), ((AtomicLong) e.getValue()).longValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
return m;
|
return m;
|
||||||
|
@ -148,7 +149,7 @@ public class Util {
|
||||||
System.out.println();
|
System.out.println();
|
||||||
}
|
}
|
||||||
|
|
||||||
static long mapSumFrequencies(Map<String, Long> map) {
|
static long mapSumFrequencies(Map<MultipleHMKeys, Long> map) {
|
||||||
long sum = 0;
|
long sum = 0;
|
||||||
|
|
||||||
for (long value : map.values()) {
|
for (long value : map.values()) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user