Added working support for additional combinational word filters

This commit is contained in:
2018-07-16 10:14:21 +02:00
parent e2ce656fc5
commit c073e12f55
8 changed files with 192 additions and 74 deletions

View File

@@ -6,14 +6,11 @@ import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import data.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import data.CalculateFor;
import data.Sentence;
import data.StatisticsNew;
import data.Word;
import gui.ValidationUtil;
public class Ngrams {
@@ -45,9 +42,26 @@ public class Ngrams {
continue;
}
// generate proper MultipleHMKeys depending on filter data
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
String lemma = "";
String wordType = "";
String msd = "";
for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
if(otherKey.toString().equals("lema")){
lemma = wordToString(ngramCandidate, otherKey);
} else if(otherKey.toString().equals("besedna vrsta")){
wordType = wordToString(ngramCandidate, otherKey).substring(0, 1);
} else if(otherKey.toString().equals("oblikoskladenjska oznaka")){
msd = wordToString(ngramCandidate, otherKey);
}
}
MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
// UPDATE TAXONOMY HERE!!!
stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate.get(0).getTaxonomy());
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
stats.updateTaxonomyResults(multipleKeys, ngramCandidate.get(0).getTaxonomy());
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
}
}
}
@@ -129,7 +143,9 @@ public class Ngrams {
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
// TODO: handle punctuation marks?
stats.updateTaxonomyResults(word.substring(i, i + stats.getFilter().getStringLength()), taxonomy);
MultipleHMKeys multipleKeys = new MultipleHMKeys(word.substring(i, i + stats.getFilter().getStringLength()));
stats.updateTaxonomyResults(multipleKeys, taxonomy);
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));