Added some performance measures
This commit is contained in:
parent
179f09c4bd
commit
9b5fa4616b
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,5 +1,6 @@
|
|||
# Created by .ignore support plugin (hsz.mobi)
|
||||
### Maven template
|
||||
src/main/resources/META-INF/
|
||||
target/
|
||||
corpus_analyzer_jar/
|
||||
pom.xml.tag
|
||||
|
|
|
@ -5,6 +5,7 @@ import static data.Enums.solar.SolarFilters.*;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ForkJoinPool;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
|
@ -261,7 +262,7 @@ public class XML_processing {
|
|||
|
||||
if (c3Content.equals(".") && includeThisBlock) {
|
||||
// add sentence to corpus
|
||||
corpus.add(new Sentence(stavek));
|
||||
corpus.add(new Sentence(stavek, null));
|
||||
// and start a new one
|
||||
stavek = new ArrayList<>();
|
||||
|
||||
|
@ -293,7 +294,7 @@ public class XML_processing {
|
|||
|
||||
// "word" node value
|
||||
if (in_word) {
|
||||
stavek.add(new Word(characters.getData(), lemma, msd, null));
|
||||
stavek.add(new Word(characters.getData(), lemma, msd));
|
||||
in_word = false;
|
||||
} else if(inPunctuation){
|
||||
String punctuation = ",";
|
||||
|
@ -543,7 +544,7 @@ public class XML_processing {
|
|||
// "word" node value
|
||||
if (inWord) {
|
||||
String word = characters.getData();
|
||||
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
|
||||
sentence.add(new Word(word, lemma, msd));
|
||||
inWord = false;
|
||||
}
|
||||
if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||
|
@ -588,7 +589,7 @@ public class XML_processing {
|
|||
sentence = runFilters(sentence, stats.getFilter());
|
||||
|
||||
if (!ValidationUtil.isEmpty(sentence)) {
|
||||
corpus.add(new Sentence(sentence));
|
||||
corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
|
||||
}
|
||||
|
||||
// and start a new one
|
||||
|
@ -655,6 +656,7 @@ public class XML_processing {
|
|||
boolean inPunctuation = false;
|
||||
boolean inOrthDiv = false;
|
||||
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
|
||||
boolean inSeparatedWord = false;
|
||||
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
||||
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||
String lemma = "";
|
||||
|
@ -662,7 +664,10 @@ public class XML_processing {
|
|||
|
||||
List<Word> sentence = new ArrayList<>();
|
||||
List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it
|
||||
Map<String, List<Word>> GOSCorpusHM = new ConcurrentHashMap<>();
|
||||
String GOSCorpusHMKey = "";
|
||||
String sentenceDelimiter = "seg";
|
||||
int wordIndex = 0;
|
||||
|
||||
String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm
|
||||
|
||||
|
@ -674,6 +679,8 @@ public class XML_processing {
|
|||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
eventReader = factory.createXMLEventReader(new FileInputStream(path));
|
||||
|
||||
// created hashmap to combine words with normalized words
|
||||
|
||||
while (eventReader.hasNext()) {
|
||||
XMLEvent event = eventReader.nextEvent();
|
||||
// System.out.print(String.format("%s", event.toString().replaceAll("\\['http://www.tei-c.org/ns/1.0'\\]::", "")));
|
||||
|
@ -711,6 +718,8 @@ public class XML_processing {
|
|||
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
||||
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
||||
// }
|
||||
} else if (atts.containsKey("type") && atts.get("type").equals("separated")) {
|
||||
inSeparatedWord = true;
|
||||
}
|
||||
|
||||
// }
|
||||
|
@ -730,38 +739,91 @@ public class XML_processing {
|
|||
}
|
||||
} else if (qName.equalsIgnoreCase("div")) {
|
||||
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
|
||||
} else if (qName.equalsIgnoreCase("seg")) {
|
||||
HashMap<String, String> atts = extractAttributes(startElement);
|
||||
|
||||
if (atts.keySet().contains("id")) {
|
||||
if (inOrthDiv) {
|
||||
GOSCorpusHMKey = atts.get("id") + ".norm";
|
||||
} else {
|
||||
GOSCorpusHMKey = atts.get("id");
|
||||
}
|
||||
} else {
|
||||
System.out.println("No attribute \"id\"");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case XMLStreamConstants.CHARACTERS:
|
||||
// "word" node value
|
||||
if (inWord) {
|
||||
// if (GOSCorpusHMKey.equals("gos.028-0108.norm") && wordIndex > 8){
|
||||
// System.out.println(wordIndex);
|
||||
// }
|
||||
// if the parser is inside the orthographic ("orth") div, add a new word to the sentence
|
||||
if (inOrthDiv){
|
||||
// GOSCorpusHM.put(GOSCorpusHMKey, sentence);
|
||||
String word = "";
|
||||
Characters characters = event.asCharacters();
|
||||
if (gosType.equals("norm") && msd != null) {
|
||||
sentence.add(new Word(characters.getData(), lemma, msd, currentFiletaxonomyLong));
|
||||
sentence.add(new Word(characters.getData(), "", ""));
|
||||
// if the parser is inside the normalized ("norm") div, find the matching orthographic word and add the other info to it
|
||||
} else {
|
||||
sentence.add(new Word(characters.getData(), lemma, msd, currentFiletaxonomyLong));
|
||||
Characters characters = event.asCharacters();
|
||||
// System.out.println(wordIndex);
|
||||
// System.out.println(GOSCorpusHMKey + " " + lemma + " " + wordIndex);
|
||||
if (wordIndex < GOSCorpusHM.get(GOSCorpusHMKey).size()) {
|
||||
Word currentWord = GOSCorpusHM.get(GOSCorpusHMKey).get(wordIndex);
|
||||
currentWord.setLemma(lemma);
|
||||
currentWord.setMsd(msd);
|
||||
currentWord.setNormalizedWord(characters.getData());
|
||||
|
||||
wordIndex += 1;
|
||||
|
||||
// when a word is separated from one to many we have to create these duplicates
|
||||
if (inSeparatedWord){
|
||||
GOSCorpusHM.get(GOSCorpusHMKey).add(wordIndex, new Word(currentWord.getWord(), "", ""));
|
||||
}
|
||||
} //else {
|
||||
// System.out.println("Error");
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
inWord = false;
|
||||
}
|
||||
break;
|
||||
|
||||
case XMLStreamConstants.END_ELEMENT:
|
||||
EndElement endElement = event.asEndElement();
|
||||
|
||||
// parser reached end of the current sentence
|
||||
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
||||
// add sentence to corpus if it passes filters
|
||||
boolean saveSentence = computeForOrth == inOrthDiv;
|
||||
if (endElement.getName().getLocalPart().equals("w")) {
|
||||
if (inWord){
|
||||
inWord = false;
|
||||
} else if(inSeparatedWord) {
|
||||
// when there are no separated words left we have to delete the last additional duplicate
|
||||
GOSCorpusHM.get(GOSCorpusHMKey).remove(wordIndex);
|
||||
|
||||
if (includeFile && saveSentence && !ValidationUtil.isEmpty(sentence)) {
|
||||
sentence = runFilters(sentence, stats.getFilter());
|
||||
corpus.add(new Sentence(sentence));
|
||||
inSeparatedWord = false;
|
||||
}
|
||||
}
|
||||
|
||||
// and start a new one
|
||||
sentence = new ArrayList<>();
|
||||
// parser reached end of the current sentence
|
||||
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
||||
if (inOrthDiv){
|
||||
// add sentence to corpus
|
||||
GOSCorpusHM.put(GOSCorpusHMKey, sentence);
|
||||
} else {
|
||||
|
||||
|
||||
sentence = GOSCorpusHM.remove(GOSCorpusHMKey);
|
||||
// add sentence to corpus if it passes filters
|
||||
if (includeFile && !ValidationUtil.isEmpty(sentence)) {
|
||||
sentence = runFilters(sentence, stats.getFilter());
|
||||
corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
|
||||
}
|
||||
|
||||
wordIndex = 0;
|
||||
|
||||
|
||||
|
||||
/* Invoke Fork-Join when we reach maximum limit of
|
||||
* sentences (because we can't read everything to
|
||||
|
@ -773,6 +835,11 @@ public class XML_processing {
|
|||
// the data anymore
|
||||
corpus.clear();
|
||||
}
|
||||
}
|
||||
// start a new sentence
|
||||
sentence = new ArrayList<>();
|
||||
|
||||
|
||||
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
|
||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
|
||||
|
|
|
@ -122,9 +122,9 @@ public class InflectedJOSCount {
|
|||
static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||||
for (Sentence s : corpus) {
|
||||
// disregard if wrong taxonomy
|
||||
if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
|
||||
continue;
|
||||
}
|
||||
// if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
|
||||
// continue;
|
||||
// }
|
||||
|
||||
for (Word word : s.getWords()) {
|
||||
// skip if current word is not inflected
|
||||
|
|
|
@ -3,9 +3,11 @@ package alg.ngram;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.sun.xml.internal.bind.v2.runtime.reflect.Lister;
|
||||
import data.*;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
|
@ -28,6 +30,9 @@ public class Ngrams {
|
|||
}
|
||||
|
||||
public static void generateNgramCandidates(List<Sentence> corpus, StatisticsNew stats) {
|
||||
// preprocess CalculateFor for this corpus and prepare data for MultipleHMKeys
|
||||
ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
|
||||
|
||||
for (Sentence s : corpus) {
|
||||
// skip sentences shorter than specified ngram length
|
||||
if (s.getWords().size() < stats.getFilter().getNgramValue()) {
|
||||
|
@ -46,29 +51,62 @@ public class Ngrams {
|
|||
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
|
||||
|
||||
// if last letter is ',' erase it
|
||||
key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
// String key = "aaaaaaaaaaaaaaaaaaaaaaa";
|
||||
|
||||
String lemma = "";
|
||||
String wordType = "";
|
||||
String msd = "";
|
||||
for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
|
||||
if(otherKey.toString().equals("lema")){
|
||||
// lemma = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
|
||||
lemma = wordToString(ngramCandidate, otherKey);
|
||||
} else if(otherKey.toString().equals("besedna vrsta")){
|
||||
wordType = wordToString(ngramCandidate, otherKey).substring(0, 1);
|
||||
} else if(otherKey.toString().equals("oblikoskladenjska oznaka")){
|
||||
msd = wordToString(ngramCandidate, otherKey);
|
||||
}
|
||||
// if (key.equals("")){
|
||||
// String test = key;
|
||||
// }
|
||||
|
||||
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
|
||||
MultipleHMKeys multipleKeys;
|
||||
|
||||
// create MultipleHMKeys for different amount of other keys
|
||||
switch (otherKeys.size()) {
|
||||
case 0:
|
||||
multipleKeys = new MultipleHMKeys1(key);
|
||||
break;
|
||||
case 1:
|
||||
multipleKeys = new MultipleHMKeys2(key, wordToString(ngramCandidate, otherKeys.get(0)));
|
||||
break;
|
||||
case 2:
|
||||
multipleKeys = new MultipleHMKeys3(key, wordToString(ngramCandidate, otherKeys.get(0)),
|
||||
wordToString(ngramCandidate, otherKeys.get(1)));
|
||||
break;
|
||||
case 3:
|
||||
multipleKeys = new MultipleHMKeys4(key, wordToString(ngramCandidate, otherKeys.get(0)),
|
||||
wordToString(ngramCandidate, otherKeys.get(1)),
|
||||
wordToString(ngramCandidate, otherKeys.get(2)));
|
||||
break;
|
||||
case 4:
|
||||
multipleKeys = new MultipleHMKeys5(key, wordToString(ngramCandidate, otherKeys.get(0)),
|
||||
wordToString(ngramCandidate, otherKeys.get(1)),
|
||||
wordToString(ngramCandidate, otherKeys.get(2)),
|
||||
wordToString(ngramCandidate, otherKeys.get(3)));
|
||||
break;
|
||||
default:
|
||||
multipleKeys = null;
|
||||
}
|
||||
|
||||
|
||||
// String lemma = "";
|
||||
// String wordType = "";
|
||||
// String msd = "";
|
||||
// for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
|
||||
// if(otherKey.toString().equals("lema")){
|
||||
// lemma = wordToString(ngramCandidate, otherKey);
|
||||
// } else if(otherKey.toString().equals("besedna vrsta")){
|
||||
// wordType = wordToString(ngramCandidate, otherKey).substring(0, 1);
|
||||
// } else if(otherKey.toString().equals("oblikoskladenjska oznaka")){
|
||||
// msd = wordToString(ngramCandidate, otherKey);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
|
||||
|
||||
|
||||
MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
|
||||
|
||||
// UPDATE TAXONOMY HERE!!!
|
||||
stats.updateTaxonomyResults(multipleKeys, ngramCandidate.get(0).getTaxonomy());
|
||||
stats.updateTaxonomyResults(multipleKeys, s.getTaxonomy());
|
||||
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
||||
}
|
||||
}
|
||||
|
@ -102,26 +140,31 @@ public class Ngrams {
|
|||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.collect(Collectors.toList()));
|
||||
break;
|
||||
return StringUtils.join(candidate, " ");
|
||||
case WORD:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.collect(Collectors.toList()));
|
||||
break;
|
||||
return StringUtils.join(candidate, " ");
|
||||
case MORPHOSYNTACTIC_SPECS:
|
||||
case MORPHOSYNTACTIC_PROPERTY:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(Word::getMsd)
|
||||
.collect(Collectors.toList()));
|
||||
break;
|
||||
return StringUtils.join(candidate, " ");
|
||||
case WORD_TYPE:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(w -> Character.toString(w.getMsd().charAt(0)))
|
||||
.collect(Collectors.toList()));
|
||||
break;
|
||||
// candidate.addAll(ngramCandidate
|
||||
// .stream()
|
||||
// .map(w -> Character.toString(w.getMsd().charAt(0)))
|
||||
// .collect(Collectors.toList()));
|
||||
// .substring(0, 1)
|
||||
return StringUtils.join(candidate, " ");
|
||||
}
|
||||
|
||||
return StringUtils.join(candidate, " ");
|
||||
|
@ -136,7 +179,7 @@ public class Ngrams {
|
|||
private static void generateNgramLetterCandidates(List<Sentence> corpus, StatisticsNew stats) {
|
||||
for (Sentence s : corpus) {
|
||||
for (Word w : s.getWords()) {
|
||||
List<String> taxonomy = w.getTaxonomy();
|
||||
List<String> taxonomy = s.getTaxonomy();
|
||||
String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv());
|
||||
|
||||
// skip this iteration if:
|
||||
|
@ -152,7 +195,7 @@ public class Ngrams {
|
|||
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
|
||||
// TODO: locila?
|
||||
|
||||
MultipleHMKeys multipleKeys = new MultipleHMKeys(word.substring(i, i + stats.getFilter().getStringLength()));
|
||||
MultipleHMKeys multipleKeys = new MultipleHMKeys1(word.substring(i, i + stats.getFilter().getStringLength()));
|
||||
stats.updateTaxonomyResults(multipleKeys, taxonomy);
|
||||
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
||||
|
||||
|
@ -183,8 +226,7 @@ public class Ngrams {
|
|||
String punctuation = ",";
|
||||
return new Word(sentence.get(i).getWord() + punctuation,
|
||||
sentence.get(i).getLemma() + punctuation,
|
||||
sentence.get(i).getMsd() + punctuation,
|
||||
sentence.get(i).getTaxonomy());
|
||||
sentence.get(i).getMsd() + punctuation);
|
||||
}
|
||||
}
|
||||
return sentence.get(i);
|
||||
|
@ -204,6 +246,10 @@ public class Ngrams {
|
|||
for (Sentence s : corpus) {
|
||||
List<Word> sentence = s.getWords();
|
||||
|
||||
if (sentence == null){
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i <= sentence.size() - ngram; i++) { // 1gram
|
||||
for (int j = i + 1; j <= i + skip + 1; j++) { // 2gram
|
||||
if (ngram == 2 && j < sentence.size()) {
|
||||
|
@ -260,7 +306,7 @@ public class Ngrams {
|
|||
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) {
|
||||
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
|
||||
key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
stats.updateTaxonomyResults(new MultipleHMKeys(key, "", "", ""),
|
||||
stats.updateTaxonomyResults(new MultipleHMKeys1(key),
|
||||
stats.getCorpus().getTaxonomy());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,79 +89,79 @@ class WordCount {
|
|||
}
|
||||
}
|
||||
|
||||
private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
|
||||
for (Sentence s : corpus) {
|
||||
if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
|
||||
List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
List<Word> filteredWords = new ArrayList<>();
|
||||
// private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
|
||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
// List<Word> filteredWords = new ArrayList<>();
|
||||
//
|
||||
// for (Word word : s.getWords()) {
|
||||
// if (word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
|
||||
// filteredWords.add(word);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
// sentence.addAll(filteredWords
|
||||
// .stream()
|
||||
// .map(Word::getLemma)
|
||||
// .collect(Collectors.toList()));
|
||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
||||
// sentence.addAll(filteredWords
|
||||
// .stream()
|
||||
// .map(Word::getWord)
|
||||
// .collect(Collectors.toList()));
|
||||
// }
|
||||
//
|
||||
// for (String word : sentence) {
|
||||
// Common.updateMap(stats.result, word);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
for (Word word : s.getWords()) {
|
||||
if (word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
|
||||
filteredWords.add(word);
|
||||
}
|
||||
}
|
||||
// private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
|
||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
//
|
||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getLemma)
|
||||
// .collect(Collectors.toList()));
|
||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getWord)
|
||||
// .collect(Collectors.toList()));
|
||||
// }
|
||||
//
|
||||
// for (String word : sentence) {
|
||||
// Common.updateMap(stats.result, word);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
sentence.addAll(filteredWords
|
||||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.collect(Collectors.toList()));
|
||||
} else if (stats.getCf() == CalculateFor.WORD) {
|
||||
sentence.addAll(filteredWords
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
for (String word : sentence) {
|
||||
Common.updateMap(stats.result, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
|
||||
for (Sentence s : corpus) {
|
||||
if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
|
||||
List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
|
||||
if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.collect(Collectors.toList()));
|
||||
} else if (stats.getCf() == CalculateFor.WORD) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
for (String word : sentence) {
|
||||
Common.updateMap(stats.result, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
||||
boolean taxonomyIsSet = stats.isTaxonomySet();
|
||||
boolean JosTypeIsSet = stats.isJOSTypeSet();
|
||||
|
||||
// branching because even though the only difference is an if or two &&
|
||||
// O(if) = 1, the amount of ifs adds up and this saves some time
|
||||
if (taxonomyIsSet && JosTypeIsSet) {
|
||||
calculateForTaxonomyAndJosType(corpus, stats);
|
||||
} else if (taxonomyIsSet && !JosTypeIsSet) {
|
||||
calculateForTaxonomy(corpus, stats);
|
||||
} else if (!taxonomyIsSet && JosTypeIsSet) {
|
||||
calculateForJosType(corpus, stats);
|
||||
} else {
|
||||
if (stats.isVcc()) {
|
||||
calculateVCC(corpus, stats);
|
||||
} else {
|
||||
calculateNoFilter(corpus, stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
// static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
||||
// boolean taxonomyIsSet = stats.isTaxonomySet();
|
||||
// boolean JosTypeIsSet = stats.isJOSTypeSet();
|
||||
//
|
||||
// // branching because even though the only difference is an if or two &&
|
||||
// // O(if) = 1, the amount of ifs adds up and this saves some time
|
||||
// if (taxonomyIsSet && JosTypeIsSet) {
|
||||
// calculateForTaxonomyAndJosType(corpus, stats);
|
||||
// } else if (taxonomyIsSet && !JosTypeIsSet) {
|
||||
// calculateForTaxonomy(corpus, stats);
|
||||
// } else if (!taxonomyIsSet && JosTypeIsSet) {
|
||||
// calculateForJosType(corpus, stats);
|
||||
// } else {
|
||||
// if (stats.isVcc()) {
|
||||
// calculateVCC(corpus, stats);
|
||||
// } else {
|
||||
// calculateNoFilter(corpus, stats);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
}
|
|
@ -5,49 +5,16 @@ import java.util.Objects;
|
|||
/*
|
||||
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
|
||||
*/
|
||||
public final class MultipleHMKeys {
|
||||
private final String key, lemma, wordType, msd;
|
||||
private MultipleHMKeys actual_obj;
|
||||
public MultipleHMKeys(String key) {
|
||||
this.key = key;
|
||||
this.lemma = "";
|
||||
this.wordType = "";
|
||||
this.msd = "";
|
||||
}
|
||||
|
||||
public MultipleHMKeys(String key, String lemma, String wordType, String msd) {
|
||||
this.key = key;
|
||||
this.lemma = lemma;
|
||||
this.wordType = wordType;
|
||||
this.msd = msd;
|
||||
}
|
||||
|
||||
public String getKey() {
|
||||
return key;
|
||||
}
|
||||
|
||||
public String getLemma() {
|
||||
return lemma;
|
||||
}
|
||||
|
||||
public String getWordType() {
|
||||
return wordType;
|
||||
}
|
||||
|
||||
public String getMsd() {
|
||||
return msd;
|
||||
}
|
||||
public interface MultipleHMKeys {
|
||||
String getK1();
|
||||
String getK2();
|
||||
String getK3();
|
||||
String getK4();
|
||||
String getK5();
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(key, lemma, wordType, msd);
|
||||
}
|
||||
int hashCode();
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key)
|
||||
&& ((MultipleHMKeys) obj).lemma.equals(lemma)
|
||||
&& ((MultipleHMKeys) obj).wordType.equals(wordType)
|
||||
&& ((MultipleHMKeys) obj).msd.equals(msd);
|
||||
}
|
||||
boolean equals(Object obj);
|
||||
}
|
||||
|
|
44
src/main/java/data/MultipleHMKeys1.java
Executable file
44
src/main/java/data/MultipleHMKeys1.java
Executable file
|
@ -0,0 +1,44 @@
|
|||
package data;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/*
|
||||
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
|
||||
*/
|
||||
public final class MultipleHMKeys1 implements MultipleHMKeys {
|
||||
private final String k1;
|
||||
|
||||
public MultipleHMKeys1(String k1) {
|
||||
this.k1 = k1;
|
||||
}
|
||||
|
||||
public String getK1() {
|
||||
return k1;
|
||||
}
|
||||
|
||||
public String getK2() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getK3() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getK4() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getK5() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return k1.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof MultipleHMKeys1) && ((MultipleHMKeys1) obj).k1.equals(k1);
|
||||
}
|
||||
}
|
49
src/main/java/data/MultipleHMKeys2.java
Executable file
49
src/main/java/data/MultipleHMKeys2.java
Executable file
|
@ -0,0 +1,49 @@
|
|||
package data;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/*
|
||||
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
|
||||
*/
|
||||
public final class MultipleHMKeys2 implements MultipleHMKeys {
|
||||
private final String k1, k2;
|
||||
|
||||
public MultipleHMKeys2(String k1, String k2) {
|
||||
this.k1 = k1;
|
||||
this.k2 = k2;
|
||||
}
|
||||
|
||||
public String getK1() {
|
||||
return k1;
|
||||
}
|
||||
|
||||
public String getK2() {
|
||||
return k2;
|
||||
}
|
||||
|
||||
public String getK3() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getK4() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getK5() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(k1, k2);
|
||||
// return key.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof MultipleHMKeys2) && ((MultipleHMKeys2) obj).k1.equals(k1)
|
||||
&& ((MultipleHMKeys2) obj).k2.equals(k2);
|
||||
|
||||
// return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key);
|
||||
}
|
||||
}
|
48
src/main/java/data/MultipleHMKeys3.java
Executable file
48
src/main/java/data/MultipleHMKeys3.java
Executable file
|
@ -0,0 +1,48 @@
|
|||
package data;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/*
|
||||
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
|
||||
*/
|
||||
public final class MultipleHMKeys3 implements MultipleHMKeys {
|
||||
private final String k1, k2, k3;
|
||||
|
||||
public MultipleHMKeys3(String k1, String k2, String k3) {
|
||||
this.k1 = k1;
|
||||
this.k2 = k2;
|
||||
this.k3 = k3;
|
||||
}
|
||||
|
||||
public String getK1() {
|
||||
return k1;
|
||||
}
|
||||
|
||||
public String getK2() {
|
||||
return k2;
|
||||
}
|
||||
|
||||
public String getK3() {
|
||||
return k3;
|
||||
}
|
||||
|
||||
public String getK4() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getK5() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(k1, k2, k3);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof MultipleHMKeys3) && ((MultipleHMKeys3) obj).k1.equals(k1)
|
||||
&& ((MultipleHMKeys3) obj).k2.equals(k2)
|
||||
&& ((MultipleHMKeys3) obj).k3.equals(k3);
|
||||
}
|
||||
}
|
50
src/main/java/data/MultipleHMKeys4.java
Executable file
50
src/main/java/data/MultipleHMKeys4.java
Executable file
|
@ -0,0 +1,50 @@
|
|||
package data;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/*
|
||||
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
|
||||
*/
|
||||
public final class MultipleHMKeys4 implements MultipleHMKeys {
|
||||
private final String k1, k2, k3, k4;
|
||||
|
||||
public MultipleHMKeys4(String k1, String k2, String k3, String k4) {
|
||||
this.k1 = k1;
|
||||
this.k2 = k2;
|
||||
this.k3 = k3;
|
||||
this.k4 = k4;
|
||||
}
|
||||
|
||||
public String getK1() {
|
||||
return k1;
|
||||
}
|
||||
|
||||
public String getK2() {
|
||||
return k2;
|
||||
}
|
||||
|
||||
public String getK3() {
|
||||
return k3;
|
||||
}
|
||||
|
||||
public String getK4() {
|
||||
return k4;
|
||||
}
|
||||
|
||||
public String getK5() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(k1, k2, k3, k4);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof MultipleHMKeys4) && ((MultipleHMKeys4) obj).k1.equals(k1)
|
||||
&& ((MultipleHMKeys4) obj).k2.equals(k2)
|
||||
&& ((MultipleHMKeys4) obj).k3.equals(k3)
|
||||
&& ((MultipleHMKeys4) obj).k4.equals(k4);
|
||||
}
|
||||
}
|
52
src/main/java/data/MultipleHMKeys5.java
Executable file
52
src/main/java/data/MultipleHMKeys5.java
Executable file
|
@ -0,0 +1,52 @@
|
|||
package data;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/*
|
||||
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
|
||||
*/
|
||||
public final class MultipleHMKeys5 implements MultipleHMKeys {
|
||||
private final String k1, k2, k3, k4, k5;
|
||||
|
||||
public MultipleHMKeys5(String k1, String k2, String k3, String k4, String k5) {
|
||||
this.k1 = k1;
|
||||
this.k2 = k2;
|
||||
this.k3 = k3;
|
||||
this.k4 = k4;
|
||||
this.k5 = k5;
|
||||
}
|
||||
|
||||
public String getK1() {
|
||||
return k1;
|
||||
}
|
||||
|
||||
public String getK2() {
|
||||
return k2;
|
||||
}
|
||||
|
||||
public String getK3() {
|
||||
return k3;
|
||||
}
|
||||
|
||||
public String getK4() {
|
||||
return k4;
|
||||
}
|
||||
|
||||
public String getK5() {
|
||||
return k5;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(k1, k2, k3, k4, k5);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof MultipleHMKeys5) && ((MultipleHMKeys5) obj).k1.equals(k1)
|
||||
&& ((MultipleHMKeys5) obj).k2.equals(k2)
|
||||
&& ((MultipleHMKeys5) obj).k3.equals(k3)
|
||||
&& ((MultipleHMKeys5) obj).k4.equals(k4)
|
||||
&& ((MultipleHMKeys5) obj).k5.equals(k5);
|
||||
}
|
||||
}
|
|
@ -7,30 +7,30 @@ public class Sentence {
|
|||
|
||||
|
||||
private List<Word> words;
|
||||
private String taksonomija;
|
||||
private List<String> taxonomy;
|
||||
|
||||
// GOS
|
||||
private String type;
|
||||
private Map<String, String> properties;
|
||||
|
||||
public Sentence(List<Word> words, String taksonomija) {
|
||||
public Sentence(List<Word> words, List<String> taxonomy) {
|
||||
this.words = words;
|
||||
this.taksonomija = taksonomija;
|
||||
this.taxonomy = taxonomy;
|
||||
}
|
||||
|
||||
public Sentence(List<Word> words) {
|
||||
this.words = words;
|
||||
}
|
||||
// public Sentence(List<Word> words) {
|
||||
// this.words = words;
|
||||
// }
|
||||
|
||||
public Sentence(List<Word> words, String taksonomija, Map<String, String> properties) {
|
||||
public Sentence(List<Word> words, List<String> taxonomy, Map<String, String> properties) {
|
||||
this.words = words;
|
||||
this.taksonomija = taksonomija;
|
||||
this.taxonomy = taxonomy;
|
||||
this.properties = properties;
|
||||
}
|
||||
|
||||
public Sentence(List<Word> words, String taksonomija, String type) {
|
||||
public Sentence(List<Word> words, List<String> taxonomy, String type) {
|
||||
this.words = words;
|
||||
this.taksonomija = taksonomija;
|
||||
this.taxonomy = taxonomy;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
|
@ -38,8 +38,8 @@ public class Sentence {
|
|||
return words;
|
||||
}
|
||||
|
||||
public String getTaxonomy() {
|
||||
return taksonomija;
|
||||
public List<String> getTaxonomy() {
|
||||
return taxonomy;
|
||||
}
|
||||
|
||||
public List<Word> getSublist(int indexFrom, int indexTo) {
|
||||
|
|
|
@ -213,7 +213,7 @@ public class StatisticsNew {
|
|||
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
|
||||
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
|
||||
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
|
||||
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult);
|
||||
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult, filter);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -376,7 +376,7 @@ public class StatisticsNew {
|
|||
}
|
||||
|
||||
public void updateResultsNestedSuffix(String key, String stringValue) {
|
||||
MultipleHMKeys mkStringValue = new MultipleHMKeys(stringValue);
|
||||
MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);
|
||||
|
||||
if (resultNestedSuffix.containsKey(key)) {
|
||||
// if not in map
|
||||
|
@ -397,7 +397,7 @@ public class StatisticsNew {
|
|||
}
|
||||
|
||||
public void updateResultsNestedPrefix(String key, String stringValue) {
|
||||
MultipleHMKeys mkStringValue = new MultipleHMKeys(stringValue);
|
||||
MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);
|
||||
|
||||
if (resultNestedPrefix.containsKey(key)) {
|
||||
// if not in map
|
||||
|
|
|
@ -16,8 +16,7 @@ public class Word implements Serializable {
|
|||
private String word;
|
||||
private String lemma;
|
||||
private String msd;
|
||||
// private String msd;
|
||||
private List<String> taxonomy;
|
||||
private String normalizedWord;
|
||||
private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));
|
||||
|
||||
/**
|
||||
|
@ -41,7 +40,8 @@ public class Word implements Serializable {
|
|||
//private char besedna_vrsta;
|
||||
public Word(String word, String lemma, String msd) {
|
||||
this.lemma = lemma;
|
||||
this.msd = normalizeMsd(msd);
|
||||
this.msd = msd; //normalizeMsd(msd);
|
||||
this.normalizedWord = "";
|
||||
|
||||
// veliko zacetnico ohranimo samo za lastna imena
|
||||
if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S'
|
||||
|
@ -53,12 +53,11 @@ public class Word implements Serializable {
|
|||
}
|
||||
}
|
||||
|
||||
//private char besedna_vrsta;
|
||||
public Word(String word, String lemma, String msd, List<String> taxonomy) {
|
||||
public Word(String word, String lemma, String msd, String normalizedWord) {
|
||||
this.lemma = lemma;
|
||||
// this.msd = normalizeMsd(msd);
|
||||
this.msd = msd;
|
||||
this.taxonomy = taxonomy;
|
||||
this.normalizedWord = normalizedWord;
|
||||
|
||||
// veliko zacetnico ohranimo samo za lastna imena
|
||||
if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S'
|
||||
|
@ -73,21 +72,21 @@ public class Word implements Serializable {
|
|||
public Word() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends a number of '-' to msds which are not properly sized.
|
||||
* E.g. nouns should have 5 attributes, yet the last one isn't always defined (Somei vs. Sometd)
|
||||
*
|
||||
* @param msdInput
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private String normalizeMsd(String msdInput) {
|
||||
if (ValidationUtil.isEmpty(msdInput)) {
|
||||
return "";
|
||||
} else {
|
||||
return StringUtils.rightPad(msdInput, Msd.getMsdLengthForType(msdInput), PAD_CHARACTER);
|
||||
}
|
||||
}
|
||||
// /**
|
||||
// * Appends a number of '-' to msds which are not properly sized.
|
||||
// * E.g. nouns should have 5 attributes, yet the last one isn't always defined (Somei vs. Sometd)
|
||||
// *
|
||||
// * @param msdInput
|
||||
// *
|
||||
// * @return
|
||||
// */
|
||||
// private String normalizeMsd(String msdInput) {
|
||||
// if (ValidationUtil.isEmpty(msdInput)) {
|
||||
// return "";
|
||||
// } else {
|
||||
// return StringUtils.rightPad(msdInput, Msd.getMsdLengthForType(msdInput), PAD_CHARACTER);
|
||||
// }
|
||||
// }
|
||||
|
||||
public Word(String word) {
|
||||
this.word = word;
|
||||
|
@ -119,10 +118,6 @@ public class Word implements Serializable {
|
|||
this.word = word;
|
||||
}
|
||||
|
||||
public List<String> getTaxonomy() {
|
||||
return taxonomy;
|
||||
}
|
||||
|
||||
public String getLemma() {
|
||||
return lemma;
|
||||
}
|
||||
|
@ -139,6 +134,14 @@ public class Word implements Serializable {
|
|||
this.msd = msd;
|
||||
}
|
||||
|
||||
public String getNormalizedWord() {
|
||||
return normalizedWord;
|
||||
}
|
||||
|
||||
public void setNormalizedWord(String normalizedWord) {
|
||||
this.normalizedWord = normalizedWord;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
|
@ -150,6 +153,8 @@ public class Word implements Serializable {
|
|||
.append("\n")
|
||||
.append("msd:\t")
|
||||
.append(getMsd())
|
||||
.append("normalized word:\t")
|
||||
.append(getNormalizedWord())
|
||||
.append("\n");
|
||||
|
||||
return sb.toString();
|
||||
|
|
|
@ -9,6 +9,7 @@ import java.util.concurrent.ConcurrentHashMap;
|
|||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import data.CalculateFor;
|
||||
import data.Filter;
|
||||
import data.MultipleHMKeys;
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
|
@ -59,7 +60,7 @@ public class Export {
|
|||
}
|
||||
|
||||
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults) {
|
||||
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults, Filter filter) {
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
||||
|
@ -98,8 +99,10 @@ public class Export {
|
|||
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
||||
if (headerInfoBlock.get("Analiza").equals("Besede")){
|
||||
FILE_HEADER_AL.add("Lema");
|
||||
FILE_HEADER_AL.add("Lema male črke");
|
||||
} else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
|
||||
FILE_HEADER_AL.add("Leme");
|
||||
FILE_HEADER_AL.add("Leme male črke");
|
||||
}
|
||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
||||
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
||||
|
@ -111,25 +114,26 @@ public class Export {
|
|||
} else {
|
||||
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||
FILE_HEADER_AL.add("Lema");
|
||||
FILE_HEADER_AL.add("Lema male črke");
|
||||
}
|
||||
|
||||
|
||||
for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
|
||||
for (MultipleHMKeys key : value.keySet()){
|
||||
if(!key.getLemma().equals("")){
|
||||
// for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
|
||||
for (CalculateFor otherKey : filter.getMultipleKeys()){
|
||||
if(otherKey.equals(CalculateFor.LEMMA)){
|
||||
FILE_HEADER_AL.add("Lema");
|
||||
FILE_HEADER_AL.add("Lema male črke");
|
||||
}
|
||||
if(!key.getWordType().equals("")){
|
||||
if(otherKey.equals(CalculateFor.WORD_TYPE)){
|
||||
FILE_HEADER_AL.add("Besedna vrsta");
|
||||
}
|
||||
if(!key.getMsd().equals("")){
|
||||
if(otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
// break;
|
||||
// }
|
||||
|
||||
|
||||
|
||||
|
@ -198,16 +202,47 @@ public class Export {
|
|||
|
||||
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
||||
List dataEntry = new ArrayList<>();
|
||||
dataEntry.add(e.getKey().getKey());
|
||||
if(!e.getKey().getLemma().equals("")){
|
||||
dataEntry.add(e.getKey().getLemma());
|
||||
dataEntry.add(e.getKey().getK1());
|
||||
if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi")) &&
|
||||
headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")){
|
||||
dataEntry.add(e.getKey().getK1().toLowerCase());
|
||||
}
|
||||
if(!e.getKey().getWordType().equals("")){
|
||||
dataEntry.add(e.getKey().getWordType());
|
||||
|
||||
int i = 0;
|
||||
for (CalculateFor otherKey : filter.getMultipleKeys()){
|
||||
switch(i){
|
||||
case 0:
|
||||
if (otherKey.equals(CalculateFor.LEMMA)){
|
||||
dataEntry.add(e.getKey().getK2());
|
||||
dataEntry.add(e.getKey().getK2().toLowerCase());
|
||||
} else {
|
||||
dataEntry.add(e.getKey().getK2());
|
||||
}
|
||||
if(!e.getKey().getMsd().equals("")){
|
||||
dataEntry.add(e.getKey().getMsd());
|
||||
break;
|
||||
case 1:
|
||||
dataEntry.add(e.getKey().getK3());
|
||||
break;
|
||||
case 2:
|
||||
dataEntry.add(e.getKey().getK4());
|
||||
break;
|
||||
case 3:
|
||||
dataEntry.add(e.getKey().getK5());
|
||||
break;
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
// if(!e.getKey().getLemma().equals("")){
|
||||
// dataEntry.add(e.getKey().getLemma());
|
||||
// dataEntry.add(e.getKey().getLemma().toLowerCase());
|
||||
// }
|
||||
// if(!e.getKey().getWordType().equals("")){
|
||||
// dataEntry.add(e.getKey().getWordType());
|
||||
// }
|
||||
// if(!e.getKey().getMsd().equals("")){
|
||||
// dataEntry.add(e.getKey().getMsd());
|
||||
// }
|
||||
dataEntry.add(e.getValue().toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
||||
|
|
|
@ -55,7 +55,7 @@ public class Util {
|
|||
}
|
||||
|
||||
public static String formatNumberAsPercent(Object o) {
|
||||
return MessageFormat.format("{0,number,#.###%}", o);
|
||||
return MessageFormat.format("{0,number,#.### %}", o).replace('.', ',');
|
||||
}
|
||||
|
||||
private static boolean isInstanceOfInteger(Object o) {
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
<AnchorPane fx:id="characterAnalysisTab" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.112" xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.CharacterAnalysisTab">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Število črk" />
|
||||
<TextField fx:id="stringLengthTF" layoutX="100.0" layoutY="20.0" prefWidth="180.0" />
|
||||
<TextField fx:id="stringLengthTF" layoutX="185.0" layoutY="20.0" prefWidth="180.0" />
|
||||
|
||||
<HBox layoutX="10.0" layoutY="60.0">
|
||||
<children>
|
||||
|
@ -29,15 +29,15 @@
|
|||
</HBox>
|
||||
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" />
|
||||
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD" />
|
||||
<TextField fx:id="msdTF" layoutX="100.0" layoutY="160.0" prefWidth="180.0" />
|
||||
<TextField fx:id="msdTF" layoutX="185.0" layoutY="160.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija" />
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0" />
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Minimalno število pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="100.0" layoutY="240.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Minimalno število taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="100.0" layoutY="280.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
|
||||
|
||||
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
|
||||
<children>
|
||||
|
@ -45,7 +45,7 @@
|
|||
</children>
|
||||
</Pane>
|
||||
|
||||
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
|
||||
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="422.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
|
||||
</Pane>
|
||||
|
||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:" />
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
<Pane/>
|
||||
<Button fx:id="chooseCorpusLocationB" layoutX="10.0" layoutY="20.0" mnemonicParsing="false"
|
||||
text="Nastavi lokacijo korpusa"/>
|
||||
<CheckBox fx:id="readHeaderInfoChB" layoutX="176.0" layoutY="24.0" mnemonicParsing="false"
|
||||
<CheckBox fx:id="readHeaderInfoChB" layoutX="185.0" layoutY="24.0" mnemonicParsing="false"
|
||||
text="Preberi info iz headerjev"/>
|
||||
<Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0">
|
||||
<children>
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.OneWordAnalysisTab">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za"/>
|
||||
<ComboBox fx:id="calculateForCB" layoutX="100.0" layoutY="20.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
|
||||
<ComboBox fx:id="calculateForCB" layoutX="185.0" layoutY="20.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
|
||||
visibleRowCount="5">
|
||||
<items>
|
||||
<FXCollections fx:factory="observableArrayList">
|
||||
|
@ -30,30 +30,31 @@
|
|||
</items>
|
||||
</ComboBox>
|
||||
|
||||
<Label layoutX="300.0" layoutY="20.0" prefHeight="25.0" text="Izpiši tudi:" />
|
||||
<CheckComboBox fx:id="alsoVisualizeCCB" layoutX="400.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
|
||||
<CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
<!-- MSD and Taxonomy separated -->
|
||||
|
||||
<Label layoutX="10.0" layoutY="80.0" prefHeight="25.0" text="Omejitev podatkov" />
|
||||
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Oznaka MSD"/>
|
||||
<TextField fx:id="msdTF" layoutX="100.0" layoutY="120.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="160.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD"/>
|
||||
<TextField fx:id="msdTF" layoutX="185.0" layoutY="160.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
|
||||
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Minimalno število pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="100.0" layoutY="200.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Minimalno število taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="100.0" layoutY="240.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
|
||||
|
||||
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false"
|
||||
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||
</Pane>
|
||||
|
||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
|
||||
text=" " wrapText="true"/>
|
||||
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
|
||||
<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
|
||||
</Pane>
|
||||
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
|
||||
|
||||
|
|
|
@ -13,104 +13,94 @@
|
|||
<?import javafx.scene.layout.Pane?>
|
||||
<?import org.controlsfx.control.CheckComboBox?>
|
||||
|
||||
<AnchorPane fx:id="stringAnalysisTabPaneNew2" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.StringAnalysisTabNew2">
|
||||
<AnchorPane fx:id="stringAnalysisTabPaneNew2" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.121" xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.StringAnalysisTabNew2">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="N-gram nivo"/>
|
||||
<ComboBox fx:id="ngramValueCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0" promptText="izberi"
|
||||
visibleRowCount="5">
|
||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="N-gram nivo" />
|
||||
<ComboBox fx:id="ngramValueCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
|
||||
<items>
|
||||
<FXCollections fx:factory="observableArrayList">
|
||||
<String fx:value="2"/>
|
||||
<String fx:value="3"/>
|
||||
<String fx:value="4"/>
|
||||
<String fx:value="5"/>
|
||||
<String fx:value="2" />
|
||||
<String fx:value="3" />
|
||||
<String fx:value="4" />
|
||||
<String fx:value="5" />
|
||||
</FXCollections>
|
||||
</items>
|
||||
</ComboBox>
|
||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izračunaj za"/>
|
||||
<ComboBox fx:id="calculateForCB" layoutX="100.0" layoutY="60.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
|
||||
visibleRowCount="5">
|
||||
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za" />
|
||||
<ComboBox fx:id="calculateForCB" layoutX="185.0" layoutY="20.0" minWidth="180.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
|
||||
<items>
|
||||
<FXCollections fx:factory="observableArrayList">
|
||||
<String fx:value="lema"/>
|
||||
<String fx:value="različnica"/>
|
||||
<String fx:value="oblikoskladenjska oznaka"/>
|
||||
<String fx:value="oblikoskladenjska lastnost"/>
|
||||
<String fx:value="besedna vrsta"/>
|
||||
<String fx:value="lema" />
|
||||
<String fx:value="različnica" />
|
||||
<String fx:value="oblikoskladenjska oznaka" />
|
||||
<String fx:value="oblikoskladenjska lastnost" />
|
||||
<String fx:value="besedna vrsta" />
|
||||
</FXCollections>
|
||||
</items>
|
||||
</ComboBox>
|
||||
|
||||
|
||||
<Pane fx:id="paneWords" layoutX="0.0" layoutY="100.0" prefHeight="36.0" prefWidth="380.0">
|
||||
|
||||
<Pane fx:id="paneWords">
|
||||
<children>
|
||||
<Label layoutX="10.0" prefHeight="25.0" text="Preskok besed"/>
|
||||
<ComboBox fx:id="skipValueCB" layoutX="100.0" prefWidth="180.0" promptText="izberi"
|
||||
visibleRowCount="5">
|
||||
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Preskok besed" />
|
||||
<ComboBox fx:id="skipValueCB" layoutX="185.0" layoutY="100.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
|
||||
<items>
|
||||
<FXCollections fx:factory="observableArrayList">
|
||||
<String fx:value="0"/>
|
||||
<String fx:value="1"/>
|
||||
<String fx:value="2"/>
|
||||
<String fx:value="3"/>
|
||||
<String fx:value="4"/>
|
||||
<String fx:value="5"/>
|
||||
<String fx:value="6"/>
|
||||
<String fx:value="7"/>
|
||||
<String fx:value="0" />
|
||||
<String fx:value="1" />
|
||||
<String fx:value="2" />
|
||||
<String fx:value="3" />
|
||||
<String fx:value="4" />
|
||||
<String fx:value="5" />
|
||||
<String fx:value="6" />
|
||||
<String fx:value="7" />
|
||||
</FXCollections>
|
||||
</items>
|
||||
</ComboBox>
|
||||
</children>
|
||||
<children>
|
||||
<Label layoutX="10.0" layoutY="40.0" prefHeight="25.0" text="Upoštevaj ločila"/>
|
||||
<CheckBox fx:id="notePunctuationsChB" layoutX="176.0" layoutY="45.0" selected="true"/>
|
||||
</children>
|
||||
</Pane>
|
||||
<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Upoštevaj ločila" />
|
||||
<CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="145.0" selected="true" />
|
||||
|
||||
|
||||
<!-- MSD and Taxonomy separated -->
|
||||
|
||||
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov"/>
|
||||
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>
|
||||
<TextField fx:id="msdTF" layoutX="100.0" layoutY="240.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD" />
|
||||
<TextField fx:id="msdTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija" />
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Minimalno število pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="100.0" layoutY="320.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Minimalno število taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="100.0" layoutY="360.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="360.0" prefWidth="180.0" />
|
||||
|
||||
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
|
||||
|
||||
</Pane>
|
||||
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
|
||||
<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
|
||||
<!-- samoglasniki/soglasniki -->
|
||||
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="280.0" prefHeight="84.0" prefWidth="380.0">
|
||||
<Pane fx:id="paneLetters">
|
||||
<children>
|
||||
<CheckBox fx:id="calculatecvvCB" layoutX="10.0" mnemonicParsing="false" prefHeight="25.0"
|
||||
text="Izračunaj za kombinacije samoglasnikov in soglasnikov"/>
|
||||
<TextField fx:id="stringLengthTF" layoutX="100.0" layoutY="40.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="40.0" prefHeight="25.0" text="Dolžina niza"/>
|
||||
<CheckBox fx:id="calculatecvvCB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" text="Izračunaj za kombinacije samoglasnikov in soglasnikov" />
|
||||
<Label layoutX="10.0" layoutY="400.0" prefHeight="25.0" text="Dolžina niza" />
|
||||
<TextField fx:id="stringLengthTF" layoutX="185.0" layoutY="400.0" prefWidth="180.0" />
|
||||
</children>
|
||||
</Pane>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||
</Pane>
|
||||
|
||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
|
||||
text=" " wrapText="true"/>
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
|
||||
<Button fx:id="cancel" layoutX="540.0" layoutY="482.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Prekini" />
|
||||
|
||||
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0" />
|
||||
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0" />
|
||||
|
||||
<Button fx:id="cancel" layoutX="540.0" layoutY="482.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Prekini"/>
|
||||
|
||||
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
|
||||
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
|
||||
|
||||
</AnchorPane>
|
||||
|
|
|
@ -8,15 +8,15 @@
|
|||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordFormationTab">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
|
||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Minimalno število pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="100.0" layoutY="60.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Minimalno število taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="100.0" layoutY="100.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0" />
|
||||
|
||||
<Button fx:id="computeB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false"
|
||||
<Button fx:id="computeB" layoutX="10.0" layoutY="422.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||
</Pane>
|
||||
|
||||
|
|
|
@ -8,13 +8,13 @@
|
|||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordLevelTab">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
|
||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Minimalno število pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="100.0" layoutY="60.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Minimalno število taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="100.0" layoutY="100.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0" />
|
||||
|
||||
<Button fx:id="computeB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||
|
|
|
@ -19,66 +19,66 @@ public class Common {
|
|||
ArrayList<String> taxonomy = new ArrayList<>();
|
||||
taxonomy.add("#Ft.Z.N.N");
|
||||
List<Word> words = new ArrayList<>();
|
||||
words.add(new Word("ker", "ker", "Vd", taxonomy));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy));
|
||||
words.add(new Word("junak", "junak", "Somei", taxonomy));
|
||||
words.add(new Word("v", "v", "Dm", taxonomy));
|
||||
words.add(new Word("posesti", "posest", "Sozem", taxonomy));
|
||||
words.add(new Word("nekaj", "nekaj", "Rsn", taxonomy));
|
||||
words.add(new Word("o", "o", "Dm", taxonomy));
|
||||
words.add(new Word("čemer", "kar", "Zz-sem", taxonomy));
|
||||
words.add(new Word("se", "se", "Zp------k", taxonomy));
|
||||
words.add(new Word("mu", "on", "Zotmed--k", taxonomy));
|
||||
words.add(new Word("ne", "ne", "L", taxonomy));
|
||||
words.add(new Word("sanja", "sanjati", "Ggnste", taxonomy));
|
||||
words.add(new Word("a", "a", "Vp", taxonomy));
|
||||
words.add(new Word("se", "se", "Zp------k", taxonomy));
|
||||
words.add(new Word("onemu", "oni", "Zk-sed", taxonomy));
|
||||
words.add(new Word("zdi", "zdeti", "Ggnste", taxonomy));
|
||||
words.add(new Word("ključno", "ključen", "Ppnsei", taxonomy));
|
||||
words.add(new Word("pri", "pri", "Dm", taxonomy));
|
||||
words.add(new Word("operaciji", "operacija", "Sozem", taxonomy));
|
||||
words.add(new Word("666", "666", "Kag", taxonomy));
|
||||
words.add(new Word("ker", "ker", "Vd"));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n"));
|
||||
words.add(new Word("junak", "junak", "Somei"));
|
||||
words.add(new Word("v", "v", "Dm"));
|
||||
words.add(new Word("posesti", "posest", "Sozem"));
|
||||
words.add(new Word("nekaj", "nekaj", "Rsn"));
|
||||
words.add(new Word("o", "o", "Dm"));
|
||||
words.add(new Word("čemer", "kar", "Zz-sem"));
|
||||
words.add(new Word("se", "se", "Zp------k"));
|
||||
words.add(new Word("mu", "on", "Zotmed--k"));
|
||||
words.add(new Word("ne", "ne", "L"));
|
||||
words.add(new Word("sanja", "sanjati", "Ggnste"));
|
||||
words.add(new Word("a", "a", "Vp"));
|
||||
words.add(new Word("se", "se", "Zp------k"));
|
||||
words.add(new Word("onemu", "oni", "Zk-sed"));
|
||||
words.add(new Word("zdi", "zdeti", "Ggnste"));
|
||||
words.add(new Word("ključno", "ključen", "Ppnsei"));
|
||||
words.add(new Word("pri", "pri", "Dm"));
|
||||
words.add(new Word("operaciji", "operacija", "Sozem"));
|
||||
words.add(new Word("666", "666", "Kag"));
|
||||
|
||||
testSentence = new Sentence(words, "#Ft.Z.N.N");
|
||||
testSentence = new Sentence(words, taxonomy);
|
||||
corpus = new ArrayList<>();
|
||||
corpus.add(testSentence);
|
||||
|
||||
// three word sentence
|
||||
testSentence = new Sentence(corpus.get(0).getSublist(0, 3), "#Ft.Z.N.N");
|
||||
testSentence = new Sentence(corpus.get(0).getSublist(0, 3), taxonomy);
|
||||
minCorpus = new ArrayList<>();
|
||||
minCorpus.add(testSentence);
|
||||
|
||||
// five word sentence
|
||||
words = new ArrayList<>();
|
||||
words.add(new Word("ker", "ker", "Vd", taxonomy));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy));
|
||||
words.add(new Word("junak", "junak", "Somei", taxonomy));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy));
|
||||
words.add(new Word("posesti", "posest", "Sozem", taxonomy));
|
||||
testSentence = new Sentence(words, "#Ft.Z.N.N");
|
||||
words.add(new Word("ker", "ker", "Vd"));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n"));
|
||||
words.add(new Word("junak", "junak", "Somei"));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n"));
|
||||
words.add(new Word("posesti", "posest", "Sozem"));
|
||||
testSentence = new Sentence(words, taxonomy);
|
||||
|
||||
midCorpus = new ArrayList<>();
|
||||
midCorpus.add(testSentence);
|
||||
|
||||
// five word sentence - for skipgrams
|
||||
words = new ArrayList<>();
|
||||
words.add(new Word("ker", "ker", "Vd", taxonomy));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy));
|
||||
words.add(new Word("junak", "junak", "Somei", taxonomy));
|
||||
words.add(new Word("v", "v", "Dm", taxonomy));
|
||||
words.add(new Word("posesti", "posest", "Sozem", taxonomy));
|
||||
testSentence = new Sentence(words, "#Ft.Z.N.N");
|
||||
words.add(new Word("ker", "ker", "Vd"));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n"));
|
||||
words.add(new Word("junak", "junak", "Somei"));
|
||||
words.add(new Word("v", "v", "Dm"));
|
||||
words.add(new Word("posesti", "posest", "Sozem"));
|
||||
testSentence = new Sentence(words, taxonomy);
|
||||
|
||||
midCorpusSkip = new ArrayList<>();
|
||||
midCorpusSkip.add(testSentence);
|
||||
|
||||
// JOS test
|
||||
words = new ArrayList<>();
|
||||
words.add(new Word("junak", "junak", "Somei", taxonomy));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy));
|
||||
words.add(new Word("posesti", "posest", "Sozem", taxonomy));
|
||||
testSentence = new Sentence(words, "#Ft.Z.N.N");
|
||||
words.add(new Word("junak", "junak", "Somei"));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n"));
|
||||
words.add(new Word("posesti", "posest", "Sozem"));
|
||||
testSentence = new Sentence(words, taxonomy);
|
||||
|
||||
josTest = new ArrayList<>();
|
||||
josTest.add(testSentence);
|
||||
|
|
|
@ -140,9 +140,9 @@ public class NgramTests {
|
|||
taxonomyResult = stats.getTaxonomyResult();
|
||||
|
||||
assertEquals(3, taxonomyResult.get("Total").size());
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ker ima junak", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ima junak ima", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("junak ima posesti", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("ker ima junak")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("ima junak ima")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("junak ima posesti")));
|
||||
|
||||
// tests:
|
||||
// - normal ngrams - lemmas
|
||||
|
@ -152,9 +152,9 @@ public class NgramTests {
|
|||
taxonomyResult = stats.getTaxonomyResult();
|
||||
|
||||
assertEquals(3, taxonomyResult.get("Total").size());
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ker imeti junak", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("imeti junak imeti", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("junak imeti posest", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("ker imeti junak")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("imeti junak imeti")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("junak imeti posest")));
|
||||
|
||||
// tests:
|
||||
// - normal ngrams - msd
|
||||
|
@ -164,9 +164,9 @@ public class NgramTests {
|
|||
taxonomyResult = stats.getTaxonomyResult();
|
||||
|
||||
assertEquals(3, taxonomyResult.get("Total").size());
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("Vd Ggnste-n Somei", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("Ggnste-n Somei Ggnste-n", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("Somei Ggnste-n Sozem", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("Vd Ggnste-n Somei")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("Ggnste-n Somei Ggnste-n")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("Somei Ggnste-n Sozem")));
|
||||
|
||||
// tests:
|
||||
// - ngrams - word - regex filter
|
||||
|
@ -182,7 +182,7 @@ public class NgramTests {
|
|||
taxonomyResult = stats.getTaxonomyResult();
|
||||
|
||||
assertEquals(1, taxonomyResult.get("Total").size());
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("junak ima posesti", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("junak ima posesti")));
|
||||
|
||||
// tests:
|
||||
// - ngrams - word - regex filter
|
||||
|
@ -198,7 +198,7 @@ public class NgramTests {
|
|||
taxonomyResult = stats.getTaxonomyResult();
|
||||
|
||||
assertEquals(1, taxonomyResult.get("Total").size());
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ima junak", "", "", "")));
|
||||
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("ima junak")));
|
||||
}
|
||||
|
||||
|
||||
|
@ -316,7 +316,7 @@ public class NgramTests {
|
|||
|
||||
Set<String> bigrams = new HashSet<>(Arrays.asList("ker ima", "ima junak", "junak v", "v posesti"));
|
||||
Set<MultipleHMKeys> bigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
|
||||
Set<String> bigramsActual = new HashSet<>(bigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList()));
|
||||
Set<String> bigramsActual = new HashSet<>(bigramsMultipleHMKeys.stream().map(MultipleHMKeys::getK1).collect(Collectors.toList()));
|
||||
assertEquals(bigrams, bigramsActual);
|
||||
|
||||
// test:
|
||||
|
@ -329,7 +329,7 @@ public class NgramTests {
|
|||
|
||||
Set<String> twoSkipBigrams = new HashSet<>(Arrays.asList("ker ima", "ker junak", "ker v", "ima junak", "ima v", "ima posesti", "junak v", "junak posesti", "v posesti"));
|
||||
Set<MultipleHMKeys> twoSkipBigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
|
||||
Set<String> twoSkipBigramsActual = new HashSet<>(twoSkipBigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList()));
|
||||
Set<String> twoSkipBigramsActual = new HashSet<>(twoSkipBigramsMultipleHMKeys.stream().map(MultipleHMKeys::getK1).collect(Collectors.toList()));
|
||||
|
||||
assertEquals(twoSkipBigrams, twoSkipBigramsActual);
|
||||
|
||||
|
@ -342,7 +342,7 @@ public class NgramTests {
|
|||
taxonomyResult = stats.getTaxonomyResult();
|
||||
Set<String> trigrams = new HashSet<>(Arrays.asList("ker ima junak", "ima junak v", "junak v posesti"));
|
||||
Set<MultipleHMKeys> trigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
|
||||
Set<String> trigramsActual = new HashSet<>(trigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList()));
|
||||
Set<String> trigramsActual = new HashSet<>(trigramsMultipleHMKeys.stream().map(MultipleHMKeys::getK1).collect(Collectors.toList()));
|
||||
|
||||
assertEquals(trigrams, trigramsActual);
|
||||
|
||||
|
@ -355,7 +355,7 @@ public class NgramTests {
|
|||
taxonomyResult = stats.getTaxonomyResult();
|
||||
HashSet<String> twoSkipTrigrams = new HashSet<>(Arrays.asList("ker ima junak", "ker ima v", "ker ima posesti", "ker junak v", "ker junak posesti", "ker v posesti", "ima junak v", "ima junak posesti", "ima v posesti", "junak v posesti"));
|
||||
Set<MultipleHMKeys> twoSkipTrigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
|
||||
Set<String> twoSkipTrigramsActual = new HashSet<>(twoSkipTrigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList()));
|
||||
Set<String> twoSkipTrigramsActual = new HashSet<>(twoSkipTrigramsMultipleHMKeys.stream().map(MultipleHMKeys::getK1).collect(Collectors.toList()));
|
||||
|
||||
assertEquals(twoSkipTrigrams, twoSkipTrigramsActual);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user