Added some performance measures

This commit is contained in:
Luka 2018-08-09 09:21:06 +02:00
parent 179f09c4bd
commit 9b5fa4616b
24 changed files with 734 additions and 379 deletions

1
.gitignore vendored
View File

@ -1,5 +1,6 @@
# Created by .ignore support plugin (hsz.mobi) # Created by .ignore support plugin (hsz.mobi)
### Maven template ### Maven template
src/main/resources/META-INF/
target/ target/
corpus_analyzer_jar/ corpus_analyzer_jar/
pom.xml.tag pom.xml.tag

View File

@ -5,6 +5,7 @@ import static data.Enums.solar.SolarFilters.*;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ForkJoinPool; import java.util.concurrent.ForkJoinPool;
import javax.xml.namespace.QName; import javax.xml.namespace.QName;
@ -261,7 +262,7 @@ public class XML_processing {
if (c3Content.equals(".") && includeThisBlock) { if (c3Content.equals(".") && includeThisBlock) {
// add sentence to corpus // add sentence to corpus
corpus.add(new Sentence(stavek)); corpus.add(new Sentence(stavek, null));
// and start a new one // and start a new one
stavek = new ArrayList<>(); stavek = new ArrayList<>();
@ -293,7 +294,7 @@ public class XML_processing {
// "word" node value // "word" node value
if (in_word) { if (in_word) {
stavek.add(new Word(characters.getData(), lemma, msd, null)); stavek.add(new Word(characters.getData(), lemma, msd));
in_word = false; in_word = false;
} else if(inPunctuation){ } else if(inPunctuation){
String punctuation = ","; String punctuation = ",";
@ -543,7 +544,7 @@ public class XML_processing {
// "word" node value // "word" node value
if (inWord) { if (inWord) {
String word = characters.getData(); String word = characters.getData();
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong)); sentence.add(new Word(word, lemma, msd));
inWord = false; inWord = false;
} }
if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
@ -588,7 +589,7 @@ public class XML_processing {
sentence = runFilters(sentence, stats.getFilter()); sentence = runFilters(sentence, stats.getFilter());
if (!ValidationUtil.isEmpty(sentence)) { if (!ValidationUtil.isEmpty(sentence)) {
corpus.add(new Sentence(sentence)); corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
} }
// and start a new one // and start a new one
@ -655,6 +656,7 @@ public class XML_processing {
boolean inPunctuation = false; boolean inPunctuation = false;
boolean inOrthDiv = false; boolean inOrthDiv = false;
boolean computeForOrth = stats.getCorpus().isGosOrthMode(); boolean computeForOrth = stats.getCorpus().isGosOrthMode();
boolean inSeparatedWord = false;
ArrayList<String> currentFiletaxonomy = new ArrayList<>(); ArrayList<String> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>(); ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = ""; String lemma = "";
@ -662,7 +664,10 @@ public class XML_processing {
List<Word> sentence = new ArrayList<>(); List<Word> sentence = new ArrayList<>();
List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it
Map<String, List<Word>> GOSCorpusHM = new ConcurrentHashMap<>();
String GOSCorpusHMKey = "";
String sentenceDelimiter = "seg"; String sentenceDelimiter = "seg";
int wordIndex = 0;
String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm
@ -674,6 +679,8 @@ public class XML_processing {
XMLInputFactory factory = XMLInputFactory.newInstance(); XMLInputFactory factory = XMLInputFactory.newInstance();
eventReader = factory.createXMLEventReader(new FileInputStream(path)); eventReader = factory.createXMLEventReader(new FileInputStream(path));
// created hashmap to combine words with normalized words
while (eventReader.hasNext()) { while (eventReader.hasNext()) {
XMLEvent event = eventReader.nextEvent(); XMLEvent event = eventReader.nextEvent();
// System.out.print(String.format("%s", event.toString().replaceAll("\\['http://www.tei-c.org/ns/1.0'\\]::", ""))); // System.out.print(String.format("%s", event.toString().replaceAll("\\['http://www.tei-c.org/ns/1.0'\\]::", "")));
@ -711,6 +718,8 @@ public class XML_processing {
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); // msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); // lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
// } // }
} else if (atts.containsKey("type") && atts.get("type").equals("separated")) {
inSeparatedWord = true;
} }
// } // }
@ -730,38 +739,91 @@ public class XML_processing {
} }
} else if (qName.equalsIgnoreCase("div")) { } else if (qName.equalsIgnoreCase("div")) {
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
} else if (qName.equalsIgnoreCase("seg")) {
HashMap<String, String> atts = extractAttributes(startElement);
if (atts.keySet().contains("id")) {
if (inOrthDiv) {
GOSCorpusHMKey = atts.get("id") + ".norm";
} else {
GOSCorpusHMKey = atts.get("id");
}
} else {
System.out.println("No attribute \"id\"");
}
} }
break; break;
case XMLStreamConstants.CHARACTERS: case XMLStreamConstants.CHARACTERS:
// "word" node value // "word" node value
if (inWord) { if (inWord) {
// if (GOSCorpusHMKey.equals("gos.028-0108.norm") && wordIndex > 8){
// System.out.println(wordIndex);
// }
// if algorithm is in orthographic part add new word to sentence
if (inOrthDiv){
// GOSCorpusHM.put(GOSCorpusHMKey, sentence);
String word = "";
Characters characters = event.asCharacters(); Characters characters = event.asCharacters();
if (gosType.equals("norm") && msd != null) { sentence.add(new Word(characters.getData(), "", ""));
sentence.add(new Word(characters.getData(), lemma, msd, currentFiletaxonomyLong)); // if algorithm is in normalized part find orthodox word and add other info to it
} else { } else {
sentence.add(new Word(characters.getData(), lemma, msd, currentFiletaxonomyLong)); Characters characters = event.asCharacters();
// System.out.println(wordIndex);
// System.out.println(GOSCorpusHMKey + " " + lemma + " " + wordIndex);
if (wordIndex < GOSCorpusHM.get(GOSCorpusHMKey).size()) {
Word currentWord = GOSCorpusHM.get(GOSCorpusHMKey).get(wordIndex);
currentWord.setLemma(lemma);
currentWord.setMsd(msd);
currentWord.setNormalizedWord(characters.getData());
wordIndex += 1;
// when a word is separated from one to many we have to create these duplicates
if (inSeparatedWord){
GOSCorpusHM.get(GOSCorpusHMKey).add(wordIndex, new Word(currentWord.getWord(), "", ""));
}
} //else {
// System.out.println("Error");
// }
} }
inWord = false;
} }
break; break;
case XMLStreamConstants.END_ELEMENT: case XMLStreamConstants.END_ELEMENT:
EndElement endElement = event.asEndElement(); EndElement endElement = event.asEndElement();
// parser reached end of the current sentence if (endElement.getName().getLocalPart().equals("w")) {
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { if (inWord){
// add sentence to corpus if it passes filters inWord = false;
boolean saveSentence = computeForOrth == inOrthDiv; } else if(inSeparatedWord) {
// when there are no separated words left we have to delete last additional duplicate
GOSCorpusHM.get(GOSCorpusHMKey).remove(wordIndex);
if (includeFile && saveSentence && !ValidationUtil.isEmpty(sentence)) { inSeparatedWord = false;
sentence = runFilters(sentence, stats.getFilter()); }
corpus.add(new Sentence(sentence));
} }
// and start a new one // parser reached end of the current sentence
sentence = new ArrayList<>(); if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
if (inOrthDiv){
// add sentence to corpus
GOSCorpusHM.put(GOSCorpusHMKey, sentence);
} else {
sentence = GOSCorpusHM.remove(GOSCorpusHMKey);
// add sentence to corpus if it passes filters
if (includeFile && !ValidationUtil.isEmpty(sentence)) {
sentence = runFilters(sentence, stats.getFilter());
corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
}
wordIndex = 0;
/* Invoke Fork-Join when we reach maximum limit of /* Invoke Fork-Join when we reach maximum limit of
* sentences (because we can't read everything to * sentences (because we can't read everything to
@ -773,6 +835,11 @@ public class XML_processing {
// the data anymore // the data anymore
corpus.clear(); corpus.clear();
} }
}
// start a new sentence
sentence = new ArrayList<>();
} else if (endElement.getName().getLocalPart().equals("teiHeader")) { } else if (endElement.getName().getLocalPart().equals("teiHeader")) {
// before proceeding to read this file, make sure that taxonomy filters are a match // before proceeding to read this file, make sure that taxonomy filters are a match
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {

View File

@ -122,9 +122,9 @@ public class InflectedJOSCount {
static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) { static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
for (Sentence s : corpus) { for (Sentence s : corpus) {
// disregard if wrong taxonomy // disregard if wrong taxonomy
if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) { // if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
continue; // continue;
} // }
for (Word word : s.getWords()) { for (Word word : s.getWords()) {
// skip if current word is not inflected // skip if current word is not inflected

View File

@ -3,9 +3,11 @@ package alg.ngram;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Set;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.sun.xml.internal.bind.v2.runtime.reflect.Lister;
import data.*; import data.*;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
@ -28,6 +30,9 @@ public class Ngrams {
} }
public static void generateNgramCandidates(List<Sentence> corpus, StatisticsNew stats) { public static void generateNgramCandidates(List<Sentence> corpus, StatisticsNew stats) {
// preprocess CalculateFor for this corpus and prepare data for MultipleHMKeys
ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
for (Sentence s : corpus) { for (Sentence s : corpus) {
// skip sentences shorter than specified ngram length // skip sentences shorter than specified ngram length
if (s.getWords().size() < stats.getFilter().getNgramValue()) { if (s.getWords().size() < stats.getFilter().getNgramValue()) {
@ -46,29 +51,62 @@ public class Ngrams {
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor()); String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
// if last letter is ',' erase it // if last letter is ',' erase it
key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
// String key = "aaaaaaaaaaaaaaaaaaaaaaa";
String lemma = ""; // if (key.equals("")){
String wordType = ""; // String test = key;
String msd = ""; // }
for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
if(otherKey.toString().equals("lema")){ // key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
// lemma = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
lemma = wordToString(ngramCandidate, otherKey); MultipleHMKeys multipleKeys;
} else if(otherKey.toString().equals("besedna vrsta")){
wordType = wordToString(ngramCandidate, otherKey).substring(0, 1); // create MultipleHMKeys for different amount of other keys
} else if(otherKey.toString().equals("oblikoskladenjska oznaka")){ switch (otherKeys.size()) {
msd = wordToString(ngramCandidate, otherKey); case 0:
} multipleKeys = new MultipleHMKeys1(key);
break;
case 1:
multipleKeys = new MultipleHMKeys2(key, wordToString(ngramCandidate, otherKeys.get(0)));
break;
case 2:
multipleKeys = new MultipleHMKeys3(key, wordToString(ngramCandidate, otherKeys.get(0)),
wordToString(ngramCandidate, otherKeys.get(1)));
break;
case 3:
multipleKeys = new MultipleHMKeys4(key, wordToString(ngramCandidate, otherKeys.get(0)),
wordToString(ngramCandidate, otherKeys.get(1)),
wordToString(ngramCandidate, otherKeys.get(2)));
break;
case 4:
multipleKeys = new MultipleHMKeys5(key, wordToString(ngramCandidate, otherKeys.get(0)),
wordToString(ngramCandidate, otherKeys.get(1)),
wordToString(ngramCandidate, otherKeys.get(2)),
wordToString(ngramCandidate, otherKeys.get(3)));
break;
default:
multipleKeys = null;
} }
// String lemma = "";
// String wordType = "";
// String msd = "";
// for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
// if(otherKey.toString().equals("lema")){
// lemma = wordToString(ngramCandidate, otherKey);
// } else if(otherKey.toString().equals("besedna vrsta")){
// wordType = wordToString(ngramCandidate, otherKey).substring(0, 1);
// } else if(otherKey.toString().equals("oblikoskladenjska oznaka")){
// msd = wordToString(ngramCandidate, otherKey);
// }
// }
//
// MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
// UPDATE TAXONOMY HERE!!! // UPDATE TAXONOMY HERE!!!
stats.updateTaxonomyResults(multipleKeys, ngramCandidate.get(0).getTaxonomy()); stats.updateTaxonomyResults(multipleKeys, s.getTaxonomy());
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor())); // stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
} }
} }
@ -102,26 +140,31 @@ public class Ngrams {
.stream() .stream()
.map(Word::getLemma) .map(Word::getLemma)
.collect(Collectors.toList())); .collect(Collectors.toList()));
break; return StringUtils.join(candidate, " ");
case WORD: case WORD:
candidate.addAll(ngramCandidate candidate.addAll(ngramCandidate
.stream() .stream()
.map(Word::getWord) .map(Word::getWord)
.collect(Collectors.toList())); .collect(Collectors.toList()));
break; return StringUtils.join(candidate, " ");
case MORPHOSYNTACTIC_SPECS: case MORPHOSYNTACTIC_SPECS:
case MORPHOSYNTACTIC_PROPERTY: case MORPHOSYNTACTIC_PROPERTY:
candidate.addAll(ngramCandidate candidate.addAll(ngramCandidate
.stream() .stream()
.map(Word::getMsd) .map(Word::getMsd)
.collect(Collectors.toList())); .collect(Collectors.toList()));
break; return StringUtils.join(candidate, " ");
case WORD_TYPE: case WORD_TYPE:
candidate.addAll(ngramCandidate candidate.addAll(ngramCandidate
.stream() .stream()
.map(w -> Character.toString(w.getMsd().charAt(0))) .map(w -> Character.toString(w.getMsd().charAt(0)))
.collect(Collectors.toList())); .collect(Collectors.toList()));
break; // candidate.addAll(ngramCandidate
// .stream()
// .map(w -> Character.toString(w.getMsd().charAt(0)))
// .collect(Collectors.toList()));
// .substring(0, 1)
return StringUtils.join(candidate, " ");
} }
return StringUtils.join(candidate, " "); return StringUtils.join(candidate, " ");
@ -136,7 +179,7 @@ public class Ngrams {
private static void generateNgramLetterCandidates(List<Sentence> corpus, StatisticsNew stats) { private static void generateNgramLetterCandidates(List<Sentence> corpus, StatisticsNew stats) {
for (Sentence s : corpus) { for (Sentence s : corpus) {
for (Word w : s.getWords()) { for (Word w : s.getWords()) {
List<String> taxonomy = w.getTaxonomy(); List<String> taxonomy = s.getTaxonomy();
String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv()); String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv());
// skip this iteration if: // skip this iteration if:
@ -152,7 +195,7 @@ public class Ngrams {
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) { for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
// TODO: locila? // TODO: locila?
MultipleHMKeys multipleKeys = new MultipleHMKeys(word.substring(i, i + stats.getFilter().getStringLength())); MultipleHMKeys multipleKeys = new MultipleHMKeys1(word.substring(i, i + stats.getFilter().getStringLength()));
stats.updateTaxonomyResults(multipleKeys, taxonomy); stats.updateTaxonomyResults(multipleKeys, taxonomy);
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor())); // stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
@ -183,8 +226,7 @@ public class Ngrams {
String punctuation = ","; String punctuation = ",";
return new Word(sentence.get(i).getWord() + punctuation, return new Word(sentence.get(i).getWord() + punctuation,
sentence.get(i).getLemma() + punctuation, sentence.get(i).getLemma() + punctuation,
sentence.get(i).getMsd() + punctuation, sentence.get(i).getMsd() + punctuation);
sentence.get(i).getTaxonomy());
} }
} }
return sentence.get(i); return sentence.get(i);
@ -204,6 +246,10 @@ public class Ngrams {
for (Sentence s : corpus) { for (Sentence s : corpus) {
List<Word> sentence = s.getWords(); List<Word> sentence = s.getWords();
if (sentence == null){
continue;
}
for (int i = 0; i <= sentence.size() - ngram; i++) { // 1gram for (int i = 0; i <= sentence.size() - ngram; i++) { // 1gram
for (int j = i + 1; j <= i + skip + 1; j++) { // 2gram for (int j = i + 1; j <= i + skip + 1; j++) { // 2gram
if (ngram == 2 && j < sentence.size()) { if (ngram == 2 && j < sentence.size()) {
@ -260,7 +306,7 @@ public class Ngrams {
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) { if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) {
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor()); String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
stats.updateTaxonomyResults(new MultipleHMKeys(key, "", "", ""), stats.updateTaxonomyResults(new MultipleHMKeys1(key),
stats.getCorpus().getTaxonomy()); stats.getCorpus().getTaxonomy());
} }
} }

View File

@ -89,79 +89,79 @@ class WordCount {
} }
} }
private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) { // private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
for (Sentence s : corpus) { // for (Sentence s : corpus) {
if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) { // if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
List<String> sentence = new ArrayList<>(s.getWords().size()); // List<String> sentence = new ArrayList<>(s.getWords().size());
List<Word> filteredWords = new ArrayList<>(); // List<Word> filteredWords = new ArrayList<>();
//
// for (Word word : s.getWords()) {
// if (word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
// filteredWords.add(word);
// }
// }
//
// if (stats.getCf() == CalculateFor.LEMMA) {
// sentence.addAll(filteredWords
// .stream()
// .map(Word::getLemma)
// .collect(Collectors.toList()));
// } else if (stats.getCf() == CalculateFor.WORD) {
// sentence.addAll(filteredWords
// .stream()
// .map(Word::getWord)
// .collect(Collectors.toList()));
// }
//
// for (String word : sentence) {
// Common.updateMap(stats.result, word);
// }
// }
// }
// }
for (Word word : s.getWords()) { // private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
if (word.getMsd().charAt(0) == stats.getDistributionJosWordType()) { // for (Sentence s : corpus) {
filteredWords.add(word); // if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
} // List<String> sentence = new ArrayList<>(s.getWords().size());
} //
// if (stats.getCf() == CalculateFor.LEMMA) {
// sentence.addAll(s.getWords()
// .stream()
// .map(Word::getLemma)
// .collect(Collectors.toList()));
// } else if (stats.getCf() == CalculateFor.WORD) {
// sentence.addAll(s.getWords()
// .stream()
// .map(Word::getWord)
// .collect(Collectors.toList()));
// }
//
// for (String word : sentence) {
// Common.updateMap(stats.result, word);
// }
// }
// }
// }
if (stats.getCf() == CalculateFor.LEMMA) { // static void calculateForAll(List<Sentence> corpus, Statistics stats) {
sentence.addAll(filteredWords // boolean taxonomyIsSet = stats.isTaxonomySet();
.stream() // boolean JosTypeIsSet = stats.isJOSTypeSet();
.map(Word::getLemma) //
.collect(Collectors.toList())); // // branching because even though the only difference is an if or two &&
} else if (stats.getCf() == CalculateFor.WORD) { // // O(if) = 1, the amount of ifs adds up and this saves some time
sentence.addAll(filteredWords // if (taxonomyIsSet && JosTypeIsSet) {
.stream() // calculateForTaxonomyAndJosType(corpus, stats);
.map(Word::getWord) // } else if (taxonomyIsSet && !JosTypeIsSet) {
.collect(Collectors.toList())); // calculateForTaxonomy(corpus, stats);
} // } else if (!taxonomyIsSet && JosTypeIsSet) {
// calculateForJosType(corpus, stats);
for (String word : sentence) { // } else {
Common.updateMap(stats.result, word); // if (stats.isVcc()) {
} // calculateVCC(corpus, stats);
} // } else {
} // calculateNoFilter(corpus, stats);
} // }
// }
private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) { // }
for (Sentence s : corpus) {
if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
List<String> sentence = new ArrayList<>(s.getWords().size());
if (stats.getCf() == CalculateFor.LEMMA) {
sentence.addAll(s.getWords()
.stream()
.map(Word::getLemma)
.collect(Collectors.toList()));
} else if (stats.getCf() == CalculateFor.WORD) {
sentence.addAll(s.getWords()
.stream()
.map(Word::getWord)
.collect(Collectors.toList()));
}
for (String word : sentence) {
Common.updateMap(stats.result, word);
}
}
}
}
static void calculateForAll(List<Sentence> corpus, Statistics stats) {
boolean taxonomyIsSet = stats.isTaxonomySet();
boolean JosTypeIsSet = stats.isJOSTypeSet();
// branching because even though the only difference is an if or two &&
// O(if) = 1, the amount of ifs adds up and this saves some time
if (taxonomyIsSet && JosTypeIsSet) {
calculateForTaxonomyAndJosType(corpus, stats);
} else if (taxonomyIsSet && !JosTypeIsSet) {
calculateForTaxonomy(corpus, stats);
} else if (!taxonomyIsSet && JosTypeIsSet) {
calculateForJosType(corpus, stats);
} else {
if (stats.isVcc()) {
calculateVCC(corpus, stats);
} else {
calculateNoFilter(corpus, stats);
}
}
}
} }

View File

@ -5,49 +5,16 @@ import java.util.Objects;
/* /*
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously. Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
*/ */
public final class MultipleHMKeys { public interface MultipleHMKeys {
private final String key, lemma, wordType, msd; String getK1();
private MultipleHMKeys actual_obj; String getK2();
public MultipleHMKeys(String key) { String getK3();
this.key = key; String getK4();
this.lemma = ""; String getK5();
this.wordType = "";
this.msd = "";
}
public MultipleHMKeys(String key, String lemma, String wordType, String msd) {
this.key = key;
this.lemma = lemma;
this.wordType = wordType;
this.msd = msd;
}
public String getKey() {
return key;
}
public String getLemma() {
return lemma;
}
public String getWordType() {
return wordType;
}
public String getMsd() {
return msd;
}
@Override @Override
public int hashCode() { int hashCode();
return Objects.hash(key, lemma, wordType, msd);
}
@Override @Override
public boolean equals(Object obj) { boolean equals(Object obj);
return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key)
&& ((MultipleHMKeys) obj).lemma.equals(lemma)
&& ((MultipleHMKeys) obj).wordType.equals(wordType)
&& ((MultipleHMKeys) obj).msd.equals(msd);
}
} }

View File

@ -0,0 +1,44 @@
package data;
import java.util.Objects;
/*
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
*/
public final class MultipleHMKeys1 implements MultipleHMKeys {
private final String k1;
public MultipleHMKeys1(String k1) {
this.k1 = k1;
}
public String getK1() {
return k1;
}
public String getK2() {
return null;
}
public String getK3() {
return null;
}
public String getK4() {
return null;
}
public String getK5() {
return null;
}
@Override
public int hashCode() {
return k1.hashCode();
}
@Override
public boolean equals(Object obj) {
return (obj instanceof MultipleHMKeys1) && ((MultipleHMKeys1) obj).k1.equals(k1);
}
}

View File

@ -0,0 +1,49 @@
package data;
import java.util.Objects;
/*
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
*/
public final class MultipleHMKeys2 implements MultipleHMKeys {
private final String k1, k2;
public MultipleHMKeys2(String k1, String k2) {
this.k1 = k1;
this.k2 = k2;
}
public String getK1() {
return k1;
}
public String getK2() {
return k2;
}
public String getK3() {
return null;
}
public String getK4() {
return null;
}
public String getK5() {
return null;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2);
// return key.hashCode();
}
@Override
public boolean equals(Object obj) {
return (obj instanceof MultipleHMKeys2) && ((MultipleHMKeys2) obj).k1.equals(k1)
&& ((MultipleHMKeys2) obj).k2.equals(k2);
// return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key);
}
}

View File

@ -0,0 +1,48 @@
package data;
import java.util.Objects;
/*
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
*/
public final class MultipleHMKeys3 implements MultipleHMKeys {
private final String k1, k2, k3;
public MultipleHMKeys3(String k1, String k2, String k3) {
this.k1 = k1;
this.k2 = k2;
this.k3 = k3;
}
public String getK1() {
return k1;
}
public String getK2() {
return k2;
}
public String getK3() {
return k3;
}
public String getK4() {
return null;
}
public String getK5() {
return null;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2, k3);
}
@Override
public boolean equals(Object obj) {
return (obj instanceof MultipleHMKeys3) && ((MultipleHMKeys3) obj).k1.equals(k1)
&& ((MultipleHMKeys3) obj).k2.equals(k2)
&& ((MultipleHMKeys3) obj).k3.equals(k3);
}
}

View File

@ -0,0 +1,50 @@
package data;
import java.util.Objects;
/*
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
*/
public final class MultipleHMKeys4 implements MultipleHMKeys {
private final String k1, k2, k3, k4;
public MultipleHMKeys4(String k1, String k2, String k3, String k4) {
this.k1 = k1;
this.k2 = k2;
this.k3 = k3;
this.k4 = k4;
}
public String getK1() {
return k1;
}
public String getK2() {
return k2;
}
public String getK3() {
return k3;
}
public String getK4() {
return k4;
}
public String getK5() {
return null;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2, k3, k4);
}
@Override
public boolean equals(Object obj) {
return (obj instanceof MultipleHMKeys4) && ((MultipleHMKeys4) obj).k1.equals(k1)
&& ((MultipleHMKeys4) obj).k2.equals(k2)
&& ((MultipleHMKeys4) obj).k3.equals(k3)
&& ((MultipleHMKeys4) obj).k4.equals(k4);
}
}

View File

@ -0,0 +1,52 @@
package data;
import java.util.Objects;
/*
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
*/
public final class MultipleHMKeys5 implements MultipleHMKeys {
private final String k1, k2, k3, k4, k5;
public MultipleHMKeys5(String k1, String k2, String k3, String k4, String k5) {
this.k1 = k1;
this.k2 = k2;
this.k3 = k3;
this.k4 = k4;
this.k5 = k5;
}
public String getK1() {
return k1;
}
public String getK2() {
return k2;
}
public String getK3() {
return k3;
}
public String getK4() {
return k4;
}
public String getK5() {
return k5;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2, k3, k4, k5);
}
@Override
public boolean equals(Object obj) {
return (obj instanceof MultipleHMKeys5) && ((MultipleHMKeys5) obj).k1.equals(k1)
&& ((MultipleHMKeys5) obj).k2.equals(k2)
&& ((MultipleHMKeys5) obj).k3.equals(k3)
&& ((MultipleHMKeys5) obj).k4.equals(k4)
&& ((MultipleHMKeys5) obj).k5.equals(k5);
}
}

View File

@ -7,30 +7,30 @@ public class Sentence {
private List<Word> words; private List<Word> words;
private String taksonomija; private List<String> taxonomy;
// GOS // GOS
private String type; private String type;
private Map<String, String> properties; private Map<String, String> properties;
public Sentence(List<Word> words, String taksonomija) { public Sentence(List<Word> words, List<String> taxonomy) {
this.words = words; this.words = words;
this.taksonomija = taksonomija; this.taxonomy = taxonomy;
} }
public Sentence(List<Word> words) { // public Sentence(List<Word> words) {
this.words = words; // this.words = words;
} // }
public Sentence(List<Word> words, String taksonomija, Map<String, String> properties) { public Sentence(List<Word> words, List<String> taxonomy, Map<String, String> properties) {
this.words = words; this.words = words;
this.taksonomija = taksonomija; this.taxonomy = taxonomy;
this.properties = properties; this.properties = properties;
} }
public Sentence(List<Word> words, String taksonomija, String type) { public Sentence(List<Word> words, List<String> taxonomy, String type) {
this.words = words; this.words = words;
this.taksonomija = taksonomija; this.taxonomy = taxonomy;
this.type = type; this.type = type;
} }
@ -38,8 +38,8 @@ public class Sentence {
return words; return words;
} }
public String getTaxonomy() { public List<String> getTaxonomy() {
return taksonomija; return taxonomy;
} }
public List<Word> getSublist(int indexFrom, int indexTo) { public List<Word> getSublist(int indexFrom, int indexTo) {

View File

@ -213,7 +213,7 @@ public class StatisticsNew {
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences()); removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy()); removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit)))); stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult); Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult, filter);
return true; return true;
} }
@ -376,7 +376,7 @@ public class StatisticsNew {
} }
public void updateResultsNestedSuffix(String key, String stringValue) { public void updateResultsNestedSuffix(String key, String stringValue) {
MultipleHMKeys mkStringValue = new MultipleHMKeys(stringValue); MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);
if (resultNestedSuffix.containsKey(key)) { if (resultNestedSuffix.containsKey(key)) {
// if not in map // if not in map
@ -397,7 +397,7 @@ public class StatisticsNew {
} }
public void updateResultsNestedPrefix(String key, String stringValue) { public void updateResultsNestedPrefix(String key, String stringValue) {
MultipleHMKeys mkStringValue = new MultipleHMKeys(stringValue); MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);
if (resultNestedPrefix.containsKey(key)) { if (resultNestedPrefix.containsKey(key)) {
// if not in map // if not in map

View File

@ -16,8 +16,7 @@ public class Word implements Serializable {
private String word; private String word;
private String lemma; private String lemma;
private String msd; private String msd;
// private String msd; private String normalizedWord;
private List<String> taxonomy;
private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u')); private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));
/** /**
@ -41,7 +40,8 @@ public class Word implements Serializable {
//private char besedna_vrsta; //private char besedna_vrsta;
public Word(String word, String lemma, String msd) { public Word(String word, String lemma, String msd) {
this.lemma = lemma; this.lemma = lemma;
this.msd = normalizeMsd(msd); this.msd = msd; //normalizeMsd(msd);
this.normalizedWord = "";
// veliko zacetnico ohranimo samo za lastna imena // veliko zacetnico ohranimo samo za lastna imena
if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S' if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S'
@ -53,12 +53,11 @@ public class Word implements Serializable {
} }
} }
//private char besedna_vrsta; public Word(String word, String lemma, String msd, String normalizedWord) {
public Word(String word, String lemma, String msd, List<String> taxonomy) {
this.lemma = lemma; this.lemma = lemma;
// this.msd = normalizeMsd(msd); // this.msd = normalizeMsd(msd);
this.msd = msd; this.msd = msd;
this.taxonomy = taxonomy; this.normalizedWord = normalizedWord;
// veliko zacetnico ohranimo samo za lastna imena // veliko zacetnico ohranimo samo za lastna imena
if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S' if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S'
@ -73,21 +72,21 @@ public class Word implements Serializable {
public Word() { public Word() {
} }
/** // /**
* Appends a number of '-' to msds which are not properly sized. // * Appends a number of '-' to msds which are not properly sized.
* E.g. nouns should have 5 attributes, yet the last one isn't always defined (Somei vs. Sometd) // * E.g. nouns should have 5 attributes, yet the last one isn't always defined (Somei vs. Sometd)
* // *
* @param msdInput // * @param msdInput
* // *
* @return // * @return
*/ // */
private String normalizeMsd(String msdInput) { // private String normalizeMsd(String msdInput) {
if (ValidationUtil.isEmpty(msdInput)) { // if (ValidationUtil.isEmpty(msdInput)) {
return ""; // return "";
} else { // } else {
return StringUtils.rightPad(msdInput, Msd.getMsdLengthForType(msdInput), PAD_CHARACTER); // return StringUtils.rightPad(msdInput, Msd.getMsdLengthForType(msdInput), PAD_CHARACTER);
} // }
} // }
public Word(String word) { public Word(String word) {
this.word = word; this.word = word;
@ -119,10 +118,6 @@ public class Word implements Serializable {
this.word = word; this.word = word;
} }
public List<String> getTaxonomy() {
return taxonomy;
}
public String getLemma() { public String getLemma() {
return lemma; return lemma;
} }
@ -139,6 +134,14 @@ public class Word implements Serializable {
this.msd = msd; this.msd = msd;
} }
public String getNormalizedWord() {
return normalizedWord;
}
public void setNormalizedWord(String normalizedWord) {
this.normalizedWord = normalizedWord;
}
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
@ -150,6 +153,8 @@ public class Word implements Serializable {
.append("\n") .append("\n")
.append("msd:\t") .append("msd:\t")
.append(getMsd()) .append(getMsd())
.append("normalized word:\t")
.append(getNormalizedWord())
.append("\n"); .append("\n");
return sb.toString(); return sb.toString();

View File

@ -9,6 +9,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import data.CalculateFor;
import data.Filter; import data.Filter;
import data.MultipleHMKeys; import data.MultipleHMKeys;
import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVFormat;
@ -59,7 +60,7 @@ public class Export {
} }
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock, public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults) { Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults, Filter filter) {
//Delimiter used in CSV file //Delimiter used in CSV file
String NEW_LINE_SEPARATOR = "\n"; String NEW_LINE_SEPARATOR = "\n";
List<Object> FILE_HEADER_AL = new ArrayList<Object>(); List<Object> FILE_HEADER_AL = new ArrayList<Object>();
@ -98,8 +99,10 @@ public class Export {
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies)); headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
if (headerInfoBlock.get("Analiza").equals("Besede")){ if (headerInfoBlock.get("Analiza").equals("Besede")){
FILE_HEADER_AL.add("Lema"); FILE_HEADER_AL.add("Lema");
FILE_HEADER_AL.add("Lema male črke");
} else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) { } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
FILE_HEADER_AL.add("Leme"); FILE_HEADER_AL.add("Leme");
FILE_HEADER_AL.add("Leme male črke");
} }
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) { } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies)); headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
@ -111,25 +114,26 @@ public class Export {
} else { } else {
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies)); headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
FILE_HEADER_AL.add("Lema"); FILE_HEADER_AL.add("Lema");
FILE_HEADER_AL.add("Lema male črke");
} }
for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) { // for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
for (MultipleHMKeys key : value.keySet()){ for (CalculateFor otherKey : filter.getMultipleKeys()){
if(!key.getLemma().equals("")){ if(otherKey.equals(CalculateFor.LEMMA)){
FILE_HEADER_AL.add("Lema"); FILE_HEADER_AL.add("Lema");
FILE_HEADER_AL.add("Lema male črke");
} }
if(!key.getWordType().equals("")){ if(otherKey.equals(CalculateFor.WORD_TYPE)){
FILE_HEADER_AL.add("Besedna vrsta"); FILE_HEADER_AL.add("Besedna vrsta");
} }
if(!key.getMsd().equals("")){ if(otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
FILE_HEADER_AL.add("Oblikoskladenjska oznaka"); FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
} }
break;
} }
break; // break;
} // }
@ -198,16 +202,47 @@ public class Export {
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) { for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
List dataEntry = new ArrayList<>(); List dataEntry = new ArrayList<>();
dataEntry.add(e.getKey().getKey()); dataEntry.add(e.getKey().getK1());
if(!e.getKey().getLemma().equals("")){ if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi")) &&
dataEntry.add(e.getKey().getLemma()); headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")){
dataEntry.add(e.getKey().getK1().toLowerCase());
} }
if(!e.getKey().getWordType().equals("")){
dataEntry.add(e.getKey().getWordType()); int i = 0;
for (CalculateFor otherKey : filter.getMultipleKeys()){
switch(i){
case 0:
if (otherKey.equals(CalculateFor.LEMMA)){
dataEntry.add(e.getKey().getK2());
dataEntry.add(e.getKey().getK2().toLowerCase());
} else {
dataEntry.add(e.getKey().getK2());
} }
if(!e.getKey().getMsd().equals("")){ break;
dataEntry.add(e.getKey().getMsd()); case 1:
dataEntry.add(e.getKey().getK3());
break;
case 2:
dataEntry.add(e.getKey().getK4());
break;
case 3:
dataEntry.add(e.getKey().getK5());
break;
} }
i++;
}
// if(!e.getKey().getLemma().equals("")){
// dataEntry.add(e.getKey().getLemma());
// dataEntry.add(e.getKey().getLemma().toLowerCase());
// }
// if(!e.getKey().getWordType().equals("")){
// dataEntry.add(e.getKey().getWordType());
// }
// if(!e.getKey().getMsd().equals("")){
// dataEntry.add(e.getKey().getMsd());
// }
dataEntry.add(e.getValue().toString()); dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies)); dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies)); dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));

View File

@ -55,7 +55,7 @@ public class Util {
} }
public static String formatNumberAsPercent(Object o) { public static String formatNumberAsPercent(Object o) {
return MessageFormat.format("{0,number,#.###%}", o); return MessageFormat.format("{0,number,#.### %}", o).replace('.', ',');
} }
private static boolean isInstanceOfInteger(Object o) { private static boolean isInstanceOfInteger(Object o) {

View File

@ -16,7 +16,7 @@
<AnchorPane fx:id="characterAnalysisTab" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.112" xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.CharacterAnalysisTab"> <AnchorPane fx:id="characterAnalysisTab" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.112" xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.CharacterAnalysisTab">
<Pane> <Pane>
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Število črk" /> <Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Število črk" />
<TextField fx:id="stringLengthTF" layoutX="100.0" layoutY="20.0" prefWidth="180.0" /> <TextField fx:id="stringLengthTF" layoutX="185.0" layoutY="20.0" prefWidth="180.0" />
<HBox layoutX="10.0" layoutY="60.0"> <HBox layoutX="10.0" layoutY="60.0">
<children> <children>
@ -29,15 +29,15 @@
</HBox> </HBox>
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" /> <Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" />
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD" /> <Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD" />
<TextField fx:id="msdTF" layoutX="100.0" layoutY="160.0" prefWidth="180.0" /> <TextField fx:id="msdTF" layoutX="185.0" layoutY="160.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija" /> <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija" />
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0" /> <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Minimalno število pojavitev" /> <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="100.0" layoutY="240.0" prefWidth="180.0" /> <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Minimalno število taksonomij" /> <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="100.0" layoutY="280.0" prefWidth="180.0" /> <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0"> <Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
<children> <children>
@ -45,7 +45,7 @@
</children> </children>
</Pane> </Pane>
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" /> <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="422.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
</Pane> </Pane>
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:" /> <Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:" />

View File

@ -14,7 +14,7 @@
<Pane/> <Pane/>
<Button fx:id="chooseCorpusLocationB" layoutX="10.0" layoutY="20.0" mnemonicParsing="false" <Button fx:id="chooseCorpusLocationB" layoutX="10.0" layoutY="20.0" mnemonicParsing="false"
text="Nastavi lokacijo korpusa"/> text="Nastavi lokacijo korpusa"/>
<CheckBox fx:id="readHeaderInfoChB" layoutX="176.0" layoutY="24.0" mnemonicParsing="false" <CheckBox fx:id="readHeaderInfoChB" layoutX="185.0" layoutY="24.0" mnemonicParsing="false"
text="Preberi info iz headerjev"/> text="Preberi info iz headerjev"/>
<Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0"> <Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0">
<children> <children>

View File

@ -17,7 +17,7 @@
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.OneWordAnalysisTab"> xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.OneWordAnalysisTab">
<Pane> <Pane>
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za"/> <Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za"/>
<ComboBox fx:id="calculateForCB" layoutX="100.0" layoutY="20.0" minWidth="180.0" prefWidth="150.0" promptText="izberi" <ComboBox fx:id="calculateForCB" layoutX="185.0" layoutY="20.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
visibleRowCount="5"> visibleRowCount="5">
<items> <items>
<FXCollections fx:factory="observableArrayList"> <FXCollections fx:factory="observableArrayList">
@ -30,30 +30,31 @@
</items> </items>
</ComboBox> </ComboBox>
<Label layoutX="300.0" layoutY="20.0" prefHeight="25.0" text="Izpiši tudi:" /> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
<CheckComboBox fx:id="alsoVisualizeCCB" layoutX="400.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
<!-- MSD and Taxonomy separated --> <!-- MSD and Taxonomy separated -->
<Label layoutX="10.0" layoutY="80.0" prefHeight="25.0" text="Omejitev podatkov" /> <Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" />
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Oznaka MSD"/> <Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="100.0" layoutY="120.0" prefWidth="180.0"/> <TextField fx:id="msdTF" layoutX="185.0" layoutY="160.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Taksonomija"/> <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="160.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Minimalno število pojavitev" /> <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="100.0" layoutY="200.0" prefWidth="180.0" /> <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Minimalno število taksonomij" /> <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="100.0" layoutY="240.0" prefWidth="180.0" /> <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false" <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/> prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
</Pane> </Pane>
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/> <Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0" <Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />
text=" " wrapText="true"/> <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
</Pane>
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" /> <Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />

View File

@ -13,12 +13,10 @@
<?import javafx.scene.layout.Pane?> <?import javafx.scene.layout.Pane?>
<?import org.controlsfx.control.CheckComboBox?> <?import org.controlsfx.control.CheckComboBox?>
<AnchorPane fx:id="stringAnalysisTabPaneNew2" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111" <AnchorPane fx:id="stringAnalysisTabPaneNew2" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.121" xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.StringAnalysisTabNew2">
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.StringAnalysisTabNew2">
<Pane> <Pane>
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="N-gram nivo"/> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="N-gram nivo" />
<ComboBox fx:id="ngramValueCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0" promptText="izberi" <ComboBox fx:id="ngramValueCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
visibleRowCount="5">
<items> <items>
<FXCollections fx:factory="observableArrayList"> <FXCollections fx:factory="observableArrayList">
<String fx:value="2" /> <String fx:value="2" />
@ -28,9 +26,9 @@
</FXCollections> </FXCollections>
</items> </items>
</ComboBox> </ComboBox>
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izračunaj za"/>
<ComboBox fx:id="calculateForCB" layoutX="100.0" layoutY="60.0" minWidth="180.0" prefWidth="150.0" promptText="izberi" <Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za" />
visibleRowCount="5"> <ComboBox fx:id="calculateForCB" layoutX="185.0" layoutY="20.0" minWidth="180.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
<items> <items>
<FXCollections fx:factory="observableArrayList"> <FXCollections fx:factory="observableArrayList">
<String fx:value="lema" /> <String fx:value="lema" />
@ -43,11 +41,11 @@
</ComboBox> </ComboBox>
<Pane fx:id="paneWords" layoutX="0.0" layoutY="100.0" prefHeight="36.0" prefWidth="380.0">
<Pane fx:id="paneWords">
<children> <children>
<Label layoutX="10.0" prefHeight="25.0" text="Preskok besed"/> <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Preskok besed" />
<ComboBox fx:id="skipValueCB" layoutX="100.0" prefWidth="180.0" promptText="izberi" <ComboBox fx:id="skipValueCB" layoutX="185.0" layoutY="100.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
visibleRowCount="5">
<items> <items>
<FXCollections fx:factory="observableArrayList"> <FXCollections fx:factory="observableArrayList">
<String fx:value="0" /> <String fx:value="0" />
@ -62,11 +60,9 @@
</items> </items>
</ComboBox> </ComboBox>
</children> </children>
<children>
<Label layoutX="10.0" layoutY="40.0" prefHeight="25.0" text="Upoštevaj ločila"/>
<CheckBox fx:id="notePunctuationsChB" layoutX="176.0" layoutY="45.0" selected="true"/>
</children>
</Pane> </Pane>
<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Upoštevaj ločila" />
<CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="145.0" selected="true" />
<!-- MSD and Taxonomy separated --> <!-- MSD and Taxonomy separated -->
@ -74,43 +70,37 @@
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" /> <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" />
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD" /> <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD" />
<TextField fx:id="msdTF" layoutX="100.0" layoutY="240.0" prefWidth="180.0"/> <TextField fx:id="msdTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija" /> <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija" />
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Minimalno število pojavitev" /> <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="100.0" layoutY="320.0" prefWidth="180.0" /> <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Minimalno število taksonomij" /> <Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="100.0" layoutY="360.0" prefWidth="180.0" /> <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="360.0" prefWidth="180.0" />
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
</Pane>
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
<!-- samoglasniki/soglasniki --> <!-- samoglasniki/soglasniki -->
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="280.0" prefHeight="84.0" prefWidth="380.0"> <Pane fx:id="paneLetters">
<children> <children>
<CheckBox fx:id="calculatecvvCB" layoutX="10.0" mnemonicParsing="false" prefHeight="25.0" <CheckBox fx:id="calculatecvvCB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" text="Izračunaj za kombinacije samoglasnikov in soglasnikov" />
text="Izračunaj za kombinacije samoglasnikov in soglasnikov"/> <Label layoutX="10.0" layoutY="400.0" prefHeight="25.0" text="Dolžina niza" />
<TextField fx:id="stringLengthTF" layoutX="100.0" layoutY="40.0" prefWidth="180.0"/> <TextField fx:id="stringLengthTF" layoutX="185.0" layoutY="400.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="40.0" prefHeight="25.0" text="Dolžina niza"/>
</children> </children>
</Pane> </Pane>
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
</Pane> </Pane>
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
text=" " wrapText="true"/>
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" /> <Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
<Button fx:id="cancel" layoutX="540.0" layoutY="482.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Prekini" />
<Button fx:id="cancel" layoutX="540.0" layoutY="482.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Prekini"/>
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0" /> <ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0" />
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0" /> <Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0" />
</AnchorPane> </AnchorPane>

View File

@ -8,15 +8,15 @@
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordFormationTab"> xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordFormationTab">
<Pane> <Pane>
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/> <Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Minimalno število pojavitev" /> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="100.0" layoutY="60.0" prefWidth="180.0" /> <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Minimalno število taksonomij" /> <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="100.0" layoutY="100.0" prefWidth="180.0" /> <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0" />
<Button fx:id="computeB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false" <Button fx:id="computeB" layoutX="10.0" layoutY="422.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/> prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
</Pane> </Pane>

View File

@ -8,13 +8,13 @@
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordLevelTab"> xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordLevelTab">
<Pane> <Pane>
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/> <Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Minimalno število pojavitev" /> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="100.0" layoutY="60.0" prefWidth="180.0" /> <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Minimalno število taksonomij" /> <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="100.0" layoutY="100.0" prefWidth="180.0" /> <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0" />
<Button fx:id="computeB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false" <Button fx:id="computeB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/> prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>

View File

@ -19,66 +19,66 @@ public class Common {
ArrayList<String> taxonomy = new ArrayList<>(); ArrayList<String> taxonomy = new ArrayList<>();
taxonomy.add("#Ft.Z.N.N"); taxonomy.add("#Ft.Z.N.N");
List<Word> words = new ArrayList<>(); List<Word> words = new ArrayList<>();
words.add(new Word("ker", "ker", "Vd", taxonomy)); words.add(new Word("ker", "ker", "Vd"));
words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy)); words.add(new Word("ima", "imeti", "Ggnste-n"));
words.add(new Word("junak", "junak", "Somei", taxonomy)); words.add(new Word("junak", "junak", "Somei"));
words.add(new Word("v", "v", "Dm", taxonomy)); words.add(new Word("v", "v", "Dm"));
words.add(new Word("posesti", "posest", "Sozem", taxonomy)); words.add(new Word("posesti", "posest", "Sozem"));
words.add(new Word("nekaj", "nekaj", "Rsn", taxonomy)); words.add(new Word("nekaj", "nekaj", "Rsn"));
words.add(new Word("o", "o", "Dm", taxonomy)); words.add(new Word("o", "o", "Dm"));
words.add(new Word("čemer", "kar", "Zz-sem", taxonomy)); words.add(new Word("čemer", "kar", "Zz-sem"));
words.add(new Word("se", "se", "Zp------k", taxonomy)); words.add(new Word("se", "se", "Zp------k"));
words.add(new Word("mu", "on", "Zotmed--k", taxonomy)); words.add(new Word("mu", "on", "Zotmed--k"));
words.add(new Word("ne", "ne", "L", taxonomy)); words.add(new Word("ne", "ne", "L"));
words.add(new Word("sanja", "sanjati", "Ggnste", taxonomy)); words.add(new Word("sanja", "sanjati", "Ggnste"));
words.add(new Word("a", "a", "Vp", taxonomy)); words.add(new Word("a", "a", "Vp"));
words.add(new Word("se", "se", "Zp------k", taxonomy)); words.add(new Word("se", "se", "Zp------k"));
words.add(new Word("onemu", "oni", "Zk-sed", taxonomy)); words.add(new Word("onemu", "oni", "Zk-sed"));
words.add(new Word("zdi", "zdeti", "Ggnste", taxonomy)); words.add(new Word("zdi", "zdeti", "Ggnste"));
words.add(new Word("ključno", "ključen", "Ppnsei", taxonomy)); words.add(new Word("ključno", "ključen", "Ppnsei"));
words.add(new Word("pri", "pri", "Dm", taxonomy)); words.add(new Word("pri", "pri", "Dm"));
words.add(new Word("operaciji", "operacija", "Sozem", taxonomy)); words.add(new Word("operaciji", "operacija", "Sozem"));
words.add(new Word("666", "666", "Kag", taxonomy)); words.add(new Word("666", "666", "Kag"));
testSentence = new Sentence(words, "#Ft.Z.N.N"); testSentence = new Sentence(words, taxonomy);
corpus = new ArrayList<>(); corpus = new ArrayList<>();
corpus.add(testSentence); corpus.add(testSentence);
// three word sentence // three word sentence
testSentence = new Sentence(corpus.get(0).getSublist(0, 3), "#Ft.Z.N.N"); testSentence = new Sentence(corpus.get(0).getSublist(0, 3), taxonomy);
minCorpus = new ArrayList<>(); minCorpus = new ArrayList<>();
minCorpus.add(testSentence); minCorpus.add(testSentence);
// five word sentence // five word sentence
words = new ArrayList<>(); words = new ArrayList<>();
words.add(new Word("ker", "ker", "Vd", taxonomy)); words.add(new Word("ker", "ker", "Vd"));
words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy)); words.add(new Word("ima", "imeti", "Ggnste-n"));
words.add(new Word("junak", "junak", "Somei", taxonomy)); words.add(new Word("junak", "junak", "Somei"));
words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy)); words.add(new Word("ima", "imeti", "Ggnste-n"));
words.add(new Word("posesti", "posest", "Sozem", taxonomy)); words.add(new Word("posesti", "posest", "Sozem"));
testSentence = new Sentence(words, "#Ft.Z.N.N"); testSentence = new Sentence(words, taxonomy);
midCorpus = new ArrayList<>(); midCorpus = new ArrayList<>();
midCorpus.add(testSentence); midCorpus.add(testSentence);
// five word sentence - for skipgrams // five word sentence - for skipgrams
words = new ArrayList<>(); words = new ArrayList<>();
words.add(new Word("ker", "ker", "Vd", taxonomy)); words.add(new Word("ker", "ker", "Vd"));
words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy)); words.add(new Word("ima", "imeti", "Ggnste-n"));
words.add(new Word("junak", "junak", "Somei", taxonomy)); words.add(new Word("junak", "junak", "Somei"));
words.add(new Word("v", "v", "Dm", taxonomy)); words.add(new Word("v", "v", "Dm"));
words.add(new Word("posesti", "posest", "Sozem", taxonomy)); words.add(new Word("posesti", "posest", "Sozem"));
testSentence = new Sentence(words, "#Ft.Z.N.N"); testSentence = new Sentence(words, taxonomy);
midCorpusSkip = new ArrayList<>(); midCorpusSkip = new ArrayList<>();
midCorpusSkip.add(testSentence); midCorpusSkip.add(testSentence);
// JOS test // JOS test
words = new ArrayList<>(); words = new ArrayList<>();
words.add(new Word("junak", "junak", "Somei", taxonomy)); words.add(new Word("junak", "junak", "Somei"));
words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy)); words.add(new Word("ima", "imeti", "Ggnste-n"));
words.add(new Word("posesti", "posest", "Sozem", taxonomy)); words.add(new Word("posesti", "posest", "Sozem"));
testSentence = new Sentence(words, "#Ft.Z.N.N"); testSentence = new Sentence(words, taxonomy);
josTest = new ArrayList<>(); josTest = new ArrayList<>();
josTest.add(testSentence); josTest.add(testSentence);

View File

@ -140,9 +140,9 @@ public class NgramTests {
taxonomyResult = stats.getTaxonomyResult(); taxonomyResult = stats.getTaxonomyResult();
assertEquals(3, taxonomyResult.get("Total").size()); assertEquals(3, taxonomyResult.get("Total").size());
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ker ima junak", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("ker ima junak")));
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ima junak ima", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("ima junak ima")));
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("junak ima posesti", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("junak ima posesti")));
// tests: // tests:
// - normal ngrams - lemmas // - normal ngrams - lemmas
@ -152,9 +152,9 @@ public class NgramTests {
taxonomyResult = stats.getTaxonomyResult(); taxonomyResult = stats.getTaxonomyResult();
assertEquals(3, taxonomyResult.get("Total").size()); assertEquals(3, taxonomyResult.get("Total").size());
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ker imeti junak", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("ker imeti junak")));
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("imeti junak imeti", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("imeti junak imeti")));
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("junak imeti posest", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("junak imeti posest")));
// tests: // tests:
// - normal ngrams - msd // - normal ngrams - msd
@ -164,9 +164,9 @@ public class NgramTests {
taxonomyResult = stats.getTaxonomyResult(); taxonomyResult = stats.getTaxonomyResult();
assertEquals(3, taxonomyResult.get("Total").size()); assertEquals(3, taxonomyResult.get("Total").size());
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("Vd Ggnste-n Somei", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("Vd Ggnste-n Somei")));
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("Ggnste-n Somei Ggnste-n", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("Ggnste-n Somei Ggnste-n")));
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("Somei Ggnste-n Sozem", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("Somei Ggnste-n Sozem")));
// tests: // tests:
// - ngrams - word - regex filter // - ngrams - word - regex filter
@ -182,7 +182,7 @@ public class NgramTests {
taxonomyResult = stats.getTaxonomyResult(); taxonomyResult = stats.getTaxonomyResult();
assertEquals(1, taxonomyResult.get("Total").size()); assertEquals(1, taxonomyResult.get("Total").size());
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("junak ima posesti", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("junak ima posesti")));
// tests: // tests:
// - ngrams - word - regex filter // - ngrams - word - regex filter
@ -198,7 +198,7 @@ public class NgramTests {
taxonomyResult = stats.getTaxonomyResult(); taxonomyResult = stats.getTaxonomyResult();
assertEquals(1, taxonomyResult.get("Total").size()); assertEquals(1, taxonomyResult.get("Total").size());
assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ima junak", "", "", ""))); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys1("ima junak")));
} }
@ -316,7 +316,7 @@ public class NgramTests {
Set<String> bigrams = new HashSet<>(Arrays.asList("ker ima", "ima junak", "junak v", "v posesti")); Set<String> bigrams = new HashSet<>(Arrays.asList("ker ima", "ima junak", "junak v", "v posesti"));
Set<MultipleHMKeys> bigramsMultipleHMKeys = taxonomyResult.get("Total").keySet(); Set<MultipleHMKeys> bigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
Set<String> bigramsActual = new HashSet<>(bigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList())); Set<String> bigramsActual = new HashSet<>(bigramsMultipleHMKeys.stream().map(MultipleHMKeys::getK1).collect(Collectors.toList()));
assertEquals(bigrams, bigramsActual); assertEquals(bigrams, bigramsActual);
// test: // test:
@ -329,7 +329,7 @@ public class NgramTests {
Set<String> twoSkipBigrams = new HashSet<>(Arrays.asList("ker ima", "ker junak", "ker v", "ima junak", "ima v", "ima posesti", "junak v", "junak posesti", "v posesti")); Set<String> twoSkipBigrams = new HashSet<>(Arrays.asList("ker ima", "ker junak", "ker v", "ima junak", "ima v", "ima posesti", "junak v", "junak posesti", "v posesti"));
Set<MultipleHMKeys> twoSkipBigramsMultipleHMKeys = taxonomyResult.get("Total").keySet(); Set<MultipleHMKeys> twoSkipBigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
Set<String> twoSkipBigramsActual = new HashSet<>(twoSkipBigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList())); Set<String> twoSkipBigramsActual = new HashSet<>(twoSkipBigramsMultipleHMKeys.stream().map(MultipleHMKeys::getK1).collect(Collectors.toList()));
assertEquals(twoSkipBigrams, twoSkipBigramsActual); assertEquals(twoSkipBigrams, twoSkipBigramsActual);
@ -342,7 +342,7 @@ public class NgramTests {
taxonomyResult = stats.getTaxonomyResult(); taxonomyResult = stats.getTaxonomyResult();
Set<String> trigrams = new HashSet<>(Arrays.asList("ker ima junak", "ima junak v", "junak v posesti")); Set<String> trigrams = new HashSet<>(Arrays.asList("ker ima junak", "ima junak v", "junak v posesti"));
Set<MultipleHMKeys> trigramsMultipleHMKeys = taxonomyResult.get("Total").keySet(); Set<MultipleHMKeys> trigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
Set<String> trigramsActual = new HashSet<>(trigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList())); Set<String> trigramsActual = new HashSet<>(trigramsMultipleHMKeys.stream().map(MultipleHMKeys::getK1).collect(Collectors.toList()));
assertEquals(trigrams, trigramsActual); assertEquals(trigrams, trigramsActual);
@ -355,7 +355,7 @@ public class NgramTests {
taxonomyResult = stats.getTaxonomyResult(); taxonomyResult = stats.getTaxonomyResult();
HashSet<String> twoSkipTrigrams = new HashSet<>(Arrays.asList("ker ima junak", "ker ima v", "ker ima posesti", "ker junak v", "ker junak posesti", "ker v posesti", "ima junak v", "ima junak posesti", "ima v posesti", "junak v posesti")); HashSet<String> twoSkipTrigrams = new HashSet<>(Arrays.asList("ker ima junak", "ker ima v", "ker ima posesti", "ker junak v", "ker junak posesti", "ker v posesti", "ima junak v", "ima junak posesti", "ima v posesti", "junak v posesti"));
Set<MultipleHMKeys> twoSkipTrigramsMultipleHMKeys = taxonomyResult.get("Total").keySet(); Set<MultipleHMKeys> twoSkipTrigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
Set<String> twoSkipTrigramsActual = new HashSet<>(twoSkipTrigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList())); Set<String> twoSkipTrigramsActual = new HashSet<>(twoSkipTrigramsMultipleHMKeys.stream().map(MultipleHMKeys::getK1).collect(Collectors.toList()));
assertEquals(twoSkipTrigrams, twoSkipTrigramsActual); assertEquals(twoSkipTrigrams, twoSkipTrigramsActual);
} }