Fixed slow presentation of word and lemma combinations

This commit is contained in:
Luka 2018-07-17 16:04:26 +02:00
parent c073e12f55
commit 84d0086a66
7 changed files with 113 additions and 73 deletions

View File

@ -44,11 +44,14 @@ public class Ngrams {
// generate proper MultipleHMKeys depending on filter data // generate proper MultipleHMKeys depending on filter data
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor()); String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
// String key = "aaaaaaaaaaaaaaaaaaaaaaa";
String lemma = ""; String lemma = "";
String wordType = ""; String wordType = "";
String msd = ""; String msd = "";
for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){ for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
if(otherKey.toString().equals("lema")){ if(otherKey.toString().equals("lema")){
// lemma = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
lemma = wordToString(ngramCandidate, otherKey); lemma = wordToString(ngramCandidate, otherKey);
} else if(otherKey.toString().equals("besedna vrsta")){ } else if(otherKey.toString().equals("besedna vrsta")){
wordType = wordToString(ngramCandidate, otherKey).substring(0, 1); wordType = wordToString(ngramCandidate, otherKey).substring(0, 1);
@ -222,7 +225,8 @@ public class Ngrams {
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats) { private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats) {
// count if no regex is set or if it is & candidate passes it // count if no regex is set or if it is & candidate passes it
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) { if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) {
stats.updateResults(wordToString(skipgramCandidate, stats.getFilter().getCalculateFor())); stats.updateTaxonomyResults(new MultipleHMKeys(wordToString(skipgramCandidate, stats.getFilter().getCalculateFor()), "", "", ""),
stats.getCorpus().getTaxonomy());
} }
} }
} }

View File

@ -1,10 +1,13 @@
package data; package data;
import java.util.Objects;
/* /*
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously. Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
*/ */
public final class MultipleHMKeys { public final class MultipleHMKeys {
private final String key, lemma, wordType, msd; private final String key, lemma, wordType, msd;
private MultipleHMKeys actual_obj;
public MultipleHMKeys(String key) { public MultipleHMKeys(String key) {
this.key = key; this.key = key;
this.lemma = ""; this.lemma = "";
@ -37,12 +40,7 @@ public final class MultipleHMKeys {
@Override @Override
public int hashCode() { public int hashCode() {
// if(key2 == null){ return Objects.hash(key, lemma, wordType, msd);
// return key1.hashCode();
// } else if (key3 == null){
// return key1.hashCode() ^ key2.hashCode();
// }
return key.hashCode() ^ lemma.hashCode() ^ wordType.hashCode() ^ msd.hashCode();
} }
@Override @Override

View File

@ -304,6 +304,10 @@ public class StatisticsNew {
} }
public Map<String, Map<MultipleHMKeys, AtomicLong>> getTaxonomyResult() {
return taxonomyResult;
}
public void updateResults(String o) { public void updateResults(String o) {
// if not in map // if not in map
AtomicLong r = result.putIfAbsent(o, new AtomicLong(1)); AtomicLong r = result.putIfAbsent(o, new AtomicLong(1));

View File

@ -16,27 +16,29 @@ public class Common {
Sentence testSentence; Sentence testSentence;
// full sentence // full sentence
ArrayList<String> taxonomy = new ArrayList<>();
taxonomy.add("#Ft.Z.N.N");
List<Word> words = new ArrayList<>(); List<Word> words = new ArrayList<>();
words.add(new Word("ker", "ker", "Vd")); words.add(new Word("ker", "ker", "Vd", taxonomy));
words.add(new Word("ima", "imeti", "Ggnste-n")); words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy));
words.add(new Word("junak", "junak", "Somei")); words.add(new Word("junak", "junak", "Somei", taxonomy));
words.add(new Word("v", "v", "Dm")); words.add(new Word("v", "v", "Dm", taxonomy));
words.add(new Word("posesti", "posest", "Sozem")); words.add(new Word("posesti", "posest", "Sozem", taxonomy));
words.add(new Word("nekaj", "nekaj", "Rsn")); words.add(new Word("nekaj", "nekaj", "Rsn", taxonomy));
words.add(new Word("o", "o", "Dm")); words.add(new Word("o", "o", "Dm", taxonomy));
words.add(new Word("čemer", "kar", "Zz-sem")); words.add(new Word("čemer", "kar", "Zz-sem", taxonomy));
words.add(new Word("se", "se", "Zp------k")); words.add(new Word("se", "se", "Zp------k", taxonomy));
words.add(new Word("mu", "on", "Zotmed--k")); words.add(new Word("mu", "on", "Zotmed--k", taxonomy));
words.add(new Word("ne", "ne", "L")); words.add(new Word("ne", "ne", "L", taxonomy));
words.add(new Word("sanja", "sanjati", "Ggnste")); words.add(new Word("sanja", "sanjati", "Ggnste", taxonomy));
words.add(new Word("a", "a", "Vp")); words.add(new Word("a", "a", "Vp", taxonomy));
words.add(new Word("se", "se", "Zp------k")); words.add(new Word("se", "se", "Zp------k", taxonomy));
words.add(new Word("onemu", "oni", "Zk-sed")); words.add(new Word("onemu", "oni", "Zk-sed", taxonomy));
words.add(new Word("zdi", "zdeti", "Ggnste")); words.add(new Word("zdi", "zdeti", "Ggnste", taxonomy));
words.add(new Word("ključno", "ključen", "Ppnsei")); words.add(new Word("ključno", "ključen", "Ppnsei", taxonomy));
words.add(new Word("pri", "pri", "Dm")); words.add(new Word("pri", "pri", "Dm", taxonomy));
words.add(new Word("operaciji", "operacija", "Sozem")); words.add(new Word("operaciji", "operacija", "Sozem", taxonomy));
words.add(new Word("666", "666", "Kag")); words.add(new Word("666", "666", "Kag", taxonomy));
testSentence = new Sentence(words, "#Ft.Z.N.N"); testSentence = new Sentence(words, "#Ft.Z.N.N");
corpus = new ArrayList<>(); corpus = new ArrayList<>();
@ -49,11 +51,11 @@ public class Common {
// five word sentence // five word sentence
words = new ArrayList<>(); words = new ArrayList<>();
words.add(new Word("ker", "ker", "Vd")); words.add(new Word("ker", "ker", "Vd", taxonomy));
words.add(new Word("ima", "imeti", "Ggnste-n")); words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy));
words.add(new Word("junak", "junak", "Somei")); words.add(new Word("junak", "junak", "Somei", taxonomy));
words.add(new Word("ima", "imeti", "Ggnste-n")); words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy));
words.add(new Word("posesti", "posest", "Sozem")); words.add(new Word("posesti", "posest", "Sozem", taxonomy));
testSentence = new Sentence(words, "#Ft.Z.N.N"); testSentence = new Sentence(words, "#Ft.Z.N.N");
midCorpus = new ArrayList<>(); midCorpus = new ArrayList<>();
@ -61,11 +63,11 @@ public class Common {
// five word sentence - for skipgrams // five word sentence - for skipgrams
words = new ArrayList<>(); words = new ArrayList<>();
words.add(new Word("ker", "ker", "Vd")); words.add(new Word("ker", "ker", "Vd", taxonomy));
words.add(new Word("ima", "imeti", "Ggnste-n")); words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy));
words.add(new Word("junak", "junak", "Somei")); words.add(new Word("junak", "junak", "Somei", taxonomy));
words.add(new Word("v", "v", "Dm")); words.add(new Word("v", "v", "Dm", taxonomy));
words.add(new Word("posesti", "posest", "Sozem")); words.add(new Word("posesti", "posest", "Sozem", taxonomy));
testSentence = new Sentence(words, "#Ft.Z.N.N"); testSentence = new Sentence(words, "#Ft.Z.N.N");
midCorpusSkip = new ArrayList<>(); midCorpusSkip = new ArrayList<>();
@ -73,9 +75,9 @@ public class Common {
// JOS test // JOS test
words = new ArrayList<>(); words = new ArrayList<>();
words.add(new Word("junak", "junak", "Somei")); words.add(new Word("junak", "junak", "Somei", taxonomy));
words.add(new Word("ima", "imeti", "Ggnste-n")); words.add(new Word("ima", "imeti", "Ggnste-n", taxonomy));
words.add(new Word("posesti", "posest", "Sozem")); words.add(new Word("posesti", "posest", "Sozem", taxonomy));
testSentence = new Sentence(words, "#Ft.Z.N.N"); testSentence = new Sentence(words, "#Ft.Z.N.N");
josTest = new ArrayList<>(); josTest = new ArrayList<>();

View File

@ -15,7 +15,7 @@ public class CorpusTests {
public void solarTest() { public void solarTest() {
//File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/Solar"); //File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/Solar");
// File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/GOS"); // File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/GOS");
File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/Gigafida_subset"); File selectedDirectory = new File("/home/luka/Developement/corpus-analyzer2/src/main/resources/Gigafida_subset");
Settings.resultsFilePath = new File(selectedDirectory.getAbsolutePath().concat(File.separator)); Settings.resultsFilePath = new File(selectedDirectory.getAbsolutePath().concat(File.separator));

View File

@ -3,7 +3,9 @@ import static org.junit.Assert.*;
import java.util.*; import java.util.*;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javafx.collections.FXCollections;
import org.junit.Test; import org.junit.Test;
import alg.ngram.Ngrams; import alg.ngram.Ngrams;
@ -21,10 +23,17 @@ public class NgramTests {
filter.setStringLength(4); filter.setStringLength(4);
filter.setNgramValue(0); // letters filter.setNgramValue(0); // letters
filter.setCalculateFor(CalculateFor.WORD); filter.setCalculateFor(CalculateFor.WORD);
ArrayList<String> tax= new ArrayList<>();
tax.add("SSJ.T.P.C");
filter.setTaxonomy(tax);
Corpus testCorpus = new Corpus(); Corpus testCorpus = new Corpus();
testCorpus.setCorpusType(CorpusType.GIGAFIDA); testCorpus.setCorpusType(CorpusType.GIGAFIDA);
testCorpus.setDetectedCorpusFiles(new ArrayList<>()); testCorpus.setDetectedCorpusFiles(new ArrayList<>());
ArrayList<String> taxForCombo = new ArrayList<>();
taxForCombo.add("SSJ.T.P.C");
testCorpus.setTaxonomy(FXCollections.observableArrayList(taxForCombo));
// tests: // tests:
// - no regex // - no regex
@ -103,15 +112,24 @@ public class NgramTests {
@Test @Test
public void wordsNgramsTest() { public void wordsNgramsTest() {
Map<String, AtomicLong> result = null; Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;
Filter filter = new Filter(); Filter filter = new Filter();
filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setNgramValue(3); filter.setNgramValue(3);
ArrayList<String> tax= new ArrayList<>();
tax.add("SSJ.T.P.C");
filter.setTaxonomy(tax);
ArrayList<String> mKeys = new ArrayList<>();
//mKeys.add("lema");
filter.setMultipleKeys(mKeys);
Corpus testCorpus = new Corpus(); Corpus testCorpus = new Corpus();
testCorpus.setCorpusType(CorpusType.GIGAFIDA); testCorpus.setCorpusType(CorpusType.GIGAFIDA);
testCorpus.setDetectedCorpusFiles(new ArrayList<>()); testCorpus.setDetectedCorpusFiles(new ArrayList<>());
ArrayList<String> taxForCombo = new ArrayList<>();
taxForCombo.add("SSJ.T.P.C");
testCorpus.setTaxonomy(FXCollections.observableArrayList(taxForCombo));
// tests: // tests:
// - normal ngrams - word // - normal ngrams - word
@ -119,36 +137,36 @@ public class NgramTests {
filter.setCalculateFor(CalculateFor.WORD); filter.setCalculateFor(CalculateFor.WORD);
StatisticsNew stats = new StatisticsNew(testCorpus, filter, false); StatisticsNew stats = new StatisticsNew(testCorpus, filter, false);
Ngrams.calculateForAll(Common.midCorpus, stats); Ngrams.calculateForAll(Common.midCorpus, stats);
result = stats.getResult(); taxonomyResult = stats.getTaxonomyResult();
assertEquals(3, result.size()); assertEquals(3, taxonomyResult.get("Total").size());
assertTrue(result.containsKey("ker ima junak")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ker ima junak", "", "", "")));
assertTrue(result.containsKey("ima junak ima")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ima junak ima", "", "", "")));
assertTrue(result.containsKey("junak ima posesti")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("junak ima posesti", "", "", "")));
// tests: // tests:
// - normal ngrams - lemmas // - normal ngrams - lemmas
filter.setCalculateFor(CalculateFor.LEMMA); filter.setCalculateFor(CalculateFor.LEMMA);
stats = new StatisticsNew(testCorpus, filter, false); stats = new StatisticsNew(testCorpus, filter, false);
Ngrams.calculateForAll(Common.midCorpus, stats); Ngrams.calculateForAll(Common.midCorpus, stats);
result = stats.getResult(); taxonomyResult = stats.getTaxonomyResult();
assertEquals(3, result.size()); assertEquals(3, taxonomyResult.get("Total").size());
assertTrue(result.containsKey("ker imeti junak")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ker imeti junak", "", "", "")));
assertTrue(result.containsKey("imeti junak imeti")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("imeti junak imeti", "", "", "")));
assertTrue(result.containsKey("junak imeti posest")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("junak imeti posest", "", "", "")));
// tests: // tests:
// - normal ngrams - msd // - normal ngrams - msd
filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY); filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
stats = new StatisticsNew(testCorpus, filter, false); stats = new StatisticsNew(testCorpus, filter, false);
Ngrams.calculateForAll(Common.midCorpus, stats); Ngrams.calculateForAll(Common.midCorpus, stats);
result = stats.getResult(); taxonomyResult = stats.getTaxonomyResult();
assertEquals(3, result.size()); assertEquals(3, taxonomyResult.get("Total").size());
assertTrue(result.containsKey("Vd Ggnste-n Somei")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("Vd Ggnste-n Somei", "", "", "")));
assertTrue(result.containsKey("Ggnste-n Somei Ggnste-n")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("Ggnste-n Somei Ggnste-n", "", "", "")));
assertTrue(result.containsKey("Somei Ggnste-n Sozem")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("Somei Ggnste-n Sozem", "", "", "")));
// tests: // tests:
// - ngrams - word - regex filter // - ngrams - word - regex filter
@ -161,10 +179,10 @@ public class NgramTests {
stats = new StatisticsNew(testCorpus, filter, false); stats = new StatisticsNew(testCorpus, filter, false);
Ngrams.calculateForAll(Common.midCorpus, stats); Ngrams.calculateForAll(Common.midCorpus, stats);
result = stats.getResult(); taxonomyResult = stats.getTaxonomyResult();
assertEquals(1, result.size()); assertEquals(1, taxonomyResult.get("Total").size());
assertTrue(result.containsKey("junak ima posesti")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("junak ima posesti", "", "", "")));
// tests: // tests:
// - ngrams - word - regex filter // - ngrams - word - regex filter
@ -177,10 +195,10 @@ public class NgramTests {
stats = new StatisticsNew(testCorpus, filter, false); stats = new StatisticsNew(testCorpus, filter, false);
Ngrams.calculateForAll(Common.midCorpus, stats); Ngrams.calculateForAll(Common.midCorpus, stats);
result = stats.getResult(); taxonomyResult = stats.getTaxonomyResult();
assertEquals(1, result.size()); assertEquals(1, taxonomyResult.get("Total").size());
assertTrue(result.containsKey("ima junak")); assertTrue(taxonomyResult.get("Total").containsKey(new MultipleHMKeys("ima junak", "", "", "")));
} }
@ -273,25 +291,32 @@ public class NgramTests {
@Test @Test
public void skipgramsTest() { public void skipgramsTest() {
Map<String, AtomicLong> result = null; Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;
Filter filter = new Filter(); Filter filter = new Filter();
filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setCalculateFor(CalculateFor.WORD); filter.setCalculateFor(CalculateFor.WORD);
ArrayList<String> tax= new ArrayList<>();
tax.add("SSJ.T.P.C");
filter.setTaxonomy(tax);
Corpus testCorpus = new Corpus(); Corpus testCorpus = new Corpus();
testCorpus.setCorpusType(CorpusType.GIGAFIDA); testCorpus.setCorpusType(CorpusType.GIGAFIDA);
testCorpus.setDetectedCorpusFiles(new ArrayList<>()); testCorpus.setDetectedCorpusFiles(new ArrayList<>());
ArrayList<String> taxForCombo = new ArrayList<>();
taxForCombo.add("tisk-periodično-časopis");
testCorpus.setTaxonomy(FXCollections.observableArrayList(taxForCombo));
// tests: // tests:
// - bigrams // - bigrams
filter.setNgramValue(2); filter.setNgramValue(2);
StatisticsNew stats = new StatisticsNew(testCorpus, filter, false); StatisticsNew stats = new StatisticsNew(testCorpus, filter, false);
Ngrams.calculateForAll(Common.midCorpusSkip, stats); Ngrams.calculateForAll(Common.midCorpusSkip, stats);
result = stats.getResult(); taxonomyResult = stats.getTaxonomyResult();
Set<String> bigrams = new HashSet<>(Arrays.asList("ker ima", "ima junak", "junak v", "v posesti")); Set<String> bigrams = new HashSet<>(Arrays.asList("ker ima", "ima junak", "junak v", "v posesti"));
Set<String> bigramsActual = result.keySet(); Set<MultipleHMKeys> bigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
Set<String> bigramsActual = new HashSet<>(bigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList()));
assertEquals(bigrams, bigramsActual); assertEquals(bigrams, bigramsActual);
// test: // test:
@ -300,10 +325,11 @@ public class NgramTests {
filter.setSkipValue(2); filter.setSkipValue(2);
stats = new StatisticsNew(testCorpus, filter, false); stats = new StatisticsNew(testCorpus, filter, false);
Ngrams.calculateForAll(Common.midCorpusSkip, stats); Ngrams.calculateForAll(Common.midCorpusSkip, stats);
result = stats.getResult(); taxonomyResult = stats.getTaxonomyResult();
Set<String> twoSkipBigrams = new HashSet<>(Arrays.asList("ker ima", "ker junak", "ker v", "ima junak", "ima v", "ima posesti", "junak v", "junak posesti", "v posesti")); Set<String> twoSkipBigrams = new HashSet<>(Arrays.asList("ker ima", "ker junak", "ker v", "ima junak", "ima v", "ima posesti", "junak v", "junak posesti", "v posesti"));
Set<String> twoSkipBigramsActual = result.keySet(); Set<MultipleHMKeys> twoSkipBigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
Set<String> twoSkipBigramsActual = new HashSet<>(twoSkipBigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList()));
assertEquals(twoSkipBigrams, twoSkipBigramsActual); assertEquals(twoSkipBigrams, twoSkipBigramsActual);
@ -313,9 +339,10 @@ public class NgramTests {
filter.setSkipValue(null); filter.setSkipValue(null);
stats = new StatisticsNew(testCorpus, filter, false); stats = new StatisticsNew(testCorpus, filter, false);
Ngrams.calculateForAll(Common.midCorpusSkip, stats); Ngrams.calculateForAll(Common.midCorpusSkip, stats);
result = stats.getResult(); taxonomyResult = stats.getTaxonomyResult();
Set<String> trigrams = new HashSet<>(Arrays.asList("ker ima junak", "ima junak v", "junak v posesti")); Set<String> trigrams = new HashSet<>(Arrays.asList("ker ima junak", "ima junak v", "junak v posesti"));
Set<String> trigramsActual = result.keySet(); Set<MultipleHMKeys> trigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
Set<String> trigramsActual = new HashSet<>(trigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList()));
assertEquals(trigrams, trigramsActual); assertEquals(trigrams, trigramsActual);
@ -325,9 +352,10 @@ public class NgramTests {
filter.setSkipValue(2); filter.setSkipValue(2);
stats = new StatisticsNew(testCorpus, filter, false); stats = new StatisticsNew(testCorpus, filter, false);
Ngrams.calculateForAll(Common.midCorpusSkip, stats); Ngrams.calculateForAll(Common.midCorpusSkip, stats);
result = stats.getResult(); taxonomyResult = stats.getTaxonomyResult();
HashSet<String> twoSkipTrigrams = new HashSet<>(Arrays.asList("ker ima junak", "ker ima v", "ker ima posesti", "ker junak v", "ker junak posesti", "ker v posesti", "ima junak v", "ima junak posesti", "ima v posesti", "junak v posesti")); HashSet<String> twoSkipTrigrams = new HashSet<>(Arrays.asList("ker ima junak", "ker ima v", "ker ima posesti", "ker junak v", "ker junak posesti", "ker v posesti", "ima junak v", "ima junak posesti", "ima v posesti", "junak v posesti"));
Set<String> twoSkipTrigramsActual = result.keySet(); Set<MultipleHMKeys> twoSkipTrigramsMultipleHMKeys = taxonomyResult.get("Total").keySet();
Set<String> twoSkipTrigramsActual = new HashSet<>(twoSkipTrigramsMultipleHMKeys.stream().map(MultipleHMKeys::getKey).collect(Collectors.toList()));
assertEquals(twoSkipTrigrams, twoSkipTrigramsActual); assertEquals(twoSkipTrigrams, twoSkipTrigramsActual);
} }

View File

@ -3,6 +3,7 @@ import java.util.ArrayList;
import java.util.Map; import java.util.Map;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import javafx.collections.FXCollections;
import org.junit.Test; import org.junit.Test;
import alg.inflectedJOS.WordFormation; import alg.inflectedJOS.WordFormation;
@ -22,6 +23,9 @@ public class WordFormationTest {
Corpus testCorpus = new Corpus(); Corpus testCorpus = new Corpus();
testCorpus.setCorpusType(CorpusType.GIGAFIDA); testCorpus.setCorpusType(CorpusType.GIGAFIDA);
testCorpus.setDetectedCorpusFiles(new ArrayList<>()); testCorpus.setDetectedCorpusFiles(new ArrayList<>());
ArrayList<String> taxForCombo = new ArrayList<>();
taxForCombo.add("tisk-periodično-časopis");
testCorpus.setTaxonomy(FXCollections.observableArrayList(taxForCombo));
// tests: // tests:
// - normal ngrams - word // - normal ngrams - word