Added some optimizations and new taxonomy names

This commit is contained in:
2018-08-31 07:57:58 +02:00
parent 1c00f1a283
commit 426a9ccc46
21 changed files with 1345 additions and 1182 deletions

View File

@@ -8,6 +8,7 @@ import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import javafx.collections.FXCollections;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -15,6 +16,7 @@ import org.apache.logging.log4j.Logger;
import data.Enums.solar.SolarFilters;
import gui.ValidationUtil;
import javafx.collections.ObservableList;
import org.controlsfx.control.CheckComboBox;
public class Corpus {
public final static Logger logger = LogManager.getLogger(Corpus.class);
@@ -82,6 +84,11 @@ public class Corpus {
public ObservableList<String> getTaxonomy() {
return taxonomy;
}
//
// public ObservableList<String> getFormattedTaxonomy() {
// ArrayList<String> al = Tax.getTaxonomyFormatted(new ArrayList<>(taxonomy), corpusType);
// return FXCollections.observableArrayList(al);
// }
public void setTaxonomy(ObservableList<String> taxonomy) {
this.taxonomy = taxonomy;

View File

@@ -2,10 +2,7 @@ package data;
import static data.Filter.filterName.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.*;
import java.util.regex.Pattern;
import gui.ValidationUtil;
@@ -17,6 +14,7 @@ public class Filter {
public enum filterName {
ANALYSIS_LEVEL,
CALCULATE_FOR,
WORD_PARTS,
NGRAM_VALUE,
SKIP_VALUE,
IS_CVV,
@@ -36,6 +34,7 @@ public class Filter {
public Filter() {
filter = new HashMap<>();
filter.put(WRITE_MSD_AT_THE_END, false);
filter.put(WORD_PARTS, new ArrayList<CalculateFor>());
}
public Filter(AnalysisLevel al, CalculateFor cf) {
@@ -43,6 +42,10 @@ public class Filter {
filter.put(ANALYSIS_LEVEL, al);
filter.put(CALCULATE_FOR, cf);
filter.put(WORD_PARTS, new ArrayList<CalculateFor>());
addWordPart(cf);
filter.put(WRITE_MSD_AT_THE_END, false);
}
@@ -56,6 +59,8 @@ public class Filter {
public void setCalculateFor(CalculateFor cf) {
filter.put(CALCULATE_FOR, cf);
filter.put(WORD_PARTS, new ArrayList<CalculateFor>());
addWordPart(cf);
}
public CalculateFor getCalculateFor() {
@@ -137,6 +142,8 @@ public class Filter {
public void setHasMsd(boolean hasMsd) {
filter.put(HAS_MSD, hasMsd);
if (hasMsd)
addWordPart(CalculateFor.MORPHOSYNTACTIC_SPECS);
}
public boolean hasMsd() {
@@ -170,7 +177,9 @@ public class Filter {
ArrayList<CalculateFor> newKeys = new ArrayList<>();
if (keys != null) {
for (String key : keys) {
newKeys.add(CalculateFor.factory(key));
CalculateFor cf = CalculateFor.factory(key);
newKeys.add(cf);
addWordPart(cf);
}
}
@@ -185,6 +194,14 @@ public class Filter {
}
}
/**
 * Returns the list of word parts currently registered under {@code WORD_PARTS}.
 * <p>
 * The internal list is returned directly (not a copy) so that callers observe
 * later additions made via {@code addWordPart}. When the entry is missing or
 * null, a fresh empty list is returned (and NOT stored in the filter map).
 *
 * @return the live WORD_PARTS list, or a new empty list if none is set
 */
@SuppressWarnings("unchecked") // WORD_PARTS is only ever populated with ArrayList<CalculateFor>
public ArrayList<CalculateFor> getWordParts() {
	// Single map lookup instead of containsKey + get; a non-null value implies the key exists.
	Object wordParts = filter.get(WORD_PARTS);
	if (wordParts != null) {
		return (ArrayList<CalculateFor>) wordParts;
	}
	return new ArrayList<>();
}
public void setNotePunctuations(boolean notePunctuations) {
filter.put(NOTE_PUNCTUATIONS, notePunctuations);
}
@@ -209,4 +226,32 @@ public class Filter {
public Integer getMinimalTaxonomy() {
return (Integer) filter.get(MINIMAL_TAXONOMY);
}
/**
 * Registers the canonical word part required by the given {@code CalculateFor}
 * option in the {@code WORD_PARTS} list, avoiding duplicates.
 * <p>
 * Options are normalized: WORD/DIST_WORDS map to WORD, LEMMA/DIST_LEMMAS to
 * LEMMA, the three morphosyntactic options to MORPHOSYNTACTIC_SPECS, and
 * NORMALIZED_WORD to itself. Options with no word-part mapping are ignored.
 *
 * @param wp the option whose backing word part should be tracked; must not be null
 */
private void addWordPart(CalculateFor wp) {
	@SuppressWarnings("unchecked") // WORD_PARTS is only ever populated with ArrayList<CalculateFor>
	ArrayList<CalculateFor> wordParts = (ArrayList<CalculateFor>) filter.get(WORD_PARTS);
	if (wordParts == null) {
		// Defensive: constructors initialize WORD_PARTS, but guard against a
		// call path that reaches here before the entry exists (would have NPE'd).
		wordParts = new ArrayList<>();
		filter.put(WORD_PARTS, wordParts);
	}
	// Normalize the option to the single word part it depends on.
	CalculateFor canonical;
	switch (wp) {
		case WORD:
		case DIST_WORDS:
			canonical = CalculateFor.WORD;
			break;
		case LEMMA:
		case DIST_LEMMAS:
			canonical = CalculateFor.LEMMA;
			break;
		case MORPHOSYNTACTIC_PROPERTY:
		case MORPHOSYNTACTIC_SPECS:
		case WORD_TYPE:
			canonical = CalculateFor.MORPHOSYNTACTIC_SPECS;
			break;
		case NORMALIZED_WORD:
			canonical = CalculateFor.NORMALIZED_WORD;
			break;
		default:
			// Option does not correspond to a word part; nothing to register.
			return;
	}
	if (!wordParts.contains(canonical)) {
		wordParts.add(canonical);
	}
}
}

View File

@@ -16,67 +16,67 @@ public class Tax {
// GIGAFIDA ----------------------------
GIGAFIDA_TAXONOMY = new LinkedHashMap<>();
GIGAFIDA_TAXONOMY.put("SSJ.T", "tisk");
GIGAFIDA_TAXONOMY.put("SSJ.T.K", "tisk-knjižno");
GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", "tisk-knjižno-leposlovno");
GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", "tisk-knjižno-strokovno");
GIGAFIDA_TAXONOMY.put("SSJ.T.P", "tisk-periodično");
GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", "tisk-periodično-časopis");
GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", "tisk-periodično-revija");
GIGAFIDA_TAXONOMY.put("SSJ.T.D", "tisk-drugo");
GIGAFIDA_TAXONOMY.put("SSJ.I", "internet");
GIGAFIDA_TAXONOMY.put("SSJ.T", "SSJ.T - tisk");
GIGAFIDA_TAXONOMY.put("SSJ.T.K", "SSJ.T.K - tisk-knjižno");
GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", "SSJ.T.K.L - tisk-knjižno-leposlovno");
GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", "SSJ.T.K.S - tisk-knjižno-strokovno");
GIGAFIDA_TAXONOMY.put("SSJ.T.P", "SSJ.T.P - tisk-periodično");
GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", "SSJ.T.P.C - tisk-periodično-časopis");
GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", "SSJ.T.P.R - tisk-periodično-revija");
GIGAFIDA_TAXONOMY.put("SSJ.T.D", "SSJ.T.D - tisk-drugo");
GIGAFIDA_TAXONOMY.put("SSJ.I", "SSJ.I - internet");
GIGAFIDA_TAXONOMY.put("Ft.P", "prenosnik");
GIGAFIDA_TAXONOMY.put("Ft.P.G", "prenosnik-govorni");
GIGAFIDA_TAXONOMY.put("Ft.P.E", "prenosnik-elektronski");
GIGAFIDA_TAXONOMY.put("Ft.P.P", "prenosnik-pisni");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O", "prenosnik-pisni-objavljeno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", "prenosnik-pisni-objavljeno-knjižno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", "prenosnik-pisni-objavljeno-periodično");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", "prenosnik-pisni-objavljeno-periodično-časopisno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", "prenosnik-pisni-objavljeno-periodično-časopisno-dnevno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", "prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", "prenosnik-pisni-objavljeno-periodično-časopisno-tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", "prenosnik-pisni-objavljeno-periodično-revialno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", "prenosnik-pisni-objavljeno-periodično-revialno-tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", "prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", "prenosnik-pisni-objavljeno-periodično-revialno-mesečno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", "prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", "prenosnik-pisni-objavljeno-periodično-revialno-občasno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N", "prenosnik-pisni-neobjavljeno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", "prenosnik-pisni-neobjavljeno-javno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", "prenosnik-pisni-neobjavljeno-interno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", "prenosnik-pisni-neobjavljeno-zasebno");
GIGAFIDA_TAXONOMY.put("Ft.P", "Ft.P - prenosnik");
GIGAFIDA_TAXONOMY.put("Ft.P.G", "Ft.P.G - prenosnik-govorni");
GIGAFIDA_TAXONOMY.put("Ft.P.E", "Ft.P.E - prenosnik-elektronski");
GIGAFIDA_TAXONOMY.put("Ft.P.P", "Ft.P.P - prenosnik-pisni");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O", "Ft.P.P.O - prenosnik-pisni-objavljeno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", "Ft.P.P.O.K - prenosnik-pisni-objavljeno-knjižno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", "Ft.P.P.O.P - prenosnik-pisni-objavljeno-periodično");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", "Ft.P.P.O.P.C - prenosnik-pisni-objavljeno-periodično-časopisno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", "Ft.P.P.O.P.C.D - prenosnik-pisni-objavljeno-periodično-časopisno-dnevno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", "Ft.P.P.O.P.C.V - prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", "Ft.P.P.O.P.C.T - prenosnik-pisni-objavljeno-periodično-časopisno-tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", "Ft.P.P.O.P.R - prenosnik-pisni-objavljeno-periodično-revialno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", "Ft.P.P.O.P.R.T - prenosnik-pisni-objavljeno-periodično-revialno-tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", "Ft.P.P.O.P.R.S - prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", "Ft.P.P.O.P.R.M - prenosnik-pisni-objavljeno-periodično-revialno-mesečno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", "Ft.P.P.O.P.R.D - prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", "Ft.P.P.O.P.R.O - prenosnik-pisni-objavljeno-periodično-revialno-občasno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N", "Ft.P.P.N - prenosnik-pisni-neobjavljeno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", "Ft.P.P.N.J - prenosnik-pisni-neobjavljeno-javno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", "Ft.P.P.N.I - prenosnik-pisni-neobjavljeno-interno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", "Ft.P.P.N.Z - prenosnik-pisni-neobjavljeno-zasebno");
GIGAFIDA_TAXONOMY.put("Ft.Z", "zvrst");
GIGAFIDA_TAXONOMY.put("Ft.Z.U", "zvrst-umetnostna");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", "zvrst-umetnostna-pesniška");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", "zvrst-umetnostna-prozna");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", "zvrst-umetnostna-dramska");
GIGAFIDA_TAXONOMY.put("Ft.Z.N", "zvrst-neumetnostna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", "zvrst-neumetnostna-strokovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", "zvrst-neumetnostna-strokovna-humanistična in družboslovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", "zvrst-neumetnostna-strokovna-naravoslovna in tehnična");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", "zvrst-neumetnostna-nestrokovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", "zvrst-neumetnostna-pravna");
GIGAFIDA_TAXONOMY.put("Ft.L", "zvrst-lektorirano");
GIGAFIDA_TAXONOMY.put("Ft.L.D", "zvrst-lektorirano-da");
GIGAFIDA_TAXONOMY.put("Ft.L.N", "zvrst-lektorirano-ne");
GIGAFIDA_TAXONOMY.put("Ft.Z", "Ft.Z - zvrst");
GIGAFIDA_TAXONOMY.put("Ft.Z.U", "Ft.Z.U - zvrst-umetnostna");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", "Ft.Z.U.P - zvrst-umetnostna-pesniška");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", "Ft.Z.U.R - zvrst-umetnostna-prozna");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", "Ft.Z.U.D - zvrst-umetnostna-dramska");
GIGAFIDA_TAXONOMY.put("Ft.Z.N", "Ft.Z.N - zvrst-neumetnostna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", "Ft.Z.N.S - zvrst-neumetnostna-strokovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", "Ft.Z.N.S.H - zvrst-neumetnostna-strokovna-humanistična in družboslovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", "Ft.Z.N.S.N - zvrst-neumetnostna-strokovna-naravoslovna in tehnična");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", "Ft.Z.N.N - zvrst-neumetnostna-nestrokovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", "Ft.Z.N.P - zvrst-neumetnostna-pravna");
GIGAFIDA_TAXONOMY.put("Ft.L", "Ft.L - zvrst-lektorirano");
GIGAFIDA_TAXONOMY.put("Ft.L.D", "Ft.L.D - zvrst-lektorirano-da");
GIGAFIDA_TAXONOMY.put("Ft.L.N", "Ft.L.N - zvrst-lektorirano-ne");
// GOS ----------------------------------
GOS_TAXONOMY = new LinkedHashMap<>();
GOS_TAXONOMY.put("gos.T", "diskurz");
GOS_TAXONOMY.put("gos.T.J", "diskurz-javni");
GOS_TAXONOMY.put("gos.T.J.I", "diskurz-javni-informativno-izobraževalni");
GOS_TAXONOMY.put("gos.T.J.R", "diskurz-javni-razvedrilni");
GOS_TAXONOMY.put("gos.T.N", "diskurz-nejavni");
GOS_TAXONOMY.put("gos.T.N.N", "diskurz-nejavni-nezasebni");
GOS_TAXONOMY.put("gos.T.N.Z", "diskurz-nejavni-zasebni");
GOS_TAXONOMY.put("gos.T", "gos.T - diskurz");
GOS_TAXONOMY.put("gos.T.J", "gos.T.J - diskurz-javni");
GOS_TAXONOMY.put("gos.T.J.I", "gos.T.J.I - diskurz-javni-informativno-izobraževalni");
GOS_TAXONOMY.put("gos.T.J.R", "gos.T.J.R - diskurz-javni-razvedrilni");
GOS_TAXONOMY.put("gos.T.N", "gos.T.N - diskurz-nejavni");
GOS_TAXONOMY.put("gos.T.N.N", "gos.T.N.N - diskurz-nejavni-nezasebni");
GOS_TAXONOMY.put("gos.T.N.Z", "gos.T.N.Z - diskurz-nejavni-zasebni");
GOS_TAXONOMY.put("gos.S", "situacija");
GOS_TAXONOMY.put("gos.S.R", "situacija-radio");
GOS_TAXONOMY.put("gos.S.T", "situacija-televizija");
GOS_TAXONOMY.put("gos.S", "gos.S - situacija");
GOS_TAXONOMY.put("gos.S.R", "gos.S.R - situacija-radio");
GOS_TAXONOMY.put("gos.S.T", "gos.S.T - situacija-televizija");
}
/**
@@ -147,6 +147,33 @@ public class Tax {
return result;
}
// public static ArrayList<String> getTaxonomyFormatted(ArrayList<String> taxonomyNames, CorpusType corpusType) {
// ArrayList<String> result = new ArrayList<>();
//
// if (ValidationUtil.isEmpty(taxonomyNames)) {
// return result;
// }
//
// LinkedHashMap<String, String> tax = new LinkedHashMap<>();
//
// if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
// tax = GIGAFIDA_TAXONOMY;
// } else if (corpusType == CorpusType.GOS) {
// tax = GOS_TAXONOMY;
// }
//
// // for easier lookup
// Map<String, String> taxInversed = tax.entrySet()
// .stream()
// .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
//
// for (String taxonomyName : taxonomyNames) {
// result.add(taxInversed.get(taxonomyName) + " - " + taxonomyName);
// }
//
// return result;
// }
/**
* Returns a list of proper names for codes
*

View File

@@ -1,110 +1,94 @@
package data;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
/*
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
*/
public interface Word {
String getW1();
default String getW2(){ return null; }
default String getW3(){ return null; }
default String getW4(){ return null; }
import data.Enums.Msd;
import gui.ValidationUtil;
default String get(ArrayList<CalculateFor> wordParts, CalculateFor cf){
if (wordParts.size() > 0 && wordParts.get(0).equals(cf))
return getW1();
if (wordParts.size() > 1 && wordParts.get(1).equals(cf))
return getW2();
if (wordParts.size() > 2 && wordParts.get(2).equals(cf))
return getW3();
if (wordParts.size() > 3 && wordParts.get(3).equals(cf))
return getW4();
return null;
}
public class Word implements Serializable {
public static final char PAD_CHARACTER = '-';
default String getWord(ArrayList<CalculateFor> wordParts){
return get(wordParts, CalculateFor.WORD);
}
private String word;
private String lemma;
private String msd;
private String normalizedWord;
private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));
default String getLemma(ArrayList<CalculateFor> wordParts){
return get(wordParts, CalculateFor.LEMMA);
}
/**
* Possible values:
* <p>
* <ul>
* <li>S = samostalnik</li>
* <li>G = glagol</li>
* <li>P = pridevnik</li>
* <li>R = prislov</li>
* <li>Z = zaimek</li>
* <li>K = števnik</li>
* <li>D = predlog</li>
* <li>V = veznik</li>
* <li>L = členek</li>
* <li>M = medmet</li>
* <li>O = okrajšava</li>
* <li>N = neuvrščeno</li>
* </ul>
*/
//private char besedna_vrsta;
public Word(String word, String lemma, String msd) {
this.lemma = lemma;
this.msd = msd; //normalizeMsd(msd);
this.normalizedWord = "";
default String getMsd(ArrayList<CalculateFor> wordParts){
return get(wordParts, CalculateFor.MORPHOSYNTACTIC_SPECS);
}
// veliko zacetnico ohranimo samo za lastna imena
if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S'
&& this.msd.length() >= 2
&& this.msd.charAt(1) == 'l')) {
this.word = word.toLowerCase();
} else {
this.word = word;
default String getNormalizedWord(ArrayList<CalculateFor> wordParts){
return get(wordParts, CalculateFor.NORMALIZED_WORD);
}
void setW1(String w);
default void setW2(String w){}
default void setW3(String w){}
default void setW4(String w){}
default void set(String w, ArrayList<CalculateFor> wordParts, CalculateFor cf){
switch(wordParts.indexOf(cf)){
case 0:
setW1(w);
break;
case 1:
setW2(w);
break;
case 2:
setW3(w);
break;
case 3:
setW4(w);
break;
}
}
public Word(String word, String lemma, String msd, String normalizedWord) {
this.lemma = lemma;
// this.msd = normalizeMsd(msd);
this.msd = msd;
this.normalizedWord = normalizedWord;
// veliko zacetnico ohranimo samo za lastna imena
if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S'
&& this.msd.length() >= 2
&& this.msd.charAt(1) == 'l')) {
this.word = word.toLowerCase();
} else {
this.word = word;
}
default void setLemma(String w, ArrayList<CalculateFor> wordParts){
set(w, wordParts, CalculateFor.LEMMA);
}
public Word() {
default void setMsd(String w, ArrayList<CalculateFor> wordParts){
set(w, wordParts, CalculateFor.MORPHOSYNTACTIC_SPECS);
}
// /**
// * Appends a number of '-' to msds which are not properly sized.
// * E.g. nouns should have 5 attributes, yet the last one isn't always defined (Somei vs. Sometd)
// *
// * @param msdInput
// *
// * @return
// */
// private String normalizeMsd(String msdInput) {
// if (ValidationUtil.isEmpty(msdInput)) {
// return "";
// } else {
// return StringUtils.rightPad(msdInput, Msd.getMsdLengthForType(msdInput), PAD_CHARACTER);
// }
// }
public Word(String word) {
this.word = word;
default void setNormalizedWord(String w, ArrayList<CalculateFor> wordParts){
set(w, wordParts, CalculateFor.NORMALIZED_WORD);
}
public String getWord() {
return word;
default String getCVVWord(ArrayList<CalculateFor> cf) {
return covertToCvv(getWord(cf));
}
public String getCVVWord() {
return covertToCvv(word);
default String getCVVLemma(ArrayList<CalculateFor> cf) {
return covertToCvv(getLemma(cf));
}
public String getCVVLemma() {
return covertToCvv(lemma);
}
default String covertToCvv(String s) {
final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));
private String covertToCvv(String s) {
char[] StringCA = s.toCharArray();
for (int i = 0; i < StringCA.length; i++) {
@@ -114,59 +98,13 @@ public class Word implements Serializable {
return new String(StringCA);
}
public void setWord(String word) {
this.word = word;
}
public String getLemma() {
return lemma;
}
public void setLemma(String lemma) {
this.lemma = lemma;
}
public String getMsd() {
return msd;
}
public void setMsd(String msd) {
this.msd = msd;
}
public String getNormalizedWord() {
return normalizedWord;
}
public void setNormalizedWord(String normalizedWord) {
this.normalizedWord = normalizedWord;
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("beseda:\t")
.append(getWord())
.append("\n")
.append("lema:\t")
.append(getLemma())
.append("\n")
.append("msd:\t")
.append(getMsd())
.append("normalized word:\t")
.append(getNormalizedWord())
.append("\n");
return sb.toString();
}
public String getForCf(CalculateFor calculateFor, boolean cvv) {
default String getForCf(CalculateFor calculateFor, boolean cvv, ArrayList<CalculateFor> cf) {
String returnValue = "";
if (cvv) {
returnValue = calculateFor == CalculateFor.WORD ? getCVVWord() : getCVVLemma();
returnValue = calculateFor == CalculateFor.WORD ? getCVVWord(cf) : getCVVLemma(cf);
} else {
returnValue = calculateFor == CalculateFor.WORD ? getWord() : getLemma();
returnValue = calculateFor == CalculateFor.WORD ? getWord(cf) : getLemma(cf);
}
return returnValue;

17
src/main/java/data/Word1.java Executable file
View File

@@ -0,0 +1,17 @@
package data;
import java.io.Serializable;
public class Word1 implements Serializable, Word {
private String w1;
public Word1(String w1) {
this.w1 = w1;
}
public String getW1() {
return w1;
}
public void setW1(String w){w1 = w;}
}

22
src/main/java/data/Word2.java Executable file
View File

@@ -0,0 +1,22 @@
package data;
import java.io.Serializable;
public class Word2 implements Serializable, Word {
private String w1, w2;
public Word2(String w1, String w2) {
this.w1 = w1;
this.w2 = w2;
}
public String getW1() {
return w1;
}
public String getW2() {
return w2;
}
public void setW1(String w){w1 = w;}
public void setW2(String w){w2 = w;}
}

27
src/main/java/data/Word3.java Executable file
View File

@@ -0,0 +1,27 @@
package data;
import java.io.Serializable;
public class Word3 implements Serializable, Word {
private String w1, w2, w3;
public Word3(String w1, String w2, String w3) {
this.w1 = w1;
this.w2 = w2;
this.w3 = w3;
}
public String getW1() {
return w1;
}
public String getW2() {
return w2;
}
public String getW3() {
return w3;
}
public void setW1(String w){w1 = w;}
public void setW2(String w){w2 = w;}
public void setW3(String w){w3 = w;}
}

32
src/main/java/data/Word4.java Executable file
View File

@@ -0,0 +1,32 @@
package data;
import java.io.Serializable;
public class Word4 implements Serializable, Word {
private String w1, w2, w3, w4;
public Word4(String w1, String w2, String w3, String w4) {
this.w1 = w1;
this.w2 = w2;
this.w3 = w3;
this.w4 = w4;
}
public String getW1() {
return w1;
}
public String getW2() {
return w2;
}
public String getW3() {
return w3;
}
public String getW4() {
return w4;
}
public void setW1(String w){w1 = w;}
public void setW2(String w){w2 = w;}
public void setW3(String w){w3 = w;}
public void setW4(String w){w4 = w;}
}