You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

113 lines
3.1 KiB

package alg.word;
import static data.Enums.WordLevelDefaultValues.*;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import data.Enums.WordLevelDefaultValues;
import data.Enums.WordLevelType;
import data.Sentence;
import data.StatisticsNew;
import data.Word;
@SuppressWarnings("Duplicates")
public class WordLevel {
private static HashSet<String> suffixes;
private static int minSuffixLength;
private static int maxSuffixLength;
private static HashSet<String> prefixes;
private static int minPrefixLength;
private static int maxPrefixLength;
static {
suffixes = WordLevelDefaultValues.getSuffixes();
calculateSuffixesLengths();
prefixes = WordLevelDefaultValues.getPrefixes();
calculatePrefixesLengths();
}
public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats) {
for (Sentence s : corpus) {
for (Word word : s.getWords()) {
calculateForSuffixes(word.getWord(), stats);
calculateForPrefixes(word.getWord(), stats);
}
}
}
private static void calculateForPrefixes(String word, StatisticsNew stats) {
for (int tmpPrefixLength = maxPrefixLength; tmpPrefixLength >= minPrefixLength; tmpPrefixLength++) {
if (word.length() - tmpPrefixLength < MIN_N_OF_CHARACTERS_LEFT_PREFIX) {
return;
}
String extractedPrefix = StringUtils.left(word, tmpPrefixLength);
if (prefixes.contains(extractedPrefix)) {
// save suffix and full word
stats.updateResultsNested(WordLevelType.PREFIX, extractedPrefix, word);
return;
}
}
}
public static void calculateForSuffixes(String word, StatisticsNew stats) {
for (int tmpSuffixLength = maxSuffixLength; tmpSuffixLength >= minSuffixLength; tmpSuffixLength++) {
// preveri, da je beseda - cuttan suffix daljši od prednastavljene vrednosti
// ker gremo od najdaljše opcije k najkrajši, se ob dosegu tega pogoja lahko zaključi računanje za trenutno besedo
if (word.length() - tmpSuffixLength < MIN_N_OF_CHARACTERS_LEFT_SUFFIX) {
return;
}
String extractedSuffix = StringUtils.right(word, tmpSuffixLength);
if (suffixes.contains(extractedSuffix)) {
// save suffix and full word
stats.updateResultsNested(WordLevelType.SUFFIX, extractedSuffix, word);
return;
}
}
}
// finds the shortest and longest suffix for quicker calculations
public static void calculateSuffixesLengths() {
minSuffixLength = -1;
maxSuffixLength = -1;
for (String suffix : suffixes) {
if (suffix.length() > maxSuffixLength) {
maxSuffixLength = suffix.length();
if (minSuffixLength < 0) {
minSuffixLength = maxSuffixLength;
}
} else if (suffix.length() < minSuffixLength) {
minSuffixLength = suffix.length();
}
}
}
// finds the shortest and longest suffix for quicker calculations
public static void calculatePrefixesLengths() {
minPrefixLength = -1;
maxPrefixLength = -1;
for (String prefix : prefixes) {
if (prefix.length() > maxPrefixLength) {
maxPrefixLength = prefix.length();
if (minPrefixLength < 0) {
minPrefixLength = maxPrefixLength;
}
} else if (prefix.length() < minPrefixLength) {
minPrefixLength = prefix.length();
}
}
}
}