package alg.word; import static data.Enums.WordLevelDefaultValues.*; import java.util.HashSet; import java.util.List; import org.apache.commons.lang3.StringUtils; import data.Enums.WordLevelDefaultValues; import data.Enums.WordLevelType; import data.Sentence; import data.StatisticsNew; import data.Word; @SuppressWarnings("Duplicates") public class WordLevel { private static HashSet suffixes; private static int minSuffixLength; private static int maxSuffixLength; private static HashSet prefixes; private static int minPrefixLength; private static int maxPrefixLength; static { suffixes = WordLevelDefaultValues.getSuffixes(); calculateSuffixesLengths(); prefixes = WordLevelDefaultValues.getPrefixes(); calculatePrefixesLengths(); } public static void calculateForAll(List corpus, StatisticsNew stats) { for (Sentence s : corpus) { for (Word word : s.getWords()) { calculateForSuffixes(word.getWord(), stats); calculateForPrefixes(word.getWord(), stats); } } } private static void calculateForPrefixes(String word, StatisticsNew stats) { for (int tmpPrefixLength = maxPrefixLength; tmpPrefixLength >= minPrefixLength; tmpPrefixLength++) { if (word.length() - tmpPrefixLength < MIN_N_OF_CHARACTERS_LEFT_PREFIX) { return; } String extractedPrefix = StringUtils.left(word, tmpPrefixLength); if (prefixes.contains(extractedPrefix)) { // save suffix and full word stats.updateResultsNested(WordLevelType.PREFIX, extractedPrefix, word); return; } } } public static void calculateForSuffixes(String word, StatisticsNew stats) { for (int tmpSuffixLength = maxSuffixLength; tmpSuffixLength >= minSuffixLength; tmpSuffixLength++) { // preveri, da je beseda - cuttan suffix daljši od prednastavljene vrednosti // ker gremo od najdaljše opcije k najkrajši, se ob dosegu tega pogoja lahko zaključi računanje za trenutno besedo if (word.length() - tmpSuffixLength < MIN_N_OF_CHARACTERS_LEFT_SUFFIX) { return; } String extractedSuffix = StringUtils.right(word, tmpSuffixLength); if (suffixes.contains(extractedSuffix)) { // save suffix and full word stats.updateResultsNested(WordLevelType.SUFFIX, extractedSuffix, word); return; } } } // finds the shortest and longest suffix for quicker calculations public static void calculateSuffixesLengths() { minSuffixLength = -1; maxSuffixLength = -1; for (String suffix : suffixes) { if (suffix.length() > maxSuffixLength) { maxSuffixLength = suffix.length(); if (minSuffixLength < 0) { minSuffixLength = maxSuffixLength; } } else if (suffix.length() < minSuffixLength) { minSuffixLength = suffix.length(); } } } // finds the shortest and longest suffix for quicker calculations public static void calculatePrefixesLengths() { minPrefixLength = -1; maxPrefixLength = -1; for (String prefix : prefixes) { if (prefix.length() > maxPrefixLength) { maxPrefixLength = prefix.length(); if (minPrefixLength < 0) { minPrefixLength = maxPrefixLength; } } else if (prefix.length() < minPrefixLength) { minPrefixLength = prefix.length(); } } } }