package alg.word;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

import alg.Common;
import data.CalculateFor;
import data.Sentence;
import data.Statistics;
import data.Word;

/**
 * Counts occurrences of word forms or lemmas in a corpus and accumulates the
 * counts into {@code stats.result} through {@link Common#updateMap}.
 *
 * <p>{@link #calculateForAll(List, Statistics)} is the entry point; it picks one
 * of the private strategies based on which filters are enabled on the
 * {@link Statistics} object.
 */
class WordCount {

    /**
     * Counts every word of every sentence, without any filtering.
     *
     * @param corpus sentences to process
     * @param stats  supplies the unit to count ({@code getCf()}) and receives the counts
     */
    private static void calculateNoFilter(List<Sentence> corpus, Statistics stats) {
        for (Sentence s : corpus) {
            countAll(tokensOf(s.getWords(), stats.getCf()), stats);
        }
    }

    /**
     * Counts all substrings of length {@code stats.getSubstringLength()} taken from
     * the CVV form of each word ({@code Word::getCVVLemma} or {@code Word::getCVVWord},
     * depending on {@code stats.getCf()}).
     *
     * @param corpus sentences to process
     * @param stats  supplies the unit, the substring length, and receives the counts
     */
    private static void calculateVCC(List<Sentence> corpus, Statistics stats) {
        int substringLength = stats.getSubstringLength();

        for (Sentence s : corpus) {
            CalculateFor cf = stats.getCf();
            List<String> sentence;

            if (cf == CalculateFor.LEMMA) {
                sentence = s.getWords().stream().map(Word::getCVVLemma).collect(Collectors.toList());
            } else if (cf == CalculateFor.WORD) {
                sentence = s.getWords().stream().map(Word::getCVVWord).collect(Collectors.toList());
            } else {
                // Any other unit contributes nothing.
                sentence = new ArrayList<>();
            }

            for (String word : sentence) {
                // NOTE(review): the strict '>' skips words whose length equals the
                // substring length, although the loop below would handle them.
                // Kept as-is because it changes the reported counts - confirm intent.
                if (word.length() > substringLength) {
                    for (int i = 0; i <= word.length() - substringLength; i++) {
                        Common.updateMap(stats.result, word.substring(i, i + substringLength));
                    }
                }
            }
        }
    }

    /**
     * Counts only the words whose MSD starts with {@code stats.getDistributionJosWordType()}.
     *
     * @param corpus sentences to process
     * @param stats  supplies the unit, the JOS word type, and receives the counts
     */
    private static void calculateForJosType(List<Sentence> corpus, Statistics stats) {
        for (Sentence s : corpus) {
            List<Word> filteredWords = filterByJosType(s.getWords(), stats);
            countAll(tokensOf(filteredWords, stats.getCf()), stats);
        }
    }

    /**
     * Counts words that match the JOS word type, restricted to sentences whose
     * taxonomy equals {@code stats.getDistributionTaxonomy()} (case-insensitively).
     *
     * @param corpus sentences to process
     * @param stats  supplies the unit, both filters, and receives the counts
     */
    private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
        for (Sentence s : corpus) {
            if (matchesTaxonomy(s, stats)) {
                List<Word> filteredWords = filterByJosType(s.getWords(), stats);
                countAll(tokensOf(filteredWords, stats.getCf()), stats);
            }
        }
    }

    /**
     * Counts every word of the sentences whose taxonomy equals
     * {@code stats.getDistributionTaxonomy()} (case-insensitively).
     *
     * @param corpus sentences to process
     * @param stats  supplies the unit, the taxonomy filter, and receives the counts
     */
    private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
        for (Sentence s : corpus) {
            if (matchesTaxonomy(s, stats)) {
                countAll(tokensOf(s.getWords(), stats.getCf()), stats);
            }
        }
    }

    /**
     * Dispatches to the counting strategy that matches the filters set on {@code stats}.
     *
     * @param corpus sentences to process
     * @param stats  filter configuration; also receives the counts in {@code stats.result}
     */
    static void calculateForAll(List<Sentence> corpus, Statistics stats) {
        boolean taxonomyIsSet = stats.isTaxonomySet();
        boolean josTypeIsSet = stats.isJOSTypeSet();

        // The filter combination is decided once here instead of being re-checked
        // for every word: each check is cheap, but over a whole corpus they add up.
        if (taxonomyIsSet && josTypeIsSet) {
            calculateForTaxonomyAndJosType(corpus, stats);
        } else if (taxonomyIsSet) {
            calculateForTaxonomy(corpus, stats);
        } else if (josTypeIsSet) {
            calculateForJosType(corpus, stats);
        } else if (stats.isVcc()) {
            // NOTE(review): VCC is only honoured when neither filter is set - confirm intent.
            calculateVCC(corpus, stats);
        } else {
            calculateNoFilter(corpus, stats);
        }
    }

    /** Returns true when the sentence has a taxonomy equal (ignoring case) to the requested one. */
    private static boolean matchesTaxonomy(Sentence s, Statistics stats) {
        String taxonomy = s.getTaxonomy();
        // A sentence without taxonomy cannot match the filter; skip it instead of throwing.
        return taxonomy != null && taxonomy.equalsIgnoreCase(stats.getDistributionTaxonomy());
    }

    /** Returns the words whose MSD is non-empty and starts with the requested JOS word type. */
    private static List<Word> filterByJosType(List<Word> words, Statistics stats) {
        List<Word> filteredWords = new ArrayList<>();
        for (Word word : words) {
            String msd = word.getMsd();
            // Guard against missing and empty MSDs before looking at the first character.
            if (msd != null && !msd.isEmpty() && msd.charAt(0) == stats.getDistributionJosWordType()) {
                filteredWords.add(word);
            }
        }
        return filteredWords;
    }

    /** Maps words to lemmas or word forms according to {@code cf}; any other unit yields an empty list. */
    private static List<String> tokensOf(List<Word> words, CalculateFor cf) {
        if (cf == CalculateFor.LEMMA) {
            return words.stream().map(Word::getLemma).collect(Collectors.toList());
        }
        if (cf == CalculateFor.WORD) {
            return words.stream().map(Word::getWord).collect(Collectors.toList());
        }
        return new ArrayList<>();
    }

    /** Registers one occurrence of each token in {@code stats.result}. */
    private static void countAll(List<String> tokens, Statistics stats) {
        for (String token : tokens) {
            Common.updateMap(stats.result, token);
        }
    }
}