list/src/main/java/alg/word/WordCount.java

package alg.word;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

import alg.Common;
import data.CalculateFor;
import data.Sentence;
import data.Statistics;
import data.Word;

//class WordCount {
//	private static void calculateNoFilter(List<Sentence> corpus, Statistics stats) {
//		for (Sentence s : corpus) {
//			List<String> sentence = new ArrayList<>(s.getWords().size());
//
//			if (stats.getCf() == CalculateFor.LEMMA) {
//				sentence.addAll(s.getWords()
//						.stream()
//						.map(Word::getLemma)
//						.collect(Collectors.toList()));
//			} else if (stats.getCf() == CalculateFor.WORD) {
//				sentence.addAll(s.getWords()
//						.stream()
//						.map(Word::getWord)
//						.collect(Collectors.toList()));
//			}
//
//			for (String word : sentence) {
//				Common.updateMap(stats.result, word);
//			}
//		}
//	}
//
//	private static void calculateVCC(List<Sentence> corpus, Statistics stats) {
//		for (Sentence s : corpus) {
//			List<String> sentence = new ArrayList<>(s.getWords().size());
//
//			if (stats.getCf() == CalculateFor.LEMMA) {
//				sentence.addAll(s.getWords()
//						.stream()
//						.map(Word::getCVVLemma)
//						.collect(Collectors.toList()));
//			} else if (stats.getCf() == CalculateFor.WORD) {
//				sentence.addAll(s.getWords()
//						.stream()
//						.map(Word::getCVVWord)
//						.collect(Collectors.toList()));
//			}
//
//			for (String word : sentence) {
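//				// NOTE(review): the strict '>' skips words whose length equals the substring
//				// length, although the loop bound below would yield exactly one substring for
//				// them - confirm whether '>=' was intended.
//				if (word.length() > stats.getSubstringLength()) {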
//					for (int i = 0; i <= word.length() - stats.getSubstringLength(); i++) {
//						String substring = word.substring(i, i + stats.getSubstringLength());
//						Common.updateMap(stats.result, substring);
//					}
//				}
//			}
//		}
//	}
//
//	private static void calculateForJosType(List<Sentence> corpus, Statistics stats) {
//		for (Sentence s : corpus) {
//			List<String> sentence = new ArrayList<>(s.getWords().size());
//			List<Word> filteredWords = new ArrayList<>();
//
//			for (Word word : s.getWords()) {
//				if (word.getMsd() != null && word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
//					filteredWords.add(word);
//				}
//			}
//
//			if (stats.getCf() == CalculateFor.LEMMA) {
//				sentence.addAll(filteredWords
//						.stream()
//						.map(Word::getLemma)
//						.collect(Collectors.toList()));
//			} else if (stats.getCf() == CalculateFor.WORD) {
//				sentence.addAll(filteredWords
//						.stream()
//						.map(Word::getWord)
//						.collect(Collectors.toList()));
//			}
//
//			for (String word : sentence) {
//				Common.updateMap(stats.result, word);
//			}
//		}
//	}

//	private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
//		for (Sentence s : corpus) {
//			if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
//				List<String> sentence = new ArrayList<>(s.getWords().size());
//				List<Word> filteredWords = new ArrayList<>();
//
//				for (Word word : s.getWords()) {
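//					// NOTE(review): unlike calculateForJosType, getMsd() is not null-checked
//					// before charAt(0) here - confirm that MSD can never be null on this path.
//					if (word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {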
//						filteredWords.add(word);
//					}
//				}
//
//				if (stats.getCf() == CalculateFor.LEMMA) {
//					sentence.addAll(filteredWords
//							.stream()
//							.map(Word::getLemma)
//							.collect(Collectors.toList()));
//				} else if (stats.getCf() == CalculateFor.WORD) {
//					sentence.addAll(filteredWords
//							.stream()
//							.map(Word::getWord)
//							.collect(Collectors.toList()));
//				}
//
//				for (String word : sentence) {
//					Common.updateMap(stats.result, word);
//				}
//			}
//		}
//	}

//	private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
//		for (Sentence s : corpus) {
//			if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
//				List<String> sentence = new ArrayList<>(s.getWords().size());
//
//				if (stats.getCf() == CalculateFor.LEMMA) {
//					sentence.addAll(s.getWords()
//							.stream()
//							.map(Word::getLemma)
//							.collect(Collectors.toList()));
//				} else if (stats.getCf() == CalculateFor.WORD) {
//					sentence.addAll(s.getWords()
//							.stream()
//							.map(Word::getWord)
//							.collect(Collectors.toList()));
//				}
//
//				for (String word : sentence) {
//					Common.updateMap(stats.result, word);
//				}
//			}
//		}
//	}

//	static void calculateForAll(List<Sentence> corpus, Statistics stats) {
//		boolean taxonomyIsSet = stats.isTaxonomySet();
//		boolean JosTypeIsSet = stats.isJOSTypeSet();
//
//		// Branch once here and dispatch to a specialised method. The variants differ
//		// only by an if or two, and each if is O(1), but repeated inside the loops
//		// those checks add up, so choosing the variant up front saves some time.
//		if (taxonomyIsSet && JosTypeIsSet) {
//			calculateForTaxonomyAndJosType(corpus, stats);
//		} else if (taxonomyIsSet && !JosTypeIsSet) {
//			calculateForTaxonomy(corpus, stats);
//		} else if (!taxonomyIsSet && JosTypeIsSet) {
//			calculateForJosType(corpus, stats);
//		} else {
//			if (stats.isVcc()) {
//				calculateVCC(corpus, stats);
//			} else {
//				calculateNoFilter(corpus, stats);
//			}
//		}
//	}
//}