list/src/main/java/data/StatisticsNew.java

package data;

import static gui.ValidationUtil.*;

import java.io.UnsupportedEncodingException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;

import gui.I18N;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import alg.inflectedJOS.WordFormation;
import data.Enums.WordLevelType;
import javafx.collections.ObservableList;
import util.Export;
import util.Util;
import util.db.RDB;

@SuppressWarnings("Duplicates")
public class StatisticsNew {
	public final static Logger logger = LogManager.getLogger(StatisticsNew.class);

	private Corpus corpus;
	private Filter filter;

	private String resultTitle;
	private Map<String, AtomicLong> result;
	private Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;
	private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
	private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedSuffix;
	private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedPrefix;
	private boolean useDB;
	private RDB db;
	private boolean analysisProducedResults;
	private LocalDateTime timeBeginning;
	private LocalDateTime timeEnding;
	private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
	private Map<Taxonomy, AtomicLong> uniGramTaxonomyOccurrences;

	public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
		this.corpus = corpus;
		this.filter = filter;
		this.taxonomyResult = new ConcurrentHashMap<>();
		this.taxonomyResult.put(corpus.getTotal(), new ConcurrentHashMap<>());
		this.collocability = new ConcurrentHashMap<>();
		this.uniGramTaxonomyOccurrences = new ConcurrentHashMap<>();
		this.uniGramTaxonomyOccurrences.put(corpus.getTotal(), new AtomicLong(0L));


        // create table for counting word occurrences per taxonomies
		if (this.corpus.getObservableListTaxonomy() != null && filter.getDisplayTaxonomy()) {
			if (this.filter.getTaxonomy().isEmpty()) {
				for (int i = 0; i < this.corpus.getObservableListTaxonomy().size(); i++) {
					this.taxonomyResult.put(Taxonomy.factoryLongName(this.corpus.getObservableListTaxonomy().get(i), corpus), new ConcurrentHashMap<>());
				}
			} else {
				for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
//					Tax taxonomy = new Tax();
					this.taxonomyResult.put(this.filter.getTaxonomy().get(i), new ConcurrentHashMap<>());
				}
			}
		}

		if (useDB) {
			this.useDB = true;
			db = new RDB();
		}

		if (filter.getAl() == AnalysisLevel.WORD_LEVEL) {
			resultNestedSuffix = new ConcurrentHashMap<>();
			resultNestedPrefix = new ConcurrentHashMap<>();
		} else {
			result = new ConcurrentHashMap<>();
		}

        this.timeBeginning = LocalDateTime.now();

//		resultTitle = generateResultTitle();

		logger.debug(toString());
	}

	/**
	 * Builds the title used for exported results.
	 * <p>
	 * For string-level analyses the title consists of the corpus name (when it is not empty), a
	 * localized label for the kind of analysis (letters, word parts, words or word sets), the
	 * "calculate for" setting and - for word sets only - the n-gram and skip values. For every
	 * other analysis level it consists of the analysis level and the corpus type. All parts are
	 * separated by {@code _} and the formatted end time of the analysis is appended last.
	 *
	 * @return the generated title
	 */
	public String generateResultTitle() {
		final String separator = "_";
		StringBuilder title = new StringBuilder();

		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
			int ngramLevel = filter.getNgramValue();
			String name = corpus.getCorpusName();

			// every string-level title starts with the corpus name, if there is one
			if (!name.equals("")) {
				title.append(name).append(separator);
			}

			if (ngramLevel == 0 || ngramLevel == 1) {
				String analysisKey;
				if (ngramLevel == 0) {
					analysisKey = "exportFileName.letters";
				} else if (filter.getSuffixLength() != null && filter.getSuffixList() != null
						&& filter.getPrefixLength() != null && filter.getPrefixList() != null) {
					analysisKey = "exportFileName.wordParts";
				} else {
					analysisKey = "exportFileName.words";
				}
				title.append(I18N.get(analysisKey)).append(separator)
						.append(filter.getCalculateFor()).append(separator);
			} else {
				// word sets additionally carry the n-gram and skip values
				title.append(I18N.get("exportFileName.wordSets")).append(separator);
				title.append(filter.getCalculateFor().toString()).append(separator);
				title.append(filter.getNgramValue()).append(I18N.get("exportFileName.gram")).append(separator);
				title.append(filter.getSkipValue()).append(I18N.get("exportFileName.skip")).append(separator);
			}
			// TODO: assure skip is not null but zero
		} else {
			title.append(filter.getAl().toString()).append(separator)
					.append(corpus.getCorpusType().toString()).append(separator);
		}

		title.append(getTimeEnding());
		return title.toString();
	}

	/**
	 * Records the current moment as the end time of the analysis.
	 */
	public void setTimeEnding() {
		timeEnding = LocalDateTime.now();
	}

    /**
     * Returns the end time of the analysis formatted as {@code dd.MM.yyyy_HH.mm.ss}.
     * <p>
     * The hour is written on the 24-hour clock. The previous pattern used {@code hh}, the
     * 12-hour clock without an AM/PM marker, so a morning and an afternoon run produced the
     * same stamp.
     * <p>
     * If no end time has been recorded yet it is recorded now instead of failing with a
     * {@link NullPointerException}; {@code generateResultTitle()} calls this method and is
     * itself invoked by the save methods before they build the header block that stamps the
     * end time.
     *
     * @return the formatted end time
     */
    public String getTimeEnding(){
        if (timeEnding == null) {
            timeEnding = LocalDateTime.now();
        }
        return timeEnding.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_HH.mm.ss"));
    }

	/**
	 * @return the flag set by the save methods: {@code true} when the last save found results to
	 *         export, {@code false} otherwise
	 */
	public boolean isAnalysisProducedResults() {
		return this.analysisProducedResults;
	}

	/**
	 * Overrides the "analysis produced results" flag.
	 *
	 * @param analysisProducedResults new value of the flag
	 */
	public void setAnalysisProducedResults(boolean analysisProducedResults) {
		this.analysisProducedResults = analysisProducedResults;
	}

	/**
	 * Describes this run as a bulleted list: corpus type with the number of detected corpus
	 * files, whether the database or memory is used, and the filter settings. The constructor
	 * logs this text at debug level.
	 *
	 * @return multi-line description of the statistic properties
	 */
	@Override
	public String toString() {
		final String bullet = "\n\t- ";

		String corpusType = corpus.getCorpusType().toString();
		String fileCount = String.format(" (%d files)", corpus.getDetectedCorpusFiles().size());
		String storage = useDB ? "use DB" : "run in memory";

		return bullet + "Statistic properties:"
				+ bullet + corpusType + fileCount
				+ bullet + storage
				+ bullet + filter.toString();
	}

	/**
	 * @return the stored result title; {@code null} until one of the save methods that generate
	 *         a title has assigned it
	 */
	public String getResultTitle() {
		return this.resultTitle;
	}

	// ****************************************
	// ***************** util *****************
	// ****************************************

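	/*
	 * Disabled helper (kept for reference): stored the results of the current batch in the
	 * database and cleared the results map. Written as a plain block comment so that it is not
	 * picked up as the Javadoc of getFilter() below.
	 */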
//	public void storeTmpResultsToDB() {
//		try {
//			db.writeBatch(result);
//			result = new ConcurrentHashMap<>();
//		} catch (UnsupportedEncodingException e) {
//			logger.error("Store tmp results to DB", e);
//			// e.printStackTrace();
//		}
//	}

	/**
	 * @return the filter settings this statistic was created with
	 */
	public Filter getFilter() {
		return this.filter;
	}

	/**
	 * @return the corpus this statistic was created for
	 */
	public Corpus getCorpus() {
		return this.corpus;
	}

	/**
	 * Applies the minimal-occurrence and minimal-taxonomy filters and exports the corpus-wide
	 * results to CSV.
	 * <p>
	 * When the run uses the database, its dump replaces the in-memory result map and the database
	 * is deleted first. The "analysis produced results" flag is updated to reflect whether any
	 * row survived the filters.
	 *
	 * @param limit optional limit, passed through {@code Util.getValidInt} to the sorting step
	 * @return {@code true} when results were exported, {@code false} when nothing was left to save
	 * @throws UnsupportedEncodingException if one of the called database or export operations reports it
	 */
	public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException {
		if (useDB) {
			result = db.getDump();
			db.delete();
		}

		removeMinimalOccurrences(filter.getMinimalOccurrences());
		removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());

		// if no results and nothing to save, report it and stop
		Map<MultipleHMKeys, AtomicLong> totals = taxonomyResult.get(corpus.getTotal());
		analysisProducedResults = !totals.isEmpty();
		if (!analysisProducedResults) {
			return false;
		}

		Set<Pair<String, Map<MultipleHMKeys, Long>>> stats = new HashSet<>();
		stats.add(ImmutablePair.of(resultTitle, getSortedResult(totals, Util.getValidInt(limit))));
		Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
		return true;
	}

	/**
	 * Removes rows that occur in fewer than {@code minimalTaxonomy} different taxonomies.
	 * <p>
	 * For every key of the corpus-wide "total" table, the number of other taxonomy tables that
	 * hold a count of at least 1 for that key is determined; keys below the threshold are removed
	 * from the total table (the per-taxonomy tables are left as they are).
	 * <p>
	 * Counts are compared as {@code long} so that very large counts are not truncated, a key
	 * missing from a taxonomy table is treated as "does not occur there", and rows are removed
	 * through {@code removeIf} so the method does not depend on the map tolerating removal while
	 * its key set is being iterated.
	 *
	 * @param taxonomyResult  per-taxonomy result tables, including the "total" table
	 * @param minimalTaxonomy minimal number of taxonomies a row must occur in; {@code null} or a
	 *                        value of at most 1 disables the filter
	 */
	private void removeMinimalTaxonomy(Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResult, Integer minimalTaxonomy) {
		if (minimalTaxonomy == null || minimalTaxonomy <= 1) {
			return;
		}

		Taxonomy total = corpus.getTotal();
		taxonomyResult.get(total).keySet().removeIf(key -> {
			int occurrences = 0;
			for (Map.Entry<Taxonomy, Map<MultipleHMKeys, AtomicLong>> column : taxonomyResult.entrySet()) {
				if (column.getKey().equals(total)) {
					continue;
				}
				AtomicLong count = column.getValue().get(key);
				if (count != null && count.get() >= 1) {
					occurrences++;
				}
			}
			return occurrences < minimalTaxonomy;
		});
	}

	/**
	 * Removes rows whose total number of occurrences is lower than {@code minimalOccurrences}.
	 * <p>
	 * The affected keys are collected from the corpus-wide "total" table first and then removed
	 * from every taxonomy table, so no map is modified while it is being iterated. Counts are
	 * compared as {@code long}; the previous {@code intValue()} comparison turned counts of
	 * 2^31 and above into negative numbers and removed them by mistake.
	 *
	 * @param minimalOccurrences minimal total count a row must reach; {@code null} or a value of
	 *                           at most 0 disables the filter
	 */
	private void removeMinimalOccurrences(Integer minimalOccurrences) {
		if (minimalOccurrences == null || minimalOccurrences <= 0) {
			return;
		}

		List<MultipleHMKeys> tooRare = new ArrayList<>();
		for (Map.Entry<MultipleHMKeys, AtomicLong> row : taxonomyResult.get(corpus.getTotal()).entrySet()) {
			if (row.getValue().get() < minimalOccurrences) {
				tooRare.add(row.getKey());
			}
		}

		for (Map<MultipleHMKeys, AtomicLong> table : taxonomyResult.values()) {
			for (MultipleHMKeys key : tooRare) {
				table.remove(key);
			}
		}
	}

	/**
	 * Exports the nested (prefix/suffix) results.
	 * <p>
	 * Generates the result title, replaces the in-memory result map with the database dump (and
	 * deletes the database) when the run uses the database, and exports the collected nested
	 * results. The code that fills the export map is currently commented out, so the map stays
	 * empty and the method always reports that there is nothing to save.
	 *
	 * @param limit optional limit; only referenced by the disabled code
	 * @return {@code true} when results were exported, {@code false} when there was nothing to save
	 * @throws UnsupportedEncodingException if one of the called database or export operations reports it
	 */
	public boolean saveResultNestedToDisk(int... limit) throws UnsupportedEncodingException {
		resultTitle = generateResultTitle();

		if (useDB) {
			result = db.getDump();
			db.delete();
		}

		Map<WordLevelType, Map<String, Map<String, Long>>> exportable = new HashMap<>();

		// UNCOMMENT!!!!!!
//		if (!isEmpty(resultNestedSuffix)) {
//			exportable.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit)));
//		}
//
//		if (!isEmpty(resultNestedPrefix)) {
//			exportable.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit)));
//		}

		// if no results and nothing to save, report it and stop
		analysisProducedResults = !exportable.isEmpty();
		if (!analysisProducedResults) {
			return false;
		}

		Export.nestedMapToCSV(resultTitle, exportable, corpus.getChosenResultsLocation(), headerInfoBlock());
		return true;
	}

	/**
	 * Switches the filter to the word-formation analysis level, lets {@link WordFormation}
	 * calculate its statistics from this object and exports the custom result table to CSV.
	 * <p>
	 * When the run uses the database, its dump replaces the in-memory result map and the database
	 * is deleted before the results are checked.
	 *
	 * @return {@code true} when results were exported, {@code false} when the result map was empty
	 * @throws UnsupportedEncodingException if one of the called database or export operations reports it
	 */
	public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
		filter.setAl(AnalysisLevel.WORD_FORMATION);
		resultTitle = generateResultTitle();

		if (useDB) {
			result = db.getDump();
			db.delete();
		}

		// if no results and nothing to save, report it and stop
		boolean hasResults = !result.isEmpty();
		analysisProducedResults = hasResults;
		if (!hasResults) {
			return false;
		}

		WordFormation.calculateStatistics(this);

		Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
		return true;
	}

	/**
	 * Sorts every inner map of a nested result map via {@code getSortedResult}.
	 *
	 * @param nestedMap nested counts, keyed by an outer string
	 * @param limit     limit handed (through {@code Util.getValidInt}) to the sorting of each inner map
	 * @return a new map with the same outer keys and the sorted inner maps
	 */
	private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) {
		Map<String, Map<MultipleHMKeys, Long>> sortedByOuterKey = new HashMap<>();

		for (Map.Entry<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> group : nestedMap.entrySet()) {
			Map<MultipleHMKeys, Long> sortedInner = getSortedResult(group.getValue(), Util.getValidInt(limit));
			sortedByOuterKey.put(group.getKey(), sortedInner);
		}

		return sortedByOuterKey;
	}


	/**
	 * Converts the atomic counters of {@code map} with {@code Util.atomicInt2StringAndInt} and
	 * hands the outcome, together with {@code limit}, to {@code Util.sortByValue}.
	 *
	 * @param map   counters to convert and sort
	 * @param limit limit forwarded unchanged to {@code Util.sortByValue}
	 * @return whatever {@code Util.sortByValue} returns for the converted map
	 */
	private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
		return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
	}

	/**
	 * Adds {@code amount} to the unigram occurrence counter of the corpus-wide "total" entry and
	 * to the counter of every taxonomy in {@code taxonomy}, creating missing counters on demand.
	 * <p>
	 * The counters are updated with {@code addAndGet} and created with {@code computeIfAbsent}.
	 * The previous read-then-{@code set} and check-then-{@code put} sequences were not atomic,
	 * so updates arriving from several threads at once could be lost.
	 *
	 * @param amount   number of occurrences to add
	 * @param taxonomy taxonomies whose counters are increased in addition to the total
	 */
	public void updateUniGramOccurrences(int amount, ArrayList<Taxonomy> taxonomy){
		uniGramTaxonomyOccurrences.get(corpus.getTotal()).addAndGet(amount);

		for (Taxonomy t : taxonomy) {
			uniGramTaxonomyOccurrences.computeIfAbsent(t, missing -> new AtomicLong()).addAndGet(amount);
		}
	}

    /**
     * @return the unigram occurrence counters keyed by taxonomy (the internal map itself, not a
     *         copy), including the entry for the corpus-wide total
     */
    public Map<Taxonomy, AtomicLong> getUniGramOccurrences() {
        return this.uniGramTaxonomyOccurrences;
    }

	/**
	 * Counts one occurrence of {@code o} in the per-taxonomy result tables.
	 * <p>
	 * The corpus-wide "total" table and every table whose taxonomy is contained in
	 * {@code taxonomy} get their counter for {@code o} increased by one (created with 1 when
	 * missing). All remaining tables get a counter of 0 for {@code o} when they have none yet, so
	 * that every table ends up holding the key.
	 *
	 * @param o        key of the counted item
	 * @param taxonomy taxonomies the counted item belongs to
	 */
	public void updateTaxonomyResults(MultipleHMKeys o, List<Taxonomy> taxonomy) {
		for (Map.Entry<Taxonomy, Map<MultipleHMKeys, AtomicLong>> column : taxonomyResult.entrySet()) {
			Taxonomy columnTaxonomy = column.getKey();
			Map<MultipleHMKeys, AtomicLong> counts = column.getValue();

			// first word should have the same taxonomy as others
			boolean occursHere = columnTaxonomy.equals(corpus.getTotal()) || taxonomy.contains(columnTaxonomy);

			if (!occursHere) {
				// taxonomy does not apply to this item: make sure the key exists with a zero count
				counts.putIfAbsent(o, new AtomicLong(0));
				continue;
			}

			// first occurrence starts at 1, later ones increase the stored counter
			AtomicLong existing = counts.putIfAbsent(o, new AtomicLong(1));
			if (existing != null) {
				counts.get(o).incrementAndGet();
			}
		}
	}

	/**
	 * @return the per-taxonomy result tables (the internal map itself, not a copy), including the
	 *         table of the corpus-wide total
	 */
	public Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> getTaxonomyResult() {
		return this.taxonomyResult;
	}

	/**
	 * Counts one occurrence of {@code o} in the flat result map: a new key starts at 1, an
	 * existing key has its counter increased by one.
	 *
	 * @param o key of the counted item
	 */
	public void updateResults(String o) {
		AtomicLong existing = result.putIfAbsent(o, new AtomicLong(1));
		if (existing == null) {
			// first occurrence - the counter was just created with 1
			return;
		}

		result.get(o).incrementAndGet();
	}

	/**
	 * @return the flat result map (the internal map itself, not a copy); the constructor creates
	 *         it only when the analysis level is not the word level
	 */
	public Map<String, AtomicLong> getResult() {
		return this.result;
	}

	/**
	 * @return the custom result table, or {@code null} when none has been set
	 */
	public Object[][] getResultCustom() {
		return this.resultCustom;
	}

	/**
	 * Stores the custom result table that {@code recalculateAndSaveResultToDisk()} exports.
	 *
	 * @param resultCustom table to store; the array is kept as passed, not copied
	 */
	public void setResultCustom(Object[][] resultCustom) {
		this.resultCustom = resultCustom;
	}

	/**
	 * Counts one occurrence of {@code stringValue} under {@code key} in the nested suffix or
	 * prefix results, depending on {@code type}. Any other type is ignored.
	 *
	 * @param type        {@code SUFFIX} or {@code PREFIX}
	 * @param key         outer key of the nested result map
	 * @param stringValue counted value
	 */
	public void updateResultsNested(WordLevelType type, String key, String stringValue) {
		if (type == WordLevelType.SUFFIX) {
			updateResultsNestedSuffix(key, stringValue);
			return;
		}

		if (type == WordLevelType.PREFIX) {
			updateResultsNestedPrefix(key, stringValue);
		}
	}

	/**
	 * Counts one occurrence of {@code stringValue} under {@code key} in the nested suffix
	 * results, creating the inner map and the counter on demand.
	 * <p>
	 * The counter is looked up with the same {@link MultipleHMKeys} object it is stored under.
	 * The previous implementation incremented via {@code get(stringValue)}, i.e. with the raw
	 * string as key of a map keyed by {@code MultipleHMKeys}, so the lookup did not address the
	 * stored counter.
	 *
	 * @param key         outer key of the nested suffix map
	 * @param stringValue counted value
	 */
	public void updateResultsNestedSuffix(String key, String stringValue) {
		MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);

		resultNestedSuffix
				.computeIfAbsent(key, missing -> new ConcurrentHashMap<>())
				.computeIfAbsent(mkStringValue, missing -> new AtomicLong())
				.incrementAndGet();
	}

	/**
	 * Counts one occurrence of {@code stringValue} under {@code key} in the nested prefix
	 * results, creating the inner map and the counter on demand.
	 * <p>
	 * The counter is looked up with the same {@link MultipleHMKeys} object it is stored under.
	 * The previous implementation incremented via {@code get(stringValue)}, i.e. with the raw
	 * string as key of a map keyed by {@code MultipleHMKeys}, so the lookup did not address the
	 * stored counter.
	 *
	 * @param key         outer key of the nested prefix map
	 * @param stringValue counted value
	 */
	public void updateResultsNestedPrefix(String key, String stringValue) {
		MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);

		resultNestedPrefix
				.computeIfAbsent(key, missing -> new ConcurrentHashMap<>())
				.computeIfAbsent(mkStringValue, missing -> new AtomicLong())
				.incrementAndGet();
	}

	/**
	 * Builds the block of "label - value" pairs that describes this analysis in an export.
	 * <p>
	 * The returned map keeps insertion order, so the statements below also define the order of
	 * the header lines: corpus, date, execution time, kind of analysis, filter settings,
	 * taxonomies with their unigram totals, minimal occurrence/taxonomy limits and - for the
	 * SOLAR corpus type - the additional filters. As a side effect the end time of the analysis
	 * is recorded via {@code setTimeEnding()}.
	 * <p>
	 * Changes compared with the earlier version: the date uses the 24-hour clock ({@code HH}
	 * instead of {@code hh} without AM/PM marker); prefix/suffix lengths and lists are read
	 * null-safely, so a filter that provides only some of them no longer causes a
	 * {@link NullPointerException}; taxonomy totals are written from the full {@code long} value.
	 *
	 * @return insertion-ordered map of localized header labels to their values
	 */
	private LinkedHashMap<String, String> headerInfoBlock() {
		LinkedHashMap<String, String> info = new LinkedHashMap<>();

		info.put(I18N.get("exportHeader.corpus"), corpus.getCorpusType().toString());
		setTimeEnding();
		info.put(I18N.get("exportHeader.date"), timeEnding.format(DateTimeFormatter.ofPattern("dd.MM.yyyy HH:mm")));

		// time elapsed, in whole seconds
		long seconds = ChronoUnit.MILLIS.between(timeBeginning, timeEnding) / 1000;
		info.put(I18N.get("exportHeader.executionTime"), String.valueOf(seconds) + " s");

		// true as soon as any prefix/suffix setting is present on the filter
		boolean wordPartsConfigured = filter.getSuffixLength() != null || filter.getSuffixList() != null
				|| filter.getPrefixLength() != null || filter.getPrefixList() != null;

		Integer ngramLevel = filter.getNgramValue();

		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
			if (ngramLevel == 0) {
				info.put(I18N.get("exportHeader.analysis"), I18N.get("exportHeader.analysis.letters"));
			} else if (ngramLevel == 1) {
				// if suffixes or prefixes are not null print word parts
				if (wordPartsConfigured) {
					info.put(I18N.get("exportHeader.analysis"), I18N.get("exportHeader.analysis.wordParts"));
				} else {
					info.put(I18N.get("exportHeader.analysis"), I18N.get("exportHeader.analysis.words"));
				}
			} else {
				info.put(I18N.get("exportHeader.analysis"), I18N.get("exportHeader.analysis.wordSets"));
			}
		} else {
			info.put(I18N.get("exportHeader.analysis"), filter.getAl().toString());
		}

		if (ngramLevel == 0) {
			info.put(I18N.get("exportHeader.numberLetters"), filter.getStringLength().toString());
		}

		// calculate for
		info.put(I18N.get("exportHeader.calculateFor"), filter.getCalculateFor().toString());

		// also write
		if (ngramLevel > 0) {
			if (filter.getMultipleKeys().size() > 0) {
				info.put(I18N.get("exportHeader.alsoFilter"), joinSemicolonSeparated(filter.getMultipleKeys()));
			} else {
				info.put(I18N.get("exportHeader.alsoFilter"), "");
			}
		}

		// data limitations
		if (filter.getDisplayTaxonomy()) {
			info.put(I18N.get("exportHeader.displayTaxonomies"), I18N.get("exportHeader.yes"));
		} else {
			info.put(I18N.get("exportHeader.displayTaxonomies"), I18N.get("exportHeader.no"));
		}

		// settings that only exist for n-grams of more than one word
		if (ngramLevel > 1) {
			// n-gram level
			info.put(I18N.get("exportHeader.ngramLevel"), String.valueOf(ngramLevel));

			// skip
			info.put(I18N.get("exportHeader.skipValue"), isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");

			// note punctuations
			if (filter.getNotePunctuations()) {
				info.put(I18N.get("exportHeader.notePunctuations"), I18N.get("exportHeader.yes"));
			} else {
				info.put(I18N.get("exportHeader.notePunctuations"), I18N.get("exportHeader.no"));
			}

			// collocability measures
			if (filter.getCollocability().size() > 0) {
				info.put(I18N.get("exportHeader.collocability"), joinSemicolonSeparated(filter.getCollocability()));
			} else {
				info.put(I18N.get("exportHeader.collocability"), "");
			}
		}

		// fragmented MSD - n-gram = 1
		if (info.get(I18N.get("exportHeader.analysis")).equals(I18N.get("exportHeader.analysis.words"))) {
			if (filter.getWriteMsdAtTheEnd()) {
				info.put(I18N.get("exportHeader.writeMSDAtTheEnd"), I18N.get("exportHeader.yes"));
			} else {
				info.put(I18N.get("exportHeader.writeMSDAtTheEnd"), I18N.get("exportHeader.no"));
			}
		}

		if (wordPartsConfigured) {
			Integer prefixLength = filter.getPrefixLength();
			Integer suffixLength = filter.getSuffixLength();
			// a missing length counts as "not given" instead of failing while unboxing
			boolean lengthsGiven = (prefixLength != null && prefixLength > 0)
					|| (suffixLength != null && suffixLength > 0);

			if (lengthsGiven) {
				info.put(I18N.get("exportHeader.prefixLength"), String.valueOf(prefixLength));
				info.put(I18N.get("exportHeader.suffixLength"), String.valueOf(suffixLength));
			} else {
				// a missing list is written as an empty value
				info.put(I18N.get("exportHeader.prefixList"),
						filter.getPrefixList() == null ? "" : String.join("; ", filter.getPrefixList()));
				info.put(I18N.get("exportHeader.suffixList"),
						filter.getSuffixList() == null ? "" : String.join("; ", filter.getSuffixList()));
			}
		}

		// msd
		if (!isEmpty(filter.getMsd())) {
			StringBuilder msdPattern = new StringBuilder();
			for (Pattern pattern : filter.getMsd()) {
				msdPattern.append(pattern.toString()).append(" ");
			}

			info.put(I18N.get("exportHeader.msd"), msdPattern.toString());
		} else {
			info.put(I18N.get("exportHeader.msd"), "");
		}

		info.put(I18N.get("exportHeader.taxonomy"), "");
		if ((isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()))
				|| filter.getDisplayTaxonomy()) {
			ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());

			// nothing selected but taxonomies are displayed: list the taxonomies of the result tables
			if (filter.getDisplayTaxonomy() && tax.size() == 0) {
				tax = Tax.getTaxonomyForTaxonomyResult(corpus, taxonomyResult.keySet());
			}

			for (String s : tax) {
				if (corpus.getTaxonomy().size() == 0 || s == null) {
					continue;
				}

				// one line per taxonomy with its unigram total; unknown or zero totals stay empty
				AtomicLong occurrences = uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s, corpus));
				if (occurrences == null || occurrences.get() == 0) {
					info.put(s, "");
				} else {
					info.put(s, String.valueOf(occurrences.get()));
				}
			}
		}

		info.put(I18N.get("exportHeader.minOccurrences"), String.valueOf(filter.getMinimalOccurrences()));
		info.put(I18N.get("exportHeader.minTaxonomies"), String.valueOf(filter.getMinimalTaxonomy()));

		if (corpus.getCorpusType() == CorpusType.SOLAR) {
			HashMap<String, ObservableList<String>> filters = corpus.getSolarSelectedFilters();

			if (!isEmpty(filters)) {
				info.put(I18N.get("exportHeader.additionalFilters"), "");

				for (Map.Entry<String, ObservableList<String>> f : filters.entrySet()) {
					info.put(I18N.get(f.getKey() + "L"), StringUtils.join(f.getValue(), ", "));
				}
			}
		}

		return info;
	}

	/**
	 * Joins the string forms of {@code values} with {@code "; "}; an empty input yields an empty string.
	 */
	private static String joinSemicolonSeparated(Iterable<?> values) {
		StringJoiner joined = new StringJoiner("; ");
		for (Object value : values) {
			joined.add(value.toString());
		}
		return joined.toString();
	}

    /**
     * Calculates the collocability measures selected in the filter for every n-gram of the
     * corpus-wide result table and stores them in the collocability map of this object.
     * <p>
     * With {@code O} the count of the n-gram, {@code n} the n-gram size, {@code N} the sum of all
     * counts in the total table of {@code oneWordStatistics}, {@code f(w_i)} the counts of the
     * n-gram's parts in that table and {@code E = prod f(w_i) / N^(n-1)}:
     * <ul>
     * <li>DICE: {@code n * O / sum f(w_i)}</li>
     * <li>TSCORE: {@code (O - E) / sqrt(O)}</li>
     * <li>MI: {@code log2(O / E)}</li>
     * <li>MI3: {@code log2(O^3 / E)}</li>
     * <li>LOGDICE: {@code 14 + log2(DICE)}</li>
     * <li>SIMPLELL: {@code 2 * (O * log10(O / E) - (O - E))}</li>
     * </ul>
     * The product of the part counts is accumulated as a {@code double}. It used to be a
     * {@code long} that silently overflowed for frequent words; the overflow was only noticed
     * when the wrapped value happened to be negative and was then replaced by
     * {@code Long.MAX_VALUE}.
     *
     * @param oneWordStatistics statistics whose total table provides the counts of the single
     *                          parts of every n-gram
     */
    public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
        Map<MultipleHMKeys, AtomicLong> partCounts = oneWordStatistics.getTaxonomyResult().get(corpus.getTotal());

        Map<Collocability, Map<MultipleHMKeys, Double>> collocabilityMap = new ConcurrentHashMap<>();
        for (Collocability c : filter.getCollocability()) {
            collocabilityMap.put(c, new ConcurrentHashMap<>());
        }

        // count number of all words
        long N = 0;
        for (AtomicLong a : partCounts.values()) {
            N += a.longValue();
        }

        // the same for every n-gram, so computed once
        double n = (double) filter.getNgramValue();
        double expectedDenominator = Math.pow(N, n - 1);
        double log2 = Math.log(2);

        for (Map.Entry<MultipleHMKeys, AtomicLong> ngram : taxonomyResult.get(corpus.getTotal()).entrySet()) {
            MultipleHMKeys hmKey = ngram.getKey();

            long sumFwi = 0L;
            double mulFwi = 1.0;
            for (MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()) {
                long fwi = partCounts.get(smallHmKey).longValue();
                sumFwi += fwi;
                mulFwi *= fwi;
            }

            double O = (double) ngram.getValue().longValue();
            double E = mulFwi / expectedDenominator;
            double dice = n * O / sumFwi;

            Map<MultipleHMKeys, Double> scores = collocabilityMap.get(Collocability.DICE);
            if (scores != null) {
                scores.put(hmKey, dice);
            }

            scores = collocabilityMap.get(Collocability.TSCORE);
            if (scores != null) {
                scores.put(hmKey, (O - E) / Math.sqrt(O));
            }

            scores = collocabilityMap.get(Collocability.MI);
            if (scores != null) {
                scores.put(hmKey, Math.log(O / E) / log2);
            }

            scores = collocabilityMap.get(Collocability.MI3);
            if (scores != null) {
                scores.put(hmKey, Math.log(Math.pow(O, 3.0) / E) / log2);
            }

            scores = collocabilityMap.get(Collocability.LOGDICE);
            if (scores != null) {
                scores.put(hmKey, 14 + Math.log(dice) / log2);
            }

            scores = collocabilityMap.get(Collocability.SIMPLELL);
            if (scores != null) {
                // NOTE(review): base-10 logarithm here, while MI, MI3 and logDice use base 2 - confirm this is intended
                scores.put(hmKey, 2 * (O * Math.log10(O / E) - (O - E)));
            }
        }

        collocability.putAll(collocabilityMap);
    }

    /**
     * @return the collocability values per measure (the internal map itself, not a copy); empty
     *         until {@code updateCalculateCollocabilities} has stored values
     */
    public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability() {
        return collocability;
    }
}