list/src/main/java/data/StatisticsNew.java

package data;

import static gui.ValidationUtil.*;

import java.io.UnsupportedEncodingException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import alg.inflectedJOS.WordFormation;
import data.Enums.WordLevelType;
import javafx.collections.ObservableList;
import util.Export;
import util.Util;
import util.db.RDB;

@SuppressWarnings("Duplicates")
public class StatisticsNew {
	/** Log4j logger shared by all instances of this class. */
	public final static Logger logger = LogManager.getLogger(StatisticsNew.class);

	// Corpus and filter this statistics object was created for (assigned in the constructor).
	private Corpus corpus;
	private Filter filter;

	// Title produced by generateResultTitle(); handed to the Export calls when results are saved.
	private String resultTitle;
	// Flat counters keyed by string; the constructor only allocates this map when the analysis level is not WORD_LEVEL.
	private Map<String, AtomicLong> result;
	// Counters per column name; the constructor always adds a "Total" column and optionally one column per taxonomy.
	private Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;
	private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
	// Nested suffix/prefix counters; the constructor only allocates these maps when the analysis level is WORD_LEVEL.
	private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedSuffix;
	private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedPrefix;
	// Whether results are written to and dumped from the database helper below.
	private boolean useDB;
	// Only instantiated by the constructor when useDB is true; otherwise left null.
	private RDB db;
	// Updated by the save methods: false when there was nothing to export, true otherwise.
	private boolean analysisProducedResults;
	// Timestamp taken on the first generateResultTitle() call and reused for later titles and the header block.
	private LocalDateTime time;
	// Collocability measure -> (key -> computed value); filled by updateCalculateCollocabilities.
	private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;

	public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
		this.corpus = corpus;
		this.filter = filter;
		this.taxonomyResult = new ConcurrentHashMap<>();
		this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
		this.collocability = new ConcurrentHashMap<>();

		// create table for counting word occurrences per taxonomies
		if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
			if (this.filter.getTaxonomy().isEmpty()) {
				for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
					this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());
				}
			} else {
				for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
					Tax taxonomy = new Tax();
					this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>());
				}
			}
		}

		if (useDB) {
			this.useDB = true;
			db = new RDB();
		}

		if (filter.getAl() == AnalysisLevel.WORD_LEVEL) {
			resultNestedSuffix = new ConcurrentHashMap<>();
			resultNestedPrefix = new ConcurrentHashMap<>();
		} else {
			result = new ConcurrentHashMap<>();
		}

		resultTitle = generateResultTitle();

		logger.debug(toString());
	}

	/**
	 * Builds the title under which the results of this analysis are exported.
	 * <p>
	 * The parts are joined with {@code _} and the title always ends with a timestamp:
	 * <ul>
	 * <li>string level, n-gram value 0: corpus type, "crke", calculate-for</li>
	 * <li>string level, n-gram value 1: corpus type, "besede", calculate-for</li>
	 * <li>string level, any other n-gram value: analysis level, corpus type, calculate-for,
	 * "N-gram", "S-preskok" (a missing skip value is written as 0)</li>
	 * <li>every other analysis level: analysis level, corpus type</li>
	 * </ul>
	 * The timestamp is taken on the first call and reused afterwards, so regenerated titles of
	 * the same run carry the same time. It uses the 24-hour clock so that titles generated in
	 * the morning and in the afternoon cannot coincide.
	 *
	 * @return the generated title
	 */
	private String generateResultTitle() {
		String separator = "_";
		StringBuilder sb = new StringBuilder();

		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
			Integer ngramLevel = filter.getNgramValue();
			if (ngramLevel == 0 || ngramLevel == 1) {
				// letters ("crke") and single words ("besede") share the same layout
				sb.append(corpus.getCorpusType().toString())
						.append(separator)
						.append(ngramLevel == 0 ? "crke" : "besede")
						.append(separator)
						.append(filter.getCalculateFor())
						.append(separator);
			} else {
				sb.append(filter.getAl().toString())
						.append(separator)
						.append(corpus.getCorpusType().toString())
						.append(separator);
				sb.append(filter.getCalculateFor().toString())
						.append(separator);
				// ngram value
				sb.append(filter.getNgramValue()).append("-gram")
						.append(separator);
				// skip value: an unset skip means "no skip", so write 0 instead of "null"
				Object skipValue = filter.getSkipValue();
				sb.append(skipValue != null ? skipValue : "0").append("-preskok")
						.append(separator);
			}
		} else {
			sb.append(filter.getAl().toString()) // analysis level
					.append(separator)
					.append(corpus.getCorpusType().toString())
					.append(separator);
		}

		if (this.time == null) {
			this.time = LocalDateTime.now();
		}

		// HH = hour of day (0-23); "hh" would be the 12-hour clock without an AM/PM marker
		sb.append(time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_HH.mm.ss")));
		return sb.toString();
	}

	/**
	 * @return the flag last written by one of the save methods or by
	 *         {@link #setAnalysisProducedResults(boolean)}
	 */
	public boolean isAnalysisProducedResults() {
		return this.analysisProducedResults;
	}

	/**
	 * Overrides the "analysis produced results" flag.
	 *
	 * @param analysisProducedResults new value of the flag
	 */
	public void setAnalysisProducedResults(boolean analysisProducedResults) {
		this.analysisProducedResults = analysisProducedResults;
	}

	/**
	 * Describes this statistics object as a bulleted list: a heading, the corpus type with the
	 * number of detected corpus files, the storage mode and the filter description.
	 *
	 * @return multi-line description, each entry prefixed with a newline, a tab and "- "
	 */
	@Override
	public String toString() {
		String newLine = "\n\t- ";
		String fileCount = String.format(" (%d files)", corpus.getDetectedCorpusFiles().size());

		return new StringBuilder()
				.append(newLine).append("Statistic properties:")
				.append(newLine).append(corpus.getCorpusType().toString()).append(fileCount)
				.append(newLine).append(useDB ? "use DB" : "run in memory")
				.append(newLine).append(filter.toString())
				.toString();
	}

	/** @return the title most recently produced by {@code generateResultTitle()} */
	public String getResultTitle() {
		return this.resultTitle;
	}

	// ****************************************
	// ***************** util *****************
	// ****************************************

	/**
	 * Writes the counters collected so far to the database and then starts over with a fresh,
	 * empty result map. If the write fails with an encoding error, the error is logged and the
	 * current map is kept as it is.
	 */
	public void storeTmpResultsToDB() {
		try {
			db.writeBatch(result);
		} catch (UnsupportedEncodingException encodingProblem) {
			logger.error("Store tmp results to DB", encodingProblem);
			return;
		}

		// batch is persisted, continue counting into an empty map
		result = new ConcurrentHashMap<>();
	}

	/** @return the filter this statistics object was created with */
	public Filter getFilter() {
		return this.filter;
	}

	/** @return the corpus this statistics object was created with */
	public Corpus getCorpus() {
		return this.corpus;
	}

	/**
	 * Filters the "Total" counters by the filter's minimal occurrence and minimal taxonomy
	 * thresholds, sorts them and exports them as CSV to the corpus' chosen results location.
	 * When the database is in use, its dump is first loaded into the flat result map and the
	 * database is deleted.
	 *
	 * @param limit optional limit, passed through {@code Util.getValidInt} to the sorting step
	 * @return {@code false} when the "Total" column is empty and nothing was exported,
	 *         {@code true} otherwise
	 * @throws UnsupportedEncodingException propagated from the database or export calls
	 */
	public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException {
		if (useDB) {
			result = db.getDump();
			db.delete();
		}

		// if no results and nothing to save, return false
		Map<MultipleHMKeys, AtomicLong> totals = taxonomyResult.get("Total");
		analysisProducedResults = !totals.isEmpty();
		if (!analysisProducedResults) {
			return false;
		}

		removeMinimalOccurrences(totals, filter.getMinimalOccurrences());
		removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());

		Map<MultipleHMKeys, Long> sortedTotals = getSortedResult(totals, Util.getValidInt(limit));
		Set<Pair<String, Map<MultipleHMKeys, Long>>> stats = new HashSet<>();
		stats.add(ImmutablePair.of(resultTitle, sortedTotals));

		Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
		return true;
	}

	/**
	 * Removes lines from the "Total" column whose key occurs (count of at least 1) in fewer
	 * taxonomy columns than {@code minimalTaxonomy}.
	 * <p>
	 * A threshold of {@code null}, 1 or less keeps every line. A key that has no counter in
	 * some taxonomy column is treated as not occurring there.
	 *
	 * @param taxonomyResult  counters per column name; must contain the "Total" column
	 * @param minimalTaxonomy minimal number of different taxonomies a key has to occur in
	 */
	private void removeMinimalTaxonomy(Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult, Integer minimalTaxonomy) {
		if (minimalTaxonomy == null || minimalTaxonomy <= 1) {
			return;
		}
		final int requiredTaxonomies = minimalTaxonomy;

		// removeIf goes through the iterator, so removal is safe for any Map implementation
		taxonomyResult.get("Total").keySet().removeIf(key -> {
			int taxonomiesWithKey = 0;
			for (Map.Entry<String, Map<MultipleHMKeys, AtomicLong>> column : taxonomyResult.entrySet()) {
				if (column.getKey().equals("Total")) {
					continue;
				}
				AtomicLong count = column.getValue().get(key);
				// compare as long: intValue() would truncate very large counters
				if (count != null && count.get() >= 1) {
					taxonomiesWithKey++;
				}
			}
			return taxonomiesWithKey < requiredTaxonomies;
		});
	}

	/**
	 * Removes lines where the total number of occurrences is lower than
	 * {@code minimalOccurrences}.
	 * <p>
	 * A threshold of {@code null}, 0 or less keeps every line.
	 *
	 * @param taxonomyResultTotal counters of the "Total" column; modified in place
	 * @param minimalOccurrences  minimal number of occurrences a line needs in order to be kept
	 */
	private void removeMinimalOccurrences(Map<MultipleHMKeys, AtomicLong> taxonomyResultTotal, Integer minimalOccurrences) {
		if (minimalOccurrences == null || minimalOccurrences <= 0) {
			return;
		}
		final long threshold = minimalOccurrences;

		// compare as long (intValue() would truncate large counters); removeIf removes through
		// the iterator, which is safe for any Map implementation
		taxonomyResultTotal.values().removeIf(count -> count.get() < threshold);
	}

	/**
	 * Regenerates the result title and is meant to export the nested suffix/prefix counters.
	 * <p>
	 * NOTE: the code that fills {@code results} is currently disabled (see the comment block
	 * below), so {@code results} is always empty here and this method always returns
	 * {@code false} without exporting anything.
	 *
	 * @param limit optional limit; only referenced by the disabled code
	 * @return {@code false} when there is nothing to export, {@code true} after a successful export
	 * @throws UnsupportedEncodingException propagated from the database or export calls
	 */
	public boolean saveResultNestedToDisk(int... limit) throws UnsupportedEncodingException {
		resultTitle = generateResultTitle();

		if (useDB) {
			result = db.getDump();
			db.delete();
		}

		Map<WordLevelType, Map<String, Map<String, Long>>> results = new HashMap<>();

		// UNCOMMENT!!!!!!
		// (disabled: sortNestedMap returns maps keyed by MultipleHMKeys, results expects String keys)
//		if (!isEmpty(resultNestedSuffix)) {
//			results.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit)));
//		}
//
//		if (!isEmpty(resultNestedPrefix)) {
//			results.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit)));
//		}

		// if no results and nothing to save, return false
		analysisProducedResults = !results.isEmpty();
		if (!analysisProducedResults) {
			return false;
		}

		Export.nestedMapToCSV(resultTitle, results, corpus.getChosenResultsLocation(), headerInfoBlock());
		return true;
	}

	/**
	 * Switches the filter's analysis level to WORD_FORMATION, regenerates the title, lets
	 * {@code WordFormation.calculateStatistics} process this object and exports
	 * {@code resultCustom} as CSV. When the database is in use, its dump is first loaded into
	 * the flat result map and the database is deleted.
	 *
	 * @return {@code false} when the flat result map is empty and nothing was exported,
	 *         {@code true} otherwise
	 * @throws UnsupportedEncodingException propagated from the database or export calls
	 */
	public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
		filter.setAl(AnalysisLevel.WORD_FORMATION);
		resultTitle = generateResultTitle();

		if (useDB) {
			result = db.getDump();
			db.delete();
		}

		// if no results and nothing to save, return false
		analysisProducedResults = !result.isEmpty();
		if (!analysisProducedResults) {
			return false;
		}

		WordFormation.calculateStatistics(this);

		Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
		return true;
	}

	/**
	 * Applies {@link #getSortedResult(Map, int)} to every inner map of a nested counter map.
	 *
	 * @param nestedMap outer key -> counters
	 * @param limit     limit, passed through {@code Util.getValidInt} to the sorting step
	 * @return a new map with the same outer keys and the sorted inner maps
	 */
	private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) {
		Map<String, Map<MultipleHMKeys, Long>> sortedByOuterKey = new HashMap<>();

		for (Map.Entry<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> inner : nestedMap.entrySet()) {
			Map<MultipleHMKeys, Long> sortedInner = getSortedResult(inner.getValue(), Util.getValidInt(limit));
			sortedByOuterKey.put(inner.getKey(), sortedInner);
		}

		return sortedByOuterKey;
	}


	/**
	 * Converts the counters with {@code Util.atomicInt2StringAndInt} and sorts them with
	 * {@code Util.sortByValue}.
	 *
	 * @param map   counters to sort
	 * @param limit limit handed to {@code Util.sortByValue}
	 * @return the map returned by {@code Util.sortByValue}
	 */
	private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
		return Util.sortByValue(
				Util.atomicInt2StringAndInt(map),
				limit);
	}

	/**
	 * Registers one occurrence of {@code o}.
	 * <p>
	 * The counter of {@code o} is incremented in the "Total" column and in every taxonomy
	 * column whose name is contained in {@code taxonomy}. In all remaining columns a zero
	 * counter is created if {@code o} has none yet, so that every column knows every key.
	 *
	 * @param o        key whose occurrence is counted
	 * @param taxonomy taxonomy names the occurrence belongs to; {@code null} is treated as
	 *                 "no taxonomy", i.e. only "Total" is incremented
	 */
	public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
		for (Map.Entry<String, Map<MultipleHMKeys, AtomicLong>> column : taxonomyResult.entrySet()) {
			String columnName = column.getKey();
			// computeIfAbsent hands back the counter that is actually stored in the map, so no
			// second lookup is needed and no AtomicLong is allocated when the key already exists
			AtomicLong counter = column.getValue().computeIfAbsent(o, ignored -> new AtomicLong());

			// first word should have the same taxonomy as others
			if (columnName.equals("Total") || (taxonomy != null && taxonomy.contains(columnName))) {
				counter.incrementAndGet();
			}
		}
	}

	/** @return the live (not copied) counters per column name, including the "Total" column */
	public Map<String, Map<MultipleHMKeys, AtomicLong>> getTaxonomyResult() {
		return this.taxonomyResult;
	}

	/**
	 * Registers one occurrence of {@code o} in the flat result map, creating its counter on
	 * first use.
	 * <p>
	 * The {@code result} field is read exactly once: {@link #storeTmpResultsToDB()} replaces
	 * the map, and looking the key up again in the replaced (empty) map would return
	 * {@code null}.
	 *
	 * @param o key whose occurrence is counted
	 */
	public void updateResults(String o) {
		result.computeIfAbsent(o, ignored -> new AtomicLong()).incrementAndGet();
	}

	/** @return the live (not copied) flat result map; may be {@code null} if it was never allocated */
	public Map<String, AtomicLong> getResult() {
		return this.result;
	}

	/** @return the custom result table as last set, without copying */
	public Object[][] getResultCustom() {
		return this.resultCustom;
	}

	/**
	 * Stores the custom result table that {@code recalculateAndSaveResultToDisk} exports.
	 *
	 * @param resultCustom table to store; kept by reference
	 */
	public void setResultCustom(Object[][] resultCustom) {
		this.resultCustom = resultCustom;
	}

	/**
	 * Registers one occurrence of {@code stringValue} under {@code key} in the nested counters
	 * selected by {@code type}. Types other than SUFFIX and PREFIX are ignored.
	 *
	 * @param type        which nested counters to update
	 * @param key         outer key
	 * @param stringValue value whose occurrence is counted
	 */
	public void updateResultsNested(WordLevelType type, String key, String stringValue) {
		if (type == WordLevelType.SUFFIX) {
			updateResultsNestedSuffix(key, stringValue);
		} else if (type == WordLevelType.PREFIX) {
			updateResultsNestedPrefix(key, stringValue);
		}
	}

	/**
	 * Registers one occurrence of {@code stringValue} under {@code key} in the nested suffix
	 * counters, creating the inner map and the counter on first use.
	 * <p>
	 * The inner map is keyed by {@code MultipleHMKeys}, so the counter is always addressed
	 * through the wrapped key; a lookup with the raw string would not find the counter.
	 *
	 * @param key         outer key
	 * @param stringValue value whose occurrence is counted
	 */
	public void updateResultsNestedSuffix(String key, String stringValue) {
		MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);

		resultNestedSuffix
				.computeIfAbsent(key, ignored -> new ConcurrentHashMap<>())
				.computeIfAbsent(mkStringValue, ignored -> new AtomicLong())
				.incrementAndGet();
	}

	/**
	 * Registers one occurrence of {@code stringValue} under {@code key} in the nested prefix
	 * counters, creating the inner map and the counter on first use.
	 * <p>
	 * The inner map is keyed by {@code MultipleHMKeys}, so the counter is always addressed
	 * through the wrapped key; a lookup with the raw string would not find the counter.
	 *
	 * @param key         outer key
	 * @param stringValue value whose occurrence is counted
	 */
	public void updateResultsNestedPrefix(String key, String stringValue) {
		MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);

		resultNestedPrefix
				.computeIfAbsent(key, ignored -> new ConcurrentHashMap<>())
				.computeIfAbsent(mkStringValue, ignored -> new AtomicLong())
				.incrementAndGet();
	}

	/**
	 * Builds the ordered label -> value block that is handed to the Export calls together
	 * with the results.
	 * <p>
	 * Entries, in insertion order: corpus type, date (24-hour clock), analysis name; for
	 * string-level analysis additionally the n-gram level and skip value (only for n-gram
	 * values above 1), the calculate-for setting and the MSD patterns (if any); then the
	 * selected taxonomies (if the corpus type supports taxonomy) and, for the SOLAR corpus,
	 * its additional filters.
	 *
	 * @return insertion-ordered map of header labels to values
	 */
	private LinkedHashMap<String, String> headerInfoBlock() {
		LinkedHashMap<String, String> info = new LinkedHashMap<>();

		info.put("Korpus:", corpus.getCorpusType().toString());
		// HH = hour of day (0-23); "hh" would be the 12-hour clock without an AM/PM marker
		info.put("Datum:", time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy HH:mm")));

		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
			Integer ngramLevel = filter.getNgramValue();

			if (ngramLevel == 0) {
				info.put("Analiza", "Črke");
			} else if (ngramLevel == 1) {
				info.put("Analiza", "Besede");
			} else {
				info.put("Analiza", filter.getAl().toString());
			}

			// n-gram level and skip only make sense for real n-grams
			if (ngramLevel > 1) {
				info.put("n-gram nivo:", String.valueOf(ngramLevel));
				info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
			}

			// calculate for
			info.put("Izračunaj za:", filter.getCalculateFor().toString());

			// msd
			if (!isEmpty(filter.getMsd())) {
				StringBuilder msdPattern = new StringBuilder();
				for (Pattern pattern : filter.getMsd()) {
					msdPattern.append(pattern.toString()).append(" ");
				}

				info.put("MSD:", msdPattern.toString());
			}
		} else {
			info.put("Analiza", filter.getAl().toString());
		}

		if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
			ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());

			info.put("Taksonomija: ", "");
			// each taxonomy needs its own key in the map: use a growing run of spaces as the label
			String sep = "";
			for (String s : tax) {
				info.put(sep = sep + " ", s);
			}
		}

		if (corpus.getCorpusType() == CorpusType.SOLAR) {
			HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();

			if (!isEmpty(filters)) {
				info.put("Dodatni filtri: ", "");

				for (Map.Entry<String, ObservableList<String>> f : filters.entrySet()) {
					info.put(f.getKey(), StringUtils.join(f.getValue(), ", "));
				}
			}
		}

		return info;
	}

    /**
     * Computes the Dice value of every key in this object's "Total" column and stores the
     * values under the first collocability measure selected in the filter.
     * <p>
     * For a key made of the parts w1..wn the value is
     * {@code n * count(key) / (count(w1) + ... + count(wn))}, where {@code n} is the filter's
     * n-gram value and the part counts are read from the "Total" column of
     * {@code oneWordStatistics}.
     * <p>
     * NOTE(review): a part without a counter in {@code oneWordStatistics} is counted as 0,
     * and a key whose parts sum to 0 gets the value 0.0 (instead of a division by zero) -
     * confirm this is the desired handling of missing one-word data.
     *
     * @param oneWordStatistics statistics holding the single-word counts
     */
    public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
        Map<MultipleHMKeys, AtomicLong> oneWordTotals = oneWordStatistics.getTaxonomyResult().get("Total");

        Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();

        for (Map.Entry<MultipleHMKeys, AtomicLong> ngram : taxonomyResult.get("Total").entrySet()) {
            MultipleHMKeys hmKey = ngram.getKey();

            // sum of the single-word frequencies of all parts of this key
            long sumFwi = 0L;
            for (MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()) {
                AtomicLong wordCount = oneWordTotals.get(smallHmKey);
                if (wordCount != null) {
                    sumFwi += wordCount.longValue();
                }
            }

            double diceValue = 0.0;
            if (sumFwi != 0L) {
                diceValue = (double) filter.getNgramValue() * (double) ngram.getValue().longValue() / sumFwi;
            }
            collocabilityMap.put(hmKey, diceValue);
        }

        collocability.put(filter.getCollocability().get(0), collocabilityMap);
    }

    /** @return the live (not copied) map of collocability measure to computed values per key */
    public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability() {
        return collocability;
    }
}
Project copied 2018-06-19 07:15:37 +00:00			`package data;`

			`import static gui.ValidationUtil.*;`

			`import java.io.UnsupportedEncodingException;`
			`import java.time.LocalDateTime;`
			`import java.time.format.DateTimeFormatter;`
			`import java.util.*;`
			`import java.util.concurrent.ConcurrentHashMap;`
			`import java.util.concurrent.atomic.AtomicLong;`
			`import java.util.regex.Pattern;`

			`import org.apache.commons.lang3.StringUtils;`
			`import org.apache.commons.lang3.tuple.ImmutablePair;`
			`import org.apache.commons.lang3.tuple.Pair;`
			`import org.apache.logging.log4j.LogManager;`
			`import org.apache.logging.log4j.Logger;`

			`import alg.inflectedJOS.WordFormation;`
			`import data.Enums.WordLevelType;`
			`import javafx.collections.ObservableList;`
			`import util.Export;`
			`import util.Util;`
			`import util.db.RDB;`

			`@SuppressWarnings("Duplicates")`
			`public class StatisticsNew {`
			`public final static Logger logger = LogManager.getLogger(StatisticsNew.class);`

			`private Corpus corpus;`
			`private Filter filter;`

			`private String resultTitle;`
			`private Map<String, AtomicLong> result;`
Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`private Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;`
Project copied 2018-06-19 07:15:37 +00:00			`private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%`
Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedSuffix;`
			`private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedPrefix;`
Project copied 2018-06-19 07:15:37 +00:00			`private boolean useDB;`
			`private RDB db;`
			`private boolean analysisProducedResults;`
			`private LocalDateTime time;`
Added collocability functionality - implemented Dice method 2018-10-24 08:36:07 +00:00			`private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;`
Project copied 2018-06-19 07:15:37 +00:00
			`public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {`
			`this.corpus = corpus;`
			`this.filter = filter;`
Added taxonomy presentation in results 2018-06-29 10:53:29 +00:00			`this.taxonomyResult = new ConcurrentHashMap<>();`
Refactored results - moved to taxonomyResults 2018-07-05 07:37:35 +00:00			`this.taxonomyResult.put("Total", new ConcurrentHashMap<>());`
Added collocability functionality - implemented Dice method 2018-10-24 08:36:07 +00:00			`this.collocability = new ConcurrentHashMap<>();`
Added taxonomy presentation in results 2018-06-29 10:53:29 +00:00
Added collocability functionality - implemented Dice method 2018-10-24 08:36:07 +00:00			`// create table for counting word occurrences per taxonomies`
Some functionality from OneWord copied to StringAnalysis and fixed 2018-08-22 07:11:14 +00:00			`if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {`
Added functionality for n-grams (comma separation), minimal occurances etc. 2018-07-31 06:58:17 +00:00			`if (this.filter.getTaxonomy().isEmpty()) {`
			`for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {`
			`this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());`
			`}`
			`} else {`
			`for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {`
			`Tax taxonomy = new Tax();`
			`this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>());`
			`}`
Added taxonomy presentation in results 2018-06-29 10:53:29 +00:00			`}`
			`}`
Project copied 2018-06-19 07:15:37 +00:00
			`if (useDB) {`
			`this.useDB = true;`
			`db = new RDB();`
			`}`

			`if (filter.getAl() == AnalysisLevel.WORD_LEVEL) {`
			`resultNestedSuffix = new ConcurrentHashMap<>();`
			`resultNestedPrefix = new ConcurrentHashMap<>();`
			`} else {`
			`result = new ConcurrentHashMap<>();`
			`}`

			`resultTitle = generateResultTitle();`

			`logger.debug(toString());`
			`}`

			`/**`
			`* Result's title consists of:`
			`* <ul>`
			`* <li>Corpus type</li>`
			`* <li>Analysis level</li>`
			`* <li>Calculate for</li>`
			`* <li></li>`
			`* <li></li>`
			`* <li></li>`
			`* <li></li>`
			`* </ul>`
			`*`
			`* @return`
			`*/`
			`private String generateResultTitle() {`
			`String separator = "_";`
			`StringBuilder sb = new StringBuilder();`

			`if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {`
			`Integer ngramLevel = filter.getNgramValue();`
			`if(ngramLevel == 0) {`
Refactored results - moved to taxonomyResults 2018-07-05 07:37:35 +00:00			`sb.append(corpus.getCorpusType().toString())`
			`.append(separator)`
			`.append("crke")`
			`.append(separator)`
			`.append(filter.getCalculateFor())`
Project copied 2018-06-19 07:15:37 +00:00			`.append(separator);`
			`} else if(ngramLevel == 1) {`
Refactored results - moved to taxonomyResults 2018-07-05 07:37:35 +00:00			`sb.append(corpus.getCorpusType().toString())`
			`.append(separator)`
			`.append("besede")`
			`.append(separator)`
			`.append(filter.getCalculateFor())`
Project copied 2018-06-19 07:15:37 +00:00			`.append(separator);`
			`}`
			`else {`
			`sb.append(filter.getAl().toString())`
			`.append(separator)`
			`.append(corpus.getCorpusType().toString())`
			`.append(separator);`
			`sb.append(filter.getCalculateFor().toString())`
			`.append(separator);`
			`// ngram value`
			`sb.append(filter.getNgramValue()).append("-gram")`
			`.append(separator);`
			`sb.append(filter.getSkipValue()).append("-preskok")`
			`.append(separator);`
			`}`
			`// TODO: assure skip is not null but zero`

			`} else {`
			`sb.append(filter.getAl().toString()) // analysis level`
			`.append(separator)`
			`.append(corpus.getCorpusType().toString())`
			`.append(separator);`
			`}`
			`// skip value`
			`// msd ?`
			`// if taxonomy -> taxonomy`
			`// if cvv -> cvv + dolžina`

			`this.time = this.time != null ? this.time : LocalDateTime.now();`

			`sb.append(time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss")));`
			`return sb.toString();`

			`}`

			`public boolean isAnalysisProducedResults() {`
			`return analysisProducedResults;`
			`}`

			`public void setAnalysisProducedResults(boolean analysisProducedResults) {`
			`this.analysisProducedResults = analysisProducedResults;`
			`}`

			`public String toString() {`
			`String newLine = "\n\t- ";`
			`StringBuilder sb = new StringBuilder();`
			`sb.append(newLine).append("Statistic properties:");`
			`sb.append(newLine).append(corpus.getCorpusType().toString()).append(String.format(" (%d files)", corpus.getDetectedCorpusFiles().size()));`
			`sb.append(newLine).append(useDB ? "use DB" : "run in memory");`
			`sb.append(newLine).append(filter.toString());`

			`return sb.toString();`
			`}`

			`public String getResultTitle() {`
			`return resultTitle;`
			`}`

			`// ****************************************`
			`// *************** util ***************`
			`// ****************************************`

			`/**`
			`* Stores results from this batch to a database and clears results map`
			`*/`
			`public void storeTmpResultsToDB() {`
			`try {`
			`db.writeBatch(result);`
			`result = new ConcurrentHashMap<>();`
			`} catch (UnsupportedEncodingException e) {`
			`logger.error("Store tmp results to DB", e);`
			`// e.printStackTrace();`
			`}`
			`}`

			`public Filter getFilter() {`
			`return filter;`
			`}`

			`public Corpus getCorpus() {`
			`return corpus;`
			`}`

			`public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException {`
Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`Set<Pair<String, Map<MultipleHMKeys, Long>>> stats = new HashSet<>();`
Project copied 2018-06-19 07:15:37 +00:00
			`if (useDB) {`
			`result = db.getDump();`
			`db.delete();`
			`}`

			`// if no results and nothing to save, return false`
Refactored results - moved to taxonomyResults 2018-07-05 07:37:35 +00:00			`if (!(taxonomyResult.get("Total").size() > 0)) {`
Project copied 2018-06-19 07:15:37 +00:00			`analysisProducedResults = false;`
			`return false;`
			`} else {`
			`analysisProducedResults = true;`
			`}`

Added functionality for n-grams (comma separation), minimal occurances etc. 2018-07-31 06:58:17 +00:00			`removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());`
			`removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());`
Refactored results - moved to taxonomyResults 2018-07-05 07:37:35 +00:00			`stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));`
Added collocability functionality - implemented Dice method 2018-10-24 08:36:07 +00:00			`Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);`
Project copied 2018-06-19 07:15:37 +00:00			`return true;`
			`}`

Added functionality for n-grams (comma separation), minimal occurances etc. 2018-07-31 06:58:17 +00:00			`/**`
			`* Removes lines, where number of different taxonomies is lower than specified number (minimalTaxonomy)`
			`*/`
			`private void removeMinimalTaxonomy(Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult, Integer minimalTaxonomy) {`
			`if (minimalTaxonomy == 1)`
			`return;`
			`int occurances;`
			`for (MultipleHMKeys key : taxonomyResult.get("Total").keySet()){`
			`occurances = 0;`
			`for (String columnNameKey : taxonomyResult.keySet()){`
			`if(!columnNameKey.equals("Total") && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1)`
			`occurances++;`
			`}`
			`if(occurances < minimalTaxonomy){`
			`taxonomyResult.get("Total").remove(key);`
			`}`
			`}`
			`}`

			`/**`
			`* Removes lines where total number of occurrences is lower than specified number (minimalOccurrences)`
			`*/`
			`private void removeMinimalOccurrences(Map<MultipleHMKeys, AtomicLong> taxonomyResultTotal, Integer minimalOccurrences) {`
			`if (minimalOccurrences == 0)`
			`return;`
			`for (MultipleHMKeys key : taxonomyResultTotal.keySet()){`
			`if(taxonomyResultTotal.get(key).intValue() < minimalOccurrences){`
			`taxonomyResultTotal.remove(key);`
			`}`
			`}`
			`}`

Project copied 2018-06-19 07:15:37 +00:00			`public boolean saveResultNestedToDisk(int... limit) throws UnsupportedEncodingException {`
			`resultTitle = generateResultTitle();`

			`if (useDB) {`
			`result = db.getDump();`
			`db.delete();`
			`}`
			`Map<WordLevelType, Map<String, Map<String, Long>>> results = new HashMap<>();`

Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`// UNCOMMENT!!!!!!`
			`// if (!isEmpty(resultNestedSuffix)) {`
			`// results.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit)));`
			`// }`
			`//`
			`// if (!isEmpty(resultNestedPrefix)) {`
			`// results.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit)));`
			`// }`
Project copied 2018-06-19 07:15:37 +00:00
			`// if no results and nothing to save, return false`
			`if (!(results.size() > 0)) {`
			`analysisProducedResults = false;`
			`return false;`
			`} else {`
			`analysisProducedResults = true;`
			`}`

			`Export.nestedMapToCSV(resultTitle, results, corpus.getChosenResultsLocation(), headerInfoBlock());`
			`return true;`
			`}`

			`public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {`
			`filter.setAl(AnalysisLevel.WORD_FORMATION);`
			`resultTitle = generateResultTitle();`

			`if (useDB) {`
			`result = db.getDump();`
			`db.delete();`
			`}`

			`// if no results and nothing to save, return false`
			`if (!(result.size() > 0)) {`
			`analysisProducedResults = false;`
			`return false;`
			`} else {`
			`analysisProducedResults = true;`
			`}`

			`WordFormation.calculateStatistics(this);`

			`Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());`
			`return true;`
			`}`

Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) {`
			`Map<String, Map<MultipleHMKeys, Long>> sorted = new HashMap<>();`
Project copied 2018-06-19 07:15:37 +00:00
			`for (String s : nestedMap.keySet()) {`
			`sorted.put(s, getSortedResult(nestedMap.get(s), Util.getValidInt(limit)));`
			`}`

			`return sorted;`
			`}`


Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {`
Project copied 2018-06-19 07:15:37 +00:00			`return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);`
			`}`

Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {`
Added taxonomy presentation in results 2018-06-29 10:53:29 +00:00			`for (String key : taxonomyResult.keySet()) {`
			`// first word should have the same taxonomy as others`
Added functionality for n-grams (comma separation), minimal occurances etc. 2018-07-31 06:58:17 +00:00			`if (key.equals("Total") \|\| taxonomy.contains(key)) {`
			`// if (key.equals("Total") \|\| taxonomy != null && taxonomy.contains(key)) {`
Added taxonomy presentation in results 2018-06-29 10:53:29 +00:00			`// if taxonomy not in map and in this word`
			`AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));`

			`if (r != null)`
			`taxonomyResult.get(key).get(o).incrementAndGet();`
			`} else {`
			`// if taxonomy not in map and not in this word`
			`AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(0));`
			`}`
			`}`

			`// if not in map`


			`// else`

			`}`

Fixed slow combination of words and lemmas presentation 2018-07-17 14:04:26 +00:00			`public Map<String, Map<MultipleHMKeys, AtomicLong>> getTaxonomyResult() {`
			`return taxonomyResult;`
			`}`

Project copied 2018-06-19 07:15:37 +00:00			`public void updateResults(String o) {`
			`// if not in map`
			`AtomicLong r = result.putIfAbsent(o, new AtomicLong(1));`

			`// else`
			`if (r != null)`
			`result.get(o).incrementAndGet();`
			`}`

			`public Map<String, AtomicLong> getResult() {`
			`return result;`
			`}`

			`public Object[][] getResultCustom() {`
			`return resultCustom;`
			`}`

			`public void setResultCustom(Object[][] resultCustom) {`
			`this.resultCustom = resultCustom;`
			`}`

			`public void updateResultsNested(WordLevelType type, String key, String stringValue) {`
			`ConcurrentHashMap<String, ConcurrentHashMap<String, AtomicLong>> resultsMap;`

			`if (type == WordLevelType.SUFFIX) {`
			`updateResultsNestedSuffix(key, stringValue);`
			`} else if (type == WordLevelType.PREFIX) {`
			`updateResultsNestedPrefix(key, stringValue);`
			`}`
			`}`

			`public void updateResultsNestedSuffix(String key, String stringValue) {`
Added some performance measures 2018-08-09 07:21:06 +00:00			`MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);`
Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00
Project copied 2018-06-19 07:15:37 +00:00			`if (resultNestedSuffix.containsKey(key)) {`
			`// if not in map`
Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));`
Project copied 2018-06-19 07:15:37 +00:00
			`// else`
			`if (r != null) {`
			`resultNestedSuffix.get(key).get(stringValue).incrementAndGet();`
			`}`
			`} else {`
			`resultNestedSuffix.putIfAbsent(key, new ConcurrentHashMap<>());`
Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));`
Project copied 2018-06-19 07:15:37 +00:00
			`if (r != null) {`
			`resultNestedSuffix.get(key).get(stringValue).incrementAndGet();`
			`}`
			`}`
			`}`

			`public void updateResultsNestedPrefix(String key, String stringValue) {`
Added some performance measures 2018-08-09 07:21:06 +00:00			`MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);`
Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00
Project copied 2018-06-19 07:15:37 +00:00			`if (resultNestedPrefix.containsKey(key)) {`
			`// if not in map`
Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));`
Project copied 2018-06-19 07:15:37 +00:00
			`// else`
			`if (r != null) {`
			`resultNestedPrefix.get(key).get(stringValue).incrementAndGet();`
			`}`
			`} else {`
			`resultNestedPrefix.putIfAbsent(key, new ConcurrentHashMap<>());`
Added functional additional combinational filters for words 2018-07-16 08:14:21 +00:00			`AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));`
Project copied 2018-06-19 07:15:37 +00:00
			`if (r != null) {`
			`resultNestedPrefix.get(key).get(stringValue).incrementAndGet();`
			`}`
			`}`
			`}`

			`private LinkedHashMap<String, String> headerInfoBlock() {`
			`LinkedHashMap<String, String> info = new LinkedHashMap<>();`

			`info.put("Korpus:", corpus.getCorpusType().toString());`
			`info.put("Datum:", time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));`
			`if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {`
			`Integer ngramLevel = filter.getNgramValue();`
			`if (ngramLevel == 0)`
Added functionality for n-grams (comma separation), minimal occurances etc. 2018-07-31 06:58:17 +00:00			`info.put("Analiza", "Črke");`
Project copied 2018-06-19 07:15:37 +00:00			`else if (ngramLevel == 1)`
			`info.put("Analiza", "Besede");`
			`else`
Added functionality for n-grams (comma separation), minimal occurances etc. 2018-07-31 06:58:17 +00:00			`info.put("Analiza", filter.getAl().toString());`
Project copied 2018-06-19 07:15:37 +00:00			`} else {`
Added functionality for n-grams (comma separation), minimal occurances etc. 2018-07-31 06:58:17 +00:00			`info.put("Analiza", filter.getAl().toString());`
Project copied 2018-06-19 07:15:37 +00:00			`}`

			`if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {`
			`Integer ngramLevel = filter.getNgramValue();`

			`// n.gram nivo`
			`if (ngramLevel > 1) {`
			`info.put("n-gram nivo:", String.valueOf(ngramLevel));`
			`}`
Modified word tab 2018-06-27 08:14:40 +00:00
Project copied 2018-06-19 07:15:37 +00:00			`// skip`
			`if (ngramLevel > 1)`
			`info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");`

			`// izračunaj za`
			`info.put("Izračunaj za:", filter.getCalculateFor().toString());`

			`// msd`
			`if (!isEmpty(filter.getMsd())) {`
			`StringBuilder msdPattern = new StringBuilder();`
			`for (Pattern pattern : filter.getMsd()) {`
			`msdPattern.append(pattern.toString()).append(" ");`
			`}`

			`info.put("MSD:", msdPattern.toString());`
			`}`


			`}`

Added taxonomy presentation in results 2018-06-29 10:53:29 +00:00			`if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {`
			`ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());`

			`info.put("Taksonomija: ", "");`
			`String sep = "";`
			`for (String s : tax) {`
			`info.put(sep = sep + " ", s);`
			`}`
			`}`
Project copied 2018-06-19 07:15:37 +00:00
			`if (corpus.getCorpusType() == CorpusType.SOLAR) {`
			`HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();`

			`if (!isEmpty(filters)) {`
			`info.put("Dodatni filtri: ", "");`

			`for (Map.Entry<String, ObservableList<String>> f : filters.entrySet()) {`
			`info.put(f.getKey(), StringUtils.join(f.getValue(), ", "));`
			`}`
			`}`
			`}`

			`return info;`
			`}`
Added collocability functionality - implemented Dice method 2018-10-24 08:36:07 +00:00
			`public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {`
			`Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();`

			`Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();`

			`for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {`
Added initial functionality for word parts implementation 2018-11-05 09:30:41 +00:00			`// String[] splitedString = hmKey.getK1().split("\\s+");`
Added collocability functionality - implemented Dice method 2018-10-24 08:36:07 +00:00
			`long sum_fwi =0L;`
Added initial functionality for word parts implementation 2018-11-05 09:30:41 +00:00
			`for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){`
			`System.out.println(smallHmKey.getK1());`
			`sum_fwi += oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue();`
Added collocability functionality - implemented Dice method 2018-10-24 08:36:07 +00:00			`}`
			`double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;`
			`collocabilityMap.put(hmKey, dice_value);`
			`}`

			`collocability.put(filter.getCollocability().get(0), collocabilityMap);`
			`}`

			`public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability(){`
			`return this.collocability;`
			`}`
Project copied 2018-06-19 07:15:37 +00:00			`}`