Project copied

2018-06-19 09:15:37 +02:00
commit a18e52a599
94 changed files with 87092 additions and 0 deletions
--- a/src/main/java/META-INF/MANIFEST.MF
+++ b/src/main/java/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Main-Class: gui.GUIController
+
--- a/src/main/java/alg/Common.java
+++ b/src/main/java/alg/Common.java
@@ -0,0 +1,15 @@
+package alg;
+
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+public class Common {
+	/**
+	 * Increments the occurrence counter stored under {@code o}, creating the
+	 * counter on first use so that it ends up at 1.
+	 *
+	 * @param map counters keyed by the counted object; modified in place
+	 * @param o   key whose counter should be incremented
+	 */
+	public static <K, V> void updateMap(Map<K, AtomicLong> map, K o) {
+		// Increment the counter handed back by computeIfAbsent instead of
+		// allocating a throwaway AtomicLong on every call and looking the key
+		// up a second time (which could also hit a concurrently removed entry).
+		map.computeIfAbsent(o, key -> new AtomicLong()).incrementAndGet();
+	}
+}
--- a/src/main/java/alg/XML_processing.java
+++ b/src/main/java/alg/XML_processing.java
@@ -0,0 +1,794 @@
+package alg;
+
+import static data.Enums.solar.SolarFilters.*;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.util.*;
+import java.util.concurrent.ForkJoinPool;
+
+import javax.xml.namespace.QName;
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.events.*;
+
+import org.apache.logging.log4j.LogManager;
+
+import data.*;
+import gui.ValidationUtil;
+
+public class XML_processing {
+	public final static org.apache.logging.log4j.Logger logger = LogManager.getLogger(XML_processing.class);
+
+	// public static void processCorpus(Statistics stats) {
+	// 	// we can preset the list's size, so there won't be a need to resize it
+	// 	List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT);
+	//
+	// 	int i = 0;
+	// 	for (File f : Settings.corpus) {
+	// 		i++;
+	// 		readXML(f.toString(), stats);
+	// 	}
+	// }
+
+	// public static void readXML(String path, Statistics stats) {
+	// 	if (stats.getCorpusType() == CorpusType.GIGAFIDA) {
+	// 		readXMLGigafida(path, stats);
+	// 	} else if (stats.getCorpusType() == CorpusType.GOS) {
+	// 		readXMLGos(path, stats);
+	// 	} else if (stats.getCorpusType() == CorpusType.SOLAR) {
+	// 		readXMLSolar(path, stats);
+	// 	}
+	// }
+
+	/**
+	 * Hands the file at {@code path} to the reader that matches the corpus type
+	 * configured in {@code stats}. Corpus types without a reader are ignored.
+	 */
+	public static void readXML(String path, StatisticsNew stats) {
+		CorpusType corpusType = stats.getCorpus().getCorpusType();
+
+		if (corpusType == CorpusType.SOLAR) {
+			readXMLSolar(path, stats);
+		} else if (corpusType == CorpusType.GOS) {
+			readXMLGos(path, stats);
+		} else if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
+			// ccKres files are read with the Gigafida reader
+			readXMLGigafida(path, stats);
+		}
+	}
+
+	/**
+	 * Reads and returns the value of a passed header tag or an empty string.
+	 * E.g. title tag, for discerning the corpus' type.
+	 * Notice: returns only the value of the first occurrence of a given tag name.
+	 *
+	 * @param path path of the XML file to scan
+	 * @param tag  local name of the element to look for (compared case-insensitively)
+	 *
+	 * @return text that directly follows the first matching start tag, or an empty string when the tag is missing,
+	 * is not directly followed by text, or the file cannot be read (the failure is logged)
+	 */
+	public static String readXMLHeaderTag(String path, String tag) {
+		// XMLEventReader.close() does not close the underlying stream, so the file is closed by try-with-resources
+		try (FileInputStream in = new FileInputStream(path)) {
+			XMLEventReader eventReader = XMLInputFactory.newInstance().createXMLEventReader(in);
+
+			try {
+				while (eventReader.hasNext()) {
+					XMLEvent xmlEvent = eventReader.nextEvent();
+					if (!xmlEvent.isStartElement()) {
+						continue;
+					}
+
+					String elementName = xmlEvent.asStartElement().getName().getLocalPart();
+					if (elementName.equalsIgnoreCase(tag)) {
+						// an empty or nested element is not followed by text; asCharacters() would throw on it
+						XMLEvent content = eventReader.nextEvent();
+						return content.isCharacters() ? content.asCharacters().getData() : "";
+					}
+				}
+			} finally {
+				try {
+					eventReader.close();
+				} catch (XMLStreamException e) {
+					logger.error("closing stream", e);
+				}
+			}
+		} catch (java.io.IOException | XMLStreamException e) {
+			logger.error("reading header tag " + tag + " from " + path, e);
+		}
+		return "";
+	}
+
+	/**
+	 * Runs the fork-join computation that matches the filter's analysis level over one batch of sentences.
+	 * The call blocks until the batch has been processed.
+	 *
+	 * @param corpus batch of sentences to process
+	 * @param stats  statistics object that supplies the filter and is passed on to the workers
+	 */
+	private static void fj(List<Sentence> corpus, StatisticsNew stats) {
+		ForkJoinPool pool = new ForkJoinPool();
+
+		try {
+			if (stats.getFilter().getAl() == AnalysisLevel.STRING_LEVEL) {
+				alg.ngram.ForkJoin wc = new alg.ngram.ForkJoin(corpus, stats);
+				pool.invoke(wc);
+			} else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) {
+				alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats);
+				pool.invoke(wc);
+			} else {
+				// TODO:
+				// alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats);
+				// pool.invoke(wc);
+			}
+		} finally {
+			// a pool is created for every batch, so release its worker threads once the batch is done
+			pool.shutdown();
+		}
+	}
+
+	// public static void readXMLGos(String path, Statistics stats) {
+	// 	boolean in_word = false;
+	// 	String taksonomija = "";
+	// 	String lemma = "";
+	// 	String msd = "";
+	// 	String type = stats.isGosOrthMode() ? "orth" : "norm"; // orth & norm
+	//
+	// 	List<Word> stavek = new ArrayList<>();
+	// 	List<Sentence> corpus = new ArrayList<>();
+	// 	String sentenceDelimiter = "seg";
+	// 	String taxonomyPrefix = "gos.";
+	//
+	// 	try {
+	// 		XMLInputFactory factory = XMLInputFactory.newInstance();
+	// 		XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path));
+	//
+	// 		while (eventReader.hasNext()) {
+	// 			XMLEvent event = eventReader.nextEvent();
+	//
+	// 			switch (event.getEventType()) {
+	// 				case XMLStreamConstants.START_ELEMENT:
+	//
+	// 					StartElement startElement = event.asStartElement();
+	// 					String qName = startElement.getName().getLocalPart();
+	//
+	// 					// "word" node
+	// 					if (qName.equals("w")) {
+	// 						in_word = true;
+	//
+	// 						if (type.equals("norm")) {
+	// 							// make sure we're looking at <w lemma...> and not <w type...>
+	// 							Iterator var = startElement.getAttributes();
+	// 							ArrayList<Object> attributes = new ArrayList<>();
+	// 							while (var.hasNext()) {
+	// 								attributes.add(var.next());
+	// 							}
+	//
+	// 							if (attributes.contains("msd")) {
+	// 								msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
+	// 							} else {
+	// 								msd = null;
+	// 							}
+	//
+	// 							if (attributes.contains("lemma")) {
+	// 								lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
+	// 							}
+	// 						}
+	// 					}
+	// 					// taxonomy node
+	// 					else if (qName.equalsIgnoreCase("catRef")) {
+	// 						// there are some term nodes at the beginning that are of no interest to us
+	// 						// they differ by not having the attribute "ref", so test will equal null
+	// 						Attribute test = startElement.getAttributeByName(QName.valueOf("target"));
+	//
+	// 						if (test != null) {
+	// 							// keep only taxonomy properties
+	// 							taksonomija = String.valueOf(test.getValue()).replace(taxonomyPrefix, "");
+	// 						}
+	// 					} else if (qName.equalsIgnoreCase("div")) {
+	// 						type = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
+	//
+	// 					}
+	// 					break;
+	//
+	// 				case XMLStreamConstants.CHARACTERS:
+	// 					Characters characters = event.asCharacters();
+	//
+	// 					// "word" node value
+	// 					if (in_word) {
+	// 						if (type.equals("norm") && msd != null) {
+	// 							stavek.add(new Word(characters.getData(), lemma, msd));
+	// 						} else {
+	// 							stavek.add(new Word(characters.getData()));
+	// 						}
+	//
+	// 						in_word = false;
+	// 					}
+	// 					break;
+	//
+	// 				case XMLStreamConstants.END_ELEMENT:
+	// 					EndElement endElement = event.asEndElement();
+	//
+	// 					// parser reached end of the current sentence
+	// 					if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
+	// 						// add sentence to corpus
+	// 						corpus.add(new Sentence(stavek, taksonomija, type));
+	// 						// and start a new one
+	// 						stavek = new ArrayList<>();
+	//
+	// 						/* Invoke Fork-Join when we reach maximum limit of
+	// 						 * sentences (because we can't read everything to
+	// 						 * memory) or we reach the end of the file.
+	// 						 */
+	// 						if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
+	// 							fj(corpus, stats);
+	// 							// empty the current corpus, since we don't need
+	// 							// the data anymore
+	// 							corpus.clear();
+	// 						}
+	// 					}
+	//
+	// 					// backup
+	// 					if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
+	// 						fj(corpus, stats);
+	// 						corpus.clear();
+	// 					}
+	//
+	// 					break;
+	// 			}
+	// 		}
+	// 	} catch (FileNotFoundException | XMLStreamException e) {
+	// 		e.printStackTrace();
+	// 	}
+	// }
+
+	/**
+	 * Reads a Solar corpus file, collects the words of the blocks that pass the user's filters into sentences and
+	 * hands the sentences to the fork-join workers in batches.
+	 * <p>
+	 * The tags of a {@code head} element are gathered and validated when {@code </head>} is reached; the result
+	 * stays in effect until {@code </body>}. Words are {@code w3} elements, and a {@code c3} element containing "."
+	 * closes the current sentence. Read failures are logged and end the processing of this file.
+	 *
+	 * @param path  path of the XML file to read
+	 * @param stats statistics object that supplies the filters and is passed on to the workers
+	 */
+	@SuppressWarnings("unused")
+	public static void readXMLSolar(String path, StatisticsNew stats) {
+		boolean in_word = false;
+		String lemma = "";
+		String msd = "";
+
+		List<Word> stavek = new ArrayList<>();
+		List<Sentence> corpus = new ArrayList<>();
+
+		// used for filter
+		Set<String> headTags = new HashSet<>(Arrays.asList("sola", "predmet", "razred", "regija", "tip", "leto"));
+		// never null, so a filter tag met before the first <head> cannot cause a NullPointerException
+		Map<String, String> headBlock = new HashMap<>();
+		boolean includeThisBlock = false;
+
+		// XMLEventReader.close() does not close the underlying stream, so the file is closed by try-with-resources
+		try (FileInputStream in = new FileInputStream(path)) {
+			XMLEventReader eventReader = XMLInputFactory.newInstance().createXMLEventReader(in);
+
+			try {
+				while (eventReader.hasNext()) {
+					XMLEvent event = eventReader.nextEvent();
+
+					switch (event.getEventType()) {
+						case XMLStreamConstants.START_ELEMENT:
+							StartElement startElement = event.asStartElement();
+							String qName = startElement.getName().getLocalPart();
+
+							// "word" node
+							if (qName.equals("w3")) {
+								in_word = true;
+
+								msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
+								lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
+							} else if (qName.equals("c3")) {
+								// only consume the next event if it really is text (an empty <c3/> has none)
+								XMLEvent c3Next = eventReader.peek();
+								String c3Content = "";
+								if (c3Next != null && c3Next.isCharacters()) {
+									c3Content = eventReader.nextEvent().asCharacters().getData();
+								}
+
+								if (c3Content.equals(".") && includeThisBlock) {
+									// add sentence to corpus
+									corpus.add(new Sentence(stavek));
+									// and start a new one
+									stavek = new ArrayList<>();
+
+									/* Invoke Fork-Join when we reach maximum limit of
+									 * sentences (because we can't read everything to
+									 * memory) or we reach the end of the file.
+									 */
+									if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
+										fj(corpus, stats);
+										// empty the current corpus, since we don't need
+										// the data anymore
+										corpus.clear();
+									}
+								}
+							} else if (headTags.contains(qName)) {
+								// an empty filter tag carries no value and is simply left out of the head block
+								XMLEvent tagNext = eventReader.peek();
+								if (tagNext != null && tagNext.isCharacters()) {
+									headBlock.put(qName, eventReader.nextEvent().asCharacters().getData());
+								}
+							} else if (qName.equals("head")) {
+								headBlock = new HashMap<>();
+							}
+
+							break;
+
+						case XMLStreamConstants.CHARACTERS:
+							// "word" node value
+							if (in_word) {
+								// words of a block that failed the filters must not end up in a later sentence
+								if (includeThisBlock) {
+									stavek.add(new Word(event.asCharacters().getData(), lemma, msd));
+								}
+								in_word = false;
+							}
+							break;
+
+						case XMLStreamConstants.END_ELEMENT:
+							String qNameEnd = event.asEndElement().getName().getLocalPart();
+
+							if (qNameEnd.equals("head")) {
+								// validate and set boolean
+								if (validateHeadBlock(headBlock, stats.getFilter().getSolarFilters())) {
+									includeThisBlock = true;
+								}
+							} else if (qNameEnd.equals("body")) {
+								// new block, reset filter status
+								includeThisBlock = false;
+							}
+
+							// backup
+							if (qNameEnd.equalsIgnoreCase("korpus")) {
+								fj(corpus, stats);
+								corpus.clear();
+							}
+
+							break;
+					}
+				}
+			} finally {
+				try {
+					eventReader.close();
+				} catch (XMLStreamException e) {
+					logger.error("closing stream", e);
+				}
+			}
+		} catch (java.io.IOException | XMLStreamException e) {
+			logger.error("reading " + path, e);
+		}
+	}
+
+	/**
+	 * Checks whether a head block read from the corpus satisfies every filter set by the user.
+	 *
+	 * @param readHeadBlock block of tags read from the corpus
+	 * @param userSetFilter tags with values set by the user; {@code null} disables filtering
+	 *
+	 * @return {@code true} if, for every filtered tag, the block's value equals one of the values selected for that
+	 * tag (tags with a {@code null} or empty selection are not checked), {@code false} otherwise
+	 */
+	private static boolean validateHeadBlock(Map<String, String> readHeadBlock, HashMap<String, HashSet<String>> userSetFilter) {
+		if (userSetFilter == null) {
+			return true;
+		}
+
+		for (Map.Entry<String, HashSet<String>> filterEntry : userSetFilter.entrySet()) {
+			HashSet<String> acceptedValues = filterEntry.getValue();
+
+			if (acceptedValues == null || acceptedValues.isEmpty()) {
+				continue;
+			}
+
+			// A head block holds a single value per tag, so it passes when that value is any one of the accepted
+			// values; checking only the value iterated last would make the result depend on HashSet order.
+			boolean matched = false;
+			for (String value : acceptedValues) {
+				if (validateHeadBlockEntry(readHeadBlock, filterEntry.getKey(), value)) {
+					matched = true;
+					break;
+				}
+			}
+
+			if (!matched) {
+				// current head block does not satisfy one of the set filters
+				return false;
+			}
+		}
+
+		// if it gets to this point, it passed all the filters
+		return true;
+	}
+
+	/**
+	 * Tells whether the head block holds exactly the given value under the given tag.
+	 *
+	 * @return {@code false} when the tag is absent from the block or its value differs, {@code true} otherwise
+	 */
+	private static boolean validateHeadBlockEntry(Map<String, String> readHeadBlock, String userSetKey, String userSetValue) {
+		// a tag that is missing from the head block can never satisfy a filter
+		boolean tagPresent = readHeadBlock.keySet().contains(userSetKey);
+
+		return tagPresent && readHeadBlock.get(userSetKey).equals(userSetValue);
+	}
+
+	/**
+	 * Parses XML headers for information about its taxonomy (if supported) or filters (solar)
+	 *
+	 * @param filepath      path of the XML file to scan
+	 * @param corpusIsSplit is corpus split into multiple xml files, or are all entries grouped into one large xml file
+	 * @param corpusType    type of the corpus the file belongs to
+	 *
+	 * @return a {@code HashSet<String>} of taxonomy ids when the corpus type has a taxonomy, otherwise a
+	 * {@code HashMap<String, HashSet<String>>} with the values found for each filter tag; if reading fails, the
+	 * error is logged and whatever was collected up to that point is returned
+	 */
+	public static Object readXmlHeaderTaxonomyAndFilters(String filepath, boolean corpusIsSplit, CorpusType corpusType) {
+		boolean parseTaxonomy = Tax.getCorpusTypesWithTaxonomy().contains(corpusType);
+		// solar; stays empty (not null) for every other corpus type so the lookups below are always safe
+		Set<String> headTags = Collections.emptySet();
+		HashMap<String, HashSet<String>> resultFilters = new HashMap<>();
+		// taxonomy corpora
+		HashSet<String> resultTaxonomy = new HashSet<>();
+
+		String headTagName;
+
+		if (corpusType == CorpusType.SOLAR) {
+			headTagName = "head";
+			// used for filter
+			headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO));
+
+			// init results now to avoid null pointers
+			headTags.forEach(f -> resultFilters.put(f, new HashSet<>()));
+		} else {
+			headTagName = "teiHeader";
+		}
+
+		// XMLEventReader.close() does not close the underlying stream, so the file is closed by try-with-resources
+		try (FileInputStream in = new FileInputStream(filepath)) {
+			XMLEventReader xmlEventReader = XMLInputFactory.newInstance().createXMLEventReader(in);
+
+			try {
+				boolean insideHeader = false;
+
+				while (xmlEventReader.hasNext()) {
+					XMLEvent xmlEvent = xmlEventReader.nextEvent();
+
+					if (xmlEvent.isStartElement()) {
+						StartElement startElement = xmlEvent.asStartElement();
+						String elementName = startElement.getName().getLocalPart();
+
+						if (elementName.equalsIgnoreCase(headTagName)) {
+							// if the corpus is split into files, we skip bodies
+							// this toggle is true when we're inside a header (next block of code executes)
+							// and false when we're not (skip reading unnecessary attributes)
+							insideHeader = true;
+						}
+
+						if (!insideHeader) {
+							continue;
+						}
+
+						if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) {
+							// catRef nodes without a "target" attribute carry no taxonomy id
+							Attribute target = startElement.getAttributeByName(QName.valueOf("target"));
+
+							if (target != null) {
+								resultTaxonomy.add(target.getValue().replace("#", ""));
+							}
+						} else if (!parseTaxonomy && headTags.contains(elementName)) {
+							// only consume the next event if it really is text (an empty tag has none)
+							XMLEvent next = xmlEventReader.peek();
+
+							if (next != null && next.isCharacters()) {
+								resultFilters.get(elementName).add(xmlEventReader.nextEvent().asCharacters().getData());
+							}
+						}
+					} else if (xmlEvent.isEndElement() && isEndElementEndOfHeader(xmlEvent, headTagName)) {
+						if (corpusIsSplit) {
+							// if the corpus is split into multiple files, each with only one header block per file
+							// that means we should stop after we reach the end of the header
+							break;
+						}
+
+						// whole corpus in one file, so we have to continue reading in order to find all header blocks
+						insideHeader = false;
+					}
+				}
+			} finally {
+				try {
+					xmlEventReader.close();
+				} catch (XMLStreamException e) {
+					logger.error("closing stream", e);
+				}
+			}
+		} catch (XMLStreamException e) {
+			logger.error("Streaming error", e);
+		} catch (FileNotFoundException e) {
+			logger.error("File not found", e);
+			// TODO: keep a list of files that threw this error and a dirty boolean marker -> if true, alert user
+		} catch (java.io.IOException e) {
+			logger.error("I/O error", e);
+		}
+
+		return parseTaxonomy ? resultTaxonomy : resultFilters;
+	}
+
+	/**
+	 * Tells whether an end-element event closes the header element.
+	 *
+	 * @param event     event to inspect; must be an end element
+	 * @param headerTag local name of the header element, compared case-insensitively
+	 */
+	private static boolean isEndElementEndOfHeader(XMLEvent event, String headerTag) {
+		String closedElement = event.asEndElement().getName().getLocalPart();
+
+		return closedElement.equalsIgnoreCase(headerTag);
+	}
+
+	/**
+	 * Reads a file in the Gigafida format (also used for ccKres), splits it into sentences on {@code </s>}, runs
+	 * each sentence through the filters and hands the remaining sentences to the fork-join workers in batches.
+	 * Read failures are logged and end the processing of this file.
+	 *
+	 * @param path  path of the XML file to read
+	 * @param stats statistics object that supplies the filters and is passed on to the workers
+	 *
+	 * @return {@code false} if a taxonomy filter is set and none of the file's taxonomy entries match it (the rest
+	 * of the file is then skipped), {@code true} otherwise
+	 */
+	@SuppressWarnings("Duplicates")
+	public static boolean readXMLGigafida(String path, StatisticsNew stats) {
+		boolean inWord = false;
+		ArrayList<String> currentFiletaxonomy = new ArrayList<>();
+		String lemma = "";
+		String msd = "";
+
+		List<Word> sentence = new ArrayList<>();
+		List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it
+		String sentenceDelimiter = "s";
+
+		// XMLEventReader.close() does not close the underlying stream, so the file is closed by try-with-resources
+		try (FileInputStream in = new FileInputStream(path)) {
+			XMLEventReader eventReader = XMLInputFactory.newInstance().createXMLEventReader(in);
+
+			try {
+				while (eventReader.hasNext()) {
+					XMLEvent event = eventReader.nextEvent();
+
+					switch (event.getEventType()) {
+						case XMLStreamConstants.START_ELEMENT:
+							StartElement startElement = event.asStartElement();
+							String qName = startElement.getName().getLocalPart();
+
+							// "word" node
+							if (qName.equals("w")) {
+								inWord = true;
+
+								msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
+								lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
+							}
+							// taxonomy node
+							else if (qName.equalsIgnoreCase("catRef")) {
+								// catRef nodes without a "target" attribute are of no interest to us
+								Attribute tax = startElement.getAttributeByName(QName.valueOf("target"));
+
+								if (tax != null) {
+									// keep only taxonomy properties
+									currentFiletaxonomy.add(String.valueOf(tax.getValue()).replace("#", ""));
+								}
+							}
+							break;
+
+						case XMLStreamConstants.CHARACTERS:
+							// "word" node value
+							if (inWord) {
+								sentence.add(new Word(event.asCharacters().getData(), lemma, msd));
+								inWord = false;
+							}
+							break;
+
+						case XMLStreamConstants.END_ELEMENT:
+							String endName = event.asEndElement().getName().getLocalPart();
+
+							// parser reached end of the current sentence
+							if (endName.equals(sentenceDelimiter)) {
+								// add sentence to corpus if it passes filters
+								sentence = runFilters(sentence, stats.getFilter());
+
+								if (!ValidationUtil.isEmpty(sentence)) {
+									corpus.add(new Sentence(sentence));
+								}
+
+								// and start a new one
+								sentence = new ArrayList<>();
+
+								/* Invoke Fork-Join when we reach maximum limit of
+								 * sentences (because we can't read everything to
+								 * memory) or we reach the end of the file.
+								 */
+								if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
+									fj(corpus, stats);
+									// empty the current corpus, since we don't need the data anymore
+									corpus.clear();
+
+									// TODO: if (stats.isUseDB()) {
+									// 	stats.storeTmpResultsToDB();
+									// }
+								}
+							} else if (endName.equals("teiHeader")) {
+								// before proceeding to read this file, make sure that taxonomy filters are a match
+								if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
+									currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
+
+									if (currentFiletaxonomy.isEmpty()) {
+										// taxonomies don't match so stop
+										return false;
+									}
+								}
+							} else if (endName.equalsIgnoreCase("tei")) {
+								// fallback: flush whatever is left at the end of the document
+								fj(corpus, stats);
+								corpus.clear();
+
+								// TODO: if (stats.isUseDB()) {
+								// 	stats.storeTmpResultsToDB();
+								// }
+							}
+
+							break;
+					}
+				}
+			} finally {
+				try {
+					eventReader.close();
+				} catch (XMLStreamException e) {
+					logger.error("closing stream", e);
+				}
+			}
+		} catch (java.io.IOException | XMLStreamException e) {
+			logger.error("reading " + path, e);
+		}
+
+		return true;
+	}
+
+	/**
+	 * Reads a GOS corpus file, splits it into sentences on {@code </seg>}, and hands the sentences that belong to
+	 * the requested transcription (orth or norm divs) and pass the filters to the fork-join workers in batches.
+	 * Entries whose taxonomy does not match a set taxonomy filter are left out.
+	 * Read failures are logged and end the processing of this file.
+	 *
+	 * @param path  path of the XML file to read
+	 * @param stats statistics object that supplies the filters and is passed on to the workers
+	 *
+	 * @return always {@code true}
+	 */
+	@SuppressWarnings("Duplicates")
+	public static boolean readXMLGos(String path, StatisticsNew stats) {
+		boolean inWord = false;
+		boolean inOrthDiv = false;
+		boolean computeForOrth = stats.getCorpus().isGosOrthMode();
+		ArrayList<String> currentFiletaxonomy = new ArrayList<>();
+		String lemma = "";
+		String msd = "";
+
+		List<Word> sentence = new ArrayList<>();
+		List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it
+		String sentenceDelimiter = "seg";
+
+		String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm
+
+		boolean includeFile = true;
+
+		// XMLEventReader.close() does not close the underlying stream, so the file is closed by try-with-resources
+		try (FileInputStream in = new FileInputStream(path)) {
+			XMLEventReader eventReader = XMLInputFactory.newInstance().createXMLEventReader(in);
+
+			try {
+				while (eventReader.hasNext()) {
+					XMLEvent event = eventReader.nextEvent();
+
+					switch (event.getEventType()) {
+						case XMLStreamConstants.START_ELEMENT:
+							StartElement startElement = event.asStartElement();
+							String qName = startElement.getName().getLocalPart();
+
+							if (qName.equals("div")) {
+								HashMap<String, String> atts = extractAttributes(startElement);
+
+								if (atts.keySet().contains("type")) {
+									inOrthDiv = atts.get("type").equals("orth");
+								}
+							}
+
+							// "word" node
+							if (qName.equals("w")) {
+								// check that it's not a type
+								HashMap<String, String> atts = extractAttributes(startElement);
+
+								if (!atts.containsKey("type")) {
+									inWord = true;
+
+									// reset per word so a word without these attributes does not inherit the
+									// values of the previous one; a null msd makes the word a plain Word below
+									msd = atts.get("msd");
+									lemma = atts.containsKey("lemma") ? atts.get("lemma") : "";
+								}
+							}
+							// taxonomy node
+							else if (qName.equalsIgnoreCase("catRef")) {
+								// catRef nodes without a "target" attribute are of no interest to us
+								Attribute tax = startElement.getAttributeByName(QName.valueOf("target"));
+
+								if (tax != null) {
+									// keep only taxonomy properties
+									currentFiletaxonomy.add(String.valueOf(tax.getValue()));
+								}
+							} else if (qName.equalsIgnoreCase("div")) {
+								// a div without a "type" attribute keeps the current transcription type
+								Attribute divType = startElement.getAttributeByName(QName.valueOf("type"));
+
+								if (divType != null) {
+									gosType = String.valueOf(divType.getValue());
+								}
+							}
+							break;
+
+						case XMLStreamConstants.CHARACTERS:
+							// "word" node value
+							if (inWord) {
+								Characters characters = event.asCharacters();
+								if (gosType.equals("norm") && msd != null) {
+									sentence.add(new Word(characters.getData(), lemma, msd));
+								} else {
+									sentence.add(new Word(characters.getData()));
+								}
+
+								inWord = false;
+							}
+							break;
+
+						case XMLStreamConstants.END_ELEMENT:
+							String endName = event.asEndElement().getName().getLocalPart();
+
+							// parser reached end of the current sentence
+							if (endName.equals(sentenceDelimiter)) {
+								// add sentence to corpus if it passes filters
+								boolean saveSentence = computeForOrth == inOrthDiv;
+
+								if (includeFile && saveSentence && !ValidationUtil.isEmpty(sentence)) {
+									sentence = runFilters(sentence, stats.getFilter());
+
+									// the filters may reject the sentence altogether; never wrap that in a Sentence
+									if (!ValidationUtil.isEmpty(sentence)) {
+										corpus.add(new Sentence(sentence));
+									}
+								}
+
+								// and start a new one
+								sentence = new ArrayList<>();
+
+								/* Invoke Fork-Join when we reach maximum limit of
+								 * sentences (because we can't read everything to
+								 * memory) or we reach the end of the file.
+								 */
+								if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
+									fj(corpus, stats);
+									// empty the current corpus, since we don't need
+									// the data anymore
+									corpus.clear();
+								}
+							} else if (endName.equals("teiHeader")) {
+								// before proceeding to read this file, make sure that taxonomy filters are a match
+								if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
+									currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
+
+									// disregard this entry if taxonomies don't match
+									includeFile = !currentFiletaxonomy.isEmpty();
+
+									currentFiletaxonomy = new ArrayList<>();
+								}
+							} else if (endName.equalsIgnoreCase("tei")) {
+								// backup: flush whatever is left at the end of the document
+								fj(corpus, stats);
+								corpus.clear();
+							}
+
+							break;
+					}
+				}
+			} finally {
+				try {
+					eventReader.close();
+				} catch (XMLStreamException e) {
+					logger.error("closing stream", e);
+				} catch (Exception e) {
+					logger.error("general error", e);
+				}
+			}
+		} catch (java.io.IOException | XMLStreamException e) {
+			logger.error("reading " + path, e);
+		}
+
+		return true;
+	}
+
+	/**
+	 * Runs the sentence through some filters, so we don't do calculations when unnecessary.
+	 * Filters (applied only at string analysis level):
+	 * <ol>
+	 * <li><b>Ngrams:</b> omit sentences that are shorter than the ngram value (e.g. 3 gram of a single word sentence)</li>
+	 * <li><b>Letter ngrams:</b> omit words that are shorter than the specified string length (e.g. combinations of 3 letters when the word consists of only 2 letters)</li>
+	 * </ol>
+	 *
+	 * @param sentence words of the sentence; short words are removed from this list in place (2.)
+	 * @param filter   filter settings chosen by the user
+	 *
+	 * @return Empty sentence (if fails 1.) or a sentence with some words removed (2.); never {@code null}
+	 */
+	private static List<Word> runFilters(List<Word> sentence, Filter filter) {
+		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
+			// ngram level: if not 0 must be less than or equal to number of words in this sentence.
+			if (filter.getNgramValue() > 0 && filter.getNgramValue() > sentence.size()) {
+				// return the documented empty sentence; a null would be wrapped in a Sentence by a caller
+				// that does not re-check the result
+				return new ArrayList<>();
+			}
+
+			// if we're calculating values for letters, omit words that are shorter than string length
+			if (filter.getNgramValue() == 0) {
+				sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord().length() < filter.getStringLength())
+						|| (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma().length() < filter.getStringLength()));
+			}
+		}
+
+		return sentence;
+	}
+
+	/**
+	 * Copies the attributes of a start element into a map keyed by each attribute's local name.
+	 */
+	private static HashMap<String, String> extractAttributes(StartElement se) {
+		HashMap<String, String> valuesByName = new HashMap<>();
+
+		for (Iterator<?> attributeIt = se.getAttributes(); attributeIt.hasNext(); ) {
+			Attribute attribute = (Attribute) attributeIt.next();
+			valuesByName.put(attribute.getName().getLocalPart(), attribute.getValue());
+		}
+
+		return valuesByName;
+	}
+}
--- a/src/main/java/alg/inflectedJOS/ForkJoin.java
+++ b/src/main/java/alg/inflectedJOS/ForkJoin.java
@@ -0,0 +1,67 @@
+package alg.inflectedJOS;
+
+import java.util.List;
+import java.util.concurrent.RecursiveAction;
+
+import data.Sentence;
+import data.Statistics;
+
+/**
+ * Fork-join task that recursively halves a range of the corpus until a range is smaller than
+ * {@link #ACCEPTABLE_SIZE} sentences and then runs {@link InflectedJOSCount#calculateForAll} on it.
+ */
+public class ForkJoin extends RecursiveAction {
+	private static final long serialVersionUID = -1260951004477299634L;
+
+	/** Ranges with fewer sentences than this are computed directly instead of being split further. */
+	private static final int ACCEPTABLE_SIZE = 1000;
+	private final List<Sentence> corpus;
+	private final Statistics stats;
+	/** Index of the first sentence of this task's range (inclusive). */
+	private final int start;
+	/** Index just past the last sentence of this task's range (exclusive). */
+	private final int end;
+
+
+	/**
+	 * Constructor for subproblems.
+	 */
+	private ForkJoin(List<Sentence> corpus, int start, int end, Statistics stats) {
+		this.corpus = corpus;
+		this.start = start;
+		this.end = end;
+		this.stats = stats;
+	}
+
+	/**
+	 * Default constructor for the initial problem
+	 */
+	public ForkJoin(List<Sentence> corpus, Statistics stats) {
+		this.corpus = corpus;
+		this.start = 0;
+		this.end = corpus.size();
+		this.stats = stats;
+	}
+
+	/**
+	 * Processes this task's range in the current thread, passing the taxonomy only when one is set.
+	 */
+	private void computeDirectly() {
+		List<Sentence> subCorpus = corpus.subList(start, end);
+
+		if (stats.isTaxonomySet()) {
+			InflectedJOSCount.calculateForAll(subCorpus, stats, stats.getInflectedJosTaxonomy());
+		} else {
+			InflectedJOSCount.calculateForAll(subCorpus, stats, null);
+		}
+	}
+
+	@Override
+	protected void compute() {
+		int subCorpusSize = end - start;
+
+		if (subCorpusSize < ACCEPTABLE_SIZE) {
+			computeDirectly();
+			return;
+		}
+
+		int mid = start + subCorpusSize / 2;
+		ForkJoin left = new ForkJoin(corpus, start, mid, stats);
+		ForkJoin right = new ForkJoin(corpus, mid, end, stats);
+
+		// invokeAll schedules both halves and returns once both are done; it replaces forking both
+		// tasks and then joining them in fork order, which the ForkJoinTask documentation describes
+		// as the less efficient join order
+		invokeAll(left, right);
+	}
+}
--- a/src/main/java/alg/inflectedJOS/InflectedJOSCount.java
+++ b/src/main/java/alg/inflectedJOS/InflectedJOSCount.java
@@ -0,0 +1,79 @@
+package alg.inflectedJOS;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.function.Consumer;
+
+import org.apache.commons.lang3.StringUtils;
+
+import alg.Common;
+import data.Sentence;
+import data.Statistics;
+import data.StatisticsNew;
+import data.Word;
+
+/**
+ * Counts single-attribute MSD patterns. For every attribute position of a word's MSD one pattern is counted: the
+ * MSD's first character, that single attribute, and '-' in every other position (e.g. "Sozei" produces "So---",
+ * "S-z--", "S--e-" and "S---i").
+ */
+public class InflectedJOSCount {
+
+	// NOTE(review): nothing in this class assigns this field; it is kept only because it is public
+	public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;
+
+	/**
+	 * Counts the patterns of every word into {@code stats.result}.
+	 *
+	 * @param corpus   sentences to process
+	 * @param stats    statistics object whose {@code result} map receives the counts
+	 * @param taxonomy if not null, only sentences whose taxonomy starts with this value are counted
+	 */
+	static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
+		for (Sentence s : corpus) {
+			// disregard if wrong taxonomy
+			if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
+				continue;
+			}
+
+			for (Word word : s.getWords()) {
+				countSingleAttributePatterns(word.getMsd(), pattern -> Common.updateMap(stats.result, pattern));
+			}
+		}
+	}
+
+	/**
+	 * Counts the patterns of every word through {@code stats.updateResults}.
+	 *
+	 * @param corpus   sentences to process
+	 * @param stats    statistics object that receives the counts
+	 * @param taxonomy currently ignored by this overload
+	 */
+	public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
+		for (Sentence s : corpus) {
+			for (Word word : s.getWords()) {
+				// TODO: also check that the msd is of a correct (inflected) type (create a set)
+				countSingleAttributePatterns(word.getMsd(), stats::updateResults);
+			}
+		}
+	}
+
+	/**
+	 * Feeds every single-attribute pattern of {@code msd} to {@code sink}.
+	 * Words without an MSD (null or empty) are skipped instead of failing on {@code charAt(0)}.
+	 */
+	private static void countSingleAttributePatterns(String msd, Consumer<String> sink) {
+		if (msd == null || msd.isEmpty()) {
+			return;
+		}
+
+		StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', msd.length() - 1));
+
+		for (int i = 1; i < msd.length(); i++) {
+			entry.setCharAt(i, msd.charAt(i));
+			sink.accept(entry.toString());
+			entry.setCharAt(i, '-');
+		}
+	}
+}
--- a/src/main/java/alg/inflectedJOS/WordFormation.java
+++ b/src/main/java/alg/inflectedJOS/WordFormation.java
@@ -0,0 +1,102 @@
+package alg.inflectedJOS;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
+
+import data.Enums.InflectedJosTypes;
+import data.StatisticsNew;
+import gui.ValidationUtil;
+import util.Combinations;
+
+// adapted from http://www.geeksforgeeks.org/print-all-possible-combinations-of-r-elements-in-a-given-array-of-size-n/
+public class WordFormation {
+	/** Index combinations that stay unmasked, keyed by key length (precomputed for lengths 4 to 8). */
+	private static final HashMap<Integer, HashSet<HashSet<Integer>>> indices;
+
+	static {
+		indices = new HashMap<>();
+
+		for (int i = 4; i <= 8; i++) {
+			indices.put(i, Combinations.generateIndices(i));
+		}
+	}
+
+	/**
+	 * Expands the results of {@code stat} into masked sub-combinations per inflected word type and stores them
+	 * with {@code stat.setResultCustom} as rows of {masked key, occurrences, occurrences / type total}.
+	 * Entries of non-inflected types are removed from {@code stat.getResult()} as a side effect.
+	 *
+	 * All working state is local to the call, so the rows of one call never leak into the next.
+	 *
+	 * @param stat statistics whose result map is read and whose custom result is set
+	 */
+	public static void calculateStatistics(StatisticsNew stat) {
+		Map<String, AtomicLong> result = stat.getResult();
+
+		// 1. filter - keep only inflected types (an empty key has no type character and is dropped as well)
+		result.keySet().removeIf(x -> x.isEmpty() || !InflectedJosTypes.inflectedJosTypes.contains(x.charAt(0)));
+
+		List<Object[]> rows = new ArrayList<>();
+		boolean anyTypeProcessed = false;
+
+		// 2. for each inflected type get all possible subcombinations
+		for (Character josChar : InflectedJosTypes.inflectedJosTypes) {
+			// filter out results for a single word type
+			Map<String, AtomicLong> singleTypeResults = result.entrySet().stream()
+					.filter(x -> x.getKey().charAt(0) == josChar)
+					.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+
+			if (ValidationUtil.isEmpty(singleTypeResults)) {
+				continue;
+			}
+
+			Map<String, Long> josTypeResult = new HashMap<>();
+			long total = 0;
+
+			for (Map.Entry<String, AtomicLong> e : singleTypeResults.entrySet()) {
+				long occurrences = e.getValue().longValue();
+				total += occurrences;
+
+				// NOTE(review): indices only holds lengths 4 to 8; a key of any other length fails here with a
+				// NullPointerException - confirm such keys cannot occur
+				for (HashSet<Integer> indicesCombo : indices.get(e.getKey().length())) {
+					josTypeResult.merge(mask(e.getKey(), indicesCombo), occurrences, Long::sum);
+				}
+			}
+
+			appendRows(rows, josTypeResult, total);
+			anyTypeProcessed = true;
+		}
+
+		// null (not an empty array) when no inflected type had any results
+		stat.setResultCustom(anyTypeProcessed ? rows.toArray(new Object[0][]) : null);
+	}
+
+	/**
+	 * Keeps the first character and the characters at the given indices; every other character becomes '.'.
+	 */
+	private static String mask(String word, HashSet<Integer> indicesCombo) {
+		StringBuilder sb = new StringBuilder(word.length());
+
+		sb.append(word.charAt(0));
+		for (int i = 1; i < word.length(); i++) {
+			sb.append(indicesCombo.contains(i) ? word.charAt(i) : '.');
+		}
+
+		return sb.toString();
+	}
+
+	/**
+	 * Appends one row per masked key: {key, occurrences, occurrences / total}.
+	 */
+	private static void appendRows(List<Object[]> rows, Map<String, Long> josTypeResult, long total) {
+		for (Map.Entry<String, Long> e : josTypeResult.entrySet()) {
+			rows.add(new Object[] {e.getKey(), e.getValue(), e.getValue() / (double) total});
+		}
+	}
+}
--- a/src/main/java/alg/ngram/ForkJoin.java
+++ b/src/main/java/alg/ngram/ForkJoin.java
@@ -0,0 +1,71 @@
+package alg.ngram;
+
+import java.util.List;
+import java.util.concurrent.RecursiveAction;
+
+import data.Sentence;
+import data.StatisticsNew;
+
+/**
+ * Fork/join task that runs {@link Ngrams#calculateForAll} over a slice of a corpus.
+ * Slices of {@code ACCEPTABLE_SIZE} sentences or more are split in half and the halves are run as subtasks.
+ */
+public class ForkJoin extends RecursiveAction {
+	private static final long serialVersionUID = 5074814035083362355L;
+
+	/** Slices with fewer sentences than this are processed directly instead of being split. */
+	private static final int ACCEPTABLE_SIZE = 1000;
+
+	private final List<Sentence> corpus;
+	private final StatisticsNew stats;
+	private final int start; // inclusive
+	private final int end; // exclusive
+
+	/**
+	 * Constructor for subproblems.
+	 *
+	 * @param corpus the whole corpus (shared, not copied)
+	 * @param start  index of the first sentence of the slice (inclusive)
+	 * @param end    index one past the last sentence of the slice (exclusive)
+	 * @param stats  statistics object handed to the counting code
+	 */
+	private ForkJoin(List<Sentence> corpus, int start, int end, StatisticsNew stats) {
+		this.corpus = corpus;
+		this.start = start;
+		this.end = end;
+		this.stats = stats;
+	}
+
+	/**
+	 * Constructor for the initial problem; the slice is the whole corpus.
+	 *
+	 * @param corpus sentences to process
+	 * @param stats  statistics object handed to the counting code
+	 */
+	public ForkJoin(List<Sentence> corpus, StatisticsNew stats) {
+		this(corpus, 0, corpus.size(), stats);
+	}
+
+	/** Processes this task's slice sequentially. */
+	private void computeDirectly() {
+		Ngrams.calculateForAll(corpus.subList(start, end), stats);
+	}
+
+	@Override
+	protected void compute() {
+		int subCorpusSize = end - start;
+
+		if (subCorpusSize < ACCEPTABLE_SIZE) {
+			computeDirectly();
+			return;
+		}
+
+		int mid = start + subCorpusSize / 2;
+		ForkJoin left = new ForkJoin(corpus, start, mid, stats);
+		ForkJoin right = new ForkJoin(corpus, mid, end, stats);
+
+		// invokeAll forks the subtasks and returns once both are done; it replaces fork/fork/join/join,
+		// whose outermost-first join order the ForkJoinTask documentation advises against
+		invokeAll(left, right);
+	}
+}
--- a/src/main/java/alg/ngram/Ngrams.java
+++ b/src/main/java/alg/ngram/Ngrams.java
@@ -0,0 +1,196 @@
+package alg.ngram;
+
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Function;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import data.CalculateFor;
+import data.Sentence;
+import data.StatisticsNew;
+import data.Word;
+import gui.ValidationUtil;
+
+/**
+ * Generates word ngram, skipgram and letter ngram candidates and counts them through {@code stats.updateResults}.
+ */
+public class Ngrams {
+	public final static Logger logger = LogManager.getLogger(Ngrams.class);
+
+	/**
+	 * Picks the generator from the filter: letter ngrams when the ngram value is 0, skipgrams when a positive
+	 * skip value is set, plain word ngrams otherwise.
+	 */
+	public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats) {
+		if (stats.getFilter().getNgramValue() == 0) { // letter ngram
+			generateNgramLetterCandidates(corpus, stats);
+		} else if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue()) && stats.getFilter().getSkipValue() > 0) {
+			generateSkipgramCandidates(corpus, stats);
+		} else {
+			generateNgramCandidates(corpus, stats);
+		}
+	}
+
+	/**
+	 * Counts every run of n consecutive words of every sentence, n being the filter's ngram value.
+	 */
+	public static void generateNgramCandidates(List<Sentence> corpus, StatisticsNew stats) {
+		int n = stats.getFilter().getNgramValue();
+
+		for (Sentence s : corpus) {
+			// sentences shorter than n have no valid start position, so the loop body never runs for them
+			int lastStart = s.getWords().size() - n;
+
+			for (int i = 0; i <= lastStart; i++) {
+				List<Word> ngramCandidate = s.getSublist(i, i + n);
+
+				// if msd regex is set and this candidate doesn't pass it, skip this iteration
+				if (stats.getFilter().hasMsd() && !passesRegex(ngramCandidate, stats.getFilter().getMsd())) {
+					continue;
+				}
+
+				stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
+			}
+		}
+	}
+
+	/**
+	 * Checks whether an ngram candidate passes specified regex filter: the msd of the i-th word has to match
+	 * the i-th pattern in full.
+	 */
+	private static boolean passesRegex(List<Word> ngramCandidate, List<Pattern> regex) {
+		if (ngramCandidate.size() != regex.size()) {
+			logger.error("ngramCandidate.size() & msd.size() mismatch"); // should not occur anyway
+			return false;
+		}
+
+		for (int i = 0; i < regex.size(); i++) {
+			// use the compiled pattern directly; String.matches would recompile it for every word
+			if (!regex.get(i).matcher(ngramCandidate.get(i).getMsd()).matches()) {
+				return false;
+			}
+		}
+
+		return true;
+	}
+
+	/**
+	 * Joins the candidate's words with single spaces, each word represented by its lemma, word form, msd or
+	 * the first character of its msd, depending on {@code calculateFor}. Other values yield an empty string.
+	 */
+	private static String wordToString(List<Word> ngramCandidate, CalculateFor calculateFor) {
+		Function<Word, String> representation;
+
+		switch (calculateFor) {
+			case LEMMA:
+				representation = Word::getLemma;
+				break;
+			case WORD:
+				representation = Word::getWord;
+				break;
+			case MORPHOSYNTACTIC_SPECS:
+			case MORPHOSYNTACTIC_PROPERTY:
+				representation = Word::getMsd;
+				break;
+			case WORD_TYPE:
+				representation = w -> Character.toString(w.getMsd().charAt(0));
+				break;
+			default:
+				return "";
+		}
+
+		return StringUtils.join(ngramCandidate.stream().map(representation).collect(Collectors.toList()), " ");
+	}
+
+	/**
+	 * Counts every substring of the filter's string length of every word. A word is skipped when
+	 * it has no usable form (e.g. missing lemma), when an msd regex is set and its msd does not match it, or
+	 * when it is shorter than the substring length.
+	 *
+	 * @param corpus sentences to process
+	 * @param stats  statistics object that provides the filter and receives the counts
+	 */
+	private static void generateNgramLetterCandidates(List<Sentence> corpus, StatisticsNew stats) {
+		int length = stats.getFilter().getStringLength();
+
+		for (Sentence s : corpus) {
+			for (Word w : s.getWords()) {
+				String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv());
+
+				if (ValidationUtil.isEmpty(word)
+						|| stats.getFilter().hasMsd() && !stats.getFilter().getMsd().get(0).matcher(w.getMsd()).matches()
+						|| word.length() < length) {
+					continue;
+				}
+
+				for (int i = 0; i <= word.length() - length; i++) {
+					// TODO: punctuation?
+					stats.updateResults(word.substring(i, i + length));
+				}
+			}
+		}
+	}
+
+	/**
+	 * Counts skipgram candidates: sequences of n words (n being the filter's ngram value, any n of at least 2)
+	 * in which up to {@code skip} words of the sentence may be left out between two neighbouring members. As
+	 * with plain ngrams, a sequence only starts where n consecutive words would still fit into the sentence.
+	 *
+	 * @param corpus sentences to process
+	 * @param stats  statistics object that provides the filter and receives the counts
+	 */
+	public static void generateSkipgramCandidates(List<Sentence> corpus, StatisticsNew stats) {
+		int ngram = stats.getFilter().getNgramValue();
+		int skip = stats.getFilter().getSkipValue();
+
+		// a skipgram needs at least two members
+		if (ngram < 2) {
+			return;
+		}
+
+		for (Sentence s : corpus) {
+			List<Word> sentence = s.getWords();
+
+			for (int i = 0; i <= sentence.size() - ngram; i++) {
+				ArrayList<Word> candidate = new ArrayList<>(ngram);
+				candidate.add(sentence.get(i));
+				extendSkipgramCandidate(sentence, i, candidate, ngram, skip, stats);
+			}
+		}
+	}
+
+	/**
+	 * Recursively appends the next member, taken from the {@code skip + 1} positions after {@code lastIndex},
+	 * and counts the candidate once it has {@code ngram} members.
+	 */
+	private static void extendSkipgramCandidate(List<Word> sentence, int lastIndex, ArrayList<Word> candidate,
+												int ngram, int skip, StatisticsNew stats) {
+		if (candidate.size() == ngram) {
+			// hand over a copy, the working list keeps changing while backtracking
+			validateAndCountSkipgramCandidate(new ArrayList<>(candidate), stats);
+			return;
+		}
+
+		// every index is bounded by the sentence length, whichever member is being added
+		int maxNext = Math.min(lastIndex + 1 + skip, sentence.size() - 1);
+
+		for (int next = lastIndex + 1; next <= maxNext; next++) {
+			candidate.add(sentence.get(next));
+			extendSkipgramCandidate(sentence, next, candidate, ngram, skip, stats);
+			candidate.remove(candidate.size() - 1);
+		}
+	}
+
+	private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats) {
+		// count if no regex is set or if it is & candidate passes it
+		if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) {
+			stats.updateResults(wordToString(skipgramCandidate, stats.getFilter().getCalculateFor()));
+		}
+	}
+}
--- a/src/main/java/alg/word/ForkJoin.java
+++ b/src/main/java/alg/word/ForkJoin.java
@@ -0,0 +1,71 @@
+package alg.word;
+
+import java.util.List;
+import java.util.concurrent.RecursiveAction;
+
+import data.Sentence;
+import data.StatisticsNew;
+
+/**
+ * Fork/join task that runs {@link WordLevel#calculateForAll} over a slice of a corpus.
+ * Slices of {@code ACCEPTABLE_SIZE} sentences or more are split in half and the halves are run as subtasks.
+ */
+public class ForkJoin extends RecursiveAction {
+	private static final long serialVersionUID = 7711587510996456040L;
+
+	/** Slices with fewer sentences than this are processed directly instead of being split. */
+	private static final int ACCEPTABLE_SIZE = 1000;
+
+	private final List<Sentence> corpus;
+	private final StatisticsNew stats;
+	private final int start; // inclusive
+	private final int end; // exclusive
+
+	/**
+	 * Constructor for subproblems.
+	 *
+	 * @param corpus the whole corpus (shared, not copied)
+	 * @param start  index of the first sentence of the slice (inclusive)
+	 * @param end    index one past the last sentence of the slice (exclusive)
+	 * @param stats  statistics object handed to the counting code
+	 */
+	private ForkJoin(List<Sentence> corpus, int start, int end, StatisticsNew stats) {
+		this.corpus = corpus;
+		this.start = start;
+		this.end = end;
+		this.stats = stats;
+	}
+
+	/**
+	 * Constructor for the initial problem; the slice is the whole corpus.
+	 *
+	 * @param corpus sentences to process
+	 * @param stats  statistics object handed to the counting code
+	 */
+	public ForkJoin(List<Sentence> corpus, StatisticsNew stats) {
+		this(corpus, 0, corpus.size(), stats);
+	}
+
+	/** Processes this task's slice sequentially. */
+	private void computeDirectly() {
+		WordLevel.calculateForAll(corpus.subList(start, end), stats);
+	}
+
+	@Override
+	protected void compute() {
+		int subCorpusSize = end - start;
+
+		if (subCorpusSize < ACCEPTABLE_SIZE) {
+			computeDirectly();
+			return;
+		}
+
+		int mid = start + subCorpusSize / 2;
+		ForkJoin left = new ForkJoin(corpus, start, mid, stats);
+		ForkJoin right = new ForkJoin(corpus, mid, end, stats);
+
+		// invokeAll forks the subtasks and returns once both are done; it replaces fork/fork/join/join,
+		// whose outermost-first join order the ForkJoinTask documentation advises against
+		invokeAll(left, right);
+	}
+}
--- a/src/main/java/alg/word/WordCount.java
+++ b/src/main/java/alg/word/WordCount.java
@@ -0,0 +1,124 @@
+package alg.word;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import alg.Common;
+import data.CalculateFor;
+import data.Sentence;
+import data.Statistics;
+import data.Word;
+
+/**
+ * Counts words or lemmas (or substrings of their CVV forms) into {@code stats.result}, optionally restricted
+ * to one taxonomy and/or one JOS word type. Nothing is counted unless {@code stats.getCf()} is LEMMA or WORD.
+ */
+class WordCount {
+	/** Counts every word of the corpus. */
+	private static void calculateNoFilter(List<Sentence> corpus, Statistics stats) {
+		for (Sentence s : corpus) {
+			countWords(s.getWords(), stats);
+		}
+	}
+
+	/**
+	 * Counts every substring of length {@code stats.getSubstringLength()} of the words' CVV forms.
+	 * A word exactly as long as the substring length contributes itself once.
+	 */
+	private static void calculateVCC(List<Sentence> corpus, Statistics stats) {
+		CalculateFor cf = stats.getCf();
+		if (cf != CalculateFor.LEMMA && cf != CalculateFor.WORD) {
+			return;
+		}
+
+		int substringLength = stats.getSubstringLength();
+
+		for (Sentence s : corpus) {
+			for (Word w : s.getWords()) {
+				String word = cf == CalculateFor.LEMMA ? w.getCVVLemma() : w.getCVVWord();
+
+				// "<=" covers the word of exactly substringLength characters; shorter words skip the loop
+				for (int i = 0; i <= word.length() - substringLength; i++) {
+					Common.updateMap(stats.result, word.substring(i, i + substringLength));
+				}
+			}
+		}
+	}
+
+	/** Counts the words whose msd starts with the selected JOS word type. */
+	private static void calculateForJosType(List<Sentence> corpus, Statistics stats) {
+		for (Sentence s : corpus) {
+			countWords(filterByJosType(s.getWords(), stats), stats);
+		}
+	}
+
+	/** Counts the words of the selected JOS word type in sentences of the selected taxonomy. */
+	private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
+		for (Sentence s : corpus) {
+			if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
+				countWords(filterByJosType(s.getWords(), stats), stats);
+			}
+		}
+	}
+
+	/** Counts every word in sentences of the selected taxonomy. */
+	private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
+		for (Sentence s : corpus) {
+			if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
+				countWords(s.getWords(), stats);
+			}
+		}
+	}
+
+	/**
+	 * Returns the words whose msd starts with {@code stats.getDistributionJosWordType()}.
+	 * Words with a null or empty msd are left out, so every caller is safe against missing msd data.
+	 */
+	private static List<Word> filterByJosType(List<Word> words, Statistics stats) {
+		List<Word> filteredWords = new ArrayList<>();
+
+		for (Word word : words) {
+			String msd = word.getMsd();
+
+			if (msd != null && !msd.isEmpty() && msd.charAt(0) == stats.getDistributionJosWordType()) {
+				filteredWords.add(word);
+			}
+		}
+
+		return filteredWords;
+	}
+
+	/** Counts the lemma or the word form of each given word, depending on {@code stats.getCf()}. */
+	private static void countWords(List<Word> words, Statistics stats) {
+		CalculateFor cf = stats.getCf();
+		if (cf != CalculateFor.LEMMA && cf != CalculateFor.WORD) {
+			return;
+		}
+
+		for (Word word : words) {
+			Common.updateMap(stats.result, cf == CalculateFor.LEMMA ? word.getLemma() : word.getWord());
+		}
+	}
+
+	/**
+	 * Dispatches to the counting variant that matches the filters set on {@code stats}.
+	 * The VCC variant is only used when neither taxonomy nor JOS type is set.
+	 */
+	static void calculateForAll(List<Sentence> corpus, Statistics stats) {
+		boolean taxonomyIsSet = stats.isTaxonomySet();
+		boolean josTypeIsSet = stats.isJOSTypeSet();
+
+		// branching once up front keeps the per-word loops free of filter checks
+		if (taxonomyIsSet && josTypeIsSet) {
+			calculateForTaxonomyAndJosType(corpus, stats);
+		} else if (taxonomyIsSet) {
+			calculateForTaxonomy(corpus, stats);
+		} else if (josTypeIsSet) {
+			calculateForJosType(corpus, stats);
+		} else if (stats.isVcc()) {
+			calculateVCC(corpus, stats);
+		} else {
+			calculateNoFilter(corpus, stats);
+		}
+	}
+}
--- a/src/main/java/alg/word/WordLevel.java
+++ b/src/main/java/alg/word/WordLevel.java
@@ -0,0 +1,102 @@
+package alg.word;
+
+import static data.Enums.WordLevelDefaultValues.*;
+
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+
+import data.Enums.WordLevelDefaultValues;
+import data.Enums.WordLevelType;
+import data.Sentence;
+import data.StatisticsNew;
+import data.Word;
+
+/**
+ * Looks for known prefixes and suffixes in every word of a corpus and records each hit together with the word.
+ */
+@SuppressWarnings("Duplicates")
+public class WordLevel {
+	private static HashSet<String> suffixes;
+	private static int minSuffixLength;
+	private static int maxSuffixLength;
+
+	private static HashSet<String> prefixes;
+	private static int minPrefixLength;
+	private static int maxPrefixLength;
+
+	static {
+		suffixes = WordLevelDefaultValues.getSuffixes();
+		calculateSuffixesLengths();
+
+		prefixes = WordLevelDefaultValues.getPrefixes();
+		calculatePrefixesLengths();
+	}
+
+	/** Checks every word of every sentence for a known suffix and a known prefix. */
+	public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats) {
+		for (Sentence s : corpus) {
+			for (Word word : s.getWords()) {
+				calculateForSuffixes(word.getWord(), stats);
+				calculateForPrefixes(word.getWord(), stats);
+			}
+		}
+	}
+
+	/**
+	 * Records the longest known prefix of {@code word} that still leaves at least
+	 * {@code MIN_N_OF_CHARACTERS_LEFT_PREFIX} characters of the word; records nothing if there is none.
+	 */
+	private static void calculateForPrefixes(String word, StatisticsNew stats) {
+		// longest candidate first, so the first hit is the longest matching prefix
+		for (int tmpPrefixLength = maxPrefixLength; tmpPrefixLength >= minPrefixLength; tmpPrefixLength--) {
+			// cutting this many characters would leave too little of the word; a shorter prefix may still fit
+			if (word.length() - tmpPrefixLength < MIN_N_OF_CHARACTERS_LEFT_PREFIX) {
+				continue;
+			}
+
+			String extractedPrefix = StringUtils.left(word, tmpPrefixLength);
+
+			if (prefixes.contains(extractedPrefix)) {
+				// save prefix and full word
+				stats.updateResultsNested(WordLevelType.PREFIX, extractedPrefix, word);
+				return;
+			}
+		}
+	}
+
+	/**
+	 * Records the longest known suffix of {@code word} that still leaves at least
+	 * {@code MIN_N_OF_CHARACTERS_LEFT_SUFFIX} characters of the word; records nothing if there is none.
+	 */
+	public static void calculateForSuffixes(String word, StatisticsNew stats) {
+		// longest candidate first, so the first hit is the longest matching suffix
+		for (int tmpSuffixLength = maxSuffixLength; tmpSuffixLength >= minSuffixLength; tmpSuffixLength--) {
+			// cutting this many characters would leave too little of the word; a shorter suffix may still fit
+			if (word.length() - tmpSuffixLength < MIN_N_OF_CHARACTERS_LEFT_SUFFIX) {
+				continue;
+			}
+
+			String extractedSuffix = StringUtils.right(word, tmpSuffixLength);
+
+			if (suffixes.contains(extractedSuffix)) {
+				// save suffix and full word
+				stats.updateResultsNested(WordLevelType.SUFFIX, extractedSuffix, word);
+				return;
+			}
+		}
+	}
+
+	/** Caches the lengths of the shortest and the longest known suffix (-1 each if there are none). */
+	public static void calculateSuffixesLengths() {
+		minSuffixLength = suffixes.stream().mapToInt(String::length).min().orElse(-1);
+		maxSuffixLength = suffixes.stream().mapToInt(String::length).max().orElse(-1);
+	}
+
+	/** Caches the lengths of the shortest and the longest known prefix (-1 each if there are none). */
+	public static void calculatePrefixesLengths() {
+		minPrefixLength = prefixes.stream().mapToInt(String::length).min().orElse(-1);
+		maxPrefixLength = prefixes.stream().mapToInt(String::length).max().orElse(-1);
+	}
+}
--- a/src/main/java/data/AnalysisLevel.java
+++ b/src/main/java/data/AnalysisLevel.java
@@ -0,0 +1,22 @@
+package data;
+
+/**
+ * Analysis levels, each with the label that {@link #toString()} returns.
+ */
+public enum AnalysisLevel {
+	STRING_LEVEL("Besedni nizi"),
+	WORD_LEVEL("Nivo besed in delov besed"),
+	WORD_FORMATION("Besedotvorni procesi");
+
+	/** Label returned by {@link #toString()}. */
+	private final String label;
+
+	AnalysisLevel(String label) {
+		this.label = label;
+	}
+
+	@Override
+	public String toString() {
+		return label;
+	}
+}
--- a/src/main/java/data/CalculateFor.java
+++ b/src/main/java/data/CalculateFor.java
@@ -0,0 +1,48 @@
+package data;
+
+/**
+ * Units a calculation can be run for; {@link #toString()} returns the unit's label.
+ */
+public enum CalculateFor {
+	WORD("različnica"),
+	LEMMA("lema"),
+	MORPHOSYNTACTIC_SPECS("oblikoskladenjska oznaka"),
+	MORPHOSYNTACTIC_PROPERTY("oblikoskladenjska lastnost"),
+	WORD_TYPE("besedna vrsta"),
+	DIST_WORDS("različnica"),
+	DIST_LEMMAS("lema");
+
+	private final String label;
+
+	CalculateFor(String label) {
+		this.label = label;
+	}
+
+	@Override
+	public String toString() {
+		return label;
+	}
+
+	/**
+	 * Looks a constant up by its label.
+	 *
+	 * @param cf label to look for, may be null
+	 * @return the matching constant among WORD, LEMMA, MORPHOSYNTACTIC_SPECS, MORPHOSYNTACTIC_PROPERTY and
+	 *         WORD_TYPE, or null if there is none; DIST_WORDS and DIST_LEMMAS are never returned
+	 */
+	public static CalculateFor factory(String cf) {
+		if (cf == null) {
+			return null;
+		}
+
+		CalculateFor[] candidates = {WORD, LEMMA, MORPHOSYNTACTIC_SPECS, MORPHOSYNTACTIC_PROPERTY, WORD_TYPE};
+
+		for (CalculateFor candidate : candidates) {
+			if (candidate.label.equals(cf)) {
+				return candidate;
+			}
+		}
+
+		return null;
+	}
+}
--- a/src/main/java/data/Corpus.java
+++ b/src/main/java/data/Corpus.java
@@ -0,0 +1,176 @@
+package data;
+
+import static gui.Messages.*;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import data.Enums.solar.SolarFilters;
+import gui.ValidationUtil;
+import javafx.collections.ObservableList;
+
+/**
+ * Holds the corpus selection (type, locations, detected files, taxonomy or Solar filters) and validates it.
+ */
+public class Corpus {
+	public final static Logger logger = LogManager.getLogger(Corpus.class);
+
+	private CorpusType corpusType;
+	private File chosenResultsLocation;
+	private File chosenCorpusLocation;
+	private Collection<File> detectedCorpusFiles;
+	boolean headerRead;
+	private ObservableList<String> taxonomy; // if gigafida or gos
+	private HashMap<String, ObservableList<String>> solarFilters; // if solar
+	private HashMap<String, HashSet<String>> solarFiltersForXML; // if solar - used while parsing xml
+	private boolean gosOrthMode;
+	boolean hasMsdData;
+	private ArrayList<String> validationErrors;
+
+	public Corpus() {
+		validationErrors = new ArrayList<>();
+	}
+
+	public CorpusType getCorpusType() {
+		return corpusType;
+	}
+
+	public void setCorpusType(CorpusType corpusType) {
+		this.corpusType = corpusType;
+		logger.info("Corpus.set: {}", corpusType);
+	}
+
+	public File getChosenResultsLocation() {
+		return chosenResultsLocation;
+	}
+
+	public void setChosenResultsLocation(File chosenResultsLocation) {
+		this.chosenResultsLocation = chosenResultsLocation;
+		logger.info("Corpus.set: {}", chosenResultsLocation);
+	}
+
+	public File getChosenCorpusLocation() {
+		return chosenCorpusLocation;
+	}
+
+	public void setChosenCorpusLocation(File chosenCorpusLocation) {
+		this.chosenCorpusLocation = chosenCorpusLocation;
+		logger.info("Corpus.set: {}", chosenCorpusLocation);
+	}
+
+	public Collection<File> getDetectedCorpusFiles() {
+		return detectedCorpusFiles;
+	}
+
+	public void setDetectedCorpusFiles(Collection<File> detectedCorpusFiles) {
+		this.detectedCorpusFiles = detectedCorpusFiles;
+		logger.info("Corpus.set: {}", detectedCorpusFiles);
+	}
+
+	public boolean isHeaderRead() {
+		return headerRead;
+	}
+
+	public void setHeaderRead(boolean headerRead) {
+		this.headerRead = headerRead;
+	}
+
+	public ObservableList<String> getTaxonomy() {
+		return taxonomy;
+	}
+
+	public void setTaxonomy(ObservableList<String> taxonomy) {
+		this.taxonomy = taxonomy;
+		logger.info("Corpus.set: {}", taxonomy);
+	}
+
+	public HashMap<String, ObservableList<String>> getSolarFilters() {
+		return solarFilters;
+	}
+
+	public void setSolarFilters(HashMap<String, ObservableList<String>> solarFilters) {
+		this.solarFilters = solarFilters;
+		logger.info("Corpus.set: {}", solarFilters);
+	}
+
+	public HashMap<String, HashSet<String>> getSolarFiltersForXML() {
+		return solarFiltersForXML;
+	}
+
+	public void setSolarFiltersForXML(HashMap<String, HashSet<String>> solarFiltersForXML) {
+		this.solarFiltersForXML = solarFiltersForXML;
+		logger.info("Corpus.set: {}", solarFiltersForXML);
+	}
+
+	public boolean isGosOrthMode() {
+		return gosOrthMode;
+	}
+
+	public void setGosOrthMode(boolean gosOrthMode) {
+		this.gosOrthMode = gosOrthMode;
+		logger.info("Corpus.set: {}", gosOrthMode);
+	}
+
+	public ArrayList<String> getValidationErrors() {
+		return validationErrors;
+	}
+
+	public String getValidationErrorsToString() {
+		return StringUtils.join(validationErrors, "\n - ");
+	}
+
+	public void setValidationErrors(ArrayList<String> validationErrors) {
+		this.validationErrors = validationErrors;
+	}
+
+	/**
+	 * Checks that corpus type, corpus location and results location are set, collecting a message for each one
+	 * that is missing. If the header was not read, the default taxonomy (for corpus types with a taxonomy) or
+	 * the default Solar filters (for Solar, unless already set) are filled in.
+	 *
+	 * NOTE(review): messages are appended to the existing error list, so errors of an earlier call stay in it
+	 * until the list is replaced - confirm callers reset it before validating again.
+	 *
+	 * @return true if no validation errors are recorded, false otherwise
+	 */
+	public boolean validate() {
+		if (corpusType == null) {
+			validationErrors.add(LABEL_RESULTS_CORPUS_TYPE_NOT_SET);
+		}
+
+		if (chosenCorpusLocation == null) {
+			validationErrors.add(LABEL_CORPUS_LOCATION_NOT_SET);
+		}
+
+		if (chosenResultsLocation == null) {
+			validationErrors.add(LABEL_RESULTS_LOCATION_NOT_SET);
+		}
+
+		if (!headerRead && corpusType != null) {
+			// if user didn't opt into reading the headers, set default taxonomy or solar filters
+			if (Tax.getCorpusTypesWithTaxonomy().contains(corpusType)) {
+				taxonomy = Tax.getTaxonomyForComboBox(corpusType);
+			} else if (corpusType == CorpusType.SOLAR && solarFilters == null) {
+				setSolarFilters(SolarFilters.getFiltersForComboBoxes());
+			}
+		}
+
+		if (headerRead && ValidationUtil.isEmpty(taxonomy)) {
+			// mustn't happen, intercept at gui level
+		}
+
+		if (!ValidationUtil.isEmpty(validationErrors)) {
+			logger.error("Corpus validation error: {}", StringUtils.join(validationErrors, "\n - "));
+			return false;
+		} else {
+			return true;
+		}
+	}
+}
--- a/src/main/java/data/CorpusType.java
+++ b/src/main/java/data/CorpusType.java
@@ -0,0 +1,25 @@
+package data;
+
+/**
+ * Corpus kinds known to the application, each carrying a display name and a lower-case name.
+ */
+public enum CorpusType {
+	GIGAFIDA("Gigafida", "gigafida"),
+	// NOTE(review): the display name below ends with a space - confirm that this is intended
+	CCKRES("ccKres ", "cckres"),
+	SOLAR("Šolar", "šolar"),
+	GOS("GOS", "gos");
+
+	/** Display name, returned by {@link #toString()}. */
+	private final String name;
+	/** Lower-case variant of the name, returned by {@link #getNameLowerCase()}. */
+	private final String nameLowerCase;
+
+	CorpusType(String displayName, String lowerCaseName) {
+		name = displayName;
+		nameLowerCase = lowerCaseName;
+	}
+
+	/**
+	 * @return the display name of this corpus type
+	 */
+	@Override
+	public String toString() {
+		return name;
+	}
+
+	/**
+	 * @return the lower-case name of this corpus type
+	 */
+	public String getNameLowerCase() {
+		return this.nameLowerCase;
+	}
+}
--- a/src/main/java/data/Enums/InflectedJosTypes.java
+++ b/src/main/java/data/Enums/InflectedJosTypes.java
@@ -0,0 +1,12 @@
+package data.Enums;
+
+import java.util.Arrays;
+import java.util.HashSet;
+
+/**
+ * Holder for the set of single-character type codes {@code 'S'}, {@code 'G'} and {@code 'P'}.
+ * NOTE(review): these presumably are the Slovene codes for noun, verb and adjective as
+ * listed in {@code Msd} - confirm.
+ */
+public class InflectedJosTypes {
+	/** Shared, mutable set that holds exactly 'S', 'G' and 'P' once the class is initialised. */
+	public static final HashSet<Character> inflectedJosTypes = new HashSet<>(Arrays.asList('S', 'G', 'P'));
+}
--- a/src/main/java/data/Enums/Msd.java
+++ b/src/main/java/data/Enums/Msd.java
@@ -0,0 +1,68 @@
+package data.Enums;
+
+import java.util.HashMap;
+
+public enum Msd {
+	NOUN("samostalnik", 'S', "Noun", 'N', 5),
+	VERB("glagol", 'G', "Verb", 'V', 7),
+	ADJECTIVE("pridevnik", 'P', "Adjective", 'A', 6),
+	ADVERB("prislov", 'R', "Adverb", 'R', 2),
+	PRONOUN("zaimek", 'Z', "Pronoun", 'P', 8),
+	NUMERAL("števnik", 'K', "Numeral", 'M', 6),
+	PREPOSITION("predlog", 'D', "Preposition", 'S', 1),
+	CONJUNCTION("veznik", 'V', "Conjunction", 'C', 1),
+	PARTICLE("členek", 'L', "Particle", 'Q', 0),
+	INTERJECTION("medmet", 'M', "Interjection", 'I', 0),
+	ABBREVIATION("okrajšava", 'O', "Abbreviation", 'Y', 0),
+	RESIDUAL("neuvrščeno", 'N', "Residual", 'X', 1);
+
+	private final String siName;
+	private final Character siCode;
+	private final String enName;
+	private final Character enCode;
+	private final Integer nOfAttributes;
+
+	private static HashMap<Character, Integer> siCodeNOfAttributes;
+
+	static {
+		siCodeNOfAttributes = new HashMap<>();
+		for (Msd msd : Msd.values()) {
+			siCodeNOfAttributes.put(msd.getSiCode(), msd.nOfAttributes);
+		}
+	}
+
+	Msd(String siName, Character siCode, String enName, Character enCode, int nOfAttributes) {
+		this.siName = siName;
+		this.siCode = siCode;
+		this.enName = enName;
+		this.enCode = enCode;
+		this.nOfAttributes = nOfAttributes;
+	}
+
+	public String getSiName() {
+		return siName;
+	}
+
+	public Character getSiCode() {
+		return siCode;
+	}
+
+	public String getEnName() {
+		return enName;
+	}
+
+	public Character getEnCode() {
+		return enCode;
+	}
+
+	/**
+	 * Returns the number of attributes for the given type.
+	 *
+	 * @param msd
+	 *
+	 * @return
+	 */
+	public static int getMsdLengthForType(String msd) {
+		return siCodeNOfAttributes.get(msd.charAt(0)) + 1;
+	}
+}
--- a/src/main/java/data/Enums/WordLevelDefaultValues.java
+++ b/src/main/java/data/Enums/WordLevelDefaultValues.java
@@ -0,0 +1,55 @@
+package data.Enums;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+/**
+ * Holds the default suffix and prefix lists, read once from classpath resources when the
+ * class is initialised. A list is empty when its resource is missing or cannot be read.
+ */
+public class WordLevelDefaultValues {
+	public final static Logger logger = LogManager.getLogger(WordLevelDefaultValues.class);
+
+	private static HashSet<String> suffixes;
+	private static final String SUFFIXES_FILE = "/Lists/suffixes.txt";
+	public static final int MIN_N_OF_CHARACTERS_LEFT_SUFFIX = 2;
+
+	private static HashSet<String> prefixes;
+	private static final String PREFIXES_FILE = "/Lists/prefixes.txt";
+	public static final int MIN_N_OF_CHARACTERS_LEFT_PREFIX = 2;
+
+	static {
+		suffixes = readFromFile(SUFFIXES_FILE);
+		prefixes = readFromFile(PREFIXES_FILE);
+	}
+
+	/**
+	 * Reads a classpath resource line by line into a set.
+	 *
+	 * @param fileName absolute resource path, e.g. {@code /Lists/suffixes.txt}
+	 *
+	 * @return the distinct lines of the resource; an empty set when the resource does not
+	 *         exist or reading it fails
+	 */
+	private static HashSet<String> readFromFile(String fileName) {
+		HashSet<String> dictionary = new HashSet<>();
+
+		// resolve against this class; "X.class.getClass()" would resolve against java.lang.Class
+		try (InputStream is = WordLevelDefaultValues.class.getResourceAsStream(fileName)) {
+			if (is == null) {
+				logger.warn("Resource {} not found, using an empty list", fileName);
+				return dictionary;
+			}
+
+			// NOTE(review): decoding uses the platform default charset; consider an explicit
+			// charset once the encoding of the list files is confirmed
+			try (BufferedReader reader = new BufferedReader(new InputStreamReader(is))) {
+				// collect straight into a HashSet, Collectors.toSet() promises no concrete type
+				dictionary = reader.lines().collect(Collectors.toCollection(HashSet::new));
+			}
+		} catch (IOException | java.io.UncheckedIOException e) {
+			logger.error("Problem reading init dictionary", e);
+		}
+
+		return dictionary;
+	}
+
+	/**
+	 * @return the suffix set loaded at class initialisation (shared instance, not a copy)
+	 */
+	public static HashSet<String> getSuffixes() {
+		return suffixes;
+	}
+
+	/**
+	 * @return the prefix set loaded at class initialisation (shared instance, not a copy)
+	 */
+	public static HashSet<String> getPrefixes() {
+		return prefixes;
+	}
+}
--- a/src/main/java/data/Enums/WordLevelType.java
+++ b/src/main/java/data/Enums/WordLevelType.java
@@ -0,0 +1,16 @@
+package data.Enums;
+
+/**
+ * The two word-part kinds, suffix and prefix, each with its Slovene name.
+ */
+public enum WordLevelType {
+	SUFFIX("pripona"),
+	PREFIX("predpona");
+
+	/** Slovene name of the word part. */
+	private final String name;
+
+	WordLevelType(String slovenianName) {
+		name = slovenianName;
+	}
+
+	/**
+	 * @return the Slovene name given to this constant
+	 */
+	public String getName() {
+		return this.name;
+	}
+}
--- a/src/main/java/data/Enums/solar/SolarFilters.java
+++ b/src/main/java/data/Enums/solar/SolarFilters.java
@@ -0,0 +1,57 @@
+package data.Enums.solar;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import javafx.collections.FXCollections;
+import javafx.collections.ObservableList;
+
+/**
+ * Names of the Šolar corpus filters together with the predefined values of each filter,
+ * prepared as observable lists.
+ */
+public class SolarFilters {
+	/** Filter name -> every predefined value of that filter. */
+	private static final HashMap<String, ObservableList<String>> SOLAR_FILTERS = new HashMap<>();
+	public static final String SOLA = "sola";
+	public static final String PREDMET = "predmet";
+	public static final String RAZRED = "razred";
+	public static final String REGIJA = "regija";
+	public static final String TIP = "tip";
+	public static final String LETO = "leto";
+
+	static {
+		SOLAR_FILTERS.put(REGIJA, FXCollections.observableArrayList("Celje", "Gorica", "Koper", "Kranj", "Krško", "Ljubljana", "Maribor", "Murska Sobota", "Novo mesto", "Postojna", "Slovenj Gradec"));
+		SOLAR_FILTERS.put(PREDMET, FXCollections.observableArrayList("državljanska vzgoja in etika", "ekonomija", "filozofija", "geografija", "kemija", "podjetništvo", "psihologija", "slovenščina", "sociologija", "umetnostna vzgoja", "zgodovina"));
+		SOLAR_FILTERS.put(RAZRED, FXCollections.observableArrayList("6. razred", "7. razred", "8. razred", "9. razred", "1. letnik", "2. letnik", "3. letnik", "4. letnik", "5. letnik", "maturitetni tečaj"));
+		SOLAR_FILTERS.put(LETO, FXCollections.observableArrayList("2007", "2008", "2009", "2009/2010", "2010"));
+		SOLAR_FILTERS.put(SOLA, FXCollections.observableArrayList("gimnazija", "osnovna šola", "poklicna šola", "strokovna šola"));
+		SOLAR_FILTERS.put(TIP, FXCollections.observableArrayList("esej/spis", "pisni izdelek (učna ura)", "test (daljše besedilo)", "test (odgovori na vprašanja)"));
+	}
+
+	public static final ObservableList<String> N_GRAM_COMPUTE_FOR_FULL = FXCollections.observableArrayList("različnica", "lema", "oblikoskladenjska oznaka", "oblikoskladenjska lastnost", "besedna vrsta");
+	public static final ObservableList<String> N_GRAM_COMPUTE_FOR_LIMITED = FXCollections.observableArrayList("različnica", "lema");
+
+	/**
+	 * Returns every filter with all of its predefined values.
+	 *
+	 * @return the shared internal map (not a copy)
+	 */
+	public static HashMap<String, ObservableList<String>> getFiltersForComboBoxes() {
+		return SOLAR_FILTERS;
+	}
+
+	/**
+	 * Returns every known filter with only the values listed in {@code foundFilters}, sorted.
+	 *
+	 * @param foundFilters filter name -> values to offer for that filter; must not be {@code null}
+	 *
+	 * @return a new map with one entry per known filter; a filter that is absent from
+	 *         {@code foundFilters} is mapped to an empty list
+	 */
+	public static HashMap<String, ObservableList<String>> getFiltersForComboBoxes(HashMap<String, HashSet<String>> foundFilters) {
+		HashMap<String, ObservableList<String>> result = new HashMap<>();
+
+		for (String filterName : SOLAR_FILTERS.keySet()) {
+			ObservableList<String> values;
+
+			if (foundFilters.containsKey(filterName)) {
+				values = FXCollections.observableArrayList(foundFilters.get(filterName)).sorted();
+			} else {
+				// the corpus had nothing for this filter, so offer a blank list
+				values = FXCollections.observableArrayList();
+			}
+
+			result.put(filterName, values);
+		}
+
+		return result;
+	}
+}
--- a/src/main/java/data/Filter.java
+++ b/src/main/java/data/Filter.java
@@ -0,0 +1,144 @@
+package data;
+
+import static data.Filter.filterName.*;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import gui.ValidationUtil;
+
+@SuppressWarnings("unchecked")
+public class Filter {
+	private HashMap<filterName, Object> filter;
+
+	public enum filterName {
+		ANALYSIS_LEVEL,
+		CALCULATE_FOR,
+		NGRAM_VALUE,
+		SKIP_VALUE,
+		IS_CVV,
+		STRING_LENGTH,
+		TAXONOMY,
+		MSD,
+		HAS_MSD,
+		SOLAR_FILTERS
+	}
+
+	public Filter() {
+		filter = new HashMap<>();
+	}
+
+	public Filter(AnalysisLevel al, CalculateFor cf) {
+		filter = new HashMap<>();
+
+		filter.put(ANALYSIS_LEVEL, al);
+		filter.put(CALCULATE_FOR, cf);
+	}
+
+	public void setAl(AnalysisLevel al) {
+		filter.put(ANALYSIS_LEVEL, al);
+	}
+
+	public AnalysisLevel getAl() {
+		return (AnalysisLevel) filter.get(ANALYSIS_LEVEL);
+	}
+
+	public void setCalculateFor(CalculateFor cf) {
+		filter.put(CALCULATE_FOR, cf);
+	}
+
+	public CalculateFor getCalculateFor() {
+		return (CalculateFor) filter.get(CALCULATE_FOR);
+	}
+
+	public void setNgramValue(Integer ngramValue) {
+		filter.put(NGRAM_VALUE, ngramValue);
+	}
+
+	public Integer getNgramValue() {
+		return (Integer) filter.get(NGRAM_VALUE);
+	}
+
+	public void setSkipValue(Integer skipValue) {
+		filter.put(SKIP_VALUE, skipValue);
+	}
+
+	public Integer getSkipValue() {
+		return (Integer) filter.get(SKIP_VALUE);
+	}
+
+	public void setIsCvv(boolean isCvv) {
+		filter.put(IS_CVV, isCvv);
+	}
+
+	public boolean isCvv() {
+		return filter.containsKey(IS_CVV) && (boolean) filter.get(IS_CVV);
+	}
+
+	public void setStringLength(int stringLength) {
+		filter.put(STRING_LENGTH, stringLength);
+	}
+
+	public Integer getStringLength() {
+		return (Integer) filter.get(STRING_LENGTH);
+	}
+
+	public void setTaxonomy(ArrayList<String> taxonomy) {
+		filter.put(TAXONOMY, taxonomy);
+	}
+
+	public ArrayList<String> getTaxonomy() {
+		if (filter.containsKey(TAXONOMY) && filter.get(TAXONOMY) != null) {
+			return (ArrayList<String>) filter.get(TAXONOMY);
+		} else {
+			return new ArrayList<>();
+		}
+	}
+
+	public void setMsd(ArrayList<Pattern> msd) {
+		filter.put(MSD, msd);
+		if (!ValidationUtil.isEmpty(msd)) {
+			setHasMsd(true);
+		} else {
+			setHasMsd(false);
+		}
+	}
+
+	public ArrayList<Pattern> getMsd() {
+		return (ArrayList<Pattern>) filter.get(MSD);
+	}
+
+	public void setHasMsd(boolean hasMsd) {
+		filter.put(HAS_MSD, hasMsd);
+	}
+
+	public boolean hasMsd() {
+		return filter.containsKey(HAS_MSD) && (boolean) filter.get(HAS_MSD);
+	}
+
+	public String toString() {
+		String newLine = "\n\t- ";
+		StringBuilder sb = new StringBuilder();
+
+		sb.append(newLine).append("Filter:");
+		for (Map.Entry<filterName, Object> entry : filter.entrySet()) {
+			sb.append(newLine)
+					.append(entry.getKey().toString())
+					.append(": ")
+					.append(entry.getValue() != null ? entry.getValue().toString() : "null");
+		}
+
+		return sb.toString();
+	}
+
+	public void setSolarFilters(HashMap<String, HashSet<String>> filters) {
+		filter.put(SOLAR_FILTERS, filters);
+	}
+
+	public HashMap<String, HashSet<String>> getSolarFilters() {
+		return (HashMap<String, HashSet<String>>) filter.get(SOLAR_FILTERS);
+	}
+}
--- a/src/main/java/data/GigafidaJosWordType.java
+++ b/src/main/java/data/GigafidaJosWordType.java
@@ -0,0 +1,71 @@
+package data;
+
+/**
+ * Word types, each with a name (written without diacritics) and a one-letter code.
+ */
+public enum GigafidaJosWordType {
+	SAMOSTALNIK("samostalnik", 'S'),
+	GLAGOL("glagol", 'G'),
+	PRIDEVNIK("pridevnik", 'P'),
+	PRISLOV("prislov", 'R'),
+	ZAIMEK("zaimek", 'Z'),
+	STEVNIK("stevnik", 'K'),
+	PREDLOG("predlog", 'D'),
+	VEZNIK("veznik", 'V'),
+	CLENEK("clenek", 'L'),
+	MEDMET("medmet", 'M'),
+	OKRAJSAVA("okrajsava", 'O');
+
+	/** Name of the word type, returned by {@link #toString()}. */
+	private final String name;
+	/** One-letter code of the word type. */
+	private final char wordType;
+
+	GigafidaJosWordType(String typeName, char typeCode) {
+		name = typeName;
+		wordType = typeCode;
+	}
+
+	/**
+	 * @return the name of this word type
+	 */
+	public String toString() {
+		return name;
+	}
+
+	/**
+	 * @return the one-letter code of this word type
+	 */
+	public char getWordType() {
+		return this.wordType;
+	}
+
+	/**
+	 * Looks up a word type by its name.
+	 *
+	 * @param wType name to look for, compared exactly against {@link #toString()}
+	 *
+	 * @return the matching constant, or {@code null} when {@code wType} is {@code null} or unknown
+	 */
+	public static GigafidaJosWordType factory(String wType) {
+		if (wType == null) {
+			return null;
+		}
+
+		for (GigafidaJosWordType candidate : values()) {
+			if (candidate.toString().equals(wType)) {
+				return candidate;
+			}
+		}
+
+		return null;
+	}
+}
--- a/src/main/java/data/GigafidaTaxonomy.java
+++ b/src/main/java/data/GigafidaTaxonomy.java
@@ -0,0 +1,76 @@
+package data;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+import javafx.collections.FXCollections;
+import javafx.collections.ObservableList;
+
+/**
+ * Taxonomy entries of the Gigafida corpus, each with a display name and a dotted
+ * taxonomy code such as {@code T.K.L}.
+ */
+public enum GigafidaTaxonomy {
+	TISK("tisk", "T"),
+	KNJIZNO("knjižno", "T.K"),
+	LEPOSLOVNO("leposlovno", "T.K.L"),
+	STROKOVNO("strokovno", "T.K.S"),
+	PERIODICNO("periodično", "T.P"),
+	CASOPIS("časopis", "T.P.C"),
+	REVIJA("revija", "T.P.R"),
+	INTERNET("internet", "I");
+
+	/** Display name, returned by {@link #toString()}. */
+	private final String name;
+	/** Dotted taxonomy code. */
+	private final String taxonomy;
+
+	/** Display names of all constants in declaration order. */
+	private static final ObservableList<String> FOR_COMBO_BOX;
+
+	static {
+		ArrayList<String> displayNames = Arrays.stream(values())
+				.map(GigafidaTaxonomy::toString)
+				.collect(Collectors.toCollection(ArrayList::new));
+		FOR_COMBO_BOX = FXCollections.observableArrayList(displayNames);
+	}
+
+	GigafidaTaxonomy(String displayName, String code) {
+		name = displayName;
+		taxonomy = code;
+	}
+
+	/**
+	 * @return the display name of this entry
+	 */
+	public String toString() {
+		return name;
+	}
+
+	/**
+	 * @return the dotted taxonomy code of this entry
+	 */
+	public String getTaxonomnyString() {
+		return taxonomy;
+	}
+
+	/**
+	 * Looks up an entry by its display name.
+	 *
+	 * @param tax display name to look for, compared exactly against {@link #toString()}
+	 *
+	 * @return the matching constant, or {@code null} when {@code tax} is {@code null} or unknown
+	 */
+	public static GigafidaTaxonomy factory(String tax) {
+		if (tax == null) {
+			return null;
+		}
+
+		for (GigafidaTaxonomy candidate : values()) {
+			if (candidate.toString().equals(tax)) {
+				return candidate;
+			}
+		}
+
+		return null;
+	}
+
+	/**
+	 * @return the shared list of display names (not a copy)
+	 */
+	public static ObservableList<String> getForComboBox() {
+		return FOR_COMBO_BOX;
+	}
+}
--- a/src/main/java/data/GosTaxonomy.java
+++ b/src/main/java/data/GosTaxonomy.java
@@ -0,0 +1,85 @@
+package data;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+import javafx.collections.FXCollections;
+import javafx.collections.ObservableList;
+
+/**
+ * Taxonomy entries of the GOS corpus, each with a display name and a dotted
+ * taxonomy code such as {@code gos.T.J}.
+ */
+public enum GosTaxonomy {
+	JAVNI("javni", "gos.T.J"),
+	INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "gos.T.J.I"),
+	RAZVEDRILNI("razvedrilni", "gos.T.J.R"),
+	NEJAVNI("nejavni", "gos.T.N"),
+	NEZASEBNI("nezasebni", "gos.T.N.N"),
+	ZASEBNI("zasebni", "gos.T.N.Z"),
+	OSEBNI_STIK("osebni stik", "gos.K.O"),
+	TELEFON("telefon", "gos.K.P"),
+	RADIO("radio", "gos.K.R"),
+	TELEVIZIJA("televizija", "gos.K.T");
+
+	/** Display name, returned by {@link #toString()}. */
+	private final String name;
+	/** Dotted taxonomy code. */
+	private final String taxonomy;
+
+	/** Display names of all constants in declaration order. */
+	private static final ObservableList<String> FOR_COMBO_BOX;
+
+	static {
+		ArrayList<String> displayNames = Arrays.stream(values())
+				.map(GosTaxonomy::toString)
+				.collect(Collectors.toCollection(ArrayList::new));
+		FOR_COMBO_BOX = FXCollections.observableArrayList(displayNames);
+	}
+
+	GosTaxonomy(String displayName, String code) {
+		name = displayName;
+		taxonomy = code;
+	}
+
+	/**
+	 * @return the display name of this entry
+	 */
+	public String toString() {
+		return name;
+	}
+
+	/**
+	 * @return the dotted taxonomy code of this entry
+	 */
+	public String getTaxonomnyString() {
+		return taxonomy;
+	}
+
+	/**
+	 * Looks up an entry by its display name.
+	 *
+	 * @param tax display name to look for, compared exactly against {@link #toString()}
+	 *
+	 * @return the matching constant, or {@code null} when {@code tax} is {@code null} or unknown
+	 */
+	public static GosTaxonomy factory(String tax) {
+		if (tax == null) {
+			return null;
+		}
+
+		for (GosTaxonomy candidate : values()) {
+			if (candidate.toString().equals(tax)) {
+				return candidate;
+			}
+		}
+
+		return null;
+	}
+
+	/**
+	 * @return the shared list of display names (not a copy)
+	 */
+	public static ObservableList<String> getForComboBox() {
+		return FOR_COMBO_BOX;
+	}
+}
--- a/src/main/java/data/Sentence.java
+++ b/src/main/java/data/Sentence.java
@@ -0,0 +1,56 @@
+package data;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A sentence: its words plus an optional taxonomy string, an optional type and an
+ * optional property map.
+ */
+public class Sentence {
+	private List<Word> words;
+	private String taksonomija;
+
+	// GOS
+	private String type;
+	private Map<String, String> properties;
+
+	/**
+	 * Creates a sentence without taxonomy, type or properties.
+	 *
+	 * @param words words of the sentence; stored by reference
+	 */
+	public Sentence(List<Word> words) {
+		this.words = words;
+	}
+
+	/**
+	 * Creates a sentence with a taxonomy.
+	 *
+	 * @param words       words of the sentence; stored by reference
+	 * @param taksonomija taxonomy of the sentence
+	 */
+	public Sentence(List<Word> words, String taksonomija) {
+		this(words);
+		this.taksonomija = taksonomija;
+	}
+
+	/**
+	 * Creates a sentence with a taxonomy and a property map.
+	 *
+	 * @param words       words of the sentence; stored by reference
+	 * @param taksonomija taxonomy of the sentence
+	 * @param properties  additional properties; stored by reference
+	 */
+	public Sentence(List<Word> words, String taksonomija, Map<String, String> properties) {
+		this(words, taksonomija);
+		this.properties = properties;
+	}
+
+	/**
+	 * Creates a sentence with a taxonomy and a type.
+	 *
+	 * @param words       words of the sentence; stored by reference
+	 * @param taksonomija taxonomy of the sentence
+	 * @param type        type of the sentence
+	 */
+	public Sentence(List<Word> words, String taksonomija, String type) {
+		this(words, taksonomija);
+		this.type = type;
+	}
+
+	/**
+	 * @return the word list passed to the constructor (not a copy)
+	 */
+	public List<Word> getWords() {
+		return this.words;
+	}
+
+	/**
+	 * @return the taxonomy, or {@code null} when none was given
+	 */
+	public String getTaxonomy() {
+		return this.taksonomija;
+	}
+
+	/**
+	 * Returns a view of part of the word list, as produced by {@link List#subList(int, int)}.
+	 *
+	 * @param indexFrom index of the first word, inclusive
+	 * @param indexTo   index of the last word, exclusive
+	 *
+	 * @return the words in the given range
+	 */
+	public List<Word> getSublist(int indexFrom, int indexTo) {
+		return words.subList(indexFrom, indexTo);
+	}
+
+	/**
+	 * @return the type, or {@code null} when none was set
+	 */
+	public String getType() {
+		return this.type;
+	}
+
+	/**
+	 * @param type new type of the sentence
+	 */
+	public void setType(String type) {
+		this.type = type;
+	}
+}
--- a/src/main/java/data/Settings.java
+++ b/src/main/java/data/Settings.java
@@ -0,0 +1,16 @@
+package data;
+
+
+import java.io.File;
+import java.util.Collection;
+
+/**
+ * Application-wide constants and two globally shared, mutable references.
+ */
+public class Settings {
+	// limits and switches
+	public static final int CORPUS_SENTENCE_LIMIT = 50000;
+	public static final boolean PRINT_LOG = false;
+
+	// JavaFX style snippets setting the accent colour
+	public static final String FX_ACCENT_OK = "-fx-accent: forestgreen;";
+	public static final String FX_ACCENT_NOK = "-fx-accent: red;";
+
+	// mutable global state; not initialised here, so both start out as null
+	public static Collection<File> corpus;
+	public static File resultsFilePath;
+}
--- a/src/main/java/data/Statistics.java
+++ b/src/main/java/data/Statistics.java
@@ -0,0 +1,299 @@
+package data;
+
+import java.io.UnsupportedEncodingException;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.regex.Pattern;
+
+import util.Util;
+import util.db.RDB;
+
+/**
+ * Settings and accumulated results of one analysis run. Each constructor configures the
+ * object for one kind of analysis and builds a matching result title.
+ */
+public class Statistics {
+	private CorpusType corpusType;
+	private AnalysisLevel analysisLevel;
+	private boolean useDB;
+	private RDB db;
+
+	private boolean analysisProducedResults;
+
+	private String taxonomy;
+	private boolean taxonomyIsSet;
+
+	private char JOSType;
+	private boolean JOSTypeIsSet;
+
+	private String resultTitle;
+	public Map<String, AtomicLong> result = new ConcurrentHashMap<>();
+
+	// nGrams
+	private int nGramLevel;
+	private Integer skip;
+	private CalculateFor cf;
+	private List<Pattern> morphosyntacticFilter;
+
+	// distributions
+	private String distributionTaxonomy;
+	private char distributionJosWordType;
+	private boolean vcc;
+	private Integer substringLength;
+
+	// inflected JOS
+	private String inflectedJosTaxonomy;
+
+	// GOS
+	boolean gosOrthMode;
+
+	// šolar
+	Map<String, Object> solarHeadBlockFilter;
+
+
+	/**
+	 * Formats the current local time for use in result titles.
+	 * NOTE(review): "hh" is the 12-hour clock field and no AM/PM marker is emitted, so a
+	 * morning and an evening run can get the same stamp - confirm whether "HH" was intended.
+	 */
+	private static String timestamp() {
+		return LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
+	}
+
+	/**
+	 * Configures an n-gram analysis.
+	 *
+	 * @param al         analysis level
+	 * @param nGramLevel n-gram level, used in the result title
+	 * @param skip       skip value; {@code null} and {@code 0} are both stored as {@code null}
+	 * @param cf         what to calculate for; must not be {@code null}
+	 */
+	public Statistics(AnalysisLevel al, int nGramLevel, Integer skip, CalculateFor cf) {
+		String dateTime = timestamp();
+		this.cf = cf;
+		this.analysisLevel = al;
+		this.nGramLevel = nGramLevel;
+		this.skip = skip == null || skip == 0 ? null : skip;
+
+		this.resultTitle = String.format("%s%d-gram_%s_%s",
+				this.skip != null ? String.format("%d-%s-", skip, "skip") : "",
+				nGramLevel,
+				cf.toString(),
+				dateTime);
+	}
+
+	/**
+	 * Configures a word-distribution analysis.
+	 *
+	 * @param al                      analysis level
+	 * @param distributionTaxonomy    taxonomy to restrict to, or {@code null} for none
+	 * @param distributionJosWordType word type to restrict to, or {@code null} for none
+	 * @param cf                      what to calculate for
+	 */
+	public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
+		String dateTime = timestamp();
+
+		this.resultTitle = String.format("%s_%s_%s",
+				distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
+				distributionJosWordType != null ? distributionJosWordType.toString() : "",
+				dateTime);
+
+		this.analysisLevel = al;
+		this.cf = cf;
+		this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
+		this.taxonomyIsSet = distributionTaxonomy != null;
+
+		this.JOSTypeIsSet = distributionJosWordType != null;
+		// a blank character stands in for "no word type"
+		this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
+	}
+
+	/**
+	 * Configures a vowel/consonant sequence distribution analysis; sets the {@code vcc} flag.
+	 *
+	 * @param al              analysis level
+	 * @param cf              what to calculate for
+	 * @param substringLength substring length, used in the result title
+	 */
+	public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) {
+		String dateTime = timestamp();
+
+		this.resultTitle = String.format("%s_%d_%s",
+				"Distribucija zaporedij samoglasnikov in soglasnikov",
+				substringLength,
+				dateTime);
+
+		this.analysisLevel = al;
+		this.cf = cf;
+		this.substringLength = substringLength;
+		this.vcc = true;
+	}
+
+	/**
+	 * Configures an inflected-JOS analysis.
+	 *
+	 * @param al                   analysis level
+	 * @param inflectedJosTaxonomy taxonomy to restrict to, or {@code null} for none
+	 */
+	public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
+		String dateTime = timestamp();
+
+		// build the title from the parameter; the distributionTaxonomy field is always
+		// null here, which left the taxonomy part of the title permanently blank
+		this.resultTitle = String.format("InflectedJOS_%s_%s",
+				inflectedJosTaxonomy != null ? inflectedJosTaxonomy.toString() : "",
+				dateTime);
+
+		this.analysisLevel = al;
+		this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
+		this.taxonomyIsSet = inflectedJosTaxonomy != null;
+	}
+
+	/** @return the skip value, or {@code null} when none (or zero) was given */
+	public Integer getSkip() {
+		return skip;
+	}
+
+	public Integer getSubstringLength() {
+		return substringLength;
+	}
+
+	public String getInflectedJosTaxonomy() {
+		return inflectedJosTaxonomy;
+	}
+
+	public void setSubstringLength(Integer substringLength) {
+		this.substringLength = substringLength;
+	}
+
+	public boolean isVcc() {
+		return vcc;
+	}
+
+	public void setVcc(boolean vcc) {
+		this.vcc = vcc;
+	}
+
+	public String getDistributionTaxonomy() {
+		return distributionTaxonomy;
+	}
+
+	public void setDistributionTaxonomy(String distributionTaxonomy) {
+		this.distributionTaxonomy = distributionTaxonomy;
+	}
+
+	public char getDistributionJosWordType() {
+		return distributionJosWordType;
+	}
+
+	public void setDistributionJosWordType(char distributionJosWordType) {
+		this.distributionJosWordType = distributionJosWordType;
+	}
+
+	/**
+	 * Compiles the given filter strings into regex patterns, replacing any previous filter.
+	 *
+	 * @param morphosyntacticFilter filter strings in which {@code *} stands for any single
+	 *                              character; must not be {@code null}
+	 */
+	public void setMorphosyntacticFilter(List<String> morphosyntacticFilter) {
+		// change filter strings to regex patterns
+		this.morphosyntacticFilter = new ArrayList<>();
+		for (String s : morphosyntacticFilter) {
+			this.morphosyntacticFilter.add(Pattern.compile(s.replaceAll("\\*", ".")));
+		}
+	}
+
+	/** @return the compiled morphosyntactic filter, {@code null} if none was set */
+	public List<Pattern> getMsd() {
+		return morphosyntacticFilter;
+	}
+
+	/** @return the current result map (not a copy) */
+	public Map<String, AtomicLong> getResult() {
+		return result;
+	}
+
+	public void setTaxonomy(String taxonomy) {
+		this.taxonomy = taxonomy;
+	}
+
+	public void setTaxonomyIsSet(boolean taxonomyIsSet) {
+		this.taxonomyIsSet = taxonomyIsSet;
+	}
+
+	public char getJOSType() {
+		return JOSType;
+	}
+
+	public void setJOSType(char JOSType) {
+		this.JOSType = JOSType;
+	}
+
+	public boolean isJOSTypeSet() {
+		return JOSTypeIsSet;
+	}
+
+	/**
+	 * Sets the "JOS type is set" flag. Despite sharing its name with
+	 * {@link #setJOSType(char)}, this overload does not touch the type itself.
+	 */
+	public void setJOSType(boolean JOSTypeIsSet) {
+		this.JOSTypeIsSet = JOSTypeIsSet;
+	}
+
+	/**
+	 * Currently does nothing; the former implementation is kept below, commented out.
+	 */
+	public void saveResultToDisk(int... limit) throws UnsupportedEncodingException {
+		// Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
+		//
+		// if (useDB) {
+		// 	result = db.getDump();
+		// 	db.delete();
+		// }
+		//
+		// // if no results and nothing to save, return false
+		// if (!(result.size() > 0)) {
+		// 	analysisProducedResults = false;
+		// 	return;
+		// } else {
+		// 	analysisProducedResults = true;
+		// }
+		//
+		// stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
+		// Export.SetToCSV(stats);
+	}
+
+	// private Map<String, Integer> getSortedResultInflected(Map map) {
+	// 	// first convert to <String, Integer>
+	// 	Map<String, Integer> m = Util.sortByValue(Util.atomicInt2StringAndInt(map), 0);
+	//
+	// 	Map<String, Integer> sortedM = new TreeMap<>();
+	//
+	// 	sortedM.putAll(m);
+	//
+	// 	return sortedM;
+	// }
+
+	/** Converts the counters via {@code Util} and sorts them by value, passing on {@code limit}. */
+	private Map<String, Long> getSortedResult(Map<String, AtomicLong> map, int limit) {
+		return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
+	}
+
+	public String getTaxonomy() {
+		return taxonomy;
+	}
+
+	public boolean isTaxonomySet() {
+		return taxonomyIsSet;
+	}
+
+	public int getnGramLevel() {
+		return nGramLevel;
+	}
+
+	public CalculateFor getCf() {
+		return cf;
+	}
+
+	public AnalysisLevel getAnalysisLevel() {
+		return analysisLevel;
+	}
+
+	public CorpusType getCorpusType() {
+		return corpusType;
+	}
+
+	public void setCorpusType(CorpusType corpusType) {
+		this.corpusType = corpusType;
+	}
+
+	public boolean isGosOrthMode() {
+		return gosOrthMode;
+	}
+
+	public void setGosOrthMode(boolean gosOrthMode) {
+		this.gosOrthMode = gosOrthMode;
+	}
+
+	public Map<String, Object> getSolarHeadBlockFilter() {
+		return solarHeadBlockFilter;
+	}
+
+	public void setSolarHeadBlockFilter(Map<String, Object> solarHeadBlockFilter) {
+		this.solarHeadBlockFilter = solarHeadBlockFilter;
+	}
+
+	public boolean isUseDB() {
+		return useDB;
+	}
+
+	/**
+	 * Switches database use on or off; the database object is created the first time it is
+	 * switched on.
+	 */
+	public void setUseDB(boolean useDB) {
+		if (useDB && db == null) {
+			db = new RDB();
+		}
+		this.useDB = useDB;
+	}
+
+	/**
+	 * Stores results from this batch to a database and clears results map.
+	 * The database object only exists after {@code setUseDB(true)} has been called.
+	 */
+	public void storeTmpResultsToDB() {
+		try {
+			db.writeBatch(result);
+			result = new ConcurrentHashMap<>();
+		} catch (UnsupportedEncodingException e) {
+			e.printStackTrace();
+		}
+	}
+
+	public boolean isAnalysisProducedResults() {
+		return analysisProducedResults;
+	}
+}
--- a/src/main/java/data/StatisticsNew.java
+++ b/src/main/java/data/StatisticsNew.java
@@ -0,0 +1,409 @@
+package data;
+
+import static gui.ValidationUtil.*;
+
+import java.io.UnsupportedEncodingException;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import alg.inflectedJOS.WordFormation;
+import data.Enums.WordLevelType;
+import javafx.collections.ObservableList;
+import util.Export;
+import util.Util;
+import util.db.RDB;
+
+@SuppressWarnings("Duplicates")
+public class StatisticsNew {
+	public final static Logger logger = LogManager.getLogger(StatisticsNew.class);
+
+	private Corpus corpus;
+	private Filter filter;
+
+	private String resultTitle;
+	private Map<String, AtomicLong> result;
+	private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
+	private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedSuffix;
+	private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedPrefix;
+	private boolean useDB;
+	private RDB db;
+	private boolean analysisProducedResults;
+	private LocalDateTime time;
+
+	/**
+	 * Creates the statistics holder for one analysis run.
+	 * <p>
+	 * For {@code AnalysisLevel.WORD_LEVEL} only the two nested (suffix/prefix) maps are created,
+	 * for every other level only the flat {@code result} map. The result title is generated
+	 * immediately and the configuration is written to the debug log.
+	 *
+	 * @param corpus corpus the analysis runs on
+	 * @param filter filter settings of the analysis
+	 * @param useDB  when {@code true} an {@code RDB} instance is created for intermediate storage
+	 */
+	public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
+		this.corpus = corpus;
+		this.filter = filter;
+		this.useDB = useDB;
+
+		if (useDB) {
+			db = new RDB();
+		}
+
+		boolean wordLevel = filter.getAl() == AnalysisLevel.WORD_LEVEL;
+		if (!wordLevel) {
+			result = new ConcurrentHashMap<>();
+		} else {
+			resultNestedSuffix = new ConcurrentHashMap<>();
+			resultNestedPrefix = new ConcurrentHashMap<>();
+		}
+
+		resultTitle = generateResultTitle();
+
+		logger.debug(toString());
+	}
+
+	/**
+	 * Builds the title of the result from the filter and corpus settings. Parts are joined with
+	 * "_":
+	 * <ul>
+	 * <li>string level, n-gram value 0: "Crke", corpus type</li>
+	 * <li>string level, n-gram value 1: "Besede", corpus type</li>
+	 * <li>string level, any other n-gram value: analysis level, corpus type, calculate-for,
+	 * "&lt;n&gt;-gram", "&lt;skip&gt;-preskok"</li>
+	 * <li>any other analysis level: analysis level, corpus type</li>
+	 * </ul>
+	 * A timestamp ({@code dd.MM.yyyy_HH.mm.ss}) is always appended. The timestamp is taken once,
+	 * on the first call, and reused by later calls.
+	 *
+	 * @return the generated title
+	 */
+	private String generateResultTitle() {
+		String separator = "_";
+		StringBuilder sb = new StringBuilder();
+
+		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
+			int ngramLevel = filter.getNgramValue();
+			boolean letters = ngramLevel == 0;
+			boolean words = ngramLevel == 1;
+
+			String analysisName = letters ? "Crke" : words ? "Besede" : filter.getAl().toString();
+			sb.append(analysisName)
+					.append(separator)
+					.append(corpus.getCorpusType().toString())
+					.append(separator);
+
+			// real n-grams additionally carry calculate-for, n-gram and skip values
+			if (!letters && !words) {
+				sb.append(filter.getCalculateFor().toString())
+						.append(separator);
+				sb.append(ngramLevel).append("-gram")
+						.append(separator);
+				// TODO: assure skip is not null but zero
+				sb.append(filter.getSkipValue()).append("-preskok")
+						.append(separator);
+			}
+		} else {
+			sb.append(filter.getAl().toString()) // analysis level
+					.append(separator)
+					.append(corpus.getCorpusType().toString())
+					.append(separator);
+		}
+		// not part of the title yet: msd, taxonomy, cvv + length
+
+		if (this.time == null) {
+			this.time = LocalDateTime.now();
+		}
+
+		// "HH" (0-23) instead of "hh" (1-12): without an AM/PM marker the 12-hour form produces
+		// the same title for a morning and an afternoon run
+		sb.append(time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_HH.mm.ss")));
+		return sb.toString();
+	}
+
+	/**
+	 * Returns the {@code analysisProducedResults} flag; within this class it is written by the
+	 * save methods and by {@link #setAnalysisProducedResults(boolean)}.
+	 */
+	public boolean isAnalysisProducedResults() {
+		return analysisProducedResults;
+	}
+
+	/**
+	 * Overrides the {@code analysisProducedResults} flag.
+	 *
+	 * @param analysisProducedResults new flag value
+	 */
+	public void setAnalysisProducedResults(boolean analysisProducedResults) {
+		this.analysisProducedResults = analysisProducedResults;
+	}
+
+	/**
+	 * Multi-line summary used for logging: corpus type with the number of detected files, the
+	 * storage mode and the filter description, each on its own bulleted line.
+	 */
+	@Override
+	public String toString() {
+		final String bullet = "\n\t- ";
+		String storageMode = useDB ? "use DB" : "run in memory";
+
+		StringBuilder summary = new StringBuilder();
+		summary.append(bullet).append("Statistic properties:");
+		summary.append(bullet)
+				.append(corpus.getCorpusType().toString())
+				.append(String.format(" (%d files)", corpus.getDetectedCorpusFiles().size()));
+		summary.append(bullet).append(storageMode);
+		summary.append(bullet).append(filter.toString());
+
+		return summary.toString();
+	}
+
+	/** Returns the most recently generated result title. */
+	public String getResultTitle() {
+		return resultTitle;
+	}
+
+	// ****************************************
+	// ***************** util *****************
+	// ****************************************
+
+	/**
+	 * Writes the current {@code result} map to the database with {@code db.writeBatch} and then
+	 * replaces {@code result} with a fresh, empty {@link ConcurrentHashMap}.
+	 * <p>
+	 * An {@link UnsupportedEncodingException} from {@code writeBatch} is logged and not
+	 * propagated; in that case {@code result} is left untouched.
+	 */
+	public void storeTmpResultsToDB() {
+		try {
+			// NOTE(review): db is only created when the constructor received useDB == true -
+			// confirm callers check that before calling this method
+			db.writeBatch(result);
+			result = new ConcurrentHashMap<>();
+		} catch (UnsupportedEncodingException e) {
+			logger.error("Store tmp results to DB", e);
+			// e.printStackTrace();
+		}
+	}
+
+	/** Returns the filter this instance was created with. */
+	public Filter getFilter() {
+		return filter;
+	}
+
+	/** Returns the corpus this instance was created with. */
+	public Corpus getCorpus() {
+		return corpus;
+	}
+
+	/**
+	 * Sorts the flat result map and exports it as CSV to the results location chosen for the
+	 * corpus. When the database is in use, the result is first replaced by the database dump and
+	 * the database is deleted.
+	 *
+	 * @param limit optional limit, resolved with {@code Util.getValidInt}
+	 * @return {@code false} when there is nothing to save, {@code true} after the export
+	 * @throws UnsupportedEncodingException declared for the database dump / export calls
+	 */
+	public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException {
+		if (useDB) {
+			result = db.getDump();
+			db.delete();
+		}
+
+		// the flag always mirrors whether this call found something to save
+		analysisProducedResults = !result.isEmpty();
+		if (!analysisProducedResults) {
+			return false;
+		}
+
+		Map<String, Long> sorted = getSortedResult(result, Util.getValidInt(limit));
+
+		Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
+		stats.add(ImmutablePair.of(resultTitle, sorted));
+
+		Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock());
+		return true;
+	}
+
+	public boolean saveResultNestedToDisk(int... limit) throws UnsupportedEncodingException {
+		resultTitle = generateResultTitle();
+
+		if (useDB) {
+			result = db.getDump();
+			db.delete();
+		}
+		Map<WordLevelType, Map<String, Map<String, Long>>> results = new HashMap<>();
+
+		if (!isEmpty(resultNestedSuffix)) {
+			results.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit)));
+		}
+
+		if (!isEmpty(resultNestedPrefix)) {
+			results.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit)));
+		}
+
+		// if no results and nothing to save, return false
+		if (!(results.size() > 0)) {
+			analysisProducedResults = false;
+			return false;
+		} else {
+			analysisProducedResults = true;
+		}
+
+		Export.nestedMapToCSV(resultTitle, results, corpus.getChosenResultsLocation(), headerInfoBlock());
+		return true;
+	}
+
+	/**
+	 * Switches the filter to {@code AnalysisLevel.WORD_FORMATION}, regenerates the title, lets
+	 * {@code WordFormation.calculateStatistics} process this instance and exports
+	 * {@code resultCustom} as CSV.
+	 *
+	 * @return {@code false} when the flat result is empty, {@code true} after the export
+	 * @throws UnsupportedEncodingException declared for the database dump / export calls
+	 */
+	public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
+		filter.setAl(AnalysisLevel.WORD_FORMATION);
+		resultTitle = generateResultTitle();
+
+		if (useDB) {
+			result = db.getDump();
+			db.delete();
+		}
+
+		// the flag always mirrors whether this call found something to save
+		analysisProducedResults = !result.isEmpty();
+		if (!analysisProducedResults) {
+			return false;
+		}
+
+		WordFormation.calculateStatistics(this);
+
+		Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
+		return true;
+	}
+
+	/**
+	 * Applies {@link #getSortedResult} to every inner map of {@code nestedMap}.
+	 *
+	 * @param nestedMap counters grouped by an outer key
+	 * @param limit     limit passed through {@code Util.getValidInt} for each inner map
+	 * @return a new {@link HashMap} with the same outer keys and the sorted inner maps
+	 */
+	private Map<String, Map<String, Long>> sortNestedMap(Map<String, ConcurrentHashMap<String, AtomicLong>> nestedMap, int limit) {
+		Map<String, Map<String, Long>> sortedByKey = new HashMap<>();
+
+		for (Map.Entry<String, ConcurrentHashMap<String, AtomicLong>> group : nestedMap.entrySet()) {
+			Map<String, Long> sortedGroup = getSortedResult(group.getValue(), Util.getValidInt(limit));
+			sortedByKey.put(group.getKey(), sortedGroup);
+		}
+
+		return sortedByKey;
+	}
+
+
+	/**
+	 * Converts the counters with {@code Util.atomicInt2StringAndInt} and hands the converted map,
+	 * together with {@code limit}, to {@code Util.sortByValue}.
+	 *
+	 * @param map   counters keyed by string
+	 * @param limit forwarded unchanged to {@code Util.sortByValue}; presumably the maximum number
+	 *              of entries to keep - TODO confirm against Util
+	 * @return whatever {@code Util.sortByValue} returns for the converted map
+	 */
+	private Map<String, Long> getSortedResult(Map<String, AtomicLong> map, int limit) {
+		return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
+	}
+
+	/**
+	 * Counts one more occurrence of {@code o} in the flat result map: a new key starts at 1, an
+	 * existing key is incremented.
+	 *
+	 * @param o key to count
+	 */
+	public void updateResults(String o) {
+		AtomicLong existing = result.putIfAbsent(o, new AtomicLong(1));
+
+		// Increment the counter putIfAbsent handed back instead of reading the field again:
+		// storeTmpResultsToDB() may swap in a new map in between, and a second lookup on that
+		// new map would return null.
+		if (existing != null) {
+			existing.incrementAndGet();
+		}
+	}
+
+	/**
+	 * Returns the flat result map itself (no copy). The constructor does not create it for
+	 * {@code AnalysisLevel.WORD_LEVEL}, so it may be {@code null} in that case.
+	 */
+	public Map<String, AtomicLong> getResult() {
+		return result;
+	}
+
+	/** Returns the {@code resultCustom} table itself (no copy); {@code null} until it is set. */
+	public Object[][] getResultCustom() {
+		return resultCustom;
+	}
+
+	/**
+	 * Replaces the {@code resultCustom} table, which {@link #recalculateAndSaveResultToDisk()}
+	 * exports.
+	 *
+	 * @param resultCustom new table; the reference is stored as given (no copy)
+	 */
+	public void setResultCustom(Object[][] resultCustom) {
+		this.resultCustom = resultCustom;
+	}
+
+	/**
+	 * Counts one occurrence of {@code stringValue} under {@code key} in the nested map selected
+	 * by {@code type}. Types other than {@code SUFFIX} and {@code PREFIX} are ignored.
+	 *
+	 * @param type        which nested map to update
+	 * @param key         outer key
+	 * @param stringValue inner key whose counter is incremented
+	 */
+	public void updateResultsNested(WordLevelType type, String key, String stringValue) {
+		if (type == WordLevelType.SUFFIX) {
+			updateResultsNestedSuffix(key, stringValue);
+		} else if (type == WordLevelType.PREFIX) {
+			updateResultsNestedPrefix(key, stringValue);
+		}
+	}
+
+	/**
+	 * Counts one occurrence of {@code stringValue} under {@code key} in the nested suffix map.
+	 * The inner map is created on first use; a new inner key starts at 1, an existing one is
+	 * incremented.
+	 *
+	 * @param key         outer key
+	 * @param stringValue inner key whose counter is incremented
+	 */
+	public void updateResultsNestedSuffix(String key, String stringValue) {
+		// one lookup that also creates the inner map when the key is new
+		ConcurrentHashMap<String, AtomicLong> counters =
+				resultNestedSuffix.computeIfAbsent(key, k -> new ConcurrentHashMap<>());
+
+		AtomicLong existing = counters.putIfAbsent(stringValue, new AtomicLong(1));
+		if (existing != null) {
+			existing.incrementAndGet();
+		}
+	}
+
+	/**
+	 * Counts one occurrence of {@code stringValue} under {@code key} in the nested prefix map.
+	 * The inner map is created on first use; a new inner key starts at 1, an existing one is
+	 * incremented.
+	 *
+	 * @param key         outer key
+	 * @param stringValue inner key whose counter is incremented
+	 */
+	public void updateResultsNestedPrefix(String key, String stringValue) {
+		// one lookup that also creates the inner map when the key is new
+		ConcurrentHashMap<String, AtomicLong> counters =
+				resultNestedPrefix.computeIfAbsent(key, k -> new ConcurrentHashMap<>());
+
+		AtomicLong existing = counters.putIfAbsent(stringValue, new AtomicLong(1));
+		if (existing != null) {
+			existing.incrementAndGet();
+		}
+	}
+
+	/**
+	 * Collects the descriptive header that is handed to the export methods together with the
+	 * results: corpus, date, analysis type, the string-level settings (n-gram level, skip,
+	 * calculate-for, MSD, taxonomy), the taxonomy names and, for the Solar corpus, the chosen
+	 * Solar filters.
+	 *
+	 * @return header entries in the order they were added
+	 */
+	private LinkedHashMap<String, String> headerInfoBlock() {
+		LinkedHashMap<String, String> info = new LinkedHashMap<>();
+
+		info.put("Korpus:", corpus.getCorpusType().toString());
+		// "HH" (0-23): the 12-hour "hh" is ambiguous without an AM/PM marker
+		info.put("Datum:", time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy HH:mm")));
+
+		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
+			Integer ngramLevel = filter.getNgramValue();
+
+			// analysis name; every branch uses the same "Analiza:" key
+			if (ngramLevel == 0) {
+				info.put("Analiza:", "Črke");
+			} else if (ngramLevel == 1) {
+				info.put("Analiza:", "Besede");
+			} else {
+				info.put("Analiza:", filter.getAl().toString());
+			}
+
+			// n-gram level
+			if (ngramLevel > 1) {
+				info.put("n-gram nivo:", String.valueOf(ngramLevel));
+			} else if (ngramLevel == 1) {
+				info.put("n-gram nivo:", "nivo besed");
+			} else {
+				info.put("n-gram nivo:", "nivo črk");
+			}
+
+			// skip value, only meaningful for real n-grams
+			if (ngramLevel > 1) {
+				info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
+			}
+
+			// calculate for
+			info.put("Izračunaj za:", filter.getCalculateFor().toString());
+
+			// msd patterns, separated by spaces
+			if (!isEmpty(filter.getMsd())) {
+				StringBuilder msdPattern = new StringBuilder();
+				for (Pattern pattern : filter.getMsd()) {
+					msdPattern.append(pattern.toString()).append(" ");
+				}
+
+				info.put("MSD:", msdPattern.toString());
+			}
+
+			// taxonomy as stored in the filter
+			if (!isEmpty(filter.getTaxonomy())) {
+				info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
+			}
+		} else {
+			info.put("Analiza:", filter.getAl().toString());
+		}
+
+		if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+			ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
+
+			info.put("Taksonomija: ", "");
+			// every name gets its own key made of a growing run of spaces, so the keys stay unique
+			String sep = "";
+			for (String s : tax) {
+				info.put(sep = sep + " ", s);
+			}
+		}
+
+		if (corpus.getCorpusType() == CorpusType.SOLAR) {
+			HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
+
+			if (!isEmpty(filters)) {
+				info.put("Dodatni filtri: ", "");
+
+				for (Map.Entry<String, ObservableList<String>> f : filters.entrySet()) {
+					info.put(f.getKey(), StringUtils.join(f.getValue(), ", "));
+				}
+			}
+		}
+
+		return info;
+	}
+}
--- a/src/main/java/data/Tax.java
+++ b/src/main/java/data/Tax.java
@@ -0,0 +1,175 @@
+package data;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+import gui.ValidationUtil;
+import javafx.collections.FXCollections;
+import javafx.collections.ObservableList;
+
+public class Tax {
+	private static LinkedHashMap<String, String> GIGAFIDA_TAXONOMY;
+	private static LinkedHashMap<String, String> GOS_TAXONOMY;
+	private static final HashSet<CorpusType> corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES));
+
+	static {
+		// GIGAFIDA ----------------------------
+		GIGAFIDA_TAXONOMY = new LinkedHashMap<>();
+
+		GIGAFIDA_TAXONOMY.put("SSJ.T", "tisk");
+		GIGAFIDA_TAXONOMY.put("SSJ.T.K", "tisk-knjižno");
+		GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", "tisk-knjižno-leposlovno");
+		GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", "tisk-knjižno-strokovno");
+		GIGAFIDA_TAXONOMY.put("SSJ.T.P", "tisk-periodično");
+		GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", "tisk-periodično-časopis");
+		GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", "tisk-periodično-revija");
+		GIGAFIDA_TAXONOMY.put("SSJ.T.D", "tisk-drugo");
+		GIGAFIDA_TAXONOMY.put("SSJ.I", "internet");
+
+		GIGAFIDA_TAXONOMY.put("Ft.P", "prenosnik");
+		GIGAFIDA_TAXONOMY.put("Ft.P.G", "prenosnik-govorni");
+		GIGAFIDA_TAXONOMY.put("Ft.P.E", "prenosnik-elektronski");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P", "prenosnik-pisni");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O", "prenosnik-pisni-objavljeno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", "prenosnik-pisni-objavljeno-knjižno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", "prenosnik-pisni-objavljeno-periodično");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", "prenosnik-pisni-objavljeno-periodično-časopisno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", "prenosnik-pisni-objavljeno-periodično-časopisno-dnevno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", "prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", "prenosnik-pisni-objavljeno-periodično-časopisno-tedensko");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", "prenosnik-pisni-objavljeno-periodično-revialno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", "prenosnik-pisni-objavljeno-periodično-revialno-tedensko");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", "prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", "prenosnik-pisni-objavljeno-periodično-revialno-mesečno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", "prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", "prenosnik-pisni-objavljeno-periodično-revialno-občasno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.N", "prenosnik-pisni-neobjavljeno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", "prenosnik-pisni-neobjavljeno-javno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", "prenosnik-pisni-neobjavljeno-interno");
+		GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", "prenosnik-pisni-neobjavljeno-zasebno");
+
+		GIGAFIDA_TAXONOMY.put("Ft.Z", "zvrst");
+		GIGAFIDA_TAXONOMY.put("Ft.Z.U", "zvrst-umetnostna");
+		GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", "zvrst-umetnostna-pesniška");
+		GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", "zvrst-umetnostna-prozna");
+		GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", "zvrst-umetnostna-dramska");
+		GIGAFIDA_TAXONOMY.put("Ft.Z.N", "zvrst-neumetnostna");
+		GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", "zvrst-neumetnostna-strokovna");
+		GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", "zvrst-neumetnostna-strokovna-humanistična in družboslovna");
+		GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", "zvrst-neumetnostna-strokovna-naravoslovna in tehnična");
+		GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", "zvrst-neumetnostna-nestrokovna");
+		GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", "zvrst-neumetnostna-pravna");
+		GIGAFIDA_TAXONOMY.put("Ft.L", "zvrst-lektorirano");
+		GIGAFIDA_TAXONOMY.put("Ft.L.D", "zvrst-lektorirano-da");
+		GIGAFIDA_TAXONOMY.put("Ft.L.N", "zvrst-lektorirano-ne");
+
+		// GOS ----------------------------------
+		GOS_TAXONOMY = new LinkedHashMap<>();
+
+		GOS_TAXONOMY.put("gos.T", "diskurz");
+		GOS_TAXONOMY.put("gos.T.J", "diskurz-javni");
+		GOS_TAXONOMY.put("gos.T.J.I", "diskurz-javni-informativno-izobraževalni");
+		GOS_TAXONOMY.put("gos.T.J.R", "diskurz-javni-razvedrilni");
+		GOS_TAXONOMY.put("gos.T.N", "diskurz-nejavni");
+		GOS_TAXONOMY.put("gos.T.N.N", "diskurz-nejavni-nezasebni");
+		GOS_TAXONOMY.put("gos.T.N.Z", "diskurz-nejavni-zasebni");
+
+		GOS_TAXONOMY.put("gos.S", "situacija");
+		GOS_TAXONOMY.put("gos.S.R", "situacija-radio");
+		GOS_TAXONOMY.put("gos.S.T", "situacija-televizija");
+	}
+
+	/**
+	 * Returns the names of the complete default taxonomy of the given corpus type: the Gigafida
+	 * taxonomy for GIGAFIDA and CCKRES, the GOS taxonomy for GOS, and an empty list otherwise.
+	 */
+	public static ObservableList<String> getTaxonomyForComboBox(CorpusType corpusType) {
+		Collection<String> names;
+
+		if (corpusType == CorpusType.GOS) {
+			names = GOS_TAXONOMY.values();
+		} else if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
+			names = GIGAFIDA_TAXONOMY.values();
+		} else {
+			names = new ArrayList<>();
+		}
+
+		return FXCollections.observableArrayList(names);
+	}
+
+	/**
+	 * Returns the taxonomy names whose codes occur in {@code foundTax}, in the order in which
+	 * they are defined in the taxonomy of the given corpus type.
+	 *
+	 * @param corpusType corpus type that selects the taxonomy (Gigafida for GIGAFIDA and CCKRES,
+	 *                   GOS for GOS, empty otherwise)
+	 * @param foundTax   taxonomy codes found in the corpus headers
+	 */
+	public static ObservableList<String> getTaxonomyForComboBox(CorpusType corpusType, HashSet<String> foundTax) {
+		LinkedHashMap<String, String> codeToName;
+
+		if (corpusType == CorpusType.GOS) {
+			codeToName = GOS_TAXONOMY;
+		} else if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
+			codeToName = GIGAFIDA_TAXONOMY;
+		} else {
+			codeToName = new LinkedHashMap<>();
+		}
+
+		ArrayList<String> namesForCombo = new ArrayList<>();
+
+		// walking the taxonomy (not foundTax) keeps the definition order
+		for (Map.Entry<String, String> entry : codeToName.entrySet()) {
+			if (foundTax.contains(entry.getKey())) {
+				namesForCombo.add(entry.getValue());
+			}
+		}
+
+		return FXCollections.observableArrayList(namesForCombo);
+	}
+
+	/**
+	 * Returns the set of corpus types that have a taxonomy (GIGAFIDA, GOS, CCKRES). The shared
+	 * internal set is returned directly, not a copy.
+	 */
+	public static HashSet<CorpusType> getCorpusTypesWithTaxonomy() {
+		return corpusTypesWithTaxonomy;
+	}
+
+	/**
+	 * Translates taxonomy names back to their codes for the taxonomy of the given corpus type.
+	 *
+	 * @param taxonomyNames names to translate; an empty list yields an empty result
+	 * @param corpusType    corpus type that selects the taxonomy
+	 * @return one entry per input name, in input order; {@code null} for a name that is not part
+	 *         of the selected taxonomy
+	 */
+	public static ArrayList<String> getTaxonomyCodes(ArrayList<String> taxonomyNames, CorpusType corpusType) {
+		if (ValidationUtil.isEmpty(taxonomyNames)) {
+			return new ArrayList<>();
+		}
+
+		LinkedHashMap<String, String> codeToName;
+		if (corpusType == CorpusType.GOS) {
+			codeToName = GOS_TAXONOMY;
+		} else if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
+			codeToName = GIGAFIDA_TAXONOMY;
+		} else {
+			codeToName = new LinkedHashMap<>();
+		}
+
+		// inverted view (name -> code) for the lookup below
+		Map<String, String> nameToCode = codeToName.entrySet()
+				.stream()
+				.collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
+
+		return taxonomyNames.stream()
+				.map(nameToCode::get)
+				.collect(Collectors.toCollection(ArrayList::new));
+	}
+
+	/**
+	 * Returns the proper names for a list of taxonomy codes.
+	 *
+	 * @param corpusType corpus type that selects the taxonomy (Gigafida for GIGAFIDA and CCKRES,
+	 *                   GOS for GOS, empty otherwise)
+	 * @param taxonomy   taxonomy codes to translate
+	 *
+	 * @return one entry per code, in input order; {@code null} for a code that is not part of the
+	 *         selected taxonomy
+	 */
+	public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<String> taxonomy) {
+		LinkedHashMap<String, String> codeToName;
+
+		if (corpusType == CorpusType.GOS) {
+			codeToName = GOS_TAXONOMY;
+		} else if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
+			codeToName = GIGAFIDA_TAXONOMY;
+		} else {
+			codeToName = new LinkedHashMap<>();
+		}
+
+		return taxonomy.stream()
+				.map(codeToName::get)
+				.collect(Collectors.toCollection(ArrayList::new));
+	}
+}
--- a/src/main/java/data/Taxonomy.java
+++ b/src/main/java/data/Taxonomy.java
@@ -0,0 +1,171 @@
+package data;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+import javafx.collections.FXCollections;
+import javafx.collections.ObservableList;
+
+public enum Taxonomy {
+	// GOS
+	JAVNI("javni", "T.J", "gos"),
+	INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "T.J.I", "gos"),
+	RAZVEDRILNI("razvedrilni", "T.J.R", "gos"),
+	NEJAVNI("nejavni", "T.N", "gos"),
+	NEZASEBNI("nezasebni", "T.N.N", "gos"),
+	ZASEBNI("zasebni", "T.N.Z", "gos"),
+	OSEBNI_STIK("osebni stik", "K.O", "gos"),
+	TELEFON("telefon", "K.P", "gos"),
+	RADIO("radio", "K.R", "gos"),
+	TELEVIZIJA("televizija", "K.T", "gos"),
+	// Gigafida
+	KNJIZNO("knjižno", "T.K", "gigafida"),
+	LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"),
+	STROKOVNO("strokovno", "T.K.S", "gigafida"),
+	PERIODICNO("periodično", "T.P", "gigafida"),
+	CASOPIS("časopis", "T.P.C", "gigafida"),
+	REVIJA("revija", "T.P.R", "gigafida"),
+	INTERNET("internet", "I", "gigafida"),
+
+	SSJ_TISK("tisk", "SSJ.T", "gigafida"),
+	SSJ_KNJIZNO("opis", "identifikator", "gigafida"),
+	SSJ_LEPOSLOVNO("opis", "identifikator", "gigafida"),
+	SSJ_STROKOVNO("opis", "identifikator", "gigafida"),
+	SSJ_PERIODICNO("opis", "identifikator", "gigafida"),
+	SSJ_CASOPIS("opis", "identifikator", "gigafida"),
+	SSJ_REVIJA("opis", "identifikator", "gigafida"),
+	SSJ_DRUGO("opis", "identifikator", "gigafida"),
+	SSJ_INTERNET("opis", "identifikator", "gigafida"),
+	FT_P_PRENOSNIK("opis", "identifikator", "gigafida"),
+	FT_P_GOVORNI("opis", "identifikator", "gigafida"),
+	FT_P_ELEKTRONSKI("opis", "identifikator", "gigafida"),
+	FT_P_PISNI("opis", "identifikator", "gigafida"),
+	FT_P_OBJAVLJENO("opis", "identifikator", "gigafida"),
+	FT_P_KNJIZNO("opis", "identifikator", "gigafida"),
+	FT_P_PERIODICNO("opis", "identifikator", "gigafida"),
+	FT_P_CASOPISNO("opis", "identifikator", "gigafida"),
+	FT_P_DNEVNO("opis", "identifikator", "gigafida"),
+	FT_P_VECKRAT_TEDENSKO("opis", "identifikator", "gigafida"),
+	// FT_P_TEDENSKO("opis", "identifikator", "gigafida"),
+	FT_P_REVIALNO("opis", "identifikator", "gigafida"),
+	FT_P_TEDENSKO("opis", "identifikator", "gigafida"),
+	FT_P_STIRINAJSTDNEVNO("opis", "identifikator", "gigafida"),
+	FT_P_MESECNO("opis", "identifikator", "gigafida"),
+	FT_P_REDKEJE_KOT_MESECNO("opis", "identifikator", "gigafida"),
+	FT_P_OBCASNO("opis", "identifikator", "gigafida"),
+	FT_P_NEOBJAVLJENO("opis", "identifikator", "gigafida"),
+	FT_P_JAVNO("opis", "identifikator", "gigafida"),
+	FT_P_INTERNO("opis", "identifikator", "gigafida"),
+	FT_P_ZASEBNO("opis", "identifikator", "gigafida"),
+	FT_ZVRST("opis", "identifikator", "gigafida"),
+	FT_UMETNOSTNA("opis", "identifikator", "gigafida"),
+	FT_PESNISKA("opis", "identifikator", "gigafida"),
+	FT_PROZNA("opis", "identifikator", "gigafida"),
+	FT_DRAMSKA("opis", "identifikator", "gigafida"),
+	FT_NEUMETNOSTNA("opis", "identifikator", "gigafida"),
+	FT_STROKOVNA("opis", "identifikator", "gigafida"),
+	FT_HID("opis", "identifikator", "gigafida"),
+	FT_NIT("opis", "identifikator", "gigafida"),
+	FT_NESTROKOVNA("opis", "identifikator", "gigafida"),
+	FT_PRAVNA("opis", "identifikator", "gigafida"),
+	FT_LEKTORIRANO("opis", "identifikator", "gigafida"),
+	FT_DA("opis", "identifikator", "gigafida"),
+	FT_NE("opis", "identifikator", "gigafida");
+
+
+
+	private final String name;
+	private final String taxonomy;
+	private final String corpus;
+
+	/**
+	 * @param name       display name, returned by {@link #toString()}
+	 * @param taxonomy   taxonomy identifier, returned by {@link #getTaxonomnyString()}
+	 * @param corpusType corpus label this entry belongs to ("gos" or "gigafida" in the constants
+	 *                   above); used by {@link #getDefaultForComboBox(String)}
+	 */
+	Taxonomy(String name, String taxonomy, String corpusType) {
+		this.name = name;
+		this.taxonomy = taxonomy;
+		this.corpus = corpusType;
+	}
+
+	/** Returns the display name given to the constructor, not the enum constant's identifier. */
+	public String toString() {
+		return this.name;
+	}
+
+	/** Returns the taxonomy identifier given to the constructor (e.g. "T.J" for JAVNI). */
+	public String getTaxonomnyString() {
+		return this.taxonomy;
+	}
+
+	public static Taxonomy factory(String tax) {
+		if (tax != null) {
+			// GOS
+			if (JAVNI.toString().equals(tax)) {
+				return JAVNI;
+			}
+			if (INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) {
+				return INFORMATIVNO_IZOBRAZEVALNI;
+			}
+			if (RAZVEDRILNI.toString().equals(tax)) {
+				return RAZVEDRILNI;
+			}
+			if (NEJAVNI.toString().equals(tax)) {
+				return NEJAVNI;
+			}
+			if (NEZASEBNI.toString().equals(tax)) {
+				return NEZASEBNI;
+			}
+			if (ZASEBNI.toString().equals(tax)) {
+				return ZASEBNI;
+			}
+			if (OSEBNI_STIK.toString().equals(tax)) {
+				return OSEBNI_STIK;
+			}
+			if (TELEFON.toString().equals(tax)) {
+				return TELEFON;
+			}
+			if (RADIO.toString().equals(tax)) {
+				return RADIO;
+			}
+			if (TELEVIZIJA.toString().equals(tax)) {
+				return TELEVIZIJA;
+			}
+
+			// Gigafida
+			// if (TISK.toString().equals(tax)) {
+			// 	return TISK;
+			// }
+			if (KNJIZNO.toString().equals(tax)) {
+				return KNJIZNO;
+			}
+			if (LEPOSLOVNO.toString().equals(tax)) {
+				return LEPOSLOVNO;
+			}
+			if (STROKOVNO.toString().equals(tax)) {
+				return STROKOVNO;
+			}
+			if (PERIODICNO.toString().equals(tax)) {
+				return PERIODICNO;
+			}
+			if (CASOPIS.toString().equals(tax)) {
+				return CASOPIS;
+			}
+			if (REVIJA.toString().equals(tax)) {
+				return REVIJA;
+			}
+			if (INTERNET.toString().equals(tax)) {
+				return INTERNET;
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * Returns the display names of all constants whose corpus label equals {@code corpusType},
+	 * in declaration order.
+	 *
+	 * @param corpusType corpus label to match, e.g. "gos" or "gigafida"
+	 */
+	public static ObservableList<String> getDefaultForComboBox(String corpusType) {
+		ArrayList<String> names = new ArrayList<>();
+
+		for (Taxonomy entry : Taxonomy.values()) {
+			if (entry.corpus.equals(corpusType)) {
+				names.add(entry.name);
+			}
+		}
+
+		return FXCollections.observableArrayList(names);
+	}
+
+	/**
+	 * Convenience overload that matches on {@code corpusType.toString()}.
+	 * NOTE(review): this only finds entries if {@code CorpusType.toString()} yields the labels
+	 * used by the constants ("gos", "gigafida") - confirm against CorpusType.
+	 */
+	public static ObservableList<String> getDefaultForComboBox(CorpusType corpusType) {
+		return getDefaultForComboBox(corpusType.toString());
+	}
+}
--- a/src/main/java/data/Validation.java
+++ b/src/main/java/data/Validation.java
@@ -0,0 +1,53 @@
+package data;
+
+import static gui.ValidationUtil.*;
+
+import java.util.ArrayList;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.StringUtils;
+
+import gui.Messages;
+import gui.ValidationUtil;
+
+public class Validation {
+
+	/**
+	 * Validates the filter settings of a string-level analysis.
+	 * <p>
+	 * Side effect: a missing skip value is replaced by 0 on the given filter.
+	 *
+	 * @param filter filter to validate
+	 * @return {@code null} when the filter is valid, otherwise all error messages joined with
+	 *         ", \n"
+	 */
+	public static String validateForStringLevel(Filter filter) {
+		ArrayList<String> errors = new ArrayList<>();
+
+		// should not be null, error if null, because init failed
+		Integer ngramValue = filter.getNgramValue();
+		if (ngramValue == null) {
+			errors.add(Messages.MISSING_NGRAM_LEVEL);
+		}
+
+		// should not be null, error if null, because init failed
+		if (filter.getCalculateFor() == null) {
+			errors.add(Messages.MISSING_CALCULATE_FOR);
+		}
+
+		if (filter.getSkipValue() == null) {
+			filter.setSkipValue(0);
+		}
+
+		// The remaining checks compare against the n-gram value; a missing value has already
+		// been reported above and must not be unboxed here.
+		if (ngramValue != null) {
+			ArrayList<Pattern> msd = filter.getMsd();
+
+			// one msd pattern is expected per n-gram token
+			if (ngramValue > 0 && !ValidationUtil.isEmpty(msd) && ngramValue != msd.size()) {
+				errors.add(String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, ngramValue, msd.size()));
+			}
+
+			if (ngramValue == 0 && isEmpty(filter.getStringLength())) {
+				// if count letters, make sure that the length is given
+				// TODO: check that words we're adding in xml reader are longer than this value
+				errors.add(Messages.MISSING_STRING_LENGTH);
+			}
+		}
+
+		return isEmpty(errors) ? null : StringUtils.join(errors, ", \n");
+	}
+}
--- a/src/main/java/data/Word.java
+++ b/src/main/java/data/Word.java
@@ -0,0 +1,141 @@
+package data;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.HashSet;
+
+import org.apache.commons.lang3.StringUtils;
+
+import data.Enums.Msd;
+import gui.ValidationUtil;
+
+/**
+ * A single corpus token: surface form, lemma and (padded) MSD tag.
+ */
+public class Word implements Serializable {
+	public static final char PAD_CHARACTER = '-';
+
+	private String word;
+	private String lemma;
+	private String msd;
+	// NOTE(review): this is an instance field of a Serializable class, so it is part of the
+	// serialized form; left as is on purpose - confirm before turning it into a static constant.
+	private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));
+
+	/**
+	 * Creates a word from its surface form, lemma and MSD tag. The MSD is padded with
+	 * {@link #PAD_CHARACTER} to the length expected for its type. The surface form is
+	 * lower-cased unless the MSD is empty or starts with "Sl"; capitalisation is kept only
+	 * for proper nouns.
+	 * <p>
+	 * Possible values of the first MSD character (part of speech):
+	 * <ul>
+	 * <li>S = noun</li>
+	 * <li>G = verb</li>
+	 * <li>P = adjective</li>
+	 * <li>R = adverb</li>
+	 * <li>Z = pronoun</li>
+	 * <li>K = numeral</li>
+	 * <li>D = preposition</li>
+	 * <li>V = conjunction</li>
+	 * <li>L = particle</li>
+	 * <li>M = interjection</li>
+	 * <li>O = abbreviation</li>
+	 * <li>N = unclassified</li>
+	 * </ul>
+	 *
+	 * @param word  surface form
+	 * @param lemma lemma
+	 * @param msd   MSD tag, may be empty
+	 */
+	public Word(String word, String lemma, String msd) {
+		this.lemma = lemma;
+		this.msd = normalizeMsd(msd);
+
+		boolean properNoun = !ValidationUtil.isEmpty(this.msd)
+				&& this.msd.charAt(0) == 'S'
+				&& this.msd.length() >= 2
+				&& this.msd.charAt(1) == 'l';
+
+		if (!ValidationUtil.isEmpty(this.msd) && !properNoun) {
+			// locale-independent lower-casing: the default locale must not change the result
+			this.word = word.toLowerCase(java.util.Locale.ROOT);
+		} else {
+			this.word = word;
+		}
+	}
+
+	/** Creates an empty word; all fields stay {@code null} until set. */
+	public Word() {
+	}
+
+	/**
+	 * Creates a word from its surface form only; lemma and MSD stay {@code null} and the
+	 * surface form is stored unchanged.
+	 *
+	 * @param word surface form
+	 */
+	public Word(String word) {
+		this.word = word;
+	}
+
+	/**
+	 * Appends a number of '-' to msds which are not properly sized.
+	 * E.g. nouns should have 5 attributes, yet the last one isn't always defined (Somei vs. Sometd)
+	 *
+	 * @param msdInput raw MSD tag, may be empty
+	 *
+	 * @return the padded MSD, or an empty string for empty input
+	 */
+	private String normalizeMsd(String msdInput) {
+		if (ValidationUtil.isEmpty(msdInput)) {
+			return "";
+		}
+
+		return StringUtils.rightPad(msdInput, Msd.getMsdLengthForType(msdInput), PAD_CHARACTER);
+	}
+
+	public String getWord() {
+		return word;
+	}
+
+	/** Returns the consonant/vowel pattern of the surface form, see {@link #covertToCvv}. */
+	public String getCVVWord() {
+		return covertToCvv(word);
+	}
+
+	/** Returns the consonant/vowel pattern of the lemma, see {@link #covertToCvv}. */
+	public String getCVVLemma() {
+		return covertToCvv(lemma);
+	}
+
+	/**
+	 * Replaces every vowel (a, e, i, o, u in either case) with 'V' and every other character
+	 * with 'C'.
+	 *
+	 * @param s string to convert
+	 * @return pattern of the same length as {@code s}
+	 */
+	private String covertToCvv(String s) {
+		char[] pattern = s.toCharArray();
+
+		for (int i = 0; i < pattern.length; i++) {
+			// capitalised proper nouns are kept by the constructor, so compare case-insensitively
+			char lower = Character.toLowerCase(pattern[i]);
+			pattern[i] = VOWELS.contains(lower) ? 'V' : 'C';
+		}
+
+		return new String(pattern);
+	}
+
+	public void setWord(String word) {
+		this.word = word;
+	}
+
+	public String getLemma() {
+		return lemma;
+	}
+
+	public void setLemma(String lemma) {
+		this.lemma = lemma;
+	}
+
+	public String getMsd() {
+		return msd;
+	}
+
+	/** Three-line description: surface form, lemma and MSD, each followed by a newline. */
+	@Override
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+
+		sb.append("beseda:\t")
+				.append(getWord())
+				.append("\n")
+				.append("lema:\t")
+				.append(getLemma())
+				.append("\n")
+				.append("msd:\t")
+				.append(getMsd())
+				.append("\n");
+
+		return sb.toString();
+	}
+
+	/**
+	 * Returns the representation of this word that the analysis counts.
+	 *
+	 * @param calculateFor {@code CalculateFor.WORD} selects the surface form, anything else the
+	 *                     lemma
+	 * @param cvv          when {@code true} the consonant/vowel pattern is returned instead of
+	 *                     the text itself
+	 */
+	public String getForCf(CalculateFor calculateFor, boolean cvv) {
+		boolean useWord = calculateFor == CalculateFor.WORD;
+
+		if (cvv) {
+			return useWord ? getCVVWord() : getCVVLemma();
+		}
+
+		return useWord ? getWord() : getLemma();
+	}
+}
--- a/src/main/java/gui/CharacterAnalysisTab.java
+++ b/src/main/java/gui/CharacterAnalysisTab.java
@@ -0,0 +1,454 @@
+package gui;
+
+import data.*;
+import javafx.application.HostServices;
+import javafx.beans.value.ChangeListener;
+import javafx.beans.value.ObservableValue;
+import javafx.collections.FXCollections;
+import javafx.collections.ListChangeListener;
+import javafx.collections.ObservableList;
+import javafx.concurrent.Task;
+import javafx.fxml.FXML;
+import javafx.scene.control.*;
+import javafx.scene.layout.Pane;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.controlsfx.control.CheckComboBox;
+
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.util.*;
+import java.util.regex.Pattern;
+
+import static alg.XML_processing.readXML;
+import static gui.GUIController.showAlert;
+import static gui.Messages.*;
+
+@SuppressWarnings("Duplicates")
+public class CharacterAnalysisTab {
+	public final static Logger logger = LogManager.getLogger(CharacterAnalysisTab.class);
+
+	@FXML
+	public Label selectedFiltersLabel;
+	@FXML
+	public Label solarFilters;
+
+	@FXML
+	private TextField msdTF;
+	private ArrayList<Pattern> msd;
+	private ArrayList<String> msdStrings;
+
+	@FXML
+	private CheckComboBox<String> taxonomyCCB;
+	private ArrayList<String> taxonomy;
+
+	@FXML
+	private CheckBox calculatecvvCB;
+	private boolean calculateCvv;
+
+	@FXML
+	private TextField stringLengthTF;
+	private Integer stringLength;
+
+	@FXML
+	private ToggleGroup calculateForRB;
+	private  CalculateFor calculateFor;
+
+	@FXML
+	private RadioButton lemmaRB;
+
+	@FXML
+	private RadioButton varietyRB;
+
+	@FXML
+	private Pane paneLetters;
+
+	@FXML
+	private Button computeNgramsB;
+
+	@FXML
+	public ProgressBar ngramProgressBar;
+	@FXML
+	public Label progressLabel;
+
+	@FXML
+	private Hyperlink helpH;
+
+	private enum MODE {
+		LETTER
+	}
+
+	private MODE currentMode;
+
+	private Corpus corpus;
+	private HashMap<String, HashSet<String>> solarFiltersMap;
+	private Filter filter;
+	private boolean useDb;
+	private HostServices hostService;
+
+	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("različnica", "lema");
+	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
+
+
+	// TODO: pass observables for taxonomy based on header scan
+	// after header scan
+	private ObservableList<String> taxonomyCCBValues;
+	private CorpusType currentCorpusType;
+
+	/**
+	 * Switches the tab to letter mode and attaches the listeners which copy the control values (calculate-for,
+	 * msd, taxonomy, cvv, string length) into the fields later read by {@code compute()}.
+	 * <p>
+	 * {@code corpus} has to be set beforehand: it is dereferenced here and in {@code toggleMode}.
+	 * <p>
+	 * NOTE(review): every call attaches a fresh set of listeners - confirm whether callers that invoke this
+	 * more than once rely on that.
+	 */
+	public void init() {
+		currentMode = MODE.LETTER;
+		toggleMode(currentMode);
+
+		calculateForRB.selectedToggleProperty().addListener((observable, oldValue, newValue) -> {
+			// the selected toggle is null while the group has no selection
+			if (newValue == null) {
+				return;
+			}
+
+			RadioButton chk = (RadioButton) newValue.getToggleGroup().getSelectedToggle(); // Cast object to radio button
+			calculateFor = CalculateFor.factory(chk.getText());
+			logger.info("calculateForRB: {}", chk.getText());
+		});
+
+		// msd
+		msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
+			if (!newValue) {
+				// focus lost
+				String value = msdTF.getText();
+				logger.info("msdTf: {}", value);
+
+				if (!ValidationUtil.isEmpty(value)) {
+					ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
+
+					// letters are analysed one token at a time, so exactly one msd pattern is expected;
+					// a mismatch is reported, but the entered tokens are still accepted below
+					int nOfRequiredMsdTokens = 1;
+					if (msdTmp.size() != nOfRequiredMsdTokens) {
+						String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
+						logAlert(msg);
+						showAlert(Alert.AlertType.ERROR, msg);
+					}
+					msd = new ArrayList<>();
+					msdStrings = new ArrayList<>();
+					for (String msdToken : msdTmp) {
+						msd.add(Pattern.compile(msdToken));
+						msdStrings.add(msdToken);
+					}
+					logger.info(String.format("msd accepted (%d)", msd.size()));
+
+				} else if (!ValidationUtil.isEmpty(newValue)) {
+					msd = new ArrayList<>();
+					msdStrings = new ArrayList<>();
+				}
+			}
+		});
+
+		msdTF.setText("");
+		msd = new ArrayList<>();
+
+		// taxonomy
+		if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+			// setAll replaces whatever was listed before
+			taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
+			taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
+				taxonomy = new ArrayList<>();
+				ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
+				taxonomy.addAll(checkedItems);
+				logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
+			});
+			taxonomyCCB.getCheckModel().clearChecks();
+		} else {
+			taxonomyCCB.setDisable(true);
+		}
+
+		// cvv
+		calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
+			calculateCvv = newValue;
+			logger.info("calculate cvv: " + calculateCvv);
+		});
+
+
+		// string length
+		stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
+			if (newValue) {
+				// focus gained, nothing to validate yet
+				return;
+			}
+
+			// focus lost
+			String value = stringLengthTF.getText();
+			if (ValidationUtil.isEmpty(value)) {
+				GUIController.showAlert(Alert.AlertType.ERROR, WARNING_MISSING_STRING_LENGTH);
+				// keep the field and the text box in sync when falling back to the default
+				stringLengthTF.setText("1");
+				stringLength = 1;
+				logAlert(WARNING_MISSING_STRING_LENGTH);
+				return;
+			}
+
+			Integer parsed = null;
+			if (ValidationUtil.isNumber(value)) {
+				try {
+					parsed = Integer.parseInt(value);
+				} catch (NumberFormatException ignored) {
+					// isNumber may accept input that is not a valid int; reported as invalid below
+				}
+			}
+
+			if (parsed == null) {
+				// keep the previous stringLength instead of letting parseInt throw inside the listener
+				logAlert("stringlengthTf: " + WARNING_ONLY_NUMBERS_ALLOWED);
+				GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
+			} else {
+				stringLength = parsed;
+			}
+		});
+
+		computeNgramsB.setOnAction(e -> {
+			compute();
+			logger.info("compute button");
+		});
+
+		helpH.setOnAction(e -> openHelpWebsite());
+	}
+
+	/**
+	 * Refreshes the controls of this tab from the remembered field values and the current corpus.
+	 * <p>
+	 * case a: values for combo boxes can change after a corpus change
+	 * <ul>
+	 * <li>different corpus type - reset all fields so no old values remain</li>
+	 * <li>same corpus type, different subset - keep</li>
+	 * </ul>
+	 * <p>
+	 * case b: values for combo boxes can change after a header scan
+	 * <ul>
+	 * <li>at first, fields are populated by corpus type defaults</li>
+	 * <li>after, with gathered data</li>
+	 * </ul>
+	 * <p>
+	 * Defaults:
+	 * ngrams: 1
+	 * calculateFor: word
+	 * msd:
+	 * taxonomy:
+	 * skip: 0
+	 * iscvv: false
+	 * string length: 1
+	 */
+	public void populateFields() {
+		// corpus changed if: current one is null (this is first run of the app)
+		// or if currentCorpus != gui's corpus
+		boolean corpusChanged = currentCorpusType == null
+				|| currentCorpusType != corpus.getCorpusType();
+
+		// TODO: check for GOS, GIGAFIDA, SOLAR...
+		// refresh and:
+		// TODO if current value != null && is in new calculateFor ? keep : otherwise reset
+		if (calculateFor == null) {
+			calculateForRB.selectToggle(lemmaRB);
+			// resolve from the button text, as the listener in init() does; Toggle#toString() is not the label
+			calculateFor = CalculateFor.factory(lemmaRB.getText());
+		}
+
+		// NOTE(review): nothing in this class assigns the filter field - confirm it is set before this runs
+		if (!filter.hasMsd()) {
+			// if current corpus doesn't have msd data, disable this field
+			msd = new ArrayList<>();
+			msdTF.setText("");
+			msdTF.setDisable(true);
+			logger.info("no msd data");
+		} else if (ValidationUtil.isEmpty(msd) || corpusChanged) {
+			// msd has not been set previously
+			// or msd has been set but the corpus changed -> reset
+			msd = new ArrayList<>();
+			msdTF.setText("");
+			msdTF.setDisable(false);
+			logger.info("msd reset");
+		} else {
+			// if msd has been set, but corpus type remained the same, we can keep any set msd value
+			msdTF.setText(StringUtils.join(msdStrings, " "));
+			msdTF.setDisable(false);
+			logger.info("msd kept");
+		}
+
+		// TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
+
+		// keep calculateCvv
+		calculatecvvCB.setSelected(calculateCvv);
+
+		// keep string length if set
+		if (stringLength != null) {
+			stringLengthTF.setText(String.valueOf(stringLength));
+		} else {
+			stringLengthTF.setText("1");
+			stringLength = 1;
+		}
+
+		// TODO: trigger on rescan
+		// see if we read taxonomy from headers, otherwise use default values for given corpus
+		ObservableList<String> tax = corpus.getTaxonomy();
+		taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
+		// replace rather than append, so repeated calls don't list every entry twice
+		taxonomyCCB.getItems().setAll(taxonomyCCBValues);
+
+		// remember the type, otherwise every later call would treat the corpus as changed and reset msd
+		currentCorpusType = corpus.getCorpusType();
+	}
+
+	/**
+	 * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
+	 * sets combobox values to what is applicable ...
+	 *
+	 * @param mode mode to switch to; {@code null} re-applies {@code currentMode}
+	 */
+	public void toggleMode(MODE mode) {
+		if (mode == null) {
+			mode = currentMode;
+		}
+
+		// "{}" is required, otherwise log4j drops the argument
+		logger.info("mode: {}", mode.toString());
+
+		if (mode == MODE.LETTER) {
+			paneLetters.setVisible(true);
+
+			// populate with default cvv length value
+			if (stringLength == null) {
+				stringLengthTF.setText("1");
+				stringLength = 1;
+			} else {
+				stringLengthTF.setText(String.valueOf(stringLength));
+			}
+
+			// if calculateFor was selected for something other than a word or a lemma -> reset
+			if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
+				// if the user selected something else before selecting ngram for letters, reset that choice
+				calculateFor = CalculateFor.LEMMA;
+				calculateForRB.selectToggle(lemmaRB);
+			}
+		}
+
+		// override if orth mode, allow only word
+		if (corpus.isGosOrthMode()) {
+			// TODO change to
+			varietyRB.setDisable(true);
+			msdTF.setDisable(true);
+		} else {
+			msdTF.setDisable(false);
+			varietyRB.setDisable(false);
+		}
+	}
+
+	/**
+	 * Collects the current control values into a fresh {@code Filter}, validates it for string level analysis
+	 * and either starts the analysis or shows the validation message to the user.
+	 */
+	private void compute() {
+		// deliberately a local: named differently from the filter field so the two cannot be confused
+		Filter stringLevelFilter = new Filter();
+		stringLevelFilter.setNgramValue(0);
+		stringLevelFilter.setCalculateFor(calculateFor);
+		stringLevelFilter.setMsd(msd);
+		stringLevelFilter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
+		stringLevelFilter.setAl(AnalysisLevel.STRING_LEVEL);
+		stringLevelFilter.setSkipValue(0);
+		stringLevelFilter.setIsCvv(calculateCvv);
+		stringLevelFilter.setSolarFilters(solarFiltersMap);
+		stringLevelFilter.setStringLength(stringLength);
+
+		String message = Validation.validateForStringLevel(stringLevelFilter);
+		if (message == null) {
+			// no errors
+			logger.info("Executing: {}", stringLevelFilter.toString());
+			StatisticsNew statistic = new StatisticsNew(corpus, stringLevelFilter, useDb);
+			execute(statistic);
+		} else {
+			logAlert(message);
+			showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
+		}
+	}
+
+	/** Asks the injected {@code HostServices} to show {@code Messages.HELP_URL}. */
+	private void openHelpWebsite(){
+		hostService.showDocument(Messages.HELP_URL);
+	}
+
+	/**
+	 * Writes an alert text to the log at info level, prefixed with "alert: ".
+	 *
+	 * @param alert text that is (or was) shown to the user
+	 */
+	private void logAlert(String alert) {
+		logger.info("alert: " + alert);
+	}
+
+	/** @return the corpus this tab currently works on; {@code null} until {@code setCorpus} is called */
+	public Corpus getCorpus() {
+		return corpus;
+	}
+
+	public void setCorpus(Corpus corpus) {
+		this.corpus = corpus;
+
+		if (corpus.getCorpusType() != CorpusType.SOLAR) {
+			setSelectedFiltersLabel(null);
+		} else {
+			setSelectedFiltersLabel("/");
+		}
+	}
+
+	public void setSelectedFiltersLabel(String content) {
+		if (content != null) {
+			solarFilters.setVisible(true);
+			selectedFiltersLabel.setVisible(true);
+			selectedFiltersLabel.setText(content);
+		} else {
+			solarFilters.setVisible(false);
+			selectedFiltersLabel.setVisible(false);
+		}
+	}
+
+	/**
+	 * Runs the analysis on a background daemon thread: every detected corpus file is passed to
+	 * {@code readXML} in turn while the progress bar and label follow the task. On success the result is
+	 * saved to disk and the user is notified; on failure the cause is logged and an error alert is shown.
+	 *
+	 * @param statistic statistic holding the corpus and filter to run
+	 */
+	private void execute(StatisticsNew statistic) {
+		logger.info("Started execution: {}", statistic.getFilter());
+
+		Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
+
+		final Task<Void> task = new Task<Void>() {
+			@SuppressWarnings("Duplicates")
+			@Override
+			protected Void call() throws Exception {
+				long i = 0;
+				for (File f : corpusFiles) {
+					readXML(f.toString(), statistic);
+					i++;
+					this.updateProgress(i, corpusFiles.size());
+					this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
+				}
+
+				return null;
+			}
+		};
+
+		ngramProgressBar.progressProperty().bind(task.progressProperty());
+		progressLabel.textProperty().bind(task.messageProperty());
+
+		task.setOnSucceeded(e -> {
+			try {
+				boolean successullySaved = statistic.saveResultToDisk();
+				if (successullySaved) {
+					showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
+				} else {
+					showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
+				}
+			} catch (UnsupportedEncodingException e1) {
+				showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
+				logger.error("Error while saving", e1);
+			}
+
+			ngramProgressBar.progressProperty().unbind();
+			ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
+			progressLabel.textProperty().unbind();
+			progressLabel.setText("");
+		});
+
+		task.setOnFailed(e -> {
+			showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
+			// the event itself is not a Throwable; the real cause is kept by the task
+			logger.error("Error while executing", task.getException());
+			ngramProgressBar.progressProperty().unbind();
+			ngramProgressBar.setProgress(0.0);
+			ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
+			progressLabel.textProperty().unbind();
+			progressLabel.setText("");
+		});
+
+		final Thread thread = new Thread(task, "task");
+		thread.setDaemon(true);
+		thread.start();
+	}
+
+	/**
+	 * Stores the Solar filters which {@code compute()} copies into the filter of the next analysis.
+	 *
+	 * @param solarFiltersMap filter values, passed on as given
+	 */
+	public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
+		this.solarFiltersMap = solarFiltersMap;
+	}
+
+	/**
+	 * Injects the {@code HostServices} used by the help link to open {@code Messages.HELP_URL}.
+	 *
+	 * @param hostServices host services to use
+	 */
+	public void setHostServices(HostServices hostServices){
+		this.hostService = hostServices;
+	}
+}
--- a/src/main/java/gui/CorpusTab.java
+++ b/src/main/java/gui/CorpusTab.java
@@ -0,0 +1,517 @@
+package gui;
+
+import static data.CorpusType.*;
+import static gui.GUIController.*;
+import static gui.Messages.*;
+import static util.Util.*;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOCase;
+import org.apache.commons.io.filefilter.FileFilterUtils;
+import org.apache.commons.io.filefilter.TrueFileFilter;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import alg.XML_processing;
+import data.Corpus;
+import data.CorpusType;
+import data.Enums.solar.SolarFilters;
+import data.Tax;
+import javafx.collections.ObservableList;
+import javafx.concurrent.Task;
+import javafx.fxml.FXML;
+import javafx.scene.control.*;
+import javafx.scene.layout.Pane;
+import javafx.stage.DirectoryChooser;
+import javafx.stage.Stage;
+import javafx.application.HostServices;
+
+public class CorpusTab {
+	public final static Logger logger = LogManager.getLogger(CorpusTab.class);
+	public Pane setCorpusWrapperP;
+
+	private Stage stage;
+
+	@FXML
+	private Button chooseCorpusLocationB;
+	private File chosenCorpusLocation;
+
+	@FXML
+	private CheckBox readHeaderInfoChB;
+	private boolean readHeaderInfo;
+
+	@FXML
+	private CheckBox gosUseOrthChB;
+	private boolean gosUseOrth;
+
+	@FXML
+	private Button chooseResultsLocationB;
+
+	@FXML
+	private Label chooseCorpusL;
+	private String chooseCorpusLabelContent;
+
+	@FXML
+	private Label chooseResultsL;
+	private String chooseResultsLabelContent;
+
+	@FXML
+	private ProgressIndicator locationScanPI;
+
+	@FXML
+	private Hyperlink helpH;
+
+	// *** shared ***
+	private Corpus corpus;
+	private CorpusType corpusType;
+
+	// tabs - used to enable/disable
+	private Tab stringLevelTabNew2;
+	private Tab oneWordAnalysisTab;
+	private Tab characterLevelTab;
+	private Tab wordFormationTab;
+	private Tab wordLevelTab;
+	private Tab filterTab;
+	private TabPane tabPane;
+	private StringAnalysisTabNew2 satNew2Controller;
+	private OneWordAnalysisTab oneWordTabController;
+	private CharacterAnalysisTab catController;
+	private FiltersForSolar ffsController;
+	//private WordFormationTab wfController;
+	private WordLevelTab wlController;
+	private HostServices hostService;
+
+
+	/**
+	 * Attaches the button, checkbox and help-link listeners and puts the labels, the GOS checkbox and the
+	 * scan progress indicator into their initial (nothing selected yet) state.
+	 */
+	public void initialize() {
+		stage = new Stage();
+
+		// add listeners
+		chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
+		chooseCorpusLocationB.setTooltip(new Tooltip(TOOLTIP_chooseCorpusLocationB));
+		helpH.setOnAction(e -> openHelpWebsite());
+
+		readHeaderInfoChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
+			readHeaderInfo = newValue;
+			// "{}" is required, otherwise log4j drops the argument
+			logger.info("read headers: {}", readHeaderInfo);
+		});
+		readHeaderInfoChB.setTooltip(new Tooltip(TOOLTIP_readHeaderInfoChB));
+
+		gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
+			gosUseOrth = newValue;
+			corpus.setGosOrthMode(gosUseOrth);
+			wordFormationTab.setDisable(gosUseOrth);
+			// passing null makes each tab re-apply its current mode
+			satNew2Controller.toggleMode(null);
+			oneWordTabController.toggleMode(null);
+			catController.toggleMode(null);
+
+			logger.info("gosUseOrth: {}", gosUseOrth);
+		});
+
+		chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null));
+
+		// set labels and toggle visibility
+		toggleGosChBVisibility();
+
+		chooseCorpusLabelContent = Messages.LABEL_CORPUS_LOCATION_NOT_SET;
+		chooseCorpusL.setText(chooseCorpusLabelContent);
+
+		chooseResultsLabelContent = Messages.LABEL_RESULTS_LOCATION_NOT_SET;
+		chooseResultsL.setText(chooseResultsLabelContent);
+
+		togglePiAndSetCorpusWrapper(false);
+	}
+
+	/**
+	 * Shows or hides the scan progress indicator and shifts the corpus wrapper pane accordingly.
+	 *
+	 * @param piIsActive {@code true} shows the indicator and moves the pane to x = 100, {@code false} hides it
+	 *                   and moves the pane back to x = 10
+	 */
+	private void togglePiAndSetCorpusWrapper(boolean piIsActive) {
+		double wrapperX;
+		if (piIsActive) {
+			wrapperX = 100.0;
+		} else {
+			wrapperX = 10.0;
+		}
+
+		locationScanPI.setVisible(piIsActive);
+		setCorpusWrapperP.setLayoutX(wrapperX);
+	}
+
+	/** Asks the injected {@code HostServices} to show {@code Messages.HELP_URL}. */
+	private void openHelpWebsite(){
+		hostService.showDocument(Messages.HELP_URL);
+	}
+
+	/**
+	 * Lets the user pick a corpus directory and, when it is valid, makes it the current corpus.
+	 * <p>
+	 * In order for a directory to pass as a valid corpus location, the following criteria have to be met:
+	 * <ul>
+	 * <li>it can't be null</li>
+	 * <li>it has to be readable</li>
+	 * <li>it has to contain xml files</li>
+	 * <li>xml files have to contain valid headers from which we can infer the corpus type</li>
+	 * <li>corpus type must be one of the expected corpus types - see {@link data.CorpusType}</li>
+	 * </ul>
+	 * <p>
+	 * Additionally, if the user checks to read taxonomy/filters from the corpus files, that read
+	 * has to produce a non-empty list of results.
+	 */
+	private void chooseCorpusLocation() {
+		File selectedDirectory = directoryChooser();
+
+		if (selectedDirectory == null || !ValidationUtil.isReadableDirectory(selectedDirectory)) {
+			return;
+		}
+
+		logger.info("selected corpus dir: {}", selectedDirectory.getAbsolutePath());
+
+		// scan for xml files
+		Collection<File> corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("xml", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
+
+		// make sure there are corpus files in selected directory or notify the user about it
+		if (corpusFiles.isEmpty()) {
+			logger.info("alert: {}", WARNING_CORPUS_NOT_FOUND);
+			showAlert(Alert.AlertType.ERROR, WARNING_CORPUS_NOT_FOUND, null);
+			return;
+		}
+
+		String chooseCorpusLabelContentTmp = detectCorpusType(corpusFiles, selectedDirectory.getAbsolutePath());
+
+		if (chooseCorpusLabelContentTmp == null) {
+			logger.info("alert: {}", WARNING_CORPUS_NOT_FOUND);
+			showAlert(Alert.AlertType.ERROR, WARNING_CORPUS_NOT_FOUND, null);
+			return;
+		}
+
+		// creates the new corpus and stores its type, files and location, so none of that is repeated here
+		initNewCorpus(selectedDirectory, corpusFiles);
+
+		chooseCorpusLabelContent = chooseCorpusLabelContentTmp;
+		logger.info("corpus dir: {}", corpus.getChosenCorpusLocation().getAbsolutePath());
+
+		if (readHeaderInfo) {
+			// the header scan runs in the background and finishes the setup itself once it succeeds
+			logger.info("reading header info...");
+			readHeaderInfo();
+		} else {
+			setResults();
+
+			setCorpusForAnalysis();
+		}
+	}
+
+	/**
+	 * Starts over with a brand new {@code Corpus} once the user has picked a valid corpus location, so that
+	 * nothing from the previously selected corpus is carried over. The corpus directory also becomes the
+	 * default results location.
+	 *
+	 * @param selectedDirectory directory the user picked as corpus location
+	 * @param corpusFiles       xml files found in that directory
+	 */
+	private void initNewCorpus(File selectedDirectory, Collection<File> corpusFiles) {
+		Corpus freshCorpus = new Corpus();
+		freshCorpus.setCorpusType(corpusType);
+		freshCorpus.setDetectedCorpusFiles(corpusFiles);
+		freshCorpus.setChosenCorpusLocation(selectedDirectory);
+
+		// has to be in place before chooseResultsLocation, which writes to the corpus field
+		corpus = freshCorpus;
+		chooseResultsLocation(selectedDirectory);
+	}
+
+	/**
+	 * Sets the directory results are written to and updates the corresponding label.
+	 *
+	 * @param dir directory to use (the default after a valid corpus location was selected), or {@code null}
+	 *            to let the user pick one via the directory chooser
+	 */
+	private void chooseResultsLocation(File dir) {
+		// results location can be set either to default value (after selecting valid corpus location) - dir attribute
+		// or to a dir picked via directoryChooser (when dir == null)
+		File selectedDirectory = dir == null ? directoryChooser() : dir;
+
+		if (selectedDirectory != null) {
+			String resultsLocationPath = selectedDirectory.getAbsolutePath().concat(File.separator);
+			File chosenResultsLocationTmp = new File(resultsLocationPath);
+
+			if (!ValidationUtil.isValidDirectory(chosenResultsLocationTmp)) {
+				showAlert(Alert.AlertType.ERROR, WARNING_RESULTS_DIR_NOT_VALID);
+				// "{}" is required, otherwise log4j drops the argument
+				logger.info("alert: {}", WARNING_RESULTS_DIR_NOT_VALID);
+			} else {
+				// NOTE(review): corpus is dereferenced without a null check - confirm one is always set
+				// before the results button can be used
+				corpus.setChosenResultsLocation(chosenResultsLocationTmp);
+				chooseResultsLabelContent = corpus.getChosenResultsLocation().getAbsolutePath();
+				chooseResultsL.setText(chooseResultsLabelContent);
+				logger.info("results dir: " + chooseResultsLabelContent);
+			}
+		}
+	}
+
+	/**
+	 * Final GUI touches once a corpus has been accepted: shows the GOS checkbox when applicable and puts the
+	 * corpus description into the corpus label. The default results location is only logged here.
+	 */
+	private void setResults() {
+		// if everything is ok
+		// check and enable checkbox if GOS
+		toggleGosChBVisibility();
+
+		// set default results location
+		String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath();
+		// "{}" is required, otherwise log4j drops the argument
+		logger.info("setting default results location to: {}", defaultResultsLocationPath);
+
+		chooseCorpusL.setText(chooseCorpusLabelContent);
+	}
+
+	/**
+	 * Scans the headers of all detected corpus files on a background thread.
+	 * <ul>
+	 * <li>GIGAFIDA, GOS, CCKRES: the collected values are turned into the corpus taxonomy</li>
+	 * <li>SOLAR: the collected values are merged per key and stored as Solar filters</li>
+	 * </ul>
+	 * When the scan yields something, the corpus is marked as header-read and handed over for analysis;
+	 * otherwise the user is alerted and the analysis tabs are left as they were. For any other corpus type
+	 * no scan is started.
+	 */
+	private void readHeaderInfo() {
+		CorpusType corpusType = corpus.getCorpusType();
+		Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();
+		// per-file progress is only reported when there is more than one file
+		boolean corpusIsSplit = corpusFiles.size() > 1;
+		togglePiAndSetCorpusWrapper(true);
+		chooseCorpusL.setText(LABEL_SCANNING_CORPUS);
+
+		logger.info("reading header data for {}", corpusType.toString());
+
+		if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES) {
+			final Task<HashSet<String>> task = new Task<HashSet<String>>() {
+				@Override
+				protected HashSet<String> call() throws Exception {
+					HashSet<String> values = new HashSet<>();
+					long i = 0;
+
+					if (!corpusIsSplit) {
+						// indeterminate progress
+						updateProgress(-1.0f, -1.0f);
+					}
+
+					for (File file : corpusFiles) {
+						values.addAll((Collection<? extends String>) XML_processing.readXmlHeaderTaxonomyAndFilters(file.getAbsolutePath(), corpusIsSplit, corpusType));
+						i++;
+
+						if (corpusIsSplit) {
+							updateProgress(i, corpusFiles.size());
+						}
+					}
+
+					updateProgress(1.0f, 1.0f);
+					return values;
+				}
+			};
+
+			task.setOnSucceeded(e -> {
+				ObservableList<String> readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue());
+
+				if (ValidationUtil.isEmpty(readTaxonomy)) {
+					// if no taxonomy found alert the user and keep other tabs disabled
+					logger.info("No taxonomy found in headers.");
+					GUIController.showAlert(Alert.AlertType.ERROR, WARNING_NO_TAXONOMY_FOUND);
+				} else {
+					// set taxonomy, update label
+					corpus.setTaxonomy(readTaxonomy);
+					corpus.setHeaderRead(true);
+					chooseCorpusL.setText(chooseCorpusLabelContent);
+					setResults();
+					setCorpusForAnalysis();
+				}
+
+				togglePiAndSetCorpusWrapper(false);
+			});
+
+			startHeaderScan(task);
+		} else if (corpusType == CorpusType.SOLAR) {
+			// many many fields
+			final Task<HashMap<String, HashSet<String>>> task = new Task<HashMap<String, HashSet<String>>>() {
+				@Override
+				protected HashMap<String, HashSet<String>> call() throws Exception {
+					HashMap<String, HashSet<String>> values = new HashMap<>();
+					long i = 0;
+
+					if (!corpusIsSplit) {
+						// indeterminate progress
+						updateProgress(-1.0f, -1.0f);
+					}
+
+					for (File file : corpusFiles) {
+						HashMap<String, HashSet<String>> tmpvalues = (HashMap<String, HashSet<String>>) XML_processing.readXmlHeaderTaxonomyAndFilters(file.getAbsolutePath(), corpusIsSplit, corpusType);
+
+						// update final results
+						for (Map.Entry<String, HashSet<String>> entry : tmpvalues.entrySet()) {
+							if (values.containsKey(entry.getKey())) {
+								values.get(entry.getKey()).addAll(entry.getValue());
+							} else {
+								values.put(entry.getKey(), entry.getValue());
+							}
+						}
+
+						i++;
+
+						if (corpusIsSplit) {
+							updateProgress(i, corpusFiles.size());
+						}
+					}
+
+					updateProgress(1.0f, 1.0f);
+					return values;
+				}
+			};
+
+			task.setOnSucceeded(e -> {
+				HashMap<String, HashSet<String>> values = task.getValue();
+
+				if (ValidationUtil.isEmpty(values)) {
+					// if no filters found alert the user and keep other tabs disabled
+					logger.info("No solar filters found in headers.");
+					GUIController.showAlert(Alert.AlertType.ERROR, WARNING_NO_SOLAR_FILTERS_FOUND);
+				} else {
+					HashMap<String, ObservableList<String>> filtersForComboBoxes = SolarFilters.getFiltersForComboBoxes(values);
+					// set filters, update label
+					corpus.setSolarFiltersForXML(values);
+					corpus.setSolarFilters(filtersForComboBoxes);
+					corpus.setHeaderRead(true);
+					chooseCorpusL.setText(chooseCorpusLabelContent);
+					setResults();
+					setCorpusForAnalysis();
+				}
+
+				togglePiAndSetCorpusWrapper(false);
+			});
+
+			startHeaderScan(task);
+		}
+
+	}
+
+	/**
+	 * Binds the scan progress indicator to the task, hides the indicator again when the task is cancelled or
+	 * fails (logging the cause of a failure) and runs the task on a daemon thread.
+	 */
+	private void startHeaderScan(Task<?> task) {
+		locationScanPI.progressProperty().bind(task.progressProperty());
+
+		task.setOnCancelled(e -> togglePiAndSetCorpusWrapper(false));
+		task.setOnFailed(e -> {
+			// without this a failed scan leaves no trace at all
+			logger.error("Error while reading corpus headers", task.getException());
+			togglePiAndSetCorpusWrapper(false);
+		});
+
+		final Thread thread = new Thread(task, "task");
+		thread.setDaemon(true);
+		thread.start();
+	}
+
+	/**
+	 * Hands a validated corpus over to the analysis tabs: enables them, passes the corpus to their
+	 * controllers and (re)initialises those. The Solar filter tab is shown for Solar corpora only.
+	 * When validation fails, the validation errors are shown instead and nothing is enabled.
+	 */
+	private void setCorpusForAnalysis() {
+		if (corpus.validate()) {
+			// new statistic, enable tabs...
+			stringLevelTabNew2.setDisable(false);
+			satNew2Controller.setCorpus(corpus);
+			satNew2Controller.init();
+			oneWordAnalysisTab.setDisable(false);
+			oneWordTabController.setCorpus(corpus);
+			oneWordTabController.init();
+			characterLevelTab.setDisable(false);
+			catController.setCorpus(corpus);
+			catController.init();
+			wordFormationTab.setDisable(false);
+			wordLevelTab.setDisable(false);
+			//wfController.setCorpus(corpus);
+			//wfController.init();
+			wlController.setCorpus(corpus);
+			wlController.init();
+
+			if (corpus.getCorpusType() == CorpusType.SOLAR) {
+				filterTab.setDisable(false);
+				// the tab is still in the pane when a Solar corpus follows another Solar corpus;
+				// adding it a second time would put the same tab into the pane twice
+				if (!tabPane.getTabs().contains(filterTab)) {
+					tabPane.getTabs().add(1, filterTab);
+				}
+				ffsController.setCorpus(corpus);
+				ffsController.initFilters();
+			} else {
+				filterTab.setDisable(true);
+				tabPane.getTabs().remove(filterTab);
+			}
+		} else {
+			GUIController.showAlert(Alert.AlertType.ERROR, corpus.getValidationErrorsToString());
+		}
+	}
+
+	/**
+	 * Opens a directory chooser dialog, starting in the directory returned by {@code getWorkingDirectory()}
+	 * when that is available.
+	 *
+	 * @return whatever the dialog returns for the user's choice
+	 */
+	private File directoryChooser() {
+		// open in the folder where the jar is located if possible
+		File startDir = getWorkingDirectory();
+
+		DirectoryChooser chooser = new DirectoryChooser();
+		if (startDir != null) {
+			chooser.setInitialDirectory(startDir);
+		}
+
+		return chooser.showDialog(stage);
+	}
+
+	/**
+	 * Shows the GOS related checkbox only while a corpus of type GOS is selected; hides it otherwise
+	 * (including when no corpus or no corpus type is set yet).
+	 */
+	private void toggleGosChBVisibility() {
+		boolean isGos = false;
+		if (corpus != null) {
+			// a null type simply compares unequal to GOS
+			isGos = corpus.getCorpusType() == CorpusType.GOS;
+		}
+
+		gosUseOrthChB.setVisible(isGos);
+	}
+
+	/**
+	 * Infers the corpus type from the "title" header tag of the first corpus file and stores it in the
+	 * {@code corpusType} field ({@code null} when the title matches none of the known corpora).
+	 * <p>
+	 * The type is deliberately not written to the {@code corpus} field: at this point that field still
+	 * holds the previously selected corpus (or nothing at all), and {@code initNewCorpus} applies the type to
+	 * the new corpus right after a successful detection.
+	 *
+	 * @param corpusFiles    xml files of the corpus; must not be empty
+	 * @param corpusLocation path shown to the user as corpus location
+	 *
+	 * @return label content describing the corpus (location, number of files, type), or {@code null} when the
+	 * corpus type could not be recognised
+	 */
+	private String detectCorpusType(Collection<File> corpusFiles, String corpusLocation) {
+		// check that we recognize this corpus
+		// read first file only, maybe later do all, if toll on resources is acceptable
+		File f = corpusFiles.iterator().next();
+		String title = XML_processing.readXMLHeaderTag(f.getAbsolutePath(), "title");
+
+		corpusType = null;
+		if (title == null) {
+			// defensive: treat a missing title like an unrecognised corpus
+			return null;
+		}
+		title = title.toLowerCase();
+
+		// check if XML file's title contains any of recognized corpus titles
+		if (title.contains(SOLAR.getNameLowerCase())) {
+			corpusType = SOLAR;
+		} else if (title.contains(GIGAFIDA.getNameLowerCase())) {
+			corpusType = GIGAFIDA;
+		} else if (title.contains(CCKRES.getNameLowerCase())) {
+			corpusType = CCKRES;
+		} else if (title.contains(GOS.getNameLowerCase())) {
+			corpusType = GOS;
+		}
+
+		if (corpusType == null) {
+			return null;
+		}
+
+		String result = corpusLocation
+				+ "\n"
+				+ String.format(NOTIFICATION_FOUND_X_FILES, corpusFiles.size())
+				+ "\n"
+				+ String.format("Korpus: %s", corpusType.toString());
+
+		logger.debug(result);
+		return result;
+	}
+
+	/** @return the currently selected corpus; {@code null} until one is chosen or set */
+	public Corpus getCorpus() {
+		return corpus;
+	}
+
+	/**
+	 * Replaces the current corpus. Choosing a valid corpus location later replaces it again with a new one.
+	 *
+	 * @param corpus corpus to use
+	 */
+	public void setCorpus(Corpus corpus) {
+		this.corpus = corpus;
+	}
+
+	/** Injects the string level tab, which is enabled once a corpus passes validation. */
+	public void setStringLevelTabNew2(Tab stringLevelTabNew2) { this.stringLevelTabNew2 = stringLevelTabNew2; }
+
+	/** Injects the one-word analysis tab, which is enabled once a corpus passes validation. */
+	public void setOneWordAnalysisTab(Tab oneWordAnalysisTab) { this.oneWordAnalysisTab = oneWordAnalysisTab; }
+
+	/** Injects the character level tab, which is enabled once a corpus passes validation. */
+	public void setCharacterLevelTab(Tab characterLevelTab) { this.characterLevelTab = characterLevelTab; }
+
+	/** Injects the word level tab, which is enabled once a corpus passes validation. */
+	public void setWordLevelTab(Tab wordLevelTab) {
+		this.wordLevelTab = wordLevelTab;
+	}
+
+	/** Injects the Solar filter tab, which is added to the tab pane for Solar corpora and removed otherwise. */
+	public void setFilterTab(Tab filterTab) {
+		this.filterTab = filterTab;
+	}
+
+	/** Injects the Solar filters controller, which receives the corpus when a Solar corpus is selected. */
+	public void setFfsController(FiltersForSolar ffsController) {
+		this.ffsController = ffsController;
+	}
+
+	/** Injects the tab pane to which the Solar filter tab is added and from which it is removed. */
+	public void setTabPane(TabPane tabPane) {
+		this.tabPane = tabPane;
+	}
+
+	/** Injects the string analysis controller, which receives the corpus and GOS orth mode changes. */
+	public void setSatNew2Controller(StringAnalysisTabNew2 satNew2Controller) { this.satNew2Controller = satNew2Controller; }
+
+	/** Injects the one-word analysis controller, which receives the corpus and GOS orth mode changes. */
+	public void setOneWordTabController(OneWordAnalysisTab oneWordTabController) { this.oneWordTabController = oneWordTabController; }
+
+	/** Injects the character analysis controller, which receives the corpus and GOS orth mode changes. */
+	public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; }
+
+	/*public void setWfController(WordFormationTab wfController) {
+		this.wfController = wfController;
+	}*/
+
+	/** Injects the word level controller, which receives the corpus once it passes validation. */
+	public void setWlController(WordLevelTab wlController) {
+		this.wlController = wlController;
+	}
+
+	/** Injects the word formation tab, which is enabled with a valid corpus and disabled in GOS orth mode. */
+	public void setWordFormationTab(Tab wordFormationTab) {
+		this.wordFormationTab = wordFormationTab;
+	}
+
+	/**
+	 * Injects the {@code HostServices} used by the help link to open {@code Messages.HELP_URL}.
+	 *
+	 * @param hostServices host services to use
+	 */
+	public void setHostServices(HostServices hostServices){
+		this.hostService = hostServices;
+	}
+}
--- a/src/main/java/gui/FiltersForSolar.java
+++ b/src/main/java/gui/FiltersForSolar.java
@@ -0,0 +1,187 @@
+package gui;
+
+import static data.Enums.solar.SolarFilters.*;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import javafx.application.HostServices;
+import javafx.scene.control.Hyperlink;
+import org.controlsfx.control.CheckComboBox;
+
+import data.Corpus;
+import javafx.collections.ListChangeListener;
+import javafx.collections.ObservableList;
+import javafx.fxml.FXML;
+import javafx.scene.control.Label;
+import javafx.scene.layout.AnchorPane;
+import util.Util;
+
+public class FiltersForSolar {
+
+	@FXML
+	public AnchorPane solarFiltersTabPane;
+	@FXML
+	public CheckComboBox<String> solarRegijaCCB;
+	@FXML
+	public CheckComboBox<String> solarPredmetCCB;
+	@FXML
+	public CheckComboBox<String> solarRazredCCB;
+	@FXML
+	public CheckComboBox<String> solarLetoCCB;
+	@FXML
+	public CheckComboBox<String> solarSolaCCB;
+	@FXML
+	public CheckComboBox<String> solarVrstaBesedilaCCB;
+	@FXML
+	public Label selectedFiltersLabel;
+	@FXML
+	private Hyperlink helpH;
+
+	private HashMap<String, ObservableList<String>> selectedFilters;
+	private Corpus corpus;
+
+	private StringAnalysisTabNew2 satNew2Controller;
+	private OneWordAnalysisTab oneWordTabController;
+	private CharacterAnalysisTab catController;
+	//private WordFormationTab wfController;
+	private WordLevelTab wlController;
+	private HostServices hostService;
+
+	/**
+	 * Prepares the controller: creates the map of selected filters, registers a change listener on
+	 * the checked items of every Solar filter combo box and hooks up the help hyperlink.
+	 */
+	public void initialize() {
+		selectedFilters = new HashMap<>();
+
+		trackCheckedItems(REGIJA, solarRegijaCCB);
+		trackCheckedItems(PREDMET, solarPredmetCCB);
+		trackCheckedItems(RAZRED, solarRazredCCB);
+		trackCheckedItems(LETO, solarLetoCCB);
+		trackCheckedItems(SOLA, solarSolaCCB);
+		trackCheckedItems(TIP, solarVrstaBesedilaCCB);
+
+		helpH.setOnAction(e -> openHelpWebsite());
+	}
+
+	/**
+	 * Registers a listener that, on every change of the combo box's checked items, stores the
+	 * checked-items list under {@code filterKey} and refreshes the filter label.
+	 *
+	 * @param filterKey key under which the checked items are stored in {@code selectedFilters}
+	 * @param comboBox  combo box whose checked items are observed
+	 */
+	private void trackCheckedItems(String filterKey, CheckComboBox<String> comboBox) {
+		// the typed cast picks the ListChangeListener overload of addListener without resorting to a raw type
+		comboBox.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) change -> {
+			selectedFilters.put(filterKey, comboBox.getCheckModel().getCheckedItems());
+			updateSolarFilterLabel();
+		});
+	}
+
+	/**
+	 * Fills every Solar filter combo box with the values the corpus provides for that filter,
+	 * in natural (String) order.
+	 */
+	public void initFilters() {
+		showSortedValues(solarRegijaCCB, REGIJA);
+		showSortedValues(solarPredmetCCB, PREDMET);
+		showSortedValues(solarRazredCCB, RAZRED);
+		showSortedValues(solarLetoCCB, LETO);
+		showSortedValues(solarSolaCCB, SOLA);
+		showSortedValues(solarVrstaBesedilaCCB, TIP);
+	}
+
+	/**
+	 * Replaces the items of {@code comboBox} with a sorted copy of the corpus' values for {@code filterKey}.
+	 *
+	 * @param comboBox  combo box to fill
+	 * @param filterKey key of the filter whose values are read from {@code corpus.getSolarFilters()}
+	 */
+	private void showSortedValues(CheckComboBox<String> comboBox, String filterKey) {
+		// The previous code called getItems().sorted(), which only returns a new sorted view and
+		// leaves the items untouched, and getItems().removeAll() without arguments, which removes nothing.
+		// Sorting a copy first means the combo box receives a single, already ordered setAll().
+		ArrayList<String> values = new ArrayList<>(corpus.getSolarFilters().get(filterKey));
+		values.sort(String::compareTo);
+		comboBox.getItems().setAll(values);
+	}
+
+	/**
+	 * Rebuilds the textual summary of the selected filters, shows it on all labels and hands a
+	 * snapshot of the selection to the analysis tab controllers.
+	 */
+	private void updateSolarFilterLabel() {
+		String labelText;
+		if (Util.isMapEmpty(selectedFilters)) {
+			labelText = "/";
+		} else {
+			StringBuilder summary = new StringBuilder();
+			selectedFilters.forEach((filterName, checked) -> {
+				if (!checked.isEmpty()) {
+					// one paragraph per filter: "name: value1, value2"
+					summary.append(filterName)
+							.append(": ")
+							.append(String.join(", ", checked))
+							.append("\n\n");
+				}
+			});
+			labelText = summary.toString();
+		}
+		setSOlarFIlterLabelText(labelText);
+
+		// copy the observable lists into plain sets before passing them on
+		HashMap<String, HashSet<String>> solarFiltersMap = new HashMap<>();
+		selectedFilters.forEach((filterName, checked) -> solarFiltersMap.put(filterName, new HashSet<>(checked)));
+
+		satNew2Controller.setSolarFiltersMap(solarFiltersMap);
+		oneWordTabController.setSolarFiltersMap(solarFiltersMap);
+		catController.setSolarFiltersMap(solarFiltersMap);
+		//wfController.setSolarFiltersMap(solarFiltersMap);
+		wlController.setSolarFiltersMap(solarFiltersMap);
+	}
+
+	/**
+	 * Asks the stored host services to show the document at {@code Messages.HELP_URL}.
+	 */
+	private void openHelpWebsite(){
+		hostService.showDocument(Messages.HELP_URL);
+	}
+
+	/**
+	 * Sets the given text on this pane's own label and forwards it to the analysis tab
+	 * controllers so that their labels show the same content.
+	 *
+	 * @param content text to display
+	 */
+	private void setSOlarFIlterLabelText(String content) {
+		selectedFiltersLabel.setText(content);
+		satNew2Controller.setSelectedFiltersLabel(content);
+		oneWordTabController.setSelectedFiltersLabel(content);
+		catController.setSelectedFiltersLabel(content);
+		//wfController.setSelectedFiltersLabel(content);
+		wlController.setSelectedFiltersLabel(content);
+	}
+
+	/**
+	 * Stores the corpus whose Solar filters are read by {@code initFilters()}.
+	 *
+	 * @param corpus the corpus instance to keep in the {@code corpus} field
+	 */
+	public void setCorpus(Corpus corpus) {
+		this.corpus = corpus;
+	}
+
+	/**
+	 * Stores the {@code StringAnalysisTabNew2} controller that receives filter updates.
+	 *
+	 * @param satNew2Controller the controller to keep a reference to
+	 */
+	public void setSatNew2Controller(StringAnalysisTabNew2 satNew2Controller) { this.satNew2Controller = satNew2Controller; }
+
+	/**
+	 * Stores the {@code OneWordAnalysisTab} controller that receives filter updates.
+	 *
+	 * @param oneWordTabController the controller to keep a reference to
+	 */
+	public void setOneWordTabController(OneWordAnalysisTab oneWordTabController) { this.oneWordTabController = oneWordTabController; }
+
+	/**
+	 * Stores the {@code CharacterAnalysisTab} controller that receives filter updates.
+	 *
+	 * @param catController the controller to keep a reference to
+	 */
+	public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; }
+
+	/*public void setWfController(WordFormationTab wfController) {
+		this.wfController = wfController;
+	}*/
+
+	/**
+	 * Stores the {@code WordLevelTab} controller that receives filter updates.
+	 *
+	 * @param wlController the controller to keep a reference to
+	 */
+	public void setWlController(WordLevelTab wlController) {
+		this.wlController = wlController;
+	}
+
+	/**
+	 * Stores the JavaFX host services handle used by {@code openHelpWebsite()}.
+	 *
+	 * @param hostServices the host services instance to keep a reference to
+	 */
+	public void setHostServices(HostServices hostServices){
+		this.hostService = hostServices;
+	}
+}
--- a/src/main/java/gui/GUIController.java
+++ b/src/main/java/gui/GUIController.java
@@ -0,0 +1,150 @@
+package gui;
+
+import java.io.IOException;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.kordamp.ikonli.fontawesome.FontAwesome;
+import org.kordamp.ikonli.javafx.FontIcon;
+
+import data.Corpus;
+import javafx.application.Application;
+import javafx.fxml.FXML;
+import javafx.fxml.FXMLLoader;
+import javafx.scene.Parent;
+import javafx.scene.Scene;
+import javafx.scene.control.Alert;
+import javafx.scene.control.Tab;
+import javafx.scene.control.TabPane;
+import javafx.stage.Stage;
+
+public class GUIController extends Application {
+	public final static Logger logger = LogManager.getLogger(GUIController.class);
+
+	@FXML
+	public Tab StringLevelTabNew2;
+	@FXML
+	public Tab OneWordAnalysisTab;
+	@FXML
+	public Tab CharacterLevelTabNew;
+	@FXML
+	public Tab corpusTab;
+	public TabPane tabPane;
+	@FXML
+	private CharacterAnalysisTab catController;
+	@FXML
+	private static Parent sat;
+	@FXML
+	private StringAnalysisTabNew2 satNew2Controller;
+	@FXML
+	private static Parent satNew2;
+	@FXML
+	private OneWordAnalysisTab oneWordTabController;
+	@FXML
+	private static Parent oneWordTab;
+	@FXML
+	private CorpusTab ctController;
+	@FXML
+	private Parent ct;
+	//@FXML
+	//private WordFormationTab wfController;
+	@FXML
+	private Parent wf;
+	@FXML
+	private WordLevelTab wlController;
+	@FXML
+	private Parent wl;
+	@FXML
+	private FiltersForSolar ffsController;
+	@FXML
+	private Parent ffs;
+	@FXML
+	private SelectedFiltersPane sfpController;
+	@FXML
+	private Parent sfp;
+	@FXML
+	public Tab stringLevelTab;
+	@FXML
+	public Tab wordLevelTab;
+	/*@FXML
+	public Tab wordFormationTab;*/
+
+
+	@FXML
+	public Tab filterTab;
+	public Stage stage;
+
+	private Corpus corpus;
+
+
+	/**
+	 * Loads the layout from {@code /GUI.fxml}, puts it into an 800x600 scene on the primary stage
+	 * and shows the stage.
+	 *
+	 * @param primaryStage stage supplied by the JavaFX runtime
+	 * @throws IOException if the FXML resource cannot be loaded
+	 */
+	@Override
+	public void start(Stage primaryStage) throws IOException {
+		final Parent layout = FXMLLoader.load(getClass().getResource("/GUI.fxml"));
+
+		primaryStage.setTitle("GUI");
+		// https://github.com/dicolar/jbootx
+		// scene.getStylesheets().add(GUIController.class.getResource("bootstrap3.css").toExternalForm())
+		primaryStage.setScene(new Scene(layout, 800, 600));
+
+		this.stage = primaryStage;
+		this.stage.show();
+	}
+
+	/**
+	 * Program entry point; hands the command line arguments to {@code launch}.
+	 *
+	 * @param args command line arguments
+	 */
+	public static void main(String[] args) {
+		launch(args);
+	}
+
+	/**
+	 * Creates the shared {@code Corpus} instance and wires the tab controllers together: every
+	 * controller gets the corpus and the host services, the corpus tab controller additionally
+	 * gets references to the tabs and the other controllers, and the Solar filter controller gets
+	 * the analysis controllers it forwards filter changes to. Finally the tab icons are set and
+	 * the filter tab is removed from the tab pane.
+	 * <p>
+	 * NOTE(review): presumably invoked by the FXML loader once the {@code @FXML} fields have been
+	 * injected - confirm against GUI.fxml.
+	 */
+	public void initialize() {
+		corpus = new Corpus();
+		// corpus tab controller: hand over the corpus, the tabs and the sibling controllers
+		ctController.setCorpus(corpus);
+		ctController.setFilterTab(filterTab);
+		ctController.setStringLevelTabNew2(StringLevelTabNew2);
+		ctController.setOneWordAnalysisTab(OneWordAnalysisTab);
+		ctController.setCharacterLevelTab(CharacterLevelTabNew);
+		ctController.setSatNew2Controller(satNew2Controller);
+		ctController.setOneWordTabController(oneWordTabController);
+		ctController.setCatController(catController);
+		//ctController.setWfController(wfController);
+		ctController.setWlController(wlController);
+		ctController.setTabPane(tabPane);
+		ctController.setFfsController(ffsController);
+		//ctController.setWordFormationTab(wordFormationTab);
+		ctController.setWordLevelTab(wordLevelTab);
+		ctController.setHostServices(getHostServices());
+
+		// analysis controllers: each one shares the same corpus instance and host services
+		satNew2Controller.setCorpus(corpus);
+		satNew2Controller.setHostServices(getHostServices());
+		oneWordTabController.setCorpus(corpus);
+		oneWordTabController.setHostServices(getHostServices());
+		catController.setCorpus(corpus);
+		catController.setHostServices(getHostServices());
+		//wfController.setCorpus(corpus);
+		//wfController.setHostServices(getHostServices());
+		wlController.setCorpus(corpus);
+		wlController.setHostServices(getHostServices());
+		// Solar filter controller: needs the analysis controllers to push filter changes to them
+		ffsController.setSatNew2Controller(satNew2Controller);
+		ffsController.setOneWordTabController(oneWordTabController);
+		ffsController.setCatController(catController);
+		//ffsController.setWfController(wfController);
+		ffsController.setWlController(wlController);
+		ffsController.setHostServices(getHostServices());
+
+		// set tab icons
+		corpusTab.setGraphic(new FontIcon(FontAwesome.COG));
+		filterTab.setGraphic(new FontIcon(FontAwesome.FILTER));
+
+		// hide filter tab
+		tabPane.getTabs().removeAll(filterTab);
+	}
+
+	/**
+	 * Builds an alert dialog of the given type, shows it and blocks until the user closes it.
+	 *
+	 * @param alertType   type of the alert; also used to look up the window title
+	 * @param headerText  header text, {@code null} is shown as an empty string
+	 * @param contentText content text, {@code null} is shown as an empty string
+	 */
+	static void showAlert(Alert.AlertType alertType, String headerText, String contentText) {
+		Alert alert = new Alert(alertType);
+
+		// Messages.windowTitles has no entry for some types (e.g. INFORMATION); keep the
+		// dialog's default title in that case instead of overwriting it with null
+		String title = Messages.windowTitles.get(alertType);
+		if (title != null) {
+			alert.setTitle(title);
+		}
+
+		alert.setHeaderText(headerText != null ? headerText : "");
+		alert.setContentText(contentText != null ? contentText : "");
+		alert.showAndWait();
+	}
+
+	/**
+	 * Shows an alert with a header only; delegates to the three-argument overload with a
+	 * {@code null} content text.
+	 *
+	 * @param alertType  type of the alert
+	 * @param headerText header text
+	 */
+	static void showAlert(Alert.AlertType alertType, String headerText) {
+		showAlert(alertType, headerText, null);
+	}
+}
--- a/src/main/java/gui/Messages.java
+++ b/src/main/java/gui/Messages.java
@@ -0,0 +1,74 @@
+package gui;
+
+import static javafx.scene.control.Alert.AlertType.*;
+
+import java.util.HashMap;
+
+import javafx.scene.control.Alert;
+
+public class Messages {
+
+	// warnings & errors
+	public static final String WARNING_CORPUS_NOT_FOUND = "V izbranem direktoriju ni ustreznih korpusnih datotek.";
+	public static final String WARNING_RESULTS_DIR_NOT_VALID = "Za dostop do izbranega direktorija nimate potrebnih pravic.";
+	public static final String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS = "Izbran nivo ngramov in vpisano št. besed v filtru se ne ujemata.";
+	public static final String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS_INFO = "Izberite drugo število ali popravite filter.";
+	public static final String WARNING_WORD_OR_LEMMA = "Izberite, če želite statistiko izračunati za besede ali leme.";
+	public static final String WARNING_ONLY_NUMBERS_ALLOWED = "Prosim vnesite veljavno število.";
+	public static final String WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES = "Število za ngram (%d) in število msd oznak (%d) se morata ujemati.";
+	public static final String WARNING_MISSING_STRING_LENGTH = "Dolžina niza mora biti večja od 0. Vstavljena je privzeta vrednost (1).";
+	public static final String WARNING_NO_TAXONOMY_FOUND = "Iz korpusnih datotek ni bilo moč razbrati taksonomije. Prosim izberite drugo lokacijo ali korpus.";
+	public static final String WARNING_NO_SOLAR_FILTERS_FOUND = "Iz korpusnih datotek ni bilo moč razbrati filtrov. Prosim izberite drugo lokacijo ali korpus.";
+	public static final String ERROR_WHILE_EXECUTING = "Prišlo je do napake med izvajanjem.";
+	public static final String ERROR_WHILE_SAVING_RESULTS_TO_CSV = "Prišlo je do napake med shranjevanje rezultatov.";
+
+	// missing
+	public static final String MISSING_NGRAM_LEVEL = "N-gram nivo";
+	public static final String MISSING_CALCULATE_FOR = "Izračunaj za";
+	public static final String MISSING_SKIP = "";
+	public static final String MISSING_STRING_LENGTH = "Dolžina niza";
+	public static final String MISMATCHED_STRING_LENGTH_AND_MSD_REGEX = "Neujemajoča dolžina niza in regex filter";
+
+
+	// general notifications - static content/set only once
+	public static final String NOTIFICATION_FOUND_X_FILES = "Št. najdenih datotek: %d";
+	public static final String NOTIFICATION_ANALYSIS_COMPLETED = "Analiza je zaključena, rezultati so shranjeni.";
+	public static final String NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS = "Analiza je zaključena, vendar ni bilo moč izračunati statistike, ki bi ustrezala vsem navedenim pogojem.";
+	public static final String RESULTS_PATH_SET_TO_DEFAULT = "Lokacija za shranjevanje rezultatov je nastavljena na lokacijo korpusa.";
+
+	// ongoing notifications - displayed while processing, dynamically changing
+	public static final String ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y = "Analiziram datoteko %d od %d (%s)";
+
+	// Labels
+	public static final String LABEL_CORPUS_LOCATION_NOT_SET = "Lokacija korpusa ni nastavljena";
+	public static final String LABEL_RESULTS_LOCATION_NOT_SET = "Lokacija za shranjevanje rezultatov ni nastavljena";
+	public static final String LABEL_RESULTS_CORPUS_TYPE_NOT_SET = "Vrsta korpusa ni nastavljena";
+
+	public static final String LABEL_SCANNING_CORPUS = "Iskanje in analiza korpusnih datotek...";
+	public static final String LABEL_SCANNING_SINGLE_FILE_CORPUS = "Analiza vnosa ";
+	public static final String COMPLETED = "končano";
+
+	public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.";
+	public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.";
+
+
+
+	// Does not really belong in this class. TODO: move it to a more suitable place.
+	public static final String HELP_URL = "http://slovnica.ijs.si/";
+
+	// helper maps
+	/**
+	 * Typical window titles
+	 * ERROR = "Napaka"
+	 * WARNING = "Opozorilo"
+	 * CONFIRMATION = "Potrdilo"
+	 */
+	static HashMap<Alert.AlertType, String> windowTitles = new HashMap<>();
+
+	static {
+		// automatically set window's title
+		windowTitles.put(ERROR, "Napaka");
+		windowTitles.put(WARNING, "Opozorilo");
+		windowTitles.put(CONFIRMATION, "Potrdilo");
+	}
+}
--- a/src/main/java/gui/OneWordAnalysisTab.java
+++ b/src/main/java/gui/OneWordAnalysisTab.java
@@ -0,0 +1,389 @@
+package gui;
+
+import data.*;
+import javafx.application.HostServices;
+import javafx.collections.FXCollections;
+import javafx.collections.ListChangeListener;
+import javafx.collections.ObservableList;
+import javafx.concurrent.Task;
+import javafx.fxml.FXML;
+import javafx.scene.control.*;
+import javafx.scene.layout.Pane;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.controlsfx.control.CheckComboBox;
+
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.util.*;
+import java.util.regex.Pattern;
+
+import static alg.XML_processing.readXML;
+import static gui.GUIController.showAlert;
+import static gui.Messages.*;
+
+@SuppressWarnings("Duplicates")
+public class OneWordAnalysisTab {
+    public final static Logger logger = LogManager.getLogger(OneWordAnalysisTab.class);
+
+    @FXML
+    public Label selectedFiltersLabel;
+    @FXML
+    public Label solarFilters;
+
+    @FXML
+    private TextField msdTF;
+    private ArrayList<Pattern> msd;
+    private ArrayList<String> msdStrings;
+
+    @FXML
+    private CheckComboBox<String> taxonomyCCB;
+    private ArrayList<String> taxonomy;
+
+    @FXML
+    private ComboBox<String> calculateForCB;
+    private CalculateFor calculateFor;
+
+
+    @FXML
+    private Button computeNgramsB;
+
+    @FXML
+    public ProgressBar ngramProgressBar;
+    @FXML
+    public Label progressLabel;
+
+    @FXML
+    private Hyperlink helpH;
+
+    private enum MODE {
+        LETTER,
+        WORD
+    }
+
+    private MODE currentMode;
+
+    private Corpus corpus;
+    private HashMap<String, HashSet<String>> solarFiltersMap;
+    private Filter filter;
+    private boolean useDb;
+    private HostServices hostService;
+
+    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
+    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
+    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
+
+
+    // TODO: pass observables for taxonomy based on header scan
+    // after header scan
+    private ObservableList<String> taxonomyCCBValues;
+    private CorpusType currentCorpusType;
+
+    /**
+     * Sets the tab to word mode and registers the listeners of its controls: the
+     * "calculate for" combo box, the msd text field, the taxonomy check combo box, the compute
+     * button and the help hyperlink.
+     */
+    public void init() {
+        currentMode = MODE.WORD;
+        toggleMode(currentMode);
+
+        // calculateForCB
+        calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
+            calculateFor = CalculateFor.factory(newValue);
+            // the "{}" placeholder is required, otherwise log4j drops the argument;
+            // passing the object itself also avoids an NPE when the factory yields null
+            logger.info("calculateForCB: {}", calculateFor);
+        });
+
+        calculateForCB.getSelectionModel().select(0);
+
+        // msd
+        msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
+            if (!newValue) {
+                // focus lost
+                String value = msdTF.getText();
+                logger.info("msdTf: {}", value);
+
+                if (!ValidationUtil.isEmpty(value)) {
+                    ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
+
+                    // this tab always works on single words, so exactly one msd token is expected
+                    int nOfRequiredMsdTokens = 1;
+                    if (msdTmp.size() != nOfRequiredMsdTokens) {
+                        String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
+                        logAlert(msg);
+                        showAlert(Alert.AlertType.ERROR, msg);
+                    }
+                    // NOTE(review): the tokens are stored even after the mismatch alert above - confirm this is intended
+                    msd = new ArrayList<>();
+                    msdStrings = new ArrayList<>();
+                    for (String msdToken : msdTmp) {
+                        msd.add(Pattern.compile(msdToken));
+                        msdStrings.add(msdToken);
+                    }
+                    logger.info(String.format("msd accepted (%d)", msd.size()));
+
+                } else if (!ValidationUtil.isEmpty(newValue)) {
+                    // NOTE(review): newValue is the focus flag (false here), not the text - verify
+                    // that this check behaves as intended for a Boolean
+                    msd = new ArrayList<>();
+                    msdStrings = new ArrayList<>();
+                }
+            }
+        });
+
+        msdTF.setText("");
+        msd = new ArrayList<>();
+
+        // taxonomy
+        if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+            // setAll already replaces the previous items (the former argument-less removeAll() was a no-op)
+            taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
+            taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
+                taxonomy = new ArrayList<>();
+                ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
+                taxonomy.addAll(checkedItems);
+                logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
+            });
+            taxonomyCCB.getCheckModel().clearChecks();
+        } else {
+            taxonomyCCB.setDisable(true);
+        }
+
+        computeNgramsB.setOnAction(e -> {
+            compute();
+            logger.info("compute button");
+        });
+        helpH.setOnAction(e -> openHelpWebsite());
+    }
+
+    /**
+     * Refreshes the tab's fields for the current corpus.
+     * <p>
+     * case a: values for combo boxes can change after a corpus change
+     * <ul>
+     * <li>different corpus type - reset all fields so no old values remain</li>
+     * <li>same corpus type, different subset - keep</li>
+     * </ul>
+     * <p>
+     * case b: values for combo boxes can change after a header scan
+     * <ul>
+     * <li>at first, fields are populated by corpus type defaults</li>
+     * <li>after, with gathered data</li>
+     * </ul>
+     * <p></p>
+     * ngrams: 1
+     * calculateFor: word
+     * msd:
+     * taxonomy:
+     * skip: 0
+     * iscvv: false
+     * string length: 1
+     */
+    public void populateFields() {
+        // corpus changed if: current one is null (this is first run of the app)
+        // or if currentCorpus != gui's corpus
+        boolean corpusChanged = currentCorpusType == null
+                || currentCorpusType != corpus.getCorpusType();
+
+
+        // TODO: check for GOS, GIGAFIDA, SOLAR...
+        // refresh and:
+        // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
+        if (calculateFor == null) {
+            calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
+            calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
+        }
+
+        // NOTE(review): nothing in this class assigns the filter field, so an unguarded
+        // filter.hasMsd() always threw a NullPointerException; a missing filter is treated
+        // as "msd available" here - confirm the intended source of this information
+        if (filter != null && !filter.hasMsd()) {
+            // if current corpus doesn't have msd data, disable this field
+            msd = new ArrayList<>();
+            msdTF.setText("");
+            msdTF.setDisable(true);
+            logger.info("no msd data");
+        } else if (ValidationUtil.isEmpty(msd) || corpusChanged) {
+            // msd has not been set previously
+            // or msd has been set but the corpus changed -> reset
+            msd = new ArrayList<>();
+            msdTF.setText("");
+            msdTF.setDisable(false);
+            logger.info("msd reset");
+        } else {
+            // if msd has been set, but corpus type remained the same, we can keep any set msd value
+            msdTF.setText(StringUtils.join(msdStrings, " "));
+            msdTF.setDisable(false);
+            logger.info("msd kept");
+        }
+
+        // TODO: trigger on rescan
+        // see if we read taxonomy from headers, otherwise use default values for given corpus
+        ObservableList<String> tax = corpus.getTaxonomy();
+        taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
+        // setAll instead of addAll: repeated calls replace the entries rather than appending duplicates
+        taxonomyCCB.getItems().setAll(taxonomyCCBValues);
+
+        // remember the type the fields were populated for; previously this was only assigned in a
+        // branch that required it to be non-null already, so it stayed null forever and
+        // corpusChanged was always true
+        currentCorpusType = corpus.getCorpusType();
+    }
+
+    /**
+     * Sets the entries of the "calculate for" combo box to what is applicable for the given mode
+     * and enables or disables the msd field depending on the corpus' GOS orthography mode.
+     *
+     * @param mode mode to switch to; {@code null} means "use the current mode"
+     */
+    public void toggleMode(MODE mode) {
+        if (mode == null) {
+            mode = currentMode;
+        }
+
+        // "{}" placeholder so the mode is actually printed; passing the object is also null-safe
+        logger.info("mode: {}", mode);
+
+        if (mode == MODE.WORD) {
+            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
+        } else if (mode == MODE.LETTER) {
+            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);
+
+
+            // if calculateFor was selected for something other than a word or a lemma -> reset
+            if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
+                // if the user selected something else before selecting ngram for letters, reset that choice
+                calculateFor = CalculateFor.WORD;
+                calculateForCB.getSelectionModel().select("različnica");
+            }
+        }
+
+        // override if orth mode, allow only word
+        if (corpus.isGosOrthMode()) {
+            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
+            msdTF.setDisable(true);
+        } else {
+            msdTF.setDisable(false);
+        }
+    }
+
+    /**
+     * Builds a filter for a single-word (ngram value 1) string-level analysis from the current
+     * field values, validates it and either starts the analysis or shows the validation message.
+     */
+    private void compute() {
+        // local filter for this run; named differently from the filter field to avoid shadowing it
+        Filter analysisFilter = new Filter();
+        analysisFilter.setNgramValue(1);
+        analysisFilter.setCalculateFor(calculateFor);
+        analysisFilter.setMsd(msd);
+        analysisFilter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
+        analysisFilter.setAl(AnalysisLevel.STRING_LEVEL);
+        analysisFilter.setSkipValue(0);
+        analysisFilter.setIsCvv(false);
+        analysisFilter.setSolarFilters(solarFiltersMap);
+        analysisFilter.setStringLength(1);
+
+        String message = Validation.validateForStringLevel(analysisFilter);
+        if (message == null) {
+            // no errors
+            // "{}" placeholder so the filter is actually written to the log
+            logger.info("Executing: {}", analysisFilter.toString());
+            StatisticsNew statistic = new StatisticsNew(corpus, analysisFilter, useDb);
+            execute(statistic);
+        } else {
+            logAlert(message);
+            showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
+        }
+    }
+
+    /**
+     * Writes the text of an alert to the log at info level.
+     *
+     * @param alert alert text to log
+     */
+    private void logAlert(String alert) {
+        logger.info("alert: {}", alert);
+    }
+
+    /**
+     * Asks the stored host services to show the document at {@code Messages.HELP_URL}.
+     */
+    private void openHelpWebsite(){
+        hostService.showDocument(Messages.HELP_URL);
+    }
+
+    /**
+     * Returns the corpus reference currently stored in this controller.
+     *
+     * @return the value of the {@code corpus} field
+     */
+    public Corpus getCorpus() {
+        return corpus;
+    }
+
+    public void setCorpus(Corpus corpus) {
+        this.corpus = corpus;
+
+        if (corpus.getCorpusType() != CorpusType.SOLAR) {
+            setSelectedFiltersLabel(null);
+        } else {
+            setSelectedFiltersLabel("/");
+        }
+    }
+
+    /**
+     * Shows the given text on the selected-filters label, or hides both filter labels.
+     *
+     * @param content text to show; {@code null} hides the labels and leaves the text untouched
+     */
+    public void setSelectedFiltersLabel(String content) {
+        final boolean show = content != null;
+
+        solarFilters.setVisible(show);
+        selectedFiltersLabel.setVisible(show);
+        if (show) {
+            selectedFiltersLabel.setText(content);
+        }
+    }
+
+    /**
+     * Runs the analysis on a daemon background thread: every detected corpus file is passed to
+     * {@code readXML}, while the progress bar and progress label are bound to the task. On
+     * success the results are saved and the user is notified; on failure an error alert is shown.
+     *
+     * @param statistic statistics object holding the corpus and filter, filled while reading
+     */
+    private void execute(StatisticsNew statistic) {
+        // "{}" placeholder so the filter is actually written to the log
+        logger.info("Started execution: {}", statistic.getFilter());
+
+        Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
+
+        final Task<Void> task = new Task<Void>() {
+            @SuppressWarnings("Duplicates")
+            @Override
+            protected Void call() throws Exception {
+                long i = 0;
+                for (File f : corpusFiles) {
+                    readXML(f.toString(), statistic);
+                    i++;
+                    this.updateProgress(i, corpusFiles.size());
+                    this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
+                }
+
+                return null;
+            }
+        };
+
+        ngramProgressBar.progressProperty().bind(task.progressProperty());
+        progressLabel.textProperty().bind(task.messageProperty());
+
+        task.setOnSucceeded(e -> {
+            try {
+                boolean successullySaved = statistic.saveResultToDisk();
+                if (successullySaved) {
+                    showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
+                } else {
+                    showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
+                }
+            } catch (UnsupportedEncodingException e1) {
+                showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
+                logger.error("Error while saving", e1);
+            }
+
+            // the bindings have to be released before the controls can be set directly
+            ngramProgressBar.progressProperty().unbind();
+            ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
+            progressLabel.textProperty().unbind();
+            progressLabel.setText("");
+        });
+
+        task.setOnFailed(e -> {
+            showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
+            // log the exception that made the task fail; the event object itself carries no stack trace
+            logger.error("Error while executing", task.getException());
+            ngramProgressBar.progressProperty().unbind();
+            ngramProgressBar.setProgress(0.0);
+            ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
+            progressLabel.textProperty().unbind();
+            progressLabel.setText("");
+        });
+
+        final Thread thread = new Thread(task, "task");
+        thread.setDaemon(true);
+        thread.start();
+    }
+
+    /**
+     * Stores the selected Solar filters; {@code compute()} passes this map to the filter it builds.
+     *
+     * @param solarFiltersMap map to keep in the {@code solarFiltersMap} field
+     */
+    public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
+        this.solarFiltersMap = solarFiltersMap;
+    }
+    /**
+     * Stores the JavaFX host services handle used by {@code openHelpWebsite()}.
+     *
+     * @param hostServices the host services instance to keep a reference to
+     */
+    public void setHostServices(HostServices hostServices){
+        this.hostService = hostServices;
+    }
+
+}
--- a/src/main/java/gui/SelectedFiltersPane.java
+++ b/src/main/java/gui/SelectedFiltersPane.java
@@ -0,0 +1,18 @@
+package gui;
+
+import javafx.scene.control.Label;
+
+/**
+ * Holder for the label that displays the currently selected filters.
+ */
+public class SelectedFiltersPane {
+
+
+	public Label selectedFiltersLabel;
+
+	/**
+	 * Returns the label that displays the selected filters.
+	 *
+	 * @return the label, or {@code null} if none has been created or assigned yet
+	 */
+	public Label getSelectedFiltersLabel() {
+		return selectedFiltersLabel;
+	}
+
+	/**
+	 * Shows the given text on the selected-filters label. An already existing label is updated
+	 * in place; a new one is created only when there is none yet.
+	 *
+	 * @param filters text describing the selected filters
+	 */
+	public void setSelectedFiltersLabel(String filters) {
+		// previously a fresh Label replaced the field on every call and its text was
+		// overwritten with a debug placeholder, so the argument never became visible
+		if (this.selectedFiltersLabel == null) {
+			this.selectedFiltersLabel = new Label();
+		}
+		this.selectedFiltersLabel.setText(filters);
+	}
+}
--- a/src/main/java/gui/StringAnalysisTabNew2.java
+++ b/src/main/java/gui/StringAnalysisTabNew2.java
@@ -0,0 +1,511 @@
+package gui;
+
+import static alg.XML_processing.*;
+import static gui.GUIController.*;
+import static gui.Messages.*;
+
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.util.*;
+import java.util.regex.Pattern;
+
+import javafx.application.HostServices;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.controlsfx.control.CheckComboBox;
+
+import data.*;
+import javafx.collections.FXCollections;
+import javafx.collections.ListChangeListener;
+import javafx.collections.ObservableList;
+import javafx.concurrent.Task;
+import javafx.fxml.FXML;
+import javafx.scene.control.*;
+import javafx.scene.layout.Pane;
+
+@SuppressWarnings("Duplicates")
+public class StringAnalysisTabNew2 {
+    public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class);
+
+    @FXML
+    public Label selectedFiltersLabel;
+    @FXML
+    public Label solarFilters;
+
+    @FXML
+    private TextField msdTF;
+    private ArrayList<Pattern> msd;
+    private ArrayList<String> msdStrings;
+
+    @FXML
+    private CheckComboBox<String> taxonomyCCB;
+    private ArrayList<String> taxonomy;
+
+    @FXML
+    private CheckBox calculatecvvCB;
+    private boolean calculateCvv;
+
+    @FXML
+    private TextField stringLengthTF;
+    private Integer stringLength;
+
+    @FXML
+    private ComboBox<String> calculateForCB;
+    private CalculateFor calculateFor;
+
+    @FXML
+    private ComboBox<String> ngramValueCB;
+    private Integer ngramValue;
+
+    @FXML
+    private ComboBox<String> skipValueCB;
+    private Integer skipValue;
+
+    @FXML
+    private Pane paneWords;
+
+    @FXML
+    private Pane paneLetters;
+
+    @FXML
+    private Button computeNgramsB;
+
+    @FXML
+    public ProgressBar ngramProgressBar;
+    @FXML
+    public Label progressLabel;
+
+    @FXML
+    private Hyperlink helpH;
+
+    private enum MODE {
+        LETTER,
+        WORD
+    }
+
+    private MODE currentMode;
+
+    private Corpus corpus;
+    private HashMap<String, HashSet<String>> solarFiltersMap;
+    private Filter filter;
+    private boolean useDb;
+    private HostServices hostService;
+
+    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
+    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
+    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
+
+
+    // TODO: pass observables for taxonomy based on header scan
+    // after header scan
+    private ObservableList<String> taxonomyCCBValues;
+    private CorpusType currentCorpusType;
+
+    /** Wires the listeners and default values of every control on this tab. Must run after FXML
+     *  injection and after setCorpus(), because the controls and corpus are dereferenced here. */
+    public void init() {
+        currentMode = MODE.WORD;
+        toggleMode(currentMode);
+
+        // ngram value CB
+        ngramValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
+            if (newValue.equals("nivo črk")) {
+                ngramValue = 0;
+                toggleMode(MODE.LETTER);
+            } else {
+                ngramValue = Integer.valueOf(newValue);
+                toggleMode(MODE.WORD);
+            }
+
+            // skip only on ngrams of more than one word
+            if (ngramValue > 1) {
+                skipValueCB.setDisable(false);
+            } else {
+                skipValueCB.getSelectionModel().select(0);
+                skipValue = 0;
+                skipValueCB.setDisable(true);
+            }
+
+            logger.info("ngramValueCB: {}", ngramValue); // {} is required for the argument to be logged
+        });
+
+        // set first n-gram value to 2 at index 0
+        ngramValueCB.getSelectionModel().select(0); // selected index
+        ngramValue = 2; // actual value at that index
+
+        // calculateForCB
+        calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
+            calculateFor = CalculateFor.factory(newValue);
+            logger.info("calculateForCB: {}", calculateFor); // passed as-is so a null result cannot NPE
+        });
+
+        calculateForCB.getSelectionModel().select(0);
+
+        // msd
+        msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
+            if (!newValue) { // focus lost
+                String value = msdTF.getText();
+                logger.info("msdTf: {}", value);
+
+                if (!ValidationUtil.isEmpty(value)) {
+                    ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
+                    int nOfRequiredMsdTokens = ngramValue == 0 ? 1 : ngramValue;
+                    if (msdTmp.size() != nOfRequiredMsdTokens) {
+                        String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
+                        logAlert(msg);
+                        showAlert(Alert.AlertType.ERROR, msg);
+                    }
+                    msd = new ArrayList<>();
+                    msdStrings = new ArrayList<>();
+                    for (String msdToken : msdTmp) {
+                        msd.add(Pattern.compile(msdToken)); // NOTE(review): an invalid regex throws PatternSyntaxException here
+                        msdStrings.add(msdToken);
+                    }
+                    logger.info(String.format("msd accepted (%d)", msd.size()));
+                } else { // an empty field clears any previously accepted msd filter
+                    msd = new ArrayList<>();
+                    msdStrings = new ArrayList<>();
+                }
+            }
+        });
+
+        msdTF.setText("");
+        msd = new ArrayList<>();
+
+        // taxonomy
+        if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+            taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); // setAll already replaces the old items
+            taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
+                taxonomy = new ArrayList<>();
+                ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
+                taxonomy.addAll(checkedItems);
+                logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
+            });
+            taxonomyCCB.getCheckModel().clearChecks();
+        } else {
+            taxonomyCCB.setDisable(true);
+        }
+
+        // skip
+        skipValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
+            skipValue = Integer.valueOf(newValue);
+            logger.info("Skip " + skipValue);
+        });
+
+        skipValueCB.getSelectionModel().select(0);
+        skipValue = 0;
+
+        // cvv
+        calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
+            calculateCvv = newValue;
+            logger.info("calculate cvv: " + calculateCvv);
+        });
+
+        calculatecvvCB.setSelected(false);
+
+        // string length
+        stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
+            if (!newValue) { // focus lost
+                String value = stringLengthTF.getText();
+                if (!ValidationUtil.isEmpty(value)) {
+                    try {
+                        stringLength = Integer.parseInt(value);
+                    } catch (NumberFormatException ex) {
+                        // not a plain integer: report it and keep the previous value instead of crashing the listener
+                        logAlert("stringlengthTf: " + WARNING_ONLY_NUMBERS_ALLOWED);
+                        GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
+                    }
+                } else {
+                    GUIController.showAlert(Alert.AlertType.ERROR, WARNING_MISSING_STRING_LENGTH);
+                    stringLengthTF.setText("1");
+                    stringLength = 1; // keep the stored value in step with the text that is displayed
+                    logAlert(WARNING_MISSING_STRING_LENGTH);
+                }
+            }
+        });
+
+        computeNgramsB.setOnAction(e -> {
+            compute();
+            logger.info("compute button");
+        });
+
+        helpH.setOnAction(e -> openHelpWebsite());
+    }
+
+    /**
+     * case a: values for combo boxes can change after a corpus change
+     * <ul>
+     * <li>different corpus type - reset all fields so no old values remain</li>
+     * <li>same corpus type, different subset - keep</li>
+     * </ul>
+     * <p>
+     * case b: values for combo boxes can change after a header scan
+     * <ul>
+     * <li>at first, fields are populated by corpus type defaults</li>
+     * <li>after, with gathered data</li>
+     * </ul>
+     * <p></p>
+     * ngrams: 1
+     * calculateFor: word
+     * msd:
+     * taxonomy:
+     * skip: 0
+     * iscvv: false
+     * string length: 1
+     */
+    public void populateFields() {
+        // Restores every control from its remembered value, falling back to a default when
+        // nothing was remembered, and then refreshes the taxonomy entries from the corpus.
+
+        // corpus changed if: current one is null (this is first run of the app)
+        // or if currentCorpus != gui's corpus
+        boolean corpusChanged = currentCorpusType == null
+                || currentCorpusType != corpus.getCorpusType();
+
+        // keep ngram value if set
+        if (ngramValue == null) {
+            ngramValueCB.getSelectionModel().select("1");
+            ngramValue = 1;
+        }
+
+        // TODO: check for GOS, GIGAFIDA, SOLAR...
+        // refresh and:
+        // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
+        if (calculateFor == null) {
+            calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
+            calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
+        }
+
+        // NOTE(review): the 'filter' field read below is never assigned anywhere in this class
+        // (compute() only builds a local variable of the same name), so confirm that it is
+        // populated before this method is called; otherwise the next line throws an NPE.
+        if (!filter.hasMsd()) {
+            // if current corpus doesn't have msd data, disable this field
+            msd = new ArrayList<>();
+            msdTF.setText("");
+            msdTF.setDisable(true);
+            logger.info("no msd data");
+        } else {
+            if (ValidationUtil.isEmpty(msd) || corpusChanged) {
+                // msd has not been set previously
+                // or msd has been set but the corpus changed -> reset
+                msd = new ArrayList<>();
+                msdTF.setText("");
+                msdTF.setDisable(false);
+                logger.info("msd reset");
+            } else {
+                // if msd has been set, but corpus type remained the same, we can keep any set msd value
+                msdTF.setText(StringUtils.join(msdStrings, " "));
+                msdTF.setDisable(false);
+                logger.info("msd kept");
+            }
+        }
+
+        // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
+
+        // keep skip value
+        if (skipValue == null) {
+            skipValueCB.getSelectionModel().select("0");
+            skipValue = 0;
+        }
+
+        // keep calculateCvv
+        calculatecvvCB.setSelected(calculateCvv);
+
+        // keep string length if set
+        if (stringLength != null) {
+            stringLengthTF.setText(String.valueOf(stringLength));
+        } else {
+            stringLengthTF.setText("1");
+            stringLength = 1;
+        }
+
+        // TODO: trigger on rescan
+        // see if we read taxonomy from headers, otherwise use default values for given corpus
+        ObservableList<String> tax = corpus.getTaxonomy();
+        taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
+        // setAll rather than addAll: a repeated call has to replace the entries,
+        // appending would list every taxonomy value once more per call
+        taxonomyCCB.getItems().setAll(taxonomyCCBValues);
+
+        // Remember which corpus type the fields now reflect. This used to be assigned only
+        // when the field was already non-null, so it stayed null forever and corpusChanged
+        // was true on every call (which in turn reset msd every time).
+        currentCorpusType = corpus.getCorpusType();
+
+    }
+
+    /**
+     * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
+     * sets combobox values to what is applicable ...
+     *
+     * @param mode mode to switch to; {@code null} re-applies the current mode
+     */
+    public void toggleMode(MODE mode) {
+        if (mode == null) {
+            mode = currentMode; // null means: re-apply whatever mode is current
+        }
+
+        logger.info("mode: {}", mode); // {} placeholder so the mode is logged; no toString() on a possible null
+
+        if (mode == MODE.WORD) {
+            paneWords.setVisible(true);
+            paneLetters.setVisible(false);
+            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
+        } else if (mode == MODE.LETTER) {
+            paneWords.setVisible(false);
+            paneLetters.setVisible(true);
+            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);
+
+            // populate with default cvv length value
+            if (stringLength == null) {
+                stringLengthTF.setText("1");
+                stringLength = 1;
+            } else {
+                stringLengthTF.setText(String.valueOf(stringLength));
+            }
+
+            // if calculateFor was selected for something other than a word or a lemma -> reset
+            if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
+                // if the user selected something else before selecting ngram for letters, reset that choice
+                calculateFor = CalculateFor.WORD;
+                calculateForCB.getSelectionModel().select("različnica");
+            }
+        }
+
+        // override if orth mode, allow only word
+        if (corpus.isGosOrthMode()) {
+            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
+            msdTF.setDisable(true);
+        } else {
+            msdTF.setDisable(false);
+        }
+    }
+
+    private void compute() {
+        Filter filter = new Filter(); // local filter built from the current field values
+        filter.setNgramValue(ngramValue);
+        filter.setCalculateFor(calculateFor);
+        filter.setMsd(msd);
+        filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
+        filter.setAl(AnalysisLevel.STRING_LEVEL);
+        filter.setSkipValue(skipValue);
+        filter.setIsCvv(calculateCvv);
+        filter.setSolarFilters(solarFiltersMap);
+
+        if (ngramValue != null && ngramValue == 0) { // 0 is the value stored for the letter-level option
+            filter.setStringLength(stringLength);
+        }
+
+        String message = Validation.validateForStringLevel(filter);
+        if (message == null) {
+            // no errors
+            logger.info("Executing: {}", filter); // {} placeholder, otherwise the filter is not logged
+            StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
+            execute(statistic);
+        } else {
+            logAlert(message);
+            showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
+        }
+    }
+
+    private void logAlert(String alert) {
+        logger.info("alert: " + alert); // written at INFO level with an "alert: " prefix
+    }
+
+    private void openHelpWebsite(){
+        hostService.showDocument(Messages.HELP_URL); // hostService is only assigned by setHostServices
+    }
+
+    public Corpus getCorpus() {
+        return corpus; // the instance last passed to setCorpus, returned without copying
+    }
+
+    public void setCorpus(Corpus corpus) {
+        this.corpus = corpus;
+
+        // SOLAR corpora get the filter labels with "/" as the initial text;
+        // for every other corpus type the labels are hidden (null content)
+        final boolean solarCorpus = corpus.getCorpusType() == CorpusType.SOLAR;
+        final String initialLabel = solarCorpus ? "/" : null;
+        setSelectedFiltersLabel(initialLabel);
+    }
+
+    public void setSelectedFiltersLabel(String content) {
+        // a null content hides both labels, anything else shows them
+        final boolean visible = content != null;
+        solarFilters.setVisible(visible);
+        selectedFiltersLabel.setVisible(visible);
+
+        if (visible) {
+            selectedFiltersLabel.setText(content); // the text is only touched while shown
+        }
+    }
+
+    private void execute(StatisticsNew statistic) {
+        logger.info("Started execution: {}", statistic.getFilter());
+
+        Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
+
+        // worker: reads the corpus files one by one and publishes progress after each file
+        final Task<Void> task = new Task<Void>() {
+            @SuppressWarnings("Duplicates")
+            @Override
+            protected Void call() throws Exception {
+                long i = 0;
+                for (File f : corpusFiles) {
+                    readXML(f.toString(), statistic);
+                    i++;
+                    this.updateProgress(i, corpusFiles.size());
+                    this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
+                }
+
+                return null;
+            }
+        };
+
+        ngramProgressBar.progressProperty().bind(task.progressProperty());
+        progressLabel.textProperty().bind(task.messageProperty());
+
+        task.setOnSucceeded(e -> {
+            try {
+                boolean successullySaved = statistic.saveResultToDisk();
+                if (successullySaved) {
+                    showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
+                } else {
+                    showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
+                }
+            } catch (UnsupportedEncodingException e1) {
+                showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
+                logger.error("Error while saving", e1);
+            }
+
+            ngramProgressBar.progressProperty().unbind();
+            ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
+            progressLabel.textProperty().unbind();
+            progressLabel.setText("");
+        });
+
+        task.setOnFailed(e -> {
+            showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
+            logger.error("Error while executing", task.getException()); // log the real cause, not the event object
+            ngramProgressBar.progressProperty().unbind();
+            ngramProgressBar.setProgress(0.0);
+            ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
+            progressLabel.textProperty().unbind();
+            progressLabel.setText("");
+        });
+
+        final Thread thread = new Thread(task, "task");
+        thread.setDaemon(true);
+        thread.start();
+    }
+
+    public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
+        this.solarFiltersMap = solarFiltersMap; // by reference; compute() hands this map to the Filter
+    }
+    public void setHostServices(HostServices hostServices){
+        this.hostService = hostServices; // used by openHelpWebsite() to open the help page
+    }
+}
--- a/src/main/java/gui/ValidationUtil.java
+++ b/src/main/java/gui/ValidationUtil.java
@@ -0,0 +1,77 @@
+package gui;
+
+import java.io.File;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang3.math.NumberUtils;
+
+public class ValidationUtil {
+
+	public static boolean isNumber(String value) {
+		return NumberUtils.isCreatable(value); // NOTE: also accepts hex, decimals and type suffixes such as 1L
+	}
+
+	/**
+	 * Checks if an object is empty or null. Null part is especially important,
+	 * since Java's built-in isEmpty() methods don't check for this condition
+	 * and throw a nullPointerException as a result.
+	 * <p>
+	 * Supported structures:
+	 * <ul>
+	 * <li>String: empty if null or length is zero</li>
+	 * <li>List: empty if null or size() == 0</li>
+	 * <li>Map: empty if null or if it contains no keys, or if all keys map to an empty value </li>
+	 * </ul>
+	 * Any other non-null object is never considered empty.
+	 *
+	 * @param o object to test, may be null
+	 * @return true if o is null or empty according to the rules above
+	 */
+	public static boolean isEmpty(Object o) {
+		if (o == null) {
+			return true;
+		}
+
+		if (o instanceof String) {
+			return ((String) o).isEmpty();
+		}
+
+		if (o instanceof List) {
+			return ((List<?>) o).isEmpty();
+		}
+
+		if (o instanceof Map) {
+			// a map without entries never enters the loop and is therefore reported as empty
+			for (Object val : ((Map<?, ?>) o).values()) {
+				if (!isEmpty(val)) {
+					// if map contains any value that isn't empty, the map isn't considered empty
+					return false;
+				}
+			}
+			// every value was null/empty: per the contract above the map counts as empty
+			return true;
+		}
+
+		// unsupported types are only "empty" when they are null, which was handled first
+		return false;
+	}
+
+	public static boolean isNotEmpty(Object o) {
+		return !isEmpty(o);
+	}
+
+	/**
+	 * Checks whether a given File is a folder that we can both read from and write to
+	 */
+	public static boolean isValidDirectory(File f) {
+		return f.isDirectory() && f.canRead() && f.canWrite();
+	}
+
+	/**
+	 * Checks whether a given File is a folder that we can read from (write access is not required)
+	 */
+	public static boolean isReadableDirectory(File f) {
+		return f.isDirectory() && f.canRead();
+	}
+}
--- a/src/main/java/gui/WordFormationTab.java
+++ b/src/main/java/gui/WordFormationTab.java
@@ -0,0 +1,208 @@
+package gui;
+
+import static alg.XML_processing.*;
+import static gui.GUIController.*;
+import static gui.Messages.*;
+
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+
+import javafx.application.HostServices;
+import javafx.scene.control.*;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.controlsfx.control.CheckComboBox;
+
+import data.*;
+import javafx.collections.ListChangeListener;
+import javafx.collections.ObservableList;
+import javafx.concurrent.Task;
+import javafx.fxml.FXML;
+import javafx.scene.layout.AnchorPane;
+
+@SuppressWarnings("Duplicates")
+public class WordFormationTab {
+	public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
+
+	public AnchorPane wordAnalysisTabPane;
+
+	@FXML
+	public Label selectedFiltersLabel;
+	@FXML
+	public Label solarFilters;
+
+	@FXML
+	private CheckComboBox<String> taxonomyCCB;
+	private ArrayList<String> taxonomy;
+
+	@FXML
+	private Button computeB;
+
+	@FXML
+	public ProgressBar ngramProgressBar;
+	@FXML
+	public Label progressLabel;
+
+	@FXML
+	private Hyperlink helpH;
+
+	private Corpus corpus;
+	private HashMap<String, HashSet<String>> solarFiltersMap;
+	private HostServices hostService;
+
+	// after header scan
+	private ObservableList<String> taxonomyCCBValues;
+	private CorpusType currentCorpusType;
+	private boolean useDb;
+
+
+	public void init() {
+		// taxonomy
+		if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+			// setAll replaces the existing items; the former argument-less removeAll() removed nothing
+			taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
+			taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
+				taxonomy = new ArrayList<>();
+				ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
+				taxonomy.addAll(checkedItems);
+				logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
+			});
+			taxonomyCCB.getCheckModel().clearChecks();
+		} else {
+			taxonomyCCB.setDisable(true);
+		}
+
+		computeB.setOnAction(e -> {
+			compute();
+			logger.info("compute button");
+		});
+
+		helpH.setOnAction(e -> openHelpWebsite());
+	}
+
+	private void compute() {
+		Filter filter = new Filter(); // fixed settings; only taxonomy and solar filters come from the UI
+		filter.setNgramValue(1);
+		filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
+		filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
+		filter.setAl(AnalysisLevel.STRING_LEVEL);
+		filter.setSkipValue(0);
+		filter.setMsd(new ArrayList<>());
+		filter.setIsCvv(false);
+		filter.setSolarFilters(solarFiltersMap);
+
+		String message = Validation.validateForStringLevel(filter);
+		if (message == null) {
+			// no errors
+			logger.info("Executing: {}", filter); // {} placeholder, otherwise the filter is not logged
+			StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
+			execute(statistic);
+		} else {
+			logAlert(message);
+			showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
+		}
+	}
+
+	private void openHelpWebsite(){
+		hostService.showDocument(Messages.HELP_URL); // hostService is only assigned by setHostServices
+	}
+
+	private void execute(StatisticsNew statistic) {
+		logger.info("Started execution: {}", statistic.getFilter());
+
+		Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
+
+		final Task<Void> task = new Task<Void>() {
+			@SuppressWarnings("Duplicates")
+			@Override
+			protected Void call() throws Exception {
+				long i = 0; // number of files processed so far
+				for (File f : corpusFiles) {
+					readXML(f.toString(), statistic);
+					i++;
+					this.updateProgress(i, corpusFiles.size());
+					this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
+				}
+
+				return null;
+			}
+		};
+
+		ngramProgressBar.progressProperty().bind(task.progressProperty());
+		progressLabel.textProperty().bind(task.messageProperty());
+
+		task.setOnSucceeded(e -> {
+			try {
+				// first, we have to recalculate all occurrences to detailed statistics
+				boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
+
+				if (successullySaved) {
+					showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
+				} else {
+					showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
+				}
+			} catch (UnsupportedEncodingException e1) {
+				showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
+				logger.error("Error while saving", e1);
+			}
+
+			ngramProgressBar.progressProperty().unbind();
+			ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
+			progressLabel.textProperty().unbind();
+			progressLabel.setText("");
+		});
+
+		task.setOnFailed(e -> {
+			showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
+			logger.error("Error while executing", task.getException()); // log the real cause, not the event object
+			ngramProgressBar.progressProperty().unbind();
+			ngramProgressBar.setProgress(0.0);
+			ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
+			progressLabel.textProperty().unbind();
+			progressLabel.setText("");
+		});
+
+		final Thread thread = new Thread(task, "task");
+		thread.setDaemon(true);
+		thread.start();
+	}
+
+	private void logAlert(String alert) {
+		logger.info("alert: " + alert); // written at INFO level with an "alert: " prefix
+	}
+
+
+	public void setCorpus(Corpus corpus) {
+		this.corpus = corpus;
+
+		// SOLAR corpora get the filter labels with "/" as the initial text;
+		// for every other corpus type the labels are hidden (null content)
+		final boolean solarCorpus = corpus.getCorpusType() == CorpusType.SOLAR;
+		final String initialLabel = solarCorpus ? "/" : null;
+		setSelectedFiltersLabel(initialLabel);
+	}
+
+	public void setSelectedFiltersLabel(String content) {
+		// a null content hides both labels, anything else shows them
+		final boolean visible = content != null;
+		solarFilters.setVisible(visible);
+		selectedFiltersLabel.setVisible(visible);
+
+		if (visible) {
+			selectedFiltersLabel.setText(content); // the text is only touched while shown
+		}
+	}
+
+	public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
+		this.solarFiltersMap = solarFiltersMap; // by reference; compute() hands this map to the Filter
+	}
+
+	public void setHostServices(HostServices hostServices){
+		this.hostService = hostServices; // used by openHelpWebsite() to open the help page
+	}
+}
--- a/src/main/java/gui/WordLevelTab.java
+++ b/src/main/java/gui/WordLevelTab.java
@@ -0,0 +1,207 @@
+package gui;
+
+import static alg.XML_processing.*;
+import static gui.GUIController.*;
+import static gui.Messages.*;
+
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+
+import javafx.application.HostServices;
+import javafx.scene.control.*;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.controlsfx.control.CheckComboBox;
+
+import data.*;
+import javafx.collections.ListChangeListener;
+import javafx.collections.ObservableList;
+import javafx.concurrent.Task;
+import javafx.fxml.FXML;
+import javafx.scene.layout.AnchorPane;
+
+@SuppressWarnings("Duplicates")
+public class WordLevelTab {
+	public final static Logger logger = LogManager.getLogger(WordLevelTab.class);
+
+	public AnchorPane wordLevelAnalysisTabPane;
+
+	@FXML
+	public Label selectedFiltersLabel;
+	@FXML
+	public Label solarFilters;
+
+	@FXML
+	private CheckComboBox<String> taxonomyCCB;
+	private ArrayList<String> taxonomy;
+
+	@FXML
+	private Button computeB;
+
+	@FXML
+	public ProgressBar ngramProgressBar;
+	@FXML
+	public Label progressLabel;
+
+	@FXML
+	private Hyperlink helpH;
+
+	private Corpus corpus;
+	private HashMap<String, HashSet<String>> solarFiltersMap;
+	private HostServices hostService;
+
+	// after header scan
+	private ObservableList<String> taxonomyCCBValues;
+	private CorpusType currentCorpusType;
+	private boolean useDb;
+
+
+	public void init() {
+		// taxonomy
+		if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+			// setAll replaces the existing items; the former argument-less removeAll() removed nothing
+			taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
+			taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
+				taxonomy = new ArrayList<>();
+				ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
+				taxonomy.addAll(checkedItems);
+				logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
+			});
+			taxonomyCCB.getCheckModel().clearChecks();
+		} else {
+			taxonomyCCB.setDisable(true);
+		}
+
+		computeB.setOnAction(e -> {
+			compute();
+			logger.info("compute button");
+		});
+
+		helpH.setOnAction(e -> openHelpWebsite());
+	}
+
+	private void openHelpWebsite(){
+		hostService.showDocument(Messages.HELP_URL); // hostService is only assigned by setHostServices
+	}
+	private void compute() {
+		Filter filter = new Filter(); // fixed settings; only taxonomy and solar filters come from the UI
+		filter.setNgramValue(1);
+		filter.setCalculateFor(CalculateFor.WORD);
+		filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
+		filter.setAl(AnalysisLevel.WORD_LEVEL);
+		filter.setSkipValue(0);
+		filter.setMsd(new ArrayList<>());
+		filter.setIsCvv(false);
+		filter.setSolarFilters(solarFiltersMap);
+
+		String message = Validation.validateForStringLevel(filter);
+		if (message == null) {
+			// no errors
+			logger.info("Executing: {}", filter); // {} placeholder, otherwise the filter is not logged
+			StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
+			execute(statistic);
+		} else {
+			logAlert(message);
+			showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
+		}
+	}
+
+	private void execute(StatisticsNew statistic) {
+		logger.info("Started execution: {}", statistic.getFilter());
+
+		Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
+
+		final Task<Void> task = new Task<Void>() {
+			@SuppressWarnings("Duplicates")
+			@Override
+			protected Void call() throws Exception {
+				long i = 0; // number of files processed so far
+				for (File f : corpusFiles) {
+					readXML(f.toString(), statistic);
+					i++;
+					this.updateProgress(i, corpusFiles.size());
+					this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
+				}
+
+				return null;
+			}
+		};
+
+		ngramProgressBar.progressProperty().bind(task.progressProperty());
+		progressLabel.textProperty().bind(task.messageProperty());
+
+		task.setOnSucceeded(e -> {
+			try {
+				// write the collected statistics to disk via saveResultNestedToDisk
+				boolean successullySaved = statistic.saveResultNestedToDisk();
+
+				if (successullySaved) {
+					showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
+				} else {
+					showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
+				}
+			} catch (UnsupportedEncodingException e1) {
+				showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
+				logger.error("Error while saving", e1);
+			}
+
+			ngramProgressBar.progressProperty().unbind();
+			ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
+			progressLabel.textProperty().unbind();
+			progressLabel.setText("");
+		});
+
+		task.setOnFailed(e -> {
+			showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
+			logger.error("Error while executing", task.getException()); // log the real cause, not the event object
+			ngramProgressBar.progressProperty().unbind();
+			ngramProgressBar.setProgress(0.0);
+			ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
+			progressLabel.textProperty().unbind();
+			progressLabel.setText("");
+		});
+
+		final Thread thread = new Thread(task, "task");
+		thread.setDaemon(true);
+		thread.start();
+	}
+
+	private void logAlert(String alert) {
+		logger.info("alert: " + alert); // written at INFO level with an "alert: " prefix
+	}
+
+
+	public void setCorpus(Corpus corpus) {
+		this.corpus = corpus;
+
+		// SOLAR corpora get the filter labels with "/" as the initial text;
+		// for every other corpus type the labels are hidden (null content)
+		final boolean solarCorpus = corpus.getCorpusType() == CorpusType.SOLAR;
+		final String initialLabel = solarCorpus ? "/" : null;
+		setSelectedFiltersLabel(initialLabel);
+	}
+
+	public void setSelectedFiltersLabel(String content) {
+		// a null content hides both labels, anything else shows them
+		final boolean visible = content != null;
+		solarFilters.setVisible(visible);
+		selectedFiltersLabel.setVisible(visible);
+
+		if (visible) {
+			selectedFiltersLabel.setText(content); // the text is only touched while shown
+		}
+	}
+
+	public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
+		this.solarFiltersMap = solarFiltersMap; // by reference; compute() hands this map to the Filter
+	}
+
+	public void setHostServices(HostServices hostServices){
+		this.hostService = hostServices; // used by openHelpWebsite() to open the help page
+	}
+}
--- a/src/main/java/manifest/META-INF/MANIFEST.MF
+++ b/src/main/java/manifest/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Main-Class: gui.GUIController
+
--- a/src/main/java/util/ByteUtils.java
+++ b/src/main/java/util/ByteUtils.java
@@ -0,0 +1,35 @@
+package util;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Conversions between {@code long} values and their 8-byte big-endian representation.
+ */
+public class ByteUtils {
+
+	/**
+	 * Encodes a long as {@link Long#BYTES} bytes in the buffer's default (big-endian) order.
+	 * Taken from <a href="https://stackoverflow.com/a/4485196">StackOverflow</a>
+	 *
+	 * @param x value to encode
+	 * @return array of exactly 8 bytes
+	 */
+	public static byte[] longToBytes(long x) {
+		return ByteBuffer.allocate(Long.BYTES).putLong(x).array();
+	}
+
+	/**
+	 * Decodes the long stored in {@code bytes}; inverse of {@link #longToBytes(long)}.
+	 * Taken from <a href="https://stackoverflow.com/a/4485196">StackOverflow</a>
+	 *
+	 * @param bytes the 8 bytes to decode
+	 * @return decoded value
+	 */
+	public static long bytesToLong(byte[] bytes) {
+		ByteBuffer holder = ByteBuffer.allocate(Long.BYTES);
+		holder.put(bytes);
+		// switch from writing to reading: limit = position, position = 0
+		holder.flip();
+		return holder.getLong();
+	}
+}
--- a/src/main/java/util/Combinations.java
+++ b/src/main/java/util/Combinations.java
@@ -0,0 +1,63 @@
+package util;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.stream.IntStream;
+
+/**
+ * Generates index combinations; results are collected in a static set that
+ * {@link #generateIndices(int)} replaces on every call.
+ */
+public class Combinations {
+	private static HashSet<HashSet<Integer>> result = new HashSet<>();
+
+	/**
+	 * Recursively fills {@code data} and stores every completed combination in {@code result}.
+	 *
+	 * @param arr               input array
+	 * @param data              scratch array holding the combination being built
+	 * @param start             first index of {@code arr} that may still be used
+	 * @param end               last index of {@code arr} that may be used
+	 * @param index             position in {@code data} to fill next
+	 * @param combinationLength size of a finished combination
+	 */
+	static void combinationUtil(int arr[], Integer data[], int start, int end, int index, int combinationLength) {
+		// data is full: store a copy as a set, because the array is overwritten by later iterations
+		if (index == combinationLength) {
+			result.add(new HashSet<>(Arrays.asList(data)));
+			return;
+		}
+
+		// Try every element that still leaves enough elements to its right
+		// (end - pos + 1 of them) to fill the remaining combinationLength - index slots.
+		int pos = start;
+		while (pos <= end && end - pos + 1 >= combinationLength - index) {
+			data[index] = arr[pos];
+			combinationUtil(arr, data, pos + 1, end, index + 1, combinationLength);
+			pos++;
+		}
+	}
+
+	/**
+	 * Builds all combinations of the numbers {@code 1 .. maxNOfIndices - 1} having
+	 * between 1 and {@code maxNOfIndices - 2} elements, plus the empty combination.
+	 *
+	 * @param maxNOfIndices exclusive upper bound of the generated numbers
+	 * @return the freshly generated set of combinations
+	 */
+	public static HashSet<HashSet<Integer>> generateIndices(int maxNOfIndices) {
+		result = new HashSet<>();
+
+		// the empty combination stands for X.... (all of this type)
+		result.add(new HashSet<>());
+
+		int[] candidates = IntStream.range(1, maxNOfIndices).toArray();
+		int lastIndex = candidates.length - 1;
+		for (int size = 1; size < maxNOfIndices - 1; size++) {
+			// fresh scratch array per combination size
+			combinationUtil(candidates, new Integer[size], 0, lastIndex, 0, size);
+		}
+
+		return result;
+	}
+}
--- a/src/main/java/util/Export.java
+++ b/src/main/java/util/Export.java
@@ -0,0 +1,199 @@
+package util;
+
+import static util.Util.*;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.logging.log4j.LogManager;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+
+import data.Enums.WordLevelType;
+
+/**
+ * Writes calculation results to JSON and CSV files.
+ */
+public class Export {
+	private static final org.apache.logging.log4j.Logger logger = LogManager.getLogger(Export.class);
+
+	/** Semicolon-delimited CSV with "\n" as the record separator. */
+	private static final CSVFormat CSV_FORMAT = CSVFormat.DEFAULT.withRecordSeparator("\n").withDelimiter(';');
+
+	/** Supplies the data records of one CSV file. */
+	@FunctionalInterface
+	private interface RowWriter {
+		void writeRows(CSVPrinter printer) throws IOException;
+	}
+
+	/**
+	 * Writes all non-empty frequency maps to the file "statistics.json" in the current
+	 * working directory, encoded as UTF-8.
+	 *
+	 * @param set pairs of a title and its word -> frequency map
+	 */
+	@SuppressWarnings("unchecked") // json-simple's JSONObject and JSONArray extend raw collections
+	public static void SetToJSON(Set<Pair<String, Map<String, Long>>> set) {
+		JSONArray wrapper = new JSONArray();
+
+		for (Pair<String, Map<String, Long>> p : set) {
+			String title = p.getLeft();
+			Map<String, Long> map = p.getRight();
+
+			if (map.isEmpty())
+				continue;
+
+			long total = Util.mapSumFrequencies(map);
+			JSONArray data_wrapper = new JSONArray();
+
+			for (Map.Entry<String, Long> e : map.entrySet()) {
+				JSONObject data_entry = new JSONObject();
+				data_entry.put("word", e.getKey());
+				data_entry.put("frequency", e.getValue());
+				data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
+
+				data_wrapper.add(data_entry);
+			}
+
+			JSONObject metric = new JSONObject();
+			metric.put("Title", title);
+			metric.put("data", data_wrapper);
+			wrapper.add(metric);
+		}
+
+		// explicit UTF-8: FileWriter would use the platform default charset
+		try (Writer file = new OutputStreamWriter(new FileOutputStream("statistics.json"), StandardCharsets.UTF_8)) {
+			file.write(wrapper.toJSONString());
+		} catch (IOException e) {
+			logger.error("Error while writing statistics.json", e);
+		}
+	}
+
+	/**
+	 * Writes every non-empty frequency map to its own CSV file inside {@code resultsPath}.
+	 *
+	 * @param set             pairs of a title (used for the file name) and its word -> frequency map
+	 * @param resultsPath     directory the files are written to
+	 * @param headerInfoBlock key/value lines printed before the table
+	 *
+	 * @return path derived from the last pair that was iterated, even if its map was empty and
+	 * no file was written for it; an empty string for an empty set
+	 */
+	public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
+		String fileName = "";
+
+		for (Pair<String, Map<String, Long>> p : set) {
+			fileName = toCsvPath(p.getLeft(), resultsPath);
+
+			Map<String, Long> map = p.getRight();
+
+			if (map.isEmpty())
+				continue;
+
+			long total = Util.mapSumFrequencies(map);
+
+			writeCsv(fileName, headerInfoBlock, new Object[]{"word", "frequency", "percent"}, printer -> {
+				for (Map.Entry<String, Long> e : map.entrySet()) {
+					printer.printRecord(e.getKey(), e.getValue().toString(), formatNumberAsPercent((double) e.getValue() / total));
+				}
+			});
+		}
+
+		return fileName;
+	}
+
+	/**
+	 * Writes a result table to a CSV file inside {@code resultsPath}.
+	 *
+	 * @param title           used to derive the file name
+	 * @param result          rows of word, frequency and a numeric fraction that is printed as a percentage
+	 * @param resultsPath     directory the file is written to
+	 * @param headerInfoBlock key/value lines printed before the table
+	 *
+	 * @return path of the CSV file
+	 */
+	public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
+		String fileName = toCsvPath(title, resultsPath);
+
+		writeCsv(fileName, headerInfoBlock, new Object[]{"word", "frequency", "percent"}, printer -> {
+			for (Object[] resultEntry : result) {
+				printer.printRecord(resultEntry[0], resultEntry[1], formatNumberAsPercent(resultEntry[2]));
+			}
+		});
+
+		return fileName;
+	}
+
+	/**
+	 * Writes nested results (type -> key -> word -> frequency) to a CSV file inside {@code resultsPath}.
+	 *
+	 * @param title           used to derive the file name
+	 * @param result          frequencies grouped by word level type and key
+	 * @param resultsPath     directory the file is written to
+	 * @param headerInfoBlock key/value lines printed before the table
+	 *
+	 * @return path of the CSV file
+	 */
+	public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
+		String fileName = toCsvPath(title, resultsPath);
+
+		writeCsv(fileName, headerInfoBlock, new Object[]{"type", "key", "word", "frequency"}, printer -> {
+			for (Map.Entry<WordLevelType, Map<String, Map<String, Long>>> typeEntry : result.entrySet()) {
+				for (Map.Entry<String, Map<String, Long>> keyWordEntry : typeEntry.getValue().entrySet()) {
+					for (Map.Entry<String, Long> calculationResults : keyWordEntry.getValue().entrySet()) {
+						printer.printRecord(typeEntry.getKey().getName(), keyWordEntry.getKey(), calculationResults.getKey(), calculationResults.getValue());
+					}
+				}
+			}
+		});
+
+		return fileName;
+	}
+
+	/**
+	 * Builds the CSV path for a title: ": " becomes "-", spaces become "_" and ".csv" is appended.
+	 */
+	private static String toCsvPath(String title, File resultsPath) {
+		String fileName = title.replace(": ", "-").replace(" ", "_").concat(".csv");
+
+		return resultsPath.toString().concat(File.separator).concat(fileName);
+	}
+
+	/**
+	 * Creates {@code fileName} (UTF-8) and writes the info block, the column header and the rows.
+	 * Failures are logged and not propagated, so the file may be missing or incomplete afterwards.
+	 */
+	private static void writeCsv(String fileName, LinkedHashMap<String, String> headerInfoBlock, Object[] header, RowWriter rows) {
+		// try-with-resources closes printer and writer even when writing fails half-way
+		try (Writer fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
+				CSVPrinter csvFilePrinter = new CSVPrinter(fileWriter, CSV_FORMAT)) {
+			// write info block
+			printHeaderInfo(csvFilePrinter, headerInfoBlock);
+
+			//Create CSV file header
+			csvFilePrinter.printRecord(header);
+
+			rows.writeRows(csvFilePrinter);
+		} catch (IOException | RuntimeException e) {
+			// best effort: nothing is rethrown to the caller
+			logger.error("Error in CsvFileWriter!", e);
+		}
+	}
+
+	/**
+	 * Prints one record per entry of the info block, followed by two empty lines.
+	 */
+	private static void printHeaderInfo(CSVPrinter csvFilePrinter, LinkedHashMap<String, String> headerInfoBlock) throws IOException {
+		for (Map.Entry<String, String> entry : headerInfoBlock.entrySet()) {
+			csvFilePrinter.printRecord(entry.getKey(), entry.getValue());
+		}
+
+		// 2 empty lines
+		csvFilePrinter.println();
+		csvFilePrinter.println();
+	}
+}
--- a/src/main/java/util/Key.java
+++ b/src/main/java/util/Key.java
@@ -0,0 +1,31 @@
+package util;
+
+public class Key /*implements Comparable<Key> */ {
+	// private final String value;
+	// NOTE(review): the whole class body is commented out, so Key currently has no members.
+	// Key(String value) {
+	// 	this.value = value;
+	// }
+	// NOTE(review): Objects.compare needs a Comparator as third argument; this call would not compile.
+	// @Override
+	// public int compareTo(Key o) {
+	// 	return Objects.compare(this.value, o.value);
+	// }
+	// NOTE(review): this.equals(o) below calls itself without end; "this == o" was presumably intended.
+	// @Override
+	// public boolean equals(Object o) {
+	// 	if (this.equals(o)) {
+	// 		return true;
+	// 	}
+	// 	if (o == null || getClass() != o.getClass()) {
+	// 		return false;
+	// 	}
+	// 	Key key = (Key) o;
+	// 	return Objects.equals(value, key.value);
+	// }
+	// NOTE(review): a constant hashCode satisfies the contract but makes all keys collide.
+	// @Override
+	// public int hashCode() {
+	// 	return 0;
+	// }
+}
--- a/src/main/java/util/TimeWatch.java
+++ b/src/main/java/util/TimeWatch.java
@@ -0,0 +1,89 @@
+package util;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Simple stopwatch based on {@link System#nanoTime()}.
+ * Adapted from http://memorynotfound.com/calculating-elapsed-time-java/
+ */
+public class TimeWatch {
+
+	/** {@link System#nanoTime()} reading taken when the watch was (re)started. */
+	private long starts;
+
+	private TimeWatch() {
+		reset();
+	}
+
+	/**
+	 * Creates a watch that starts measuring immediately.
+	 *
+	 * @return a new, running watch
+	 */
+	public static TimeWatch start() {
+		return new TimeWatch();
+	}
+
+	/** Restarts the measurement from the current instant. */
+	private TimeWatch reset() {
+		starts = System.nanoTime();
+		return this;
+	}
+
+	/** @return nanoseconds elapsed since the watch was started */
+	private long time() {
+		return System.nanoTime() - starts;
+	}
+
+	/** @return elapsed time truncated to whole {@code unit}s */
+	private long time(TimeUnit unit) {
+		return unit.convert(time(), TimeUnit.NANOSECONDS);
+	}
+
+	/** @return elapsed time as whole minutes plus the remaining seconds */
+	private String toMinuteSeconds() {
+		// one clock reading, so both parts describe the same instant
+		long elapsed = time();
+		long minutes = TimeUnit.NANOSECONDS.toMinutes(elapsed);
+		// the minutes have to be converted to seconds before they are subtracted
+		long seconds = TimeUnit.NANOSECONDS.toSeconds(elapsed) - TimeUnit.MINUTES.toSeconds(minutes);
+
+		return String.format("%d min, %d sec", minutes, seconds);
+	}
+
+	/**
+	 * Formats the elapsed time as hours, minutes, seconds and milliseconds.
+	 *
+	 * @return text of the form {@code "1 h, 2 min, 3 s, 4 ms"}
+	 */
+	public String toFullTime() {
+		// read the clock once; separate readings could straddle a unit boundary and
+		// produce inconsistent parts (e.g. "0 h, 60 min")
+		long elapsed = time();
+
+		long hours = TimeUnit.NANOSECONDS.toHours(elapsed);
+		long minutes = TimeUnit.NANOSECONDS.toMinutes(elapsed) % 60;
+		long seconds = TimeUnit.NANOSECONDS.toSeconds(elapsed) % 60;
+		long milliseconds = TimeUnit.NANOSECONDS.toMillis(elapsed) % 1000;
+
+		return String.format("%d h, %d min, %d s, %d ms", hours, minutes, seconds, milliseconds);
+	}
+
+	/**
+	 * @return the elapsed time in nanoseconds, prefixed with a short description
+	 */
+	@Override
+	public String toString() {
+		return "Elapsed Time in nano seconds: " + time();
+	}
+
+	private void exampleUsage() {
+		TimeWatch watch = TimeWatch.start();
+
+		// do something...
+
+		System.out.println("Elapsed Time custom format: " + watch.toMinuteSeconds());
+		System.out.println("Elapsed Time in seconds: " + watch.time(TimeUnit.SECONDS));
+		System.out.println("Elapsed Time in nano seconds: " + watch.time());
+	}
+}
--- a/src/main/java/util/Util.java
+++ b/src/main/java/util/Util.java
@@ -0,0 +1,255 @@
+package util;
+
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.security.CodeSource;
+import java.text.MessageFormat;
+import java.util.*;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Stream;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import data.Settings;
+import gui.GUIController;
+import gui.ValidationUtil;
+
+/**
+ * Static helpers for time/number formatting, map handling and locating the working directory.
+ */
+public class Util {
+	public final static Logger logger = LogManager.getLogger(Util.class);
+
+	/**
+	 * Splits a duration into hours, minutes, seconds, milli-, micro- and nanoseconds.
+	 *
+	 * @param time duration in nanoseconds
+	 *
+	 * @return text of the form {@code "1 h, 2 min, 3 s, 4 ms, 5 µs, 6 ns"}
+	 */
+	public static String toReadableTime(long time) {
+		long hours = TimeUnit.NANOSECONDS.toHours(time);
+		// each smaller unit keeps only the part that does not fit into the next larger unit
+		long minutes = TimeUnit.NANOSECONDS.toMinutes(time) % 60;
+		long seconds = TimeUnit.NANOSECONDS.toSeconds(time) % 60;
+		long milliseconds = TimeUnit.NANOSECONDS.toMillis(time) % 1000;
+		long microseconds = TimeUnit.NANOSECONDS.toMicros(time) % 1000;
+		long nanoseconds = time % 1000;
+
+		return String.format("%d h, %d min, %d s, %d ms, %d µs, %d ns", hours, minutes, seconds, milliseconds, microseconds, nanoseconds);
+	}
+
+	/**
+	 * Converts a number to a more readable format.
+	 * 12345 -> 12.345
+	 * 12345,678 -> 12.345,67
+	 *
+	 * @param o byte, double, float, int, long, short
+	 *
+	 * @return number formatted with thousands separator and 2 decimal places (floats);
+	 * a fixed error text for any other input, including {@code null}
+	 */
+	private static String formatNumberReadable(Object o) {
+		if (o instanceof Byte || o instanceof Short || o instanceof Integer || o instanceof Long)
+			return String.format("%,d", o);
+		else if (o instanceof Float || o instanceof Double)
+			return String.format("%,.2f", o);
+		else
+			return "- invalid input format -";
+	}
+
+	/**
+	 * Formats a fraction as a percentage with at most three decimal places.
+	 *
+	 * @param o a number, where 1 corresponds to 100%
+	 *
+	 * @return the formatted percentage
+	 */
+	public static String formatNumberAsPercent(Object o) {
+		return MessageFormat.format("{0,number,#.###%}", o);
+	}
+
+	/**
+	 * Prints all entries to standard output. Values are formatted with {@code %,8d},
+	 * so they have to be integral numbers.
+	 */
+	public static <K, V> void printMap(Map<K, V> map) {
+		System.out.println("\nkey: value");
+		map.forEach((k, v) -> System.out.print(String.format("%s:\t %,8d%n", k, v)));
+		System.out.println();
+	}
+
+	/**
+	 * Generic map converter -> since AtomicLongs aren't as comparable.
+	 * Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
+	 *
+	 * @param map map whose values are {@link AtomicLong}s
+	 *
+	 * @return new map keyed by {@code toString()} of the original keys
+	 */
+	public static <K, V> Map<String, Long> atomicInt2StringAndInt(Map<K, V> map) {
+		Map<String, Long> m = new HashMap<>();
+
+		for (Map.Entry<K, V> e : map.entrySet()) {
+			m.put(e.getKey().toString(), ((AtomicLong) e.getValue()).longValue());
+		}
+
+		return m;
+	}
+
+	/**
+	 * Sorts a map in a descending order by value.
+	 *
+	 * @param map   entries to sort
+	 * @param limit maximum number of entries to keep; 0 or less means no limit
+	 *
+	 * @return new insertion-ordered map holding the largest entries first
+	 */
+	public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue(Map<K, V> map, int limit) {
+		/*
+		sorted() in itself is O(1), since it's an intermediate operation that
+		doesn't consume the stream, but simply adds an operation to the pipeline.
+		Once the stream is consumed by a terminal operation, the sort happens and
+		 either
+		- it doesn't do anything (O(1)) because the stream knows that the
+		elements are already sorted (because they come from a SortedSet, for example)
+		- or the stream is not parallel, and it delegates to Arrays.sort() (O(n log n))
+		- or the stream is parallel, and it delegates to Arrays.parallelSort() (O(n log n))
+
+		As of JDK 8, the main sorting algorithm which is also used in standard
+		stream API implementation for sequential sorting is TimSort. Its worst
+		case is O(n log n), but it works incredibly fast (with O(n) and quite
+		small constant) if data is presorted (in forward or reverse direction)
+		or partially presorted (for example, if you concatenate two sorted lists
+		and sort them again).
+		*/
+		// if limit is set to 0 or less, we take that to mean no limit at all
+		if (limit <= 0) {
+			limit = map.size();
+		}
+
+		Map<K, V> result = new LinkedHashMap<>();
+		TimeWatch watch = TimeWatch.start();
+
+		Stream<Map.Entry<K, V>> st = map.entrySet().stream();
+
+		st.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).limit(limit)
+				.forEachOrdered(e -> result.put(e.getKey(), e.getValue()));
+
+		if (Settings.PRINT_LOG) {
+			System.out.println(String.format("Elapsed time for sorting %s items: %s",
+					formatNumberReadable(result.size()),
+					watch.toFullTime()));
+		}
+
+		return result;
+	}
+
+	/**
+	 * Prints every entry together with its share of {@code number_of_words} to standard output.
+	 */
+	public static <K, V> void printMap(Map<K, Integer> map, String title, int number_of_words) {
+		System.out.println(String.format("\n%s\n------------\nkey: value\tpercent", title));
+		map.forEach((k, v) ->
+				System.out.println(String.format("%s:\t %s\t %s%%",
+						k,
+						Util.formatNumberReadable(v),
+						Util.formatNumberReadable((double) v / number_of_words * 100))));
+		System.out.println();
+	}
+
+	/**
+	 * @return sum of all values in the map
+	 */
+	static long mapSumFrequencies(Map<String, Long> map) {
+		long sum = 0;
+
+		for (long value : map.values()) {
+			sum += value;
+		}
+
+		return sum;
+	}
+
+	/**
+	 * Used for passing optional integer values for sorting.
+	 *
+	 * @return the first argument if it is positive, otherwise 0
+	 */
+	public static int getValidInt(int... i) {
+		if (i == null || i.length < 1 || i[0] <= 0) {
+			return 0;
+		} else {
+			return i[0];
+		}
+	}
+
+	/**
+	 * Check whether a map is empty. It also considers an edge case where the map's values are
+	 * collections: a map whose values are all empty collections (or null) counts as empty.
+	 * Any value that is not a collection makes the map non-empty.
+	 */
+	public static <K, V> boolean isMapEmpty(Map<K, V> map) {
+		if (map.isEmpty()) {
+			// default
+			return true;
+		}
+
+		// otherwise check if keys map to values that are empty
+		for (V v : map.values()) {
+			if (v == null) {
+				continue;
+			}
+
+			// instanceof instead of a blind cast, so other value types cannot raise a ClassCastException
+			if (!(v instanceof Collection) || !((Collection<?>) v).isEmpty()) {
+				return false;
+			}
+		}
+
+		return true;
+	}
+
+	/**
+	 * Returns the location of the main class if possible, otherwise null
+	 */
+	public static File getWorkingDirectory() {
+		// the code source may be missing, depending on how the class was loaded
+		CodeSource codeSource = GUIController.class.getProtectionDomain().getCodeSource();
+		if (codeSource == null || codeSource.getLocation() == null) {
+			logger.info("working dir returing null");
+			return null;
+		}
+
+		// get location of the currently executing class
+		String path = codeSource.getLocation().getPath();
+
+		// log4j only substitutes arguments where the message contains a {} placeholder
+		logger.info("working dir path: {}", path);
+
+		String decodedPath = null;
+		try {
+			decodedPath = URLDecoder.decode(path, "UTF-8");
+		} catch (UnsupportedEncodingException e) {
+			logger.error("decoding: ", e);
+		}
+
+		if (decodedPath != null) {
+			File workingDirectory = new File(decodedPath);
+
+			// in case it's a file (class is packaged inside a jar), select its parent folder
+			workingDirectory = workingDirectory.isFile() ? workingDirectory.getParentFile() : workingDirectory;
+
+			if (ValidationUtil.isReadableDirectory(workingDirectory)) {
+				logger.info("working dir is ok: {}", workingDirectory.getAbsolutePath());
+				return workingDirectory;
+			}
+		}
+
+		logger.info("working dir returing null");
+		return null;
+	}
+}
+}
--- a/src/main/java/util/db/RDB.java
+++ b/src/main/java/util/db/RDB.java
@@ -0,0 +1,161 @@
+package util.db;
+
+import static util.ByteUtils.*;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.time.LocalDateTime;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.logging.log4j.LogManager;
+import org.rocksdb.*;
+
+import util.TimeWatch;
+
+/**
+ * Temporary RocksDB store that accumulates {@code long} counters per string key.
+ * The database is opened by the constructor and stays open until {@link #delete()} is called.
+ */
+public class RDB {
+	private static final org.apache.logging.log4j.Logger logger = LogManager.getLogger(RDB.class);
+	private static final String UTF_8 = "UTF-8";
+
+	private Options options;
+	private RocksDB db;
+	private String path;
+
+	public RDB() {
+		// different dbs in case of concurrent calculations
+		this.path = System.getProperty("java.io.tmpdir")
+				.concat(File.separator)
+				.concat(String.format("corpusAnalyzer_db%d", LocalDateTime.now().toString().hashCode()));
+
+		this.db = createDB();
+	}
+
+	/**
+	 * Opens the database at {@code path}, creating it if it does not exist yet.
+	 *
+	 * @return the open database, or null if it could not be opened
+	 */
+	private RocksDB createDB() {
+		RocksDB.loadLibrary();
+
+		// the Options class contains a set of configurable DB options
+		// that determines the behaviour of the database.
+		options = new Options();
+		options.setCreateIfMissing(true);
+
+		try {
+			// a factory method that returns a RocksDB instance; the handle is deliberately
+			// left open, a closed handle would be useless to writeBatch/getDump/getDb
+			return RocksDB.open(options, path);
+		} catch (RocksDBException e) {
+			logger.error("Could not open RocksDB at " + path, e);
+			options.close();
+			options = null;
+			return null;
+		}
+	}
+
+	/**
+	 * Adds the given counts to the values already stored in the database.
+	 * Keys are stored as UTF-8 bytes, values as 8-byte longs.
+	 * Not synchronized: the read-add-write sequence must not run concurrently on one instance.
+	 *
+	 * @param results counts to add, per key
+	 *
+	 * @throws UnsupportedEncodingException if UTF-8 is not supported
+	 */
+	public void writeBatch(Map<String, AtomicLong> results) throws UnsupportedEncodingException {
+		if (db == null) {
+			logger.error("Database at " + path + " is not open, nothing written");
+			return;
+		}
+
+		// WriteBatch and WriteOptions wrap native objects, so they are closed explicitly
+		try (final WriteBatch wb = new WriteBatch(); final WriteOptions writeOptions = new WriteOptions()) {
+			for (Map.Entry<String, AtomicLong> entry : results.entrySet()) {
+				byte[] key = entry.getKey().getBytes(UTF_8);
+				long resultValue = entry.getValue().longValue();
+
+				try {
+					// value == null if key does not exist in db.
+					final byte[] dbValue = db.get(key);
+					long stored = dbValue != null ? bytesToLong(dbValue) : 0L;
+					wb.put(key, longToBytes(stored + resultValue));
+				} catch (RocksDBException e) {
+					logger.error("Could not update key " + entry.getKey(), e);
+				}
+			}
+			TimeWatch watch = TimeWatch.start();
+			db.write(writeOptions, wb);
+			System.out.println(String.format("Writing %d entries took: %s", wb.count(), watch.toFullTime()));
+		} catch (RocksDBException e) {
+			logger.error("Could not write batch to " + path, e);
+		}
+	}
+
+	// public byte[] atomicIntToByteArray(final AtomicLong i) {
+	// 	BigInteger bigInt = BigInteger.valueOf(i.intValue());
+	//
+	// 	return bigInt.toByteArray();
+	// }
+
+	/**
+	 * @return the open database, or null if opening failed or {@link #delete()} was already called
+	 */
+	public RocksDB getDb() {
+		return db;
+	}
+
+	/**
+	 * Reads the whole database into memory.
+	 *
+	 * @return all stored keys with their counts; an empty map if the database is not open
+	 *
+	 * @throws UnsupportedEncodingException if UTF-8 is not supported
+	 */
+	public Map<String, AtomicLong> getDump() throws UnsupportedEncodingException {
+		Map<String, AtomicLong> dump = new HashMap<>();
+
+		if (db == null) {
+			logger.error("Database at " + path + " is not open, returning an empty dump");
+			return dump;
+		}
+
+		try (RocksIterator it = db.newIterator()) {
+			for (it.seekToFirst(); it.isValid(); it.next()) {
+				dump.put(new String(it.key(), UTF_8), new AtomicLong(bytesToLong(it.value())));
+			}
+		}
+
+		return dump;
+	}
+
+	/**
+	 * Closes the database and removes its directory. Afterwards {@link #writeBatch(Map)} writes
+	 * nothing and {@link #getDump()} returns an empty map.
+	 */
+	public void delete() {
+		// release the native handles before removing the files they refer to
+		if (db != null) {
+			db.close();
+			db = null;
+		}
+		if (options != null) {
+			options.close();
+			options = null;
+		}
+
+		try {
+			FileUtils.deleteDirectory(new File(path));
+		} catch (IOException e) {
+			logger.error("Could not delete " + path, e);
+		}
+	}
+}