diff --git a/src/main/java/alg/XML_processing.java b/src/main/java/alg/XML_processing.java
index a60fc3a..c393fad 100755
--- a/src/main/java/alg/XML_processing.java
+++ b/src/main/java/alg/XML_processing.java
@@ -14,6 +14,10 @@ import javax.xml.stream.XMLStreamConstants;
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.events.*;
+import gui.I18N;
+import javafx.beans.property.ReadOnlyDoubleProperty;
+import javafx.beans.property.ReadOnlyDoubleWrapper;
+import javafx.concurrent.Task;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.LineIterator;
 import org.apache.logging.log4j.LogManager;
@@ -22,290 +26,341 @@ import data.*;
 import gui.ValidationUtil;

 public class XML_processing {
- public final static org.apache.logging.log4j.Logger logger = LogManager.getLogger(XML_processing.class);
+ public final static org.apache.logging.log4j.Logger logger = LogManager.getLogger(XML_processing.class);
- // public static void processCorpus(Statistics stats) {
- // // we can preset the list's size, so there won't be a need to resize it
- // List corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT);
- //
- // int i = 0;
- // for (File f : Settings.corpus) {
- // i++;
- // readXML(f.toString(), stats);
- // }
- // }
+ // progress tracking functionality
+ private static final ReadOnlyDoubleWrapper progress = new ReadOnlyDoubleWrapper();
- // public static void readXML(String path, Statistics stats) {
- // if (stats.getCorpusType() == CorpusType.GIGAFIDA) {
- // readXMLGigafida(path, stats);
- // } else if (stats.getCorpusType() == CorpusType.GOS) {
- // readXMLGos(path, stats);
- // } else if (stats.getCorpusType() == CorpusType.SOLAR) {
- // readXMLSolar(path, stats);
- // }
- // }
+ public static boolean isCancelled = false;
+ public static Date startTime = new Date();
+ public static boolean isCollocability = false;
- public static void readXML(String path, StatisticsNew stats) {
- if (stats.getCorpus().getCorpusType() == CorpusType.GIGAFIDA
- || stats.getCorpus().getCorpusType() == CorpusType.CCKRES) {
- readXMLGigafida(path, stats);
- } else if (stats.getCorpus().getCorpusType() == CorpusType.GOS) {
- readXMLGos(path, stats);
- } else if (stats.getCorpus().getCorpusType() == CorpusType.SOLAR) {
- readXMLSolar(path, stats);
- } else if (stats.getCorpus().getCorpusType() == CorpusType.SSJ500K) {
- readXMLSSJ500K(path, stats);
+ public double getProgress() {
+ return progressProperty().get();
+ }
+
+ public ReadOnlyDoubleProperty progressProperty() {
+ return progress;
+ }
+
+ // public static void processCorpus(Statistics stats) {
+ // // we can preset the list's size, so there won't be a need to resize it
+ // List corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT);
+ //
+ // int i = 0;
+ // for (File f : Settings.corpus) {
+ // i++;
+ // readXML(f.toString(), stats);
+ // }
+ // }
+
+ // public static void readXML(String path, Statistics stats) {
+ // if (stats.getCorpusType() == CorpusType.GIGAFIDA) {
+ // readXMLGigafida(path, stats);
+ // } else if (stats.getCorpusType() == CorpusType.GOS) {
+ // readXMLGos(path, stats);
+ // } else if (stats.getCorpusType() == CorpusType.SOLAR) {
+ // readXMLSolar(path, stats);
+ // }
+ // }
+
+ public static boolean readXML(String path, StatisticsNew stats) {
+ if (stats.getCorpus().getCorpusType() == CorpusType.GIGAFIDA
+ || stats.getCorpus().getCorpusType() == CorpusType.CCKRES) {
+ return readXMLGigafida(path, stats);
+ } else if (stats.getCorpus().getCorpusType() == CorpusType.GOS) {
+ return readXMLGos(path, stats);
+ } else if (stats.getCorpus().getCorpusType() == CorpusType.SOLAR) {
+ return readXMLSolar(path, stats);
+ } else if (stats.getCorpus().getCorpusType() == CorpusType.SSJ500K ||
+ stats.getCorpus().getCorpusType() == CorpusType.GIGAFIDA2) {
+ return readXMLSSJ500K(path, stats);
+ } else if (stats.getCorpus().getCorpusType() == CorpusType.VERT) {
+ return readVERT(path, stats);
 }
- }
+// task.updateProgress(fileNum, size);
+ return false;
+ }
- /**
- * Reads and returns the value of a passed header tag or an empty string.
- * E.g. title tag, for discerning the corpus' type.
- * Notice: returns only the value of the first occurrence of a given tag name.
- */
- public static String readXMLHeaderTag(String path, String tag) {
- XMLInputFactory factory = XMLInputFactory.newInstance();
- XMLEventReader eventReader = null;
+ /**
+ * Reads and returns the value of a passed header tag or an empty string.
+ * E.g. title tag, for discerning the corpus' type.
+ * Notice: returns only the value of the first occurrence of a given tag name.
+ */
+ public static String readXMLHeaderTag(String path, String tag) {
+ XMLInputFactory factory = XMLInputFactory.newInstance();
+ XMLEventReader eventReader = null;
- try {
- eventReader = factory.createXMLEventReader(new FileInputStream(path));
- while (eventReader.hasNext()) {
- XMLEvent xmlEvent = eventReader.nextEvent();
- if (xmlEvent.isStartElement()) {
- StartElement startElement = xmlEvent.asStartElement();
- String var = startElement.getName().getLocalPart();
+ try {
+ eventReader = factory.createXMLEventReader(new FileInputStream(path));
+ while (eventReader.hasNext()) {
+ XMLEvent xmlEvent = eventReader.nextEvent();
+ if (xmlEvent.isStartElement()) {
+ StartElement startElement = xmlEvent.asStartElement();
+ String var = startElement.getName().getLocalPart();
- if (var.equalsIgnoreCase(tag)) {
- return eventReader.nextEvent().asCharacters().getData();
- }
- }
- }
- } catch (FileNotFoundException | XMLStreamException e) {
- e.printStackTrace();
- } finally {
- if (eventReader != null) {
- try {
- eventReader.close();
- } catch (XMLStreamException e) {
- logger.error("closing stream", e);
- }
- }
- }
- return "";
- }
+ if (var.equalsIgnoreCase(tag)) {
+ return eventReader.nextEvent().asCharacters().getData();
+ }
+ }
+ }
+ } catch (FileNotFoundException | XMLStreamException e) {
+ e.printStackTrace();
+ } finally {
+ if (eventReader != null) {
+ try {
+ eventReader.close();
+ } catch (XMLStreamException e) {
+ logger.error("closing stream", e);
+ }
+ }
+ }
+ return "";
+ }
- /**
- * Reads and returns the value of a passed header attribute or an empty string.
- * E.g. body base attribute, for discerning the corpus' type of ssj500k.
- * Notice: returns only the value of the first occurrence of a given tag name.
+ */ + public static String readXMLHeaderAttribute(String path, String tag, String attribute) { + XMLInputFactory factory = XMLInputFactory.newInstance(); + XMLEventReader eventReader = null; - try { - eventReader = factory.createXMLEventReader(new FileInputStream(path)); - while (eventReader.hasNext()) { - XMLEvent xmlEvent = eventReader.nextEvent(); - if (xmlEvent.isStartElement()) { - StartElement startElement = xmlEvent.asStartElement(); - String var = startElement.getName().getLocalPart(); + try { + eventReader = factory.createXMLEventReader(new FileInputStream(path)); + while (eventReader.hasNext()) { + XMLEvent xmlEvent = eventReader.nextEvent(); + if (xmlEvent.isStartElement()) { + StartElement startElement = xmlEvent.asStartElement(); + String var = startElement.getName().getLocalPart(); - if (var.equalsIgnoreCase(tag)) { + if (var.equalsIgnoreCase(tag)) { HashMap att = extractAttributes(startElement); - if (att.containsKey("base")) { - return att.get("base").substring(0, att.get("base").length() - 12); - } + if (att.containsKey("base")) { + return att.get("base").substring(0, att.get("base").length() - 12); + } - return eventReader.nextEvent().asCharacters().getData(); - } - } - } - } catch (FileNotFoundException | XMLStreamException e) { - e.printStackTrace(); - } finally { - if (eventReader != null) { - try { - eventReader.close(); - } catch (XMLStreamException e) { - logger.error("closing stream", e); - } - } - } - return ""; - } + return eventReader.nextEvent().asCharacters().getData(); + } + } + } + } catch (FileNotFoundException | XMLStreamException e) { + e.printStackTrace(); + } finally { + if (eventReader != null) { + try { + eventReader.close(); + } catch (XMLStreamException e) { + logger.error("closing stream", e); + } + } + } + return ""; + } - private static void fj(List corpus, StatisticsNew stats) { - ForkJoinPool pool = new ForkJoinPool(); + private static void fj(List corpus, StatisticsNew stats) { + ForkJoinPool pool = new ForkJoinPool(); - if (stats.getFilter().getAl() == AnalysisLevel.STRING_LEVEL) { - alg.ngram.ForkJoin wc = new alg.ngram.ForkJoin(corpus, stats); - pool.invoke(wc); - } else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) { - alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats); - pool.invoke(wc); - } else { - // TODO: - // alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats); - // pool.invoke(wc); - } - } + if (stats.getFilter().getAl() == AnalysisLevel.STRING_LEVEL) { + alg.ngram.ForkJoin wc = new alg.ngram.ForkJoin(corpus, stats); + pool.invoke(wc); + } else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) { + alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats); + pool.invoke(wc); + } else { + // TODO: + // alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats); + // pool.invoke(wc); + } + } - // public static void readXMLGos(String path, Statistics stats) { - // boolean in_word = false; - // String taksonomija = ""; - // String lemma = ""; - // String msd = ""; - // String type = stats.isGosOrthMode() ? 
"orth" : "norm"; // orth & norm - // - // List stavek = new ArrayList<>(); - // List corpus = new ArrayList<>(); - // String sentenceDelimiter = "seg"; - // String taxonomyPrefix = "gos."; - // - // try { - // XMLInputFactory factory = XMLInputFactory.newInstance(); - // XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path)); - // - // while (eventReader.hasNext()) { - // XMLEvent event = eventReader.nextEvent(); - // - // switch (event.getEventType()) { - // case XMLStreamConstants.START_ELEMENT: - // - // StartElement startElement = event.asStartElement(); - // String qName = startElement.getName().getLocalPart(); - // - // // "word" node - // if (qName.equals("w")) { - // in_word = true; - // - // if (type.equals("norm")) { - // // make sure we're looking at and not - // Iterator var = startElement.getAttributes(); - // ArrayList attributes = new ArrayList<>(); - // while (var.hasNext()) { - // attributes.add(var.next()); - // } - // - // if (attributes.contains("msd")) { - // msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); - // } else { - // msd = null; - // } - // - // if (attributes.contains("lemma")) { - // lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); - // } - // } - // } - // // taxonomy node - // else if (qName.equalsIgnoreCase("catRef")) { - // // there are some term nodes at the beginning that are of no interest to us - // // they differ by not having the attribute "ref", so test will equal null - // Attribute test = startElement.getAttributeByName(QName.valueOf("target")); - // - // if (test != null) { - // // keep only taxonomy properties - // taksonomija = String.valueOf(test.getValue()).replace(taxonomyPrefix, ""); - // } - // } else if (qName.equalsIgnoreCase("div")) { - // type = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); - // - // } - // break; - // - // case XMLStreamConstants.CHARACTERS: - // Characters characters = event.asCharacters(); - // - // // "word" node value - // if (in_word) { - // if (type.equals("norm") && msd != null) { - // stavek.add(new Word(characters.getData(), lemma, msd)); - // } else { - // stavek.add(new Word(characters.getData())); - // } - // - // in_word = false; - // } - // break; - // - // case XMLStreamConstants.END_ELEMENT: - // EndElement endElement = event.asEndElement(); - // - // // parser reached end of the current sentence - // if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { - // // add sentence to corpus - // corpus.add(new Sentence(stavek, taksonomija, type)); - // // and start a new one - // stavek = new ArrayList<>(); - // - // /* Invoke Fork-Join when we reach maximum limit of - // * sentences (because we can't read everything to - // * memory) or we reach the end of the file. 
- // */ - // if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { - // fj(corpus, stats); - // // empty the current corpus, since we don't need - // // the data anymore - // corpus.clear(); - // } - // } - // - // // backup - // if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) { - // fj(corpus, stats); - // corpus.clear(); - // } - // - // break; - // } - // } - // } catch (FileNotFoundException | XMLStreamException e) { - // e.printStackTrace(); - // } - // } + // public static void readXMLGos(String path, Statistics stats) { + // boolean in_word = false; + // String taksonomija = ""; + // String lemma = ""; + // String msd = ""; + // String type = stats.isGosOrthMode() ? "orth" : "norm"; // orth & norm + // + // List stavek = new ArrayList<>(); + // List corpus = new ArrayList<>(); + // String sentenceDelimiter = "seg"; + // String taxonomyPrefix = "gos."; + // + // try { + // XMLInputFactory factory = XMLInputFactory.newInstance(); + // XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path)); + // + // while (eventReader.hasNext()) { + // XMLEvent event = eventReader.nextEvent(); + // + // switch (event.getEventType()) { + // case XMLStreamConstants.START_ELEMENT: + // + // StartElement startElement = event.asStartElement(); + // String qName = startElement.getName().getLocalPart(); + // + // // "word" node + // if (qName.equals("w")) { + // in_word = true; + // + // if (type.equals("norm")) { + // // make sure we're looking at and not + // Iterator var = startElement.getAttributes(); + // ArrayList attributes = new ArrayList<>(); + // while (var.hasNext()) { + // attributes.add(var.next()); + // } + // + // if (attributes.contains("msd")) { + // msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); + // } else { + // msd = null; + // } + // + // if (attributes.contains("lemma")) { + // lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); + // } + // } + // } + // // taxonomy node + // else if (qName.equalsIgnoreCase("catRef")) { + // // there are some term nodes at the beginning that are of no interest to us + // // they differ by not having the attribute "ref", so test will equal null + // Attribute test = startElement.getAttributeByName(QName.valueOf("target")); + // + // if (test != null) { + // // keep only taxonomy properties + // taksonomija = String.valueOf(test.getValue()).replace(taxonomyPrefix, ""); + // } + // } else if (qName.equalsIgnoreCase("div")) { + // type = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); + // + // } + // break; + // + // case XMLStreamConstants.CHARACTERS: + // Characters characters = event.asCharacters(); + // + // // "word" node value + // if (in_word) { + // if (type.equals("norm") && msd != null) { + // stavek.add(new Word(characters.getData(), lemma, msd)); + // } else { + // stavek.add(new Word(characters.getData())); + // } + // + // in_word = false; + // } + // break; + // + // case XMLStreamConstants.END_ELEMENT: + // EndElement endElement = event.asEndElement(); + // + // // parser reached end of the current sentence + // if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { + // // add sentence to corpus + // corpus.add(new Sentence(stavek, taksonomija, type)); + // // and start a new one + // stavek = new ArrayList<>(); + // + // /* Invoke Fork-Join when we reach maximum limit of + // * sentences (because we can't read everything to + // * 
memory) or we reach the end of the file. + // */ + // if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { + // fj(corpus, stats); + // // empty the current corpus, since we don't need + // // the data anymore + // corpus.clear(); + // } + // } + // + // // backup + // if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) { + // fj(corpus, stats); + // corpus.clear(); + // } + // + // break; + // } + // } + // } catch (FileNotFoundException | XMLStreamException e) { + // e.printStackTrace(); + // } + // } - @SuppressWarnings("unused") - public static void readXMLSolar(String path, StatisticsNew stats) { - boolean in_word = false; + @SuppressWarnings("unused") + public static boolean readXMLSolar(String path, StatisticsNew stats) { + boolean in_word = false; boolean inPunctuation = false; String lemma = ""; - String msd = ""; + String msd = ""; - List stavek = new ArrayList<>(); - List corpus = new ArrayList<>(); + List stavek = new ArrayList<>(); + List corpus = new ArrayList<>(); - // used for filter - Set headTags = new HashSet<>(Arrays.asList("sola", "predmet", "razred", "regija", "tip", "leto")); - Map headBlock = null; - boolean includeThisBlock = false; + // used for filter + Set headTags = new HashSet<>(Arrays.asList("sola", "predmet", "razred", "regija", "tip", "leto")); + Map headBlock = null; + boolean includeThisBlock = false; - try { - XMLInputFactory factory = XMLInputFactory.newInstance(); - XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path)); + int numLines = 0; + int lineNum = 0; + progress.set(0.0); + if(!isCollocability) { + startTime = new Date(); + } + // get number of lines + try { + XMLInputFactory factory = XMLInputFactory.newInstance(); + XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path)); - while (eventReader.hasNext()) { - XMLEvent event = eventReader.nextEvent(); + while (eventReader.hasNext()) + { + eventReader.next(); + numLines ++; + // Loop just in case the file is > Long.MAX_VALUE or skip() decides to not read the entire file + } + } catch (IOException e) { + e.printStackTrace(); + } catch (XMLStreamException e) { + e.printStackTrace(); + } - switch (event.getEventType()) { - case XMLStreamConstants.START_ELEMENT: + try { + XMLInputFactory factory = XMLInputFactory.newInstance(); + XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path)); - StartElement startElement = event.asStartElement(); - // System.out.println(String.format("%s", startElement.toString())); - String qName = startElement.getName().getLocalPart(); + while (eventReader.hasNext()) { + int percentage = (int) (lineNum * 100.0 / numLines); + if(progress.get() < percentage) { + progress.set(percentage); + } + if(isCancelled) { + return false; + } + lineNum ++; + XMLEvent event = eventReader.nextEvent(); - // "word" node - if (qName.equals("w3")) { - in_word = true; + switch (event.getEventType()) { + case XMLStreamConstants.START_ELEMENT: - msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); - lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); - } else if (qName.equals("c3")) { - String c3Content = eventReader.nextEvent().asCharacters().getData(); + StartElement startElement = event.asStartElement(); + // System.out.println(String.format("%s", startElement.toString())); + String qName = startElement.getName().getLocalPart(); + + // "word" node + if (qName.equals("w3")) { + in_word = true; 
+ + msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); + lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); + } else if (qName.equals("c3")) { + String c3Content = eventReader.nextEvent().asCharacters().getData(); if(stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && stavek.size() > 0){ @@ -313,485 +368,318 @@ public class XML_processing { } - if (c3Content.equals(".") && includeThisBlock) { - if (stats.getFilter().getNgramValue() == 0){ - int numSentenceParts = 0; - for(Word w : stavek){ - int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1); - numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts; - } - stats.updateUniGramOccurrences(numSentenceParts, new ArrayList<>()); - } else if(stats.getFilter().getNgramValue() >= 1) { - stats.updateUniGramOccurrences(stavek.size(), new ArrayList<>()); - } + if (c3Content.equals(".") && includeThisBlock) { + if (stats.getFilter().getNgramValue() == 0){ + int numSentenceParts = 0; + for(Word w : stavek){ + int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1); + numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts; + } + stats.updateUniGramOccurrences(numSentenceParts, new ArrayList<>()); + } else if(stats.getFilter().getNgramValue() >= 1) { + stats.updateUniGramOccurrences(stavek.size(), new ArrayList<>()); + } - // add sentence to corpus - corpus.add(new Sentence(stavek, null)); - // and start a new one - stavek = new ArrayList<>(); + // add sentence to corpus + corpus.add(new Sentence(stavek, null)); + // and start a new one + stavek = new ArrayList<>(); - /* Invoke Fork-Join when we reach maximum limit of - * sentences (because we can't read everything to - * memory) or we reach the end of the file. - */ - if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { - fj(corpus, stats); - // empty the current corpus, since we don't need - // the data anymore - corpus.clear(); - } - } - } else if (headTags.contains(qName)) { - String tagContent = eventReader.nextEvent().asCharacters().getData(); - headBlock.put(qName, tagContent); - } else if (qName.equals("head")) { - headBlock = new HashMap<>(); - } + /* Invoke Fork-Join when we reach maximum limit of + * sentences (because we can't read everything to + * memory) or we reach the end of the file. 
+ */ + if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { + fj(corpus, stats); + // empty the current corpus, since we don't need + // the data anymore + corpus.clear(); + } + } + } else if (headTags.contains(qName)) { + String tagContent = eventReader.nextEvent().asCharacters().getData(); + headBlock.put(qName, tagContent); + } else if (qName.equals("head")) { + headBlock = new HashMap<>(); + } - break; + break; - case XMLStreamConstants.CHARACTERS: - Characters characters = event.asCharacters(); + case XMLStreamConstants.CHARACTERS: + Characters characters = event.asCharacters(); - // "word" node value - if (in_word) { - stavek.add(createWord(characters.getData(), lemma, msd, "", stats.getFilter())); - in_word = false; - } - break; + // "word" node value + if (in_word) { + stavek.add(createWord(characters.getData(), lemma, msd, "", stats.getFilter())); + in_word = false; + } + break; - case XMLStreamConstants.END_ELEMENT: - EndElement endElement = event.asEndElement(); - String qNameEnd = endElement.getName().getLocalPart(); + case XMLStreamConstants.END_ELEMENT: + EndElement endElement = event.asEndElement(); + String qNameEnd = endElement.getName().getLocalPart(); - if (qNameEnd.equals("head")) { - // validate and set boolean - if (validateHeadBlock(headBlock, stats.getFilter().getSolarFilters())) { - includeThisBlock = true; - } - } else if (qNameEnd.equals("body")) { - // new block, reset filter status - includeThisBlock = false; - } + if (qNameEnd.equals("head")) { + // validate and set boolean + if (validateHeadBlock(headBlock, stats.getFilter().getSolarFilters())) { + includeThisBlock = true; + } + } else if (qNameEnd.equals("body")) { + // new block, reset filter status + includeThisBlock = false; + } - // backup - if (endElement.getName().getLocalPart().equalsIgnoreCase("korpus")) { - fj(corpus, stats); - corpus.clear(); - } + // backup + if (endElement.getName().getLocalPart().equalsIgnoreCase("korpus")) { + fj(corpus, stats); + corpus.clear(); + } - break; - } - } - } catch (FileNotFoundException | XMLStreamException e) { - e.printStackTrace(); - } - } + break; + } + } + } catch (FileNotFoundException | XMLStreamException e) { + e.printStackTrace(); + } + return true; + } - /** - * @param readHeadBlock block of tags read from the corpus - * @param userSetFilter tags with values set by the user - * - * @return - */ - private static boolean validateHeadBlock(Map readHeadBlock, HashMap> userSetFilter) { - boolean pass = true; + /** + * @param readHeadBlock block of tags read from the corpus + * @param userSetFilter tags with values set by the user + * + * @return + */ + private static boolean validateHeadBlock(Map readHeadBlock, HashMap> userSetFilter) { + boolean pass = true; - if (userSetFilter == null) { - return true; - } + if (userSetFilter == null) { + return true; + } - for (Map.Entry> filterEntry : userSetFilter.entrySet()) { - String key = filterEntry.getKey(); - HashSet valueObject = filterEntry.getValue(); + for (Map.Entry> filterEntry : userSetFilter.entrySet()) { + String key = filterEntry.getKey(); + HashSet valueObject = filterEntry.getValue(); - // if (valueObject instanceof String) { - // pass = validateHeadBlockEntry(readHeadBlock, key, (String) valueObject); - // } else - if (valueObject != null) { - //noinspection unchecked - for (String value : valueObject) { - pass = validateHeadBlockEntry(readHeadBlock, key, value); - } - } + // if (valueObject instanceof String) { + // pass = validateHeadBlockEntry(readHeadBlock, key, (String) 
valueObject); + // } else + if (valueObject != null) { + //noinspection unchecked + for (String value : valueObject) { + pass = validateHeadBlockEntry(readHeadBlock, key, value); + } + } - if (!pass) { - // current head block does not include one of the set filters - not likely, but an edge case anyway - return false; - } - } + if (!pass) { + // current head block does not include one of the set filters - not likely, but an edge case anyway + return false; + } + } - // if it gets to this point, it passed all the filters - return true; - } + // if it gets to this point, it passed all the filters + return true; + } - private static boolean validateHeadBlockEntry(Map readHeadBlock, String userSetKey, String userSetValue) { - if (!readHeadBlock.keySet().contains(userSetKey)) { - // current head block does not include one of the set filters - not likely, but an edge case anyway - return false; - } else if (!readHeadBlock.get(userSetKey).equals(userSetValue)) { - // different values -> doesn't pass the filter - return false; - } + private static boolean validateHeadBlockEntry(Map readHeadBlock, String userSetKey, String userSetValue) { + if (!readHeadBlock.keySet().contains(userSetKey)) { + // current head block does not include one of the set filters - not likely, but an edge case anyway + return false; + } else if (!readHeadBlock.get(userSetKey).equals(userSetValue)) { + // different values -> doesn't pass the filter + return false; + } - return true; - } + return true; + } - /** - * Parses XML headers for information about its taxonomy (if supported) or filters (solar) - * - * @param filepath - * @param corpusIsSplit is corpus split into multiple xml files, or are all entries grouped into one large xml file - * @param corpusType - */ - public static HashSet readVertHeaderTaxonomyAndFilters(String filepath, boolean corpusIsSplit, CorpusType corpusType) { -// boolean parseTaxonomy = Tax.getCorpusTypesWithTaxonomy().contains(corpusType); - // solar - Set headTags = null; - HashMap> resultFilters = new HashMap<>(); - // taxonomy corpora - HashSet resultTaxonomy = new HashSet<>(); + /** + * Parses XML headers for information about its taxonomy (if supported) or filters (solar) + * + * @param filepath + * @param corpusIsSplit is corpus split into multiple xml files, or are all entries grouped into one large xml file + * @param corpusType + */ + public static HashSet readVertHeaderTaxonomyAndFilters(String filepath, boolean corpusIsSplit, CorpusType corpusType) { + // taxonomy corpora + HashSet resultTaxonomy = new HashSet<>(); - LineIterator it = null; - try { - it = FileUtils.lineIterator(new File(filepath), "UTF-8"); - try { - boolean insideHeader = false; + LineIterator it = null; + try { + it = FileUtils.lineIterator(new File(filepath), "UTF-8"); + try { + boolean insideHeader = false; - while (it.hasNext()) { - String line = it.nextLine(); + while (it.hasNext()) { + String line = it.nextLine(); - if (line.length() > 4 && line.substring(1, 5).equals("text")) { - // split over "\" " - String[] split = line.split("\" "); + if (line.length() > 4 && line.substring(1, 5).equals("text")) { + // split over "\" " + String[] split = line.split("\" "); // String mediumId = ""; // String typeId = ""; // String proofreadId = ""; - for (String el : split) { - String[] attribute = el.split("=\""); - if (attribute[0].equals("medium_id")) { + boolean idsPresent = false; + for (String el : split) { + String[] attribute = el.split("=\""); + if (attribute[0].equals("medium_id")) { // mediumId = attribute[1]; - 
resultTaxonomy.add(attribute[1]); - } else if (attribute[0].equals("type_id")) { + idsPresent = true; + resultTaxonomy.add(attribute[1]); + } else if (attribute[0].equals("type_id")) { // typeId = attribute[1]; - resultTaxonomy.add(attribute[1]); - } else if (attribute[0].equals("proofread_id")) { + idsPresent = true; + resultTaxonomy.add(attribute[1]); + } else if (attribute[0].equals("proofread_id")) { // proofreadId = attribute[1]; - resultTaxonomy.add(attribute[1]); - } - } - } - } - } finally { - LineIterator.closeQuietly(it); - } - } catch (IOException e) { - e.printStackTrace(); - } - resultTaxonomy.remove("-"); - return resultTaxonomy; - } + idsPresent = true; + resultTaxonomy.add(attribute[1]); + } + } + if (!idsPresent){ + for (String el : split) { + String[] attribute = el.split("=\""); + if (attribute[0].equals("medium")) { +// mediumId = attribute[1]; + resultTaxonomy.add(attribute[1]); + } else if (attribute[0].equals("type")) { +// typeId = attribute[1]; + resultTaxonomy.add(attribute[1]); + } else if (attribute[0].equals("proofread")) { +// proofreadId = attribute[1]; + resultTaxonomy.add(attribute[1]); + } + } + } + } + } + } finally { + LineIterator.closeQuietly(it); + } + } catch (IOException e) { + e.printStackTrace(); + } + resultTaxonomy.remove("-"); + return resultTaxonomy; + } - /** - * Parses XML headers for information about its taxonomy (if supported) or filters (solar) - * - * @param filepath - * @param corpusIsSplit is corpus split into multiple xml files, or are all entries grouped into one large xml file - * @param corpusType - */ - public static Object readXmlHeaderTaxonomyAndFilters(String filepath, boolean corpusIsSplit, CorpusType corpusType) { - boolean parseTaxonomy = Tax.getCorpusTypesWithTaxonomy().contains(corpusType); - // solar - Set headTags = null; - HashMap> resultFilters = new HashMap<>(); - // taxonomy corpora - HashSet resultTaxonomy = new HashSet<>(); + /** + * Parses XML headers for information about its taxonomy (if supported) or filters (solar) + * + * @param filepath + * @param corpusIsSplit is corpus split into multiple xml files, or are all entries grouped into one large xml file + * @param corpusType + */ + public static Object readXmlHeaderTaxonomyAndFilters(String filepath, boolean corpusIsSplit, CorpusType corpusType) { + boolean parseTaxonomy = Tax.getCorpusTypesWithTaxonomy().contains(corpusType); + // solar + Set headTags = null; + HashMap> resultFilters = new HashMap<>(); + // taxonomy corpora + HashSet resultTaxonomy = new HashSet<>(); - String headTagName; + String headTagName; - if (corpusType == CorpusType.SOLAR) { - headTagName = "head"; - // used for filter - headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO)); + if (corpusType == CorpusType.SOLAR) { + headTagName = "head"; + // used for filter + headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO)); - // init results now to avoid null pointers - headTags.forEach(f -> resultFilters.put(f, new HashSet<>())); - } else if (corpusType == CorpusType.SSJ500K) { + // init results now to avoid null pointers + headTags.forEach(f -> resultFilters.put(f, new HashSet<>())); + } else if (corpusType == CorpusType.SSJ500K) { headTagName = "bibl"; } else { - headTagName = "teiHeader"; - } + headTagName = "teiHeader"; + } - XMLInputFactory factory = XMLInputFactory.newInstance(); - XMLEventReader xmlEventReader = null; - try { - xmlEventReader = factory.createXMLEventReader(new FileInputStream(filepath)); - boolean insideHeader = 
false; + XMLInputFactory factory = XMLInputFactory.newInstance(); + XMLEventReader xmlEventReader = null; + try { + xmlEventReader = factory.createXMLEventReader(new FileInputStream(filepath)); + boolean insideHeader = false; - while (xmlEventReader.hasNext()) { - XMLEvent xmlEvent = xmlEventReader.nextEvent(); + while (xmlEventReader.hasNext()) { + XMLEvent xmlEvent = xmlEventReader.nextEvent(); - if (xmlEvent.isStartElement()) { - StartElement startElement = xmlEvent.asStartElement(); - String elementName = startElement.getName().getLocalPart(); + if (xmlEvent.isStartElement()) { + StartElement startElement = xmlEvent.asStartElement(); + String elementName = startElement.getName().getLocalPart(); - if (elementName.equalsIgnoreCase(headTagName)) { - // if the corpus is split into files, we skip bodies - // this toggle is true when we're inside a header (next block of code executes) - // and false when we're not (skip reading unnecessary attributes) - insideHeader = true; - } + if (elementName.equalsIgnoreCase(headTagName)) { + // if the corpus is split into files, we skip bodies + // this toggle is true when we're inside a header (next block of code executes) + // and false when we're not (skip reading unnecessary attributes) + insideHeader = true; + } - if (insideHeader) { - if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) { - HashMap atts = extractAttributes(startElement); - String debug = ""; + if (insideHeader) { + if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) { + HashMap atts = extractAttributes(startElement); + String debug = ""; - String tax = startElement.getAttributeByName(QName.valueOf("target")) - .getValue() - .replace("#", ""); + String tax = startElement.getAttributeByName(QName.valueOf("target")) + .getValue() + .replace("#", ""); - resultTaxonomy.add(tax); - } else if (parseTaxonomy && elementName.equalsIgnoreCase("term")) { + if (tax.indexOf(':') >= 0) { + tax = tax.split(":")[1]; + } + resultTaxonomy.add(tax); + } else if (parseTaxonomy && elementName.equalsIgnoreCase("term")) { String tax = startElement.getAttributeByName(QName.valueOf("ref")) .getValue() .replace("#", ""); resultTaxonomy.add(tax); } else if (!parseTaxonomy && headTags.contains(elementName)) { - String tagContent = xmlEventReader.nextEvent().asCharacters().getData(); - resultFilters.get(elementName).add(tagContent); - } - } - } else if (xmlEvent.isEndElement() && corpusIsSplit && isEndElementEndOfHeader(xmlEvent, headTagName)) { - // if the corpus is split into multiple files, each with only one header block per file - // that means we should stop after we reach the end of the header - return parseTaxonomy ? resultTaxonomy : resultFilters; - } else if (xmlEvent.isEndElement() && !corpusIsSplit && isEndElementEndOfHeader(xmlEvent, headTagName)) { - // whole corpus in one file, so we have to continue reading in order to find all header blocks - insideHeader = false; - } - } - } catch (XMLStreamException e) { - logger.error("Streaming error", e); - return parseTaxonomy ? resultTaxonomy : resultFilters; - } catch (FileNotFoundException e) { - logger.error("File not found", e); - return parseTaxonomy ? resultTaxonomy : resultFilters; - // TODO: keep a list of files that threw this error and a dirty boolean marker -> if true, alert user - } finally { - if (xmlEventReader != null) { - try { - xmlEventReader.close(); - } catch (XMLStreamException e) { - logger.error("closing stream", e); - } - } - } - return parseTaxonomy ? 
resultTaxonomy : resultFilters; - } - - private static boolean isEndElementEndOfHeader(XMLEvent event, String headerTag) { - return event.asEndElement() - .getName() - .getLocalPart() - .equalsIgnoreCase(headerTag); - } - - @SuppressWarnings("Duplicates") - public static boolean readXMLGigafida(String path, StatisticsNew stats) { - boolean inWord = false; - boolean inPunctuation = false; - boolean taxonomyMatch = true; - ArrayList currentFiletaxonomy = new ArrayList<>(); -// ArrayList currentFiletaxonomyLong = new ArrayList<>(); - String lemma = ""; - String msd = ""; - - List sentence = new ArrayList<>(); - List corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it - String sentenceDelimiter = "s"; - - XMLEventReader eventReader = null; - try { - XMLInputFactory factory = XMLInputFactory.newInstance(); - eventReader = factory.createXMLEventReader(new FileInputStream(path)); - - while (eventReader.hasNext()) { - XMLEvent event = eventReader.nextEvent(); - - switch (event.getEventType()) { - case XMLStreamConstants.START_ELEMENT: - StartElement startElement = event.asStartElement(); - String qName = startElement.getName().getLocalPart(); - - // "word" node - if (qName.equals("w")) { - inWord = true; - - msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); - lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); - } - - if (qName.equals("c")){ - inPunctuation = true; - } - - // taxonomy node - else if (qName.equalsIgnoreCase("catRef")) { - // there are some term nodes at the beginning that are of no interest to us - // they differ by not having the attribute "ref", so test will equal null - Attribute tax = startElement.getAttributeByName(QName.valueOf("target")); - - if (tax != null) { - // keep only taxonomy properties - Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", "")); - currentFiletaxonomy.add(currentFiletaxonomyElement); - Tax taxonomy = new Tax(); -// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); - } - } - break; - - case XMLStreamConstants.CHARACTERS: - Characters characters = event.asCharacters(); - - // "word" node value - if (inWord) { - String word = characters.getData(); - sentence.add(createWord(word, lemma, msd, word, stats.getFilter())); - inWord = false; - } - if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { - String punctuation = characters.getData(); - sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter())); - inPunctuation = false; - -// String punctuation = ","; -// -// sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation); -// sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation); -// sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation); -// inPunctuation = false; + String tagContent = xmlEventReader.nextEvent().asCharacters().getData(); + resultFilters.get(elementName).add(tagContent); } - break; + } + } else if (xmlEvent.isEndElement() && corpusIsSplit && isEndElementEndOfHeader(xmlEvent, headTagName)) { + // if the corpus is split into multiple files, each with only one header block per file + // that means we should stop after we reach the end of the header + return parseTaxonomy ? 
resultTaxonomy : resultFilters; + } else if (xmlEvent.isEndElement() && !corpusIsSplit && isEndElementEndOfHeader(xmlEvent, headTagName)) { + // whole corpus in one file, so we have to continue reading in order to find all header blocks + insideHeader = false; + } + } + } catch (XMLStreamException e) { + logger.error("Streaming error", e); + return parseTaxonomy ? resultTaxonomy : resultFilters; + } catch (FileNotFoundException e) { + logger.error("File not found", e); + return parseTaxonomy ? resultTaxonomy : resultFilters; + // TODO: keep a list of files that threw this error and a dirty boolean marker -> if true, alert user + } finally { + if (xmlEventReader != null) { + try { + xmlEventReader.close(); + } catch (XMLStreamException e) { + logger.error("closing stream", e); + } + } + } + return parseTaxonomy ? resultTaxonomy : resultFilters; + } -// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { -// String actualPunctuation = characters.getData(); -// if (actualPunctuation.equals(".") || actualPunctuation.equals("!") || actualPunctuation.equals("?") || actualPunctuation.equals("...")) -// break; -// String punctuation = ","; -// int skip_number = 0; -// if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue())){ -// skip_number = stats.getFilter().getSkipValue(); -// } -// for(int i = 1; i < skip_number + 2; i ++){ -// if (i < sentence.size() && !sentence.get(sentence.size() - i).equals(punctuation)) { -// sentence.get(sentence.size() - i).setWord(sentence.get(sentence.size() - i).getWord() + punctuation); -// sentence.get(sentence.size() - i).setLemma(sentence.get(sentence.size() - i).getLemma() + punctuation); -// sentence.get(sentence.size() - i).setMsd(sentence.get(sentence.size() - i).getMsd() + punctuation); -// } -// } -// inPunctuation = false; -// } - - case XMLStreamConstants.END_ELEMENT: - EndElement endElement = event.asEndElement(); - - String var = endElement.getName().getLocalPart(); - String debug = ""; - - // parser reached end of the current sentence - if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { - // count all UniGramOccurrences in sentence for statistics - if (stats.getFilter().getNgramValue() == 0){ - int numSentenceParts = 0; - for(Word w : sentence){ - int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1); - numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts; - } - stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy); - } else if(stats.getFilter().getNgramValue() >= 1) { - stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy); - } - // add sentence to corpus if it passes filters - sentence = runFilters(sentence, stats.getFilter()); - - - - if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) { - corpus.add(new Sentence(sentence, currentFiletaxonomy)); - } - -// taxonomyMatch = true; - // and start a new one - sentence = new ArrayList<>(); - - /* Invoke Fork-Join when we reach maximum limit of - * sentences (because we can't read everything to - * memory) or we reach the end of the file. 
- */ - if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { - fj(corpus, stats); - // empty the current corpus, since we don't need the data anymore - corpus.clear(); - - // TODO: if (stats.isUseDB()) { - // stats.storeTmpResultsToDB(); - // } - } - } else if (endElement.getName().getLocalPart().equals("teiHeader")) { - // before proceeding to read this file, make sure that taxonomy filters are a match - - if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { - currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection - - if (currentFiletaxonomy.isEmpty()) { - // taxonomies don't match so stop -// return false; - taxonomyMatch = false; -// System.out.println("TEST"); - } - } - } - - // fallback - else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) { - // join corpus and stats - fj(corpus, stats); - corpus.clear(); - - // TODO: if (stats.isUseDB()) { - // stats.storeTmpResultsToDB(); - // } - } - - break; - } - } - } catch (FileNotFoundException | XMLStreamException e) { - e.printStackTrace(); - } finally { - if (eventReader != null) { - try { - eventReader.close(); - } catch (XMLStreamException e) { - logger.error("closing stream", e); - } - } - } - - return true; - } + private static boolean isEndElementEndOfHeader(XMLEvent event, String headerTag) { + return event.asEndElement() + .getName() + .getLocalPart() + .equalsIgnoreCase(headerTag); + } @SuppressWarnings("Duplicates") - public static boolean readXMLSSJ500K(String path, StatisticsNew stats) { + public static boolean readXMLGigafida(String path, StatisticsNew stats) { boolean inWord = false; boolean inPunctuation = false; boolean taxonomyMatch = true; ArrayList currentFiletaxonomy = new ArrayList<>(); -// ArrayList currentFiletaxonomyLong = new ArrayList<>(); +// ArrayList currentFiletaxonomyLong = new ArrayList<>(); String lemma = ""; String msd = ""; @@ -815,7 +703,225 @@ public class XML_processing { // "word" node if (qName.equals("w")) { inWord = true; - if (!String.valueOf(startElement.getAttributeByName(QName.valueOf("ana")).getValue()).substring(0, 4).equals("msd:")){ + + msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); + lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); + } + + if (qName.equals("c")){ + inPunctuation = true; + } + + // taxonomy node + else if (qName.equalsIgnoreCase("catRef")) { + // there are some term nodes at the beginning that are of no interest to us + // they differ by not having the attribute "ref", so test will equal null + Attribute tax = startElement.getAttributeByName(QName.valueOf("target")); + + if (tax != null) { + // keep only taxonomy properties + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus()); + currentFiletaxonomy.add(currentFiletaxonomyElement); + Tax taxonomy = new Tax(); +// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); + } + } + break; + + case XMLStreamConstants.CHARACTERS: + Characters characters = event.asCharacters(); + + // "word" node value + if (inWord) { + String word = characters.getData(); + sentence.add(createWord(word, lemma, msd, word, stats.getFilter())); + inWord = false; + } + if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { + String punctuation = characters.getData(); + sentence.add(createWord(punctuation, punctuation, 
"/", punctuation, stats.getFilter())); + inPunctuation = false; + +// String punctuation = ","; +// +// sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation); +// sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation); +// sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation); +// inPunctuation = false; + } + break; + +// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { +// String actualPunctuation = characters.getData(); +// if (actualPunctuation.equals(".") || actualPunctuation.equals("!") || actualPunctuation.equals("?") || actualPunctuation.equals("...")) +// break; +// String punctuation = ","; +// int skip_number = 0; +// if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue())){ +// skip_number = stats.getFilter().getSkipValue(); +// } +// for(int i = 1; i < skip_number + 2; i ++){ +// if (i < sentence.size() && !sentence.get(sentence.size() - i).equals(punctuation)) { +// sentence.get(sentence.size() - i).setWord(sentence.get(sentence.size() - i).getWord() + punctuation); +// sentence.get(sentence.size() - i).setLemma(sentence.get(sentence.size() - i).getLemma() + punctuation); +// sentence.get(sentence.size() - i).setMsd(sentence.get(sentence.size() - i).getMsd() + punctuation); +// } +// } +// inPunctuation = false; +// } + + case XMLStreamConstants.END_ELEMENT: + EndElement endElement = event.asEndElement(); + + String var = endElement.getName().getLocalPart(); + String debug = ""; + + // parser reached end of the current sentence + if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { + // count all UniGramOccurrences in sentence for statistics + if (stats.getFilter().getNgramValue() == 0){ + int numSentenceParts = 0; + for(Word w : sentence){ + int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1); + numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts; + } + stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy); + } else if(stats.getFilter().getNgramValue() >= 1) { + stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy); + } + // add sentence to corpus if it passes filters + sentence = runFilters(sentence, stats.getFilter()); + + + + if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) { + corpus.add(new Sentence(sentence, currentFiletaxonomy)); + } + +// taxonomyMatch = true; + // and start a new one + sentence = new ArrayList<>(); + + /* Invoke Fork-Join when we reach maximum limit of + * sentences (because we can't read everything to + * memory) or we reach the end of the file. 
+ */ + if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { + fj(corpus, stats); + // empty the current corpus, since we don't need the data anymore + corpus.clear(); + + // TODO: if (stats.isUseDB()) { + // stats.storeTmpResultsToDB(); + // } + } + } else if (endElement.getName().getLocalPart().equals("teiHeader")) { + // before proceeding to read this file, make sure that taxonomy filters are a match + + if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { + currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection + + if (currentFiletaxonomy.isEmpty()) { + // taxonomies don't match so stop +// return false; + taxonomyMatch = false; +// System.out.println("TEST"); + } + } + } + + // fallback + else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) { + // join corpus and stats + fj(corpus, stats); + corpus.clear(); + + // TODO: if (stats.isUseDB()) { + // stats.storeTmpResultsToDB(); + // } + } + + break; + } + } + } catch (FileNotFoundException | XMLStreamException e) { + e.printStackTrace(); + } finally { + if (eventReader != null) { + try { + eventReader.close(); + } catch (XMLStreamException e) { + logger.error("closing stream", e); + } + } + } + + return true; + } + + @SuppressWarnings("Duplicates") + public static boolean readXMLSSJ500K(String path, StatisticsNew stats) { + boolean inWord = false; + boolean inPunctuation = false; + boolean taxonomyMatch = true; + ArrayList currentFiletaxonomy = new ArrayList<>(); +// ArrayList currentFiletaxonomyLong = new ArrayList<>(); + String lemma = ""; + String msd = ""; + + List sentence = new ArrayList<>(); + List corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it + String sentenceDelimiter = "s"; + + int numLines = 0; + int lineNum = 0; + progress.set(0.0); + if(!isCollocability) { + startTime = new Date(); + } + // get number of lines + try { + XMLInputFactory factory = XMLInputFactory.newInstance(); + XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path)); + + while (eventReader.hasNext()) + { + eventReader.next(); + numLines ++; + // Loop just in case the file is > Long.MAX_VALUE or skip() decides to not read the entire file + } + } catch (IOException e) { + e.printStackTrace(); + } catch (XMLStreamException e) { + e.printStackTrace(); + } + + XMLEventReader eventReader = null; + try { + XMLInputFactory factory = XMLInputFactory.newInstance(); + eventReader = factory.createXMLEventReader(new FileInputStream(path)); + + while (eventReader.hasNext()) { + int percentage = (int) (lineNum * 100.0 / numLines); + if(progress.get() < percentage) { + progress.set(percentage); + } + if(isCancelled) { + return false; + } + lineNum ++; + XMLEvent event = eventReader.nextEvent(); + + switch (event.getEventType()) { + case XMLStreamConstants.START_ELEMENT: + StartElement startElement = event.asStartElement(); + String qName = startElement.getName().getLocalPart(); + + // "word" node + if (qName.equals("w")) { + inWord = true; + if (!(String.valueOf(startElement.getAttributeByName(QName.valueOf("ana")).getValue()).substring(0, 4).equals("msd:") || + String.valueOf(startElement.getAttributeByName(QName.valueOf("ana")).getValue()).substring(0, 4).equals("mte:"))){ System.out.println("MSD written incorrectly"); } msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("ana")).getValue()).substring(4); @@ -834,28 +940,78 @@ public class XML_processing { if (tax != null) 
{ // keep only taxonomy properties - Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", "")); + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus()); currentFiletaxonomy.add(currentFiletaxonomyElement); // Tax taxonomy = new Tax(); // currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); } - } else if (qName.equals("bibl")) { - // before proceeding to read this file, make sure that taxonomy filters are a match - taxonomyMatch = true; + } else if (qName.equalsIgnoreCase("catRef")) { + // get value from attribute target + Attribute tax = startElement.getAttributeByName(QName.valueOf("target")); - } + if (tax != null && !tax.getValue().equals("dedup:nodup")) { + // keep only taxonomy properties + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).split(":")[1], stats.getCorpus()); + currentFiletaxonomy.add(currentFiletaxonomyElement); +// Tax taxonomy = new Tax(); +// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); + } + + + + + +// if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) { +// HashMap atts = extractAttributes(startElement); +// String debug = ""; +// +// String tax = startElement.getAttributeByName(QName.valueOf("target")) +// .getValue() +// .replace("#", ""); +// +// if (tax.indexOf(':') >= 0) { +// tax = tax.split(":")[1]; +// } +// resultTaxonomy.add(tax); +// } else if (parseTaxonomy && elementName.equalsIgnoreCase("term")) { +// String tax = startElement.getAttributeByName(QName.valueOf("ref")) +// .getValue() +// .replace("#", ""); +// +// resultTaxonomy.add(tax); +// } else if (!parseTaxonomy && headTags.contains(elementName)) { +// String tagContent = xmlEventReader.nextEvent().asCharacters().getData(); +// resultFilters.get(elementName).add(tagContent); +// } + + + + + + } else if (qName.equals("bibl")) { + // before proceeding to read this file, make sure that taxonomy filters are a match + taxonomyMatch = true; + + } else if (qName.equals("text")){ + taxonomyMatch = true; + } break; case XMLStreamConstants.CHARACTERS: Characters characters = event.asCharacters(); + // "word" node value if (inWord) { String word = characters.getData(); +// if (word.equals("Banovec")){ +// System.out.println("Test"); +// } sentence.add(createWord(word, lemma, msd, word, stats.getFilter())); inWord = false; } - if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { + if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation) { +// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { String punctuation = characters.getData(); sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter())); inPunctuation = false; @@ -870,16 +1026,16 @@ public class XML_processing { // parser reached end of the current sentence if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { - if (stats.getFilter().getNgramValue() == 0){ - int numSentenceParts = 0; - for(Word w : sentence){ - int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1); - numSentenceParts = (v >= 0) ? 
(numSentenceParts + v) : numSentenceParts; - } - stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy); - } else if(stats.getFilter().getNgramValue() >= 1) { - stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy); - } + if (stats.getFilter().getNgramValue() == 0){ + int numSentenceParts = 0; + for(Word w : sentence){ + int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1); + numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts; + } + stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy); + } else if(stats.getFilter().getNgramValue() >= 1) { + stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy); + } // add sentence to corpus if it passes filters sentence = runFilters(sentence, stats.getFilter()); @@ -906,7 +1062,8 @@ public class XML_processing { } } // fallback - else if (endElement.getName().getLocalPart().equalsIgnoreCase("div")) { + else if (endElement.getName().getLocalPart().equalsIgnoreCase("div") && + stats.getCorpus().getCorpusType() == CorpusType.SSJ500K) { // join corpus and stats fj(corpus, stats); corpus.clear(); @@ -914,23 +1071,34 @@ public class XML_processing { currentFiletaxonomy = new ArrayList<>(); // currentFiletaxonomyLong = new ArrayList<>(); } else if (endElement.getName().getLocalPart().equals("bibl")) { - // before proceeding to read this file, make sure that taxonomy filters are a match + // before proceeding to read this file, make sure that taxonomy filters are a match - if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { - currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection + if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { + currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection - if (currentFiletaxonomy.isEmpty()) { - // taxonomies don't match so stop + if (currentFiletaxonomy.isEmpty()) { + // taxonomies don't match so stop // return false; - taxonomyMatch = false; + taxonomyMatch = false; // System.out.println("TEST"); - } - } - } + } + } + } else if (endElement.getName().getLocalPart().equals("text")){ + taxonomyMatch = false; + } break; } } + if (corpus.size() > 0) { + fj(corpus, stats); + // empty the current corpus, since we don't need the data anymore + corpus.clear(); + + // TODO: if (stats.isUseDB()) { + // stats.storeTmpResultsToDB(); + // } + } } catch (FileNotFoundException | XMLStreamException e) { e.printStackTrace(); } finally { @@ -946,150 +1114,183 @@ public class XML_processing { return true; } - @SuppressWarnings("Duplicates") - public static boolean readXMLGos(String path, StatisticsNew stats) { - boolean inWord = false; + @SuppressWarnings("Duplicates") + public static boolean readXMLGos(String path, StatisticsNew stats) { + boolean inWord = false; boolean inPunctuation = false; - boolean inOrthDiv = false; - boolean computeForOrth = stats.getCorpus().isGosOrthMode(); - boolean inSeparatedWord = false; - ArrayList currentFiletaxonomy = new ArrayList<>(); + boolean inOrthDiv = false; + boolean computeForOrth = stats.getCorpus().isGosOrthMode(); + boolean inSeparatedWord = false; + ArrayList currentFiletaxonomy = new ArrayList<>(); // ArrayList currentFiletaxonomyLong = new ArrayList<>(); - String lemma = ""; - String msd = ""; + String lemma = ""; + String msd = ""; - List sentence = new ArrayList<>(); - List corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it - Map> GOSCorpusHM = new 
ConcurrentHashMap<>(); - String GOSCorpusHMKey = ""; - String sentenceDelimiter = "seg"; - int wordIndex = 0; + List sentence = new ArrayList<>(); + List corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it + Map> GOSCorpusHM = new ConcurrentHashMap<>(); + String GOSCorpusHMKey = ""; + String sentenceDelimiter = "seg"; + int wordIndex = 0; - String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm + String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm - XMLEventReader eventReader = null; - boolean includeFile = true; - try { - XMLInputFactory factory = XMLInputFactory.newInstance(); - eventReader = factory.createXMLEventReader(new FileInputStream(path)); + int numLines = 0; + int lineNum = 0; + progress.set(0.0); + if(!isCollocability) { + startTime = new Date(); + } + // get number of lines + try { + XMLInputFactory factory = XMLInputFactory.newInstance(); + XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path)); - // created hashmap to combine words with normalized words + while (eventReader.hasNext()) + { + eventReader.next(); + numLines ++; + // Loop just in case the file is > Long.MAX_VALUE or skip() decides to not read the entire file + } + } catch (IOException e) { + e.printStackTrace(); + } catch (XMLStreamException e) { + e.printStackTrace(); + } - while (eventReader.hasNext()) { - XMLEvent event = eventReader.nextEvent(); - // System.out.print(String.format("%s", event.toString().replaceAll("\\['http://www.tei-c.org/ns/1.0'\\]::", ""))); - switch (event.getEventType()) { - case XMLStreamConstants.START_ELEMENT: - StartElement startElement = event.asStartElement(); - String qName = startElement.getName().getLocalPart(); - if (qName.equals("div")) { - HashMap atts = extractAttributes(startElement); + XMLEventReader eventReader = null; + boolean includeFile = true; + try { + XMLInputFactory factory = XMLInputFactory.newInstance(); + eventReader = factory.createXMLEventReader(new FileInputStream(path)); - if (atts.keySet().contains("type")) { - inOrthDiv = atts.get("type").equals("orth"); - } - } + // created hashmap to combine words with normalized words - // "word" node - if (qName.equals("w")) { - // check that it's not a type - HashMap atts = extractAttributes(startElement); + while (eventReader.hasNext()) { + int percentage = (int) (lineNum * 100.0 / numLines); + if(progress.get() < percentage) { + progress.set(percentage); + } + if(isCancelled) { + return false; + } + lineNum ++; + XMLEvent event = eventReader.nextEvent(); + // System.out.print(String.format("%s", event.toString().replaceAll("\\['http://www.tei-c.org/ns/1.0'\\]::", ""))); - if (!atts.containsKey("type")) { - inWord = true; + switch (event.getEventType()) { + case XMLStreamConstants.START_ELEMENT: + StartElement startElement = event.asStartElement(); + String qName = startElement.getName().getLocalPart(); - if (atts.containsKey("msd")) { - msd = atts.get("msd"); + if (qName.equals("div")) { + HashMap atts = extractAttributes(startElement); - } - if (atts.containsKey("lemma")) { - lemma = atts.get("lemma"); - } - // - // if (!inOrthDiv) { - // msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); - // lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); - // } - } else if (atts.containsKey("type") && atts.get("type").equals("separated")) { + if (atts.keySet().contains("type")) { + inOrthDiv = 
atts.get("type").equals("orth"); + } + } + + // "word" node + if (qName.equals("w")) { + // check that it's not a type + HashMap atts = extractAttributes(startElement); + + if (!atts.containsKey("type")) { + inWord = true; + + if (atts.containsKey("msd")) { + msd = atts.get("msd"); + + } + if (atts.containsKey("lemma")) { + lemma = atts.get("lemma"); + } + // + // if (!inOrthDiv) { + // msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); + // lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); + // } + } else if (atts.containsKey("type") && atts.get("type").equals("separated")) { inSeparatedWord = true; } - // } - } - // taxonomy node - else if (qName.equalsIgnoreCase("catRef")) { - // there are some term nodes at the beginning that are of no interest to us - // they differ by not having the attribute "ref", so test will equal null - Attribute tax = startElement.getAttributeByName(QName.valueOf("target")); + // } + } + // taxonomy node + else if (qName.equalsIgnoreCase("catRef")) { + // there are some term nodes at the beginning that are of no interest to us + // they differ by not having the attribute "ref", so test will equal null + Attribute tax = startElement.getAttributeByName(QName.valueOf("target")); - if (tax != null) { - // keep only taxonomy properties - Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue())); - currentFiletaxonomy.add(currentFiletaxonomyElement); + if (tax != null) { + // keep only taxonomy properties + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()), stats.getCorpus()); + currentFiletaxonomy.add(currentFiletaxonomyElement); // Tax taxonomy = new Tax(); // currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); - } - } else if (qName.equalsIgnoreCase("div")) { - gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); - } else if (qName.equalsIgnoreCase("seg")) { - HashMap atts = extractAttributes(startElement); + } + } else if (qName.equalsIgnoreCase("div")) { + gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); + } else if (qName.equalsIgnoreCase("seg")) { + HashMap atts = extractAttributes(startElement); - if (atts.keySet().contains("id")) { - if (inOrthDiv) { + if (atts.keySet().contains("id")) { + if (inOrthDiv) { GOSCorpusHMKey = atts.get("id") + ".norm"; } else { GOSCorpusHMKey = atts.get("id"); } - } else { - System.out.println("No attribute \"id\""); - } - } - break; + } else { + System.out.println("No attribute \"id\""); + } + } + break; - case XMLStreamConstants.CHARACTERS: - // "word" node value - if (inWord) { + case XMLStreamConstants.CHARACTERS: + // "word" node value + if (inWord) { // if (GOSCorpusHMKey.equals("gos.028-0108.norm") && wordIndex > 8){ // System.out.println(wordIndex); // } - // if algorithm is in orthodox part add new word to sentence - if (inOrthDiv){ + // if algorithm is in orthodox part add new word to sentence + if (inOrthDiv){ // GOSCorpusHM.put(GOSCorpusHMKey, sentence); - String word = ""; - Characters characters = event.asCharacters(); - sentence.add(createWord(characters.getData(), "", "", "", stats.getFilter())); - // if algorithm is in normalized part find orthodox word and add other info to it - } else { - Characters characters = event.asCharacters(); + String word = ""; + Characters characters = event.asCharacters(); + sentence.add(createWord(characters.getData(), "", 
"", "", stats.getFilter())); + // if algorithm is in normalized part find orthodox word and add other info to it + } else { + Characters characters = event.asCharacters(); // System.out.println(wordIndex); // System.out.println(GOSCorpusHMKey + " " + lemma + " " + wordIndex); - if (wordIndex < GOSCorpusHM.get(GOSCorpusHMKey).size()) { - Word currentWord = GOSCorpusHM.get(GOSCorpusHMKey).get(wordIndex); - currentWord.setLemma(lemma, stats.getFilter().getWordParts()); - currentWord.setMsd(msd, stats.getFilter().getWordParts()); - currentWord.setNormalizedWord(characters.getData(), stats.getFilter().getWordParts()); + if (wordIndex < GOSCorpusHM.get(GOSCorpusHMKey).size()) { + Word currentWord = GOSCorpusHM.get(GOSCorpusHMKey).get(wordIndex); + currentWord.setLemma(lemma, stats.getFilter().getWordParts()); + currentWord.setMsd(msd, stats.getFilter().getWordParts()); + currentWord.setNormalizedWord(characters.getData(), stats.getFilter().getWordParts()); - wordIndex += 1; + wordIndex += 1; // when a word is separated from one to many we have to create these duplicates if (inSeparatedWord){ GOSCorpusHM.get(GOSCorpusHMKey).add(wordIndex, createWord(currentWord.getWord(stats.getFilter().getWordParts()), "", "", "", stats.getFilter())); } - } //else { + } //else { // System.out.println("Error"); // } - } + } - } - break; + } + break; - case XMLStreamConstants.END_ELEMENT: - EndElement endElement = event.asEndElement(); + case XMLStreamConstants.END_ELEMENT: + EndElement endElement = event.asEndElement(); if (endElement.getName().getLocalPart().equals("w")) { if (inWord){ @@ -1102,174 +1303,407 @@ public class XML_processing { } } - // parser reached end of the current sentence - if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { - if (inOrthDiv){ - // add sentence to corpus - GOSCorpusHM.put(GOSCorpusHMKey, sentence); - } else { + // parser reached end of the current sentence + if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { + if (inOrthDiv){ + // add sentence to corpus + GOSCorpusHM.put(GOSCorpusHMKey, sentence); + } else { sentence = GOSCorpusHM.remove(GOSCorpusHMKey); - if (stats.getFilter().getNgramValue() == 0){ - int numSentenceParts = 0; - for(Word w : sentence){ - int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1); - numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts; - } - stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy); - } else if(stats.getFilter().getNgramValue() >= 1) { - stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy); - } + if (stats.getFilter().getNgramValue() == 0){ + int numSentenceParts = 0; + for(Word w : sentence){ + int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1); + numSentenceParts = (v >= 0) ? 
(numSentenceParts + v) : numSentenceParts; + } + stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy); + } else if(stats.getFilter().getNgramValue() >= 1) { + stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy); + } - // add sentence to corpus if it passes filters - if (includeFile && !ValidationUtil.isEmpty(sentence)) { + // add sentence to corpus if it passes filters + if (includeFile && !ValidationUtil.isEmpty(sentence)) { // for(Word w : sentence) { // if (w.getW1().equals("")) { // System.out.println("HERE!!!"); // } // } - sentence = runFilters(sentence, stats.getFilter()); + sentence = runFilters(sentence, stats.getFilter()); // for(Word w : sentence) { // if (w.getW1().equals("")) { // System.out.println("HERE!!!"); // } // } - corpus.add(new Sentence(sentence, currentFiletaxonomy)); - } + corpus.add(new Sentence(sentence, currentFiletaxonomy)); + } - wordIndex = 0; + wordIndex = 0; - /* Invoke Fork-Join when we reach maximum limit of - * sentences (because we can't read everything to - * memory) or we reach the end of the file. - */ - if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { - fj(corpus, stats); - // empty the current corpus, since we don't need - // the data anymore - corpus.clear(); - } - } + /* Invoke Fork-Join when we reach maximum limit of + * sentences (because we can't read everything to + * memory) or we reach the end of the file. + */ + if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { + fj(corpus, stats); + // empty the current corpus, since we don't need + // the data anymore + corpus.clear(); + } + } // start a new sentence sentence = new ArrayList<>(); - } else if (endElement.getName().getLocalPart().equals("teiHeader")) { - // before proceeding to read this file, make sure that taxonomy filters are a match - if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { - currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection + } else if (endElement.getName().getLocalPart().equals("teiHeader")) { + // before proceeding to read this file, make sure that taxonomy filters are a match + if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { + currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection - // disregard this entry if taxonomies don't match - includeFile = !currentFiletaxonomy.isEmpty(); + // disregard this entry if taxonomies don't match + includeFile = !currentFiletaxonomy.isEmpty(); // currentFiletaxonomy = new ArrayList<>(); - } - } + } + } - // backup - else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) { - fj(corpus, stats); - corpus.clear(); + // backup + else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) { + fj(corpus, stats); + corpus.clear(); currentFiletaxonomy = new ArrayList<>(); // currentFiletaxonomyLong = new ArrayList<>(); - } + } - break; - } - } - } catch (FileNotFoundException | XMLStreamException e) { - e.printStackTrace(); - } finally { - if (eventReader != null) { - try { - eventReader.close(); - } catch (XMLStreamException e) { - logger.error("closing stream", e); - } catch (Exception e) { - logger.error("general error", e); - } - } - } + break; + } + } + } catch (FileNotFoundException | XMLStreamException e) { + e.printStackTrace(); + } finally { + if (eventReader != null) { + try { + eventReader.close(); + } catch (XMLStreamException e) { + logger.error("closing stream", e); + } catch (Exception e) { + logger.error("general error", e); + } 
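The unigram-occurrence bookkeeping that readXML, readXMLGos and (below) readVERT all repeat boils down to one rule: in letter mode (ngram value 0) a word of length L contributes max(0, L - (k - 1)) character k-grams, where k is the configured string length, while in word mode (ngram value >= 1) every word counts once. A minimal, self-contained sketch of the letter-mode rule — hypothetical names, not part of this patch:

    import java.util.Arrays;
    import java.util.List;

    class LetterNgramCountSketch {
        // Mirrors the accumulation above: v = length - (k - 1) substrings of
        // length k fit in a word; words shorter than k contribute nothing,
        // exactly like the (v >= 0) ? numSentenceParts + v : numSentenceParts step.
        static int countKgramPositions(List<String> words, int k) {
            int total = 0;
            for (String w : words) {
                int v = w.length() - (k - 1);
                if (v > 0) {
                    total += v;
                }
            }
            return total;
        }

        public static void main(String[] args) {
            // "pes" holds one 3-gram, "je" none, so the sentence total is 1.
            System.out.println(countKgramPositions(Arrays.asList("pes", "je"), 3));
        }
    }

That per-sentence total is what updateUniGramOccurrences receives, keyed by the current file's taxonomy entries.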
+ } + } - return true; - } + return true; + } - /** - * Runs the sentence through some filters, so we don't do calculations when unnecessary. - * Filters: - *
- * <ol>
- * <li>Ngrams: omit sentences that are shorter than the ngram value (e.g. 3 gram of a single word sentence)</li>
- * <li>Letter ngrams: omit words that are shorter than the specified string length (e.g. combinations of 3 letters when the word consists of only 2 letters)</li>
- * </ol>
- * - * @return Empty sentence (if fails 1.) or a sentence with some words removed (2.) - */ - private static List runFilters(List sentence, Filter filter) { - if (filter.getAl() == AnalysisLevel.STRING_LEVEL) { - // ngram level: if not 0 must be less than or equal to number of words in this sentence. - if (filter.getNgramValue() > 0 && filter.getNgramValue() > sentence.size()) { - return new ArrayList<>(); - } + @SuppressWarnings("Duplicates") + public static boolean readVERT(String path, StatisticsNew stats) { + // taxonomy corpora +// HashSet resultTaxonomy = new HashSet<>(); - // if we're calculating values for letters, omit words that are shorter than string length - if (filter.getNgramValue() == 0) { - sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord(filter.getWordParts()).length() < filter.getStringLength()) - || (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma(filter.getWordParts()).length() < filter.getStringLength())); - } - } - return sentence; - } + // regi path + String regiPath = path.substring(0, path.length()-4) + "regi"; - private static HashMap extractAttributes(StartElement se) { - Iterator attributesIt = se.getAttributes(); - HashMap atts = new HashMap<>(); + LineIterator regiIt; + int wordIndex = -1; + int lemmaIndex = -1; + int msdIndex = -1; + boolean slovene = false; + try { + // read regi file + regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8"); + try { + boolean insideHeader = false; + int attributeIndex = 0; + while (regiIt.hasNext()) { + String line = regiIt.nextLine(); - while (attributesIt.hasNext()) { - Attribute a = (Attribute) attributesIt.next(); - atts.put(a.getName().getLocalPart(), a.getValue()); - } + if (line.length() >= 9 && line.substring(0, 9).equals("ATTRIBUTE")) { + // split over "\" " + String[] split = line.split(" "); + if (split[1].equals("word") && wordIndex == -1){ + wordIndex = attributeIndex; + } else if (split[1].equals("lempos") && lemmaIndex == -1){ + lemmaIndex = attributeIndex; + } else if (split[1].equals("tag") && msdIndex == -1){ + msdIndex = attributeIndex; + } + attributeIndex ++; + if (wordIndex >= 0 && lemmaIndex >= 0 && msdIndex >= 0){ + break; + } + } else if (line.length() >= 8 && line.substring(0, 8).equals("LANGUAGE")) { + String[] split = line.split(" "); + if (split[1].equals("\"Slovenian\"")){ + slovene = true; + } + } + } + } finally { + LineIterator.closeQuietly(regiIt); + } + } catch (IOException e) { + e.printStackTrace(); + } - return atts; - } + int numLines = 0; + // get number of lines + try (FileReader input = new FileReader(path); + LineNumberReader count = new LineNumberReader(input) + ) + { + while (count.skip(Long.MAX_VALUE) > 0) + { + // Loop just in case the file is > Long.MAX_VALUE or skip() decides to not read the entire file + } - public static Word createWord(String word, String lemma, String msd, String normalizedWord, Filter f){ - List wString = new ArrayList<>(); - if (f.getWordParts().contains(CalculateFor.WORD)) - wString.add(word); - if (f.getWordParts().contains(CalculateFor.LEMMA)) - wString.add(lemma); - if (f.getWordParts().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) - wString.add(msd); - if (f.getWordParts().contains(CalculateFor.NORMALIZED_WORD)) - wString.add(normalizedWord); + numLines = count.getLineNumber() + 1; // +1 because line index starts at 0 + } catch (IOException e) { + e.printStackTrace(); + } - // find appropriate strings and put them in word - Word w; + LineIterator it; - switch (f.getWordParts().size()) { - case 
1: - w = new Word1(wString.get(0)); - break; - case 2: - w = new Word2(wString.get(0), wString.get(1)); - break; - case 3: - w = new Word3(wString.get(0), wString.get(1), wString.get(2)); - break; - case 4: - w = new Word4(wString.get(0), wString.get(1), wString.get(2), wString.get(3)); - break; - default: - w = null; + ArrayList currentFiletaxonomy = new ArrayList<>(); + boolean inParagraph = false; + boolean inSentence = false; + boolean taxonomyMatch = true; + int lineNum = 0; + int numSentences = 0; + int numSentencesLimit = 1000; + List sentence = new ArrayList<>(); + List corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); - } - return w; - } + progress.set(0.0); + if(!isCollocability) { + startTime = new Date(); + } + try { + it = FileUtils.lineIterator(new File(path), "UTF-8"); + try { + boolean insideHeader = false; + + while (it.hasNext()) { + int percentage = (int) (lineNum * 100.0 / numLines); + if(progress.get() < percentage) { + progress.set(percentage); + } + if(isCancelled) { + return false; + } + lineNum ++; + String line = it.nextLine(); + // beginning tags + + // taxonomy + if (line.length() > 4 && line.substring(1, 5).equals("text")) { + String[] split = line.split("\" "); + currentFiletaxonomy = new ArrayList<>(); + + boolean medium = false; + boolean type = false; + boolean proofread = false; + for (String el : split) { + String[] attribute = el.split("=\""); + boolean idsPresent = false; + if (attribute[0].equals("medium_id") && !attribute[1].equals("-")) { + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus()); + currentFiletaxonomy.add(currentFiletaxonomyElement); + medium = true; + } else if (attribute[0].equals("type_id") && !attribute[1].equals("-")) { + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus()); + currentFiletaxonomy.add(currentFiletaxonomyElement); + type = true; + } else if (attribute[0].equals("proofread_id") && !attribute[1].equals("-")) { + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus()); + currentFiletaxonomy.add(currentFiletaxonomyElement); + proofread = true; + } + if (attribute[0].equals("medium") && !attribute[1].equals("-") && !medium) { + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus()); + currentFiletaxonomy.add(currentFiletaxonomyElement); + } else if (attribute[0].equals("type") && !attribute[1].equals("-") && !type) { + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus()); + currentFiletaxonomy.add(currentFiletaxonomyElement); + } else if (attribute[0].equals("proofread") && !attribute[1].equals("-") && !attribute[1].equals("-\">") && !proofread) { + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus()); + currentFiletaxonomy.add(currentFiletaxonomyElement); + } + + } + taxonomyMatch = true; + if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { + currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection + + if (currentFiletaxonomy.isEmpty()) { + // taxonomies don't match so don't save + taxonomyMatch = false; + } + } + + } +// else if((line.length() >= 3 && line.substring(0, 2).equals("")) || +// (line.length() >= 3 && line.substring(0, 3).equals(""))){ +// inParagraph = true; +// } else if((line.length() == 4 && line.equals("
")) || (line.length() == 5 && line.equals(""))){ +// inParagraph = false; +// } + else if(line.length() >= 3 && line.substring(0, 2).equals("")){ + inSentence = true; + } else if(line.length() == 4 && line.equals("")){ + inSentence = false; + + if (stats.getFilter().getNgramValue() == 0){ + int numSentenceParts = 0; + for(Word w : sentence){ + int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1); + numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts; + } + stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy); + } else if(stats.getFilter().getNgramValue() >= 1) { + stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy); + } + + sentence = runFilters(sentence, stats.getFilter()); + + if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) { + corpus.add(new Sentence(sentence, currentFiletaxonomy)); + } + + if (numSentences == numSentencesLimit) { + fj(corpus, stats); + corpus.clear(); + numSentences = 0; + } else { + numSentences ++; + } + + // and start a new one + sentence = new ArrayList<>(); + +// corpus.add(new Sentence(sentence, currentFiletaxonomy)); + } else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence){ +// } else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence && inParagraph){ + String[] split = line.split("\t"); + if(slovene) { + if (split[lemmaIndex].length() > 2 && split[lemmaIndex].charAt(split[lemmaIndex].length() - 2) == '-' && Character.isAlphabetic(split[lemmaIndex].charAt(split[lemmaIndex].length() - 1)) && + !split[lemmaIndex].substring(split[lemmaIndex].length() - 2, split[lemmaIndex].length()).equals("-u")) { + Word word = createWord(split[wordIndex], split[lemmaIndex].substring(0, split[lemmaIndex].length() - 2), split[msdIndex], split[wordIndex], stats.getFilter()); + sentence.add(word); + } else if (stats.getFilter().getNotePunctuations() && (split[lemmaIndex].length() <= 2 || (split[lemmaIndex].charAt(split[lemmaIndex].length() - 2) != '-' && !Character.isAlphabetic(split[lemmaIndex].charAt(split[lemmaIndex].length() - 1))))) { + Word word = createWord(split[wordIndex], split[lemmaIndex], split[msdIndex], split[wordIndex], stats.getFilter()); + sentence.add(word); + } else if (split[lemmaIndex].length() > 2 && !split[lemmaIndex].substring(split[lemmaIndex].length() - 2, split[lemmaIndex].length()).equals("-u") || + stats.getFilter().getNotePunctuations()) { + Word word = createWord(split[wordIndex], split[lemmaIndex].substring(0, split[lemmaIndex].length() - 2), split[msdIndex], split[wordIndex], stats.getFilter()); + sentence.add(word); + } + } else { + if (split[lemmaIndex].length() > 2 && split[lemmaIndex].charAt(split[lemmaIndex].length() - 2) == '-' && Character.isAlphabetic(split[lemmaIndex].charAt(split[lemmaIndex].length() - 1)) && + !split[lemmaIndex].substring(split[lemmaIndex].length() - 2, split[lemmaIndex].length()).equals("-z")) { + Word word = createWord(split[wordIndex], split[lemmaIndex].substring(0, split[lemmaIndex].length() - 2), split[msdIndex], split[wordIndex], stats.getFilter()); + sentence.add(word); + } else if (stats.getFilter().getNotePunctuations() && (split[lemmaIndex].length() <= 2 || (split[lemmaIndex].charAt(split[lemmaIndex].length() - 2) != '-' && !Character.isAlphabetic(split[lemmaIndex].charAt(split[lemmaIndex].length() - 1))))) { + Word word = createWord(split[wordIndex], split[lemmaIndex], split[msdIndex], split[wordIndex], stats.getFilter()); + sentence.add(word); + } else if 
(split[lemmaIndex].length() > 2 && !split[lemmaIndex].substring(split[lemmaIndex].length() - 2, split[lemmaIndex].length()).equals("-z") ||
+ stats.getFilter().getNotePunctuations()) {
+ Word word = createWord(split[wordIndex], split[lemmaIndex].substring(0, split[lemmaIndex].length() - 2), split[msdIndex], split[wordIndex], stats.getFilter());
+ sentence.add(word);
+ }
+ }
+ }
+ }
+ if (corpus.size() > 0) {
+ fj(corpus, stats);
+ corpus.clear();
+ }
+ } finally {
+ LineIterator.closeQuietly(it);
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+// resultTaxonomy.remove("-");
+ return true;
+ }
+
+ /**
+ * Runs the sentence through a few filters, so we avoid unnecessary calculations.
+ * Filters:
+ *
+ * <ol>
+ * <li>Ngrams: omit sentences that are shorter than the ngram value (e.g. a 3-gram of a single-word sentence)</li>
+ * <li>Letter ngrams: omit words that are shorter than the specified string length (e.g. combinations of 3 letters when the word consists of only 2 letters)</li>
+ * </ol>
+ * + * @return Empty sentence (if fails 1.) or a sentence with some words removed (2.) + */ + private static List runFilters(List sentence, Filter filter) { + if (filter.getAl() == AnalysisLevel.STRING_LEVEL) { + // ngram level: if not 0 must be less than or equal to number of words in this sentence. + if (filter.getNgramValue() > 0 && filter.getNgramValue() > sentence.size()) { + return new ArrayList<>(); + } + + // if we're calculating values for letters, omit words that are shorter than string length + if (filter.getNgramValue() == 0) { + sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord(filter.getWordParts()).length() < filter.getStringLength()) + || (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma(filter.getWordParts()).length() < filter.getStringLength())); + } + } + + return sentence; + } + + private static HashMap extractAttributes(StartElement se) { + Iterator attributesIt = se.getAttributes(); + HashMap atts = new HashMap<>(); + + while (attributesIt.hasNext()) { + Attribute a = (Attribute) attributesIt.next(); + atts.put(a.getName().getLocalPart(), a.getValue()); + } + + return atts; + } + + public static Word createWord(String word, String lemma, String msd, String normalizedWord, Filter f){ + List wString = new ArrayList<>(); + if (f.getWordParts().contains(CalculateFor.WORD)) + wString.add(word); + if (f.getWordParts().contains(CalculateFor.LEMMA)) + wString.add(lemma); + if (f.getWordParts().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) + wString.add(msd); + if (f.getWordParts().contains(CalculateFor.NORMALIZED_WORD)) + wString.add(normalizedWord); + + // find appropriate strings and put them in word + Word w; + + switch (f.getWordParts().size()) { + case 1: + w = new Word1(wString.get(0)); + break; + case 2: + w = new Word2(wString.get(0), wString.get(1)); + break; + case 3: + w = new Word3(wString.get(0), wString.get(1), wString.get(2)); + break; + case 4: + w = new Word4(wString.get(0), wString.get(1), wString.get(2), wString.get(3)); + break; + default: + w = null; + + } + return w; + } } diff --git a/src/main/java/alg/inflectedJOS/InflectedJOSCount.java b/src/main/java/alg/inflectedJOS/InflectedJOSCount.java index b4f02bf..3f8c480 100755 --- a/src/main/java/alg/inflectedJOS/InflectedJOSCount.java +++ b/src/main/java/alg/inflectedJOS/InflectedJOSCount.java @@ -74,7 +74,7 @@ // // public static void calculateForAll(List corpus, Statistics stats, String taxonomy) { // // for (Sentence s : corpus) { // // // disregard if wrong taxonomy -// // if (!(s.getTaxonomy().startsWith(taxonomy))) { +// // if (!(s.getObservableListTaxonomy().startsWith(taxonomy))) { // // continue; // // } // // @@ -122,7 +122,7 @@ // static void calculateForAll(List corpus, Statistics stats, String taxonomy) { // for (Sentence s : corpus) { // // disregard if wrong taxonomy -//// if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) { +//// if (taxonomy != null && !(s.getObservableListTaxonomy().startsWith(taxonomy))) { //// continue; //// } // diff --git a/src/main/java/alg/ngram/Ngrams.java b/src/main/java/alg/ngram/Ngrams.java index f8e42da..e93dcf9 100755 --- a/src/main/java/alg/ngram/Ngrams.java +++ b/src/main/java/alg/ngram/Ngrams.java @@ -432,7 +432,7 @@ public class Ngrams { // String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor()); // key = (key.charAt(key.length()-1) == ',') ? 
key.substring(0, key.length() - 1) : key; // stats.updateTaxonomyResults(new MultipleHMKeys1(key), -// stats.getCorpus().getTaxonomy()); +// stats.getCorpus().getObservableListTaxonomy()); ArrayList otherKeys = stats.getFilter().getMultipleKeys(); diff --git a/src/main/java/alg/word/WordCount.java b/src/main/java/alg/word/WordCount.java index 5ee2160..b6f4cbc 100755 --- a/src/main/java/alg/word/WordCount.java +++ b/src/main/java/alg/word/WordCount.java @@ -91,7 +91,7 @@ import data.Word; // private static void calculateForTaxonomyAndJosType(List corpus, Statistics stats) { // for (Sentence s : corpus) { -// if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) { +// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) { // List sentence = new ArrayList<>(s.getWords().size()); // List filteredWords = new ArrayList<>(); // @@ -122,7 +122,7 @@ import data.Word; // private static void calculateForTaxonomy(List corpus, Statistics stats) { // for (Sentence s : corpus) { -// if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) { +// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) { // List sentence = new ArrayList<>(s.getWords().size()); // // if (stats.getCf() == CalculateFor.LEMMA) { diff --git a/src/main/java/data/Corpus.java b/src/main/java/data/Corpus.java index dfbd710..1899a4f 100755 --- a/src/main/java/data/Corpus.java +++ b/src/main/java/data/Corpus.java @@ -27,7 +27,8 @@ public class Corpus { private File chosenCorpusLocation; private Collection detectedCorpusFiles; boolean headerRead; - private ObservableList taxonomy; // if gigafida or gos + private ArrayList taxonomy; // if gigafida or gos + private Taxonomy taxonomyTotal; private HashMap> solarFilters; // if solar private HashMap> solarFiltersForXML; // if solar - used while parsing xml private boolean gosOrthMode; @@ -36,6 +37,7 @@ public class Corpus { public Corpus() { validationErrors = new ArrayList<>(); + setTotal(); } public CorpusType getCorpusType() { @@ -82,9 +84,25 @@ public class Corpus { this.headerRead = headerRead; } - public ObservableList getTaxonomy() { + public Taxonomy getTotal() { + return taxonomyTotal; + } + + public void setTotal() { + taxonomyTotal = new Taxonomy("Total", false); + } + + public ArrayList getTaxonomy() { return taxonomy; } + + public ObservableList getObservableListTaxonomy() { + ArrayList al = new ArrayList<>(); + for (Taxonomy t : this.taxonomy){ + al.add(t.toLongNameString()); + } + return FXCollections.observableArrayList(al); + } // // public ObservableList getFormattedTaxonomy() { // ArrayList al = Tax.getTaxonomyFormatted(new ArrayList<>(taxonomy), corpusType); @@ -92,7 +110,10 @@ public class Corpus { // } public void setTaxonomy(ObservableList taxonomy) { - this.taxonomy = taxonomy; + this.taxonomy = new ArrayList<>(); + for(String t : taxonomy){ + this.taxonomy.add(new Taxonomy(t, true)); + } logger.info("Corpus.set: ", taxonomy); } @@ -151,7 +172,8 @@ public class Corpus { if (!headerRead && corpusType != null) { // if user didn't opt into reading the headers, set default taxonomy or solar filters if (Tax.getCorpusTypesWithTaxonomy().contains(corpusType)) { - taxonomy = Tax.getTaxonomyForComboBox(corpusType); + Tax.getTaxonomyForComboBox(corpusType); + setTaxonomy(Tax.getTaxonomyForComboBox(corpusType)); } else if (corpusType == CorpusType.SOLAR && solarFilters == null) { setSolarFilters(SolarFilters.getFiltersForComboBoxes()); } diff --git a/src/main/java/data/CorpusType.java 
b/src/main/java/data/CorpusType.java index 7cac659..e14c4df 100755 --- a/src/main/java/data/CorpusType.java +++ b/src/main/java/data/CorpusType.java @@ -2,6 +2,7 @@ package data; public enum CorpusType { GIGAFIDA("Gigafida", "gigafida"), + GIGAFIDA2("Gigafida2.0", "gigafida2.0"), CCKRES("ccKres ", "cckres"), SOLAR("Šolar", "šolar"), GOS("GOS", "gos"), diff --git a/src/main/java/data/StatisticsNew.java b/src/main/java/data/StatisticsNew.java index cc29469..9301c64 100755 --- a/src/main/java/data/StatisticsNew.java +++ b/src/main/java/data/StatisticsNew.java @@ -10,7 +10,6 @@ import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Pattern; -import java.util.stream.Collectors; import gui.I18N; import org.apache.commons.lang3.StringUtils; @@ -51,17 +50,17 @@ public class StatisticsNew { this.corpus = corpus; this.filter = filter; this.taxonomyResult = new ConcurrentHashMap<>(); - this.taxonomyResult.put(Taxonomy.TOTAL, new ConcurrentHashMap<>()); + this.taxonomyResult.put(corpus.getTotal(), new ConcurrentHashMap<>()); this.collocability = new ConcurrentHashMap<>(); this.uniGramTaxonomyOccurrences = new ConcurrentHashMap<>(); - this.uniGramTaxonomyOccurrences.put(Taxonomy.TOTAL, new AtomicLong(0L)); + this.uniGramTaxonomyOccurrences.put(corpus.getTotal(), new AtomicLong(0L)); // create table for counting word occurrences per taxonomies - if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) { + if (this.corpus.getObservableListTaxonomy() != null && filter.getDisplayTaxonomy()) { if (this.filter.getTaxonomy().isEmpty()) { - for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) { - this.taxonomyResult.put(Taxonomy.factoryLongName(this.corpus.getTaxonomy().get(i)), new ConcurrentHashMap<>()); + for (int i = 0; i < this.corpus.getObservableListTaxonomy().size(); i++) { + this.taxonomyResult.put(Taxonomy.factoryLongName(this.corpus.getObservableListTaxonomy().get(i), corpus), new ConcurrentHashMap<>()); } } else { for (int i = 0; i < this.filter.getTaxonomy().size(); i++) { @@ -234,14 +233,14 @@ public class StatisticsNew { removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy()); // if no results and nothing to save, return false - if (!(taxonomyResult.get(Taxonomy.TOTAL).size() > 0)) { + if (!(taxonomyResult.get(corpus.getTotal()).size() > 0)) { analysisProducedResults = false; return false; } else { analysisProducedResults = true; } - stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get(Taxonomy.TOTAL), Util.getValidInt(limit)))); + stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get(corpus.getTotal()), Util.getValidInt(limit)))); Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter); return true; } @@ -253,14 +252,14 @@ public class StatisticsNew { if (minimalTaxonomy == 1) return; int occurances; - for (MultipleHMKeys key : taxonomyResult.get(Taxonomy.TOTAL).keySet()){ + for (MultipleHMKeys key : taxonomyResult.get(corpus.getTotal()).keySet()){ occurances = 0; for (Taxonomy columnNameKey : taxonomyResult.keySet()){ - if(!columnNameKey.equals(Taxonomy.TOTAL) && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1) + if(!columnNameKey.equals(corpus.getTotal()) && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1) occurances++; } if(occurances < minimalTaxonomy){ - taxonomyResult.get(Taxonomy.TOTAL).remove(key); + taxonomyResult.get(corpus.getTotal()).remove(key); } } } @@ -271,8 +270,8 @@ 
public class StatisticsNew { private void removeMinimalOccurrences(Integer minimalOccurrences) { if (minimalOccurrences == 0) return; - for (MultipleHMKeys key : taxonomyResult.get(Taxonomy.TOTAL).keySet()){ - if(taxonomyResult.get(Taxonomy.TOTAL).get(key).intValue() < minimalOccurrences){ + for (MultipleHMKeys key : taxonomyResult.get(corpus.getTotal()).keySet()){ + if(taxonomyResult.get(corpus.getTotal()).get(key).intValue() < minimalOccurrences){ for (Taxonomy t : taxonomyResult.keySet()){ taxonomyResult.get(t).remove(key); } @@ -349,7 +348,7 @@ public class StatisticsNew { } public void updateUniGramOccurrences(int amount, ArrayList taxonomy){ - uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).set(uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).longValue() + amount); + uniGramTaxonomyOccurrences.get(corpus.getTotal()).set(uniGramTaxonomyOccurrences.get(corpus.getTotal()).longValue() + amount); for (Taxonomy t : taxonomy){ if (uniGramTaxonomyOccurrences.get(t) != null){ uniGramTaxonomyOccurrences.get(t).set(uniGramTaxonomyOccurrences.get(t).longValue() + amount); @@ -360,15 +359,15 @@ public class StatisticsNew { } public Map getUniGramOccurrences(){ -// return uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).longValue(); +// return uniGramTaxonomyOccurrences.get(corpus.getTotal()).longValue(); return uniGramTaxonomyOccurrences; } public void updateTaxonomyResults(MultipleHMKeys o, List taxonomy) { for (Taxonomy key : taxonomyResult.keySet()) { // first word should have the same taxonomy as others - if (key.equals(Taxonomy.TOTAL) || taxonomy.contains(key)) { -// if (key.equals(Taxonomy.TOTAL) || taxonomy != null && taxonomy.contains(key)) { + if (key.equals(corpus.getTotal()) || taxonomy.contains(key)) { +// if (key.equals(corpus.getTotal()) || taxonomy != null && taxonomy.contains(key)) { // if taxonomy not in map and in this word AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1)); @@ -607,7 +606,7 @@ public class StatisticsNew { // sortedTaxonomyString.add(t); // } // getTaxonomyForTaxonomyResult - tax = Tax.getTaxonomyForTaxonomyResult(corpus.getCorpusType(), taxonomyResult.keySet()); + tax = Tax.getTaxonomyForTaxonomyResult(corpus, taxonomyResult.keySet()); } // String sep = ""; @@ -618,11 +617,11 @@ public class StatisticsNew { } // info.put(sep = sep + " ", s); - if (uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s)) == null) { + if (uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s, corpus)) == null) { info.put(s, ""); continue; } - int n = uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s)).intValue(); + int n = uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s, corpus)).intValue(); if (n == 0) { info.put(s, ""); } else { @@ -662,11 +661,11 @@ public class StatisticsNew { // count number of all words long N = 0; - for(AtomicLong a : oneWordTaxonomyResult.get(Taxonomy.TOTAL).values()){ + for(AtomicLong a : oneWordTaxonomyResult.get(corpus.getTotal()).values()){ N += a.longValue(); } - for(MultipleHMKeys hmKey : taxonomyResult.get(Taxonomy.TOTAL).keySet()) { + for(MultipleHMKeys hmKey : taxonomyResult.get(corpus.getTotal()).keySet()) { // String[] splitedString = hmKey.getK1().split("\\s+"); long sum_fwi =0L; @@ -674,15 +673,15 @@ public class StatisticsNew { for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){ // System.out.println(smallHmKey.getK1()); - sum_fwi += oneWordTaxonomyResult.get(Taxonomy.TOTAL).get(smallHmKey).longValue(); - mul_fwi *= 
oneWordTaxonomyResult.get(Taxonomy.TOTAL).get(smallHmKey).longValue(); + sum_fwi += oneWordTaxonomyResult.get(corpus.getTotal()).get(smallHmKey).longValue(); + mul_fwi *= oneWordTaxonomyResult.get(corpus.getTotal()).get(smallHmKey).longValue(); } // String t = hmKey.getK1(); // if(hmKey.getK1().equals("v Slovenija")){ // System.out.println("TEST"); // // } - double O = (double)taxonomyResult.get(Taxonomy.TOTAL).get(hmKey).longValue(); + double O = (double)taxonomyResult.get(corpus.getTotal()).get(hmKey).longValue(); double n = (double)filter.getNgramValue(); double E = (double)mul_fwi / Math.pow(N, n - 1); if (collocabilityMap.keySet().contains(Collocability.DICE)){ diff --git a/src/main/java/data/Tax.java b/src/main/java/data/Tax.java index 305fcb7..3e708e8 100755 --- a/src/main/java/data/Tax.java +++ b/src/main/java/data/Tax.java @@ -10,7 +10,7 @@ import javafx.collections.ObservableList; public class Tax { private static LinkedHashMap GIGAFIDA_TAXONOMY; private static LinkedHashMap GOS_TAXONOMY; - private static final HashSet corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES, CorpusType.SSJ500K, CorpusType.VERT)); + private static final HashSet corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES, CorpusType.SSJ500K, CorpusType.GIGAFIDA2, CorpusType.VERT)); static { // GIGAFIDA ---------------------------- @@ -104,7 +104,7 @@ public class Tax { public static ObservableList getTaxonomyForComboBox(CorpusType corpusType, HashSet foundTax) { LinkedHashMap tax = new LinkedHashMap<>(); - if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) { + if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K || corpusType == CorpusType.GIGAFIDA2) { tax = GIGAFIDA_TAXONOMY; } else if (corpusType == CorpusType.GOS) { tax = GOS_TAXONOMY; @@ -143,13 +143,13 @@ public class Tax { /** * Returns taxonomy names only for items found in headers */ - public static ArrayList getTaxonomyForTaxonomyResult(CorpusType corpusType, Set foundTax) { + public static ArrayList getTaxonomyForTaxonomyResult(Corpus corpus, Set foundTax) { LinkedHashMap tax = new LinkedHashMap<>(); Set foundTaxHS= new HashSet<>(foundTax); - if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) { + if (corpus.getCorpusType() == CorpusType.GIGAFIDA || corpus.getCorpusType() == CorpusType.CCKRES || corpus.getCorpusType() == CorpusType.SSJ500K || corpus.getCorpusType() == CorpusType.GIGAFIDA2) { tax = GIGAFIDA_TAXONOMY; - } else if (corpusType == CorpusType.GOS) { + } else if (corpus.getCorpusType() == CorpusType.GOS) { tax = GOS_TAXONOMY; } @@ -161,7 +161,7 @@ public class Tax { for(Taxonomy e : foundTaxHS){ String[] elList = e.toString().split("\\."); for(int i = 1; i < elList.length - 1; i++){ - Taxonomy candidate = Taxonomy.factory(String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i))); + Taxonomy candidate = Taxonomy.factory(String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i)), corpus); genFoundTax.add(candidate); } } @@ -186,7 +186,7 @@ public class Tax { // assures same relative order for (String t : tax.keySet()) { - if (foundTaxHS.contains(Taxonomy.factory(t))) { + if (foundTaxHS.contains(Taxonomy.factory(t, corpus))) { taxForCombo.add(tax.get(t)); } } @@ -263,13 +263,19 @@ public class Tax { public static ArrayList 
getTaxonomyForInfo(CorpusType corpusType, ArrayList taxonomy) { LinkedHashMap tax = new LinkedHashMap<>(); - if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) { + ArrayList result = new ArrayList<>(); + if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K || corpusType == CorpusType.GIGAFIDA2) { tax = GIGAFIDA_TAXONOMY; } else if (corpusType == CorpusType.GOS) { tax = GOS_TAXONOMY; + } else if (corpusType == CorpusType.VERT) { + for (Taxonomy t : taxonomy) { + result.add(t.toLongNameString()); + } + return result; } - ArrayList result = new ArrayList<>(); + for (Taxonomy t : taxonomy) { result.add(tax.get(t.toString())); diff --git a/src/main/java/data/Taxonomy.java b/src/main/java/data/Taxonomy.java index ce4ae7d..a4da582 100755 --- a/src/main/java/data/Taxonomy.java +++ b/src/main/java/data/Taxonomy.java @@ -5,7 +5,7 @@ import java.util.concurrent.ConcurrentHashMap; import javafx.collections.ObservableList; -public enum Taxonomy { +enum TaxonomyEnum { TOTAL("Total", "Total"), // GOS @@ -85,7 +85,7 @@ public enum Taxonomy { private final String name; private final String longName; - Taxonomy(String name, String longName) { + TaxonomyEnum(String name, String longName) { this.name = name; this.longName = longName; } @@ -98,7 +98,7 @@ public enum Taxonomy { return this.longName; } - public static Taxonomy factory(String tax) { + public static TaxonomyEnum factory(String tax) { if (tax != null) { // GOS if (DISKURZ.toString().equals(tax)) { @@ -289,7 +289,7 @@ public enum Taxonomy { return null; } - public static Taxonomy factoryLongName(String tax) { + public static TaxonomyEnum factoryLongName(String tax) { if (tax != null) { // GOS if (DISKURZ.toLongNameString().equals(tax)) { @@ -477,11 +477,15 @@ public enum Taxonomy { } } +// return new Taxonomy(tax, tax); + System.out.println("2."); + System.out.println(tax); + return null; } - public static ArrayList taxonomySelected(Taxonomy disjointTaxonomy) { - ArrayList r = new ArrayList<>(); + public static ArrayList taxonomySelected(TaxonomyEnum disjointTaxonomy) { + ArrayList r = new ArrayList<>(); System.out.println(disjointTaxonomy); if(disjointTaxonomy.equals(DISKURZ)){ @@ -628,9 +632,9 @@ public enum Taxonomy { return r; } - public static ArrayList taxonomyDeselected(Taxonomy disjointTaxonomy){ - ArrayList r = new ArrayList<>(); - Map connections = new ConcurrentHashMap<>(); + public static ArrayList taxonomyDeselected(TaxonomyEnum disjointTaxonomy){ + ArrayList r = new ArrayList<>(); + Map connections = new ConcurrentHashMap<>(); connections.put(DISKURZ_JAVNI, DISKURZ); connections.put(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_JAVNI); connections.put(DISKURZ_RAZVEDRILNI, DISKURZ_JAVNI); @@ -685,7 +689,7 @@ public enum Taxonomy { connections.put(FT_DA, FT_LEKTORIRANO); connections.put(FT_NE, FT_LEKTORIRANO); - Taxonomy currentTaxonomy = disjointTaxonomy; + TaxonomyEnum currentTaxonomy = disjointTaxonomy; r.add(currentTaxonomy); while(connections.containsKey(currentTaxonomy)){ currentTaxonomy = connections.get(currentTaxonomy); @@ -695,29 +699,36 @@ public enum Taxonomy { return r; } - public static ArrayList convertStringListToTaxonomyList(ObservableList stringList){ + public static ArrayList convertStringListToTaxonomyList(ObservableList stringList, Corpus corpus){ + System.out.println("1."); System.out.println(stringList); - ArrayList taxonomyList = new ArrayList<>(); + ArrayList taxonomyList = new ArrayList<>(); // 
System.out.println("INTERESTING STUFF"); // System.out.println(stringList); for (String e : stringList) { - taxonomyList.add(factoryLongName(e)); + for (Taxonomy t : corpus.getTaxonomy()){ + if (t.toLongNameString().equals(e)) { + taxonomyList.add(t.getTaxonomyEnum()); + } + } } // System.out.println(taxonomyList); // System.out.println("-----------------"); return taxonomyList; } - public static void modifyingTaxonomy(ArrayList taxonomy, ArrayList checkedItemsTaxonomy, Corpus corpus){ + public static void modifyingTaxonomy(ArrayList taxonomy, ArrayList checkedItemsTaxonomy, Corpus corpus){ // get taxonomies that were selected/deselected by user -// System.out.println(taxonomy); -// System.out.println(checkedItemsTaxonomy); + System.out.println("Print here:"); + System.out.println(taxonomy); + System.out.println(checkedItemsTaxonomy); + System.out.println("-------------"); - Set disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy); + Set disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy); if (taxonomy != null) { disjointTaxonomies.addAll(taxonomy); - for (Taxonomy s : checkedItemsTaxonomy) { + for (TaxonomyEnum s : checkedItemsTaxonomy) { if (taxonomy.contains(s)) { disjointTaxonomies.remove(s); } @@ -725,11 +736,11 @@ public enum Taxonomy { } // remove previously selected items plus remove taxonomies that are not presented in current setup - ArrayList disArr = new ArrayList<>(disjointTaxonomies); + ArrayList disArr = new ArrayList<>(disjointTaxonomies); int i = 0; while(i < disArr.size()){ - Taxonomy s = disArr.get(i); - if(!Taxonomy.convertStringListToTaxonomyList(corpus.getTaxonomy()).contains(s)){ + TaxonomyEnum s = disArr.get(i); + if(!TaxonomyEnum.convertStringListToTaxonomyList(corpus.getObservableListTaxonomy(), corpus).contains(s)){ disjointTaxonomies.remove(s); disArr.remove(s); // taxonomy.remove(s); @@ -740,14 +751,14 @@ public enum Taxonomy { if (disjointTaxonomies.size() > 0) { - Taxonomy disjointTaxonomy = disjointTaxonomies.iterator().next(); + TaxonomyEnum disjointTaxonomy = disjointTaxonomies.iterator().next(); // taxonomy was selected if (checkedItemsTaxonomy.contains(disjointTaxonomy)) { - ArrayList addTaxonomies = Taxonomy.taxonomySelected(disjointTaxonomy); + ArrayList addTaxonomies = TaxonomyEnum.taxonomySelected(disjointTaxonomy); checkedItemsTaxonomy.addAll(addTaxonomies); } else if (taxonomy.contains(disjointTaxonomy)) { - ArrayList removeTaxonomies = Taxonomy.taxonomyDeselected(disjointTaxonomy); + ArrayList removeTaxonomies = TaxonomyEnum.taxonomyDeselected(disjointTaxonomy); checkedItemsTaxonomy.removeAll(removeTaxonomies); } } @@ -755,3 +766,203 @@ public enum Taxonomy { } + +public class Taxonomy { + private String name; + private String longName; + private TaxonomyEnum taxonomyEnum; + + public Taxonomy(String tax, boolean longName) { + if (!longName) { + this.taxonomyEnum = TaxonomyEnum.factory(tax); + } else { + this.taxonomyEnum = TaxonomyEnum.factoryLongName(tax); + } + if (taxonomyEnum != null){ + this.name = this.taxonomyEnum.toString(); + this.longName = this.taxonomyEnum.toLongNameString(); + } else { + this.name = tax; + this.longName = tax; + } + } + + public Taxonomy(TaxonomyEnum taxonomyEnum) { + this.taxonomyEnum = taxonomyEnum; + this.name = this.taxonomyEnum.toString(); + this.longName = this.taxonomyEnum.toLongNameString(); + + } + +// public Taxonomy(String name, String longName) { +// this.name = name; +// this.longName = longName; +// } + + public String toString() { + return this.name; + } + + public String toLongNameString() { + 
return this.longName; + } + + public TaxonomyEnum getTaxonomyEnum() { + return this.taxonomyEnum; + } + + public static Taxonomy factory(String tax, Corpus corpus) { + for (Taxonomy t : corpus.getTaxonomy()){ + if(tax.equals(t.toString())) + return t; + } + return null; +// return new Taxonomy(tax, false); + } + + public static Taxonomy factoryLongName(String tax, Corpus corpus) { + for (Taxonomy t : corpus.getTaxonomy()){ + if(tax.equals(t.toLongNameString())) + return t; + } + return null; +// return new Taxonomy(tax, true); + } + +// public static ArrayList taxonomySelected(Taxonomy disjointTaxonomy) { +// ArrayList rTaxonomyEnum = TaxonomyEnum.taxonomySelected(disjointTaxonomy.getTaxonomyEnum()); +// +// ArrayList r = new ArrayList<>(); +// +// for(TaxonomyEnum t : rTaxonomyEnum){ +// r.add(new Taxonomy(t.toString(), false)); +// } +// +// return r; +// } + + public static ArrayList taxonomyDeselected(Taxonomy disjointTaxonomy){ +// ArrayList r = new ArrayList<>(); +// Map connections = new ConcurrentHashMap<>(); +// connections.put(DISKURZ_JAVNI, DISKURZ); +// connections.put(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_JAVNI); +// connections.put(DISKURZ_RAZVEDRILNI, DISKURZ_JAVNI); +// connections.put(DISKURZ_NEJAVNI, DISKURZ); +// connections.put(DISKURZ_NEZASEBNI, DISKURZ_NEJAVNI); +// connections.put(DISKURZ_ZASEBNI, DISKURZ_NEJAVNI); +// connections.put(SITUACIJA_RADIO, SITUACIJA); +// connections.put(SITUACIJA_TELEVIZIJA, SITUACIJA); +// connections.put(KANAL_OSEBNI_STIK, KANAL); +// connections.put(KANAL_TELEFON, KANAL); +// connections.put(KANAL_RADIO, KANAL); +// connections.put(KANAL_TELEVIZIJA, KANAL); +// +// connections.put(SSJ_KNJIZNO, SSJ_TISK); +// connections.put(SSJ_LEPOSLOVNO, SSJ_KNJIZNO); +// connections.put(SSJ_STROKOVNO, SSJ_KNJIZNO); +// connections.put(SSJ_PERIODICNO, SSJ_TISK); +// connections.put(SSJ_CASOPIS, SSJ_PERIODICNO); +// connections.put(SSJ_REVIJA, SSJ_PERIODICNO); +// connections.put(SSJ_DRUGO, SSJ_TISK); +// +// connections.put(FT_P_GOVORNI, FT_P_PRENOSNIK); +// connections.put(FT_P_ELEKTRONSKI, FT_P_PRENOSNIK); +// connections.put(FT_P_PISNI, FT_P_PRENOSNIK); +// connections.put(FT_P_OBJAVLJENO, FT_P_PISNI); +// connections.put(FT_P_KNJIZNO, FT_P_OBJAVLJENO); +// connections.put(FT_P_PERIODICNO, FT_P_OBJAVLJENO); +// connections.put(FT_P_CASOPISNO, FT_P_OBJAVLJENO); +// connections.put(FT_P_DNEVNO, FT_P_CASOPISNO); +// connections.put(FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO); +// connections.put(FT_P_CASOPISNO_TEDENSKO, FT_P_CASOPISNO); +// connections.put(FT_P_REVIALNO, FT_P_PERIODICNO); +// connections.put(FT_P_TEDENSKO, FT_P_REVIALNO); +// connections.put(FT_P_STIRINAJSTDNEVNO, FT_P_REVIALNO); +// connections.put(FT_P_MESECNO, FT_P_REVIALNO); +// connections.put(FT_P_REDKEJE_KOT_MESECNO, FT_P_REVIALNO); +// connections.put(FT_P_OBCASNO, FT_P_REVIALNO); +// connections.put(FT_P_NEOBJAVLJENO, FT_P_PISNI); +// connections.put(FT_P_JAVNO, FT_P_NEOBJAVLJENO); +// connections.put(FT_P_INTERNO, FT_P_NEOBJAVLJENO); +// connections.put(FT_P_ZASEBNO, FT_P_NEOBJAVLJENO); +// connections.put(FT_UMETNOSTNA, FT_ZVRST); +// connections.put(FT_PESNISKA, FT_UMETNOSTNA); +// connections.put(FT_PROZNA, FT_UMETNOSTNA); +// connections.put(FT_DRAMSKA, FT_UMETNOSTNA); +// connections.put(FT_NEUMETNOSTNA, FT_ZVRST); +// connections.put(FT_STROKOVNA, FT_NEUMETNOSTNA); +// connections.put(FT_HID, FT_STROKOVNA); +// connections.put(FT_NIT, FT_STROKOVNA); +// connections.put(FT_NESTROKOVNA, FT_NEUMETNOSTNA); +// connections.put(FT_PRAVNA, FT_NEUMETNOSTNA); +// 
connections.put(FT_DA, FT_LEKTORIRANO); +// connections.put(FT_NE, FT_LEKTORIRANO); +// +// TaxonomyEnum currentTaxonomy = disjointTaxonomy; +// r.add(currentTaxonomy); +// while(connections.containsKey(currentTaxonomy)){ +// currentTaxonomy = connections.get(currentTaxonomy); +// r.add(currentTaxonomy); +// } +// Collections.reverse(r); +// return r; + return null; + } + + public static ArrayList convertStringListToTaxonomyList(ObservableList stringList, Corpus corpus){ + ArrayList taxonomyList = new ArrayList<>(); + + for (String e : stringList) { + for (Taxonomy t : corpus.getTaxonomy()){ + if (t.toLongNameString().equals(e)) { + taxonomyList.add(t); + } + } + } + return taxonomyList; + } + + public static ArrayList taxonomyToTaxonomyEnum(ArrayList taxonomy){ + System.out.println(taxonomy); + if (taxonomy == null) { + return null; + } + ArrayList r = new ArrayList<>(); + for (Taxonomy t : taxonomy){ + if (t.taxonomyEnum == null){ + return null; + } + r.add(t.taxonomyEnum); + } + return r; + } + + public static ArrayList taxonomyEnumToTaxonomy(ArrayList taxonomy, Corpus corpus){ +// ArrayList r = new ArrayList<>(); +// for (TaxonomyEnum t : taxonomy){ +// r.add(new Taxonomy(t)); +// } +// return r; + ArrayList r = new ArrayList<>(); + for (TaxonomyEnum te : taxonomy){ + for (Taxonomy t : corpus.getTaxonomy()){ + if (t.taxonomyEnum.equals(te)) { + r.add(t); + break; + } + } + + } + return r; + } + + public static ArrayList modifyingTaxonomy(ArrayList taxonomy, ObservableList checkedItems, Corpus corpus){ + ArrayList checkedItemsTaxonomy = TaxonomyEnum.convertStringListToTaxonomyList(checkedItems, corpus); + if (checkedItemsTaxonomy != null && corpus.getCorpusType() != CorpusType.VERT) { + TaxonomyEnum.modifyingTaxonomy(Taxonomy.taxonomyToTaxonomyEnum(taxonomy), checkedItemsTaxonomy, corpus); + return taxonomyEnumToTaxonomy(checkedItemsTaxonomy, corpus); + } else { + return convertStringListToTaxonomyList(checkedItems, corpus); + } + } +} diff --git a/src/main/java/gui/CharacterAnalysisTab.java b/src/main/java/gui/CharacterAnalysisTab.java index e068884..c286bad 100755 --- a/src/main/java/gui/CharacterAnalysisTab.java +++ b/src/main/java/gui/CharacterAnalysisTab.java @@ -1,8 +1,11 @@ package gui; +import alg.XML_processing; import data.*; import javafx.application.HostServices; -import javafx.beans.binding.Bindings; +import javafx.beans.InvalidationListener; +import javafx.beans.Observable; +import javafx.beans.property.ReadOnlyDoubleWrapper; import javafx.beans.value.ChangeListener; import javafx.beans.value.ObservableValue; import javafx.collections.FXCollections; @@ -25,7 +28,6 @@ import java.util.regex.Pattern; import static alg.XML_processing.readXML; import static gui.GUIController.showAlert; -import static gui.Messages.*; @SuppressWarnings("Duplicates") public class CharacterAnalysisTab { @@ -160,6 +162,7 @@ public class CharacterAnalysisTab { private boolean useDb; private HostServices hostService; private ListChangeListener taxonomyListener; + private InvalidationListener progressBarListener; private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA"}; private static final ArrayList N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY)); @@ -241,53 +244,56 @@ public class CharacterAnalysisTab { msd = new ArrayList<>(); // taxonomy - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { - if (taxonomyListener != null){ - 
taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); - } - - taxonomyListener = new ListChangeListener() { - boolean changing = true; - - @Override - public void onChanged(ListChangeListener.Change c){ - if(changing) { - ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); - - Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); - - taxonomy = new ArrayList<>(); - taxonomy.addAll(checkedItemsTaxonomy); - - taxonomyCCB.getItems().removeAll(); - taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - - // taxonomyCCB.getCheckModel().clearChecks(); - changing = false; - taxonomyCCB.getCheckModel().clearChecks(); - for (Taxonomy t : checkedItemsTaxonomy) { - taxonomyCCB.getCheckModel().check(t.toLongNameString()); - } - changing = true; - logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); - } - } - }; - taxonomyCCB.getCheckModel().clearChecks(); + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) { taxonomyCCB.setDisable(false); - taxonomyCCB.getItems().removeAll(); - taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - - taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener); } else { taxonomyCCB.setDisable(true); } + if (taxonomyListener != null){ + taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); + } + + taxonomyListener = new ListChangeListener() { + boolean changing = true; + + @Override + public void onChanged(ListChangeListener.Change c){ + if(changing) { + ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); +// ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); + + ArrayList checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus); + + taxonomy = new ArrayList<>(); + taxonomy.addAll(checkedItemsTaxonomy); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); + + // taxonomyCCB.getCheckModel().clearChecks(); + changing = false; + taxonomyCCB.getCheckModel().clearChecks(); + for (Taxonomy t : checkedItemsTaxonomy) { + taxonomyCCB.getCheckModel().check(t.toLongNameString()); + } + changing = true; + logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + } + } + }; + taxonomyCCB.getCheckModel().clearChecks(); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); + + taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener); + + displayTaxonomy = false; displayTaxonomyChB.setSelected(false); // set - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) { displayTaxonomyChB.setDisable(false); displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; @@ -475,7 +481,7 @@ public class CharacterAnalysisTab { // if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { // // user changed corpus (by type) or by selection & triggered a rescan of headers // // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = 
corpus.getTaxonomy(); +// ObservableList tax = corpus.getObservableListTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // // currentCorpusType = corpus.getCorpusType(); @@ -485,7 +491,7 @@ public class CharacterAnalysisTab { // } // // // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getTaxonomy(); +// ObservableList tax = corpus.getObservableListTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCB.getItems().addAll(taxonomyCCBValues); // @@ -548,7 +554,7 @@ public class CharacterAnalysisTab { // if calculateFor was selected for something other than a word or a lemma -> reset if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) { // if the user selected something else before selecting ngram for letters, reset that choice - calculateFor = CalculateFor.LEMMA; + calculateFor = CalculateFor.WORD; calculateForCB.getSelectionModel().select(0); } @@ -637,16 +643,66 @@ public class CharacterAnalysisTab { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { - long i = 0; + if(corpusFiles.size() > 1){ + cancel.setVisible(true); + } + int i = 0; +// DateFormat df = new SimpleDateFormat("hh:mm:ss"); + Date startTime = new Date(); + Date previousTime = new Date(); + int remainingSeconds = -1; for (File f : corpusFiles) { - readXML(f.toString(), statistic); + final int iFinal = i; + XML_processing xml_processing = new XML_processing(); i++; if (isCancelled()) { updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); break; } - this.updateProgress(i, corpusFiles.size()); - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); + if (corpusFiles.size() > 1) { + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000); + previousTime = new Date(); + } + this.updateProgress(i, corpusFiles.size()); + this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); + } else { + if(progressBarListener != null) { + xml_processing.progressProperty().removeListener(progressBarListener); + } + + progressBarListener = new InvalidationListener() { + int remainingSeconds = -1; + Date previousTime = new Date(); + @Override + public void invalidated(Observable observable) { + cancel.setVisible(true); + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * + (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * + ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); + previousTime = new Date(); + } + xml_processing.isCancelled = isCancelled(); + updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100); + updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds)); +// updateProgress((iFinal * 100) + (double) observable, corpusFiles.size() * 100); + } + }; +// 
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); + + + xml_processing.progressProperty().addListener(progressBarListener); + +// xml_processing.progressProperty().addListener((obs, oldProgress, newProgress) -> +// updateProgress((iFinal * 100) + newProgress.doubleValue(), corpusFiles.size() * 100)); + } + xml_processing.readXML(f.toString(), statistic); + if (isCancelled()) { + updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); + break; + } +// readXML(f.toString(), statistic, this, corpusFiles.size(), startTime, previousTime, i); } return null; @@ -703,8 +759,6 @@ public class CharacterAnalysisTab { logger.info("cancel button"); }); - cancel.setVisible(true); - final Thread thread = new Thread(task, "task"); thread.setDaemon(true); thread.start(); diff --git a/src/main/java/gui/CorpusTab.java b/src/main/java/gui/CorpusTab.java index 2750f4b..73e4ed7 100755 --- a/src/main/java/gui/CorpusTab.java +++ b/src/main/java/gui/CorpusTab.java @@ -6,11 +6,13 @@ import static gui.Messages.*; import static util.Util.*; import java.io.File; +import java.io.IOException; import java.util.*; import javafx.scene.layout.AnchorPane; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOCase; +import org.apache.commons.io.LineIterator; import org.apache.commons.io.filefilter.FileFilterUtils; import org.apache.commons.io.filefilter.TrueFileFilter; import org.apache.logging.log4j.LogManager; @@ -205,9 +207,6 @@ public class CorpusTab { // scan for xml files Collection corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("xml", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE); - corpusLocation = selectedDirectory.getAbsolutePath(); - corpusFilesSize = String.valueOf(corpusFiles.size()); - Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null); // make sure there are corpus files in selected directory or notify the user about it if (corpusFiles.size() == 0) { @@ -215,10 +214,20 @@ public class CorpusTab { corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("vert", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE); Collection corpusFilesRegi = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("regi", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE); + +// if (!checkRegiFile(corpusFilesRegi)){ +// return; +// } + if (corpusFiles.size() == 0){ logger.info("alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND")); showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null); + } else if (corpusFilesRegi.size() == 0){ + GUIController.showAlert(Alert.AlertType.ERROR, String.format(I18N.get("message.ERROR_NO_REGI_FILE_FOUND"), selectedDirectory.getAbsolutePath())); } else { + corpusLocation = selectedDirectory.getAbsolutePath(); + corpusFilesSize = String.valueOf(corpusFiles.size()); + Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null); corpusType = VERT; corpus.setCorpusType(corpusType); @@ -255,12 +264,17 @@ public class CorpusTab { } } else { + corpusLocation = selectedDirectory.getAbsolutePath(); + corpusFilesSize = String.valueOf(corpusFiles.size()); + Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? 
corpusType.toString() : null); + String chooseCorpusLabelContentTmp = detectCorpusType(corpusFiles); if (chooseCorpusLabelContentTmp == null) { logger.info("alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND")); showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null); } else { + initNewCorpus(selectedDirectory, corpusFiles); Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType.toString()); @@ -330,6 +344,28 @@ public class CorpusTab { Messages.setChooseCorpusL(chooseCorpusL, chooseCorpusLabelContent); } + private boolean checkRegiFile(Collection corpusFiles) { +// CorpusType corpusType = corpus.getCorpusType(); +// Collection corpusFiles = corpus.getDetectedCorpusFiles(); + + + for (File file : corpusFiles) { + // try to open .regi file + String regiPath = file.getAbsolutePath().substring(0, file.getAbsolutePath().length() - 4) + "regi"; + LineIterator regiIt; + try { + // read regi file + regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8"); + LineIterator.closeQuietly(regiIt); + } catch (IOException e) { + GUIController.showAlert(Alert.AlertType.ERROR, String.format(I18N.get("message.ERROR_NO_REGI_FILE_FOUND"), regiPath)); + return false; + } + } + return true; + + } + private void readHeaderInfo() { CorpusType corpusType = corpus.getCorpusType(); Collection corpusFiles = corpus.getDetectedCorpusFiles(); @@ -339,7 +375,7 @@ public class CorpusTab { logger.info("reading header data for ", corpusType.toString()); - if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) { + if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K || corpusType == CorpusType.GIGAFIDA2) { boolean corpusIsSplit = corpusFiles.size() > 1; final Task> task = new Task>() { @@ -505,26 +541,27 @@ public class CorpusTab { task.setOnSucceeded(e -> { ObservableList readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue()); - if (ValidationUtil.isEmpty(readTaxonomy)) { - // if no taxonomy found alert the user and keep other tabs disabled - logger.info("No vert filters found in headers."); - GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND")); - } else { + // if (ValidationUtil.isEmpty(readTaxonomy)) { + // // if no taxonomy found alert the user and keep other tabs disabled + // logger.info("No vert filters found in headers."); + // GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND")); + // } else { // set taxonomy, update label corpus.setTaxonomy(readTaxonomy); corpus.setHeaderRead(true); Messages.setChooseCorpusL(chooseCorpusL, chooseCorpusLabelContent); setResults(); setCorpusForAnalysis(); - } + // } - togglePiAndSetCorpusWrapper(false); + togglePiAndSetCorpusWrapper(false); }); task.setOnCancelled(e -> togglePiAndSetCorpusWrapper(false)); task.setOnFailed(e -> togglePiAndSetCorpusWrapper(false)); + final Thread thread = new Thread(task, "task"); thread.setDaemon(true); thread.start(); @@ -599,7 +636,12 @@ public class CorpusTab { if (title.contains(SOLAR.getNameLowerCase())) { corpusType = SOLAR; } else if (title.contains(GIGAFIDA.getNameLowerCase())) { - corpusType = GIGAFIDA; + String edition = XML_processing.readXMLHeaderTag(f.getAbsolutePath(), "edition").toLowerCase(); + if (Double.valueOf(edition) < 2.0) { + corpusType = GIGAFIDA; + } else { + corpusType = GIGAFIDA2; + } } 
else if (title.contains(CCKRES.getNameLowerCase())) { corpusType = CCKRES; } else if (title.contains(GOS.getNameLowerCase())) { diff --git a/src/main/java/gui/Messages.java b/src/main/java/gui/Messages.java index 72db5c1..a910dc7 100755 --- a/src/main/java/gui/Messages.java +++ b/src/main/java/gui/Messages.java @@ -114,8 +114,10 @@ public class Messages { .append(String.format(I18N.get("message.NOTIFICATION_CORPUS"), chooseCorpusLabelProperties[2])); chooseCorpusLabelContent = sb.toString(); - chooseCorpusL.textProperty().unbind(); - chooseCorpusL.setText(chooseCorpusLabelContent); + if (chooseCorpusL != null) { + chooseCorpusL.textProperty().unbind(); + chooseCorpusL.setText(chooseCorpusLabelContent); + } } } } diff --git a/src/main/java/gui/OneWordAnalysisTab.java b/src/main/java/gui/OneWordAnalysisTab.java index 655f176..1665177 100755 --- a/src/main/java/gui/OneWordAnalysisTab.java +++ b/src/main/java/gui/OneWordAnalysisTab.java @@ -1,22 +1,23 @@ package gui; +import alg.XML_processing; import data.*; import javafx.application.HostServices; +import javafx.beans.InvalidationListener; +import javafx.beans.Observable; +import javafx.beans.property.ReadOnlyDoubleWrapper; import javafx.beans.value.ChangeListener; import javafx.beans.value.ObservableValue; -import javafx.collections.FXCollections; import javafx.collections.ListChangeListener; import javafx.collections.ObservableList; import javafx.concurrent.Task; import javafx.fxml.FXML; -import javafx.scene.Scene; import javafx.scene.control.*; import javafx.scene.layout.AnchorPane; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.controlsfx.control.CheckComboBox; -import org.controlsfx.control.IndexedCheckModel; import java.io.File; import java.io.UnsupportedEncodingException; @@ -26,7 +27,6 @@ import java.util.regex.Pattern; import static alg.XML_processing.readXML; import static gui.GUIController.showAlert; -import static gui.Messages.*; @SuppressWarnings("Duplicates") public class OneWordAnalysisTab { @@ -158,6 +158,7 @@ public class OneWordAnalysisTab { private ListChangeListener taxonomyListener; private ListChangeListener alsoVisualizeListener; private ChangeListener calculateForListener; + private InvalidationListener progressBarListener; // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka"); // private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); @@ -383,54 +384,57 @@ public class OneWordAnalysisTab { alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); // taxonomy - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { - if (taxonomyListener != null){ - taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); - } - - taxonomyListener = new ListChangeListener() { - public boolean changing = true; - - @Override - public void onChanged(Change c) { - if (changing) { - ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); - - Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); - - taxonomy = new ArrayList<>(); - taxonomy.addAll(checkedItemsTaxonomy); - - taxonomyCCB.getItems().removeAll(); - taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - - 
// taxonomyCCB.getCheckModel().clearChecks(); - changing = false; - taxonomyCCB.getCheckModel().clearChecks(); - for (Taxonomy t : checkedItemsTaxonomy) { - taxonomyCCB.getCheckModel().check(t.toLongNameString()); - } - changing = true; - logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); - } - } - }; - - taxonomyCCB.getCheckModel().clearChecks(); + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) { taxonomyCCB.setDisable(false); - taxonomyCCB.getItems().removeAll(); - taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - - taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener); } else { taxonomyCCB.setDisable(true); } + if (taxonomyListener != null){ + taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); + } + + taxonomyListener = new ListChangeListener() { + public boolean changing = true; + + @Override + public void onChanged(Change c) { + if (changing) { + ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); +// ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); + ArrayList checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus); + +// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); + + taxonomy = new ArrayList<>(); + taxonomy.addAll(checkedItemsTaxonomy); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); + + // taxonomyCCB.getCheckModel().clearChecks(); + changing = false; + taxonomyCCB.getCheckModel().clearChecks(); + for (Taxonomy t : checkedItemsTaxonomy) { + taxonomyCCB.getCheckModel().check(t.toLongNameString()); + } + changing = true; + logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + } + } + }; + + taxonomyCCB.getCheckModel().clearChecks(); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); + + taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener); + displayTaxonomy = false; displayTaxonomyChB.setSelected(false); // set - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) { displayTaxonomyChB.setDisable(false); displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; @@ -586,7 +590,7 @@ public class OneWordAnalysisTab { // if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { // // user changed corpus (by type) or by selection & triggered a rescan of headers // // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getTaxonomy(); +// ObservableList tax = corpus.getObservableListTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // // currentCorpusType = corpus.getCorpusType(); @@ -596,7 +600,7 @@ public class OneWordAnalysisTab { // } // // // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getTaxonomy(); +// ObservableList tax = corpus.getObservableListTaxonomy(); // taxonomyCCBValues = tax != null ? 
tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCB.getItems().addAll(taxonomyCCBValues); // @@ -733,22 +737,63 @@ public class OneWordAnalysisTab { logger.info("Started execution: ", statistic.getFilter()); Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); - boolean corpusIsSplit = corpusFiles.size() > 1; final Task task = new Task() { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { - long i = 0; + if(corpusFiles.size() > 1){ + cancel.setVisible(true); + } + int i = 0; + Date startTime = new Date(); + Date previousTime = new Date(); + int remainingSeconds = -1; for (File f : corpusFiles) { - readXML(f.toString(), statistic); + final int iFinal = i; + XML_processing xml_processing = new XML_processing(); i++; + if (corpusFiles.size() > 1) { + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000); + previousTime = new Date(); + } + this.updateProgress(i, corpusFiles.size()); + this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } + } else { + if(progressBarListener != null) { + xml_processing.progressProperty().removeListener(progressBarListener); + } + + progressBarListener = new InvalidationListener() { + int remainingSeconds = -1; + Date previousTime = new Date(); + @Override + public void invalidated(Observable observable) { + cancel.setVisible(true); + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * + (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * + ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); + previousTime = new Date(); + } + xml_processing.isCancelled = isCancelled(); + updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100); + updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds)); + } + }; + + xml_processing.progressProperty().addListener(progressBarListener); + } + xml_processing.readXML(f.toString(), statistic); if (isCancelled()) { updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); break; } - this.updateProgress(i, corpusFiles.size()); - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); } return null; @@ -805,7 +850,6 @@ public class OneWordAnalysisTab { logger.info("cancel button"); }); - cancel.setVisible(true); final Thread thread = new Thread(task, "task"); thread.setDaemon(true); thread.start(); diff --git a/src/main/java/gui/StringAnalysisTabNew2.java b/src/main/java/gui/StringAnalysisTabNew2.java index dec4053..4e7ed97 100755 --- a/src/main/java/gui/StringAnalysisTabNew2.java +++ b/src/main/java/gui/StringAnalysisTabNew2.java @@ -2,21 +2,20 @@ package gui; import static alg.XML_processing.*; import static gui.GUIController.*; -import static gui.Messages.*; import java.io.File; import java.io.UnsupportedEncodingException; import java.util.*; -import java.util.concurrent.*; -import 
java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Pattern; +import alg.XML_processing; import javafx.application.HostServices; +import javafx.beans.InvalidationListener; +import javafx.beans.Observable; +import javafx.beans.property.ReadOnlyDoubleWrapper; import javafx.beans.value.ChangeListener; import javafx.beans.value.ObservableValue; import javafx.scene.layout.AnchorPane; -import org.apache.commons.lang3.SerializationUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -208,6 +207,7 @@ public class StringAnalysisTabNew2 { private ListChangeListener alsoVisualizeListener; private ListChangeListener collocabilityListener; private ChangeListener calculateForListener; + private InvalidationListener progressBarListener; // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka"); // private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); @@ -306,13 +306,14 @@ public class StringAnalysisTabNew2 { notePunctuations = newValue; logger.info("note punctuations: ", notePunctuations); }); + notePunctuationsChB.setSelected(false); notePunctuationsChB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_readNotePunctuationsChB"))); displayTaxonomy = false; displayTaxonomyChB.setSelected(false); // set - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) { displayTaxonomyChB.setDisable(false); displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; @@ -515,49 +516,52 @@ public class StringAnalysisTabNew2 { alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); // taxonomy - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { - if (taxonomyListener != null){ - taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); - } - - taxonomyListener = new ListChangeListener() { - boolean changing = true; - - @Override - public void onChanged(ListChangeListener.Change c){ - if(changing) { - ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); - - Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); - - taxonomy = new ArrayList<>(); - taxonomy.addAll(checkedItemsTaxonomy); - - taxonomyCCB.getItems().removeAll(); - taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - - // taxonomyCCB.getCheckModel().clearChecks(); - changing = false; - taxonomyCCB.getCheckModel().clearChecks(); - for (Taxonomy t : checkedItemsTaxonomy) { - taxonomyCCB.getCheckModel().check(t.toLongNameString()); - } - changing = true; - logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); - } - } - }; - taxonomyCCB.getCheckModel().clearChecks(); + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) { taxonomyCCB.setDisable(false); - taxonomyCCB.getItems().removeAll(); - taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - - 
taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener); } else { taxonomyCCB.setDisable(true); } + if (taxonomyListener != null){ + taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); + } + + taxonomyListener = new ListChangeListener() { + boolean changing = true; + + @Override + public void onChanged(ListChangeListener.Change c){ + if(changing) { + ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); +// ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); +// +// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); + ArrayList checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus); + + taxonomy = new ArrayList<>(); + taxonomy.addAll(checkedItemsTaxonomy); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); + + // taxonomyCCB.getCheckModel().clearChecks(); + changing = false; + taxonomyCCB.getCheckModel().clearChecks(); + for (Taxonomy t : checkedItemsTaxonomy) { + taxonomyCCB.getCheckModel().check(t.toLongNameString()); + } + changing = true; + logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + } + } + }; + taxonomyCCB.getCheckModel().clearChecks(); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); + + taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener); + // skip skipValueCB.valueProperty().addListener((observable, oldValue, newValue) -> { skipValue = Integer.valueOf(newValue); @@ -738,7 +742,7 @@ public class StringAnalysisTabNew2 { // if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { // // user changed corpus (by type) or by selection & triggered a rescan of headers // // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getTaxonomy(); +// ObservableList tax = corpus.getObservableListTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // // currentCorpusType = corpus.getCorpusType(); @@ -748,7 +752,7 @@ public class StringAnalysisTabNew2 { // } // // // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getTaxonomy(); +// ObservableList tax = corpus.getObservableListTaxonomy(); // taxonomyCCBValues = tax != null ? 
tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCB.getItems().addAll(taxonomyCCBValues); // @@ -913,16 +917,78 @@ public class StringAnalysisTabNew2 { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { - long i = corpusFiles.size(); + if(corpusFiles.size() > 1){ + cancel.setVisible(true); + } + int i = corpusFiles.size(); + Date startTime = new Date(); + Date previousTime = new Date(); + int remainingSeconds = -1; + int corpusSize; + if (statistic.getFilter().getCollocability().size() > 0) { + corpusSize = corpusFiles.size() * 2; + } else { + corpusSize = corpusFiles.size(); + } for (File f : corpusFiles) { - readXML(f.toString(), statisticsOneGrams); + final int iFinal = i; + XML_processing xml_processing = new XML_processing(); i++; - this.updateProgress(i, corpusFiles.size() * 2); - if (statistic.getFilter().getCollocability().size() > 0) { - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); + if (corpusFiles.size() > 1) { + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); + previousTime = new Date(); + } + this.updateProgress(i, corpusSize); + this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } } else { - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); + if(progressBarListener != null) { + xml_processing.progressProperty().removeListener(progressBarListener); + } + + progressBarListener = new InvalidationListener() { + int remainingSeconds = -1; + Date previousTime = new Date(); + @Override + public void invalidated(Observable observable) { + cancel.setVisible(true); + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * + (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * + ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); +// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); +// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1))); +// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); +// System.out.println(remainingSeconds); + previousTime = new Date(); + } + xml_processing.isCancelled = isCancelled(); + updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); + updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusSize, f.getName(), remainingSeconds)); + } + }; + + xml_processing.progressProperty().addListener(progressBarListener); } + xml_processing.isCollocability = true; + xml_processing.readXML(f.toString(), statisticsOneGrams); + xml_processing.isCollocability = false; + if (isCancelled()) { + updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); + break; + } +// readXML(f.toString(), statisticsOneGrams); +// i++; +// this.updateProgress(i, corpusFiles.size() * 2); +// if 
(statistic.getFilter().getCollocability().size() > 0) { +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); +// } else { +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); +// } } return null; @@ -998,8 +1064,6 @@ public class StringAnalysisTabNew2 { task.cancel(); // logger.info("cancel button"); }); - -// cancel.setVisible(true); return task; } @@ -1009,28 +1073,90 @@ public class StringAnalysisTabNew2 { // Task task_collocability = null; Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); - boolean corpusIsSplit = corpusFiles.size() > 1; final Task task = new Task() { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { - long i = 0; + if(corpusFiles.size() > 1){ + cancel.setVisible(true); + } + int i = 0; + Date startTime = new Date(); + Date previousTime = new Date(); + int remainingSeconds = -1; + int corpusSize; + if (statistic.getFilter().getCollocability().size() > 0) { + corpusSize = corpusFiles.size() * 2; + } else { + corpusSize = corpusFiles.size(); + } for (File f : corpusFiles) { - readXML(f.toString(), statistic); + final int iFinal = i; + XML_processing xml_processing = new XML_processing(); i++; + if (corpusFiles.size() > 1) { + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); + previousTime = new Date(); + } + this.updateProgress(i, corpusSize); + this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } + } else { + if(progressBarListener != null) { + xml_processing.progressProperty().removeListener(progressBarListener); + } + + progressBarListener = new InvalidationListener() { + int remainingSeconds = -1; + Date previousTime = new Date(); + @Override + public void invalidated(Observable observable) { + cancel.setVisible(true); + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * + (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * + ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); +// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); +// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1); +// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); +// System.out.println(remainingSeconds); + previousTime = new Date(); + } + xml_processing.isCancelled = isCancelled(); + updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); + updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusSize, f.getName(), remainingSeconds)); + } + }; + + xml_processing.progressProperty().addListener(progressBarListener); + } + xml_processing.readXML(f.toString(), statistic); if (isCancelled()) { updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); break; } - if (statistic.getFilter().getCollocability().size() > 0) { - 
this.updateProgress(i, corpusFiles.size() * 2); - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); - } else { - this.updateProgress(i, corpusFiles.size()); - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); + if(!(corpusFiles.size() > 1)){ + cancel.setVisible(false); } -// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); +// readXML(f.toString(), statistic); +// i++; +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } +// if (statistic.getFilter().getCollocability().size() > 0) { +// this.updateProgress(i, corpusFiles.size() * 2); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); +// } else { +// this.updateProgress(i, corpusFiles.size()); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); +// } +//// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); } return null; @@ -1106,8 +1232,6 @@ public class StringAnalysisTabNew2 { logger.info("cancel button"); }); - cancel.setVisible(true); - final Thread thread = new Thread(task, "task"); thread.setDaemon(true); thread.start(); diff --git a/src/main/java/gui/WordFormationTab.java b/src/main/java/gui/WordFormationTab.java index 914afb1..4ea87a5 100755 --- a/src/main/java/gui/WordFormationTab.java +++ b/src/main/java/gui/WordFormationTab.java @@ -2,14 +2,10 @@ package gui; import static alg.XML_processing.*; import static gui.GUIController.*; -import static gui.Messages.*; import java.io.File; import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; +import java.util.*; import javafx.application.HostServices; import javafx.scene.control.*; @@ -73,11 +69,11 @@ public class WordFormationTab { // taxonomy if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { taxonomyCCB.getItems().removeAll(); - taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); + taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { taxonomy = new ArrayList<>(); ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); + ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus); taxonomy.addAll(checkedItemsTaxonomy); logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); }); @@ -175,7 +171,9 @@ public class WordFormationTab { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { - long i = 0; + int i = 0; + Date startTime = new Date(); + Date previousTime = new Date(); for (File f : corpusFiles) { readXML(f.toString(), statistic); i++; diff --git a/src/main/java/gui/WordLevelTab.java b/src/main/java/gui/WordLevelTab.java index 9d83422..d276e03 100755 --- a/src/main/java/gui/WordLevelTab.java +++ b/src/main/java/gui/WordLevelTab.java @@ -1,10 +1,13 @@ package gui; +import alg.XML_processing; import data.*; import 
javafx.application.HostServices; +import javafx.beans.InvalidationListener; +import javafx.beans.Observable; +import javafx.beans.property.ReadOnlyDoubleWrapper; import javafx.beans.value.ChangeListener; import javafx.beans.value.ObservableValue; -import javafx.collections.FXCollections; import javafx.collections.ListChangeListener; import javafx.collections.ObservableList; import javafx.concurrent.Task; @@ -24,7 +27,6 @@ import java.util.regex.Pattern; import static alg.XML_processing.readXML; import static gui.GUIController.showAlert; -import static gui.Messages.*; @SuppressWarnings("Duplicates") public class WordLevelTab { @@ -196,6 +198,7 @@ public class WordLevelTab { private ListChangeListener taxonomyListener; private ListChangeListener alsoVisualizeListener; private ChangeListener calculateForListener; + private InvalidationListener progressBarListener; // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica"); // private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); @@ -509,54 +512,57 @@ public class WordLevelTab { alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); // taxonomy - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { - if (taxonomyListener != null){ - taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); - } - - taxonomyListener = new ListChangeListener() { - boolean changing = true; - - @Override - public void onChanged(ListChangeListener.Change c){ - if(changing) { - ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); - - Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); - - taxonomy = new ArrayList<>(); - taxonomy.addAll(checkedItemsTaxonomy); - - taxonomyCCB.getItems().removeAll(); - taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - - // taxonomyCCB.getCheckModel().clearChecks(); - changing = false; - taxonomyCCB.getCheckModel().clearChecks(); - for (Taxonomy t : checkedItemsTaxonomy) { - taxonomyCCB.getCheckModel().check(t.toLongNameString()); - } - changing = true; - logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); - } - } - }; - - taxonomyCCB.getCheckModel().clearChecks(); + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) { taxonomyCCB.setDisable(false); - taxonomyCCB.getItems().removeAll(); - taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - - taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener); } else { taxonomyCCB.setDisable(true); } + if (taxonomyListener != null){ + taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); + } + + taxonomyListener = new ListChangeListener() { + boolean changing = true; + + @Override + public void onChanged(ListChangeListener.Change c){ + if(changing) { + ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); +// ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); + + ArrayList checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus); +// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); + + taxonomy = new ArrayList<>(); + taxonomy.addAll(checkedItemsTaxonomy); + + 
taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); + + // taxonomyCCB.getCheckModel().clearChecks(); + changing = false; + taxonomyCCB.getCheckModel().clearChecks(); + for (Taxonomy t : checkedItemsTaxonomy) { + taxonomyCCB.getCheckModel().check(t.toLongNameString()); + } + changing = true; + logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + } + } + }; + + taxonomyCCB.getCheckModel().clearChecks(); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); + + taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener); + displayTaxonomy = false; displayTaxonomyChB.setSelected(false); // set - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) { displayTaxonomyChB.setDisable(false); displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; @@ -714,7 +720,7 @@ public class WordLevelTab { // if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { // // user changed corpus (by type) or by selection & triggered a rescan of headers // // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getTaxonomy(); +// ObservableList tax = corpus.getObservableListTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // // currentCorpusType = corpus.getCorpusType(); @@ -724,7 +730,7 @@ public class WordLevelTab { // } // // // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getTaxonomy(); +// ObservableList tax = corpus.getObservableListTaxonomy(); // taxonomyCCBValues = tax != null ? 
tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCB.getItems().addAll(taxonomyCCBValues); // @@ -879,22 +885,63 @@ public class WordLevelTab { logger.info("Started execution: ", statistic.getFilter()); Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); - boolean corpusIsSplit = corpusFiles.size() > 1; final Task task = new Task() { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { - long i = 0; + if(corpusFiles.size() > 1){ + cancel.setVisible(true); + } + int i = 0; + Date startTime = new Date(); + Date previousTime = new Date(); + int remainingSeconds = -1; for (File f : corpusFiles) { - readXML(f.toString(), statistic); + final int iFinal = i; + XML_processing xml_processing = new XML_processing(); i++; if (isCancelled()) { updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); break; } - this.updateProgress(i, corpusFiles.size()); - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); + if (corpusFiles.size() > 1) { + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000); + previousTime = new Date(); + } + this.updateProgress(i, corpusFiles.size()); + this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); + } else { + if(progressBarListener != null) { + xml_processing.progressProperty().removeListener(progressBarListener); + } + + progressBarListener = new InvalidationListener() { + int remainingSeconds = -1; + Date previousTime = new Date(); + @Override + public void invalidated(Observable observable) { + cancel.setVisible(true); + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * + (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * + ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); + previousTime = new Date(); + } + xml_processing.isCancelled = isCancelled(); + updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100); + updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds)); + } + }; + + xml_processing.progressProperty().addListener(progressBarListener); + } + xml_processing.readXML(f.toString(), statistic); + if (isCancelled()) { + updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); + break; + } } return null; @@ -951,7 +998,6 @@ public class WordLevelTab { logger.info("cancel button"); }); - cancel.setVisible(true); final Thread thread = new Thread(task, "task"); thread.setDaemon(true); thread.start(); diff --git a/src/main/java/util/Export.java b/src/main/java/util/Export.java index fafac1b..f65f013 100755 --- a/src/main/java/util/Export.java +++ b/src/main/java/util/Export.java @@ -111,8 +111,8 @@ public class Export { } } - headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue())); - headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), 
String.valueOf(num_selected_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue())); + headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue())); + headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue())); // headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies)); for (CalculateFor otherKey : filter.getMultipleKeys()) { @@ -134,7 +134,7 @@ public class Export { } for (Taxonomy key : taxonomyResults.keySet()) { - if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { + if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { FILE_HEADER_AL.add(I18N.get("exportTable.absoluteFrequency") + " [" + key.toString() + "]"); FILE_HEADER_AL.add(I18N.get("exportTable.percentage") + " [" + key.toString() + "]"); FILE_HEADER_AL.add(I18N.get("exportTable.relativeFrequency") + " [" + key.toString() + "]"); @@ -280,10 +280,10 @@ public class Export { dataEntry.add(e.getValue().toString()); - dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(Taxonomy.TOTAL))); - dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue())); + dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()))); + dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue())); for (Taxonomy key : taxonomyResults.keySet()){ - if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { + if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { AtomicLong frequency = taxonomyResults.get(key).get(e.getKey()); dataEntry.add(frequency.toString()); dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key))); diff --git a/src/main/resources/message_en.properties b/src/main/resources/message_en.properties index aafa86e..5b8f2c8 100644 --- a/src/main/resources/message_en.properties +++ b/src/main/resources/message_en.properties @@ -118,6 +118,7 @@ message.WARNING_NO_SOLAR_FILTERS_FOUND=We weren't able to read filters from corp message.ERROR_WHILE_EXECUTING=Error in program execution. message.ERROR_WHILE_SAVING_RESULTS_TO_CSV=Error while saving results. message.ERROR_NOT_ENOUGH_MEMORY=You do not have sufficient RAM for analyzing such amount of data. You can try changing filters. +message.ERROR_NO_REGI_FILE_FOUND=Missing file \"%s\". message.MISSING_NGRAM_LEVEL=N-gram level message.MISSING_CALCULATE_FOR=Calculate for @@ -132,7 +133,7 @@ message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS=Analysis completed, however n message.RESULTS_PATH_SET_TO_DEFAULT=Save location is set on corpus location. message.NOTIFICATION_ANALYSIS_CANCELED=Analysis was canceled.
-message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y=Analyzing file %d of %d (%s) +message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y=Analyzing file %d of %d (%s) - Estimated time remaining %d s message.CANCELING_NOTIFICATION=Canceled message.LABEL_CORPUS_LOCATION_NOT_SET=Corpus location is not set diff --git a/src/main/resources/message_sl.properties b/src/main/resources/message_sl.properties index 3f3424c..bb6f142 100644 --- a/src/main/resources/message_sl.properties +++ b/src/main/resources/message_sl.properties @@ -118,6 +118,7 @@ message.WARNING_NO_SOLAR_FILTERS_FOUND=Iz korpusnih datotek ni bilo moč razbrat message.ERROR_WHILE_EXECUTING=Prišlo je do napake med izvajanjem. message.ERROR_WHILE_SAVING_RESULTS_TO_CSV=Prišlo je do napake med shranjevanje rezultatov. message.ERROR_NOT_ENOUGH_MEMORY=Na voljo imate premalo pomnilnika (RAM-a) za analizo takšne količine podatkov. +message.ERROR_NO_REGI_FILE_FOUND=Manjka datoteka \"%s\". message.MISSING_NGRAM_LEVEL=N-gram nivo message.MISSING_CALCULATE_FOR=Izračunaj za @@ -132,7 +133,7 @@ message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS=Analiza je zaključena, venda message.RESULTS_PATH_SET_TO_DEFAULT=Lokacija za shranjevanje rezultatov je nastavljena na lokacijo korpusa. message.NOTIFICATION_ANALYSIS_CANCELED=Analiziranje je bilo prekinjeno. -message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y=Analiziram datoteko %d od %d (%s) +message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y=Analiziram datoteko %d od %d (%s) - Preostali čas %d s message.CANCELING_NOTIFICATION=Prekinjeno message.LABEL_CORPUS_LOCATION_NOT_SET=Lokacija korpusa ni nastavljena
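
The sketches below illustrate, in rough file order, the techniques this patch introduces. None of the code is from the repository; all class, method, and file names in the sketches are invented for illustration unless they also appear in the hunks above.

Taxonomy.factory and factoryLongName drop the old enum-backed construction (the commented-out `new Taxonomy(tax, false)`) in favour of a linear scan over the corpus's own taxonomy list, returning null when nothing matches. The same lookup pattern, reduced to plain collections; requires Java 16+ for the record:

    import java.util.Arrays;
    import java.util.List;

    public class TaxonomyLookup {
        // stand-in for the project's Taxonomy class
        record Entry(String name, String longName) {}

        // returns the corpus entry whose short name matches, or null (as in the patch)
        static Entry factory(String name, List<Entry> corpusTaxonomy) {
            for (Entry e : corpusTaxonomy) {
                if (name.equals(e.name())) {
                    return e;
                }
            }
            return null;
        }

        public static void main(String[] args) {
            List<Entry> tax = Arrays.asList(
                    new Entry("tisk", "tisk - vse"),
                    new Entry("internet", "internet - vse"));
            System.out.println(factory("internet", tax)); // Entry[name=internet, longName=internet - vse]
            System.out.println(factory("radio", tax));    // null
        }
    }

Returning null instead of constructing a fallback pushes null-handling onto the callers, which is why taxonomyToTaxonomyEnum above bails out with null as soon as any entry lacks a taxonomyEnum.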
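
Every analysis tab now installs a taxonomy ListChangeListener that rewrites the CheckComboBox's checked items from inside its own onChanged callback, guarded by a `changing` flag so the nested clearChecks/check calls do not re-trigger the handler. A minimal sketch of that guard, with a plain ObservableList standing in for the check model:

    import javafx.collections.FXCollections;
    import javafx.collections.ListChangeListener;
    import javafx.collections.ObservableList;

    public class ReentrancyGuard {
        public static void main(String[] args) {
            ObservableList<String> checked = FXCollections.observableArrayList();
            checked.addListener(new ListChangeListener<String>() {
                boolean changing = true;

                @Override
                public void onChanged(Change<? extends String> c) {
                    if (changing) {
                        changing = false;            // lower the flag before mutating the list ourselves
                        checked.add("derived-item"); // re-enters onChanged, but is ignored
                        changing = true;
                        System.out.println("handled change: " + checked);
                    }
                }
            });
            checked.add("user-item"); // prints: handled change: [user-item, derived-item]
        }
    }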
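
All of the reworked Task bodies estimate remaining time the same way: elapsed time divided by files finished, times files left, recomputed at most every 500 ms so the message label does not flicker. That estimate in isolation; the patch inlines it, the class here is just packaging:

    import java.util.Date;

    public class EtaEstimator {
        private final Date startTime = new Date();
        private Date previousTime = new Date();
        private int remainingSeconds = -1;

        // call after finishing file i of n (i starts at 1, matching the patch's post-increment)
        public int estimate(int i, int n) {
            long now = new Date().getTime();
            if (now - previousTime.getTime() > 500 || remainingSeconds == -1) {
                long elapsedMs = now - startTime.getTime();
                // average ms per file so far, times files left, converted to seconds
                remainingSeconds = (int) (elapsedMs * (1.0 / i) * (n - i) / 1000);
                previousTime = new Date();
            }
            return remainingSeconds;
        }
    }

In the per-file listener variant the same formula runs in percent units: progress `iFinal * 100 + percent` out of `corpusFiles.size() * 100`.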
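
XML_processing now publishes its own 0-100 progress through a ReadOnlyDoubleWrapper, and for single-file corpora the tabs subscribe with an InvalidationListener that rescales it into the Task's overall range (the patch reads the value by casting the Observable back to the wrapper). A stripped-down sketch of that wiring, runnable without launching the JavaFX toolkit:

    import javafx.beans.Observable;
    import javafx.beans.property.ReadOnlyDoubleProperty;
    import javafx.beans.property.ReadOnlyDoubleWrapper;

    public class ProgressWiring {
        private final ReadOnlyDoubleWrapper progress = new ReadOnlyDoubleWrapper();

        public ReadOnlyDoubleProperty progressProperty() {
            return progress.getReadOnlyProperty();
        }

        public static void main(String[] args) {
            int fileIndex = 2, totalFiles = 5; // pretend we are parsing the third of five files
            ProgressWiring worker = new ProgressWiring();
            worker.progressProperty().addListener((Observable o) -> {
                double percent = ((ReadOnlyDoubleProperty) o).get(); // 0..100 within this file
                double overall = (fileIndex * 100 + percent) / (totalFiles * 100);
                System.out.printf("overall: %.2f%n", overall);
            });
            for (int p = 25; p <= 100; p += 25) {
                worker.progress.set(p); // each new value invalidates the property and fires the listener
            }
        }
    }

Note that the patch returns the wrapper itself from progressProperty(); returning getReadOnlyProperty() as here keeps the setter out of callers' reach, which is the usual JavaFX convention.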
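
checkRegiFile pairs every .vert file with a .regi registry file by rewriting the suffix and probing it with commons-io's LineIterator, alerting on the first miss. The same check with the GUI alert reduced to a boolean:

    import java.io.File;
    import java.io.IOException;
    import java.util.Collection;
    import org.apache.commons.io.FileUtils;
    import org.apache.commons.io.LineIterator;

    public class RegiCheck {
        // true only if every .vert file has a readable .regi companion next to it
        static boolean checkRegiFiles(Collection<File> vertFiles) {
            for (File file : vertFiles) {
                // swap the "vert" suffix for "regi", keeping the rest of the path
                String regiPath = file.getAbsolutePath()
                        .substring(0, file.getAbsolutePath().length() - 4) + "regi";
                try {
                    LineIterator it = FileUtils.lineIterator(new File(regiPath), "UTF-8");
                    LineIterator.closeQuietly(it);
                } catch (IOException e) {
                    return false; // the patch shows an ERROR_NO_REGI_FILE_FOUND alert here
                }
            }
            return true;
        }
    }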
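
detectCorpusType can no longer rely on the title alone for Gigafida, since Gigafida 2.0 ships under the same name; the patch reads the `edition` tag from the XML header and branches on its numeric value. The branch in isolation, with readXMLHeaderTag stubbed away since its body is outside these hunks:

    public class EditionBranch {
        enum CorpusType { GIGAFIDA, GIGAFIDA2 }

        // edition comes from XML_processing.readXMLHeaderTag(path, "edition") in the patch
        static CorpusType fromEdition(String edition) {
            return Double.parseDouble(edition) < 2.0 ? CorpusType.GIGAFIDA : CorpusType.GIGAFIDA2;
        }

        public static void main(String[] args) {
            System.out.println(fromEdition("1.1")); // GIGAFIDA
            System.out.println(fromEdition("2.0")); // GIGAFIDA2
        }
    }

The patch's Double.valueOf behaves the same after unboxing; either way, a missing or non-numeric edition tag would throw at this point, and the visible hunk adds no catch for it.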
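
Export.java swaps the Taxonomy.TOTAL constant for the corpus's own total entry (statistics.getCorpus().getTotal()) as the map key, but the exported numbers are computed as before: a share of the selected total and a per-million relative frequency. The arithmetic on its own:

    public class RelativeFrequency {
        // frequency per million tokens, formatted as in the export (two decimals)
        static String perMillion(long occurrences, long corpusTotal) {
            return String.format("%.2f", ((double) occurrences * 1000000) / corpusTotal);
        }

        public static void main(String[] args) {
            // 1234 hits in a 56-million-token corpus
            System.out.println(perMillion(1234, 56000000L)); // 22.04 (decimal separator is locale-dependent)
        }
    }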
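
Both ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y templates gain a fourth %d for the remaining-seconds estimate, which is why every updateMessage call in the tabs now passes four arguments. A quick check with the English template verbatim (the file name is made up):

    public class MessageFormatCheck {
        public static void main(String[] args) {
            String template = "Analyzing file %d of %d (%s) - Estimated time remaining %d s";
            System.out.println(String.format(template, 3, 10, "F0006347.xml", 42));
            // -> Analyzing file 3 of 10 (F0006347.xml) - Estimated time remaining 42 s
        }
    }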