Fixed ssj500k functionality, the prefix/suffix bug, and several other bugs.

2018-12-01 10:50:11 +01:00 · 2018-12-01 10:50:11 +01:00 · ca83cb023b
commit ca83cb023b
parent 9efe3d529b
14 changed files with 530 additions and 162 deletions
--- a/src/main/java/alg/XML_processing.java
+++ b/src/main/java/alg/XML_processing.java
@ -313,6 +313,17 @@ public class XML_processing {
                            }
 							if (c3Content.equals(".") && includeThisBlock) {
 								if (stats.getFilter().getNgramValue() == 0){
 									int numSentenceParts = 0;
 									for(Word w : stavek){
 										int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1);
 										numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts;
 									}
 									stats.updateUniGramOccurrences(numSentenceParts, new ArrayList<>());
 								} else if(stats.getFilter().getNgramValue() >= 1) {
 									stats.updateUniGramOccurrences(stavek.size(), new ArrayList<>());
 								}
 								// add sentence to corpus
 								corpus.add(new Sentence(stavek, null));
 								// and start a new one
@ -637,8 +648,16 @@ public class XML_processing {
 						// parser reached end of the current sentence
 						if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
                            // count all UniGramOccurrences in sentence for statistics
 							if (stats.getFilter().getNgramValue() == 0){
 								int numSentenceParts = 0;
 								for(Word w : sentence){
 									int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1);
 									numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts;
 								}
 								stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy);
 							} else if(stats.getFilter().getNgramValue() >= 1) {
 								stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy);
-
+							}
 							// add sentence to corpus if it passes filters
 							sentence = runFilters(sentence, stats.getFilter());
@ -713,6 +732,7 @@ public class XML_processing {
    public static boolean readXMLSSJ500K(String path, StatisticsNew stats) {
        boolean inWord = false;
        boolean inPunctuation = false;
        boolean taxonomyMatch = true;
        ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
 //        ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
        String lemma = "";
@ -759,9 +779,13 @@ public class XML_processing {
                                // keep only taxonomy properties
                                Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""));
                                currentFiletaxonomy.add(currentFiletaxonomyElement);
-                                Tax taxonomy = new Tax();
+//                                Tax taxonomy = new Tax();
 //                                currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
                            }
                        } else if (qName.equals("bibl")) {
 							// before proceeding to read this file, make sure that taxonomy filters are a match
 							taxonomyMatch = true;
 						}
                        break;
@ -789,10 +813,21 @@ public class XML_processing {
                        // parser reached end of the current sentence
                        if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
 							if (stats.getFilter().getNgramValue() == 0){
 								int numSentenceParts = 0;
 								for(Word w : sentence){
 									int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1);
 									numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts;
 								}
 								stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy);
 							} else if(stats.getFilter().getNgramValue() >= 1) {
 								stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy);
 							}
                            // add sentence to corpus if it passes filters
                            sentence = runFilters(sentence, stats.getFilter());
-                            if (!ValidationUtil.isEmpty(sentence)) {
+                            if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) {
                                corpus.add(new Sentence(sentence, currentFiletaxonomy));
                            }
@ -821,6 +856,19 @@ public class XML_processing {
                            currentFiletaxonomy = new ArrayList<>();
 //                            currentFiletaxonomyLong = new ArrayList<>();
                        } else if (endElement.getName().getLocalPart().equals("bibl")) {
 							// before proceeding to read this file, make sure that taxonomy filters are a match
 							if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
 								currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
 								if (currentFiletaxonomy.isEmpty()) {
 									// taxonomies don't match so stop
 //									return false;
 									taxonomyMatch = false;
 //									System.out.println("TEST");
 								}
 							}
 						}
                        break;
@ -925,7 +973,7 @@ public class XML_processing {
 								// keep only taxonomy properties
 								Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()));
 								currentFiletaxonomy.add(currentFiletaxonomyElement);
-								Tax taxonomy = new Tax();
+//								Tax taxonomy = new Tax();
 //								currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
 							}
 						} else if (qName.equalsIgnoreCase("div")) {
@ -1007,6 +1055,17 @@ public class XML_processing {
                                sentence = GOSCorpusHM.remove(GOSCorpusHMKey);
 								if (stats.getFilter().getNgramValue() == 0){
 									int numSentenceParts = 0;
 									for(Word w : sentence){
 										int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1);
 										numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts;
 									}
 									stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy);
 								} else if(stats.getFilter().getNgramValue() >= 1) {
 									stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy);
 								}
 								// add sentence to corpus if it passes filters
 								if (includeFile && !ValidationUtil.isEmpty(sentence)) {
 									sentence = runFilters(sentence, stats.getFilter());
@ -1040,7 +1099,7 @@ public class XML_processing {
 								// disregard this entry if taxonomies don't match
 								includeFile = !currentFiletaxonomy.isEmpty();
-								currentFiletaxonomy = new ArrayList<>();
+//								currentFiletaxonomy = new ArrayList<>();
 							}
 						}
--- a/src/main/java/data/CalculateFor.java
+++ b/src/main/java/data/CalculateFor.java
@ -45,8 +45,30 @@ public enum CalculateFor {
 		return null;
 	}
-	public String toMetadataString() {
+	public String totalSumString(int ngram) {
-		switch(this){
+		if (ngram == 0) {
 			switch (this) {
 				case WORD:
 					return "Skupna vsota vseh črkovnih nizov različnic:";
 				case NORMALIZED_WORD:
 					return "Skupna vsota vseh črkovnih nizov normaliziranih različnic:";
 				case LEMMA:
 					return "Skupna vsota vseh črkovnih nizov lem:";
 				case MORPHOSYNTACTIC_SPECS:
 					return "Skupna vsota vseh črkovnih nizov oblikoskladenjskih oznak:";
 				case MORPHOSYNTACTIC_PROPERTY:
 					return "Skupna vsota vseh črkovnih nizov oblikoskladenjskih lastnosti:";
 				case WORD_TYPE:
 					return "Skupna vsota vseh črkovnih nizov besednih vrst:";
 				case DIST_WORDS:
 					return "Skupna vsota vseh črkovnih nizov različnic:";
 				case DIST_LEMMAS:
 					return "Skupna vsota vseh črkovnih nizov lem:";
 				default:
 					return null;
 			}
 		} else if (ngram >= 1) {
 			switch (this) {
 				case WORD:
 					return "Skupna vsota vseh različnic:";
 				case NORMALIZED_WORD:
@ -67,9 +89,114 @@ public enum CalculateFor {
 					return null;
 			}
 		}
 		return null;
 	}
-	public String toHeaderString() {
+	/**
+	 * Returns the (Slovenian) label used for the "total sum of all found ..." summary entry
+	 * of this calculation target.
+	 *
+	 * @param ngram n-gram level; {@code 0} selects the letter-string ("črkovnih nizov") wording,
+	 *              any value {@code >= 1} selects the word-level wording
+	 * @return the label text, or {@code null} when {@code ngram} is negative or this constant
+	 *         has no label defined here
+	 */
+	public String foundSumString(int ngram) {
 		// negative levels match neither wording
 		if (ngram < 0) {
 			return null;
 		}
 		final boolean letterLevel = (ngram == 0);
 		switch (this) {
 			case WORD:
 			case DIST_WORDS:
 				return letterLevel
 						? "Skupna vsota vseh najdenih črkovnih nizov različnic:"
 						: "Skupna vsota vseh najdenih različnic:";
 			case NORMALIZED_WORD:
 				return letterLevel
 						? "Skupna vsota vseh najdenih črkovnih nizov normaliziranih različnic:"
 						: "Skupna vsota vseh najdenih normaliziranih različnic:";
 			case LEMMA:
 			case DIST_LEMMAS:
 				return letterLevel
 						? "Skupna vsota vseh najdenih črkovnih nizov lem:"
 						: "Skupna vsota vseh najdenih lem:";
 			case MORPHOSYNTACTIC_SPECS:
 				return letterLevel
 						? "Skupna vsota vseh najdenih črkovnih nizov oblikoskladenjskih oznak:"
 						: "Skupna vsota vseh najdenih oblikoskladenjskih oznak:";
 			case MORPHOSYNTACTIC_PROPERTY:
 				return letterLevel
 						? "Skupna vsota vseh najdenih črkovnih nizov oblikoskladenjskih lastnosti:"
 						: "Skupna vsota vseh najdenih oblikoskladenjskih lastnosti:";
 			case WORD_TYPE:
 				return letterLevel
 						? "Skupna vsota vseh najdenih črkovnih nizov besednih vrst:"
 						: "Skupna vsota vseh najdenih besednih vrst:";
 			default:
 				return null;
 		}
 	}
 	/**
 	 * Returns the (Slovenian) column label for the total absolute frequency of this
 	 * calculation target.
 	 *
 	 * @param ngram n-gram level; {@code 0} yields the generic letter-string label regardless
 	 *              of the constant, any other value yields the constant-specific label
 	 * @return the label text, or {@code null} when this constant has no label defined here
 	 */
 	public String totalAbsoluteFrequencyString(int ngram) {
 		if (ngram == 0) {
 			return "Skupna absolutna pogostost črkovnega niza";
 		}

 		final String label;
 		switch (this) {
 			case WORD:
 			case DIST_WORDS:
 				label = "Skupna absolutna pogostost različnice";
 				break;
 			case NORMALIZED_WORD:
 				label = "Skupna absolutna pogostost normalizirane različnice";
 				break;
 			case LEMMA:
 			case DIST_LEMMAS:
 				label = "Skupna absolutna pogostost leme";
 				break;
 			case MORPHOSYNTACTIC_SPECS:
 				label = "Skupna absolutna pogostost oblikoskladenjske oznake";
 				break;
 			case MORPHOSYNTACTIC_PROPERTY:
 				label = "Skupna absolutna pogostost oblikoskladenjske lastnosti";
 				break;
 			case WORD_TYPE:
 				label = "Skupna absolutna pogostost besedne vrste";
 				break;
 			default:
 				label = null;
 				break;
 		}
 		return label;
 	}
 	/**
 	 * Returns the (Slovenian) column label for the share relative to all found items of this
 	 * calculation target.
 	 *
 	 * @param ngram n-gram level; {@code 0} yields the generic letter-string label regardless
 	 *              of the constant, any other value yields the constant-specific label
 	 * @return the label text, or {@code null} when this constant has no label defined here
 	 */
 	public String shareOfTotalString(int ngram) {
 		if (ngram == 0) {
 			return "Delež glede na skupno vsoto vseh najdenih črkovnih nizov";
 		}

 		final String label;
 		switch (this) {
 			case WORD:
 			case DIST_WORDS:
 				label = "Delež glede na vse najdene različnice";
 				break;
 			case NORMALIZED_WORD:
 				label = "Delež glede na vse najdene normalizirane različnice";
 				break;
 			case LEMMA:
 			case DIST_LEMMAS:
 				label = "Delež glede na vse najdene leme";
 				break;
 			case MORPHOSYNTACTIC_SPECS:
 				label = "Delež glede na vse najdene oblikoskladenjske oznake";
 				break;
 			case MORPHOSYNTACTIC_PROPERTY:
 				label = "Delež glede na vse najdene oblikoskladenjske lastnosti";
 				break;
 			case WORD_TYPE:
 				label = "Delež glede na vse najdene besedne vrste";
 				break;
 			default:
 				label = null;
 				break;
 		}
 		return label;
 	}
 	public String toHeaderString(int ngram) {
 		if (ngram == 0) {
 			return "Črkovni niz";
 		} else if (ngram == 1) {
 			switch (this) {
 				case WORD:
 					return "Različnica";
 				case NORMALIZED_WORD:
@ -90,27 +217,46 @@ public enum CalculateFor {
 					return null;
 			}
 		}
-
+		switch (this) {
    public String toPercentString() {
        switch(this){
 			case WORD:
                return "Delež glede na vse različnice";
            case NORMALIZED_WORD:
                return "Delež glede na vse normalizirane različnice";
            case LEMMA:
                return "Delež glede na vse leme";
            case MORPHOSYNTACTIC_SPECS:
                return "Delež glede na vse oblikoskladenjske oznake";
            case MORPHOSYNTACTIC_PROPERTY:
                return "Delež glede na vse oblikoskladenjske lastnosti";
            case WORD_TYPE:
                return "Delež glede na vse besedne vrste";
 			case DIST_WORDS:
-                return "Delež glede na vse različnice";
+				return "Različnica niza";
 			case NORMALIZED_WORD:
 				return "Normalizirana različnica niza";
 			case LEMMA:
 			case DIST_LEMMAS:
-                return "Delež glede na vse leme";
+				return "Lema niza";
 			case MORPHOSYNTACTIC_SPECS:
 				return "Oblikoskladenjska oznaka niza";
 			case MORPHOSYNTACTIC_PROPERTY:
 				return "Oblikoskladenjska lastnost niza";
 			case WORD_TYPE:
 				return "Besedna vrsta niza";
 			default:
 				return null;
 		}
 	}
 //    public String toPercentString() {
 //        switch(this){
 //            case WORD:
 //                return "Delež glede na vse različnice";
 //            case NORMALIZED_WORD:
 //                return "Delež glede na vse normalizirane različnice";
 //            case LEMMA:
 //                return "Delež glede na vse leme";
 //            case MORPHOSYNTACTIC_SPECS:
 //                return "Delež glede na vse oblikoskladenjske oznake";
 //            case MORPHOSYNTACTIC_PROPERTY:
 //                return "Delež glede na vse oblikoskladenjske lastnosti";
 //            case WORD_TYPE:
 //                return "Delež glede na vse besedne vrste";
 //            case DIST_WORDS:
 //                return "Delež glede na vse različnice";
 //            case DIST_LEMMAS:
 //                return "Delež glede na vse leme";
 //            default:
 //                return null;
 //        }
 //    }
 }
--- a/src/main/java/data/StatisticsNew.java
+++ b/src/main/java/data/StatisticsNew.java
@ -10,6 +10,7 @@ import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.tuple.ImmutablePair;
@ -59,7 +60,7 @@ public class StatisticsNew {
 		if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
 			if (this.filter.getTaxonomy().isEmpty()) {
 				for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
-					this.taxonomyResult.put(Taxonomy.factory(this.corpus.getTaxonomy().get(i)), new ConcurrentHashMap<>());
+					this.taxonomyResult.put(Taxonomy.factoryLongName(this.corpus.getTaxonomy().get(i)), new ConcurrentHashMap<>());
 				}
 			} else {
 				for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
@ -236,7 +237,7 @@ public class StatisticsNew {
 			analysisProducedResults = true;
 		}
-		removeMinimalOccurrences(taxonomyResult.get(Taxonomy.TOTAL), filter.getMinimalOccurrences());
+		removeMinimalOccurrences(filter.getMinimalOccurrences());
 		removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
 		stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get(Taxonomy.TOTAL), Util.getValidInt(limit))));
 		Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
@ -265,12 +266,14 @@ public class StatisticsNew {
 	/**
 	 * Removes lines where total number of occurrences is lower than specified number (minimalOccurrences)
 	 */
-	private void removeMinimalOccurrences(Map<MultipleHMKeys, AtomicLong> taxonomyResultTotal, Integer minimalOccurrences) {
+	private void removeMinimalOccurrences(Integer minimalOccurrences) {
 		if (minimalOccurrences == 0)
 			return;
-		for (MultipleHMKeys key : taxonomyResultTotal.keySet()){
+		for (MultipleHMKeys key : taxonomyResult.get(Taxonomy.TOTAL).keySet()){
-			if(taxonomyResultTotal.get(key).intValue() < minimalOccurrences){
+			if(taxonomyResult.get(Taxonomy.TOTAL).get(key).intValue() < minimalOccurrences){
-				taxonomyResultTotal.remove(key);
+				for (Taxonomy t : taxonomyResult.keySet()){
 					taxonomyResult.get(t).remove(key);
 				}
 			}
 		}
 	}
@ -498,15 +501,17 @@ public class StatisticsNew {
 		info.put("Izračunaj za:", filter.getCalculateFor().toString());
 		// also write
-		if (filter.getMultipleKeys().size() > 0){
+		if (ngramLevel > 0) {
 			if (filter.getMultipleKeys().size() > 0) {
 				StringBuilder mk = new StringBuilder();
 				for (CalculateFor s : filter.getMultipleKeys()) {
 					mk.append(s.toString()).append("; ");
 				}
-			info.put("Izpiši tudi: ", String.join("; ", mk.substring(0, mk.length() - 2)));
+				info.put("Upoštevaj tudi: ", String.join("; ", mk.substring(0, mk.length() - 2)));
 			} else {
-			info.put("Izpiši tudi: ", "");
+				info.put("Upoštevaj tudi: ", "");
 			}
 		}
 		// data limitations
@ -535,7 +540,8 @@ public class StatisticsNew {
 		}
 		// also write - n - gram > 1
-		if (ngramLevel > 1 && filter.getCollocability().size() > 0){
+		if(ngramLevel > 1) {
 			if (filter.getCollocability().size() > 0) {
 				StringBuilder mk = new StringBuilder();
 				for (Collocability s : filter.getCollocability()) {
 					mk.append(s.toString()).append("; ");
@ -544,6 +550,7 @@ public class StatisticsNew {
 			} else {
 				info.put("Kolokabilnost: ", "");
 			}
 		}
 		// fragmented MSD - n-gram = 1
 		if (info.get("Analiza:").equals("besede")){
@ -580,13 +587,48 @@ public class StatisticsNew {
 //		}
 		info.put("Taksonomija: ", "");
-		if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+		if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) || filter.getDisplayTaxonomy()) {
 			ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
-			String sep = "";
+			if (filter.getDisplayTaxonomy() && tax.size() == 0) {
-			for (String s : tax) {
+
-				info.put(sep = sep + " ", s);
+
 //				ArrayList<String> intList = (new ArrayList<>(taxonomyResult.keySet()).stream()
 //						.forEach(x -> {x.toString();}));
 //				ArrayList<String> taxonomyString = new ArrayList<>();
 //				for (Taxonomy t : taxonomyResult.keySet()){
 //					taxonomyString.add(t.toString());
 //				}
 //				ObservableList<String> taxonomyObservableString = Tax.getTaxonomyForComboBox(corpus.getCorpusType(), new HashSet<>(taxonomyString));
 //				ArrayList<String> sortedTaxonomyString = new ArrayList<>();
 //				for (String t : taxonomyObservableString){
 //					sortedTaxonomyString.add(t);
 //				}
 //				getTaxonomyForTaxonomyResult
 				tax = Tax.getTaxonomyForTaxonomyResult(corpus.getCorpusType(), taxonomyResult.keySet());
 			}
 //			String sep = "";
 			for (String s : tax) {
 				if (s == null) {
 					continue;
 				}
 //				info.put(sep = sep + " ", s);
 				if (uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s)) == null) {
 					info.put(s, "");
 					continue;
 				}
 				int n = uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s)).intValue();
 				if (n == 0) {
 					info.put(s, "");
 				} else {
 					info.put(s, String.valueOf(n));
 				}
 			}
 		}
 		info.put("Min. št. pojavitev: ", String.valueOf(filter.getMinimalOccurrences()));
--- a/src/main/java/data/Tax.java
+++ b/src/main/java/data/Tax.java
@ -135,6 +135,60 @@ public class Tax {
 		return FXCollections.observableArrayList(taxForCombo);
 	}
 	/**
 	 * Collects the display names of the taxonomies in {@code foundTax}, together with the
 	 * names of their parent taxonomies.
 	 *
 	 * <p>Parents are derived from the dotted string form of each taxonomy: every prefix that
 	 * keeps at least two dot-separated segments is resolved through {@link Taxonomy#factory}
 	 * and treated as found too. The result follows the key order of the taxonomy map chosen
 	 * for the corpus type; an unrecognised corpus type yields an empty list.
 	 *
 	 * @param corpusType corpus type that selects the taxonomy map to look names up in
 	 * @param foundTax   taxonomies to report; the set itself is not modified
 	 * @return values of the selected taxonomy map whose keys resolve to a found (or parent) taxonomy
 	 */
 	public static ArrayList<String> getTaxonomyForTaxonomyResult(CorpusType corpusType, Set<Taxonomy> foundTax) {
 		// pick the name table that belongs to this kind of corpus
 		final LinkedHashMap<String, String> nameTable;
 		if (corpusType == CorpusType.GOS) {
 			nameTable = GOS_TAXONOMY;
 		} else if (corpusType == CorpusType.GIGAFIDA
 				|| corpusType == CorpusType.CCKRES
 				|| corpusType == CorpusType.SSJ500K) {
 			nameTable = GIGAFIDA_TAXONOMY;
 		} else {
 			nameTable = new LinkedHashMap<>();
 		}

 		// work on a copy so the caller's set stays untouched
 		final Set<Taxonomy> present = new HashSet<>(foundTax);

 		// add parent taxonomies: all dotted prefixes that keep two or more segments
 		final Set<Taxonomy> parents = new HashSet<>();
 		for (Taxonomy found : present) {
 			String[] segments = found.toString().split("\\.");
 			for (int keep = segments.length - 1; keep >= 2; keep--) {
 				String prefix = String.join(".", Arrays.copyOfRange(segments, 0, keep));
 				parents.add(Taxonomy.factory(prefix));
 			}
 		}
 		present.addAll(parents);

 		// walk the name table so the output keeps its relative order
 		final ArrayList<String> names = new ArrayList<>();
 		nameTable.forEach((code, displayName) -> {
 			if (present.contains(Taxonomy.factory(code))) {
 				names.add(displayName);
 			}
 		});
 		return names;
 	}
 	/**
 	 * Returns the set of corpus types held in {@code corpusTypesWithTaxonomy}.
 	 *
 	 * <p>The field itself is returned, not a copy.
 	 *
 	 * @return the {@code corpusTypesWithTaxonomy} set
 	 */
 	public static HashSet<CorpusType> getCorpusTypesWithTaxonomy() {
 		return corpusTypesWithTaxonomy;
 	}
@ -204,7 +258,7 @@ public class Tax {
 	public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<Taxonomy> taxonomy) {
 		LinkedHashMap<String, String> tax = new LinkedHashMap<>();
-		if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
+		if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
 			tax = GIGAFIDA_TAXONOMY;
 		} else if (corpusType == CorpusType.GOS) {
 			tax = GOS_TAXONOMY;
--- a/src/main/java/data/Taxonomy.java
+++ b/src/main/java/data/Taxonomy.java
@ -502,6 +502,7 @@ public enum Taxonomy {
 			r.add(SITUACIJA_TELEVIZIJA);
 		} else if(disjointTaxonomy.equals(KANAL)){
 			r.add(KANAL_OSEBNI_STIK);
 			r.add(KANAL_TELEFON);
 			r.add(KANAL_RADIO);
 			r.add(KANAL_TELEVIZIJA);
 		} else if(disjointTaxonomy.equals(SSJ_TISK)){
@ -646,6 +647,9 @@ public enum Taxonomy {
 		connections.put(SSJ_KNJIZNO, SSJ_TISK);
 		connections.put(SSJ_LEPOSLOVNO, SSJ_KNJIZNO);
 		connections.put(SSJ_STROKOVNO, SSJ_KNJIZNO);
 		connections.put(SSJ_PERIODICNO, SSJ_TISK);
 		connections.put(SSJ_CASOPIS, SSJ_PERIODICNO);
 		connections.put(SSJ_REVIJA, SSJ_PERIODICNO);
 		connections.put(SSJ_DRUGO, SSJ_TISK);
 		connections.put(FT_P_GOVORNI, FT_P_PRENOSNIK);
--- a/src/main/java/gui/CharacterAnalysisTab.java
+++ b/src/main/java/gui/CharacterAnalysisTab.java
@ -181,6 +181,7 @@ public class CharacterAnalysisTab {
 		// taxonomy
 		if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
 			taxonomyCCB.setDisable(false);
 			taxonomyCCB.getItems().removeAll();
 			taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
 			taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
@ -217,10 +218,13 @@ public class CharacterAnalysisTab {
 		}
 		displayTaxonomy = false;
 		displayTaxonomyChB.setSelected(false);
 		// set
 		if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
 			displayTaxonomyChB.setDisable(false);
 			displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
 				displayTaxonomy = newValue;
-			if(displayTaxonomy){
+				if (displayTaxonomy) {
 					minimalTaxonomyTF.setDisable(false);
 				} else {
 					minimalTaxonomyTF.setDisable(true);
@ -232,6 +236,9 @@ public class CharacterAnalysisTab {
 				logger.info("display taxonomy: ", displayTaxonomy);
 			});
 			displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
 		} else {
 			displayTaxonomyChB.setDisable(true);
 		}
 		// cvv
 		calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
--- a/src/main/java/gui/OneWordAnalysisTab.java
+++ b/src/main/java/gui/OneWordAnalysisTab.java
@ -14,6 +14,7 @@ import org.apache.commons.lang3.StringUtils;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.controlsfx.control.CheckComboBox;
 import org.controlsfx.control.IndexedCheckModel;
 import java.io.File;
 import java.io.UnsupportedEncodingException;
@ -220,14 +221,16 @@ public class OneWordAnalysisTab {
        // taxonomy
        if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
            taxonomyCCB.setDisable(false);
            taxonomyCCB.getItems().removeAll();
            taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
-            taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
+
-                boolean changing = true;
+            ListChangeListener<String> listener = new ListChangeListener<String>() {
                public boolean changing = true;
                @Override
-                public void onChanged(ListChangeListener.Change<? extends String> c){
+                public void onChanged(Change<? extends String> c) {
-                    if(changing) {
+                    if (changing) {
                        ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
                        ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
@ -249,17 +252,28 @@ public class OneWordAnalysisTab {
                        logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
                    }
                }
-            });
+            };
 //            taxonomyCCB.getCheckModel().getCheckedItems().removeListener(listener);
 //            System.out.println("THIS WORKS!!!!");
            taxonomyCCB.getCheckModel().clearChecks();
 //            System.out.println("YES???");
            taxonomyCCB.getCheckModel().getCheckedItems().addListener(listener);
 //            taxonomyCCB.setCheckModel(null);
 //            taxonomyCCB.getCheckModel().clearChecks();
        } else {
            taxonomyCCB.setDisable(true);
        }
        displayTaxonomy = false;
        displayTaxonomyChB.setSelected(false);
        // set
        if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
            displayTaxonomyChB.setDisable(false);
            displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
                displayTaxonomy = newValue;
-            if(displayTaxonomy){
+                if (displayTaxonomy) {
                    minimalTaxonomyTF.setDisable(false);
                } else {
                    minimalTaxonomyTF.setDisable(true);
@ -270,6 +284,9 @@ public class OneWordAnalysisTab {
                logger.info("display taxonomy: ", displayTaxonomy);
            });
            displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
        } else {
            displayTaxonomyChB.setDisable(true);
        }
        writeMsdAtTheEnd = false;
        writeMsdAtTheEndChB.setDisable(true);
--- a/src/main/java/gui/StringAnalysisTabNew2.java
+++ b/src/main/java/gui/StringAnalysisTabNew2.java
@ -198,10 +198,14 @@ public class StringAnalysisTabNew2 {
        notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB));
        displayTaxonomy = false;
        displayTaxonomyChB.setSelected(false);
        // set
        if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
            displayTaxonomyChB.setDisable(false);
            displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
                displayTaxonomy = newValue;
-            if(displayTaxonomy){
+                if (displayTaxonomy) {
                    minimalTaxonomyTF.setDisable(false);
                } else {
                    minimalTaxonomyTF.setDisable(true);
@ -212,6 +216,9 @@ public class StringAnalysisTabNew2 {
                logger.info("display taxonomy: ", displayTaxonomy);
            });
            displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
        } else {
            displayTaxonomyChB.setDisable(true);
        }
        // calculateForCB
        calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
@ -306,6 +313,7 @@ public class StringAnalysisTabNew2 {
        // taxonomy
        if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
            taxonomyCCB.setDisable(false);
            taxonomyCCB.getItems().removeAll();
            taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
            taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
@ -667,7 +675,11 @@ public class StringAnalysisTabNew2 {
                    readXML(f.toString(), statisticsOneGrams);
                    i++;
                    this.updateProgress(i, corpusFiles.size() * 2);
                    if (statistic.getFilter().getCollocability().size() > 0) {
                        this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
                    } else {
                        this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
                    }
                }
                return null;
@ -776,7 +788,7 @@ public class StringAnalysisTabNew2 {
                        this.updateProgress(i, corpusFiles.size());
                        this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
                    }
-                    this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
+//                    this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
                }
                return null;
--- a/src/main/java/gui/WordLevelTab.java
+++ b/src/main/java/gui/WordLevelTab.java
@ -343,6 +343,7 @@ public class WordLevelTab {
 		// taxonomy
 		if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
 			taxonomyCCB.setDisable(false);
 			taxonomyCCB.getItems().removeAll();
 			taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
 			taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
@ -379,10 +380,13 @@ public class WordLevelTab {
 		}
 		displayTaxonomy = false;
 		displayTaxonomyChB.setSelected(false);
 		// set
 		if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
 			displayTaxonomyChB.setDisable(false);
 			displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
 				displayTaxonomy = newValue;
-			if(displayTaxonomy){
+				if (displayTaxonomy) {
 					minimalTaxonomyTF.setDisable(false);
 				} else {
 					minimalTaxonomyTF.setDisable(true);
@ -393,6 +397,9 @@ public class WordLevelTab {
 				logger.info("display taxonomy: ", displayTaxonomy);
 			});
 			displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
 		} else {
 			displayTaxonomyChB.setDisable(true);
 		}
 //		writeMsdAtTheEnd = false;
 //		writeMsdAtTheEndChB.setDisable(true);
--- a/src/main/java/util/Export.java
+++ b/src/main/java/util/Export.java
@ -64,27 +64,27 @@ public class Export {
 		//Delimiter used in CSV file
 		String NEW_LINE_SEPARATOR = "\n";
-		List<Object> FILE_HEADER_AL = new ArrayList<Object>();
+		List<Object> FILE_HEADER_AL = new ArrayList<>();
 		Object[] FILE_HEADER;
 		//Count frequencies
-		long num_frequencies = 0;
+//		long num_frequencies = 0;
-		for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
+//		for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
-			Map<MultipleHMKeys, Long> map = p.getRight();
+//			Map<MultipleHMKeys, Long> map = p.getRight();
-			if (map.isEmpty())
+//			if (map.isEmpty())
-				continue;
+//				continue;
-			num_frequencies = Util.mapSumFrequencies(map);
+//			num_frequencies = Util.mapSumFrequencies(map);
-		}
+//		}
-//		Map<String, Long> num_taxonomy_frequencies = new ConcurrentHashMap<>();
+		Map<Taxonomy, Long> num_selected_taxonomy_frequencies = new ConcurrentHashMap<>();
-//		for (String taxonomyKey : taxonomyResults.keySet()) {
+		for (Taxonomy taxonomyKey : taxonomyResults.keySet()) {
-//			num_taxonomy_frequencies.put(taxonomyKey, (long) 0);
+			num_selected_taxonomy_frequencies.put(taxonomyKey, (long) 0);
-//			for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){
+			for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){
-//				long val = num_taxonomy_frequencies.get(taxonomyKey);
+				long val = num_selected_taxonomy_frequencies.get(taxonomyKey);
-//				val += value.get();
+				val += value.get();
-//				num_taxonomy_frequencies.put(taxonomyKey, val);
+				num_selected_taxonomy_frequencies.put(taxonomyKey, val);
-//			}
+			}
-//		}
+		}
        Map<Taxonomy, AtomicLong> num_taxonomy_frequencies = statistics.getUniGramOccurrences();
@ -92,32 +92,37 @@ public class Export {
 		if (!ValidationUtil.isEmpty(filter.getSkipValue()) && filter.getSkipValue() > 0) {
 			FILE_HEADER_AL.add("Izpuščene besede");
 		}
-		FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString());
+		FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString(filter.getNgramValue()));
-		if (filter.getCalculateFor().equals(CalculateFor.LEMMA))
+		if (filter.getCalculateFor().equals(CalculateFor.LEMMA)) {
-			FILE_HEADER_AL.add("Lema male črke");
+			if(filter.getNgramValue() == 0) {
-
+				FILE_HEADER_AL.add("Črkovni niz (male črke)");
 			} else if(filter.getNgramValue() >= 1) {
 				FILE_HEADER_AL.add("Lema (male črke)");
 			}
 		}
 		if (filter.getSuffixLength() != null && filter.getSuffixList() != null && filter.getPrefixLength() != null && filter.getPrefixList() != null) {
            if (filter.getPrefixLength() > 0 || filter.getPrefixList().size() > 0) {
-                FILE_HEADER_AL.add("Predpona");
+                FILE_HEADER_AL.add("Začetni del besede");
            }
            FILE_HEADER_AL.add("Preostali del besede");
            if (filter.getSuffixLength() > 0 || filter.getSuffixList().size() > 0) {
-                FILE_HEADER_AL.add("Pripona");
+                FILE_HEADER_AL.add("Končni del besede");
            }
 		}
-		headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences().get(Taxonomy.TOTAL).longValue()));
+		headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
 		headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
 //		headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
 		for (CalculateFor otherKey : filter.getMultipleKeys()) {
-            FILE_HEADER_AL.add(otherKey.toHeaderString());
+            FILE_HEADER_AL.add(otherKey.toHeaderString(filter.getNgramValue()));
            if (otherKey.equals(CalculateFor.LEMMA))
-                FILE_HEADER_AL.add("Lema male črke");
+                FILE_HEADER_AL.add("Lema (male črke)");
 		}
-		FILE_HEADER_AL.add("Skupna absolutna pogostost");
+		FILE_HEADER_AL.add(filter.getCalculateFor().totalAbsoluteFrequencyString(filter.getNgramValue()));
-		FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString());
+		FILE_HEADER_AL.add(filter.getCalculateFor().shareOfTotalString(filter.getNgramValue()));
 		FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
@ -216,6 +221,9 @@ public class Export {
                            // real prefix
                            String rpf = "";
                            for(String pf : filter.getPrefixList()){
 								if (key.length() < pf.length()) {
 									continue;
 								}
                                if (pf.equals(key.substring(0, pf.length()))){
                                    rpf = pf;
                                    break;
@ -225,6 +233,9 @@ public class Export {
                            // real suffix
                            String rsf = "";
                            for(String sf : filter.getSuffixList()){
                            	if (key.length() < sf.length()) {
                            		continue;
 								}
                                if (sf.equals(key.substring(key.length() - sf.length()))){
                                    rsf = sf;
                                    break;
@ -268,13 +279,13 @@ public class Export {
                    dataEntry.add(e.getValue().toString());
-                    dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
+                    dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(Taxonomy.TOTAL)));
-                    dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
+                    dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
                    for (Taxonomy key : taxonomyResults.keySet()){
                        if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
                            AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
                            dataEntry.add(frequency.toString());
-                            dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key).longValue()));
+                            dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key)));
                            dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue()));
 //                            dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
 //                            dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
--- a/src/main/resources/gui/OneWordAnalysisTab.fxml
+++ b/src/main/resources/gui/OneWordAnalysisTab.fxml
@ -30,7 +30,7 @@
            </items>
        </ComboBox>
-        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
+        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Upoštevaj tudi" />
        <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
--- a/src/main/resources/gui/StringAnalysisTabNew2.fxml
+++ b/src/main/resources/gui/StringAnalysisTabNew2.fxml
@ -54,7 +54,7 @@
            </items>
        </ComboBox>
-        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
+        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Upoštevaj tudi" />
        <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
--- a/src/main/resources/gui/WordLevelTab.fxml
+++ b/src/main/resources/gui/WordLevelTab.fxml
@ -30,7 +30,7 @@
        </items>
    </ComboBox>
-    <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
+    <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Upoštevaj tudi" />
    <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
--- a/src/main/resources/style.css
+++ b/src/main/resources/style.css
@ -0,0 +1,9 @@
 /* Base typography for the .root style class: 16px Arial via the JavaFX
    -fx-font shorthand.
    NOTE(review): presumably this is the scene-root class, so the font is
    inherited by all controls — confirm where this stylesheet is attached. */
 .root {
    -fx-font: 16px Arial;
 }
 /* .test style class: 8px Arial, bold weight, red text fill.
    -fx-font-weight is declared after the -fx-font shorthand; keep that order
    when editing this rule.
    NOTE(review): the name suggests an experimental/debug style — confirm it
    is still referenced by a node or FXML file. */
 .test {
    -fx-font: 8px Arial;
    -fx-font-weight: bold;
    -fx-text-fill: red;
 }