Some functionality from OneWord copied to StringAnalysis and fixed

2018-08-22 09:11:14 +02:00 · 2018-08-22 09:11:14 +02:00 · a8d147de52
commit a8d147de52
parent e140a9538b
12 changed files with 289 additions and 89 deletions
--- a/src/main/java/alg/ngram/Ngrams.java
+++ b/src/main/java/alg/ngram/Ngrams.java
@ -67,22 +67,43 @@ public class Ngrams {
 						multipleKeys = new MultipleHMKeys1(key);
 						break;
 					case 1:
-						multipleKeys = new MultipleHMKeys2(key, wordToString(ngramCandidate, otherKeys.get(0)));
+						String k1_2 = wordToString(ngramCandidate, otherKeys.get(0));
+						if (stats.getFilter().getNotePunctuations())
+							k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
+						multipleKeys = new MultipleHMKeys2(key, k1_2);
 						break;
 					case 2:
-						multipleKeys = new MultipleHMKeys3(key, wordToString(ngramCandidate, otherKeys.get(0)),
-								wordToString(ngramCandidate, otherKeys.get(1)));
+						String k2_2 = wordToString(ngramCandidate, otherKeys.get(0));
+						String k2_3 = wordToString(ngramCandidate, otherKeys.get(1));
+						if (stats.getFilter().getNotePunctuations()) {
+							k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
+							k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
+						}
+						multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
 						break;
 					case 3:
-						multipleKeys = new MultipleHMKeys4(key, wordToString(ngramCandidate, otherKeys.get(0)),
-								wordToString(ngramCandidate, otherKeys.get(1)),
-								wordToString(ngramCandidate, otherKeys.get(2)));
+						String k3_2 = wordToString(ngramCandidate, otherKeys.get(0));
+						String k3_3 = wordToString(ngramCandidate, otherKeys.get(1));
+						String k3_4 = wordToString(ngramCandidate, otherKeys.get(2));
+						if (stats.getFilter().getNotePunctuations()) {
+							k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
+							k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
+							k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
+						}
+						multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
 						break;
 					case 4:
-						multipleKeys = new MultipleHMKeys5(key, wordToString(ngramCandidate, otherKeys.get(0)),
-								wordToString(ngramCandidate, otherKeys.get(1)),
-								wordToString(ngramCandidate, otherKeys.get(2)),
-								wordToString(ngramCandidate, otherKeys.get(3)));
+						String k4_2 = wordToString(ngramCandidate, otherKeys.get(0));
+						String k4_3 = wordToString(ngramCandidate, otherKeys.get(1));
+						String k4_4 = wordToString(ngramCandidate, otherKeys.get(2));
+						String k4_5 = wordToString(ngramCandidate, otherKeys.get(3));
+						if (stats.getFilter().getNotePunctuations()) {
+							k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
+							k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
+							k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
+							k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
+						}
+						multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
 						break;
 					default:
 						multipleKeys = null;
@ -265,7 +286,7 @@ public class Ngrams {
 						currentLoop.add(checkAndModifySkipgramPunctuation(sentence, i, j, stats));
 						currentLoop.add(sentence.get(j));

-						validateAndCountSkipgramCandidate(currentLoop, stats);
+						validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
 					} else {
 						for (int k = j + 1; k <= j + 1 + skip; k++) { // 3gram
 							if (ngram == 3 && k < sentence.size()) {
@ -274,7 +295,7 @@ public class Ngrams {
 								currentLoop.add(checkAndModifySkipgramPunctuation(sentence, j, k, stats));
 								currentLoop.add(sentence.get(k));

-								validateAndCountSkipgramCandidate(currentLoop, stats);
+								validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
 							} else {
 								for (int l = k + 1; l <= k + 1 + skip; l++) { // 4gram
 									if (ngram == 4 && l < sentence.size()) {
@ -284,7 +305,7 @@ public class Ngrams {
 										currentLoop.add(checkAndModifySkipgramPunctuation(sentence, k, l, stats));
 										currentLoop.add(sentence.get(l));

-										validateAndCountSkipgramCandidate(currentLoop, stats);
+										validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
 									} else {
 										for (int m = l + 1; m <= l + 1 + skip; m++) { // 5gram
 											if (ngram == 5 && m < sentence.size()) {
@ -295,7 +316,7 @@ public class Ngrams {
 												currentLoop.add(checkAndModifySkipgramPunctuation(sentence, l, m, stats));
 												currentLoop.add(sentence.get(m));

-												validateAndCountSkipgramCandidate(currentLoop, stats);
+												validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
 											}
 										}
 									}
@ -308,13 +329,80 @@ public class Ngrams {
 		}
 	}

-	private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats) {
+	private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<String> taxonomy) {
 		// count if no regex is set or if it is & candidate passes it
 		if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) {
-		    String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
-            key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
-			stats.updateTaxonomyResults(new MultipleHMKeys1(key),
-										stats.getCorpus().getTaxonomy());
+			// Replaces the earlier single-key counting (a MultipleHMKeys1 counted against the
+			// corpus-wide taxonomy): the candidate is now counted under a key made of the primary
+			// string plus up to four extra parts taken from the filter's "multiple keys" list,
+			// and against the taxonomy list handed in by the caller.
+
+
+			ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
+
+			String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
+
+			// when the filter notes punctuation, erase a single trailing ',' from the key
+
+			// The non-empty check in the expression below keeps charAt from being called on an
+			// empty string; every extra key part further down gets the same treatment as the
+			// primary key.
+
+			if (stats.getFilter().getNotePunctuations())
+				key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
+
+			MultipleHMKeys multipleKeys;
+
+			// pick the MultipleHMKeysN variant that matches the number of extra key parts (0-4)
+			switch (otherKeys.size()) {
+				case 0:
+					multipleKeys = new MultipleHMKeys1(key);
+					break;
+				case 1:
+					String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0));
+					if (stats.getFilter().getNotePunctuations())
+						k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
+					multipleKeys = new MultipleHMKeys2(key, k1_2);
+					break;
+				case 2:
+					String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0));
+					String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1));
+					if (stats.getFilter().getNotePunctuations()) {
+						k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
+						k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
+					}
+					multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
+					break;
+				case 3:
+					String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0));
+					String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1));
+					String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2));
+					if (stats.getFilter().getNotePunctuations()) {
+						k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
+						k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
+						k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
+					}
+					multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
+					break;
+				case 4:
+					String k4_2 = wordToString(skipgramCandidate, otherKeys.get(0));
+					String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1));
+					String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2));
+					String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3));
+					if (stats.getFilter().getNotePunctuations()) {
+						k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
+						k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
+						k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
+						k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
+					}
+					multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
+					break;
+				default:
+					multipleKeys = null; // NOTE(review): more than four extra parts leaves the key null, and it is still passed on below - confirm updateTaxonomyResults tolerates null
+
+			}
+			stats.updateTaxonomyResults(multipleKeys, taxonomy);
+
 		}
 	}
 }
--- a/src/main/java/data/Filter.java
+++ b/src/main/java/data/Filter.java
@ -22,6 +22,7 @@ public class Filter {
 		IS_CVV,
 		STRING_LENGTH,
 		TAXONOMY,
+		DISPLAY_TAXONOMY,
 		MSD,
 		HAS_MSD,
 		SOLAR_FILTERS,
@ -102,6 +103,14 @@ public class Filter {
 		}
 	}

+	public void setDisplayTaxonomy(boolean displayTaxonomy) {
+		filter.put(DISPLAY_TAXONOMY, displayTaxonomy); // stored (boxed) under the DISPLAY_TAXONOMY key
+	}
+
+	public boolean getDisplayTaxonomy() {
+		return Boolean.TRUE.equals(filter.get(DISPLAY_TAXONOMY)); // false when the flag was never set, instead of an NPE from unboxing null
+	}
+
 	public void setMsd(ArrayList<Pattern> msd) {
 		filter.put(MSD, msd);
 		if (!ValidationUtil.isEmpty(msd)) {
--- a/src/main/java/data/StatisticsNew.java
+++ b/src/main/java/data/StatisticsNew.java
@ -48,7 +48,7 @@ public class StatisticsNew {
 		this.taxonomyResult.put("Total", new ConcurrentHashMap<>());

 		// create table for counting word occurances per taxonomies
-		if (this.corpus.getTaxonomy() != null) {
+		if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
 			if (this.filter.getTaxonomy().isEmpty()) {
 				for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
 					this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());
--- a/src/main/java/gui/CharacterAnalysisTab.java
+++ b/src/main/java/gui/CharacterAnalysisTab.java
@ -43,6 +43,10 @@ public class CharacterAnalysisTab {
 	private CheckComboBox<String> taxonomyCCB;
 	private ArrayList<String> taxonomy;

+	@FXML
+	private CheckBox displayTaxonomyChB;
+	private boolean displayTaxonomy;
+
 	@FXML
 	private CheckBox calculatecvvCB;
 	private boolean calculateCvv;
@ -171,6 +175,14 @@ public class CharacterAnalysisTab {
 			taxonomyCCB.setDisable(true);
 		}

+		displayTaxonomy = false;
+		// mirror the checkbox state into the field that is later copied into the filter
+		displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
+			displayTaxonomy = newValue;
+			logger.info("display taxonomy: {}", displayTaxonomy); // "{}" is required, otherwise the value argument is never rendered
+		});
+		displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
+
 		// cvv
 		calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
 			calculateCvv = newValue;
@ -390,6 +402,7 @@ public class CharacterAnalysisTab {
 		filter.setCalculateFor(calculateFor);
 		filter.setMsd(msd);
 		filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
+		filter.setDisplayTaxonomy(displayTaxonomy);
 		filter.setAl(AnalysisLevel.STRING_LEVEL);
 		filter.setSkipValue(0);
 		filter.setIsCvv(calculateCvv);
--- a/src/main/java/gui/CorpusTab.java
+++ b/src/main/java/gui/CorpusTab.java
@ -46,9 +46,9 @@ public class CorpusTab {
 	private CheckBox readHeaderInfoChB;
 	private boolean readHeaderInfo;

-	@FXML
-	private CheckBox gosUseOrthChB;
-	private boolean gosUseOrth;
+//	@FXML
+//	private CheckBox gosUseOrthChB;
+//	private boolean gosUseOrth;

 	@FXML
 	private Button chooseResultsLocationB;
@ -102,21 +102,21 @@ public class CorpusTab {
 		});
 		readHeaderInfoChB.setTooltip(new Tooltip(TOOLTIP_readHeaderInfoChB));

-		gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
-			gosUseOrth = newValue;
-			corpus.setGosOrthMode(gosUseOrth);
-//			wordFormationTab.setDisable(gosUseOrth);
-			satNew2Controller.toggleMode(null);
-			oneWordTabController.toggleMode(null);
-			catController.toggleMode(null);
-
-			logger.info("gosUseOrth: ", gosUseOrth);
-		});
+//		gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
+//			gosUseOrth = newValue;
+//			corpus.setGosOrthMode(gosUseOrth);
+////			wordFormationTab.setDisable(gosUseOrth);
+//			satNew2Controller.toggleMode(null);
+//			oneWordTabController.toggleMode(null);
+//			catController.toggleMode(null);
+//
+//			logger.info("gosUseOrth: ", gosUseOrth);
+//		});

 		chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null));

 		// set labels and toggle visibility
-		toggleGosChBVisibility();
+//		toggleGosChBVisibility();

 		chooseCorpusLabelContent = Messages.LABEL_CORPUS_LOCATION_NOT_SET;
 		chooseCorpusL.setText(chooseCorpusLabelContent);
@ -227,7 +227,7 @@ public class CorpusTab {
 	private void setResults() {
 		// if everything is ok
 		// check and enable checkbox if GOS
-		toggleGosChBVisibility();
+//		toggleGosChBVisibility();

 		// set default results location
 		String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath();
@ -420,9 +420,9 @@ public class CorpusTab {
 	/**
 	 * Hides GOS related checkbox until needed.
 	 */
-	private void toggleGosChBVisibility() {
-		gosUseOrthChB.setVisible(corpus != null && corpus.getCorpusType() != null && corpus.getCorpusType() == CorpusType.GOS);
-	}
+//	private void toggleGosChBVisibility() {
+//		gosUseOrthChB.setVisible(corpus != null && corpus.getCorpusType() != null && corpus.getCorpusType() == CorpusType.GOS);
+//	}

 	private String detectCorpusType(Collection<File> corpusFiles, String corpusLocation) {
 		// check that we recognize this corpus
--- a/src/main/java/gui/Messages.java
+++ b/src/main/java/gui/Messages.java
@ -55,6 +55,7 @@ public class Messages {
 	public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.";
 	public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.";
 	public static final String TOOLTIP_readNotePunctuationsChB = "Ločila med povedmi se upoštevajo v vsakem primeru.";
+	public static final String TOOLTIP_readDisplayTaxonomyChB = "V izhodni datoteki bodo prikazane tudi statistike po taksonomijah.";



--- a/src/main/java/gui/OneWordAnalysisTab.java
+++ b/src/main/java/gui/OneWordAnalysisTab.java
@ -45,6 +45,10 @@ public class OneWordAnalysisTab {
    private CheckComboBox<String> taxonomyCCB;
    private ArrayList<String> taxonomy;

+    @FXML
+    private CheckBox displayTaxonomyChB;
+    private boolean displayTaxonomy;
+
    @FXML
    private ComboBox<String> calculateForCB;
    private CalculateFor calculateFor;
@ -91,7 +95,7 @@ public class OneWordAnalysisTab {
    private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
    private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
    private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
-    private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "različnica");
+    private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
    private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();

    // TODO: pass observables for taxonomy based on header scan
@ -192,6 +196,14 @@ public class OneWordAnalysisTab {
            taxonomyCCB.setDisable(true);
        }

+        displayTaxonomy = false;
+        // mirror the checkbox state into the field that is later copied into the filter
+        displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
+            displayTaxonomy = newValue;
+            logger.info("display taxonomy: {}", displayTaxonomy); // "{}" is required, otherwise the value argument is never rendered
+        });
+        displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
+
        // set default values
        minimalOccurrencesTF.setText("1");
        minimalOccurrences = 1;
@ -369,6 +381,7 @@ public class OneWordAnalysisTab {
        filter.setCalculateFor(calculateFor);
        filter.setMsd(msd);
        filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
+        filter.setDisplayTaxonomy(displayTaxonomy);
        filter.setAl(AnalysisLevel.STRING_LEVEL);
        filter.setSkipValue(0);
        filter.setIsCvv(false);
--- a/src/main/java/gui/StringAnalysisTabNew2.java
+++ b/src/main/java/gui/StringAnalysisTabNew2.java
@ -38,6 +38,10 @@ public class StringAnalysisTabNew2 {
    private ArrayList<Pattern> msd;
    private ArrayList<String> msdStrings;

+    @FXML
+    private CheckComboBox<String> alsoVisualizeCCB;
+    private ArrayList<String> alsoVisualize;
+
    @FXML
    private CheckComboBox<String> taxonomyCCB;
    private ArrayList<String> taxonomy;
@ -66,6 +70,10 @@ public class StringAnalysisTabNew2 {
    private CheckBox notePunctuationsChB;
    private boolean notePunctuations;

+    @FXML
+    private CheckBox displayTaxonomyChB;
+    private boolean displayTaxonomy;
+
    @FXML
    private TextField minimalOccurrencesTF;
    private Integer minimalOccurrences;
@ -111,6 +119,12 @@ public class StringAnalysisTabNew2 {
 //    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
+    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
+    private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
+    private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
+    private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
+    private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
+    private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();


    // TODO: pass observables for taxonomy based on header scan
@ -163,9 +177,39 @@ public class StringAnalysisTabNew2 {
        });
        notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB));

+        displayTaxonomy = false;
+        // mirror the checkbox state into the field that is later copied into the filter
+        displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
+            displayTaxonomy = newValue;
+            logger.info("display taxonomy: {}", displayTaxonomy); // "{}" is required, otherwise the value argument is never rendered
+        });
+        displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
+
        // calculateForCB
        calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
            calculateFor = CalculateFor.factory(newValue);
+
+            alsoVisualizeCCB.getItems().removeAll(); // NOTE(review): called with no arguments this removes nothing; the setAll calls below do the actual replacement
+            if(newValue.equals("lema")){ // offer only the extra columns that make sense for the chosen unit
+                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
+            } else if(newValue.equals("različnica")) {
+                if (corpus.getCorpusType() == CorpusType.GOS) // GOS corpora additionally offer the normalized form
+                    alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWordGos);
+                else
+                    alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
+            } else if(newValue.equals("normalizirana različnica")) {
+                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
+            }else { // any other choice: nothing extra can be shown
+                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
+            }
+            alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> { // NOTE(review): a fresh listener is attached on every calculateFor change and none is removed here - they appear to accumulate
+                alsoVisualize = new ArrayList<>(); // rebuilt from scratch on each change of the checked items
+                ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
+                alsoVisualize.addAll(checkedItems);
+                logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
+            });
+            alsoVisualizeCCB.getCheckModel().clearChecks(); // start with nothing checked for the new item list
+
            logger.info("calculateForCB:", calculateFor.toString());
        });

@ -205,6 +249,16 @@ public class StringAnalysisTabNew2 {
        msdTF.setText("");
        msd = new ArrayList<>();

+        alsoVisualizeCCB.getItems().removeAll();
+        alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
+        alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
+            alsoVisualize = new ArrayList<>();
+            ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
+            alsoVisualize.addAll(checkedItems);
+            logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
+        });
+        alsoVisualizeCCB.getCheckModel().clearChecks();
+
        // taxonomy
        if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
            taxonomyCCB.getItems().removeAll();
@ -424,7 +478,11 @@ public class StringAnalysisTabNew2 {
 //            if (corpus.getCorpusType() == CorpusType.GOS)
 //                calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
 //            else
-            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
+            if (corpus.getCorpusType() == CorpusType.GOS)
+                calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
+            else
+                calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
+//            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);

        } else if (mode == MODE.LETTER) {
            paneWords.setVisible(false);
@ -462,11 +520,13 @@ public class StringAnalysisTabNew2 {
        filter.setCalculateFor(calculateFor);
        filter.setMsd(msd);
        filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
+        filter.setDisplayTaxonomy(displayTaxonomy);
        filter.setAl(AnalysisLevel.STRING_LEVEL);
        filter.setSkipValue(skipValue);
        filter.setIsCvv(calculateCvv);
        filter.setSolarFilters(solarFiltersMap);
        filter.setNotePunctuations(notePunctuations);
+        filter.setMultipleKeys(alsoVisualize);
        filter.setMinimalOccurrences(minimalOccurrences);
        filter.setMinimalTaxonomy(minimalTaxonomy);

--- a/src/main/resources/gui/CharacterAnalysisTab.fxml
+++ b/src/main/resources/gui/CharacterAnalysisTab.fxml
@ -27,17 +27,21 @@
                <RadioButton fx:id="varietyRB" mnemonicParsing="false" text="različnica" toggleGroup="$calculateForRB" />
            </children>
        </HBox>
-        <Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" />
-        <Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD" />
-        <TextField fx:id="msdTF" layoutX="185.0" layoutY="160.0" prefWidth="180.0" />
-        <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija" />
-        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0" />

-        <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Min. št. pojavitev" />
-        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" />
+        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
+        <CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />

-        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. taksonomij" />
-        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
+        <Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />
+        <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD" />
+        <TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0" />
+        <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija" />
+        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0" />
+
+        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />
+        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
+
+        <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />
+        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />

        <Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
            <children>
@ -45,7 +49,7 @@
            </children>
        </Pane>

-        <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="422.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
+        <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
    </Pane>

    <Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:" />
--- a/src/main/resources/gui/CorpusTab.fxml
+++ b/src/main/resources/gui/CorpusTab.fxml
@ -19,7 +19,7 @@
        <Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0">
            <children>
                <Label fx:id="chooseCorpusL" prefHeight="50.0" prefWidth="704.0" text="Label"/>
-                <CheckBox fx:id="gosUseOrthChB" layoutY="65.0" mnemonicParsing="false" text="Uporabi pogovorni zapis"/>
+                <!--<CheckBox fx:id="gosUseOrthChB" layoutY="65.0" mnemonicParsing="false" text="Uporabi pogovorni zapis"/>-->
            </children>
        </Pane>
        <ProgressIndicator fx:id="locationScanPI" layoutX="10.0" layoutY="60.0" prefHeight="50.0" progress="0.0"/>
--- a/src/main/resources/gui/OneWordAnalysisTab.fxml
+++ b/src/main/resources/gui/OneWordAnalysisTab.fxml
@ -32,20 +32,24 @@

        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
        <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
+
+
+        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
+        <CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
+
        <!-- MSD and Taxonomy separated -->
+        <Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />

-        <Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" />
+        <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/>
+        <TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0"/>
+        <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/>
+        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/>

-        <Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD"/>
-        <TextField fx:id="msdTF" layoutX="185.0" layoutY="160.0" prefWidth="180.0"/>
-        <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija"/>
-        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0"/>
+        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />
+        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />

-        <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Min. št. pojavitev" />
-        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" />
-
-        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. taksonomij" />
-        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
+        <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />
+        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />

        <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
                prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
--- a/src/main/resources/gui/StringAnalysisTabNew2.fxml
+++ b/src/main/resources/gui/StringAnalysisTabNew2.fxml
@ -19,8 +19,8 @@

        <Pane fx:id="paneWords">
            <children>
-                <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Preskok besed" />
-                <ComboBox fx:id="skipValueCB" layoutX="185.0" layoutY="100.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
+                <Label layoutX="10.0" layoutY="180.0" prefHeight="25.0" text="Preskok besed" />
+                <ComboBox fx:id="skipValueCB" layoutX="185.0" layoutY="180.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
                    <items>
                        <FXCollections fx:factory="observableArrayList">
                            <String fx:value="0" />
@ -37,17 +37,6 @@
            </children>
        </Pane>
        <!-- for some reason following two ComboBoxes have to be below paneWords -->
-        <ComboBox fx:id="ngramValueCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
-            <items>
-                <FXCollections fx:factory="observableArrayList">
-                    <String fx:value="2" />
-                    <String fx:value="3" />
-                    <String fx:value="4" />
-                    <String fx:value="5" />
-                </FXCollections>
-            </items>
-        </ComboBox>
-
        <Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za" />
        <ComboBox fx:id="calculateForCB" layoutX="185.0" layoutY="20.0" minWidth="180.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
            <items>
@ -61,34 +50,53 @@
            </items>
        </ComboBox>

-        <Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Upoštevaj ločila" />
-        <CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="145.0" selected="true" />
+        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
+        <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
+
+        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
+        <CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
+
+        <Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="N-gram nivo" />
+        <ComboBox fx:id="ngramValueCB" layoutX="185.0" layoutY="140.0" prefHeight="25.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
+            <items>
+                <FXCollections fx:factory="observableArrayList">
+                    <String fx:value="2" />
+                    <String fx:value="3" />
+                    <String fx:value="4" />
+                    <String fx:value="5" />
+                </FXCollections>
+            </items>
+        </ComboBox>
+
+
+        <Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Upoštevaj ločila" />
+        <CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="true" />


        <!-- MSD and Taxonomy separated -->

-        <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" />
+        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Omejitev podatkov" />

-        <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD" />
-        <TextField fx:id="msdTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" />
-        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija" />
-        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0" />
+        <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Oznaka MSD" />
+        <TextField fx:id="msdTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
+        <Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Taksonomija" />
+        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="360.0" prefHeight="25.0" prefWidth="180.0" />
+
+        <Label layoutX="10.0" layoutY="400.0" prefHeight="25.0" text="Min. št. pojavitev" />
+        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="400.0" prefWidth="180.0" />

-        <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. pojavitev" />
-        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />

-        <Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Min. št. taksonomij" />
-        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="360.0" prefWidth="180.0" />

-        <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />

-        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="N-gram nivo" />


    </Pane>
+    <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
    <Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
-        <Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />
-        <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
+        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" />
+        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
+        <Label fx:id="solarFilters" layoutX="10.0" layoutY="100.0" text="Izbrani filtri:" />
+        <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="300.0" prefWidth="275.0" text=" " wrapText="true" />
        <!-- samoglasniki/soglasniki -->
        <Pane fx:id="paneLetters">
            <children>