Added translation fixes + punctuation in export fix

2019-03-19 14:06:39 +01:00
parent 39624fa4f2
commit 10666b4453
19 changed files with 586 additions and 421 deletions
@@ -1744,7 +1744,7 @@ public class XML_processing {

            // if we're calculating values for letters, omit words that are shorter than string length
            if (filter.getNgramValue() == 0) {
-                sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord(filter.getWordParts()).length() < filter.getStringLength())
+                sentence.removeIf(w -> ((filter.getCalculateFor() == CalculateFor.WORD || filter.getCalculateFor() == CalculateFor.LOWERCASE_WORD) && w.getWord(filter.getWordParts()).length() < filter.getStringLength())
                        || (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma(filter.getWordParts()).length() < filter.getStringLength()));
            }
        }
@@ -1766,7 +1766,7 @@ public class XML_processing {

    public static Word createWord(String word, String lemma, String msd, String normalizedWord, Filter f){
        List<String> wString = new ArrayList<>();
-        if (f.getWordParts().contains(CalculateFor.WORD))
+        if (f.getWordParts().contains(CalculateFor.WORD) || f.getWordParts().contains(CalculateFor.LOWERCASE_WORD))
            wString.add(word);
        if (f.getWordParts().contains(CalculateFor.LEMMA))
            wString.add(lemma);
@@ -222,6 +222,12 @@ public class Ngrams {
 						.map(w -> w.getLemma(wordParts))
 						.collect(Collectors.toList()));
 				return StringUtils.join(candidate, " ");
+			case LOWERCASE_WORD:
+				candidate.addAll(ngramCandidate
+						.stream()
+						.map(w -> w.getWord(wordParts).toLowerCase())
+						.collect(Collectors.toList()));
+				return StringUtils.join(candidate, " ");
 			case WORD:
 				candidate.addAll(ngramCandidate
 						.stream()
@@ -298,6 +304,10 @@ public class Ngrams {
 					continue;
 				}

+				if(stats.getFilter().getCalculateFor().equals(CalculateFor.LOWERCASE_WORD)){
+					word = word.toLowerCase();
+				}
+
 				for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
 					// TODO: locila?

@@ -22,6 +22,7 @@ public enum CalculateFor {
 //	DIST_LEMMAS("lema");

 	WORD("calculateFor.WORD"),
+	LOWERCASE_WORD("calculateFor.LOWERCASE_WORD"),
 	NORMALIZED_WORD("calculateFor.NORMALIZED_WORD"),
 	LEMMA("calculateFor.LEMMA"),
 	MORPHOSYNTACTIC_SPECS("calculateFor.MORPHOSYNTACTIC_SPECS"),
@@ -47,6 +48,9 @@ public enum CalculateFor {
 			if (WORD.toString().equals(cf)) {
 				return WORD;
 			}
+			if (LOWERCASE_WORD.toString().equals(cf)) {
+				return LOWERCASE_WORD;
+			}
 			if (LEMMA.toString().equals(cf)) {
 				return LEMMA;
 			}
@@ -71,6 +75,8 @@ public enum CalculateFor {
 			switch (this) {
 				case WORD:
 					return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.word");
+				case LOWERCASE_WORD:
+					return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.lowercaseWord");
 				case NORMALIZED_WORD:
 					return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.normalizedWord");
 				case LEMMA:
@@ -92,6 +98,8 @@ public enum CalculateFor {
 			switch (this) {
 				case WORD:
 					return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.word");
+				case LOWERCASE_WORD:
+					return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.lowercaseWord");
 				case NORMALIZED_WORD:
 					return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.normalizedWord");
 				case LEMMA:
@@ -118,6 +126,8 @@ public enum CalculateFor {
 			switch (this) {
 				case WORD:
 					return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.word");
+				case LOWERCASE_WORD:
+					return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.lowercaseWord");
 				case NORMALIZED_WORD:
 					return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.normalizedWord");
 				case LEMMA:
@@ -139,6 +149,8 @@ public enum CalculateFor {
 			switch (this) {
 				case WORD:
 					return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.word");
+				case LOWERCASE_WORD:
+					return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.lowercaseWord");
 				case NORMALIZED_WORD:
 					return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.normalizedWord");
 				case LEMMA:
@@ -168,6 +180,8 @@ public enum CalculateFor {
 		switch(this){
 			case WORD:
 				return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.word2");
+			case LOWERCASE_WORD:
+				return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.lowercaseWord2");
 			case NORMALIZED_WORD:
 				return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.normalizedWord2");
 			case LEMMA:
@@ -194,6 +208,8 @@ public enum CalculateFor {
 		switch(this){
 			case WORD:
 				return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.word2");
+			case LOWERCASE_WORD:
+				return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.lowercaseWord2");
 			case NORMALIZED_WORD:
 				return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.normalizedWord2");
 			case LEMMA:
@@ -221,6 +237,8 @@ public enum CalculateFor {
 				case WORD:
 				case DIST_WORDS:
 					return I18N.get("exportTable.part.word3");
+				case LOWERCASE_WORD:
+					return I18N.get("exportTable.part.lowercaseWord3");
 				case NORMALIZED_WORD:
 					return I18N.get("exportTable.part.normalizedWord3");
 				case LEMMA:
@@ -240,6 +258,8 @@ public enum CalculateFor {
 			case WORD:
 			case DIST_WORDS:
 				return I18N.get("exportTable.part.word3") + " " + I18N.get("exportTable.part.set");
+			case LOWERCASE_WORD:
+				return I18N.get("exportTable.part.lowercaseWord3") + " " + I18N.get("exportTable.part.set");
 			case NORMALIZED_WORD:
 				return I18N.get("exportTable.part.normalizedWord3") + " " + I18N.get("exportTable.part.set");
 			case LEMMA:
@@ -36,6 +36,7 @@ public class Corpus {
 	boolean hasMsdData;
 	private ArrayList<String> validationErrors;
 	private String corpusName = "";
+	private String punctuation = "punctuation.COMMA";

 	public Corpus() {
 		validationErrors = new ArrayList<>();
@@ -52,6 +53,16 @@ public class Corpus {
 		logger.info("Corpus.set: ", corpusName);
 	}

+	public String getPunctuation() { // returns the stored punctuation option key (field is initialised to "punctuation.COMMA")
+		return punctuation;
+	}
+
+	public void setPunctuation(String punctuation) {
+		// Stores the punctuation option key (e.g. "punctuation.COMMA"); the "{}" placeholder makes the logger print the value.
+		this.punctuation = punctuation;
+		logger.info("Punctuation.set: {}", punctuation);
+	}
+
 	public CorpusType getCorpusType() {
 		return corpusType;
 	}
@@ -320,6 +320,10 @@ public class Filter implements Cloneable {
        ArrayList<CalculateFor> oldWp = ((ArrayList<CalculateFor>) filter.get(WORD_PARTS));

        switch (wp) {
+			case LOWERCASE_WORD:
+				if (!oldWp.contains(CalculateFor.LOWERCASE_WORD))
+					oldWp.add(CalculateFor.LOWERCASE_WORD);
+				break;
            case WORD:
            case DIST_WORDS:
                if (!oldWp.contains(CalculateFor.WORD))
@@ -324,28 +324,28 @@ public class StatisticsNew {
 		return true;
 	}

-	public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
-		filter.setAl(AnalysisLevel.WORD_FORMATION);
-		resultTitle = generateResultTitle();
-
-		if (useDB) {
-			result = db.getDump();
-			db.delete();
-		}
-
-		// if no results and nothing to save, return false
-		if (!(result.size() > 0)) {
-			analysisProducedResults = false;
-			return false;
-		} else {
-			analysisProducedResults = true;
-		}
-
-		WordFormation.calculateStatistics(this);
-
-		Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
-		return true;
-	}
+//	public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
+//		filter.setAl(AnalysisLevel.WORD_FORMATION);
+//		resultTitle = generateResultTitle();
+//
+//		if (useDB) {
+//			result = db.getDump();
+//			db.delete();
+//		}
+//
+//		// if no results and nothing to save, return false
+//		if (!(result.size() > 0)) {
+//			analysisProducedResults = false;
+//			return false;
+//		} else {
+//			analysisProducedResults = true;
+//		}
+//
+//		WordFormation.calculateStatistics(this);
+//
+//		Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
+//		return true;
+//	}

 	private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) {
 		Map<String, Map<MultipleHMKeys, Long>> sorted = new HashMap<>();
@@ -682,6 +682,11 @@ public class StatisticsNew {
 		info.put(I18N.get("exportHeader.minOccurrences"), String.valueOf(filter.getMinimalOccurrences()));
 		info.put(I18N.get("exportHeader.minTaxonomies"), String.valueOf(filter.getMinimalTaxonomy()));

+		// if not letters extraction
+		if(filter.getNgramValue() > 0) {
+			info.put(I18N.get("exportHeader.minRelFre"), String.valueOf(filter.getMinimalRelFre()));
+		}
+
 		if (corpus.getCorpusType() == CorpusType.SOLAR) {
 			HashMap<String, ObservableList<String>> filters = corpus.getSolarSelectedFilters();

@@ -28,7 +28,11 @@ public interface Word {
 	}

 	default String getWord(ArrayList<CalculateFor> wordParts){
-		return get(wordParts, CalculateFor.WORD);
+		String w = get(wordParts, CalculateFor.WORD); // look up the WORD part first
+		if (w == null){ // WORD lookup returned null: fall back to the LOWERCASE_WORD lookup
+			return get(wordParts, CalculateFor.LOWERCASE_WORD);
+		}
+		return w;
 	}

 	default String getLemma(ArrayList<CalculateFor> wordParts){
@@ -102,9 +106,9 @@ public interface Word {
 		String returnValue = "";

 		if (cvv) {
-			returnValue = calculateFor == CalculateFor.WORD ? getCVVWord(cf) : getCVVLemma(cf);
+			returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getCVVWord(cf) : getCVVLemma(cf);
 		} else {
-			returnValue = calculateFor == CalculateFor.WORD ? getWord(cf) : getLemma(cf);
+			returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getWord(cf) : getLemma(cf);
 		}

 		return returnValue;
@@ -179,7 +179,7 @@ public class CharacterAnalysisTab {
 	private ChangeListener<Boolean> minimalOccurrencesListener;
 	private ChangeListener<Boolean> minimalTaxonomyListener;

-	private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA"};
+	private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
 	private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));

 	private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
@@ -623,7 +623,7 @@ public class CharacterAnalysisTab {
 			}

 			// if calculateFor was selected for something other than a word or a lemma -> reset
-			if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
+			if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA || calculateFor == CalculateFor.LOWERCASE_WORD)) {
 				// if the user selected something else before selecting ngram for letters, reset that choice
 				calculateFor = CalculateFor.WORD;

@@ -92,6 +92,9 @@ public class CorpusTab {
 	@FXML
 	public Label outputNameL;

+	@FXML
+	public Label punctuationL;
+
 	@FXML
 	public ImageView chooseCorpusLocationI;

@@ -107,6 +110,9 @@ public class CorpusTab {
 	@FXML
 	public ImageView outputNameI;

+	@FXML
+	public ImageView punctuationI;
+
 	@FXML
 	public TextField outputNameTF;
 	public String outputName = "";
@@ -115,6 +121,10 @@ public class CorpusTab {
 	public ComboBox<String> selectReaderCB;
 	public String selectReader;

+	@FXML
+	public ComboBox<String> punctuationCB;
+	public String punctuation;
+
 	@FXML
 	private ProgressIndicator locationScanPI;

@@ -137,7 +147,7 @@ public class CorpusTab {
 	private OneWordAnalysisTab oneWordTabController;
 	private CharacterAnalysisTab catController;
 	private FiltersForSolar ffsController;
-	private WordFormationTab wfController;
+//	private WordFormationTab wfController;
 	private WordLevelTab wlController;
 	private HostServices hostService;

@@ -146,6 +156,10 @@ public class CorpusTab {

 	private static final String [] SELECT_READER_ARRAY = {"VERT + REGI", "XML (Šolar 1.0)", "XML (GOS 1.0)", "XML (ssj500k 2.1)", "XML (Gigafida 2.0)", "XML (Gigafida 1.0, Kres 1.0)"};
 	private static final ArrayList<String> SELECT_READER = new ArrayList<>(Arrays.asList(SELECT_READER_ARRAY));
+
+	private static final String [] PUNCTUATION_ARRAY = {"punctuation.COMMA", "punctuation.POINT"};
+	private static final ArrayList<String> PUNCTUATION = new ArrayList<>(Arrays.asList(PUNCTUATION_ARRAY));
+
 	private Collection<File> corpusFiles;
 	private File selectedDirectory;

@@ -219,6 +233,23 @@ public class CorpusTab {

 		selectReaderCB.getSelectionModel().select(0);

+		// comma / point choice
+		punctuationCB.valueProperty().addListener((observable, oldValue, newValue) -> {
+			if(newValue == null){
+//				newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
+				newValue = I18N.getTranslatedValue(oldValue, PUNCTUATION);
+				punctuationCB.getSelectionModel().select(newValue);
+			}
+//			System.out.println(oldValue);
+//			System.out.println(newValue);
+			punctuation = newValue;
+			if(corpus != null) {
+				corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
+			}
+		});
+
+		punctuationCB.getSelectionModel().select(0);
+
 		// add listeners
 		chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
 //		chooseCorpusLocationB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_chooseCorpusLocationB")));
@@ -301,12 +332,16 @@ public class CorpusTab {
 		readHeaderInfoL.textProperty().bind(I18N.createStringBinding("label.readHeaderInfo"));
 		selectReaderL.textProperty().bind(I18N.createStringBinding("label.selectReader"));
 		outputNameL.textProperty().bind(I18N.createStringBinding("label.outputName"));
+		punctuationL.textProperty().bind(I18N.createStringBinding("label.punctuation"));

 		addTooltipToImage(chooseCorpusLocationI, I18N.createStringBinding("label.corpusTab.chooseCorpusLocationH"));
 		addTooltipToImage(readHeaderInfoI, I18N.createStringBinding("label.corpusTab.readHeaderInfoH"));
 		addTooltipToImage(chooseResultsLocationI, I18N.createStringBinding("label.corpusTab.chooseResultsLocationH"));
 		addTooltipToImage(selectReaderI, I18N.createStringBinding("label.corpusTab.selectReaderH"));
 		addTooltipToImage(outputNameI, I18N.createStringBinding("label.corpusTab.outputNameH"));
+		addTooltipToImage(punctuationI, I18N.createStringBinding("label.corpusTab.punctuationH"));
+
+		punctuationCB.itemsProperty().bind(I18N.createObjectBinding(PUNCTUATION));
 	}

 	private void togglePiAndSetCorpusWrapper(boolean piIsActive) {
@@ -456,8 +491,9 @@ public class CorpusTab {
 				}
 			}
 		}
-		System.out.println(outputName);
+//		System.out.println(outputName);
 		corpus.setCorpusName(outputName);
+		corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
 	}

 	/**
@@ -167,6 +167,30 @@ public final class I18N {
 //        return MessageFormat.format(bundle.getString(key), args);
    }

+    /** Maps a displayed (translated) value back to its resource key; returns null when oldValue is null or matches no key. */
+    public static String getRootValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
+        if (oldValue == null) {
+            return null; // nothing to match; avoids a NullPointerException on equals() below
+        }
+        // first compare against the locale that is NOT currently active (sl-SI when English is active, else English)
+        Locale loc = getLocale().equals(Locale.ENGLISH)
+                ? new Locale.Builder().setLanguage("sl").setRegion("SI").build()
+                : Locale.ENGLISH;
+        for (String el : nGramComputeForLetters){
+            if (oldValue.equals(getIndependent(el, loc))){
+                return el;
+            }
+        }

+        // in case translated language doesn't contain specified word, try original language
+        for (String el : nGramComputeForLetters){
+            if (oldValue.equals(get(el))){
+                return el;
+            }
+        }
+        return null;
+    }
+
    public static String getTranslatedValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
        Locale loc;
        if(getLocale().equals(Locale.ENGLISH)) {
@@ -210,13 +210,13 @@ public class OneWordAnalysisTab {

    //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
 //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
-    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
+    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
    private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
    //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
-    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"};
+    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
    private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
    //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
-    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
+    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
    private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
    //	private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
    private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
@@ -300,7 +300,7 @@ public class OneWordAnalysisTab {
               alsoVisualizeCCB.getItems().removeAll();
               if (newValue.equals(CalculateFor.LEMMA.toString())) {
                   alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
-               } else if (newValue.equals(CalculateFor.WORD.toString())) {
+               } else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) {
                   if (corpus.getCorpusType() == CorpusType.GOS)
                       alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
                   else
@@ -250,13 +250,13 @@ public class StringAnalysisTabNew2 {
 //    private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();

    //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
-    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
+    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
    private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
    //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
-    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"};
+    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
    private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
    //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
-    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
+    private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
    private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
    //	private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
    private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
@@ -408,7 +408,7 @@ public class StringAnalysisTabNew2 {
                alsoVisualizeCCB.getItems().removeAll();
                if (newValue.equals(CalculateFor.LEMMA.toString())) {
                    alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
-                } else if (newValue.equals(CalculateFor.WORD.toString())) {
+                } else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) {
                    if (corpus.getCorpusType() == CorpusType.GOS)
                        alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
                    else
@@ -1,260 +1,260 @@
-package gui;
-
-import static alg.XML_processing.*;
-import static gui.GUIController.*;
-
-import java.io.File;
-import java.io.UnsupportedEncodingException;
-import java.util.*;
-
-import javafx.application.HostServices;
-import javafx.scene.control.*;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.controlsfx.control.CheckComboBox;
-
-import data.*;
-import javafx.collections.ListChangeListener;
-import javafx.collections.ObservableList;
-import javafx.concurrent.Task;
-import javafx.fxml.FXML;
-import javafx.scene.layout.AnchorPane;
-
-@SuppressWarnings("Duplicates")
-public class WordFormationTab {
-	public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
-
-	public AnchorPane wordAnalysisTabPane;
-
-	@FXML
-	public Label selectedFiltersLabel;
-	@FXML
-	public Label solarFilters;
-
-	@FXML
-	private CheckComboBox<String> taxonomyCCB;
-	private ArrayList<Taxonomy> taxonomy;
-
-	@FXML
-	private TextField minimalOccurrencesTF;
-	private Integer minimalOccurrences;
-
-	@FXML
-	private TextField minimalTaxonomyTF;
-	private Integer minimalTaxonomy;
-
-	@FXML
-	private Button computeB;
-
-	@FXML
-	public ProgressBar ngramProgressBar;
-	@FXML
-	public Label progressLabel;
-
-	@FXML
-	private Hyperlink helpH;
-
-	private Corpus corpus;
-	private HashMap<String, HashSet<String>> solarFiltersMap;
-	private HostServices hostService;
-
-	// after header scan
-	private ObservableList<String> taxonomyCCBValues;
-	private CorpusType currentCorpusType;
-	private boolean useDb;
-
-
-	public void init() {
-		// taxonomy
-		if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
-			taxonomyCCB.getItems().removeAll();
-			taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
-			taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
-				taxonomy = new ArrayList<>();
-				ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
-				ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus);
-				taxonomy.addAll(checkedItemsTaxonomy);
-				logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
-			});
-			taxonomyCCB.getCheckModel().clearChecks();
-		} else {
-			taxonomyCCB.setDisable(true);
-		}
-
-		// set default values
-		minimalOccurrencesTF.setText("1");
-		minimalOccurrences = 1;
-
-		minimalTaxonomyTF.setText("1");
-		minimalTaxonomy = 1;
-
-		minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
-			if (!newValue) {
-				// focus lost
-				String value = minimalOccurrencesTF.getText();
-				if (!ValidationUtil.isEmpty(value)) {
-					if (!ValidationUtil.isNumber(value)) {
-						logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
-						GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
-					} else {
-						minimalOccurrences = Integer.parseInt(value);
-					}
-				} else {
-					minimalOccurrencesTF.setText("1");
-					minimalOccurrences = 1;
-				}
-			}
-		});
-
-		minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
-			if (!newValue) {
-				// focus lost
-				String value = minimalTaxonomyTF.getText();
-				if (!ValidationUtil.isEmpty(value)) {
-					if (!ValidationUtil.isNumber(value)) {
-						logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
-						GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
-					} else {
-						minimalTaxonomy = Integer.parseInt(value);
-					}
-				} else {
-					minimalTaxonomyTF.setText("1");
-					minimalTaxonomy = 1;
-				}
-			}
-		});
-
-		computeB.setOnAction(e -> {
-			compute();
-			logger.info("compute button");
-		});
-
-        helpH.setOnAction(e -> openHelpWebsite());
-	}
-
-	private void compute() {
-		Filter filter = new Filter();
-		filter.setNgramValue(1);
-		filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
-		filter.setTaxonomy(taxonomy);
-		filter.setAl(AnalysisLevel.STRING_LEVEL);
-		filter.setSkipValue(0);
-		filter.setMsd(new ArrayList<>());
-		filter.setIsCvv(false);
-		filter.setSolarFilters(solarFiltersMap);
-		filter.setMinimalOccurrences(minimalOccurrences);
-		filter.setMinimalTaxonomy(minimalTaxonomy);
-
-		String message = Validation.validateForStringLevel(filter);
-		if (message == null) {
-			// no errors
-			logger.info("Executing: ", filter.toString());
-			StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
-			execute(statistic);
-		} else {
-			logAlert(message);
-			showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
-		}
-	}
-
-	private void openHelpWebsite(){
-		hostService.showDocument(Messages.HELP_URL);
-	}
-
-	private void execute(StatisticsNew statistic) {
-		logger.info("Started execution: ", statistic.getFilter());
-
-		Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
-
-		final Task<Void> task = new Task<Void>() {
-			@SuppressWarnings("Duplicates")
-			@Override
-			protected Void call() throws Exception {
-				int i = 0;
-				Date startTime = new Date();
-				Date previousTime = new Date();
-				for (File f : corpusFiles) {
-					readXML(f.toString(), statistic);
-					i++;
-					this.updateProgress(i, corpusFiles.size());
-					this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
-				}
-
-				return null;
-			}
-		};
-
-		ngramProgressBar.progressProperty().bind(task.progressProperty());
-		progressLabel.textProperty().bind(task.messageProperty());
-
-		task.setOnSucceeded(e -> {
-			try {
-				// first, we have to recalculate all occurrences to detailed statistics
-				boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
-
-				if (successullySaved) {
-					showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
-				} else {
-					showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
-				}
-			} catch (UnsupportedEncodingException e1) {
-				showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
-				logger.error("Error while saving", e1);
-			}
-
-			ngramProgressBar.progressProperty().unbind();
-			ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
-			progressLabel.textProperty().unbind();
-			progressLabel.setText("");
-		});
-
-		task.setOnFailed(e -> {
-			showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
-			logger.error("Error while executing", e);
-			ngramProgressBar.progressProperty().unbind();
-			ngramProgressBar.setProgress(0.0);
-			ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
-			progressLabel.textProperty().unbind();
-			progressLabel.setText("");
-		});
-
-		final Thread thread = new Thread(task, "task");
-		thread.setDaemon(true);
-		thread.start();
-	}
-
-	private void logAlert(String alert) {
-		logger.info("alert: " + alert);
-	}
-
-
-	public void setCorpus(Corpus corpus) {
-		this.corpus = corpus;
-
-		if (corpus.getCorpusType() != CorpusType.SOLAR) {
-			setSelectedFiltersLabel(null);
-		} else {
-			setSelectedFiltersLabel("/");
-		}
-	}
-
-	public void setSelectedFiltersLabel(String content) {
-		if (content != null) {
-			solarFilters.setVisible(true);
-			selectedFiltersLabel.setVisible(true);
-			selectedFiltersLabel.setText(content);
-		} else {
-			solarFilters.setVisible(false);
-			selectedFiltersLabel.setVisible(false);
-		}
-	}
-
-	public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
-		this.solarFiltersMap = solarFiltersMap;
-	}
-
-	public void setHostServices(HostServices hostServices){
-		this.hostService = hostServices;
-	}
-}
+//package gui;
+//
+//import static alg.XML_processing.*;
+//import static gui.GUIController.*;
+//
+//import java.io.File;
+//import java.io.UnsupportedEncodingException;
+//import java.util.*;
+//
+//import javafx.application.HostServices;
+//import javafx.scene.control.*;
+//import org.apache.commons.lang3.StringUtils;
+//import org.apache.logging.log4j.LogManager;
+//import org.apache.logging.log4j.Logger;
+//import org.controlsfx.control.CheckComboBox;
+//
+//import data.*;
+//import javafx.collections.ListChangeListener;
+//import javafx.collections.ObservableList;
+//import javafx.concurrent.Task;
+//import javafx.fxml.FXML;
+//import javafx.scene.layout.AnchorPane;
+//
+//@SuppressWarnings("Duplicates")
+//public class WordFormationTab {
+//	public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
+//
+//	public AnchorPane wordAnalysisTabPane;
+//
+//	@FXML
+//	public Label selectedFiltersLabel;
+//	@FXML
+//	public Label solarFilters;
+//
+//	@FXML
+//	private CheckComboBox<String> taxonomyCCB;
+//	private ArrayList<Taxonomy> taxonomy;
+//
+//	@FXML
+//	private TextField minimalOccurrencesTF;
+//	private Integer minimalOccurrences;
+//
+//	@FXML
+//	private TextField minimalTaxonomyTF;
+//	private Integer minimalTaxonomy;
+//
+//	@FXML
+//	private Button computeB;
+//
+//	@FXML
+//	public ProgressBar ngramProgressBar;
+//	@FXML
+//	public Label progressLabel;
+//
+//	@FXML
+//	private Hyperlink helpH;
+//
+//	private Corpus corpus;
+//	private HashMap<String, HashSet<String>> solarFiltersMap;
+//	private HostServices hostService;
+//
+//	// after header scan
+//	private ObservableList<String> taxonomyCCBValues;
+//	private CorpusType currentCorpusType;
+//	private boolean useDb;
+//
+//
+//	public void init() {
+//		// taxonomy
+//		if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+//			taxonomyCCB.getItems().removeAll();
+//			taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
+//			taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
+//				taxonomy = new ArrayList<>();
+//				ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
+//				ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus);
+//				taxonomy.addAll(checkedItemsTaxonomy);
+//				logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
+//			});
+//			taxonomyCCB.getCheckModel().clearChecks();
+//		} else {
+//			taxonomyCCB.setDisable(true);
+//		}
+//
+//		// set default values
+//		minimalOccurrencesTF.setText("1");
+//		minimalOccurrences = 1;
+//
+//		minimalTaxonomyTF.setText("1");
+//		minimalTaxonomy = 1;
+//
+//		minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
+//			if (!newValue) {
+//				// focus lost
+//				String value = minimalOccurrencesTF.getText();
+//				if (!ValidationUtil.isEmpty(value)) {
+//					if (!ValidationUtil.isNumber(value)) {
+//						logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
+//						GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
+//					} else {
+//						minimalOccurrences = Integer.parseInt(value);
+//					}
+//				} else {
+//					minimalOccurrencesTF.setText("1");
+//					minimalOccurrences = 1;
+//				}
+//			}
+//		});
+//
+//		minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
+//			if (!newValue) {
+//				// focus lost
+//				String value = minimalTaxonomyTF.getText();
+//				if (!ValidationUtil.isEmpty(value)) {
+//					if (!ValidationUtil.isNumber(value)) {
+//						logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
+//						GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
+//					} else {
+//						minimalTaxonomy = Integer.parseInt(value);
+//					}
+//				} else {
+//					minimalTaxonomyTF.setText("1");
+//					minimalTaxonomy = 1;
+//				}
+//			}
+//		});
+//
+//		computeB.setOnAction(e -> {
+//			compute();
+//			logger.info("compute button");
+//		});
+//
+//        helpH.setOnAction(e -> openHelpWebsite());
+//	}
+//
+//	private void compute() {
+//		Filter filter = new Filter();
+//		filter.setNgramValue(1);
+//		filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
+//		filter.setTaxonomy(taxonomy);
+//		filter.setAl(AnalysisLevel.STRING_LEVEL);
+//		filter.setSkipValue(0);
+//		filter.setMsd(new ArrayList<>());
+//		filter.setIsCvv(false);
+//		filter.setSolarFilters(solarFiltersMap);
+//		filter.setMinimalOccurrences(minimalOccurrences);
+//		filter.setMinimalTaxonomy(minimalTaxonomy);
+//
+//		String message = Validation.validateForStringLevel(filter);
+//		if (message == null) {
+//			// no errors
+//			logger.info("Executing: ", filter.toString());
+//			StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
+//			execute(statistic);
+//		} else {
+//			logAlert(message);
+//			showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
+//		}
+//	}
+//
+//	private void openHelpWebsite(){
+//		hostService.showDocument(Messages.HELP_URL);
+//	}
+//
+//	private void execute(StatisticsNew statistic) {
+//		logger.info("Started execution: ", statistic.getFilter());
+//
+//		Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
+//
+//		final Task<Void> task = new Task<Void>() {
+//			@SuppressWarnings("Duplicates")
+//			@Override
+//			protected Void call() throws Exception {
+//				int i = 0;
+//				Date startTime = new Date();
+//				Date previousTime = new Date();
+//				for (File f : corpusFiles) {
+//					readXML(f.toString(), statistic);
+//					i++;
+//					this.updateProgress(i, corpusFiles.size());
+//					this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
+//				}
+//
+//				return null;
+//			}
+//		};
+//
+//		ngramProgressBar.progressProperty().bind(task.progressProperty());
+//		progressLabel.textProperty().bind(task.messageProperty());
+//
+//		task.setOnSucceeded(e -> {
+//			try {
+//				// first, we have to recalculate all occurrences to detailed statistics
+//				boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
+//
+//				if (successullySaved) {
+//					showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
+//				} else {
+//					showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
+//				}
+//			} catch (UnsupportedEncodingException e1) {
+//				showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
+//				logger.error("Error while saving", e1);
+//			}
+//
+//			ngramProgressBar.progressProperty().unbind();
+//			ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
+//			progressLabel.textProperty().unbind();
+//			progressLabel.setText("");
+//		});
+//
+//		task.setOnFailed(e -> {
+//			showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
+//			logger.error("Error while executing", e);
+//			ngramProgressBar.progressProperty().unbind();
+//			ngramProgressBar.setProgress(0.0);
+//			ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
+//			progressLabel.textProperty().unbind();
+//			progressLabel.setText("");
+//		});
+//
+//		final Thread thread = new Thread(task, "task");
+//		thread.setDaemon(true);
+//		thread.start();
+//	}
+//
+//	private void logAlert(String alert) {
+//		logger.info("alert: " + alert);
+//	}
+//
+//
+//	public void setCorpus(Corpus corpus) {
+//		this.corpus = corpus;
+//
+//		if (corpus.getCorpusType() != CorpusType.SOLAR) {
+//			setSelectedFiltersLabel(null);
+//		} else {
+//			setSelectedFiltersLabel("/");
+//		}
+//	}
+//
+//	public void setSelectedFiltersLabel(String content) {
+//		if (content != null) {
+//			solarFilters.setVisible(true);
+//			selectedFiltersLabel.setVisible(true);
+//			selectedFiltersLabel.setText(content);
+//		} else {
+//			solarFilters.setVisible(false);
+//			selectedFiltersLabel.setVisible(false);
+//		}
+//	}
+//
+//	public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
+//		this.solarFiltersMap = solarFiltersMap;
+//	}
+//
+//	public void setHostServices(HostServices hostServices){
+//		this.hostService = hostServices;
+//	}
+//}
@@ -228,13 +228,13 @@ public class WordLevelTab {

 //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
 //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
-	private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA"};
+	private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
 	private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
 //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
-	private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"};
+	private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
 	private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
 //	private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
-	private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"};
+	private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"};
 	private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
 //	private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
 	private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
@@ -320,7 +320,7 @@ public class WordLevelTab {
 				alsoVisualizeCCB.getItems().removeAll();
 				if (newValue.equals(CalculateFor.LEMMA.toString())) {
 					alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
-				} else if (newValue.equals(CalculateFor.WORD.toString())) {
+				} else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) {
 					if (corpus.getCorpusType() == CorpusType.GOS)
 						alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
 					else
@@ -23,41 +23,41 @@ import data.Enums.WordLevelType;

@SuppressWarnings("unchecked")
 public class Export {
-	public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
-		JSONArray wrapper = new JSONArray();
-
-		for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
-			JSONArray data_wrapper = new JSONArray();
-			JSONObject metric = new JSONObject();
-
-			String title = p.getLeft();
-			Map<MultipleHMKeys, Long> map = p.getRight();
-
-			if (map.isEmpty())
-				continue;
-
-			long total = Util.mapSumFrequencies(map);
-
-			for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
-				JSONObject data_entry = new JSONObject();
-				data_entry.put("word", e.getKey());
-				data_entry.put("frequency", e.getValue());
-				data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
-
-				data_wrapper.add(data_entry);
-			}
-
-			metric.put("Title", title);
-			metric.put("data", data_wrapper);
-			wrapper.add(metric);
-		}
-
-		try (FileWriter file = new FileWriter("statistics.json")) {
-			file.write(wrapper.toJSONString());
-		} catch (IOException e) {
-			e.printStackTrace();
-		}
-	}
+//	public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
+//		JSONArray wrapper = new JSONArray();
+//
+//		for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
+//			JSONArray data_wrapper = new JSONArray();
+//			JSONObject metric = new JSONObject();
+//
+//			String title = p.getLeft();
+//			Map<MultipleHMKeys, Long> map = p.getRight();
+//
+//			if (map.isEmpty())
+//				continue;
+//
+//			long total = Util.mapSumFrequencies(map);
+//
+//			for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
+//				JSONObject data_entry = new JSONObject();
+//				data_entry.put("word", e.getKey());
+//				data_entry.put("frequency", e.getValue());
+//				data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
+//
+//				data_wrapper.add(data_entry);
+//			}
+//
+//			metric.put("Title", title);
+//			metric.put("data", data_wrapper);
+//			wrapper.add(metric);
+//		}
+//
+//		try (FileWriter file = new FileWriter("statistics.json")) {
+//			file.write(wrapper.toJSONString());
+//		} catch (IOException e) {
+//			e.printStackTrace();
+//		}
+//	}

 	public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
                                  StatisticsNew statistics, Filter filter) {
@@ -127,12 +127,6 @@ public class Export {

 		FILE_HEADER_AL.add(I18N.get("exportTable.totalRelativeFrequency"));

-        if (filter.getCollocability().size() > 0){
-            for (Collocability c : filter.getCollocability()) {
-                FILE_HEADER_AL.add(c.toHeaderString());
-            }
-        }
-
 		for (Taxonomy key : taxonomyResults.keySet()) {
 			if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
 				FILE_HEADER_AL.add(I18N.get("exportTable.absoluteFrequency") + " [" + key.toString() + "]");
@@ -141,6 +135,13 @@ public class Export {
 			}
 		}

+        if (filter.getCollocability().size() > 0){
+            for (Collocability c : filter.getCollocability()) {
+                FILE_HEADER_AL.add(c.toHeaderString());
+            }
+        }
+
+
 		if (filter.getWriteMsdAtTheEnd()) {
            String msd = "";
            int maxMsdLength = 0;
@@ -280,14 +281,14 @@ public class Export {


                    dataEntry.add(e.getValue().toString());
-                    dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal())));
-                    dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
+                    dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()), statistics.getCorpus().getPunctuation()));
+                    dataEntry.add(formatNumberForExport(((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue(), statistics.getCorpus().getPunctuation()));
                    for (Taxonomy key : taxonomyResults.keySet()){
                        if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
                            AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
                            dataEntry.add(frequency.toString());
-                            dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key)));
-                            dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue()));
+                            dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key), statistics.getCorpus().getPunctuation()));
+                            dataEntry.add(formatNumberForExport(((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue(), statistics.getCorpus().getPunctuation()));
 //                            dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
 //                            dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
                        }
@@ -296,7 +297,7 @@ public class Export {

                    if (filter.getCollocability().size() > 0){
                        for (Collocability c : filter.getCollocability()) {
-                            dataEntry.add(statistics.getCollocability().get(c).get(e.getKey()));
+                            dataEntry.add(formatNumberForLongExport(statistics.getCollocability().get(c).get(e.getKey()), statistics.getCorpus().getPunctuation()));
                        }
                    }

@@ -371,66 +372,66 @@ public class Export {
 		return s;
 	}

-	public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
-		//Delimiter used in CSV file
-		String NEW_LINE_SEPARATOR = "\n";
-
-		//CSV file header
-		Object[] FILE_HEADER = {"word", "frequency", "percent"};
-
-		String fileName = "";
-
-		fileName = title.replace(": ", "-");
-		fileName = fileName.replace(" ", "_").concat(".csv");
-
-		fileName = resultsPath.toString().concat(File.separator).concat(fileName);
-
-		OutputStreamWriter fileWriter = null;
-		CSVPrinter csvFilePrinter = null;
-
-		//Create the CSVFormat object with "\n" as a record delimiter
-		CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
-
-		try {
-			//initialize FileWriter object
-			fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
-
-			//initialize CSVPrinter object
-			csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
-
-			// write info block
-			printHeaderInfo(csvFilePrinter, headerInfoBlock);
-
-			//Create CSV file header
-			csvFilePrinter.printRecord(FILE_HEADER);
-
-			for (Object[] resultEntry : result) {
-				List dataEntry = new ArrayList<>();
-				dataEntry.add(resultEntry[0]);
-				dataEntry.add(resultEntry[1]);
-				dataEntry.add(formatNumberAsPercent(resultEntry[2]));
-				csvFilePrinter.printRecord(dataEntry);
-			}
-		} catch (Exception e) {
-			System.out.println("Error in CsvFileWriter!");
-			e.printStackTrace();
-		} finally {
-			try {
-				if (fileWriter != null) {
-					fileWriter.flush();
-					fileWriter.close();
-				}
-				if (csvFilePrinter != null) {
-					csvFilePrinter.close();
-				}
-			} catch (IOException e) {
-				System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
-				e.printStackTrace();
-			}
-		}
-
-		return fileName;
-	}
+//	public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
+//		//Delimiter used in CSV file
+//		String NEW_LINE_SEPARATOR = "\n";
+//
+//		//CSV file header
+//		Object[] FILE_HEADER = {"word", "frequency", "percent"};
+//
+//		String fileName = "";
+//
+//		fileName = title.replace(": ", "-");
+//		fileName = fileName.replace(" ", "_").concat(".csv");
+//
+//		fileName = resultsPath.toString().concat(File.separator).concat(fileName);
+//
+//		OutputStreamWriter fileWriter = null;
+//		CSVPrinter csvFilePrinter = null;
+//
+//		//Create the CSVFormat object with "\n" as a record delimiter
+//		CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
+//
+//		try {
+//			//initialize FileWriter object
+//			fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
+//
+//			//initialize CSVPrinter object
+//			csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
+//
+//			// write info block
+//			printHeaderInfo(csvFilePrinter, headerInfoBlock);
+//
+//			//Create CSV file header
+//			csvFilePrinter.printRecord(FILE_HEADER);
+//
+//			for (Object[] resultEntry : result) {
+//				List dataEntry = new ArrayList<>();
+//				dataEntry.add(resultEntry[0]);
+//				dataEntry.add(resultEntry[1]);
+//				dataEntry.add(formatNumberAsPercent(resultEntry[2]), statistics.getCorpus().getPunctuation());
+//				csvFilePrinter.printRecord(dataEntry);
+//			}
+//		} catch (Exception e) {
+//			System.out.println("Error in CsvFileWriter!");
+//			e.printStackTrace();
+//		} finally {
+//			try {
+//				if (fileWriter != null) {
+//					fileWriter.flush();
+//					fileWriter.close();
+//				}
+//				if (csvFilePrinter != null) {
+//					csvFilePrinter.close();
+//				}
+//			} catch (IOException e) {
+//				System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
+//				e.printStackTrace();
+//			}
+//		}
+//
+//		return fileName;
+//	}

 	public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
 		//Delimiter used in CSV file
@@ -54,8 +54,29 @@ public class Util {
 			return "- invalid input format -";
 	}

-	public static String formatNumberAsPercent(Object o) {
-            return MessageFormat.format("{0,number,#.### %}", o).replace('.', ',');
+	/** Formats {@code o} as a percentage ("#.### %"); "punctuation.COMMA" selects ',' as the decimal separator, any other value '.'. */
+	public static String formatNumberAsPercent(Object o, String punctuation) {
+		// Locale.ROOT pins '.' as the separator; with the JVM default locale (e.g. sl-SI) the "point" setting would still print ','.
+		String formatted = new MessageFormat("{0,number,#.### %}", java.util.Locale.ROOT).format(new Object[]{o});
+		boolean useComma = "punctuation.COMMA".equals(punctuation);
+		return useComma ? formatted.replace('.', ',') : formatted;
+	}
+
+	/** Formats {@code o} with at most two decimals ("#.##"); "punctuation.COMMA" selects ',' as the decimal separator, any other value '.'. */
+	public static String formatNumberForExport(Object o, String punctuation) {
+		// Locale.ROOT pins '.' as the separator produced by the formatter; with the JVM default locale
+		// (e.g. sl-SI) the output would already contain ',' and the "point" setting would have no effect.
+		String formatted = new MessageFormat("{0,number,#.##}", java.util.Locale.ROOT).format(new Object[]{o});
+		boolean useComma = "punctuation.COMMA".equals(punctuation);
+		return useComma ? formatted.replace('.', ',') : formatted;
+	}
+
+    /** Formats {@code o} with at most eight decimals ("#.########"); "punctuation.COMMA" selects ',' as the decimal separator, any other value '.'. */
+    public static String formatNumberForLongExport(Object o, String punctuation) {
+        // Locale.ROOT pins '.' as the separator; with the JVM default locale (e.g. sl-SI) the "point" setting would still print ','.
+        String formatted = new MessageFormat("{0,number,#.########}", java.util.Locale.ROOT).format(new Object[]{o});
+        boolean useComma = "punctuation.COMMA".equals(punctuation);
+        return useComma ? formatted.replace('.', ',') : formatted;
    }

 	private static boolean isInstanceOfInteger(Object o) {
@@ -57,6 +57,13 @@
            <Image url="questionmark.png" backgroundLoading="true"/>
        </ImageView>

+        <Label fx:id="punctuationL" layoutX="10.0" layoutY="340.0" prefHeight="25.0" text="Decimalno znamenje"/>
+        <ComboBox fx:id="punctuationCB" layoutX="225.0" layoutY="340.0" minWidth="140.0" prefWidth="140.0"
+                  visibleRowCount="5"/>
+        <ImageView fx:id="punctuationI" layoutX="370.0" layoutY="347.5" pickOnBounds="true" preserveRatio="true">
+            <Image url="questionmark.png" backgroundLoading="true"/>
+        </ImageView>
+
        <Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0"/>
        <Button fx:id="changeLanguageB" layoutX="710.0" layoutY="40.0" mnemonicParsing="false" prefWidth="50.0"/>
    </children>
@@ -17,18 +17,20 @@ tab.stringLevelTabNew2=Word sets
 # corpus tab
 label.setCorpusLocation=Set corpus location
 button.setCorpusLocation=Set location
-label.readHeaderInfo=Read info from headers
+label.readHeaderInfo=Read tax. from corpus files
 checkBox.readHeaderInfo=
 label.chooseResultsLocation=Choose result location
 button.chooseResultsLocation=Set location
 label.selectReader=Select reader
 label.outputName=Output file name
+label.punctuation=Decimal separator

 label.corpusTab.chooseCorpusLocationH=Select the folder which contains the corpus. The folder should only contain one corpus and should not contain files that are not part of the corpus.
-label.corpusTab.readHeaderInfoH=If you select this option, the taxonomy will be read separately. This might take a while.
+label.corpusTab.readHeaderInfoH=The program will read the taxonomy from corpus files. This might take a while.
 label.corpusTab.chooseResultsLocationH=Choose result location
 label.corpusTab.selectReaderH=Select reader
 label.corpusTab.outputNameH=Output file name
+label.corpusTab.punctuationH=Select decimal separator used in export files.

 # character analysis tab
 label.stringLength=Number of characters
@@ -40,7 +42,7 @@ label.taxonomy=Filter by taxonomy
 label.minimalOccurrences=Min. nr. occurrences
 label.minimalTaxonomy=Min. nr. tax. branches
 label.minimalRelFre=Min. rel. frequency
-label.taxonomySetOperation=Filtriraj taksonomijo po
+label.taxonomySetOperation=Filter taxonomy by
 label.solarFilters=Selected filters:
 string.lemma=lemma
 string.word=word
@@ -52,7 +54,7 @@ label.letter.msdH=Character strings will be counted only in words with the provi
 label.letter.taxonomyH=Character strings will be counted only in selected text types.
 label.letter.minimalOccurrencesH=Character strings with fewer occurrences will not be included in the output.
 label.letter.minimalTaxonomyH=Character strings that occur in fewer taxonomy branches will not be included in the output.
-label.letter.taxonomySetOperationH=Izpisuj iz besedil, ki ustrezajo vsaj eni od izbranih vej (unija) ali vsem izbranim vejam (presek)
+label.letter.taxonomySetOperationH=Extract information from texts that fit into at least one (union) or all (intersection) of the selected branches.

 # word part tab
 label.alsoVisualize=Also split by
@@ -101,8 +103,13 @@ label.wordSet.taxonomyH=Word sets will only be extracted from the selected taxon
 label.wordSet.minimalOccurrencesH=Word sets with fewer occurrences will not be included in the output.
 label.wordSet.minimalTaxonomyH=Word sets that occur in fewer taxonomy branches will not be included in the output.

+# punctuations
+punctuation.COMMA=comma (,)
+punctuation.POINT=point (.)
+
 # calculate for
 calculateFor.WORD=word
+calculateFor.LOWERCASE_WORD=lowercase word
 calculateFor.NORMALIZED_WORD=normalized word
 calculateFor.LEMMA=lemma
 calculateFor.MORPHOSYNTACTIC_SPECS=morphosyntactic tag
@@ -187,7 +194,7 @@ windowTitles.warning=Warning
 windowTitles.confirmation=Confirmation

 # export header translations
-exportHeader.corpus=Corpus:
+exportHeader.corpus=Reader:
 exportHeader.date=Date:
 exportHeader.executionTime=Execution time:
 exportHeader.analysis=Analysis:
@@ -212,6 +219,7 @@ exportHeader.msd=Morphosyntactic tag:
 exportHeader.taxonomy=Filter by taxonomy:
 exportHeader.minOccurrences=Min. nr. occurrences:
 exportHeader.minTaxonomies=Min. nr. taxonomy branches:
+exportHeader.minRelFre=Min. rel. frequency:
 exportHeader.additionalFilters=Additional filters:
 exportHeader.yes=yes
 exportHeader.no=no
@@ -231,6 +239,7 @@ exportTable.relativeFrequency=Relative frequency
 exportTable.msd=msd
 # parts
 exportTable.part.word=words:
+exportTable.part.lowercaseWord=lowercase words:
 exportTable.part.normalizedWord=normalized words:
 exportTable.part.lemma=lemmas:
 exportTable.part.msd=msd:
@@ -239,6 +248,7 @@ exportTable.part.wordType=word type:

 exportTable.part.letterSet=character set
 exportTable.part.word2=word
+exportTable.part.lowercaseWord2=lowercase word
 exportTable.part.normalizedWord2=normalized word
 exportTable.part.lemma2=lemma
 exportTable.part.msd2=msd
@@ -248,6 +258,7 @@ exportTable.part.wordType2=word type
 exportTable.part.letterSet2=Share of total sum of all letter sets
 exportTable.part.letterSet3=Letter set
 exportTable.part.word3=Word
+exportTable.part.lowercaseWord3=Lowercase word
 exportTable.part.normalizedWord3=Normalized word
 exportTable.part.lemma3=Lemma
 exportTable.part.msd3=Msd
@@ -1,5 +1,5 @@
 # general
-window.title=Luščilnik
+window.title=Korpusni luščilnik

 hyperlink.help=Pomoč
 button.language=EN
@@ -17,18 +17,20 @@ tab.stringLevelTabNew2=Besedni nizi
 # corpus tab
 label.setCorpusLocation=Nastavi lokacijo korpusa
 button.setCorpusLocation=Ponastavi
-label.readHeaderInfo=Preberi info iz headerjev
+label.readHeaderInfo=Preberi taks. iz korp. datotek
 checkBox.readHeaderInfo=
-label.chooseResultsLocation=Nastavi lokacijo rezultatov
+label.chooseResultsLocation=Nastavi lokacijo izpisov
 button.chooseResultsLocation=Ponastavi
 label.selectReader=Izberi bralnik
 label.outputName=Ime izhodne datoteke
+label.punctuation=Decimalno znamenje

 label.corpusTab.chooseCorpusLocationH=Izberite mapo, v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.
-label.corpusTab.readHeaderInfoH=Če izberete to opcijo, se bo iz korpusnih datotek prebrala razpoložljiva taksonomija oz. filtri. Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.
+label.corpusTab.readHeaderInfoH=Program bo iz korpusnih datotek prebral taksonomijo. Ta operacija lahko traja dlje časa.
 label.corpusTab.chooseResultsLocationH=Nastavi lokacijo rezultatov
 label.corpusTab.selectReaderH=Izberi bralnik
 label.corpusTab.outputNameH=Ime izhodne datoteke
+label.corpusTab.punctuationH=Izberite želeno decimalno znamenje v izpisu.

 # character analysis tab
 label.stringLength=Dolžina znakovnih nizov
@@ -101,8 +103,13 @@ label.wordSet.taxonomyH=Besedni nizi bodo izpisani samo iz izbranih taksonomskih
 label.wordSet.minimalOccurrencesH=Besedni nizi, ki se pojavijo redkeje, ne bodo vključeni v izpis.
 label.wordSet.minimalTaxonomyH=Besedni nizi, ki so prisotni v manj vejah, ne bodo vključeni v izpis.

+# punctuations
+punctuation.COMMA=vejica (,)
+punctuation.POINT=pika (.)
+
 # calculate for
 calculateFor.WORD=oblike
+calculateFor.LOWERCASE_WORD=oblike z malimi črkami
 calculateFor.NORMALIZED_WORD=normalizirane oblike
 calculateFor.LEMMA=leme
 calculateFor.MORPHOSYNTACTIC_SPECS=oblikoskladenjske oznake
@@ -187,7 +194,7 @@ windowTitles.warning=Opozorilo
 windowTitles.confirmation=Potrdilo

 # export header translations
-exportHeader.corpus=Korpus:
+exportHeader.corpus=Bralnik:
 exportHeader.date=Datum:
 exportHeader.executionTime=Čas izvajanja:
 exportHeader.analysis=Analiza:
@@ -212,6 +219,7 @@ exportHeader.msd=Oblikoskladenjska oznaka:
 exportHeader.taxonomy=Filtriranje po taksonomiji:
 exportHeader.minOccurrences=Min. št. pojavitev:
 exportHeader.minTaxonomies=Min. št. taksonomskih vej:
+exportHeader.minRelFre=Min. rel. pogostost:
 exportHeader.additionalFilters=Dodatni filtri:
 exportHeader.yes=da
 exportHeader.no=ne
@@ -231,6 +239,7 @@ exportTable.relativeFrequency=Relativna pogostost
 exportTable.msd=msd
 # parts
 exportTable.part.word=oblik:
+exportTable.part.lowercaseWord=oblik z malimi črkami:
 exportTable.part.normalizedWord=normaliziranih oblik:
 exportTable.part.lemma=lem:
 exportTable.part.msd=oblikoskladenjskih oznak:
@@ -239,6 +248,7 @@ exportTable.part.wordType=besednih vrst:

 exportTable.part.letterSet=znakovnega niza
 exportTable.part.word2=oblike
+exportTable.part.lowercaseWord2=oblike z malimi črkami
 exportTable.part.normalizedWord2=normalizirane oblike
 exportTable.part.lemma2=leme
 exportTable.part.msd2=oblikoskladenjske oznake
@@ -248,6 +258,7 @@ exportTable.part.wordType2=besedne vrste
 exportTable.part.letterSet2=Delež glede na skupno vsoto vseh najdenih znakovnih nizov
 exportTable.part.letterSet3=Znakovni niz
 exportTable.part.word3=Oblika
+exportTable.part.lowercaseWord3=Oblika z malimi črkami
 exportTable.part.normalizedWord3=Normalizirana oblika
 exportTable.part.lemma3=Lema
 exportTable.part.msd3=Oblikoskladenjska oznaka