Added filter parameters to CSV + created names of columns for MSDs + [partly] fixed number of words parameter

2018-11-13 13:57:49 +01:00 · 2018-11-13 13:57:49 +01:00 · cbfe3e6025
commit cbfe3e6025
parent a4df732678
9 changed files with 502 additions and 219 deletions
--- a/src/main/java/alg/XML_processing.java
+++ b/src/main/java/alg/XML_processing.java
@ -535,6 +535,7 @@ public class XML_processing {
 	public static boolean readXMLGigafida(String path, StatisticsNew stats) {
 		boolean inWord = false;
 		boolean inPunctuation = false;
 		boolean taxonomyMatch = true;
 		ArrayList<String> currentFiletaxonomy = new ArrayList<>();
 		ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
 		String lemma = "";
@ -635,13 +636,19 @@ public class XML_processing {
 						// parser reached end of the current sentence
 						if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
                            // count all UniGramOccurrences in sentence for statistics
                            stats.updateUniGramOccurrences(sentence.size());
 							// add sentence to corpus if it passes filters
 							sentence = runFilters(sentence, stats.getFilter());
-							if (!ValidationUtil.isEmpty(sentence)) {
+
 							if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) {
 								corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
 							}
 //							taxonomyMatch = true;
 							// and start a new one
 							sentence = new ArrayList<>();
@ -666,7 +673,9 @@ public class XML_processing {
 								if (currentFiletaxonomy.isEmpty()) {
 									// taxonomies don't match so stop
-									return false;
+//									return false;
                                    taxonomyMatch = false;
 //									System.out.println("TEST");
 								}
 							}
 						}
--- a/src/main/java/alg/ngram/Ngrams.java
+++ b/src/main/java/alg/ngram/Ngrams.java
@ -36,6 +36,8 @@ public class Ngrams {
 		ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
 		for (Sentence s : corpus) {
 //            stats.updateUniGramOccurrences(s.getWords().size());
 			// skip sentences shorter than specified ngram length
 			if (s.getWords().size() < stats.getFilter().getNgramValue()) {
 				continue;
@ -176,6 +178,8 @@ public class Ngrams {
 				// UPDATE TAXONOMY HERE!!!
                stats.updateTaxonomyResults(multipleKeys, s.getTaxonomy());
 //				stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
@ -261,16 +265,34 @@ public class Ngrams {
 	 */
 	private static void generateNgramLetterCandidates(List<Sentence> corpus, StatisticsNew stats) {
 		for (Sentence s : corpus) {
 //            stats.updateUniGramOccurrences(s.getWords().size());
 			for (Word w : s.getWords()) {
 				List<String> taxonomy = s.getTaxonomy();
 ////				List<Word> ngramCandidate = s.getSublist(i, i + stats.getFilter().getNgramValue());
 				List<Word> ngramCandidate = new ArrayList<>();
 				ngramCandidate.add(w);
 //
 //				// if msd regex is set and this candidate doesn't pass it, skip this iteration
 //				if (stats.getFilter().hasMsd() && !passesRegex(ngramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) {
 //					continue;
 //				}
 				String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv(), stats.getFilter().getWordParts());
 				// skip this iteration if:
 				// - word doesn't contain a proper version (missing lemma for example)
 				// - msd regex is given but this word's msd doesn't match it, skip this iteration
 				// - given substring length is larger than the word length
 //                boolean t1 = stats.getFilter().hasMsd() && !w.getMsd(stats.getFilter().getWordParts()).matches(stats.getFilter().getMsd().get(0).pattern());
 //                boolean t2 = !w.getMsd(stats.getFilter().getWordParts()).matches(stats.getFilter().getMsd().get(0).pattern());
 //                String t3 = stats.getFilter().getMsd().get(0).pattern();
 //                ArrayList<CalculateFor> t4 = stats.getFilter().getWordParts();
 //                boolean t5 = word.length() < stats.getFilter().getStringLength();
 				if (ValidationUtil.isEmpty(word)
-						|| stats.getFilter().hasMsd() && !w.getMsd(stats.getFilter().getWordParts()).matches(stats.getFilter().getMsd().get(0).pattern())
+						|| stats.getFilter().hasMsd() && !passesRegex(ngramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())
 						|| word.length() < stats.getFilter().getStringLength()) {
 					continue;
 				}
@ -330,6 +352,8 @@ public class Ngrams {
 		for (Sentence s : corpus) {
 			List<Word> sentence = s.getWords();
 //			stats.updateUniGramOccurrences(s.getWords().size());
 			if (sentence == null){
 				continue;
 			}
--- a/src/main/java/data/MultipleHMKeys.java
+++ b/src/main/java/data/MultipleHMKeys.java
@ -15,6 +15,42 @@ public interface MultipleHMKeys {
    /**
     * Default implementation; yields {@code null}.
     *
     * @return {@code null} unless an implementing class overrides this method
     */
    default ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys() {
        return null;
    }
    /**
     * Picks the morphosyntactic-specs (MSD) part of this key according to the given filter.
     *
     * <p>If the filter's calculate-for setting is {@code MORPHOSYNTACTIC_SPECS}, the value of
     * {@code getK1()} is returned. Otherwise, when {@code MORPHOSYNTACTIC_SPECS} is among the
     * filter's multiple keys, its position (0 to 3) selects {@code getK2()} to {@code getK5()};
     * if it occurs more than once within those positions, the last occurrence wins.
     * Occurrences past the fourth position are ignored.
     *
     * @param filter filter providing the calculate-for value and the list of multiple keys
     * @return the selected key part, or an empty string when none is selected
     */
    default String getMsd(Filter filter) {
        // the primary key itself is the MSD
        if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
            return getK1();
        }

        String selected = "";
        if (!filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
            return selected;
        }

        int position = 0;
        for (CalculateFor candidate : filter.getMultipleKeys()) {
            // only the first four additional keys map onto K2..K5
            if (position > 3) {
                break;
            }
            if (candidate.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
                if (position == 0) {
                    selected = getK2();
                } else if (position == 1) {
                    selected = getK3();
                } else if (position == 2) {
                    selected = getK4();
                } else {
                    selected = getK5();
                }
            }
            position++;
        }
        return selected;
    }
    // Re-declares Object#hashCode() as a member of this interface.
    @Override
    int hashCode();
--- a/src/main/java/data/StatisticsNew.java
+++ b/src/main/java/data/StatisticsNew.java
@ -5,6 +5,7 @@ import static gui.ValidationUtil.*;
 import java.io.UnsupportedEncodingException;
 import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
 import java.time.temporal.ChronoUnit;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicLong;
@ -39,8 +40,10 @@ public class StatisticsNew {
 	private boolean useDB;
 	private RDB db;
 	private boolean analysisProducedResults;
-	private LocalDateTime time;
+	private LocalDateTime timeBeginning;
 	private LocalDateTime timeEnding;
 	private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
 	private AtomicLong uniGramOccurrences;
 	public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
 		this.corpus = corpus;
@ -48,6 +51,8 @@ public class StatisticsNew {
 		this.taxonomyResult = new ConcurrentHashMap<>();
 		this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
 		this.collocability = new ConcurrentHashMap<>();
 		this.uniGramOccurrences = new AtomicLong(0L);
        // create table for counting word occurrences per taxonomies
 		if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
@ -75,7 +80,9 @@ public class StatisticsNew {
 			result = new ConcurrentHashMap<>();
 		}
-		resultTitle = generateResultTitle();
+        this.timeBeginning = LocalDateTime.now();
 //		resultTitle = generateResultTitle();
 		logger.debug(toString());
 	}
@ -94,7 +101,7 @@ public class StatisticsNew {
 	 *
 	 * @return
 	 */
-	private String generateResultTitle() {
+	public String generateResultTitle() {
 		String separator = "_";
 		StringBuilder sb = new StringBuilder();
@ -108,6 +115,14 @@ public class StatisticsNew {
 						.append(filter.getCalculateFor())
 						.append(separator);
 			} else if(ngramLevel == 1) {
 				if (filter.getSuffixLength() != null && filter.getSuffixList() != null && filter.getPrefixLength() != null && filter.getPrefixList() != null) {
 					sb.append(corpus.getCorpusType().toString())
 							.append(separator)
 							.append("besedni-deli")
 							.append(separator)
 							.append(filter.getCalculateFor())
 							.append(separator);
 				} else {
 					sb.append(corpus.getCorpusType().toString())
 							.append(separator)
 							.append("besede")
@ -115,6 +130,7 @@ public class StatisticsNew {
 							.append(filter.getCalculateFor())
 							.append(separator);
 				}
 			}
 			else {
 				sb.append(filter.getAl().toString())
 						.append(separator)
@ -141,13 +157,20 @@ public class StatisticsNew {
 		// if taxonomy -> taxonomy
 		// if cvv -> cvv + dolžina
 		this.time = this.time != null ? this.time : LocalDateTime.now();
-		sb.append(time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss")));
+		sb.append(getTimeEnding());
 		return sb.toString();
 	}
 	/**
 	 * Stores the current date and time as the ending time.
 	 */
 	public void setTimeEnding() {
 		timeEnding = LocalDateTime.now();
 	}
    /**
     * Returns the ending time formatted as {@code dd.MM.yyyy_hh.mm.ss}.
     *
     * <p>If no ending time has been stored yet, the current time is stored first so that
     * callers which run before {@code setTimeEnding()} do not fail with a
     * {@link NullPointerException}.
     *
     * @return the formatted ending time
     */
    public String getTimeEnding() {
        if (timeEnding == null) {
            timeEnding = LocalDateTime.now();
        }
        // NOTE(review): "hh" is the 12-hour clock field and the pattern has no AM/PM marker;
        // confirm whether the 24-hour "HH" was intended. Left unchanged to keep existing output.
        return timeEnding.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss"));
    }
 	/**
 	 * @return the current value of the {@code analysisProducedResults} flag
 	 */
 	public boolean isAnalysisProducedResults() {
 		return this.analysisProducedResults;
 	}
@ -319,6 +342,14 @@ public class StatisticsNew {
 		return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
 	}
 	/**
 	 * Adds {@code amount} to the unigram-occurrence counter.
 	 *
 	 * <p>The increment is a single atomic operation; a separate {@code get()} followed by
 	 * {@code set()} could lose updates if two threads updated the counter at the same time.
 	 *
 	 * @param amount number of occurrences to add
 	 */
 	public void updateUniGramOccurrences(int amount){
 		uniGramOccurrences.addAndGet(amount);
 	}
    /**
     * @return the current value of the unigram-occurrence counter
     */
    public long getUniGramOccurrences() {
        return uniGramOccurrences.get();
    }
 	public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
 		for (String key : taxonomyResult.keySet()) {
 			// first word should have the same taxonomy as others
@ -423,22 +454,23 @@ public class StatisticsNew {
 		LinkedHashMap<String, String> info = new LinkedHashMap<>();
 		info.put("Korpus:", corpus.getCorpusType().toString());
-		info.put("Datum:", time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
+		setTimeEnding();
 		info.put("Datum:", timeEnding.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
 		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
 			Integer ngramLevel = filter.getNgramValue();
 			if (ngramLevel == 0)
-				info.put("Analiza", "Črke");
+				info.put("Analiza:", "Črke");
 			else if (ngramLevel == 1) {
 				// if suffixes or prefixes are not null print word parts
 				if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
-					info.put("Analiza", "Besedni deli");
+					info.put("Analiza:", "Besedni deli");
 				} else {
-					info.put("Analiza", "Besede");
+					info.put("Analiza:", "Besede");
 				}
 			} else
-				info.put("Analiza", filter.getAl().toString());
+				info.put("Analiza:", filter.getAl().toString());
 		} else {
-			info.put("Analiza", filter.getAl().toString());
+			info.put("Analiza:", filter.getAl().toString());
 		}
 		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
@ -453,9 +485,68 @@ public class StatisticsNew {
 			if (ngramLevel > 1)
 				info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
-			// izračunaj za
+			// calculate for
 			info.put("Izračunaj za:", filter.getCalculateFor().toString());
 			// also write
            if (filter.getMultipleKeys().size() > 0){
                StringBuilder mk = new StringBuilder();
                for (CalculateFor s : filter.getMultipleKeys()) {
                    mk.append(s.toString()).append("; ");
                }
                info.put("Izpiši tudi: ", String.join("; ", mk.substring(0, mk.length() - 2)));
            }
 			// time elapsed
 //            setTimeEnding();
            long seconds = ChronoUnit.MILLIS.between(timeBeginning, timeEnding) / 1000;
 			info.put("Čas izvajanja:", String.valueOf(seconds) + " s");
            // data limitations
            if (filter.getDisplayTaxonomy()){
                info.put("Izpiši taksonomije: ", "Da");
            } else {
                info.put("Izpiši taksonomije: ", "Ne");
            }
            // note punctuations - ngram > 1
            if(ngramLevel > 1) {
                if (filter.getNotePunctuations()) {
                    info.put("Upoštevaj ločila: ", "Da");
                } else {
                    info.put("Upoštevaj ločila: ", "Ne");
                }
            }
            // also write - n - gram > 1
            if (ngramLevel > 1 && filter.getCollocability().size() > 0){
                StringBuilder mk = new StringBuilder();
                for (Collocability s : filter.getCollocability()) {
                    mk.append(s.toString()).append("; ");
                }
                info.put("Kolokabilnost: ", String.join("; ", mk.substring(0, mk.length() - 2)));
            }
            // fragmented MSD - n-gram = 1
            if (info.get("Analiza:").equals("Besede")){
                if (filter.getWriteMsdAtTheEnd()){
                    info.put("Izpiši razbit MSD: ", "Da");
                } else {
                    info.put("Izpiši razbit MSD: ", "Ne");
                }
            }
            if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
                if (filter.getPrefixLength() > 0 || filter.getSuffixLength() > 0) {
                    info.put("Dolžina predpone: ", String.valueOf(filter.getPrefixLength()));
                    info.put("Dolžina pripone: ", String.valueOf(filter.getSuffixLength()));
                } else {
                    info.put("Seznam predpon: ", String.join("; ", filter.getPrefixList()));
                    info.put("Seznam pripon: ", String.join("; ", filter.getSuffixList()));
                }
            }
 			// msd
 			if (!isEmpty(filter.getMsd())) {
 				StringBuilder msdPattern = new StringBuilder();
@ -479,6 +570,9 @@ public class StatisticsNew {
 			}
 		}
 		info.put("Min. št. pojavitev: ", String.valueOf(filter.getMinimalOccurrences()));
 		info.put("Min. št. taksonomij: ", String.valueOf(filter.getMinimalTaxonomy()));
 		if (corpus.getCorpusType() == CorpusType.SOLAR) {
 			HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
--- a/src/main/java/gui/CharacterAnalysisTab.java
+++ b/src/main/java/gui/CharacterAnalysisTab.java
@ -63,8 +63,12 @@ public class CharacterAnalysisTab {
 	private TextField minimalTaxonomyTF;
 	private Integer minimalTaxonomy;
 //	@FXML
 //	private ToggleGroup calculateForRB;
 //	private  CalculateFor calculateFor;
 	@FXML
-	private ToggleGroup calculateForRB;
+	private ComboBox<String> calculateForCB;
 	private CalculateFor calculateFor;
 	@FXML
@ -115,17 +119,25 @@ public class CharacterAnalysisTab {
 		currentMode = MODE.LETTER;
 		toggleMode(currentMode);
-		calculateForRB.selectedToggleProperty().addListener(new ChangeListener<Toggle>() {
+//		calculateForRB.selectedToggleProperty().addListener(new ChangeListener<Toggle>() {
-			@Override
+//			@Override
-			public void changed(ObservableValue<? extends Toggle> observable, Toggle oldValue, Toggle newValue) {
+//			public void changed(ObservableValue<? extends Toggle> observable, Toggle oldValue, Toggle newValue) {
-				//logger.info("calculateForRB:", newValue.toString());
+//				//logger.info("calculateForRB:", newValue.toString());
-				RadioButton chk = (RadioButton)newValue.getToggleGroup().getSelectedToggle(); // Cast object to radio button
+//				RadioButton chk = (RadioButton)newValue.getToggleGroup().getSelectedToggle(); // Cast object to radio button
-				calculateFor = CalculateFor.factory(chk.getText());
+//				calculateFor = CalculateFor.factory(chk.getText());
-				logger.info("calculateForRB:", chk.getText());
+//				logger.info("calculateForRB:", chk.getText());
-				//System.out.println("Selected Radio Button - "+chk.getText());
+//				//System.out.println("Selected Radio Button - "+chk.getText());
-			}
+//			}
 //		});
 		// Keep calculateFor in sync with the value selected in the combo box.
 		calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
 			calculateFor = CalculateFor.factory(newValue);
 			// The "{}" placeholder is needed for the argument to show up in the log line.
 			// Passing the object rather than toString() also keeps logging safe should
 			// factory() ever return null.
 			logger.info("calculateForCB: {}", calculateFor);
 		});
 		calculateForCB.getSelectionModel().select(0);
 		// msd
 		msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
 			if (!newValue) {
@ -292,10 +304,10 @@ public class CharacterAnalysisTab {
 		// TODO: check for GOS, GIGAFIDA, SOLAR...
 		// refresh and:
 		// TODO if current value != null && is in new calculateFor ? keep : otherwise reset
-		if (calculateFor == null) {
+//		if (calculateFor == null) {
-			calculateForRB.selectToggle(lemmaRB);
+//			calculateForRB.selectToggle(lemmaRB);
-			calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString());
+//			calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString());
-		}
+//		}
 		if (!filter.hasMsd()) {
 			// if current corpus doesn't have msd data, disable this field
@ -381,18 +393,19 @@ public class CharacterAnalysisTab {
 			if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
 				// if the user selected something else before selecting ngram for letters, reset that choice
 				calculateFor = CalculateFor.LEMMA;
-				calculateForRB.selectToggle(lemmaRB);
+
 				calculateForCB.getSelectionModel().select(0);
 			}
 		}
 		// override if orth mode, allow only word
 		if (corpus.isGosOrthMode()) {
 			// TODO change to
-			varietyRB.setDisable(true);
+//			varietyRB.setDisable(true);
 			msdTF.setDisable(true);
 		} else {
 			msdTF.setDisable(false);
-			varietyRB.setDisable(false);
+//			varietyRB.setDisable(false);
 		}
 	}
@ -400,6 +413,8 @@ public class CharacterAnalysisTab {
 		Filter filter = new Filter();
 		filter.setNgramValue(0);
 		filter.setCalculateFor(calculateFor);
        filter.setMultipleKeys(new ArrayList<>());
 		filter.setMsd(msd);
 		filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
 		filter.setDisplayTaxonomy(displayTaxonomy);
--- a/src/main/java/util/Export.java
+++ b/src/main/java/util/Export.java
@ -105,7 +105,8 @@ public class Export {
            }
 		}
-		headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
+		headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences()));
 //		headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
 		for (CalculateFor otherKey : filter.getMultipleKeys()) {
            FILE_HEADER_AL.add(otherKey.toHeaderString());
@ -132,6 +133,21 @@ public class Export {
 				FILE_HEADER_AL.add("Relativna pogostost [" + key + "]");
 			}
 		}
 		if (filter.getWriteMsdAtTheEnd()) {
            String msd = "";
            int maxMsdLength = 0;
 			for(MultipleHMKeys key : set.iterator().next().getRight().keySet()){
 				msd = key.getMsd(filter);
 				if (msd.length() > maxMsdLength){
 				    maxMsdLength = msd.length();
                }
 			}
 			for(int i = 0; i < maxMsdLength; i++){
                FILE_HEADER_AL.add("msd" + String.format("%02d", i + 1));
            }
 		}
 		FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
 		FILE_HEADER_AL.toArray(FILE_HEADER);
@ -139,6 +155,11 @@ public class Export {
        for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
            String title = p.getLeft();
 //            statistics.setTimeEnding();
            title = statistics.generateResultTitle();
 //			statistics.
            fileName = title.replace(": ", "-");
            fileName = fileName.replace(" ", "_").concat(".csv");
@ -252,8 +273,10 @@ public class Export {
                        if(!key.equals("Total") && num_taxonomy_frequencies.get(key) > 0) {
                            AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
                            dataEntry.add(frequency.toString());
-							dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
+//                            dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
-							dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
+//                            dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
                            dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
                            dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
                        }
                    }
@ -266,41 +289,42 @@ public class Export {
                    // Write msd separated per letters at the end of each line in csv
                    if (filter.getWriteMsdAtTheEnd()) {
-						String msd = "";
+//                        String msd = "";
-						if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
+//
-							msd = e.getKey().getK1();
+//                        if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
-						} else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
+//                            msd = e.getKey().getK1();
-							i = 0;
+//                        } else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
-							for (CalculateFor otherKey : filter.getMultipleKeys()){
+//                            i = 0;
-								switch(i){
+//                            for (CalculateFor otherKey : filter.getMultipleKeys()){
-									case 0:
+//                                switch(i){
-										if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
+//                                    case 0:
-											msd = e.getKey().getK2();
+//                                        if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
-										}
+//                                            msd = e.getKey().getK2();
-										break;
+//                                        }
-									case 1:
+//                                        break;
-										if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
+//                                    case 1:
-											msd = e.getKey().getK3();
+//                                        if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
-										}
+//                                            msd = e.getKey().getK3();
-										break;
+//                                        }
-									case 2:
+//                                        break;
-										if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
+//                                    case 2:
-											msd = e.getKey().getK4();
+//                                        if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
-										}
+//                                            msd = e.getKey().getK4();
-										break;
+//                                        }
-									case 3:
+//                                        break;
-										if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
+//                                    case 3:
-											msd = e.getKey().getK5();
+//                                        if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
-										}
+//                                            msd = e.getKey().getK5();
-										break;
+//                                        }
-								}
+//                                        break;
-
+//                                }
-								i++;
+//
-							}
+//                                i++;
-						}
+//                            }
 //                        }
                        String msd = e.getKey().getMsd(filter);
                        String [] charArray = msd.split("(?!^)");
                        dataEntry.addAll(Arrays.asList(charArray));
                    }
                    csvFilePrinter.printRecord(dataEntry);
--- a/src/main/resources/gui/CharacterAnalysisTab.fxml
+++ b/src/main/resources/gui/CharacterAnalysisTab.fxml
@ -13,35 +13,49 @@
 <?import javafx.scene.layout.Pane?>
 <?import org.controlsfx.control.CheckComboBox?>
 <?import javafx.scene.control.ComboBox?>
 <?import javafx.collections.FXCollections?>
 <?import java.lang.String?>
 <AnchorPane fx:id="characterAnalysisTab" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.112" xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.CharacterAnalysisTab">
    <Pane>
        <Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Število črk" />
        <TextField fx:id="stringLengthTF" layoutX="185.0" layoutY="20.0" prefWidth="180.0" />
-        <HBox layoutX="10.0" layoutY="60.0">
+        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izračunaj za"/>
-            <children>
+        <ComboBox fx:id="calculateForCB" layoutX="185.0" layoutY="60.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
-                <RadioButton fx:id="lemmaRB" mnemonicParsing="false" prefHeight="25.0" prefWidth="86.0" selected="true" text="lema">
+                  visibleRowCount="5">
-                    <toggleGroup>
+            <items>
-                        <ToggleGroup fx:id="calculateForRB" />
+                <FXCollections fx:factory="observableArrayList">
-                    </toggleGroup></RadioButton>
+                    <String fx:value="lema"/>
-                <RadioButton fx:id="varietyRB" mnemonicParsing="false" text="različnica" toggleGroup="$calculateForRB" />
+                    <String fx:value="različnica"/>
-            </children>
+                </FXCollections>
-        </HBox>
+            </items>
        </ComboBox>
        <!--<HBox layoutX="10.0" layoutY="60.0">-->
            <!--<children>-->
                <!--<RadioButton fx:id="lemmaRB" mnemonicParsing="false" prefHeight="25.0" prefWidth="86.0" selected="true" text="lema">-->
                    <!--<toggleGroup>-->
                        <!--<ToggleGroup fx:id="calculateForRB" />-->
                    <!--</toggleGroup></RadioButton>-->
                <!--<RadioButton fx:id="varietyRB" mnemonicParsing="false" text="različnica" toggleGroup="$calculateForRB" />-->
            <!--</children>-->
        <!--</HBox>-->
        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
        <CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
-        <Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />
+        <!--<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />-->
-        <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD" />
+        <!--<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD" />-->
-        <TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0" />
+        <!--<TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0" />-->
-        <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija" />
+        <!--<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija" />-->
-        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0" />
+        <!--<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0" />-->
-        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />
+        <!--<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />-->
-        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
+        <!--<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />-->
-        <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />
+        <!--<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />-->
-        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
+        <!--<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />-->
        <Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
            <children>
@ -52,8 +66,28 @@
        <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
    </Pane>
-    <Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:" />
+    <Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
-    <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0" text=" " wrapText="true" />
+        <!-- MSD and Taxonomy separated -->
        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Omejitev podatkov" />
        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Oznaka MSD"/>
        <TextField fx:id="msdTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0"/>
        <Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Taksonomija"/>
        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="140.0" prefHeight="25.0" prefWidth="180.0"/>
        <Label layoutX="10.0" layoutY="180.0" prefHeight="25.0" text="Min. št. pojavitev" />
        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="180.0" prefWidth="180.0" />
        <Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Min. št. taksonomij" />
        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="220.0" prefWidth="180.0" />
        <Label fx:id="solarFilters" layoutX="10.0" layoutY="280.0" text="Izbrani filtri:" />
        <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="320.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
    </Pane>
    <!--<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:" />-->
    <!--<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0" text=" " wrapText="true" />-->
    <Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
--- a/src/main/resources/gui/OneWordAnalysisTab.fxml
+++ b/src/main/resources/gui/OneWordAnalysisTab.fxml
@ -41,28 +41,48 @@
        <CheckBox fx:id="writeMsdAtTheEndChB" layoutX="263.0" layoutY="145.0" selected="false" />
        <!-- MSD and Taxonomy separated -->
-        <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" />
+        <!--<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" />-->
-        <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>
+        <!--<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>-->
-        <TextField fx:id="msdTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0"/>
+        <!--<TextField fx:id="msdTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0"/>-->
-        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>
+        <!--<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>-->
-        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>
+        <!--<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>-->
-        <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. pojavitev" />
+        <!--<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. pojavitev" />-->
-        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
+        <!--<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />-->
-        <Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Min. št. taksonomij" />
+        <!--<Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Min. št. taksonomij" />-->
-        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="360.0" prefWidth="180.0" />
+        <!--<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="360.0" prefWidth="180.0" />-->
        <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
                prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
    </Pane>
    <Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
-        <Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />
+        <!-- MSD and Taxonomy separated -->
-        <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
+        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Omejitev podatkov" />
        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Oznaka MSD"/>
        <TextField fx:id="msdTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0"/>
        <Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Taksonomija"/>
        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="140.0" prefHeight="25.0" prefWidth="180.0"/>
        <Label layoutX="10.0" layoutY="180.0" prefHeight="25.0" text="Min. št. pojavitev" />
        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="180.0" prefWidth="180.0" />
        <Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Min. št. taksonomij" />
        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="220.0" prefWidth="180.0" />
        <Label fx:id="solarFilters" layoutX="10.0" layoutY="280.0" text="Izbrani filtri:" />
        <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="320.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
    </Pane>
    <!--<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">-->
        <!--<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />-->
        <!--<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />-->
    <!--</Pane>-->
    <Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
    <Button fx:id="cancel" layoutX="540.0" layoutY="482.0" mnemonicParsing="false"
--- a/src/main/resources/gui/StringAnalysisTabNew2.fxml
+++ b/src/main/resources/gui/StringAnalysisTabNew2.fxml
@ -73,45 +73,53 @@
        <Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Upoštevaj ločila" />
        <CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="false" />
        <Label layoutX="10.0" layoutY="260.0" prefHeight="25.0" text="Kolokabilnost" />
        <CheckComboBox fx:id="collocabilityCCB" layoutX="185.0" layoutY="260.0" prefHeight="25.0" prefWidth="180.0"/>
        <!-- MSD and Taxonomy separated -->
-        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Omejitev podatkov" />
+        <!--<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Omejitev podatkov" />-->
        <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Oznaka MSD" />
        <TextField fx:id="msdTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
        <Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Taksonomija" />
        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="360.0" prefHeight="25.0" prefWidth="180.0" >
            <items>
                <FXCollections fx:factory="observableArrayList">
                    <String fx:value="2" />
                    <String fx:value="3" />
                    <String fx:value="4" />
                    <String fx:value="5" />
                </FXCollections>
            </items>
        </CheckComboBox>
        <Label layoutX="10.0" layoutY="400.0" prefHeight="25.0" text="Min. št. pojavitev" />
        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="400.0" prefWidth="180.0" />
        <!--<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Oznaka MSD" />-->
        <!--<TextField fx:id="msdTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />-->
        <!--<Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Taksonomija" />-->
        <!--<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="360.0" prefHeight="25.0" prefWidth="180.0" >-->
            <!--<items>-->
                <!--<FXCollections fx:factory="observableArrayList">-->
                    <!--<String fx:value="2" />-->
                    <!--<String fx:value="3" />-->
                    <!--<String fx:value="4" />-->
                    <!--<String fx:value="5" />-->
                <!--</FXCollections>-->
            <!--</items>-->
        <!--</CheckComboBox>-->
        <!--<Label layoutX="10.0" layoutY="400.0" prefHeight="25.0" text="Min. št. pojavitev" />-->
        <!--<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="400.0" prefWidth="180.0" />-->
    </Pane>
    <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
    <Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
-        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" />
+        <!-- MSD and Taxonomy separated -->
-        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
+        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Omejitev podatkov" />
-        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Kolokabilnost" />
+        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Oznaka MSD"/>
-        <CheckComboBox fx:id="collocabilityCCB" layoutX="185.0" layoutY="100.0" prefHeight="25.0" prefWidth="180.0"/>
+        <TextField fx:id="msdTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0"/>
        <Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Taksonomija"/>
        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="140.0" prefHeight="25.0" prefWidth="180.0"/>
        <Label layoutX="10.0" layoutY="180.0" prefHeight="25.0" text="Min. št. pojavitev" />
        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="180.0" prefWidth="180.0" />
        <Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Min. št. taksonomij" />
        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="220.0" prefWidth="180.0" />
        <Label fx:id="solarFilters" layoutX="10.0" layoutY="280.0" text="Izbrani filtri:" />
        <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="320.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
        <Label fx:id="solarFilters" layoutX="10.0" layoutY="140.0" text="Izbrani filtri:" />
        <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="260.0" prefWidth="275.0" text=" " wrapText="true" />
        <!-- samoglasniki/soglasniki -->
        <Pane fx:id="paneLetters">
            <children>
                <CheckBox fx:id="calculatecvvCB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" text="Izračunaj za kombinacije samoglasnikov in soglasnikov" />
@ -120,6 +128,25 @@
            </children>
        </Pane>
    </Pane>
    <!--<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">-->
        <!--<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" />-->
        <!--<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />-->
        <!--<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Kolokabilnost" />-->
        <!--<CheckComboBox fx:id="collocabilityCCB" layoutX="185.0" layoutY="100.0" prefHeight="25.0" prefWidth="180.0"/>-->
        <!--<Label fx:id="solarFilters" layoutX="10.0" layoutY="140.0" text="Izbrani filtri:" />-->
        <!--<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="260.0" prefWidth="275.0" text=" " wrapText="true" />-->
        <!--&lt;!&ndash; samoglasniki/soglasniki &ndash;&gt;-->
        <!--<Pane fx:id="paneLetters">-->
            <!--<children>-->
                <!--<CheckBox fx:id="calculatecvvCB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" text="Izračunaj za kombinacije samoglasnikov in soglasnikov" />-->
                <!--<Label layoutX="10.0" layoutY="400.0" prefHeight="25.0" text="Dolžina niza" />-->
                <!--<TextField fx:id="stringLengthTF" layoutX="185.0" layoutY="400.0" prefWidth="180.0" />-->
            <!--</children>-->
        <!--</Pane>-->
    <!--</Pane>-->
    <Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
    <Button fx:id="cancel" layoutX="540.0" layoutY="482.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Prekini" />