Added collocability functionality - implemented Dice method

branch: master
author: Luka, 6 years ago
parent: 1d9e9b7ed6
commit: f9ce74d7b8

data/Collocability.java (new file)
@ -0,0 +1,51 @@
package data;
public enum Collocability {
DICE("Dice");
private final String name;
Collocability(String name) {
this.name = name;
}
public String toString() {
return this.name;
}
public static Collocability factory(String cf) {
if (cf != null) {
if (DICE.toString().equals(cf)) {
return DICE;
}
}
return null;
}
public String toMetadataString() {
switch(this){
case DICE:
return "Kolokabilnost - Dice:";
default:
return null;
}
}
public String toHeaderString() {
switch(this){
case DICE:
return "Kolokabilnost - Dice";
default:
return null;
}
}
// public String toPercentString() {
// switch(this){
// case DICE:
// return "Delež glede na vse različnice";
// default:
// return null;
// }
// }
}
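
For context, a minimal sketch of how this enum is consumed elsewhere in the commit; the "PMI" label below is a hypothetical non-matching value, not something the code defines:
Collocability dice = Collocability.factory("Dice");      // returns DICE (matches DICE.toString())
Collocability unknown = Collocability.factory("PMI");    // no match, factory returns null
String header = Collocability.DICE.toHeaderString();     // "Kolokabilnost - Dice" (CSV column header)
String metadata = Collocability.DICE.toMetadataString(); // "Kolokabilnost - Dice:" (metadata block label)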

data/Filter.java
@ -8,7 +8,7 @@ import java.util.regex.Pattern;
import gui.ValidationUtil;
@SuppressWarnings("unchecked")
public class Filter implements Cloneable {
private HashMap<filterName, Object> filter;
public enum filterName {
@ -28,7 +28,8 @@ public class Filter {
MULTIPLE_KEYS,
NOTE_PUNCTUATIONS,
MINIMAL_OCCURRENCES,
MINIMAL_TAXONOMY,
COLLOCABILITY
}
public Filter() {
@ -186,6 +187,23 @@ public class Filter {
filter.put(MULTIPLE_KEYS, newKeys);
}
public void setCollocability(ArrayList<Collocability> keys) {
ArrayList<Collocability> newKeys = new ArrayList<>();
if (keys != null) {
newKeys.addAll(keys);
}
filter.put(COLLOCABILITY, newKeys);
}
public ArrayList<Collocability> getCollocability() {
if (filter.containsKey(COLLOCABILITY) && filter.get(COLLOCABILITY) != null) {
return (ArrayList<Collocability>) filter.get(COLLOCABILITY);
} else {
return new ArrayList<>();
}
}
public ArrayList<CalculateFor> getMultipleKeys() {
if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) {
return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS);
@ -254,4 +272,18 @@ public class Filter {
}
}
public Object clone() throws CloneNotSupportedException{
Filter f = null;
try {
f = (Filter) super.clone();
} catch (CloneNotSupportedException e) {
f = new Filter();
}
f.filter = (HashMap<filterName, Object>) f.filter.clone();
return f;
}
}
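
A short sketch of how the new setters and the shallow clone() are meant to work together later in this commit (a one-gram copy of the filter for the collocability pass); the values here are illustrative only:
ArrayList<Collocability> measures = new ArrayList<>();
measures.add(Collocability.DICE);
Filter filter = new Filter();
filter.setCollocability(measures);               // stored under filterName.COLLOCABILITY
Filter oneGramFilter = (Filter) filter.clone();  // copies the HashMap itself, not the values inside it
oneGramFilter.setNgramValue(1);                  // overrides the n-gram level only in the copy
// note: mutable values such as the ArrayList above remain shared between both filter instances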

StatisticsNew.java
@ -40,14 +40,16 @@ public class StatisticsNew {
private RDB db;
private boolean analysisProducedResults;
private LocalDateTime time;
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
this.corpus = corpus;
this.filter = filter;
this.taxonomyResult = new ConcurrentHashMap<>();
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
this.collocability = new ConcurrentHashMap<>();
// create table for counting word occurrences per taxonomies
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
@ -213,7 +215,7 @@ public class StatisticsNew {
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
return true;
}
@ -442,11 +444,6 @@ public class StatisticsNew {
info.put("n-gram nivo:", String.valueOf(ngramLevel)); info.put("n-gram nivo:", String.valueOf(ngramLevel));
} }
// else if (ngramLevel == 1){
// info.put("n-gram nivo:", "nivo besed");
// } else {
// info.put("n-gram nivo:", "nivo črk");
// }
// skip
if (ngramLevel > 1)
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
@ -464,11 +461,6 @@ public class StatisticsNew {
info.put("MSD:", msdPattern.toString()); info.put("MSD:", msdPattern.toString());
} }
// taksonomija
// if (!isEmpty(filter.getTaxonomy())) {
// info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
// }
}
@ -496,4 +488,28 @@ public class StatisticsNew {
return info;
}
public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
String[] splitedString = hmKey.getK1().split("\\s+");
long sum_fwi =0L;
for(String s : splitedString){
MultipleHMKeys search = new MultipleHMKeys1(s);
sum_fwi += oneWordTaxonomyResult.get("Total").get(search).longValue();
}
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
collocabilityMap.put(hmKey, dice_value);
}
collocability.put(filter.getCollocability().get(0), collocabilityMap);
}
public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability(){
return this.collocability;
}
}
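
The Dice value computed in updateCalculateCollocabilities is ngramValue * f(w1 ... wn) / sum of f(wi), where the unigram frequencies come from the separate one-gram run. A worked sketch with made-up counts (no real corpus data):
int ngramValue = 2;                     // filter.getNgramValue(), e.g. bigrams
long ngramFrequency = 10L;              // f("novo mesto") from taxonomyResult.get("Total"), hypothetical
long sumUnigramFrequencies = 50L + 30L; // f("novo") + f("mesto") from the one-gram pass, hypothetical
double dice = (double) ngramValue * ngramFrequency / sumUnigramFrequencies; // 2 * 10 / 80 = 0.25
Note that the lookup in the loop above assumes every component word of an n-gram also appears in the one-gram results; a missing entry would surface as a NullPointerException.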

StringAnalysisTabNew2.java
@ -7,10 +7,13 @@ import static gui.Messages.*;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import javafx.application.HostServices;
import org.apache.commons.lang3.SerializationUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -59,6 +62,10 @@ public class StringAnalysisTabNew2 {
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
@FXML
private CheckComboBox<String> collocabilityCCB;
private ArrayList<Collocability> collocability;
@FXML
private ComboBox<String> ngramValueCB;
private Integer ngramValue;
@ -126,6 +133,7 @@ public class StringAnalysisTabNew2 {
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
private static final ObservableList<String> COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
@ -219,6 +227,21 @@ public class StringAnalysisTabNew2 {
calculateForCB.getSelectionModel().select(0);
// collocabilityCCB
collocabilityCCB.getItems().removeAll();
collocabilityCCB.getItems().setAll(FXCollections.observableArrayList(COLLOCABILITY_ITEMS));
collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
collocability = new ArrayList<>();
ObservableList<Collocability> checkedItems = FXCollections.observableArrayList();
for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) {
checkedItems.add(Collocability.factory(el));
}
collocability.addAll(checkedItems);
logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ",")));
});
collocabilityCCB.getCheckModel().clearChecks();
// msd
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
@ -535,6 +558,7 @@ public class StringAnalysisTabNew2 {
filter.setMsd(msd);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
filter.setCollocability(collocability);
if (ngramValue != null && ngramValue == 0) {
filter.setStringLength(stringLength);
@ -545,6 +569,7 @@ public class StringAnalysisTabNew2 {
// no errors
logger.info("Executing: ", filter.toString());
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
// ADD THINGS HERE!!!
execute(statistic);
} else {
logAlert(message);
@ -585,8 +610,109 @@ public class StringAnalysisTabNew2 {
}
}
// public void calculate_collocabilities(StatisticsNew statistics, StatisticsNew oneWordStatistics) {
// statistics.updateCalculateCollocabilities(oneWordStatistics);
//
// }
private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
for (File f : corpusFiles) {
readXML(f.toString(), statisticsOneGrams);
// i++;
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
}
return null;
}
};
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
System.out.print("test");
try {
System.out.print(statistic);
// calculate_collocabilities(statistic, statisticsOneGrams);
statistic.updateCalculateCollocabilities(statisticsOneGrams);
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
} else {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1) {
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
}
// try {
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
// } else {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
// logger.error("Error while saving", e1);
// } catch (OutOfMemoryError e1){
// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
// logger.error("Out of memory error", e1);
// }
//
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
});
task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
});
task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
// logger.info("cancel button");
});
// cancel.setVisible(true);
return task;
}
private void execute(StatisticsNew statistic) {
Filter f = statistic.getFilter();
logger.info("Started execution: ", f);
// Task<Void> task_collocability = null;
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
boolean corpusIsSplit = corpusFiles.size() > 1;
@ -613,21 +739,36 @@ public class StringAnalysisTabNew2 {
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
if (f.getCollocability().size() > 0) {
try {
Filter f2 = (Filter) f.clone();
f2.setNgramValue(1);
StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
thread_collocability.setDaemon(true);
thread_collocability.start();
} catch (CloneNotSupportedException c) {}
} else {
try {
// System.out.print(statistics);
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
} else {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1) {
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
}
}
ngramProgressBar.progressProperty().unbind();
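
To summarize the two-pass flow wired up above, roughly (class and method names as they appear in this diff, progress handling and error handling omitted):
// 1st pass (task): n-gram frequencies are collected into `statistic`
// 2nd pass (taskCollocability): the same corpus files are re-read with the n-gram value forced to 1
Filter oneGramFilter = (Filter) f.clone();
oneGramFilter.setNgramValue(1);
StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, oneGramFilter, useDb);
// prepareTaskForCollocability(...) runs readXML(...) over the files for statisticsOneGrams,
// and its onSucceeded handler then combines and saves the results:
statistic.updateCalculateCollocabilities(statisticsOneGrams); // fills the Dice map
statistic.saveResultToDisk();                                 // Export writes the extra "Kolokabilnost - Dice" column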

Export.java
@ -9,9 +9,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong;
import data.*;
import gui.ValidationUtil;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
@ -61,7 +59,9 @@ public class Export {
}
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
StatisticsNew statistics, Filter filter) {
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
//Delimiter used in CSV file
String NEW_LINE_SEPARATOR = "\n";
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
@ -96,77 +96,22 @@ public class Export {
FILE_HEADER_AL.add("Lema male črke"); FILE_HEADER_AL.add("Lema male črke");
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies)); headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
// if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
// headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
// if (headerInfoBlock.get("Analiza").equals("Besede")){
// FILE_HEADER_AL.add("Različnica");
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
// FILE_HEADER_AL.add("Različnice");
// }
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
// headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
// if (headerInfoBlock.get("Analiza").equals("Besede")){
// FILE_HEADER_AL.add("Lema");
// FILE_HEADER_AL.add("Lema male črke");
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
// FILE_HEADER_AL.add("Leme");
// FILE_HEADER_AL.add("Leme male črke");
// }
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
// headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
// if (headerInfoBlock.get("Analiza").equals("Besede")){
// FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
// FILE_HEADER_AL.add("Oblikoskladenjska oznake");
// }
// } else {
// headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
// FILE_HEADER_AL.add("Lema");
// FILE_HEADER_AL.add("Lema male črke");
// }
// for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
for (CalculateFor otherKey : filter.getMultipleKeys()) {
FILE_HEADER_AL.add(otherKey.toHeaderString());
if (otherKey.equals(CalculateFor.LEMMA))
FILE_HEADER_AL.add("Lema male črke");
}
// if(otherKey.equals(CalculateFor.LEMMA)){
// FILE_HEADER_AL.add("Lema");
// FILE_HEADER_AL.add("Lema male črke");
// }
// if(otherKey.equals(CalculateFor.WORD_TYPE)){
// FILE_HEADER_AL.add("Besedna vrsta");
// }
// if(otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
// FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
// }
// if(otherKey.equals(CalculateFor.NORMALIZED_WORD)){
// FILE_HEADER_AL.add("Normalizirana različnica");
// }
// }
// break;
// }
FILE_HEADER_AL.add("Skupna absolutna pogostost"); FILE_HEADER_AL.add("Skupna absolutna pogostost");
FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString()); FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString());
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
// FILE_HEADER_AL.add("Delež glede na vse različnice");
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
// FILE_HEADER_AL.add("Delež glede na vse leme");
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
// FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
// } else {
// FILE_HEADER_AL.add("Delež glede na vse leme");
// }
FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)"); FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
if (filter.getCollocability().size() > 0){
FILE_HEADER_AL.add(filter.getCollocability().get(0).toHeaderString());
}
for (String key : taxonomyResults.keySet()) {
if(!key.equals("Total") && num_taxonomy_frequencies.get(key) > 0) {
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
@ -176,9 +121,6 @@ public class Export {
}
FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
FILE_HEADER_AL.toArray(FILE_HEADER);
// } else {
// FILE_HEADER = new Object[]{"word", "frequency", "percent"};
// }
String fileName = "";
@ -250,16 +192,7 @@ public class Export {
i++;
}
// if(!e.getKey().getLemma().equals("")){
// dataEntry.add(e.getKey().getLemma());
// dataEntry.add(e.getKey().getLemma().toLowerCase());
// }
// if(!e.getKey().getWordType().equals("")){
// dataEntry.add(e.getKey().getWordType());
// }
// if(!e.getKey().getMsd().equals("")){
// dataEntry.add(e.getKey().getMsd());
// }
dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
@ -270,8 +203,13 @@ public class Export {
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
}
}
if (filter.getCollocability().size() > 0){
dataEntry.add(String.format("%.4f", statistics.getCollocability().get(filter.getCollocability().get(0)).get(e.getKey())));
}
// Write msd separated per letters at the end of each line in csv
if (filter.getWriteMsdAtTheEnd()) {
String msd = "";

FXML layout for StringAnalysisTabNew2
@ -105,8 +105,12 @@
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Kolokabilnost" />
<CheckComboBox fx:id="collocabilityCCB" layoutX="185.0" layoutY="100.0" prefHeight="25.0" prefWidth="180.0"/>
<Label fx:id="solarFilters" layoutX="10.0" layoutY="140.0" text="Izbrani filtri:" />
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="260.0" prefWidth="275.0" text=" " wrapText="true" />
<!-- samoglasniki/soglasniki -->
<Pane fx:id="paneLetters">
<children>
