Added collocability functionality - implemented Dice method

2018-10-24 10:36:07 +02:00 · 2018-10-24 10:36:07 +02:00 · f9ce74d7b8
commit f9ce74d7b8
parent 1d9e9b7ed6
6 changed files with 290 additions and 108 deletions
--- a/src/main/java/data/Collocability.java
+++ b/src/main/java/data/Collocability.java
@ -0,0 +1,51 @@
+package data;
+
+/**
+ * Collocability measures that can be selected for an analysis.
+ *
+ * Each constant carries the label that is shown to the user and that
+ * {@link #factory(String)} resolves back to the constant.
+ */
+public enum Collocability {
+	DICE("Dice");
+
+	/** Label returned by {@link #toString()}. */
+	private final String name;
+
+	Collocability(String name) {
+		this.name = name;
+	}
+
+	/**
+	 * @return the label of this measure
+	 */
+	@Override
+	public String toString() {
+		return this.name;
+	}
+
+	/**
+	 * Resolves a label to its constant.
+	 *
+	 * @param cf label to look up (case-sensitive); may be {@code null}
+	 * @return the constant whose label equals {@code cf}, or {@code null} when
+	 *         {@code cf} is {@code null} or matches no constant
+	 */
+	public static Collocability factory(String cf) {
+		// Iterating over values() keeps the lookup correct when constants are added.
+		for (Collocability candidate : values()) {
+			if (candidate.name.equals(cf)) {
+				return candidate;
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * @return the key used for this measure in the metadata block of the export,
+	 *         or {@code null} when none is defined for the constant
+	 */
+	public String toMetadataString() {
+		switch (this) {
+			case DICE:
+				return "Kolokabilnost - Dice:";
+			default:
+				return null;
+		}
+	}
+
+	/**
+	 * @return the column header used for this measure in the export,
+	 *         or {@code null} when none is defined for the constant
+	 */
+	public String toHeaderString() {
+		switch (this) {
+			case DICE:
+				return "Kolokabilnost - Dice";
+			default:
+				return null;
+		}
+	}
+}
--- a/src/main/java/data/Filter.java
+++ b/src/main/java/data/Filter.java
@ -8,7 +8,7 @@ import java.util.regex.Pattern;
 import gui.ValidationUtil;

@SuppressWarnings("unchecked")
-public class Filter {
+public class Filter implements Cloneable {
 	private HashMap<filterName, Object> filter;

 	public enum filterName {
@ -28,7 +28,8 @@ public class Filter {
 		MULTIPLE_KEYS,
 		NOTE_PUNCTUATIONS,
 		MINIMAL_OCCURRENCES,
-		MINIMAL_TAXONOMY
+		MINIMAL_TAXONOMY,
+		COLLOCABILITY
 	}

 	public Filter() {
@ -186,6 +187,23 @@ public class Filter {
 		filter.put(MULTIPLE_KEYS, newKeys);
 	}

+	/**
+	 * Stores the selected collocability measures in this filter.
+	 *
+	 * The given list is copied, so later changes to {@code keys} do not affect the
+	 * filter. A {@code null} argument is stored as an empty list.
+	 *
+	 * @param keys selected measures; may be {@code null}
+	 */
+	public void setCollocability(ArrayList<Collocability> keys) {
+		ArrayList<Collocability> copy = (keys == null)
+				? new ArrayList<Collocability>()
+				: new ArrayList<Collocability>(keys);
+		filter.put(COLLOCABILITY, copy);
+	}
+
+	/**
+	 * Returns the collocability measures stored in this filter.
+	 *
+	 * @return the stored list, or a new empty list when nothing is stored under
+	 *         {@code COLLOCABILITY}
+	 */
+	public ArrayList<Collocability> getCollocability() {
+		Object stored = filter.get(COLLOCABILITY);
+		if (stored == null) {
+			// covers both a missing key and a key mapped to null
+			return new ArrayList<>();
+		}
+		return (ArrayList<Collocability>) stored;
+	}
+
 	public ArrayList<CalculateFor> getMultipleKeys() {
 		if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) {
 			return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS);
@ -254,4 +272,18 @@ public class Filter {
        }

 	}
+
+
+
+	/**
+	 * Creates a copy of this filter that is backed by its own settings map.
+	 *
+	 * Putting a new value for a setting into the copy does not affect this instance.
+	 * The copy is shallow: the stored values themselves (including list values) are
+	 * shared between this filter and the copy.
+	 *
+	 * @return the copied {@code Filter}
+	 * @throws CloneNotSupportedException propagated from {@link Object#clone()}
+	 */
+	@Override
+	public Object clone() throws CloneNotSupportedException {
+		// Let a failure propagate instead of silently handing back an empty Filter.
+		Filter f = (Filter) super.clone();
+		f.filter = (HashMap<filterName, Object>) this.filter.clone();
+
+		return f;
+	}
 }
--- a/src/main/java/data/StatisticsNew.java
+++ b/src/main/java/data/StatisticsNew.java
@ -40,14 +40,16 @@ public class StatisticsNew {
 	private RDB db;
 	private boolean analysisProducedResults;
 	private LocalDateTime time;
+	private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;

 	public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
 		this.corpus = corpus;
 		this.filter = filter;
 		this.taxonomyResult = new ConcurrentHashMap<>();
 		this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
+		this.collocability = new ConcurrentHashMap<>();

-		// create table for counting word occurances per taxonomies
+		// create table for counting word occurrences per taxonomies
 		if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
 			if (this.filter.getTaxonomy().isEmpty()) {
 				for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
@ -213,7 +215,7 @@ public class StatisticsNew {
 		removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
 		removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
 		stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
-		Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult, filter);
+		Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
 		return true;
 	}

@ -442,11 +444,6 @@ public class StatisticsNew {
 				info.put("n-gram nivo:", String.valueOf(ngramLevel));
 			}

-//			else if (ngramLevel == 1){
-//				info.put("n-gram nivo:", "nivo besed");
-//			} else {
-//				info.put("n-gram nivo:", "nivo črk");
-//			}
 			// skip
 			if (ngramLevel > 1)
 				info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
@ -464,11 +461,6 @@ public class StatisticsNew {
 				info.put("MSD:", msdPattern.toString());
 			}

-			// taksonomija
-//			if (!isEmpty(filter.getTaxonomy())) {
-//				info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
-//			}
-

 		}

@ -496,4 +488,28 @@ public class StatisticsNew {

 		return info;
 	}
+
+    /**
+     * Calculates the collocability value of every entry in this object's "Total"
+     * results and stores the values under the first selected collocability measure.
+     *
+     * For each entry the key text is split on whitespace and the "Total" counts of
+     * the parts are looked up in {@code oneWordStatistics} and summed. The stored
+     * value is {@code ngramValue * entryCount / sumOfPartCounts}.
+     *
+     * Parts without a count in {@code oneWordStatistics} contribute 0. When the sum
+     * is 0 the stored value is 0.0. When no collocability measure is selected in the
+     * filter, nothing is calculated.
+     *
+     * @param oneWordStatistics statistics holding the counts of the single parts
+     */
+    public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
+        if (filter.getCollocability().isEmpty()) {
+            // No measure selected, so there is no key to store the values under.
+            return;
+        }
+
+        Map<MultipleHMKeys, AtomicLong> oneWordTotals = oneWordStatistics.getTaxonomyResult().get("Total");
+        Map<MultipleHMKeys, AtomicLong> ngramTotals = taxonomyResult.get("Total");
+        double ngramLength = (double) filter.getNgramValue();
+
+        Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
+
+        for (Map.Entry<MultipleHMKeys, AtomicLong> entry : ngramTotals.entrySet()) {
+            long sum_fwi = 0L;
+            for (String s : entry.getKey().getK1().split("\\s+")) {
+                AtomicLong wordFrequency = oneWordTotals.get(new MultipleHMKeys1(s));
+                // A part may be absent from the one-word results; count it as 0 instead of failing.
+                if (wordFrequency != null) {
+                    sum_fwi += wordFrequency.longValue();
+                }
+            }
+
+            // Guard the division so no Infinity/NaN ends up in the results.
+            double dice_value = sum_fwi > 0
+                    ? ngramLength * (double) entry.getValue().longValue() / sum_fwi
+                    : 0.0;
+            collocabilityMap.put(entry.getKey(), dice_value);
+        }
+
+        collocability.put(filter.getCollocability().get(0), collocabilityMap);
+    }
+
+    /**
+     * @return the calculated collocability values, keyed first by measure and then
+     *         by result key; this is the internal map, not a copy
+     */
+    public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability() {
+        return collocability;
+    }
 }
--- a/src/main/java/gui/StringAnalysisTabNew2.java
+++ b/src/main/java/gui/StringAnalysisTabNew2.java
@ -7,10 +7,13 @@ import static gui.Messages.*;
 import java.io.File;
 import java.io.UnsupportedEncodingException;
 import java.util.*;
+import java.util.concurrent.*;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.regex.Pattern;

 import javafx.application.HostServices;
-import javafx.collections.transformation.SortedList;
+import org.apache.commons.lang3.SerializationUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@ -59,6 +62,10 @@ public class StringAnalysisTabNew2 {
    private ComboBox<String> calculateForCB;
    private CalculateFor calculateFor;

+    @FXML
+    private CheckComboBox<String> collocabilityCCB;
+    private ArrayList<Collocability> collocability;
+
    @FXML
    private ComboBox<String> ngramValueCB;
    private Integer ngramValue;
@ -126,6 +133,7 @@ public class StringAnalysisTabNew2 {
    private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
    private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
    private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
+    private static final ObservableList<String> COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice");
    private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();


@ -219,6 +227,21 @@ public class StringAnalysisTabNew2 {

        calculateForCB.getSelectionModel().select(0);

+        // collocabilityCCB
+        collocabilityCCB.getItems().removeAll();
+
+        collocabilityCCB.getItems().setAll(FXCollections.observableArrayList(COLLOCABILITY_ITEMS));
+        collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
+            collocability = new ArrayList<>();
+            ObservableList<Collocability> checkedItems = FXCollections.observableArrayList();
+            for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) {
+                checkedItems.add(Collocability.factory(el));
+            }
+            collocability.addAll(checkedItems);
+            logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ",")));
+        });
+        collocabilityCCB.getCheckModel().clearChecks();
+
        // msd
        msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
            if (!newValue) {
@ -535,6 +558,7 @@ public class StringAnalysisTabNew2 {
        filter.setMsd(msd);
        filter.setMinimalOccurrences(minimalOccurrences);
        filter.setMinimalTaxonomy(minimalTaxonomy);
+        filter.setCollocability(collocability);

        if (ngramValue != null && ngramValue == 0) {
            filter.setStringLength(stringLength);
@ -545,6 +569,7 @@ public class StringAnalysisTabNew2 {
            // no errors
            logger.info("Executing: ", filter.toString());
            StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
+//            ADD THINGS HERE!!!
            execute(statistic);
        } else {
            logAlert(message);
@ -585,8 +610,109 @@ public class StringAnalysisTabNew2 {
        }
    }

+//    public void calculate_collocabilities(StatisticsNew statistics, StatisticsNew oneWordStatistics) {
+//        statistics.updateCalculateCollocabilities(oneWordStatistics);
+//
+//    }
+
+    /**
+     * Creates the follow-up task that is used when a collocability measure was requested.
+     *
+     * The task reads every detected corpus file into {@code statisticsOneGrams}. When it
+     * succeeds, the collocability values of {@code statistic} are calculated from those
+     * counts, the results are saved to disk and the user is notified about the outcome.
+     * A failure of the task is logged. Progress reporting is not wired up for this task.
+     *
+     * Note: this method also re-targets the cancel button so that it cancels the
+     * returned task.
+     *
+     * @param statistic          statistics of the main analysis whose results are saved
+     * @param statisticsOneGrams statistics object that receives the counts read by the task
+     * @return the prepared task; the caller is responsible for starting it
+     */
+    private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
+        Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
+
+        final Task<Void> task = new Task<Void>() {
+            @SuppressWarnings("Duplicates")
+            @Override
+            protected Void call() throws Exception {
+                for (File f : corpusFiles) {
+                    readXML(f.toString(), statisticsOneGrams);
+                }
+
+                return null;
+            }
+        };
+
+        task.setOnSucceeded(e -> {
+            try {
+                statistic.updateCalculateCollocabilities(statisticsOneGrams);
+                boolean successullySaved = statistic.saveResultToDisk();
+                if (successullySaved) {
+                    showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
+                } else {
+                    showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
+                }
+            } catch (UnsupportedEncodingException e1) {
+                showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
+                logger.error("Error while saving", e1);
+            } catch (OutOfMemoryError e1) {
+                showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
+                logger.error("Out of memory error", e1);
+            }
+        });
+
+        // Do not lose the cause when reading the corpus files fails.
+        task.setOnFailed(e -> logger.error("Error while executing", task.getException()));
+
+        // When cancel button is pressed cancel analysis
+        cancel.setOnAction(e -> task.cancel());
+
+        return task;
+    }
+
    private void execute(StatisticsNew statistic) {
-        logger.info("Started execution: ", statistic.getFilter());
+        Filter f = statistic.getFilter();
+        logger.info("Started execution: ", f);
+//        Task<Void> task_collocability = null;

        Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
        boolean corpusIsSplit = corpusFiles.size() > 1;
@ -613,9 +739,23 @@ public class StringAnalysisTabNew2 {

        ngramProgressBar.progressProperty().bind(task.progressProperty());
        progressLabel.textProperty().bind(task.messageProperty());
-
        task.setOnSucceeded(e -> {
+            if (f.getCollocability().size() > 0) {
                try{
+                    Filter f2 = (Filter) f.clone();
+                    f2.setNgramValue(1);
+                    StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
+                    final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
+                    final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
+                    thread_collocability.setDaemon(true);
+                    thread_collocability.start();
+                }catch(CloneNotSupportedException c){}
+
+
+
+            } else {
+                try {
+//                    System.out.print(statistics);
                    boolean successullySaved = statistic.saveResultToDisk();
                    if (successullySaved) {
                        showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
@ -629,6 +769,7 @@ public class StringAnalysisTabNew2 {
                    showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
                    logger.error("Out of memory error", e1);
                }
+            }

            ngramProgressBar.progressProperty().unbind();
            ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
--- a/src/main/java/util/Export.java
+++ b/src/main/java/util/Export.java
@ -9,9 +9,7 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicLong;

-import data.CalculateFor;
-import data.Filter;
-import data.MultipleHMKeys;
+import data.*;
 import gui.ValidationUtil;
 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVPrinter;
@ -61,7 +59,9 @@ public class Export {
 	}

 	public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
-								  Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults, Filter filter) {
+                                  StatisticsNew statistics, Filter filter) {
+        Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
+
 		//Delimiter used in CSV file
 		String NEW_LINE_SEPARATOR = "\n";
 		List<Object> FILE_HEADER_AL = new ArrayList<Object>();
@ -96,77 +96,22 @@ public class Export {
 			FILE_HEADER_AL.add("Lema male črke");
 		headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));

-//		if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
-//			if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
-//				headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
-//				if (headerInfoBlock.get("Analiza").equals("Besede")){
-//					FILE_HEADER_AL.add("Različnica");
-//				} else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
-//					FILE_HEADER_AL.add("Različnice");
-//				}
-//			} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
-//				headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
-//				if (headerInfoBlock.get("Analiza").equals("Besede")){
-//					FILE_HEADER_AL.add("Lema");
-//                    FILE_HEADER_AL.add("Lema male črke");
-//				} else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
-//					FILE_HEADER_AL.add("Leme");
-//                    FILE_HEADER_AL.add("Leme male črke");
-//				}
-//			} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
-//				headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
-//				if (headerInfoBlock.get("Analiza").equals("Besede")){
-//					FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
-//				} else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
-//					FILE_HEADER_AL.add("Oblikoskladenjska oznake");
-//				}
-//			} else {
-//				headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
-//				FILE_HEADER_AL.add("Lema");
-//                FILE_HEADER_AL.add("Lema male črke");
-//			}
-
-
-//			for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
 		for (CalculateFor otherKey : filter.getMultipleKeys()) {
            FILE_HEADER_AL.add(otherKey.toHeaderString());
            if (otherKey.equals(CalculateFor.LEMMA))
                FILE_HEADER_AL.add("Lema male črke");
 		}

-//					if(otherKey.equals(CalculateFor.LEMMA)){
-//						FILE_HEADER_AL.add("Lema");
-//						FILE_HEADER_AL.add("Lema male črke");
-//					}
-//					if(otherKey.equals(CalculateFor.WORD_TYPE)){
-//						FILE_HEADER_AL.add("Besedna vrsta");
-//					}
-//					if(otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
-//						FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
-//					}
-//					if(otherKey.equals(CalculateFor.NORMALIZED_WORD)){
-//						FILE_HEADER_AL.add("Normalizirana različnica");
-//					}
-//				}
-
-//				break;
-//			}
-
-

 		FILE_HEADER_AL.add("Skupna absolutna pogostost");
 		FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString());

-//		if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
-//			FILE_HEADER_AL.add("Delež glede na vse različnice");
-//		} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
-//			FILE_HEADER_AL.add("Delež glede na vse leme");
-//		} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
-//			FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
-//		} else {
-//			FILE_HEADER_AL.add("Delež glede na vse leme");
-//		}
 		FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
+
+        if (filter.getCollocability().size() > 0){
+            FILE_HEADER_AL.add(filter.getCollocability().get(0).toHeaderString());
+        }
+
 		for (String key : taxonomyResults.keySet()) {
 			if(!key.equals("Total") && num_taxonomy_frequencies.get(key) > 0) {
 				FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
@ -176,9 +121,6 @@ public class Export {
 		}
 		FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
 		FILE_HEADER_AL.toArray(FILE_HEADER);
-//		} else {
-//			FILE_HEADER = new Object[]{"word", "frequency", "percent"};
-//		}

 		String fileName = "";

@ -250,16 +192,7 @@ public class Export {
                    	i++;
 					}

-//					if(!e.getKey().getLemma().equals("")){
-//						dataEntry.add(e.getKey().getLemma());
-//                        dataEntry.add(e.getKey().getLemma().toLowerCase());
-//					}
-//					if(!e.getKey().getWordType().equals("")){
-//						dataEntry.add(e.getKey().getWordType());
-//					}
-//					if(!e.getKey().getMsd().equals("")){
-//						dataEntry.add(e.getKey().getMsd());
-//					}
+
 					dataEntry.add(e.getValue().toString());
 					dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
 					dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
@ -270,6 +203,11 @@ public class Export {
 							dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
 							dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
 						}
+
+					}
+
+                    if (filter.getCollocability().size() > 0){
+                        dataEntry.add(String.format("%.4f", statistics.getCollocability().get(filter.getCollocability().get(0)).get(e.getKey())));
                    }

 					// Write msd separated per letters at the end of each line in csv
--- a/src/main/resources/gui/StringAnalysisTabNew2.fxml
+++ b/src/main/resources/gui/StringAnalysisTabNew2.fxml
@ -105,8 +105,12 @@
    <Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
        <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" />
        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
-        <Label fx:id="solarFilters" layoutX="10.0" layoutY="100.0" text="Izbrani filtri:" />
-        <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="300.0" prefWidth="275.0" text=" " wrapText="true" />
+
+        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Kolokabilnost" />
+        <CheckComboBox fx:id="collocabilityCCB" layoutX="185.0" layoutY="100.0" prefHeight="25.0" prefWidth="180.0"/>
+
+        <Label fx:id="solarFilters" layoutX="10.0" layoutY="140.0" text="Izbrani filtri:" />
+        <!-- placed 40px below the "Izbrani filtri:" heading (layoutY 140) so the two labels do not overlap -->
+        <Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="180.0" prefHeight="260.0" prefWidth="275.0" text=" " wrapText="true" />
        <!-- samoglasniki/soglasniki -->
        <Pane fx:id="paneLetters">
            <children>