Added collocability functionality - implemented Dice method

This commit is contained in:
Luka 2018-10-24 10:36:07 +02:00
parent 1d9e9b7ed6
commit f9ce74d7b8
6 changed files with 290 additions and 108 deletions

View File

@ -0,0 +1,51 @@
package data;
public enum Collocability {
DICE("Dice");
private final String name;
Collocability(String name) {
this.name = name;
}
public String toString() {
return this.name;
}
public static Collocability factory(String cf) {
if (cf != null) {
if (DICE.toString().equals(cf)) {
return DICE;
}
}
return null;
}
public String toMetadataString() {
switch(this){
case DICE:
return "Kolokabilnost - Dice:";
default:
return null;
}
}
public String toHeaderString() {
switch(this){
case DICE:
return "Kolokabilnost - Dice";
default:
return null;
}
}
// public String toPercentString() {
// switch(this){
// case DICE:
// return "Delež glede na vse različnice";
// default:
// return null;
// }
// }
}

View File

@ -8,7 +8,7 @@ import java.util.regex.Pattern;
import gui.ValidationUtil; import gui.ValidationUtil;
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public class Filter { public class Filter implements Cloneable {
private HashMap<filterName, Object> filter; private HashMap<filterName, Object> filter;
public enum filterName { public enum filterName {
@ -28,7 +28,8 @@ public class Filter {
MULTIPLE_KEYS, MULTIPLE_KEYS,
NOTE_PUNCTUATIONS, NOTE_PUNCTUATIONS,
MINIMAL_OCCURRENCES, MINIMAL_OCCURRENCES,
MINIMAL_TAXONOMY MINIMAL_TAXONOMY,
COLLOCABILITY
} }
public Filter() { public Filter() {
@ -186,6 +187,23 @@ public class Filter {
filter.put(MULTIPLE_KEYS, newKeys); filter.put(MULTIPLE_KEYS, newKeys);
} }
public void setCollocability(ArrayList<Collocability> keys) {
ArrayList<Collocability> newKeys = new ArrayList<>();
if (keys != null) {
newKeys.addAll(keys);
}
filter.put(COLLOCABILITY, newKeys);
}
public ArrayList<Collocability> getCollocability() {
if (filter.containsKey(COLLOCABILITY) && filter.get(COLLOCABILITY) != null) {
return (ArrayList<Collocability>) filter.get(COLLOCABILITY);
} else {
return new ArrayList<>();
}
}
public ArrayList<CalculateFor> getMultipleKeys() { public ArrayList<CalculateFor> getMultipleKeys() {
if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) { if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) {
return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS); return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS);
@ -254,4 +272,18 @@ public class Filter {
} }
} }
public Object clone() throws CloneNotSupportedException{
Filter f = null;
try {
f = (Filter) super.clone();
} catch (CloneNotSupportedException e) {
f = new Filter();
}
f.filter = (HashMap<filterName, Object>) f.filter.clone();
return f;
}
} }

View File

@ -40,14 +40,16 @@ public class StatisticsNew {
private RDB db; private RDB db;
private boolean analysisProducedResults; private boolean analysisProducedResults;
private LocalDateTime time; private LocalDateTime time;
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) { public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
this.corpus = corpus; this.corpus = corpus;
this.filter = filter; this.filter = filter;
this.taxonomyResult = new ConcurrentHashMap<>(); this.taxonomyResult = new ConcurrentHashMap<>();
this.taxonomyResult.put("Total", new ConcurrentHashMap<>()); this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
this.collocability = new ConcurrentHashMap<>();
// create table for counting word occurances per taxonomies // create table for counting word occurrences per taxonomies
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) { if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
if (this.filter.getTaxonomy().isEmpty()) { if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) { for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
@ -213,7 +215,7 @@ public class StatisticsNew {
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences()); removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy()); removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit)))); stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult, filter); Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
return true; return true;
} }
@ -442,11 +444,6 @@ public class StatisticsNew {
info.put("n-gram nivo:", String.valueOf(ngramLevel)); info.put("n-gram nivo:", String.valueOf(ngramLevel));
} }
// else if (ngramLevel == 1){
// info.put("n-gram nivo:", "nivo besed");
// } else {
// info.put("n-gram nivo:", "nivo črk");
// }
// skip // skip
if (ngramLevel > 1) if (ngramLevel > 1)
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0"); info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
@ -464,11 +461,6 @@ public class StatisticsNew {
info.put("MSD:", msdPattern.toString()); info.put("MSD:", msdPattern.toString());
} }
// taksonomija
// if (!isEmpty(filter.getTaxonomy())) {
// info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
// }
} }
@ -496,4 +488,28 @@ public class StatisticsNew {
return info; return info;
} }
public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
String[] splitedString = hmKey.getK1().split("\\s+");
long sum_fwi =0L;
for(String s : splitedString){
MultipleHMKeys search = new MultipleHMKeys1(s);
sum_fwi += oneWordTaxonomyResult.get("Total").get(search).longValue();
}
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
collocabilityMap.put(hmKey, dice_value);
}
collocability.put(filter.getCollocability().get(0), collocabilityMap);
}
public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability(){
return this.collocability;
}
} }

View File

@ -7,10 +7,13 @@ import static gui.Messages.*;
import java.io.File; import java.io.File;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.util.*; import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import javafx.application.HostServices; import javafx.application.HostServices;
import javafx.collections.transformation.SortedList; import org.apache.commons.lang3.SerializationUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
@ -59,6 +62,10 @@ public class StringAnalysisTabNew2 {
private ComboBox<String> calculateForCB; private ComboBox<String> calculateForCB;
private CalculateFor calculateFor; private CalculateFor calculateFor;
@FXML
private CheckComboBox<String> collocabilityCCB;
private ArrayList<Collocability> collocability;
@FXML @FXML
private ComboBox<String> ngramValueCB; private ComboBox<String> ngramValueCB;
private Integer ngramValue; private Integer ngramValue;
@ -126,6 +133,7 @@ public class StringAnalysisTabNew2 {
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
private static final ObservableList<String> COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
@ -219,6 +227,21 @@ public class StringAnalysisTabNew2 {
calculateForCB.getSelectionModel().select(0); calculateForCB.getSelectionModel().select(0);
// collocabilityCCB
collocabilityCCB.getItems().removeAll();
collocabilityCCB.getItems().setAll(FXCollections.observableArrayList(COLLOCABILITY_ITEMS));
collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
collocability = new ArrayList<>();
ObservableList<Collocability> checkedItems = FXCollections.observableArrayList();
for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) {
checkedItems.add(Collocability.factory(el));
}
collocability.addAll(checkedItems);
logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ",")));
});
collocabilityCCB.getCheckModel().clearChecks();
// msd // msd
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> { msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) { if (!newValue) {
@ -535,6 +558,7 @@ public class StringAnalysisTabNew2 {
filter.setMsd(msd); filter.setMsd(msd);
filter.setMinimalOccurrences(minimalOccurrences); filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy); filter.setMinimalTaxonomy(minimalTaxonomy);
filter.setCollocability(collocability);
if (ngramValue != null && ngramValue == 0) { if (ngramValue != null && ngramValue == 0) {
filter.setStringLength(stringLength); filter.setStringLength(stringLength);
@ -545,6 +569,7 @@ public class StringAnalysisTabNew2 {
// no errors // no errors
logger.info("Executing: ", filter.toString()); logger.info("Executing: ", filter.toString());
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb); StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
// ADD THINGS HERE!!!
execute(statistic); execute(statistic);
} else { } else {
logAlert(message); logAlert(message);
@ -585,8 +610,109 @@ public class StringAnalysisTabNew2 {
} }
} }
// public void calculate_collocabilities(StatisticsNew statistics, StatisticsNew oneWordStatistics) {
// statistics.updateCalculateCollocabilities(oneWordStatistics);
//
// }
private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
for (File f : corpusFiles) {
readXML(f.toString(), statisticsOneGrams);
// i++;
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
}
return null;
}
};
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
System.out.print("test");
try {
System.out.print(statistic);
// calculate_collocabilities(statistic, statisticsOneGrams);
statistic.updateCalculateCollocabilities(statisticsOneGrams);
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
} else {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1) {
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
}
// try {
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
// } else {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
// logger.error("Error while saving", e1);
// } catch (OutOfMemoryError e1){
// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
// logger.error("Out of memory error", e1);
// }
//
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
});
task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
});
task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
// logger.info("cancel button");
});
// cancel.setVisible(true);
return task;
}
private void execute(StatisticsNew statistic) { private void execute(StatisticsNew statistic) {
logger.info("Started execution: ", statistic.getFilter()); Filter f = statistic.getFilter();
logger.info("Started execution: ", f);
// Task<Void> task_collocability = null;
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
boolean corpusIsSplit = corpusFiles.size() > 1; boolean corpusIsSplit = corpusFiles.size() > 1;
@ -613,9 +739,23 @@ public class StringAnalysisTabNew2 {
ngramProgressBar.progressProperty().bind(task.progressProperty()); ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty()); progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> { task.setOnSucceeded(e -> {
if (f.getCollocability().size() > 0) {
try{ try{
Filter f2 = (Filter) f.clone();
f2.setNgramValue(1);
StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
thread_collocability.setDaemon(true);
thread_collocability.start();
}catch(CloneNotSupportedException c){}
} else {
try {
// System.out.print(statistics);
boolean successullySaved = statistic.saveResultToDisk(); boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) { if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED); showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
@ -629,6 +769,7 @@ public class StringAnalysisTabNew2 {
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY); showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1); logger.error("Out of memory error", e1);
} }
}
ngramProgressBar.progressProperty().unbind(); ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);

View File

@ -9,9 +9,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import data.CalculateFor; import data.*;
import data.Filter;
import data.MultipleHMKeys;
import gui.ValidationUtil; import gui.ValidationUtil;
import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter; import org.apache.commons.csv.CSVPrinter;
@ -61,7 +59,9 @@ public class Export {
} }
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock, public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults, Filter filter) { StatisticsNew statistics, Filter filter) {
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
//Delimiter used in CSV file //Delimiter used in CSV file
String NEW_LINE_SEPARATOR = "\n"; String NEW_LINE_SEPARATOR = "\n";
List<Object> FILE_HEADER_AL = new ArrayList<Object>(); List<Object> FILE_HEADER_AL = new ArrayList<Object>();
@ -96,77 +96,22 @@ public class Export {
FILE_HEADER_AL.add("Lema male črke"); FILE_HEADER_AL.add("Lema male črke");
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies)); headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
// if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
// headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
// if (headerInfoBlock.get("Analiza").equals("Besede")){
// FILE_HEADER_AL.add("Različnica");
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
// FILE_HEADER_AL.add("Različnice");
// }
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
// headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
// if (headerInfoBlock.get("Analiza").equals("Besede")){
// FILE_HEADER_AL.add("Lema");
// FILE_HEADER_AL.add("Lema male črke");
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
// FILE_HEADER_AL.add("Leme");
// FILE_HEADER_AL.add("Leme male črke");
// }
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
// headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
// if (headerInfoBlock.get("Analiza").equals("Besede")){
// FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
// FILE_HEADER_AL.add("Oblikoskladenjska oznake");
// }
// } else {
// headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
// FILE_HEADER_AL.add("Lema");
// FILE_HEADER_AL.add("Lema male črke");
// }
// for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
for (CalculateFor otherKey : filter.getMultipleKeys()) { for (CalculateFor otherKey : filter.getMultipleKeys()) {
FILE_HEADER_AL.add(otherKey.toHeaderString()); FILE_HEADER_AL.add(otherKey.toHeaderString());
if (otherKey.equals(CalculateFor.LEMMA)) if (otherKey.equals(CalculateFor.LEMMA))
FILE_HEADER_AL.add("Lema male črke"); FILE_HEADER_AL.add("Lema male črke");
} }
// if(otherKey.equals(CalculateFor.LEMMA)){
// FILE_HEADER_AL.add("Lema");
// FILE_HEADER_AL.add("Lema male črke");
// }
// if(otherKey.equals(CalculateFor.WORD_TYPE)){
// FILE_HEADER_AL.add("Besedna vrsta");
// }
// if(otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
// FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
// }
// if(otherKey.equals(CalculateFor.NORMALIZED_WORD)){
// FILE_HEADER_AL.add("Normalizirana različnica");
// }
// }
// break;
// }
FILE_HEADER_AL.add("Skupna absolutna pogostost"); FILE_HEADER_AL.add("Skupna absolutna pogostost");
FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString()); FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString());
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
// FILE_HEADER_AL.add("Delež glede na vse različnice");
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
// FILE_HEADER_AL.add("Delež glede na vse leme");
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
// FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
// } else {
// FILE_HEADER_AL.add("Delež glede na vse leme");
// }
FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)"); FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
if (filter.getCollocability().size() > 0){
FILE_HEADER_AL.add(filter.getCollocability().get(0).toHeaderString());
}
for (String key : taxonomyResults.keySet()) { for (String key : taxonomyResults.keySet()) {
if(!key.equals("Total") && num_taxonomy_frequencies.get(key) > 0) { if(!key.equals("Total") && num_taxonomy_frequencies.get(key) > 0) {
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]"); FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
@ -176,9 +121,6 @@ public class Export {
} }
FILE_HEADER = new String[ FILE_HEADER_AL.size() ]; FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
FILE_HEADER_AL.toArray(FILE_HEADER); FILE_HEADER_AL.toArray(FILE_HEADER);
// } else {
// FILE_HEADER = new Object[]{"word", "frequency", "percent"};
// }
String fileName = ""; String fileName = "";
@ -250,16 +192,7 @@ public class Export {
i++; i++;
} }
// if(!e.getKey().getLemma().equals("")){
// dataEntry.add(e.getKey().getLemma());
// dataEntry.add(e.getKey().getLemma().toLowerCase());
// }
// if(!e.getKey().getWordType().equals("")){
// dataEntry.add(e.getKey().getWordType());
// }
// if(!e.getKey().getMsd().equals("")){
// dataEntry.add(e.getKey().getMsd());
// }
dataEntry.add(e.getValue().toString()); dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies)); dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies)); dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
@ -270,6 +203,11 @@ public class Export {
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key))); dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key))); dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
} }
}
if (filter.getCollocability().size() > 0){
dataEntry.add(String.format("%.4f", statistics.getCollocability().get(filter.getCollocability().get(0)).get(e.getKey())));
} }
// Write msd separated per letters at the end of each line in csv // Write msd separated per letters at the end of each line in csv

View File

@ -105,8 +105,12 @@
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0"> <Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" /> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" /> <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
<Label fx:id="solarFilters" layoutX="10.0" layoutY="100.0" text="Izbrani filtri:" />
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="300.0" prefWidth="275.0" text=" " wrapText="true" /> <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Kolokabilnost" />
<CheckComboBox fx:id="collocabilityCCB" layoutX="185.0" layoutY="100.0" prefHeight="25.0" prefWidth="180.0"/>
<Label fx:id="solarFilters" layoutX="10.0" layoutY="140.0" text="Izbrani filtri:" />
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="260.0" prefWidth="275.0" text=" " wrapText="true" />
<!-- samoglasniki/soglasniki --> <!-- samoglasniki/soglasniki -->
<Pane fx:id="paneLetters"> <Pane fx:id="paneLetters">
<children> <children>