Added collocability functionality - implemented Dice method
This commit is contained in:
parent
1d9e9b7ed6
commit
f9ce74d7b8
51
src/main/java/data/Collocability.java
Executable file
51
src/main/java/data/Collocability.java
Executable file
|
@ -0,0 +1,51 @@
|
||||||
|
package data;
|
||||||
|
|
||||||
|
/**
 * Collocability (word-association) measures that can be calculated for n-grams.
 *
 * <p>Each constant carries the display name that is shown in the GUI and that
 * {@link #factory(String)} uses to map a GUI selection back to the constant.
 */
public enum Collocability {
    DICE("Dice");

    /** Display name of the measure, as shown in the GUI. */
    private final String name;

    Collocability(String name) {
        this.name = name;
    }

    /**
     * @return the display name of this measure
     */
    @Override
    public String toString() {
        return this.name;
    }

    /**
     * Looks up a measure by its display name.
     *
     * @param cf display name to look up (exact, case-sensitive match); may be {@code null}
     * @return the matching constant, or {@code null} when {@code cf} is {@code null}
     *         or does not match any constant
     */
    public static Collocability factory(String cf) {
        if (cf == null) {
            return null;
        }
        // Iterate over all constants so newly added measures are resolved automatically.
        for (Collocability candidate : values()) {
            if (candidate.name.equals(cf)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * @return the label used for this measure in the metadata block of an export
     *         (with trailing colon), or {@code null} when no label is defined
     */
    public String toMetadataString() {
        switch (this) {
            case DICE:
                return "Kolokabilnost - Dice:";
            default:
                return null;
        }
    }

    /**
     * @return the column header used for this measure in an export,
     *         or {@code null} when no header is defined
     */
    public String toHeaderString() {
        switch (this) {
            case DICE:
                return "Kolokabilnost - Dice";
            default:
                return null;
        }
    }
}
|
|
@ -8,7 +8,7 @@ import java.util.regex.Pattern;
|
||||||
import gui.ValidationUtil;
|
import gui.ValidationUtil;
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public class Filter {
|
public class Filter implements Cloneable {
|
||||||
private HashMap<filterName, Object> filter;
|
private HashMap<filterName, Object> filter;
|
||||||
|
|
||||||
public enum filterName {
|
public enum filterName {
|
||||||
|
@ -28,7 +28,8 @@ public class Filter {
|
||||||
MULTIPLE_KEYS,
|
MULTIPLE_KEYS,
|
||||||
NOTE_PUNCTUATIONS,
|
NOTE_PUNCTUATIONS,
|
||||||
MINIMAL_OCCURRENCES,
|
MINIMAL_OCCURRENCES,
|
||||||
MINIMAL_TAXONOMY
|
MINIMAL_TAXONOMY,
|
||||||
|
COLLOCABILITY
|
||||||
}
|
}
|
||||||
|
|
||||||
public Filter() {
|
public Filter() {
|
||||||
|
@ -186,6 +187,23 @@ public class Filter {
|
||||||
filter.put(MULTIPLE_KEYS, newKeys);
|
filter.put(MULTIPLE_KEYS, newKeys);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setCollocability(ArrayList<Collocability> keys) {
|
||||||
|
ArrayList<Collocability> newKeys = new ArrayList<>();
|
||||||
|
if (keys != null) {
|
||||||
|
newKeys.addAll(keys);
|
||||||
|
}
|
||||||
|
|
||||||
|
filter.put(COLLOCABILITY, newKeys);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<Collocability> getCollocability() {
|
||||||
|
if (filter.containsKey(COLLOCABILITY) && filter.get(COLLOCABILITY) != null) {
|
||||||
|
return (ArrayList<Collocability>) filter.get(COLLOCABILITY);
|
||||||
|
} else {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public ArrayList<CalculateFor> getMultipleKeys() {
|
public ArrayList<CalculateFor> getMultipleKeys() {
|
||||||
if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) {
|
if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) {
|
||||||
return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS);
|
return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS);
|
||||||
|
@ -254,4 +272,18 @@ public class Filter {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
public Object clone() throws CloneNotSupportedException{
|
||||||
|
Filter f = null;
|
||||||
|
try {
|
||||||
|
f = (Filter) super.clone();
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
f = new Filter();
|
||||||
|
}
|
||||||
|
f.filter = (HashMap<filterName, Object>) f.filter.clone();
|
||||||
|
|
||||||
|
return f;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,14 +40,16 @@ public class StatisticsNew {
|
||||||
private RDB db;
|
private RDB db;
|
||||||
private boolean analysisProducedResults;
|
private boolean analysisProducedResults;
|
||||||
private LocalDateTime time;
|
private LocalDateTime time;
|
||||||
|
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
|
||||||
|
|
||||||
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
||||||
this.corpus = corpus;
|
this.corpus = corpus;
|
||||||
this.filter = filter;
|
this.filter = filter;
|
||||||
this.taxonomyResult = new ConcurrentHashMap<>();
|
this.taxonomyResult = new ConcurrentHashMap<>();
|
||||||
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
|
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
|
||||||
|
this.collocability = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
// create table for counting word occurances per taxonomies
|
// create table for counting word occurrences per taxonomies
|
||||||
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
|
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
|
||||||
if (this.filter.getTaxonomy().isEmpty()) {
|
if (this.filter.getTaxonomy().isEmpty()) {
|
||||||
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
|
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
|
||||||
|
@ -213,7 +215,7 @@ public class StatisticsNew {
|
||||||
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
|
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
|
||||||
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
|
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
|
||||||
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
|
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
|
||||||
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult, filter);
|
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -442,11 +444,6 @@ public class StatisticsNew {
|
||||||
info.put("n-gram nivo:", String.valueOf(ngramLevel));
|
info.put("n-gram nivo:", String.valueOf(ngramLevel));
|
||||||
}
|
}
|
||||||
|
|
||||||
// else if (ngramLevel == 1){
|
|
||||||
// info.put("n-gram nivo:", "nivo besed");
|
|
||||||
// } else {
|
|
||||||
// info.put("n-gram nivo:", "nivo črk");
|
|
||||||
// }
|
|
||||||
// skip
|
// skip
|
||||||
if (ngramLevel > 1)
|
if (ngramLevel > 1)
|
||||||
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
|
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
|
||||||
|
@ -464,11 +461,6 @@ public class StatisticsNew {
|
||||||
info.put("MSD:", msdPattern.toString());
|
info.put("MSD:", msdPattern.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
// taksonomija
|
|
||||||
// if (!isEmpty(filter.getTaxonomy())) {
|
|
||||||
// info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -496,4 +488,28 @@ public class StatisticsNew {
|
||||||
|
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
|
||||||
|
Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
|
||||||
|
|
||||||
|
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
|
||||||
|
String[] splitedString = hmKey.getK1().split("\\s+");
|
||||||
|
|
||||||
|
long sum_fwi =0L;
|
||||||
|
for(String s : splitedString){
|
||||||
|
MultipleHMKeys search = new MultipleHMKeys1(s);
|
||||||
|
sum_fwi += oneWordTaxonomyResult.get("Total").get(search).longValue();
|
||||||
|
}
|
||||||
|
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
|
||||||
|
collocabilityMap.put(hmKey, dice_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
collocability.put(filter.getCollocability().get(0), collocabilityMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability(){
|
||||||
|
return this.collocability;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,10 +7,13 @@ import static gui.Messages.*;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.util.concurrent.*;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import javafx.application.HostServices;
|
import javafx.application.HostServices;
|
||||||
import javafx.collections.transformation.SortedList;
|
import org.apache.commons.lang3.SerializationUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.logging.log4j.LogManager;
|
import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
|
@ -59,6 +62,10 @@ public class StringAnalysisTabNew2 {
|
||||||
private ComboBox<String> calculateForCB;
|
private ComboBox<String> calculateForCB;
|
||||||
private CalculateFor calculateFor;
|
private CalculateFor calculateFor;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private CheckComboBox<String> collocabilityCCB;
|
||||||
|
private ArrayList<Collocability> collocability;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private ComboBox<String> ngramValueCB;
|
private ComboBox<String> ngramValueCB;
|
||||||
private Integer ngramValue;
|
private Integer ngramValue;
|
||||||
|
@ -126,6 +133,7 @@ public class StringAnalysisTabNew2 {
|
||||||
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||||
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||||
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
||||||
|
private static final ObservableList<String> COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice");
|
||||||
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||||
|
|
||||||
|
|
||||||
|
@ -219,6 +227,21 @@ public class StringAnalysisTabNew2 {
|
||||||
|
|
||||||
calculateForCB.getSelectionModel().select(0);
|
calculateForCB.getSelectionModel().select(0);
|
||||||
|
|
||||||
|
// collocabilityCCB
|
||||||
|
collocabilityCCB.getItems().removeAll();
|
||||||
|
|
||||||
|
collocabilityCCB.getItems().setAll(FXCollections.observableArrayList(COLLOCABILITY_ITEMS));
|
||||||
|
collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||||
|
collocability = new ArrayList<>();
|
||||||
|
ObservableList<Collocability> checkedItems = FXCollections.observableArrayList();
|
||||||
|
for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) {
|
||||||
|
checkedItems.add(Collocability.factory(el));
|
||||||
|
}
|
||||||
|
collocability.addAll(checkedItems);
|
||||||
|
logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ",")));
|
||||||
|
});
|
||||||
|
collocabilityCCB.getCheckModel().clearChecks();
|
||||||
|
|
||||||
// msd
|
// msd
|
||||||
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
if (!newValue) {
|
if (!newValue) {
|
||||||
|
@ -535,6 +558,7 @@ public class StringAnalysisTabNew2 {
|
||||||
filter.setMsd(msd);
|
filter.setMsd(msd);
|
||||||
filter.setMinimalOccurrences(minimalOccurrences);
|
filter.setMinimalOccurrences(minimalOccurrences);
|
||||||
filter.setMinimalTaxonomy(minimalTaxonomy);
|
filter.setMinimalTaxonomy(minimalTaxonomy);
|
||||||
|
filter.setCollocability(collocability);
|
||||||
|
|
||||||
if (ngramValue != null && ngramValue == 0) {
|
if (ngramValue != null && ngramValue == 0) {
|
||||||
filter.setStringLength(stringLength);
|
filter.setStringLength(stringLength);
|
||||||
|
@ -545,6 +569,7 @@ public class StringAnalysisTabNew2 {
|
||||||
// no errors
|
// no errors
|
||||||
logger.info("Executing: ", filter.toString());
|
logger.info("Executing: ", filter.toString());
|
||||||
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||||
|
// ADD THINGS HERE!!!
|
||||||
execute(statistic);
|
execute(statistic);
|
||||||
} else {
|
} else {
|
||||||
logAlert(message);
|
logAlert(message);
|
||||||
|
@ -585,8 +610,109 @@ public class StringAnalysisTabNew2 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// public void calculate_collocabilities(StatisticsNew statistics, StatisticsNew oneWordStatistics) {
|
||||||
|
// statistics.updateCalculateCollocabilities(oneWordStatistics);
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
|
||||||
|
private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
|
||||||
|
Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
|
||||||
|
|
||||||
|
final Task<Void> task = new Task<Void>() {
|
||||||
|
@SuppressWarnings("Duplicates")
|
||||||
|
@Override
|
||||||
|
protected Void call() throws Exception {
|
||||||
|
long i = 0;
|
||||||
|
for (File f : corpusFiles) {
|
||||||
|
readXML(f.toString(), statisticsOneGrams);
|
||||||
|
// i++;
|
||||||
|
// this.updateProgress(i, corpusFiles.size());
|
||||||
|
// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||||
|
// progressLabel.textProperty().bind(task.messageProperty());
|
||||||
|
|
||||||
|
task.setOnSucceeded(e -> {
|
||||||
|
System.out.print("test");
|
||||||
|
try {
|
||||||
|
System.out.print(statistic);
|
||||||
|
// calculate_collocabilities(statistic, statisticsOneGrams);
|
||||||
|
statistic.updateCalculateCollocabilities(statisticsOneGrams);
|
||||||
|
boolean successullySaved = statistic.saveResultToDisk();
|
||||||
|
if (successullySaved) {
|
||||||
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||||
|
} else {
|
||||||
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||||
|
}
|
||||||
|
} catch (UnsupportedEncodingException e1) {
|
||||||
|
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||||
|
logger.error("Error while saving", e1);
|
||||||
|
} catch (OutOfMemoryError e1) {
|
||||||
|
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
||||||
|
logger.error("Out of memory error", e1);
|
||||||
|
}
|
||||||
|
// try {
|
||||||
|
// boolean successullySaved = statistic.saveResultToDisk();
|
||||||
|
// if (successullySaved) {
|
||||||
|
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||||
|
// } else {
|
||||||
|
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||||
|
// }
|
||||||
|
// } catch (UnsupportedEncodingException e1) {
|
||||||
|
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||||
|
// logger.error("Error while saving", e1);
|
||||||
|
// } catch (OutOfMemoryError e1){
|
||||||
|
// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
||||||
|
// logger.error("Out of memory error", e1);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// ngramProgressBar.progressProperty().unbind();
|
||||||
|
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
|
// progressLabel.textProperty().unbind();
|
||||||
|
// progressLabel.setText("");
|
||||||
|
// cancel.setVisible(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
task.setOnFailed(e -> {
|
||||||
|
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
||||||
|
// logger.error("Error while executing", e);
|
||||||
|
// ngramProgressBar.progressProperty().unbind();
|
||||||
|
// ngramProgressBar.setProgress(0.0);
|
||||||
|
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||||
|
// progressLabel.textProperty().unbind();
|
||||||
|
// progressLabel.setText("");
|
||||||
|
// cancel.setVisible(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
task.setOnCancelled(e -> {
|
||||||
|
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
|
||||||
|
// ngramProgressBar.progressProperty().unbind();
|
||||||
|
// ngramProgressBar.setProgress(0.0);
|
||||||
|
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
|
// progressLabel.textProperty().unbind();
|
||||||
|
// progressLabel.setText("");
|
||||||
|
// cancel.setVisible(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
// When cancel button is pressed cancel analysis
|
||||||
|
cancel.setOnAction(e -> {
|
||||||
|
task.cancel();
|
||||||
|
// logger.info("cancel button");
|
||||||
|
});
|
||||||
|
|
||||||
|
// cancel.setVisible(true);
|
||||||
|
return task;
|
||||||
|
}
|
||||||
|
|
||||||
private void execute(StatisticsNew statistic) {
|
private void execute(StatisticsNew statistic) {
|
||||||
logger.info("Started execution: ", statistic.getFilter());
|
Filter f = statistic.getFilter();
|
||||||
|
logger.info("Started execution: ", f);
|
||||||
|
// Task<Void> task_collocability = null;
|
||||||
|
|
||||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||||
boolean corpusIsSplit = corpusFiles.size() > 1;
|
boolean corpusIsSplit = corpusFiles.size() > 1;
|
||||||
|
@ -613,9 +739,23 @@ public class StringAnalysisTabNew2 {
|
||||||
|
|
||||||
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||||
progressLabel.textProperty().bind(task.messageProperty());
|
progressLabel.textProperty().bind(task.messageProperty());
|
||||||
|
|
||||||
task.setOnSucceeded(e -> {
|
task.setOnSucceeded(e -> {
|
||||||
|
if (f.getCollocability().size() > 0) {
|
||||||
try{
|
try{
|
||||||
|
Filter f2 = (Filter) f.clone();
|
||||||
|
f2.setNgramValue(1);
|
||||||
|
StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
|
||||||
|
final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
|
||||||
|
final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
|
||||||
|
thread_collocability.setDaemon(true);
|
||||||
|
thread_collocability.start();
|
||||||
|
}catch(CloneNotSupportedException c){}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
// System.out.print(statistics);
|
||||||
boolean successullySaved = statistic.saveResultToDisk();
|
boolean successullySaved = statistic.saveResultToDisk();
|
||||||
if (successullySaved) {
|
if (successullySaved) {
|
||||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||||
|
@ -629,6 +769,7 @@ public class StringAnalysisTabNew2 {
|
||||||
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
||||||
logger.error("Out of memory error", e1);
|
logger.error("Out of memory error", e1);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
|
|
|
@ -9,9 +9,7 @@ import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ConcurrentMap;
|
import java.util.concurrent.ConcurrentMap;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
import data.CalculateFor;
|
import data.*;
|
||||||
import data.Filter;
|
|
||||||
import data.MultipleHMKeys;
|
|
||||||
import gui.ValidationUtil;
|
import gui.ValidationUtil;
|
||||||
import org.apache.commons.csv.CSVFormat;
|
import org.apache.commons.csv.CSVFormat;
|
||||||
import org.apache.commons.csv.CSVPrinter;
|
import org.apache.commons.csv.CSVPrinter;
|
||||||
|
@ -61,7 +59,9 @@ public class Export {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||||
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults, Filter filter) {
|
StatisticsNew statistics, Filter filter) {
|
||||||
|
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
|
||||||
|
|
||||||
//Delimiter used in CSV file
|
//Delimiter used in CSV file
|
||||||
String NEW_LINE_SEPARATOR = "\n";
|
String NEW_LINE_SEPARATOR = "\n";
|
||||||
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
||||||
|
@ -96,77 +96,22 @@ public class Export {
|
||||||
FILE_HEADER_AL.add("Lema male črke");
|
FILE_HEADER_AL.add("Lema male črke");
|
||||||
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
|
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
|
||||||
|
|
||||||
// if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
|
|
||||||
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
|
||||||
// headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
|
||||||
// if (headerInfoBlock.get("Analiza").equals("Besede")){
|
|
||||||
// FILE_HEADER_AL.add("Različnica");
|
|
||||||
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
|
|
||||||
// FILE_HEADER_AL.add("Različnice");
|
|
||||||
// }
|
|
||||||
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
|
||||||
// headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
|
||||||
// if (headerInfoBlock.get("Analiza").equals("Besede")){
|
|
||||||
// FILE_HEADER_AL.add("Lema");
|
|
||||||
// FILE_HEADER_AL.add("Lema male črke");
|
|
||||||
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
|
|
||||||
// FILE_HEADER_AL.add("Leme");
|
|
||||||
// FILE_HEADER_AL.add("Leme male črke");
|
|
||||||
// }
|
|
||||||
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
|
||||||
// headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
|
||||||
// if (headerInfoBlock.get("Analiza").equals("Besede")){
|
|
||||||
// FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
|
||||||
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
|
|
||||||
// FILE_HEADER_AL.add("Oblikoskladenjska oznake");
|
|
||||||
// }
|
|
||||||
// } else {
|
|
||||||
// headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
|
||||||
// FILE_HEADER_AL.add("Lema");
|
|
||||||
// FILE_HEADER_AL.add("Lema male črke");
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
// for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
|
|
||||||
for (CalculateFor otherKey : filter.getMultipleKeys()) {
|
for (CalculateFor otherKey : filter.getMultipleKeys()) {
|
||||||
FILE_HEADER_AL.add(otherKey.toHeaderString());
|
FILE_HEADER_AL.add(otherKey.toHeaderString());
|
||||||
if (otherKey.equals(CalculateFor.LEMMA))
|
if (otherKey.equals(CalculateFor.LEMMA))
|
||||||
FILE_HEADER_AL.add("Lema male črke");
|
FILE_HEADER_AL.add("Lema male črke");
|
||||||
}
|
}
|
||||||
|
|
||||||
// if(otherKey.equals(CalculateFor.LEMMA)){
|
|
||||||
// FILE_HEADER_AL.add("Lema");
|
|
||||||
// FILE_HEADER_AL.add("Lema male črke");
|
|
||||||
// }
|
|
||||||
// if(otherKey.equals(CalculateFor.WORD_TYPE)){
|
|
||||||
// FILE_HEADER_AL.add("Besedna vrsta");
|
|
||||||
// }
|
|
||||||
// if(otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
||||||
// FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
|
||||||
// }
|
|
||||||
// if(otherKey.equals(CalculateFor.NORMALIZED_WORD)){
|
|
||||||
// FILE_HEADER_AL.add("Normalizirana različnica");
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||||
FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString());
|
FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString());
|
||||||
|
|
||||||
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
|
||||||
// FILE_HEADER_AL.add("Delež glede na vse različnice");
|
|
||||||
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
|
||||||
// FILE_HEADER_AL.add("Delež glede na vse leme");
|
|
||||||
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
|
||||||
// FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
|
|
||||||
// } else {
|
|
||||||
// FILE_HEADER_AL.add("Delež glede na vse leme");
|
|
||||||
// }
|
|
||||||
FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
|
FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
|
||||||
|
|
||||||
|
if (filter.getCollocability().size() > 0){
|
||||||
|
FILE_HEADER_AL.add(filter.getCollocability().get(0).toHeaderString());
|
||||||
|
}
|
||||||
|
|
||||||
for (String key : taxonomyResults.keySet()) {
|
for (String key : taxonomyResults.keySet()) {
|
||||||
if(!key.equals("Total") && num_taxonomy_frequencies.get(key) > 0) {
|
if(!key.equals("Total") && num_taxonomy_frequencies.get(key) > 0) {
|
||||||
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
|
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
|
||||||
|
@ -176,9 +121,6 @@ public class Export {
|
||||||
}
|
}
|
||||||
FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
|
FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
|
||||||
FILE_HEADER_AL.toArray(FILE_HEADER);
|
FILE_HEADER_AL.toArray(FILE_HEADER);
|
||||||
// } else {
|
|
||||||
// FILE_HEADER = new Object[]{"word", "frequency", "percent"};
|
|
||||||
// }
|
|
||||||
|
|
||||||
String fileName = "";
|
String fileName = "";
|
||||||
|
|
||||||
|
@ -250,16 +192,7 @@ public class Export {
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if(!e.getKey().getLemma().equals("")){
|
|
||||||
// dataEntry.add(e.getKey().getLemma());
|
|
||||||
// dataEntry.add(e.getKey().getLemma().toLowerCase());
|
|
||||||
// }
|
|
||||||
// if(!e.getKey().getWordType().equals("")){
|
|
||||||
// dataEntry.add(e.getKey().getWordType());
|
|
||||||
// }
|
|
||||||
// if(!e.getKey().getMsd().equals("")){
|
|
||||||
// dataEntry.add(e.getKey().getMsd());
|
|
||||||
// }
|
|
||||||
dataEntry.add(e.getValue().toString());
|
dataEntry.add(e.getValue().toString());
|
||||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
||||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
|
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
|
||||||
|
@ -270,6 +203,11 @@ public class Export {
|
||||||
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
|
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
|
||||||
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
|
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (filter.getCollocability().size() > 0){
|
||||||
|
dataEntry.add(String.format("%.4f", statistics.getCollocability().get(filter.getCollocability().get(0)).get(e.getKey())));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write msd separated per letters at the end of each line in csv
|
// Write msd separated per letters at the end of each line in csv
|
||||||
|
|
|
@ -105,8 +105,12 @@
|
||||||
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
|
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
|
||||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
|
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
|
||||||
<Label fx:id="solarFilters" layoutX="10.0" layoutY="100.0" text="Izbrani filtri:" />
|
|
||||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="300.0" prefWidth="275.0" text=" " wrapText="true" />
|
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Kolokabilnost" />
|
||||||
|
<CheckComboBox fx:id="collocabilityCCB" layoutX="185.0" layoutY="100.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||||
|
|
||||||
|
<Label fx:id="solarFilters" layoutX="10.0" layoutY="140.0" text="Izbrani filtri:" />
|
||||||
|
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="260.0" prefWidth="275.0" text=" " wrapText="true" />
|
||||||
<!-- samoglasniki/soglasniki -->
|
<!-- samoglasniki/soglasniki -->
|
||||||
<Pane fx:id="paneLetters">
|
<Pane fx:id="paneLetters">
|
||||||
<children>
|
<children>
|
||||||
|
|
Loading…
Reference in New Issue
Block a user