Added collocability functionality - implemented Dice method

This commit is contained in:
2018-10-24 10:36:07 +02:00
parent 1d9e9b7ed6
commit f9ce74d7b8
6 changed files with 290 additions and 108 deletions

View File

@@ -7,10 +7,13 @@ import static gui.Messages.*;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import javafx.application.HostServices;
import javafx.collections.transformation.SortedList;
import org.apache.commons.lang3.SerializationUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -59,6 +62,10 @@ public class StringAnalysisTabNew2 {
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
@FXML
private CheckComboBox<String> collocabilityCCB;
private ArrayList<Collocability> collocability;
@FXML
private ComboBox<String> ngramValueCB;
private Integer ngramValue;
@@ -126,6 +133,7 @@ public class StringAnalysisTabNew2 {
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
private static final ObservableList<String> COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
@@ -219,6 +227,21 @@ public class StringAnalysisTabNew2 {
calculateForCB.getSelectionModel().select(0);
// collocabilityCCB
collocabilityCCB.getItems().removeAll();
collocabilityCCB.getItems().setAll(FXCollections.observableArrayList(COLLOCABILITY_ITEMS));
collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
collocability = new ArrayList<>();
ObservableList<Collocability> checkedItems = FXCollections.observableArrayList();
for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) {
checkedItems.add(Collocability.factory(el));
}
collocability.addAll(checkedItems);
logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ",")));
});
collocabilityCCB.getCheckModel().clearChecks();
// msd
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
@@ -535,6 +558,7 @@ public class StringAnalysisTabNew2 {
filter.setMsd(msd);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
filter.setCollocability(collocability);
if (ngramValue != null && ngramValue == 0) {
filter.setStringLength(stringLength);
@@ -545,6 +569,7 @@ public class StringAnalysisTabNew2 {
// no errors
logger.info("Executing: ", filter.toString());
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
// ADD THINGS HERE!!!
execute(statistic);
} else {
logAlert(message);
@@ -585,8 +610,109 @@ public class StringAnalysisTabNew2 {
}
}
// public void calculate_collocabilities(StatisticsNew statistics, StatisticsNew oneWordStatistics) {
// statistics.updateCalculateCollocabilities(oneWordStatistics);
//
// }
private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
for (File f : corpusFiles) {
readXML(f.toString(), statisticsOneGrams);
// i++;
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
}
return null;
}
};
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
System.out.print("test");
try {
System.out.print(statistic);
// calculate_collocabilities(statistic, statisticsOneGrams);
statistic.updateCalculateCollocabilities(statisticsOneGrams);
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
} else {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1) {
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
}
// try {
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
// } else {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
// logger.error("Error while saving", e1);
// } catch (OutOfMemoryError e1){
// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
// logger.error("Out of memory error", e1);
// }
//
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
});
task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
});
task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
// logger.info("cancel button");
});
// cancel.setVisible(true);
return task;
}
private void execute(StatisticsNew statistic) {
logger.info("Started execution: ", statistic.getFilter());
Filter f = statistic.getFilter();
logger.info("Started execution: ", f);
// Task<Void> task_collocability = null;
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
boolean corpusIsSplit = corpusFiles.size() > 1;
@@ -613,21 +739,36 @@ public class StringAnalysisTabNew2 {
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
try {
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
} else {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
if (f.getCollocability().size() > 0) {
try{
Filter f2 = (Filter) f.clone();
f2.setNgramValue(1);
StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
thread_collocability.setDaemon(true);
thread_collocability.start();
}catch(CloneNotSupportedException c){}
} else {
try {
// System.out.print(statistics);
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
} else {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1) {
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1){
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
}
ngramProgressBar.progressProperty().unbind();