You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

484 lines
24 KiB

package util;
import alg.XML_processing;
import data.*;
import gui.I18N;
import gui.StringAnalysisTabNew2;
import javafx.beans.InvalidationListener;
import javafx.beans.Observable;
import javafx.beans.property.ReadOnlyDoubleWrapper;
import javafx.fxml.FXML;
import javafx.scene.control.Alert;
import javafx.scene.control.Button;
import javafx.scene.control.Label;
import javafx.scene.control.ProgressBar;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.Collection;
import java.util.Date;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import static gui.GUIController.showAlert;
public class Tasks {
public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class);
private Corpus corpus;
private boolean useDb;
@FXML
private Button cancel;
@FXML
public ProgressBar ngramProgressBar;
@FXML
public Label progressLabel;
public Tasks(Corpus corpus, boolean useDb, Button cancel, ProgressBar ngramProgressBar, Label progressLabel) {
this.corpus = corpus;
this.useDb = useDb;
this.cancel = cancel;
this.ngramProgressBar = ngramProgressBar;
this.progressLabel = progressLabel;
}
public final javafx.concurrent.Task<Void> prepareTaskForMinRelFre(StatisticsNew statistic) {
Filter f = statistic.getFilter();
logger.info("Started execution: ", f);
javafx.concurrent.Task<Void> task_collocability = null;
try{
Filter f2 = (Filter) f.clone();
f2.setIsMinimalRelFreScraper(true);
StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb);
Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();
final javafx.concurrent.Task<Void> task = new javafx.concurrent.Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType());
if(multipleFiles){
cancel.setVisible(true);
}
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
int corpusSize;
int i;
if(statistic.getFilter().getCollocability().size() > 0){
i = 0;
corpusSize = corpusFiles.size() * 3;
} else {
i = 0;
corpusSize = corpusFiles.size() * 2;
}
for (File f : corpusFiles) {
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
xml_processing.isCancelled = false;
i++;
if(xml_processing.progressBarListener != null) {
xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
}
if (multipleFiles) {
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusSize);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
} else {
xml_processing.progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
cancel.setVisible(true);
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
previousTime = new Date();
}
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
}
};
xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
}
xml_processing.readXML(f.toString(), statisticsMinRelFre);
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
if(!(multipleFiles)){
cancel.setVisible(false);
}
}
// add remaining minRelFre results
if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor);
// reset all values
for(Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()){
statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>());
}
for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){
statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
}
}
return null;
}
};
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
statistic.updateMinimalRelFre(statisticsMinRelFre.getMinimalRelFreNgrams(), statisticsMinRelFre.getMinimalRelFre1grams());
final javafx.concurrent.Task<Void> taskCollocability = prepareMainTask(statistic);
final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
thread_collocability.setDaemon(true);
thread_collocability.start();
});
task.setOnFailed(e -> {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
logger.error("Error while executing", e);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnCancelled(e -> {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
logger.info("cancel button");
});
return task;
}catch(CloneNotSupportedException c){ return null; }
}
public final javafx.concurrent.Task<Void> prepareMainTask(StatisticsNew statistic) {
Filter f = statistic.getFilter();
logger.info("Started execution: ", f);
javafx.concurrent.Task<Void> task_collocability = null;
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
final javafx.concurrent.Task<Void> task = new javafx.concurrent.Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
if(multipleFiles){
cancel.setVisible(true);
}
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
int corpusSize;
int i;
int taskIndex = 0;
if(statistic.getFilter().getCollocability().size() > 0 && statistic.getFilter().getMinimalRelFre() > 1){
i = corpusFiles.size();
corpusSize = corpusFiles.size() * 3;
} else if (statistic.getFilter().getMinimalRelFre() > 1) {
i = corpusFiles.size();
corpusSize = corpusFiles.size() * 2;
} else if (statistic.getFilter().getCollocability().size() > 0) {
i = 0;
corpusSize = corpusFiles.size() * 2;
} else {
i = 0;
corpusSize = corpusFiles.size();
}
for (File f : corpusFiles) {
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
xml_processing.isCancelled = false;
i++;
taskIndex++;
if(xml_processing.progressBarListener != null) {
xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
}
if (multipleFiles) {
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusSize);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
} else {
xml_processing.progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
cancel.setVisible(true);
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
previousTime = new Date();
}
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
}
};
xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
}
xml_processing.readXML(f.toString(), statistic);
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
if(!(multipleFiles)){
cancel.setVisible(false);
}
}
// if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing
if (statistic.getFilter().getMinimalRelFre() > 1){
long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
for(Map.Entry<MultipleHMKeys, AtomicLong> entry : statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()){
if(entry.getValue().longValue() < absToRelFactor){
statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).remove(entry.getKey());
}
}
statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor);
}
return null;
}
};
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
if (f.getCollocability().size() > 0) {
try{
Filter f2 = (Filter) f.clone();
f2.setNgramValue(1);
StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
final javafx.concurrent.Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
thread_collocability.setDaemon(true);
thread_collocability.start();
}catch(CloneNotSupportedException c){}
} else {
try {
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
} else {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1) {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
logger.error("Out of memory error", e1);
}
ngramProgressBar.progressProperty().unbind();
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
}
});
task.setOnFailed(e -> {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
logger.error("Error while executing", e);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnCancelled(e -> {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
logger.info("cancel button");
});
return task;
}
public final javafx.concurrent.Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
final javafx.concurrent.Task<Void> task = new javafx.concurrent.Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
if(multipleFiles){
cancel.setVisible(true);
}
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
int corpusSize;
int i;
int taskIndex = 0;
if(statistic.getFilter().getMinimalRelFre() > 1){
i = corpusFiles.size() * 2;
corpusSize = corpusFiles.size() * 3;
} else {
i = corpusFiles.size();
corpusSize = corpusFiles.size() * 2;
}
for (File f : corpusFiles) {
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
i++;
taskIndex++;
if(xml_processing.progressBarListener != null) {
xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
}
if (multipleFiles) {
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusSize);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
} else {
xml_processing.progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
cancel.setVisible(true);
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
previousTime = new Date();
}
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
}
};
xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
}
xml_processing.isCollocability = true;
xml_processing.readXML(f.toString(), statisticsOneGrams);
xml_processing.isCollocability = false;
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
}
return null;
}
};
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
try {
System.out.print(statistic);
statistic.updateCalculateCollocabilities(statisticsOneGrams);
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
} else {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1) {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
logger.error("Out of memory error", e1);
}
ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnFailed(e -> {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
logger.error("Error while executing", e);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnCancelled(e -> {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
});
return task;
}
}