package util; import alg.XML_processing; import data.*; import gui.I18N; import gui.StringAnalysisTabNew2; import javafx.beans.InvalidationListener; import javafx.beans.Observable; import javafx.beans.property.ReadOnlyDoubleWrapper; import javafx.fxml.FXML; import javafx.scene.control.Alert; import javafx.scene.control.Button; import javafx.scene.control.Label; import javafx.scene.control.ProgressBar; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.io.File; import java.io.UnsupportedEncodingException; import java.util.Collection; import java.util.Date; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; import static gui.GUIController.showAlert; public class Tasks { public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class); private Corpus corpus; private boolean useDb; @FXML private Button cancel; @FXML public ProgressBar ngramProgressBar; @FXML public Label progressLabel; public Tasks(Corpus corpus, boolean useDb, Button cancel, ProgressBar ngramProgressBar, Label progressLabel) { this.corpus = corpus; this.useDb = useDb; this.cancel = cancel; this.ngramProgressBar = ngramProgressBar; this.progressLabel = progressLabel; } public final javafx.concurrent.Task prepareTaskForMinRelFre(StatisticsNew statistic) { Filter f = statistic.getFilter(); logger.info("Started execution: ", f); javafx.concurrent.Task task_collocability = null; try{ Filter f2 = (Filter) f.clone(); f2.setIsMinimalRelFreScraper(true); StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb); Collection corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles(); final javafx.concurrent.Task task = new javafx.concurrent.Task() { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType()); if(multipleFiles){ cancel.setVisible(true); } Date startTime = new Date(); Date previousTime = new Date(); int remainingSeconds = -1; int corpusSize; int i; if(statistic.getFilter().getCollocability().size() > 0){ i = 0; corpusSize = corpusFiles.size() * 3; } else { i = 0; corpusSize = corpusFiles.size() * 2; } for (File f : corpusFiles) { final int iFinal = i; XML_processing xml_processing = new XML_processing(); xml_processing.isCancelled = false; i++; if(xml_processing.progressBarListener != null) { xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); } if (multipleFiles) { if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); previousTime = new Date(); } this.updateProgress(i, corpusSize); this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); } else { xml_processing.progressBarListener = new InvalidationListener() { int remainingSeconds = -1; Date previousTime = new Date(); @Override public void invalidated(Observable observable) { cancel.setVisible(true); if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); previousTime = new Date(); } xml_processing.isCancelled = isCancelled(); updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); } }; xml_processing.progressProperty().addListener(xml_processing.progressBarListener); } xml_processing.readXML(f.toString(), statisticsMinRelFre); if (isCancelled()) { updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); break; } if(!(multipleFiles)){ cancel.setVisible(false); } } // add remaining minRelFre results if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) { long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue(); double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor); // reset all values for(Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()){ statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>()); } for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){ statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0)); } } return null; } }; ngramProgressBar.progressProperty().bind(task.progressProperty()); progressLabel.textProperty().bind(task.messageProperty()); task.setOnSucceeded(e -> { statistic.updateMinimalRelFre(statisticsMinRelFre.getMinimalRelFreNgrams(), statisticsMinRelFre.getMinimalRelFre1grams()); final javafx.concurrent.Task taskCollocability = prepareMainTask(statistic); final Thread thread_collocability = new Thread(taskCollocability, "task_collocability"); thread_collocability.setDaemon(true); thread_collocability.start(); }); task.setOnFailed(e -> { showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); logger.error("Error while executing", e); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); task.setOnCancelled(e -> { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); // When cancel button is pressed cancel analysis cancel.setOnAction(e -> { task.cancel(); logger.info("cancel button"); }); return task; }catch(CloneNotSupportedException c){ return null; } } public final javafx.concurrent.Task prepareMainTask(StatisticsNew statistic) { Filter f = statistic.getFilter(); logger.info("Started execution: ", f); javafx.concurrent.Task task_collocability = null; Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); final javafx.concurrent.Task task = new javafx.concurrent.Task() { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); if(multipleFiles){ cancel.setVisible(true); } Date startTime = new Date(); Date previousTime = new Date(); int remainingSeconds = -1; int corpusSize; int i; int taskIndex = 0; if(statistic.getFilter().getCollocability().size() > 0 && statistic.getFilter().getMinimalRelFre() > 1){ i = corpusFiles.size(); corpusSize = corpusFiles.size() * 3; } else if (statistic.getFilter().getMinimalRelFre() > 1) { i = corpusFiles.size(); corpusSize = corpusFiles.size() * 2; } else if (statistic.getFilter().getCollocability().size() > 0) { i = 0; corpusSize = corpusFiles.size() * 2; } else { i = 0; corpusSize = corpusFiles.size(); } for (File f : corpusFiles) { final int iFinal = i; XML_processing xml_processing = new XML_processing(); xml_processing.isCancelled = false; i++; taskIndex++; if(xml_processing.progressBarListener != null) { xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); } if (multipleFiles) { if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000); previousTime = new Date(); } this.updateProgress(i, corpusSize); this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); } else { xml_processing.progressBarListener = new InvalidationListener() { int remainingSeconds = -1; Date previousTime = new Date(); @Override public void invalidated(Observable observable) { cancel.setVisible(true); if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); previousTime = new Date(); } xml_processing.isCancelled = isCancelled(); updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); } }; xml_processing.progressProperty().addListener(xml_processing.progressBarListener); } xml_processing.readXML(f.toString(), statistic); if (isCancelled()) { updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); break; } if(!(multipleFiles)){ cancel.setVisible(false); } } // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing if (statistic.getFilter().getMinimalRelFre() > 1){ long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue(); double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; for(Map.Entry entry : statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()){ if(entry.getValue().longValue() < absToRelFactor){ statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).remove(entry.getKey()); } } statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor); } return null; } }; ngramProgressBar.progressProperty().bind(task.progressProperty()); progressLabel.textProperty().bind(task.messageProperty()); task.setOnSucceeded(e -> { if (f.getCollocability().size() > 0) { try{ Filter f2 = (Filter) f.clone(); f2.setNgramValue(1); StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb); final javafx.concurrent.Task taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams); final Thread thread_collocability = new Thread(taskCollocability, "task_collocability"); thread_collocability.setDaemon(true); thread_collocability.start(); }catch(CloneNotSupportedException c){} } else { try { boolean successullySaved = statistic.saveResultToDisk(); if (successullySaved) { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); } else { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); } } catch (UnsupportedEncodingException e1) { showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); logger.error("Error while saving", e1); } catch (OutOfMemoryError e1) { showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); logger.error("Out of memory error", e1); } ngramProgressBar.progressProperty().unbind(); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); } }); task.setOnFailed(e -> { showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); logger.error("Error while executing", e); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); task.setOnCancelled(e -> { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); // When cancel button is pressed cancel analysis cancel.setOnAction(e -> { task.cancel(); logger.info("cancel button"); }); return task; } public final javafx.concurrent.Task prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) { Collection corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles(); final javafx.concurrent.Task task = new javafx.concurrent.Task() { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); if(multipleFiles){ cancel.setVisible(true); } Date startTime = new Date(); Date previousTime = new Date(); int remainingSeconds = -1; int corpusSize; int i; int taskIndex = 0; if(statistic.getFilter().getMinimalRelFre() > 1){ i = corpusFiles.size() * 2; corpusSize = corpusFiles.size() * 3; } else { i = corpusFiles.size(); corpusSize = corpusFiles.size() * 2; } for (File f : corpusFiles) { final int iFinal = i; XML_processing xml_processing = new XML_processing(); i++; taskIndex++; if(xml_processing.progressBarListener != null) { xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); } if (multipleFiles) { if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000); previousTime = new Date(); } this.updateProgress(i, corpusSize); this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); } else { xml_processing.progressBarListener = new InvalidationListener() { int remainingSeconds = -1; Date previousTime = new Date(); @Override public void invalidated(Observable observable) { cancel.setVisible(true); if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); previousTime = new Date(); } xml_processing.isCancelled = isCancelled(); updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); } }; xml_processing.progressProperty().addListener(xml_processing.progressBarListener); } xml_processing.isCollocability = true; xml_processing.readXML(f.toString(), statisticsOneGrams); xml_processing.isCollocability = false; if (isCancelled()) { updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); break; } } return null; } }; ngramProgressBar.progressProperty().bind(task.progressProperty()); progressLabel.textProperty().bind(task.messageProperty()); task.setOnSucceeded(e -> { try { System.out.print(statistic); statistic.updateCalculateCollocabilities(statisticsOneGrams); boolean successullySaved = statistic.saveResultToDisk(); if (successullySaved) { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); } else { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); } } catch (UnsupportedEncodingException e1) { showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); logger.error("Error while saving", e1); } catch (OutOfMemoryError e1) { showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); logger.error("Out of memory error", e1); } ngramProgressBar.progressProperty().unbind(); // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); task.setOnFailed(e -> { showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); logger.error("Error while executing", e); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); task.setOnCancelled(e -> { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); // When cancel button is pressed cancel analysis cancel.setOnAction(e -> { task.cancel(); }); return task; } }