diff --git a/src/main/java/alg/XML_processing.java b/src/main/java/alg/XML_processing.java index 9d31002..ffd14f8 100755 --- a/src/main/java/alg/XML_processing.java +++ b/src/main/java/alg/XML_processing.java @@ -6,6 +6,7 @@ import java.io.*; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.atomic.AtomicLong; import javax.xml.namespace.QName; import javax.xml.stream.XMLEventReader; @@ -178,6 +179,26 @@ public class XML_processing { // alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats); // pool.invoke(wc); } + + // if running with minimalRelFre frequency erase all ngrams with occurrences lower than set value per 1M + if(stats.getFilter().getIsMinimalRelFreScraper()) { +// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() + + long countFor1MWords = stats.getUniGramOccurrences().get(stats.getCorpus().getTotal()).longValue(); + if(countFor1MWords > 1000000L){ + double absToRelFactor = (stats.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; + + stats.updateMinimalRelFre(stats.getTaxonomyResult().get(stats.getCorpus().getTotal()).entrySet(), absToRelFactor); + + // reset all values + for(Taxonomy taxonomy : stats.getTaxonomyResult().keySet()){ + stats.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>()); + } + for(Taxonomy taxonomy : stats.getUniGramOccurrences().keySet()){ + stats.getUniGramOccurrences().put(taxonomy, new AtomicLong(0)); + } + } +// System.out.println("asd"); + } } // public static void readXMLGos(String path, Statistics stats) { diff --git a/src/main/java/data/Filter.java b/src/main/java/data/Filter.java index 33c9f66..f8f6128 100755 --- a/src/main/java/data/Filter.java +++ b/src/main/java/data/Filter.java @@ -29,6 +29,8 @@ public class Filter implements Cloneable { NOTE_PUNCTUATIONS, MINIMAL_OCCURRENCES, MINIMAL_TAXONOMY, + MINIMAL_REL_FRE, + IS_MINIMAL_REL_FRE_SCRAPER, TAXONOMY_SET_OPERATION, COLLOCABILITY, PREFIX_LENGTH, @@ -41,6 +43,7 @@ public class Filter implements Cloneable { filter = new HashMap<>(); filter.put(WRITE_MSD_AT_THE_END, false); filter.put(WORD_PARTS, new ArrayList()); + filter.put(IS_MINIMAL_REL_FRE_SCRAPER, false); } public Filter(AnalysisLevel al, CalculateFor cf) { @@ -258,6 +261,24 @@ public class Filter implements Cloneable { return (Integer) filter.get(MINIMAL_TAXONOMY); } + + public void setMinimalRelFre(Integer minimalRelFre) { + filter.put(MINIMAL_REL_FRE, minimalRelFre); + } + + public Integer getMinimalRelFre() { + return (Integer) filter.get(MINIMAL_REL_FRE); + } + + + public void setIsMinimalRelFreScraper(boolean isMinimalRelFreScraper) { + filter.put(IS_MINIMAL_REL_FRE_SCRAPER, isMinimalRelFreScraper); + } + + public boolean getIsMinimalRelFreScraper() { + return (boolean) filter.get(IS_MINIMAL_REL_FRE_SCRAPER); + } + // PREFIX_LENGTH, // SUFFIX_LENGTH, // PREFIX_LIST, diff --git a/src/main/java/data/MultipleHMKeys.java b/src/main/java/data/MultipleHMKeys.java index 026df1a..435decf 100755 --- a/src/main/java/data/MultipleHMKeys.java +++ b/src/main/java/data/MultipleHMKeys.java @@ -66,4 +66,6 @@ public interface MultipleHMKeys { .thenComparing(MultipleHMKeys::getK5) .compare(this, othr); } + + MultipleHMKeys[] splitNgramTo1grams(); } diff --git a/src/main/java/data/MultipleHMKeys1.java b/src/main/java/data/MultipleHMKeys1.java index 0ca07d9..e2cbce1 100755 --- a/src/main/java/data/MultipleHMKeys1.java +++ b/src/main/java/data/MultipleHMKeys1.java @@ -36,4 +36,13 @@ public final class MultipleHMKeys1 implements MultipleHMKeys { public boolean equals(Object obj) { return (obj instanceof MultipleHMKeys1) && ((MultipleHMKeys1) obj).k1.equals(k1); } + + public MultipleHMKeys[] splitNgramTo1grams(){ + String[] k1 = getK1().split(" "); + MultipleHMKeys[] res = new MultipleHMKeys[k1.length]; + for(int i = 0; i < k1.length; i++){ + res[i] = new MultipleHMKeys1(k1[i]); + } + return res; + } } diff --git a/src/main/java/data/MultipleHMKeys2.java b/src/main/java/data/MultipleHMKeys2.java index 852d2e0..1ba1d60 100755 --- a/src/main/java/data/MultipleHMKeys2.java +++ b/src/main/java/data/MultipleHMKeys2.java @@ -46,4 +46,14 @@ public final class MultipleHMKeys2 implements MultipleHMKeys { // return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key); } + + public MultipleHMKeys[] splitNgramTo1grams(){ + String[] k1 = getK1().split(" "); + String[] k2 = getK2().split(" "); + MultipleHMKeys[] res = new MultipleHMKeys[k1.length]; + for(int i = 0; i < k1.length; i++){ + res[i] = new MultipleHMKeys2(k1[i], k2[i]); + } + return res; + } } diff --git a/src/main/java/data/MultipleHMKeys3.java b/src/main/java/data/MultipleHMKeys3.java index a39e617..5c0a1b6 100755 --- a/src/main/java/data/MultipleHMKeys3.java +++ b/src/main/java/data/MultipleHMKeys3.java @@ -50,4 +50,15 @@ public final class MultipleHMKeys3 implements MultipleHMKeys { && ((MultipleHMKeys3) obj).k2.equals(k2) && ((MultipleHMKeys3) obj).k3.equals(k3); } + + public MultipleHMKeys[] splitNgramTo1grams(){ + String[] k1 = getK1().split(" "); + String[] k2 = getK2().split(" "); + String[] k3 = getK3().split(" "); + MultipleHMKeys[] res = new MultipleHMKeys[k1.length]; + for(int i = 0; i < k1.length; i++){ + res[i] = new MultipleHMKeys3(k1[i], k2[i], k3[i]); + } + return res; + } } diff --git a/src/main/java/data/MultipleHMKeys4.java b/src/main/java/data/MultipleHMKeys4.java index 3dbc91b..c7862fc 100755 --- a/src/main/java/data/MultipleHMKeys4.java +++ b/src/main/java/data/MultipleHMKeys4.java @@ -58,4 +58,16 @@ public final class MultipleHMKeys4 implements MultipleHMKeys { && ((MultipleHMKeys4) obj).k3.equals(k3) && ((MultipleHMKeys4) obj).k4.equals(k4); } + + public MultipleHMKeys[] splitNgramTo1grams(){ + String[] k1 = getK1().split(" "); + String[] k2 = getK2().split(" "); + String[] k3 = getK3().split(" "); + String[] k4 = getK4().split(" "); + MultipleHMKeys[] res = new MultipleHMKeys[k1.length]; + for(int i = 0; i < k1.length; i++){ + res[i] = new MultipleHMKeys4(k1[i], k2[i], k3[i], k4[i]); + } + return res; + } } diff --git a/src/main/java/data/MultipleHMKeys5.java b/src/main/java/data/MultipleHMKeys5.java index 0c9270f..b999f9f 100755 --- a/src/main/java/data/MultipleHMKeys5.java +++ b/src/main/java/data/MultipleHMKeys5.java @@ -66,4 +66,17 @@ public final class MultipleHMKeys5 implements MultipleHMKeys { && ((MultipleHMKeys5) obj).k4.equals(k4) && ((MultipleHMKeys5) obj).k5.equals(k5); } + + public MultipleHMKeys[] splitNgramTo1grams(){ + String[] k1 = getK1().split(" "); + String[] k2 = getK2().split(" "); + String[] k3 = getK3().split(" "); + String[] k4 = getK4().split(" "); + String[] k5 = getK5().split(" "); + MultipleHMKeys[] res = new MultipleHMKeys[k1.length]; + for(int i = 0; i < k1.length; i++){ + res[i] = new MultipleHMKeys5(k1[i], k2[i], k3[i], k4[i], k5[i]); + } + return res; + } } diff --git a/src/main/java/data/StatisticsNew.java b/src/main/java/data/StatisticsNew.java index 5b02c12..215e2ba 100755 --- a/src/main/java/data/StatisticsNew.java +++ b/src/main/java/data/StatisticsNew.java @@ -46,6 +46,9 @@ public class StatisticsNew { private Map> collocability; private Map uniGramTaxonomyOccurrences; + private HashSet minimalRelFreNgrams; + private HashSet minimalRelFre1grams; + public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) { this.corpus = corpus; this.filter = filter; @@ -54,6 +57,9 @@ public class StatisticsNew { this.collocability = new ConcurrentHashMap<>(); this.uniGramTaxonomyOccurrences = new ConcurrentHashMap<>(); this.uniGramTaxonomyOccurrences.put(corpus.getTotal(), new AtomicLong(0L)); + this.minimalRelFreNgrams = new HashSet<>(); + this.minimalRelFre1grams = new HashSet<>(); + // create table for counting word occurrences per taxonomies @@ -373,6 +379,10 @@ public class StatisticsNew { } public void updateTaxonomyResults(MultipleHMKeys o, List taxonomy) { + if(minimalRelFreNgrams.size() > 0 && !filter.getIsMinimalRelFreScraper() && !(minimalRelFreNgrams.contains(o) || minimalRelFre1grams.contains(o))) { + return; + } + for (Taxonomy key : taxonomyResult.keySet()) { // first word should have the same taxonomy as others if (key.equals(corpus.getTotal()) || taxonomy.contains(key)) { @@ -472,6 +482,28 @@ public class StatisticsNew { } } + public HashSet getMinimalRelFreNgrams() { + return minimalRelFreNgrams; + } + + public HashSet getMinimalRelFre1grams() { + return minimalRelFre1grams; + } + + public void updateMinimalRelFre(HashSet hsNgrams, HashSet hs1grams) { + minimalRelFreNgrams = hsNgrams; + minimalRelFre1grams = hs1grams; + } + + public void updateMinimalRelFre(Set> entries, double absToRelFactor) { + for(Map.Entry entry : entries){ + if(entry.getValue().longValue() >= absToRelFactor){ + minimalRelFreNgrams.add(entry.getKey()); + minimalRelFre1grams.addAll(Arrays.asList(entry.getKey().splitNgramTo1grams())); + } + } + } + private LinkedHashMap headerInfoBlock() { LinkedHashMap info = new LinkedHashMap<>(); diff --git a/src/main/java/gui/CorpusTab.java b/src/main/java/gui/CorpusTab.java index a838de0..45df198 100755 --- a/src/main/java/gui/CorpusTab.java +++ b/src/main/java/gui/CorpusTab.java @@ -144,7 +144,7 @@ public class CorpusTab { private String corpusLocation; private String corpusFilesSize; - private static final String [] SELECT_READER_ARRAY = {"vert", "Solar", "GOS", "SSJ500K", "Gigafida", "Gigafida (old)"}; + private static final String [] SELECT_READER_ARRAY = {"VERT + REGI", "XML (Šolar 1.0)", "XML (GOS 1.0)", "XML (ssj500k 2.1)", "XML (Gigafida 2.0)", "XML (Gigafida 1.0, Kres 1.0)"}; private static final ArrayList SELECT_READER = new ArrayList<>(Arrays.asList(SELECT_READER_ARRAY)); private Collection corpusFiles; private File selectedDirectory; @@ -798,22 +798,22 @@ public class CorpusTab { private void selectReader() { switch (selectReader) { // "vert", "Solar", "GOS", "SSJ500K", "Gigafida", "Gigafida (old)", "Kres (old)" - case "vert": + case "VERT + REGI": corpusType = VERT; break; - case "Solar": + case "XML (Šolar 1.0)": corpusType = SOLAR; break; - case "GOS": + case "XML (GOS 1.0)": corpusType = GOS; break; - case "SSJ500K": + case "XML (ssj500k 2.1)": corpusType = SSJ500K; break; - case "Gigafida": + case "XML (Gigafida 2.0)": corpusType = GIGAFIDA2; break; - case "Gigafida (old)": + case "XML (Gigafida 1.0, Kres 1.0)": corpusType = GIGAFIDA; break; default: diff --git a/src/main/java/gui/GUIController.java b/src/main/java/gui/GUIController.java index 741d002..a60b695 100755 --- a/src/main/java/gui/GUIController.java +++ b/src/main/java/gui/GUIController.java @@ -182,7 +182,7 @@ public class GUIController extends Application { alert.showAndWait(); } - static void showAlert(Alert.AlertType alertType, String headerText) { + public static void showAlert(Alert.AlertType alertType, String headerText) { showAlert(alertType, headerText, null); } } \ No newline at end of file diff --git a/src/main/java/gui/OneWordAnalysisTab.java b/src/main/java/gui/OneWordAnalysisTab.java index d925534..346c3e9 100755 --- a/src/main/java/gui/OneWordAnalysisTab.java +++ b/src/main/java/gui/OneWordAnalysisTab.java @@ -21,6 +21,8 @@ import org.apache.logging.log4j.Logger; import org.controlsfx.control.CheckComboBox; import javafx.scene.image.ImageView; +import util.Tasks; + import java.io.File; import java.io.UnsupportedEncodingException; import java.util.*; @@ -74,6 +76,9 @@ public class OneWordAnalysisTab { @FXML public Label minimalTaxonomyL; + @FXML + public Label minimalRelFreL; + @FXML public Label taxonomySetOperationL; @@ -104,6 +109,9 @@ public class OneWordAnalysisTab { @FXML public ImageView minimalTaxonomyI; + @FXML + public ImageView minimalRelFreI; + @FXML public ImageView taxonomySetOperationI; @@ -144,6 +152,10 @@ public class OneWordAnalysisTab { private TextField minimalTaxonomyTF; private Integer minimalTaxonomy; + @FXML + private TextField minimalRelFreTF; + private Integer minimalRelFre; + @FXML private ComboBox taxonomySetOperationCB; private String taxonomySetOperation; @@ -559,6 +571,29 @@ public class OneWordAnalysisTab { } }); + // set default values + minimalRelFreTF.setText("1"); + minimalRelFre = 1; + + minimalRelFreTF.focusedProperty().addListener((observable, oldValue, newValue) -> { + if (!newValue) { + // focus lost + String value = minimalRelFreTF.getText(); + if (!ValidationUtil.isEmpty(value)) { + if (!ValidationUtil.isNumber(value)) { + logAlert("minimalRelFreTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); + GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); + } else { + minimalRelFre = Integer.parseInt(value); + } + } else { + minimalRelFreTF.setText("1"); + minimalRelFre = 1; + } + } + }); + + changeLanguageB.setOnAction(e -> { if (I18N.getLocale() == new Locale.Builder().setLanguage("sl").setRegion("SI").build()){ I18N.setLocale(Locale.ENGLISH); @@ -680,6 +715,7 @@ public class OneWordAnalysisTab { taxonomyL.textProperty().bind(I18N.createStringBinding("label.taxonomy")); minimalOccurrencesL.textProperty().bind(I18N.createStringBinding("label.minimalOccurrences")); minimalTaxonomyL.textProperty().bind(I18N.createStringBinding("label.minimalTaxonomy")); + minimalRelFreL.textProperty().bind(I18N.createStringBinding("label.minimalRelFre")); solarFilters.textProperty().bind(I18N.createStringBinding("label.solarFilters")); taxonomySetOperationL.textProperty().bind(I18N.createStringBinding("label.taxonomySetOperation")); @@ -693,6 +729,7 @@ public class OneWordAnalysisTab { addTooltipToImage(taxonomyI, I18N.createStringBinding("label.word.taxonomyH")); addTooltipToImage(minimalOccurrencesI, I18N.createStringBinding("label.word.minimalOccurrencesH")); addTooltipToImage(minimalTaxonomyI, I18N.createStringBinding("label.word.minimalTaxonomyH")); + addTooltipToImage(minimalRelFreI, I18N.createStringBinding("label.wordPart.minimalRelFreH")); addTooltipToImage(taxonomySetOperationI, I18N.createStringBinding("label.letter.taxonomySetOperationH")); taxonomySetOperationCB.itemsProperty().bind(I18N.createObjectBinding(TAXONOMY_SET_OPERATION)); @@ -750,6 +787,7 @@ public class OneWordAnalysisTab { filter.setMsd(msd); filter.setMinimalOccurrences(minimalOccurrences); filter.setMinimalTaxonomy(minimalTaxonomy); + filter.setMinimalRelFre(minimalRelFre); filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd); filter.setTaxonomySetOperation(taxonomySetOperation); @@ -803,123 +841,138 @@ public class OneWordAnalysisTab { Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); - final Task task = new Task() { - @SuppressWarnings("Duplicates") - @Override - protected Void call() throws Exception { - final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); - if(multipleFiles){ - cancel.setVisible(true); - } - int i = 0; - Date startTime = new Date(); - Date previousTime = new Date(); - int remainingSeconds = -1; - for (File f : corpusFiles) { - final int iFinal = i; - XML_processing xml_processing = new XML_processing(); - xml_processing.isCancelled = false; - i++; - if(xml_processing.progressBarListener != null) { - xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); - } - if (multipleFiles) { - if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ - remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000); - previousTime = new Date(); - } - this.updateProgress(i, corpusFiles.size()); - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; +// final Task task = new Task() { +// @SuppressWarnings("Duplicates") +// @Override +// protected Void call() throws Exception { +// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); +// if(multipleFiles){ +// cancel.setVisible(true); +// } +// int i = 0; +// Date startTime = new Date(); +// Date previousTime = new Date(); +// int remainingSeconds = -1; +// for (File f : corpusFiles) { +// final int iFinal = i; +// XML_processing xml_processing = new XML_processing(); +// xml_processing.isCancelled = false; +// i++; +// if(xml_processing.progressBarListener != null) { +// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); +// } +// if (multipleFiles) { +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000); +// previousTime = new Date(); // } - } else { - - xml_processing.progressBarListener = new InvalidationListener() { - int remainingSeconds = -1; - Date previousTime = new Date(); - @Override - public void invalidated(Observable observable) { - cancel.setVisible(true); - if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ - remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * - (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * - ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); - previousTime = new Date(); - } - xml_processing.isCancelled = isCancelled(); - updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100); - updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds)); - } - }; - - xml_processing.progressProperty().addListener(xml_processing.progressBarListener); - } - xml_processing.readXML(f.toString(), statistic); - if (isCancelled()) { - updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); - break; - } - } - - return null; - } - }; - - ngramProgressBar.progressProperty().bind(task.progressProperty()); - progressLabel.textProperty().bind(task.messageProperty()); - - task.setOnSucceeded(e -> { - try { - boolean successullySaved = statistic.saveResultToDisk(); - if (successullySaved) { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); - } else { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); - } - } catch (UnsupportedEncodingException e1) { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); - logger.error("Error while saving", e1); - } - - ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - task.setOnFailed(e -> { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); - logger.error("Error while executing", e); - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - task.setOnCancelled(e -> { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - // When cancel button is pressed cancel analysis - cancel.setOnAction(e -> { - task.cancel(); - logger.info("cancel button"); - }); - - final Thread thread = new Thread(task, "task"); - thread.setDaemon(true); - thread.start(); +// this.updateProgress(i, corpusFiles.size()); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); +//// if (isCancelled()) { +//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +//// break; +//// } +// } else { +// +// xml_processing.progressBarListener = new InvalidationListener() { +// int remainingSeconds = -1; +// Date previousTime = new Date(); +// @Override +// public void invalidated(Observable observable) { +// cancel.setVisible(true); +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * +// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * +// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); +// previousTime = new Date(); +// } +// xml_processing.isCancelled = isCancelled(); +// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100); +// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds)); +// } +// }; +// +// xml_processing.progressProperty().addListener(xml_processing.progressBarListener); +// } +// xml_processing.readXML(f.toString(), statistic); +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } +// } +// +// return null; +// } +// }; +// +// ngramProgressBar.progressProperty().bind(task.progressProperty()); +// progressLabel.textProperty().bind(task.messageProperty()); +// +// task.setOnSucceeded(e -> { +// try { +// boolean successullySaved = statistic.saveResultToDisk(); +// if (successullySaved) { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); +// } else { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); +// } +// } catch (UnsupportedEncodingException e1) { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); +// logger.error("Error while saving", e1); +// } +// +// ngramProgressBar.progressProperty().unbind(); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// task.setOnFailed(e -> { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); +// logger.error("Error while executing", e); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// task.setOnCancelled(e -> { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// // When cancel button is pressed cancel analysis +// cancel.setOnAction(e -> { +// task.cancel(); +// logger.info("cancel button"); +// }); + +// final Thread thread = new Thread(task, "task"); +// thread.setDaemon(true); +// thread.start(); + + Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel); + if (statistic.getFilter().getMinimalRelFre() > 1){ + final Task mainTask = t.prepareTaskForMinRelFre(statistic); +// final Task mainTask = prepareTaskForMinRelFre(statistic); + final Thread thread = new Thread(mainTask, "task"); + thread.setDaemon(true); + thread.start(); + } else { + final Task mainTask = t.prepareMainTask(statistic); +// final Task mainTask = prepareMainTask(statistic); + final Thread thread = new Thread(mainTask, "task"); + thread.setDaemon(true); + thread.start(); + } } public void setSolarFiltersMap(HashMap> solarFiltersMap) { diff --git a/src/main/java/gui/StringAnalysisTabNew2.java b/src/main/java/gui/StringAnalysisTabNew2.java index 865beff..38af7a4 100755 --- a/src/main/java/gui/StringAnalysisTabNew2.java +++ b/src/main/java/gui/StringAnalysisTabNew2.java @@ -6,6 +6,8 @@ import static gui.GUIController.*; import java.io.File; import java.io.UnsupportedEncodingException; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Pattern; import alg.XML_processing; @@ -31,6 +33,7 @@ import javafx.concurrent.Task; import javafx.fxml.FXML; import javafx.scene.control.*; import javafx.scene.layout.Pane; +import util.Tasks; @SuppressWarnings("Duplicates") public class StringAnalysisTabNew2 { @@ -69,6 +72,9 @@ public class StringAnalysisTabNew2 { @FXML public Label minimalTaxonomyL; + @FXML + public Label minimalRelFreL; + @FXML public Label taxonomySetOperationL; @@ -111,6 +117,9 @@ public class StringAnalysisTabNew2 { @FXML public ImageView minimalTaxonomyI; + @FXML + public ImageView minimalRelFreI; + @FXML public ImageView taxonomySetOperationI; @@ -179,6 +188,10 @@ public class StringAnalysisTabNew2 { private TextField minimalTaxonomyTF; private Integer minimalTaxonomy; + @FXML + private TextField minimalRelFreTF; + private Integer minimalRelFre; + @FXML private ComboBox taxonomySetOperationCB; private String taxonomySetOperation; @@ -685,6 +698,29 @@ public class StringAnalysisTabNew2 { } }); + // set default values + minimalRelFreTF.setText("1"); + minimalRelFre = 1; + + minimalRelFreTF.focusedProperty().addListener((observable, oldValue, newValue) -> { + if (!newValue) { + // focus lost + String value = minimalRelFreTF.getText(); + if (!ValidationUtil.isEmpty(value)) { + if (!ValidationUtil.isNumber(value)) { + logAlert("minimalRelFreTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); + GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); + } else { + minimalRelFre = Integer.parseInt(value); + } + } else { + minimalRelFreTF.setText("1"); + minimalRelFre = 1; + } + } + }); + + changeLanguageB.setOnAction(e -> { if (I18N.getLocale() == new Locale.Builder().setLanguage("sl").setRegion("SI").build()){ I18N.setLocale(Locale.ENGLISH); @@ -836,6 +872,7 @@ public class StringAnalysisTabNew2 { taxonomyL.textProperty().bind(I18N.createStringBinding("label.taxonomy")); minimalOccurrencesL.textProperty().bind(I18N.createStringBinding("label.minimalOccurrences")); minimalTaxonomyL.textProperty().bind(I18N.createStringBinding("label.minimalTaxonomy")); + minimalRelFreL.textProperty().bind(I18N.createStringBinding("label.minimalRelFre")); taxonomySetOperationL.textProperty().bind(I18N.createStringBinding("label.taxonomySetOperation")); solarFilters.textProperty().bind(I18N.createStringBinding("label.solarFilters")); @@ -851,6 +888,7 @@ public class StringAnalysisTabNew2 { addTooltipToImage(taxonomyI, I18N.createStringBinding("label.wordSet.taxonomyH")); addTooltipToImage(minimalOccurrencesI, I18N.createStringBinding("label.wordSet.minimalOccurrencesH")); addTooltipToImage(minimalTaxonomyI, I18N.createStringBinding("label.wordSet.minimalTaxonomyH")); + addTooltipToImage(minimalRelFreI, I18N.createStringBinding("label.wordPart.minimalRelFreH")); addTooltipToImage(taxonomySetOperationI, I18N.createStringBinding("label.letter.taxonomySetOperationH")); taxonomySetOperationCB.itemsProperty().bind(I18N.createObjectBinding(TAXONOMY_SET_OPERATION)); @@ -912,6 +950,7 @@ public class StringAnalysisTabNew2 { filter.setMsd(msd); filter.setMinimalOccurrences(minimalOccurrences); filter.setMinimalTaxonomy(minimalTaxonomy); + filter.setMinimalRelFre(minimalRelFre); filter.setCollocability(collocability); filter.setTaxonomySetOperation(taxonomySetOperation); @@ -970,332 +1009,560 @@ public class StringAnalysisTabNew2 { // // } - private final Task prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) { - Collection corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles(); - - final Task task = new Task() { - @SuppressWarnings("Duplicates") - @Override - protected Void call() throws Exception { - final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); - if(multipleFiles){ - cancel.setVisible(true); - } - int i = corpusFiles.size(); - Date startTime = new Date(); - Date previousTime = new Date(); - int remainingSeconds = -1; - int corpusSize; - if (statistic.getFilter().getCollocability().size() > 0) { - corpusSize = corpusFiles.size() * 2; - } else { - corpusSize = corpusFiles.size(); - } - for (File f : corpusFiles) { - final int iFinal = i; - XML_processing xml_processing = new XML_processing(); - i++; - if(xml_processing.progressBarListener != null) { - xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); - } - if (multipleFiles) { - if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ - remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); - previousTime = new Date(); - } - this.updateProgress(i, corpusSize); - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; +// private final Task prepareTaskForMinRelFre(StatisticsNew statistic) { +// Filter f = statistic.getFilter(); +// logger.info("Started execution: ", f); +// Task task_collocability = null; +// +// try{ +// Filter f2 = (Filter) f.clone(); +// f2.setIsMinimalRelFreScraper(true); +// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb); +// +// +//// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb); +// +// Collection corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles(); +// +// final Task task = new Task() { +// @SuppressWarnings("Duplicates") +// @Override +// protected Void call() throws Exception { +// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType()); +// if(multipleFiles){ +// cancel.setVisible(true); +// } +// Date startTime = new Date(); +// Date previousTime = new Date(); +// int remainingSeconds = -1; +// int corpusSize; +// int i; +// if(statistic.getFilter().getCollocability().size() > 0){ +// i = 0; +// corpusSize = corpusFiles.size() * 3; +// } else { +// i = 0; +// corpusSize = corpusFiles.size() * 2; +// } +// for (File f : corpusFiles) { +// final int iFinal = i; +// XML_processing xml_processing = new XML_processing(); +// xml_processing.isCancelled = false; +// i++; +// if(xml_processing.progressBarListener != null) { +// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); +// } +// if (multipleFiles) { +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); +// previousTime = new Date(); // } - } else { - xml_processing.progressBarListener = new InvalidationListener() { - int remainingSeconds = -1; - Date previousTime = new Date(); - @Override - public void invalidated(Observable observable) { - cancel.setVisible(true); - if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ - remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * - (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * - ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); -// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); -// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1))); -// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); -// System.out.println(remainingSeconds); - previousTime = new Date(); - } - xml_processing.isCancelled = isCancelled(); - updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); - updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); - } - }; - - xml_processing.progressProperty().addListener(xml_processing.progressBarListener); - } - xml_processing.isCollocability = true; - xml_processing.readXML(f.toString(), statisticsOneGrams); - xml_processing.isCollocability = false; - if (isCancelled()) { - updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); - break; - } -// readXML(f.toString(), statisticsOneGrams); +// this.updateProgress(i, corpusSize); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); +//// if (isCancelled()) { +//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +//// break; +//// } +// } else { +// xml_processing.progressBarListener = new InvalidationListener() { +// int remainingSeconds = -1; +// Date previousTime = new Date(); +// @Override +// public void invalidated(Observable observable) { +// cancel.setVisible(true); +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * +// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * +// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); +//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); +//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1); +//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); +//// System.out.println(remainingSeconds); +// previousTime = new Date(); +// } +// xml_processing.isCancelled = isCancelled(); +// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); +// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); +// } +// }; +// +// xml_processing.progressProperty().addListener(xml_processing.progressBarListener); +// } +// xml_processing.readXML(f.toString(), statisticsMinRelFre); +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } +// if(!(multipleFiles)){ +// cancel.setVisible(false); +// } +// } +// +// // add remaining minRelFre results +// if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) { +//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() + +// long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue(); +// double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; +// +// statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor); +// +// // reset all values +// for(Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()){ +// statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>()); +// } +// for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){ +// statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0)); +// } +// +//// System.out.println("asd"); +// } +// +// return null; +// } +// }; +// +// ngramProgressBar.progressProperty().bind(task.progressProperty()); +// progressLabel.textProperty().bind(task.messageProperty()); +// task.setOnSucceeded(e -> { +// statistic.updateMinimalRelFre(statisticsMinRelFre.getMinimalRelFreNgrams(), statisticsMinRelFre.getMinimalRelFre1grams()); +// final Task taskCollocability = prepareMainTask(statistic); +// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability"); +// thread_collocability.setDaemon(true); +// thread_collocability.start(); +// }); +// +// task.setOnFailed(e -> { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); +// logger.error("Error while executing", e); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +// // ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// task.setOnCancelled(e -> { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +// // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// // When cancel button is pressed cancel analysis +// cancel.setOnAction(e -> { +// task.cancel(); +// logger.info("cancel button"); +// }); +// +// return task; +// }catch(CloneNotSupportedException c){ return null; } +// } +// +// private final Task prepareMainTask(StatisticsNew statistic) { +// Filter f = statistic.getFilter(); +// logger.info("Started execution: ", f); +// Task task_collocability = null; +// +// Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); +// +// final Task task = new Task() { +// @SuppressWarnings("Duplicates") +// @Override +// protected Void call() throws Exception { +// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); +// if(multipleFiles){ +// cancel.setVisible(true); +// } +// +// +//// int i = corpusFiles.size(); +//// Date startTime = new Date(); +//// Date previousTime = new Date(); +//// int remainingSeconds = -1; +//// int corpusSize; +//// if (statistic.getFilter().getCollocability().size() > 0) { +//// corpusSize = corpusFiles.size() * 2; +//// } else { +//// corpusSize = corpusFiles.size(); +//// } +// +// Date startTime = new Date(); +// Date previousTime = new Date(); +// int remainingSeconds = -1; +// int corpusSize; +// int i; +// int taskIndex = 0; +// if(statistic.getFilter().getCollocability().size() > 0 && statistic.getFilter().getMinimalRelFre() > 1){ +// i = corpusFiles.size(); +// corpusSize = corpusFiles.size() * 3; +// } else if (statistic.getFilter().getMinimalRelFre() > 1) { +// i = corpusFiles.size(); +// corpusSize = corpusFiles.size() * 2; +// } else if (statistic.getFilter().getCollocability().size() > 0) { +// i = 0; +// corpusSize = corpusFiles.size() * 2; +// } else { +// i = 0; +// corpusSize = corpusFiles.size(); +// } +// for (File f : corpusFiles) { +// final int iFinal = i; +// XML_processing xml_processing = new XML_processing(); +// xml_processing.isCancelled = false; +// i++; +// taskIndex++; +// if(xml_processing.progressBarListener != null) { +// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); +// } +// if (multipleFiles) { +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000); +// previousTime = new Date(); +// } +// this.updateProgress(i, corpusSize); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); +// +//// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +//// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); +//// previousTime = new Date(); +//// } +//// this.updateProgress(i, corpusSize); +//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); +// +// } else { +// xml_processing.progressBarListener = new InvalidationListener() { +// int remainingSeconds = -1; +// Date previousTime = new Date(); +// @Override +// public void invalidated(Observable observable) { +// cancel.setVisible(true); +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * +// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * +// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); +//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); +//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1); +//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); +//// System.out.println(remainingSeconds); +// previousTime = new Date(); +// } +// xml_processing.isCancelled = isCancelled(); +// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); +// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); +// } +// }; +// +// xml_processing.progressProperty().addListener(xml_processing.progressBarListener); +// } +// xml_processing.readXML(f.toString(), statistic); +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } +// if(!(multipleFiles)){ +// cancel.setVisible(false); +// } +//// readXML(f.toString(), statistic); +//// i++; +//// if (isCancelled()) { +//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +//// break; +//// } +//// if (statistic.getFilter().getCollocability().size() > 0) { +//// this.updateProgress(i, corpusFiles.size() * 2); +//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); +//// } else { +//// this.updateProgress(i, corpusFiles.size()); +//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); +//// } +////// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); +// } +// // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing +// if (statistic.getFilter().getMinimalRelFre() > 1){ +//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() + +// long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue(); +// double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; +// +// +// for(Map.Entry entry : statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()){ +// if(entry.getValue().longValue() < absToRelFactor){ +// statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).remove(entry.getKey()); +// } +// } +// statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor); +// } +// +// return null; +// } +// }; +// +// ngramProgressBar.progressProperty().bind(task.progressProperty()); +// progressLabel.textProperty().bind(task.messageProperty()); +// task.setOnSucceeded(e -> { +// if (f.getCollocability().size() > 0) { +// try{ +// Filter f2 = (Filter) f.clone(); +// f2.setNgramValue(1); +// StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb); +// final Task taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams); +// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability"); +// thread_collocability.setDaemon(true); +// thread_collocability.start(); +// }catch(CloneNotSupportedException c){} +// +// +// +// } else { +// try { +//// System.out.print(statistics); +// boolean successullySaved = statistic.saveResultToDisk(); +// if (successullySaved) { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); +// } else { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); +// } +// } catch (UnsupportedEncodingException e1) { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); +// logger.error("Error while saving", e1); +// } catch (OutOfMemoryError e1) { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); +// logger.error("Out of memory error", e1); +// } +// ngramProgressBar.progressProperty().unbind(); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// } +// +// +// }); +// +// task.setOnFailed(e -> { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); +// logger.error("Error while executing", e); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// task.setOnCancelled(e -> { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// // When cancel button is pressed cancel analysis +// cancel.setOnAction(e -> { +// task.cancel(); +// logger.info("cancel button"); +// }); +// +// return task; +// } +// +// private final Task prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) { +// Collection corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles(); +// +// final Task task = new Task() { +// @SuppressWarnings("Duplicates") +// @Override +// protected Void call() throws Exception { +// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); +// if(multipleFiles){ +// cancel.setVisible(true); +// } +//// int i = corpusFiles.size(); +// Date startTime = new Date(); +// Date previousTime = new Date(); +// int remainingSeconds = -1; +//// int corpusSize; +//// if (statistic.getFilter().getCollocability().size() > 0) { +//// corpusSize = corpusFiles.size() * 2; +//// } else { +//// corpusSize = corpusFiles.size(); +//// } +// +// +// int corpusSize; +// int i; +// int taskIndex = 0; +// if(statistic.getFilter().getMinimalRelFre() > 1){ +// i = corpusFiles.size() * 2; +// corpusSize = corpusFiles.size() * 3; +// } else { +// i = corpusFiles.size(); +// corpusSize = corpusFiles.size() * 2; +// } +// +// +// +// for (File f : corpusFiles) { +// final int iFinal = i; +// XML_processing xml_processing = new XML_processing(); // i++; -// this.updateProgress(i, corpusFiles.size() * 2); -// if (statistic.getFilter().getCollocability().size() > 0) { -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); +// taskIndex++; +// if(xml_processing.progressBarListener != null) { +// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); +// } +// if (multipleFiles) { +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000); +// previousTime = new Date(); +// } +// this.updateProgress(i, corpusSize); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); +//// if (isCancelled()) { +//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +//// break; +//// } // } else { -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); +// xml_processing.progressBarListener = new InvalidationListener() { +// int remainingSeconds = -1; +// Date previousTime = new Date(); +// @Override +// public void invalidated(Observable observable) { +// cancel.setVisible(true); +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * +// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * +// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); +//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); +//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1))); +//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); +//// System.out.println(remainingSeconds); +// previousTime = new Date(); +// } +// xml_processing.isCancelled = isCancelled(); +// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); +// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); +// } +// }; +// +// xml_processing.progressProperty().addListener(xml_processing.progressBarListener); // } - } - - return null; - } - }; - - ngramProgressBar.progressProperty().bind(task.progressProperty()); - progressLabel.textProperty().bind(task.messageProperty()); - - task.setOnSucceeded(e -> { - try { - System.out.print(statistic); -// calculate_collocabilities(statistic, statisticsOneGrams); - statistic.updateCalculateCollocabilities(statisticsOneGrams); - boolean successullySaved = statistic.saveResultToDisk(); - if (successullySaved) { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); - } else { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); - } - } catch (UnsupportedEncodingException e1) { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); - logger.error("Error while saving", e1); - } catch (OutOfMemoryError e1) { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); - logger.error("Out of memory error", e1); - } +// xml_processing.isCollocability = true; +// xml_processing.readXML(f.toString(), statisticsOneGrams); +// xml_processing.isCollocability = false; +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } +//// readXML(f.toString(), statisticsOneGrams); +//// i++; +//// this.updateProgress(i, corpusFiles.size() * 2); +//// if (statistic.getFilter().getCollocability().size() > 0) { +//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); +//// } else { +//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); +//// } +// } +// +// return null; +// } +// }; +// +// ngramProgressBar.progressProperty().bind(task.progressProperty()); +// progressLabel.textProperty().bind(task.messageProperty()); +// +// task.setOnSucceeded(e -> { // try { +// System.out.print(statistic); +//// calculate_collocabilities(statistic, statisticsOneGrams); +// statistic.updateCalculateCollocabilities(statisticsOneGrams); // boolean successullySaved = statistic.saveResultToDisk(); // if (successullySaved) { -// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED); +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); // } else { -// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS); +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); // } // } catch (UnsupportedEncodingException e1) { -// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV); +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); // logger.error("Error while saving", e1); -// } catch (OutOfMemoryError e1){ -// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY); +// } catch (OutOfMemoryError e1) { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); // logger.error("Out of memory error", e1); // } +//// try { +//// boolean successullySaved = statistic.saveResultToDisk(); +//// if (successullySaved) { +//// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED); +//// } else { +//// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS); +//// } +//// } catch (UnsupportedEncodingException e1) { +//// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV); +//// logger.error("Error while saving", e1); +//// } catch (OutOfMemoryError e1){ +//// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY); +//// logger.error("Out of memory error", e1); +//// } +//// +// ngramProgressBar.progressProperty().unbind(); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); // - ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - task.setOnFailed(e -> { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); - logger.error("Error while executing", e); - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - task.setOnCancelled(e -> { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - // When cancel button is pressed cancel analysis - cancel.setOnAction(e -> { - task.cancel(); -// logger.info("cancel button"); - }); - return task; - } +// task.setOnFailed(e -> { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); +// logger.error("Error while executing", e); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// task.setOnCancelled(e -> { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// // When cancel button is pressed cancel analysis +// cancel.setOnAction(e -> { +// task.cancel(); +//// logger.info("cancel button"); +// }); +// return task; +// } private void execute(StatisticsNew statistic) { Filter f = statistic.getFilter(); logger.info("Started execution: ", f); -// Task task_collocability = null; - - Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); - - final Task task = new Task() { - @SuppressWarnings("Duplicates") - @Override - protected Void call() throws Exception { - final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); - if(multipleFiles){ - cancel.setVisible(true); - } - int i = 0; - Date startTime = new Date(); - Date previousTime = new Date(); - int remainingSeconds = -1; - int corpusSize; - if (statistic.getFilter().getCollocability().size() > 0) { - corpusSize = corpusFiles.size() * 2; - } else { - corpusSize = corpusFiles.size(); - } - for (File f : corpusFiles) { - final int iFinal = i; - XML_processing xml_processing = new XML_processing(); - xml_processing.isCancelled = false; - i++; - if(xml_processing.progressBarListener != null) { - xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); - } - if (multipleFiles) { - if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ - remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); - previousTime = new Date(); - } - this.updateProgress(i, corpusSize); - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } - } else { - xml_processing.progressBarListener = new InvalidationListener() { - int remainingSeconds = -1; - Date previousTime = new Date(); - @Override - public void invalidated(Observable observable) { - cancel.setVisible(true); - if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ - remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * - (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * - ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); -// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); -// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1); -// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); -// System.out.println(remainingSeconds); - previousTime = new Date(); - } - xml_processing.isCancelled = isCancelled(); - updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); - updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); - } - }; - - xml_processing.progressProperty().addListener(xml_processing.progressBarListener); - } - xml_processing.readXML(f.toString(), statistic); - if (isCancelled()) { - updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); - break; - } - if(!(multipleFiles)){ - cancel.setVisible(false); - } -// readXML(f.toString(), statistic); -// i++; -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } -// if (statistic.getFilter().getCollocability().size() > 0) { -// this.updateProgress(i, corpusFiles.size() * 2); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); -// } else { -// this.updateProgress(i, corpusFiles.size()); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); -// } -//// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); - } - - return null; - } - }; - - ngramProgressBar.progressProperty().bind(task.progressProperty()); - progressLabel.textProperty().bind(task.messageProperty()); - task.setOnSucceeded(e -> { - if (f.getCollocability().size() > 0) { - try{ - Filter f2 = (Filter) f.clone(); - f2.setNgramValue(1); - StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb); - final Task taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams); - final Thread thread_collocability = new Thread(taskCollocability, "task_collocability"); - thread_collocability.setDaemon(true); - thread_collocability.start(); - }catch(CloneNotSupportedException c){} - - - } else { - try { -// System.out.print(statistics); - boolean successullySaved = statistic.saveResultToDisk(); - if (successullySaved) { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); - } else { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); - } - } catch (UnsupportedEncodingException e1) { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); - logger.error("Error while saving", e1); - } catch (OutOfMemoryError e1) { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); - logger.error("Out of memory error", e1); - } - ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - } - - - }); - - task.setOnFailed(e -> { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); - logger.error("Error while executing", e); - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - task.setOnCancelled(e -> { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - // When cancel button is pressed cancel analysis - cancel.setOnAction(e -> { - task.cancel(); - logger.info("cancel button"); - }); - - final Thread thread = new Thread(task, "task"); - thread.setDaemon(true); - thread.start(); + Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel); + if (f.getMinimalRelFre() > 1){ + final Task mainTask = t.prepareTaskForMinRelFre(statistic); +// final Task mainTask = prepareTaskForMinRelFre(statistic); + final Thread thread = new Thread(mainTask, "task"); + thread.setDaemon(true); + thread.start(); + } else { + final Task mainTask = t.prepareMainTask(statistic); +// final Task mainTask = prepareMainTask(statistic); + final Thread thread = new Thread(mainTask, "task"); + thread.setDaemon(true); + thread.start(); + } } public void setSolarFiltersMap(HashMap> solarFiltersMap) { diff --git a/src/main/java/gui/WordLevelTab.java b/src/main/java/gui/WordLevelTab.java index 5446e9a..2625486 100755 --- a/src/main/java/gui/WordLevelTab.java +++ b/src/main/java/gui/WordLevelTab.java @@ -20,6 +20,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.controlsfx.control.CheckComboBox; +import util.Tasks; import java.io.File; import java.io.UnsupportedEncodingException; @@ -85,6 +86,9 @@ public class WordLevelTab { @FXML public Label minimalTaxonomyL; + @FXML + public Label minimalRelFreL; + @FXML public Label taxonomySetOperationL; @@ -122,6 +126,9 @@ public class WordLevelTab { @FXML public ImageView minimalTaxonomyI; + @FXML + public ImageView minimalRelFreI; + @FXML public ImageView taxonomySetOperationI; @@ -174,6 +181,10 @@ public class WordLevelTab { private TextField minimalTaxonomyTF; private Integer minimalTaxonomy; + @FXML + private TextField minimalRelFreTF; + private Integer minimalRelFre; + @FXML private ComboBox taxonomySetOperationCB; private String taxonomySetOperation; @@ -669,6 +680,29 @@ public class WordLevelTab { } }); + // set default values + minimalRelFreTF.setText("1"); + minimalRelFre = 1; + + minimalRelFreTF.focusedProperty().addListener((observable, oldValue, newValue) -> { + if (!newValue) { + // focus lost + String value = minimalRelFreTF.getText(); + if (!ValidationUtil.isEmpty(value)) { + if (!ValidationUtil.isNumber(value)) { + logAlert("minimalRelFreTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); + GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); + } else { + minimalRelFre = Integer.parseInt(value); + } + } else { + minimalRelFreTF.setText("1"); + minimalRelFre = 1; + } + } + }); + + changeLanguageB.setOnAction(e -> { if (I18N.getLocale() == new Locale.Builder().setLanguage("sl").setRegion("SI").build()){ I18N.setLocale(Locale.ENGLISH); @@ -798,6 +832,7 @@ public class WordLevelTab { taxonomyL.textProperty().bind(I18N.createStringBinding("label.taxonomy")); minimalOccurrencesL.textProperty().bind(I18N.createStringBinding("label.minimalOccurrences")); minimalTaxonomyL.textProperty().bind(I18N.createStringBinding("label.minimalTaxonomy")); + minimalRelFreL.textProperty().bind(I18N.createStringBinding("label.minimalRelFre")); solarFilters.textProperty().bind(I18N.createStringBinding("label.solarFilters")); taxonomySetOperationL.textProperty().bind(I18N.createStringBinding("label.taxonomySetOperation")); @@ -814,6 +849,7 @@ public class WordLevelTab { addTooltipToImage(taxonomyI, I18N.createStringBinding("label.wordPart.taxonomyH")); addTooltipToImage(minimalOccurrencesI, I18N.createStringBinding("label.wordPart.minimalOccurrencesH")); addTooltipToImage(minimalTaxonomyI, I18N.createStringBinding("label.wordPart.minimalTaxonomyH")); + addTooltipToImage(minimalRelFreI, I18N.createStringBinding("label.wordPart.minimalRelFreH")); addTooltipToImage(taxonomySetOperationI, I18N.createStringBinding("label.letter.taxonomySetOperationH")); taxonomySetOperationCB.itemsProperty().bind(I18N.createObjectBinding(TAXONOMY_SET_OPERATION)); @@ -873,6 +909,7 @@ public class WordLevelTab { filter.setMsd(msd); filter.setMinimalOccurrences(minimalOccurrences); filter.setMinimalTaxonomy(minimalTaxonomy); + filter.setMinimalRelFre(minimalRelFre); filter.setPrefixLength(prefixLength); filter.setSuffixLength(suffixLength); filter.setPrefixList(prefixList); @@ -930,122 +967,136 @@ public class WordLevelTab { Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); - final Task task = new Task() { - @SuppressWarnings("Duplicates") - @Override - protected Void call() throws Exception { - final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); - if(multipleFiles){ - cancel.setVisible(true); - } - int i = 0; - Date startTime = new Date(); - Date previousTime = new Date(); - int remainingSeconds = -1; - for (File f : corpusFiles) { - final int iFinal = i; - XML_processing xml_processing = new XML_processing(); - xml_processing.isCancelled = false; - i++; - if (isCancelled()) { - updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); - break; - } - if(xml_processing.progressBarListener != null) { - xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); - } - if (multipleFiles) { - if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ - remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000); - previousTime = new Date(); - } - this.updateProgress(i, corpusFiles.size()); - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); - } else { - xml_processing.progressBarListener = new InvalidationListener() { - int remainingSeconds = -1; - Date previousTime = new Date(); - @Override - public void invalidated(Observable observable) { - cancel.setVisible(true); - if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ - remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * - (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * - ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); - previousTime = new Date(); - } - xml_processing.isCancelled = isCancelled(); - updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100); - updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds)); - } - }; - - xml_processing.progressProperty().addListener(xml_processing.progressBarListener); - } - xml_processing.readXML(f.toString(), statistic); - if (isCancelled()) { - updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); - break; - } - } - - return null; - } - }; - - ngramProgressBar.progressProperty().bind(task.progressProperty()); - progressLabel.textProperty().bind(task.messageProperty()); - - task.setOnSucceeded(e -> { - try { - boolean successullySaved = statistic.saveResultToDisk(); - if (successullySaved) { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); - } else { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); - } - } catch (UnsupportedEncodingException e1) { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); - logger.error("Error while saving", e1); - } - - ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - task.setOnFailed(e -> { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); - logger.error("Error while executing", e); - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - task.setOnCancelled(e -> { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); - }); - - // When cancel button is pressed cancel analysis - cancel.setOnAction(e -> { - task.cancel(); - logger.info("cancel button"); - }); - - final Thread thread = new Thread(task, "task"); - thread.setDaemon(true); - thread.start(); +// final Task task = new Task() { +// @SuppressWarnings("Duplicates") +// @Override +// protected Void call() throws Exception { +// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); +// if(multipleFiles){ +// cancel.setVisible(true); +// } +// int i = 0; +// Date startTime = new Date(); +// Date previousTime = new Date(); +// int remainingSeconds = -1; +// for (File f : corpusFiles) { +// final int iFinal = i; +// XML_processing xml_processing = new XML_processing(); +// xml_processing.isCancelled = false; +// i++; +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } +// if(xml_processing.progressBarListener != null) { +// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); +// } +// if (multipleFiles) { +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000); +// previousTime = new Date(); +// } +// this.updateProgress(i, corpusFiles.size()); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); +// } else { +// xml_processing.progressBarListener = new InvalidationListener() { +// int remainingSeconds = -1; +// Date previousTime = new Date(); +// @Override +// public void invalidated(Observable observable) { +// cancel.setVisible(true); +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * +// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * +// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); +// previousTime = new Date(); +// } +// xml_processing.isCancelled = isCancelled(); +// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100); +// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds)); +// } +// }; +// +// xml_processing.progressProperty().addListener(xml_processing.progressBarListener); +// } +// xml_processing.readXML(f.toString(), statistic); +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } +// } +// +// return null; +// } +// }; +// +// ngramProgressBar.progressProperty().bind(task.progressProperty()); +// progressLabel.textProperty().bind(task.messageProperty()); +// +// task.setOnSucceeded(e -> { +// try { +// boolean successullySaved = statistic.saveResultToDisk(); +// if (successullySaved) { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); +// } else { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); +// } +// } catch (UnsupportedEncodingException e1) { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); +// logger.error("Error while saving", e1); +// } +// +// ngramProgressBar.progressProperty().unbind(); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// task.setOnFailed(e -> { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); +// logger.error("Error while executing", e); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// task.setOnCancelled(e -> { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// cancel.setVisible(false); +// }); +// +// // When cancel button is pressed cancel analysis +// cancel.setOnAction(e -> { +// task.cancel(); +// logger.info("cancel button"); +// }); +// +// final Thread thread = new Thread(task, "task"); +// thread.setDaemon(true); +// thread.start(); + Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel); + if (statistic.getFilter().getMinimalRelFre() > 1){ + final Task mainTask = t.prepareTaskForMinRelFre(statistic); +// final Task mainTask = prepareTaskForMinRelFre(statistic); + final Thread thread = new Thread(mainTask, "task"); + thread.setDaemon(true); + thread.start(); + } else { + final Task mainTask = t.prepareMainTask(statistic); +// final Task mainTask = prepareMainTask(statistic); + final Thread thread = new Thread(mainTask, "task"); + thread.setDaemon(true); + thread.start(); + } } public void setSolarFiltersMap(HashMap> solarFiltersMap) { diff --git a/src/main/java/util/Tasks.java b/src/main/java/util/Tasks.java new file mode 100644 index 0000000..859588c --- /dev/null +++ b/src/main/java/util/Tasks.java @@ -0,0 +1,585 @@ +package util; + +import alg.XML_processing; +import data.*; +import gui.I18N; +import gui.StringAnalysisTabNew2; +import javafx.beans.InvalidationListener; +import javafx.beans.Observable; +import javafx.beans.property.ReadOnlyDoubleWrapper; +import javafx.fxml.FXML; +import javafx.scene.control.Alert; +import javafx.scene.control.Button; +import javafx.scene.control.Label; +import javafx.scene.control.ProgressBar; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.File; +import java.io.UnsupportedEncodingException; +import java.util.Collection; +import java.util.Date; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + +import static gui.GUIController.showAlert; + +public class Tasks { + public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class); + + private Corpus corpus; + private boolean useDb; + + @FXML + private Button cancel; + + @FXML + public ProgressBar ngramProgressBar; + @FXML + public Label progressLabel; + + public Tasks(Corpus corpus, boolean useDb, Button cancel, ProgressBar ngramProgressBar, Label progressLabel) { + this.corpus = corpus; + this.useDb = useDb; + this.cancel = cancel; + this.ngramProgressBar = ngramProgressBar; + this.progressLabel = progressLabel; + } + + public final javafx.concurrent.Task prepareTaskForMinRelFre(StatisticsNew statistic) { + Filter f = statistic.getFilter(); + logger.info("Started execution: ", f); + javafx.concurrent.Task task_collocability = null; + + try{ + Filter f2 = (Filter) f.clone(); + f2.setIsMinimalRelFreScraper(true); + StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb); + + +// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb); + + Collection corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles(); + + final javafx.concurrent.Task task = new javafx.concurrent.Task() { + @SuppressWarnings("Duplicates") + @Override + protected Void call() throws Exception { + final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType()); + if(multipleFiles){ + cancel.setVisible(true); + } + Date startTime = new Date(); + Date previousTime = new Date(); + int remainingSeconds = -1; + int corpusSize; + int i; + if(statistic.getFilter().getCollocability().size() > 0){ + i = 0; + corpusSize = corpusFiles.size() * 3; + } else { + i = 0; + corpusSize = corpusFiles.size() * 2; + } + for (File f : corpusFiles) { + final int iFinal = i; + XML_processing xml_processing = new XML_processing(); + xml_processing.isCancelled = false; + i++; + if(xml_processing.progressBarListener != null) { + xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); + } + if (multipleFiles) { + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); + previousTime = new Date(); + } + this.updateProgress(i, corpusSize); + this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } + } else { + xml_processing.progressBarListener = new InvalidationListener() { + int remainingSeconds = -1; + Date previousTime = new Date(); + @Override + public void invalidated(Observable observable) { + cancel.setVisible(true); + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * + (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * + ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); +// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); +// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1); +// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); +// System.out.println(remainingSeconds); + previousTime = new Date(); + } + xml_processing.isCancelled = isCancelled(); + updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); + updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); + } + }; + + xml_processing.progressProperty().addListener(xml_processing.progressBarListener); + } + xml_processing.readXML(f.toString(), statisticsMinRelFre); + if (isCancelled()) { + updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); + break; + } + if(!(multipleFiles)){ + cancel.setVisible(false); + } + } + + // add remaining minRelFre results + if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) { +// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() + + long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue(); + double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; + + statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor); + + // reset all values + for(Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()){ + statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>()); + } + for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){ + statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0)); + } + +// System.out.println("asd"); + } + + return null; + } + }; + + ngramProgressBar.progressProperty().bind(task.progressProperty()); + progressLabel.textProperty().bind(task.messageProperty()); + task.setOnSucceeded(e -> { + statistic.updateMinimalRelFre(statisticsMinRelFre.getMinimalRelFreNgrams(), statisticsMinRelFre.getMinimalRelFre1grams()); + final javafx.concurrent.Task taskCollocability = prepareMainTask(statistic); + final Thread thread_collocability = new Thread(taskCollocability, "task_collocability"); + thread_collocability.setDaemon(true); + thread_collocability.start(); + }); + + task.setOnFailed(e -> { + showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); + logger.error("Error while executing", e); + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setProgress(0.0); + // ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); + }); + + task.setOnCancelled(e -> { + showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setProgress(0.0); + // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); + }); + + // When cancel button is pressed cancel analysis + cancel.setOnAction(e -> { + task.cancel(); + logger.info("cancel button"); + }); + + return task; + }catch(CloneNotSupportedException c){ return null; } + } + + public final javafx.concurrent.Task prepareMainTask(StatisticsNew statistic) { + Filter f = statistic.getFilter(); + logger.info("Started execution: ", f); + javafx.concurrent.Task task_collocability = null; + + Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); + + final javafx.concurrent.Task task = new javafx.concurrent.Task() { + @SuppressWarnings("Duplicates") + @Override + protected Void call() throws Exception { + final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); + if(multipleFiles){ + cancel.setVisible(true); + } + + +// int i = corpusFiles.size(); +// Date startTime = new Date(); +// Date previousTime = new Date(); +// int remainingSeconds = -1; +// int corpusSize; +// if (statistic.getFilter().getCollocability().size() > 0) { +// corpusSize = corpusFiles.size() * 2; +// } else { +// corpusSize = corpusFiles.size(); +// } + + Date startTime = new Date(); + Date previousTime = new Date(); + int remainingSeconds = -1; + int corpusSize; + int i; + int taskIndex = 0; + if(statistic.getFilter().getCollocability().size() > 0 && statistic.getFilter().getMinimalRelFre() > 1){ + i = corpusFiles.size(); + corpusSize = corpusFiles.size() * 3; + } else if (statistic.getFilter().getMinimalRelFre() > 1) { + i = corpusFiles.size(); + corpusSize = corpusFiles.size() * 2; + } else if (statistic.getFilter().getCollocability().size() > 0) { + i = 0; + corpusSize = corpusFiles.size() * 2; + } else { + i = 0; + corpusSize = corpusFiles.size(); + } + for (File f : corpusFiles) { + final int iFinal = i; + XML_processing xml_processing = new XML_processing(); + xml_processing.isCancelled = false; + i++; + taskIndex++; + if(xml_processing.progressBarListener != null) { + xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); + } + if (multipleFiles) { + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000); + previousTime = new Date(); + } + this.updateProgress(i, corpusSize); + this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); + +// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ +// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); +// previousTime = new Date(); +// } +// this.updateProgress(i, corpusSize); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); + + } else { + xml_processing.progressBarListener = new InvalidationListener() { + int remainingSeconds = -1; + Date previousTime = new Date(); + @Override + public void invalidated(Observable observable) { + cancel.setVisible(true); + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * + (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * + ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); +// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); +// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1); +// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); +// System.out.println(remainingSeconds); + previousTime = new Date(); + } + xml_processing.isCancelled = isCancelled(); + updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); + updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); + } + }; + + xml_processing.progressProperty().addListener(xml_processing.progressBarListener); + } + xml_processing.readXML(f.toString(), statistic); + if (isCancelled()) { + updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); + break; + } + if(!(multipleFiles)){ + cancel.setVisible(false); + } +// readXML(f.toString(), statistic); +// i++; +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } +// if (statistic.getFilter().getCollocability().size() > 0) { +// this.updateProgress(i, corpusFiles.size() * 2); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); +// } else { +// this.updateProgress(i, corpusFiles.size()); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); +// } +//// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); + } + // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing + if (statistic.getFilter().getMinimalRelFre() > 1){ +// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() + + long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue(); + double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; + + + for(Map.Entry entry : statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()){ + if(entry.getValue().longValue() < absToRelFactor){ + statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).remove(entry.getKey()); + } + } + statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor); + } + + return null; + } + }; + + ngramProgressBar.progressProperty().bind(task.progressProperty()); + progressLabel.textProperty().bind(task.messageProperty()); + task.setOnSucceeded(e -> { + if (f.getCollocability().size() > 0) { + try{ + Filter f2 = (Filter) f.clone(); + f2.setNgramValue(1); + StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb); + final javafx.concurrent.Task taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams); + final Thread thread_collocability = new Thread(taskCollocability, "task_collocability"); + thread_collocability.setDaemon(true); + thread_collocability.start(); + }catch(CloneNotSupportedException c){} + + + + } else { + try { +// System.out.print(statistics); + boolean successullySaved = statistic.saveResultToDisk(); + if (successullySaved) { + showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); + } else { + showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); + } + } catch (UnsupportedEncodingException e1) { + showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); + logger.error("Error while saving", e1); + } catch (OutOfMemoryError e1) { + showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); + logger.error("Out of memory error", e1); + } + ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); + } + + + }); + + task.setOnFailed(e -> { + showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); + logger.error("Error while executing", e); + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setProgress(0.0); +// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); + }); + + task.setOnCancelled(e -> { + showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setProgress(0.0); +// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); + }); + + // When cancel button is pressed cancel analysis + cancel.setOnAction(e -> { + task.cancel(); + logger.info("cancel button"); + }); + + return task; + } + + public final javafx.concurrent.Task prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) { + Collection corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles(); + + final javafx.concurrent.Task task = new javafx.concurrent.Task() { + @SuppressWarnings("Duplicates") + @Override + protected Void call() throws Exception { + final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); + if(multipleFiles){ + cancel.setVisible(true); + } +// int i = corpusFiles.size(); + Date startTime = new Date(); + Date previousTime = new Date(); + int remainingSeconds = -1; +// int corpusSize; +// if (statistic.getFilter().getCollocability().size() > 0) { +// corpusSize = corpusFiles.size() * 2; +// } else { +// corpusSize = corpusFiles.size(); +// } + + + int corpusSize; + int i; + int taskIndex = 0; + if(statistic.getFilter().getMinimalRelFre() > 1){ + i = corpusFiles.size() * 2; + corpusSize = corpusFiles.size() * 3; + } else { + i = corpusFiles.size(); + corpusSize = corpusFiles.size() * 2; + } + + + + for (File f : corpusFiles) { + final int iFinal = i; + XML_processing xml_processing = new XML_processing(); + i++; + taskIndex++; + if(xml_processing.progressBarListener != null) { + xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); + } + if (multipleFiles) { + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000); + previousTime = new Date(); + } + this.updateProgress(i, corpusSize); + this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); +// if (isCancelled()) { +// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); +// break; +// } + } else { + xml_processing.progressBarListener = new InvalidationListener() { + int remainingSeconds = -1; + Date previousTime = new Date(); + @Override + public void invalidated(Observable observable) { + cancel.setVisible(true); + if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ + remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * + (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * + ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); +// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); +// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1))); +// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); +// System.out.println(remainingSeconds); + previousTime = new Date(); + } + xml_processing.isCancelled = isCancelled(); + updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); + updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); + } + }; + + xml_processing.progressProperty().addListener(xml_processing.progressBarListener); + } + xml_processing.isCollocability = true; + xml_processing.readXML(f.toString(), statisticsOneGrams); + xml_processing.isCollocability = false; + if (isCancelled()) { + updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); + break; + } +// readXML(f.toString(), statisticsOneGrams); +// i++; +// this.updateProgress(i, corpusFiles.size() * 2); +// if (statistic.getFilter().getCollocability().size() > 0) { +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); +// } else { +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); +// } + } + + return null; + } + }; + + ngramProgressBar.progressProperty().bind(task.progressProperty()); + progressLabel.textProperty().bind(task.messageProperty()); + + task.setOnSucceeded(e -> { + try { + System.out.print(statistic); +// calculate_collocabilities(statistic, statisticsOneGrams); + statistic.updateCalculateCollocabilities(statisticsOneGrams); + boolean successullySaved = statistic.saveResultToDisk(); + if (successullySaved) { + showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); + } else { + showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); + } + } catch (UnsupportedEncodingException e1) { + showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); + logger.error("Error while saving", e1); + } catch (OutOfMemoryError e1) { + showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); + logger.error("Out of memory error", e1); + } +// try { +// boolean successullySaved = statistic.saveResultToDisk(); +// if (successullySaved) { +// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED); +// } else { +// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS); +// } +// } catch (UnsupportedEncodingException e1) { +// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV); +// logger.error("Error while saving", e1); +// } catch (OutOfMemoryError e1){ +// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY); +// logger.error("Out of memory error", e1); +// } +// + ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); + }); + + task.setOnFailed(e -> { + showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); + logger.error("Error while executing", e); + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setProgress(0.0); +// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); + }); + + task.setOnCancelled(e -> { + showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setProgress(0.0); +// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); + }); + + // When cancel button is pressed cancel analysis + cancel.setOnAction(e -> { + task.cancel(); +// logger.info("cancel button"); + }); + return task; + } +} diff --git a/src/main/resources/gui/OneWordAnalysisTab.fxml b/src/main/resources/gui/OneWordAnalysisTab.fxml index 15a040e..df069b7 100755 --- a/src/main/resources/gui/OneWordAnalysisTab.fxml +++ b/src/main/resources/gui/OneWordAnalysisTab.fxml @@ -99,9 +99,15 @@ +