Added a filter that deletes words with lower frequency from the output (optimization for large corpora)

This commit is contained in:
Luka 2019-02-27 10:14:40 +01:00
parent b8dee86c36
commit 82d111eade
20 changed files with 1670 additions and 561 deletions

View File

@ -6,6 +6,7 @@ import java.io.*;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.atomic.AtomicLong;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
@ -178,6 +179,26 @@ public class XML_processing {
// alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats);
// pool.invoke(wc);
}
// if running with minimalRelFre frequency erase all ngrams with occurrences lower than set value per 1M
if(stats.getFilter().getIsMinimalRelFreScraper()) {
// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
long countFor1MWords = stats.getUniGramOccurrences().get(stats.getCorpus().getTotal()).longValue();
if(countFor1MWords > 1000000L){
double absToRelFactor = (stats.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
stats.updateMinimalRelFre(stats.getTaxonomyResult().get(stats.getCorpus().getTotal()).entrySet(), absToRelFactor);
// reset all values
for(Taxonomy taxonomy : stats.getTaxonomyResult().keySet()){
stats.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>());
}
for(Taxonomy taxonomy : stats.getUniGramOccurrences().keySet()){
stats.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
}
}
// System.out.println("asd");
}
}
// public static void readXMLGos(String path, Statistics stats) {

View File

@ -29,6 +29,8 @@ public class Filter implements Cloneable {
NOTE_PUNCTUATIONS,
MINIMAL_OCCURRENCES,
MINIMAL_TAXONOMY,
MINIMAL_REL_FRE,
IS_MINIMAL_REL_FRE_SCRAPER,
TAXONOMY_SET_OPERATION,
COLLOCABILITY,
PREFIX_LENGTH,
@ -41,6 +43,7 @@ public class Filter implements Cloneable {
filter = new HashMap<>();
filter.put(WRITE_MSD_AT_THE_END, false);
filter.put(WORD_PARTS, new ArrayList<CalculateFor>());
filter.put(IS_MINIMAL_REL_FRE_SCRAPER, false);
}
public Filter(AnalysisLevel al, CalculateFor cf) {
@ -258,6 +261,24 @@ public class Filter implements Cloneable {
return (Integer) filter.get(MINIMAL_TAXONOMY);
}
/**
 * Stores the minimal relative frequency in the filter map under {@code MINIMAL_REL_FRE}.
 *
 * @param minimalRelFre threshold to remember; stored as given, without validation
 */
public void setMinimalRelFre(Integer minimalRelFre) {
    this.filter.put(MINIMAL_REL_FRE, minimalRelFre);
}
/**
 * Reads the minimal relative frequency stored under {@code MINIMAL_REL_FRE}.
 *
 * @return the stored threshold, or {@code null} when the map holds no value for that key
 */
public Integer getMinimalRelFre() {
    final Object stored = filter.get(MINIMAL_REL_FRE);
    return (Integer) stored;
}
/**
 * Stores the "minimal relative frequency scraper" flag under {@code IS_MINIMAL_REL_FRE_SCRAPER}.
 *
 * @param isMinimalRelFreScraper new value of the flag
 */
public void setIsMinimalRelFreScraper(boolean isMinimalRelFreScraper) {
    this.filter.put(IS_MINIMAL_REL_FRE_SCRAPER, isMinimalRelFreScraper);
}
/**
 * Reads the "minimal relative frequency scraper" flag stored under
 * {@code IS_MINIMAL_REL_FRE_SCRAPER}.
 * <p>
 * The lookup is null-safe: a filter whose map has no entry for the key reports {@code false}
 * instead of failing with a {@link NullPointerException} while unboxing the missing value.
 *
 * @return {@code true} only when the stored value is {@code Boolean.TRUE}; {@code false} otherwise
 */
public boolean getIsMinimalRelFreScraper() {
    // Boolean.TRUE.equals(null) is false, so an absent key reads as "not a scraper run".
    return Boolean.TRUE.equals(filter.get(IS_MINIMAL_REL_FRE_SCRAPER));
}
// PREFIX_LENGTH,
// SUFFIX_LENGTH,
// PREFIX_LIST,

View File

@ -66,4 +66,6 @@ public interface MultipleHMKeys {
.thenComparing(MultipleHMKeys::getK5)
.compare(this, othr);
}
/**
 * Splits this n-gram key into one key per word.
 * <p>
 * The implementations in MultipleHMKeys1 to MultipleHMKeys5 split every key component on a
 * single space and build the i-th result from the i-th token of each component.
 *
 * @return one single-word key for each space-separated token of {@code getK1()}
 */
MultipleHMKeys[] splitNgramTo1grams();
}

View File

@ -36,4 +36,13 @@ public final class MultipleHMKeys1 implements MultipleHMKeys {
public boolean equals(Object obj) {
return (obj instanceof MultipleHMKeys1) && ((MultipleHMKeys1) obj).k1.equals(k1);
}
/**
 * Splits this key into one single-component key per word.
 *
 * @return one {@code MultipleHMKeys1} for every space-separated token of {@code getK1()},
 *         in the original order
 */
public MultipleHMKeys[] splitNgramTo1grams(){
    final String[] words = getK1().split(" ");
    final MultipleHMKeys[] unigrams = new MultipleHMKeys[words.length];
    int position = 0;
    for (String word : words) {
        unigrams[position++] = new MultipleHMKeys1(word);
    }
    return unigrams;
}
}

View File

@ -46,4 +46,14 @@ public final class MultipleHMKeys2 implements MultipleHMKeys {
// return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key);
}
/**
 * Splits this n-gram key into one two-component key per word.
 * <p>
 * The number of results equals the number of space-separated tokens in {@code getK1()}.
 * The second component is split with a negative limit so that trailing empty tokens are kept:
 * when the last word has an empty second component (for example {@code "x "}), there is still
 * one token per word instead of an {@link ArrayIndexOutOfBoundsException}.
 *
 * @return one {@code MultipleHMKeys2} per word, in the original word order
 * @throws ArrayIndexOutOfBoundsException if the second component has fewer tokens than the first
 */
public MultipleHMKeys[] splitNgramTo1grams(){
    // k1 decides how many words there are; its splitting rules are left untouched.
    String[] k1 = getK1().split(" ");
    // Limit -1: plain split(" ") silently discards trailing empty strings.
    String[] k2 = getK2().split(" ", -1);
    MultipleHMKeys[] res = new MultipleHMKeys[k1.length];
    for(int i = 0; i < k1.length; i++){
        res[i] = new MultipleHMKeys2(k1[i], k2[i]);
    }
    return res;
}
}

View File

@ -50,4 +50,15 @@ public final class MultipleHMKeys3 implements MultipleHMKeys {
&& ((MultipleHMKeys3) obj).k2.equals(k2)
&& ((MultipleHMKeys3) obj).k3.equals(k3);
}
/**
 * Splits this n-gram key into one three-component key per word.
 * <p>
 * The number of results equals the number of space-separated tokens in {@code getK1()}.
 * The second and third components are split with a negative limit so that trailing empty
 * tokens are kept: when the last word has an empty component (for example {@code "x "}),
 * there is still one token per word instead of an {@link ArrayIndexOutOfBoundsException}.
 *
 * @return one {@code MultipleHMKeys3} per word, in the original word order
 * @throws ArrayIndexOutOfBoundsException if a component has fewer tokens than the first one
 */
public MultipleHMKeys[] splitNgramTo1grams(){
    // k1 decides how many words there are; its splitting rules are left untouched.
    String[] k1 = getK1().split(" ");
    // Limit -1: plain split(" ") silently discards trailing empty strings.
    String[] k2 = getK2().split(" ", -1);
    String[] k3 = getK3().split(" ", -1);
    MultipleHMKeys[] res = new MultipleHMKeys[k1.length];
    for(int i = 0; i < k1.length; i++){
        res[i] = new MultipleHMKeys3(k1[i], k2[i], k3[i]);
    }
    return res;
}
}

View File

@ -58,4 +58,16 @@ public final class MultipleHMKeys4 implements MultipleHMKeys {
&& ((MultipleHMKeys4) obj).k3.equals(k3)
&& ((MultipleHMKeys4) obj).k4.equals(k4);
}
/**
 * Splits this n-gram key into one four-component key per word.
 * <p>
 * The number of results equals the number of space-separated tokens in {@code getK1()}.
 * The second to fourth components are split with a negative limit so that trailing empty
 * tokens are kept: when the last word has an empty component (for example {@code "x "}),
 * there is still one token per word instead of an {@link ArrayIndexOutOfBoundsException}.
 *
 * @return one {@code MultipleHMKeys4} per word, in the original word order
 * @throws ArrayIndexOutOfBoundsException if a component has fewer tokens than the first one
 */
public MultipleHMKeys[] splitNgramTo1grams(){
    // k1 decides how many words there are; its splitting rules are left untouched.
    String[] k1 = getK1().split(" ");
    // Limit -1: plain split(" ") silently discards trailing empty strings.
    String[] k2 = getK2().split(" ", -1);
    String[] k3 = getK3().split(" ", -1);
    String[] k4 = getK4().split(" ", -1);
    MultipleHMKeys[] res = new MultipleHMKeys[k1.length];
    for(int i = 0; i < k1.length; i++){
        res[i] = new MultipleHMKeys4(k1[i], k2[i], k3[i], k4[i]);
    }
    return res;
}
}

View File

@ -66,4 +66,17 @@ public final class MultipleHMKeys5 implements MultipleHMKeys {
&& ((MultipleHMKeys5) obj).k4.equals(k4)
&& ((MultipleHMKeys5) obj).k5.equals(k5);
}
/**
 * Splits this n-gram key into one five-component key per word.
 * <p>
 * The number of results equals the number of space-separated tokens in {@code getK1()}.
 * The second to fifth components are split with a negative limit so that trailing empty
 * tokens are kept: when the last word has an empty component (for example {@code "x "}),
 * there is still one token per word instead of an {@link ArrayIndexOutOfBoundsException}.
 *
 * @return one {@code MultipleHMKeys5} per word, in the original word order
 * @throws ArrayIndexOutOfBoundsException if a component has fewer tokens than the first one
 */
public MultipleHMKeys[] splitNgramTo1grams(){
    // k1 decides how many words there are; its splitting rules are left untouched.
    String[] k1 = getK1().split(" ");
    // Limit -1: plain split(" ") silently discards trailing empty strings.
    String[] k2 = getK2().split(" ", -1);
    String[] k3 = getK3().split(" ", -1);
    String[] k4 = getK4().split(" ", -1);
    String[] k5 = getK5().split(" ", -1);
    MultipleHMKeys[] res = new MultipleHMKeys[k1.length];
    for(int i = 0; i < k1.length; i++){
        res[i] = new MultipleHMKeys5(k1[i], k2[i], k3[i], k4[i], k5[i]);
    }
    return res;
}
}

View File

@ -46,6 +46,9 @@ public class StatisticsNew {
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
private Map<Taxonomy, AtomicLong> uniGramTaxonomyOccurrences;
private HashSet<MultipleHMKeys> minimalRelFreNgrams;
private HashSet<MultipleHMKeys> minimalRelFre1grams;
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
this.corpus = corpus;
this.filter = filter;
@ -54,6 +57,9 @@ public class StatisticsNew {
this.collocability = new ConcurrentHashMap<>();
this.uniGramTaxonomyOccurrences = new ConcurrentHashMap<>();
this.uniGramTaxonomyOccurrences.put(corpus.getTotal(), new AtomicLong(0L));
this.minimalRelFreNgrams = new HashSet<>();
this.minimalRelFre1grams = new HashSet<>();
// create table for counting word occurrences per taxonomies
@ -373,6 +379,10 @@ public class StatisticsNew {
}
public void updateTaxonomyResults(MultipleHMKeys o, List<Taxonomy> taxonomy) {
if(minimalRelFreNgrams.size() > 0 && !filter.getIsMinimalRelFreScraper() && !(minimalRelFreNgrams.contains(o) || minimalRelFre1grams.contains(o))) {
return;
}
for (Taxonomy key : taxonomyResult.keySet()) {
// first word should have the same taxonomy as others
if (key.equals(corpus.getTotal()) || taxonomy.contains(key)) {
@ -472,6 +482,28 @@ public class StatisticsNew {
}
}
/**
 * Exposes the set of n-gram keys collected for the minimal relative frequency filter.
 *
 * @return the internal set itself (not a copy)
 */
public HashSet<MultipleHMKeys> getMinimalRelFreNgrams() {
    return this.minimalRelFreNgrams;
}
/**
 * Exposes the set of single-word keys collected for the minimal relative frequency filter.
 *
 * @return the internal set itself (not a copy)
 */
public HashSet<MultipleHMKeys> getMinimalRelFre1grams() {
    return this.minimalRelFre1grams;
}
/**
 * Replaces both minimal relative frequency sets with the supplied ones.
 * The references are stored as given; no copy is made.
 *
 * @param hsNgrams new set of n-gram keys
 * @param hs1grams new set of single-word keys
 */
public void updateMinimalRelFre(HashSet<MultipleHMKeys> hsNgrams, HashSet<MultipleHMKeys> hs1grams) {
    this.minimalRelFreNgrams = hsNgrams;
    this.minimalRelFre1grams = hs1grams;
}
/**
 * Adds every sufficiently frequent n-gram, and the single-word keys it splits into, to the
 * minimal relative frequency sets.
 *
 * @param entries        n-gram keys paired with their occurrence counters
 * @param absToRelFactor lowest occurrence count an n-gram needs in order to be kept
 */
public void updateMinimalRelFre(Set<Map.Entry<MultipleHMKeys, AtomicLong>> entries, double absToRelFactor) {
    for (Map.Entry<MultipleHMKeys, AtomicLong> candidate : entries) {
        final long occurrences = candidate.getValue().longValue();
        // Written as a negated >= so the comparison behaves exactly like the direct check.
        if (!(occurrences >= absToRelFactor)) {
            continue;
        }
        final MultipleHMKeys ngram = candidate.getKey();
        minimalRelFreNgrams.add(ngram);
        for (MultipleHMKeys unigram : ngram.splitNgramTo1grams()) {
            minimalRelFre1grams.add(unigram);
        }
    }
}
private LinkedHashMap<String, String> headerInfoBlock() {
LinkedHashMap<String, String> info = new LinkedHashMap<>();

View File

@ -144,7 +144,7 @@ public class CorpusTab {
private String corpusLocation;
private String corpusFilesSize;
private static final String [] SELECT_READER_ARRAY = {"vert", "Solar", "GOS", "SSJ500K", "Gigafida", "Gigafida (old)"};
private static final String [] SELECT_READER_ARRAY = {"VERT + REGI", "XML (Šolar 1.0)", "XML (GOS 1.0)", "XML (ssj500k 2.1)", "XML (Gigafida 2.0)", "XML (Gigafida 1.0, Kres 1.0)"};
private static final ArrayList<String> SELECT_READER = new ArrayList<>(Arrays.asList(SELECT_READER_ARRAY));
private Collection<File> corpusFiles;
private File selectedDirectory;
@ -798,22 +798,22 @@ public class CorpusTab {
private void selectReader() {
switch (selectReader) {
// "vert", "Solar", "GOS", "SSJ500K", "Gigafida", "Gigafida (old)", "Kres (old)"
case "vert":
case "VERT + REGI":
corpusType = VERT;
break;
case "Solar":
case "XML (Šolar 1.0)":
corpusType = SOLAR;
break;
case "GOS":
case "XML (GOS 1.0)":
corpusType = GOS;
break;
case "SSJ500K":
case "XML (ssj500k 2.1)":
corpusType = SSJ500K;
break;
case "Gigafida":
case "XML (Gigafida 2.0)":
corpusType = GIGAFIDA2;
break;
case "Gigafida (old)":
case "XML (Gigafida 1.0, Kres 1.0)":
corpusType = GIGAFIDA;
break;
default:

View File

@ -182,7 +182,7 @@ public class GUIController extends Application {
alert.showAndWait();
}
static void showAlert(Alert.AlertType alertType, String headerText) {
public static void showAlert(Alert.AlertType alertType, String headerText) {
showAlert(alertType, headerText, null);
}
}

View File

@ -21,6 +21,8 @@ import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;
import javafx.scene.image.ImageView;
import util.Tasks;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
@ -74,6 +76,9 @@ public class OneWordAnalysisTab {
@FXML
public Label minimalTaxonomyL;
@FXML
public Label minimalRelFreL;
@FXML
public Label taxonomySetOperationL;
@ -104,6 +109,9 @@ public class OneWordAnalysisTab {
@FXML
public ImageView minimalTaxonomyI;
@FXML
public ImageView minimalRelFreI;
@FXML
public ImageView taxonomySetOperationI;
@ -144,6 +152,10 @@ public class OneWordAnalysisTab {
private TextField minimalTaxonomyTF;
private Integer minimalTaxonomy;
@FXML
private TextField minimalRelFreTF;
private Integer minimalRelFre;
@FXML
private ComboBox<String> taxonomySetOperationCB;
private String taxonomySetOperation;
@ -559,6 +571,29 @@ public class OneWordAnalysisTab {
}
});
// set default values
minimalRelFreTF.setText("1");
minimalRelFre = 1;
minimalRelFreTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalRelFreTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalRelFreTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
} else {
minimalRelFre = Integer.parseInt(value);
}
} else {
minimalRelFreTF.setText("1");
minimalRelFre = 1;
}
}
});
changeLanguageB.setOnAction(e -> {
if (I18N.getLocale() == new Locale.Builder().setLanguage("sl").setRegion("SI").build()){
I18N.setLocale(Locale.ENGLISH);
@ -680,6 +715,7 @@ public class OneWordAnalysisTab {
taxonomyL.textProperty().bind(I18N.createStringBinding("label.taxonomy"));
minimalOccurrencesL.textProperty().bind(I18N.createStringBinding("label.minimalOccurrences"));
minimalTaxonomyL.textProperty().bind(I18N.createStringBinding("label.minimalTaxonomy"));
minimalRelFreL.textProperty().bind(I18N.createStringBinding("label.minimalRelFre"));
solarFilters.textProperty().bind(I18N.createStringBinding("label.solarFilters"));
taxonomySetOperationL.textProperty().bind(I18N.createStringBinding("label.taxonomySetOperation"));
@ -693,6 +729,7 @@ public class OneWordAnalysisTab {
addTooltipToImage(taxonomyI, I18N.createStringBinding("label.word.taxonomyH"));
addTooltipToImage(minimalOccurrencesI, I18N.createStringBinding("label.word.minimalOccurrencesH"));
addTooltipToImage(minimalTaxonomyI, I18N.createStringBinding("label.word.minimalTaxonomyH"));
addTooltipToImage(minimalRelFreI, I18N.createStringBinding("label.wordPart.minimalRelFreH"));
addTooltipToImage(taxonomySetOperationI, I18N.createStringBinding("label.letter.taxonomySetOperationH"));
taxonomySetOperationCB.itemsProperty().bind(I18N.createObjectBinding(TAXONOMY_SET_OPERATION));
@ -750,6 +787,7 @@ public class OneWordAnalysisTab {
filter.setMsd(msd);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
filter.setMinimalRelFre(minimalRelFre);
filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
filter.setTaxonomySetOperation(taxonomySetOperation);
@ -803,123 +841,138 @@ public class OneWordAnalysisTab {
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
if(multipleFiles){
cancel.setVisible(true);
}
int i = 0;
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
for (File f : corpusFiles) {
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
xml_processing.isCancelled = false;
i++;
if(xml_processing.progressBarListener != null) {
xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
}
if (multipleFiles) {
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
// if(multipleFiles){
// cancel.setVisible(true);
// }
// int i = 0;
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
// for (File f : corpusFiles) {
// final int iFinal = i;
// XML_processing xml_processing = new XML_processing();
// xml_processing.isCancelled = false;
// i++;
// if(xml_processing.progressBarListener != null) {
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
// }
// if (multipleFiles) {
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
// previousTime = new Date();
// }
} else {
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
//// if (isCancelled()) {
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
//// break;
//// }
// } else {
//
// xml_processing.progressBarListener = new InvalidationListener() {
// int remainingSeconds = -1;
// Date previousTime = new Date();
// @Override
// public void invalidated(Observable observable) {
// cancel.setVisible(true);
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// previousTime = new Date();
// }
// xml_processing.isCancelled = isCancelled();
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
// }
// };
//
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
// }
// xml_processing.readXML(f.toString(), statistic);
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// }
//
// return null;
// }
// };
//
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
//
// task.setOnSucceeded(e -> {
// try {
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
// } else {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
// logger.error("Error while saving", e1);
// }
//
// ngramProgressBar.progressProperty().unbind();
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// // When cancel button is pressed cancel analysis
// cancel.setOnAction(e -> {
// task.cancel();
// logger.info("cancel button");
// });
xml_processing.progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
cancel.setVisible(true);
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
previousTime = new Date();
}
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
}
};
// final Thread thread = new Thread(task, "task");
// thread.setDaemon(true);
// thread.start();
xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
}
xml_processing.readXML(f.toString(), statistic);
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
}
return null;
}
};
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
try {
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
} else {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
logger.error("Error while saving", e1);
}
ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnFailed(e -> {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
logger.error("Error while executing", e);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnCancelled(e -> {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
logger.info("cancel button");
});
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
if (statistic.getFilter().getMinimalRelFre() > 1){
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
final Thread thread = new Thread(mainTask, "task");
thread.setDaemon(true);
thread.start();
} else {
final Task<Void> mainTask = t.prepareMainTask(statistic);
// final Task<Void> mainTask = prepareMainTask(statistic);
final Thread thread = new Thread(mainTask, "task");
thread.setDaemon(true);
thread.start();
}
}
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {

View File

@ -6,6 +6,8 @@ import static gui.GUIController.*;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import alg.XML_processing;
@ -31,6 +33,7 @@ import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
import javafx.scene.layout.Pane;
import util.Tasks;
@SuppressWarnings("Duplicates")
public class StringAnalysisTabNew2 {
@ -69,6 +72,9 @@ public class StringAnalysisTabNew2 {
@FXML
public Label minimalTaxonomyL;
@FXML
public Label minimalRelFreL;
@FXML
public Label taxonomySetOperationL;
@ -111,6 +117,9 @@ public class StringAnalysisTabNew2 {
@FXML
public ImageView minimalTaxonomyI;
@FXML
public ImageView minimalRelFreI;
@FXML
public ImageView taxonomySetOperationI;
@ -179,6 +188,10 @@ public class StringAnalysisTabNew2 {
private TextField minimalTaxonomyTF;
private Integer minimalTaxonomy;
@FXML
private TextField minimalRelFreTF;
private Integer minimalRelFre;
@FXML
private ComboBox<String> taxonomySetOperationCB;
private String taxonomySetOperation;
@ -685,6 +698,29 @@ public class StringAnalysisTabNew2 {
}
});
// set default values
minimalRelFreTF.setText("1");
minimalRelFre = 1;
minimalRelFreTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalRelFreTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalRelFreTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
} else {
minimalRelFre = Integer.parseInt(value);
}
} else {
minimalRelFreTF.setText("1");
minimalRelFre = 1;
}
}
});
changeLanguageB.setOnAction(e -> {
if (I18N.getLocale() == new Locale.Builder().setLanguage("sl").setRegion("SI").build()){
I18N.setLocale(Locale.ENGLISH);
@ -836,6 +872,7 @@ public class StringAnalysisTabNew2 {
taxonomyL.textProperty().bind(I18N.createStringBinding("label.taxonomy"));
minimalOccurrencesL.textProperty().bind(I18N.createStringBinding("label.minimalOccurrences"));
minimalTaxonomyL.textProperty().bind(I18N.createStringBinding("label.minimalTaxonomy"));
minimalRelFreL.textProperty().bind(I18N.createStringBinding("label.minimalRelFre"));
taxonomySetOperationL.textProperty().bind(I18N.createStringBinding("label.taxonomySetOperation"));
solarFilters.textProperty().bind(I18N.createStringBinding("label.solarFilters"));
@ -851,6 +888,7 @@ public class StringAnalysisTabNew2 {
addTooltipToImage(taxonomyI, I18N.createStringBinding("label.wordSet.taxonomyH"));
addTooltipToImage(minimalOccurrencesI, I18N.createStringBinding("label.wordSet.minimalOccurrencesH"));
addTooltipToImage(minimalTaxonomyI, I18N.createStringBinding("label.wordSet.minimalTaxonomyH"));
addTooltipToImage(minimalRelFreI, I18N.createStringBinding("label.wordPart.minimalRelFreH"));
addTooltipToImage(taxonomySetOperationI, I18N.createStringBinding("label.letter.taxonomySetOperationH"));
taxonomySetOperationCB.itemsProperty().bind(I18N.createObjectBinding(TAXONOMY_SET_OPERATION));
@ -912,6 +950,7 @@ public class StringAnalysisTabNew2 {
filter.setMsd(msd);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
filter.setMinimalRelFre(minimalRelFre);
filter.setCollocability(collocability);
filter.setTaxonomySetOperation(taxonomySetOperation);
@ -970,332 +1009,560 @@ public class StringAnalysisTabNew2 {
//
// }
/**
 * Builds, but does not start, the background task that reads every detected corpus file into
 * {@code statisticsOneGrams} with {@code XML_processing.isCollocability} switched on.
 * <p>
 * Besides creating the task this method binds {@code ngramProgressBar} and {@code progressLabel}
 * to it, installs the succeeded / failed / cancelled handlers and points the {@code cancel}
 * button at {@code task.cancel()}. On success the handler updates the collocabilities of
 * {@code statistic} from {@code statisticsOneGrams} and saves {@code statistic} to disk.
 *
 * @param statistic          statistics whose collocabilities are calculated and saved on success
 * @param statisticsOneGrams statistics that receive the data read by this task; also the source
 *                           of the list of corpus files
 * @return the configured task; the caller is responsible for running it
 */
private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
if(multipleFiles){
cancel.setVisible(true);
}
// The file counter starts at the number of files rather than 0 - presumably because this
// task runs as the second pass over the corpus and progress continues from the halfway
// mark; TODO confirm against the caller.
int i = corpusFiles.size();
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
// Total number of progress steps: twice the file count when collocability measures are selected.
int corpusSize;
if (statistic.getFilter().getCollocability().size() > 0) {
corpusSize = corpusFiles.size() * 2;
} else {
corpusSize = corpusFiles.size();
}
for (File f : corpusFiles) {
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
i++;
if(xml_processing.progressBarListener != null) {
xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
}
if (multipleFiles) {
// Multi-file corpora: progress advances once per file; the remaining-time estimate is
// refreshed at most every 500 ms (or immediately while it is still the -1 placeholder).
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusSize);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
} else {
// Other corpora: progress follows the reader's own progress property through a listener.
xml_processing.progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
cancel.setVisible(true);
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)));
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
// System.out.println(remainingSeconds);
previousTime = new Date();
}
// Forward the task's cancellation state to the reader.
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
// The message always reports "file 1 of 1" in this branch (literal 1, 1).
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
}
};
xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
}
// The collocability flag is raised only for the duration of this read.
xml_processing.isCollocability = true;
xml_processing.readXML(f.toString(), statisticsOneGrams);
xml_processing.isCollocability = false;
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
// readXML(f.toString(), statisticsOneGrams);
// i++;
// this.updateProgress(i, corpusFiles.size() * 2);
// if (statistic.getFilter().getCollocability().size() > 0) {
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
// } else {
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
// }
}
return null;
}
};
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
// Success: calculate collocabilities, save the result and reset the progress controls.
task.setOnSucceeded(e -> {
try {
// NOTE(review): prints the whole statistics object to stdout - looks like a debugging leftover; confirm.
System.out.print(statistic);
// calculate_collocabilities(statistic, statisticsOneGrams);
statistic.updateCalculateCollocabilities(statisticsOneGrams);
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
} else {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1) {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
logger.error("Out of memory error", e1);
}
// try {
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
// } else {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
// logger.error("Error while saving", e1);
// } catch (OutOfMemoryError e1){
// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
// logger.error("Out of memory error", e1);
// }
//
ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
// Failure: report the error and reset the progress controls.
task.setOnFailed(e -> {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// NOTE(review): `e` is the handler's event argument, not the exception thrown by the task;
// presumably task.getException() is what should be logged here - confirm.
logger.error("Error while executing", e);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
// Cancellation: inform the user and reset the progress controls.
task.setOnCancelled(e -> {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
// logger.info("cancel button");
});
return task;
}
private void execute(StatisticsNew statistic) {
Filter f = statistic.getFilter();
logger.info("Started execution: ", f);
// private final Task<Void> prepareTaskForMinRelFre(StatisticsNew statistic) {
// Filter f = statistic.getFilter();
// logger.info("Started execution: ", f);
// Task<Void> task_collocability = null;
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
if(multipleFiles){
cancel.setVisible(true);
}
int i = 0;
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
int corpusSize;
if (statistic.getFilter().getCollocability().size() > 0) {
corpusSize = corpusFiles.size() * 2;
} else {
corpusSize = corpusFiles.size();
}
for (File f : corpusFiles) {
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
xml_processing.isCancelled = false;
i++;
if(xml_processing.progressBarListener != null) {
xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
}
if (multipleFiles) {
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusSize);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
} else {
xml_processing.progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
cancel.setVisible(true);
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
// System.out.println(remainingSeconds);
previousTime = new Date();
}
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
}
};
xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
}
xml_processing.readXML(f.toString(), statistic);
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
if(!(multipleFiles)){
cancel.setVisible(false);
}
// readXML(f.toString(), statistic);
//
// try{
// Filter f2 = (Filter) f.clone();
// f2.setIsMinimalRelFreScraper(true);
// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb);
//
//
//// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb);
//
// Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();
//
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType());
// if(multipleFiles){
// cancel.setVisible(true);
// }
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
// int corpusSize;
// int i;
// if(statistic.getFilter().getCollocability().size() > 0){
// i = 0;
// corpusSize = corpusFiles.size() * 3;
// } else {
// i = 0;
// corpusSize = corpusFiles.size() * 2;
// }
// for (File f : corpusFiles) {
// final int iFinal = i;
// XML_processing xml_processing = new XML_processing();
// xml_processing.isCancelled = false;
// i++;
// if(xml_processing.progressBarListener != null) {
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
// }
// if (multipleFiles) {
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
// previousTime = new Date();
// }
// this.updateProgress(i, corpusSize);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
//// if (isCancelled()) {
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
//// break;
//// }
// } else {
// xml_processing.progressBarListener = new InvalidationListener() {
// int remainingSeconds = -1;
// Date previousTime = new Date();
// @Override
// public void invalidated(Observable observable) {
// cancel.setVisible(true);
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
//// System.out.println(remainingSeconds);
// previousTime = new Date();
// }
// xml_processing.isCancelled = isCancelled();
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
// }
// };
//
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
// }
// xml_processing.readXML(f.toString(), statisticsMinRelFre);
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// if (statistic.getFilter().getCollocability().size() > 0) {
// this.updateProgress(i, corpusFiles.size() * 2);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
// } else {
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
// if(!(multipleFiles)){
// cancel.setVisible(false);
// }
//// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
}
// }
//
// // add remaining minRelFre results
// if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
// long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
// double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
//
// statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor);
//
// // reset all values
// for(Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()){
// statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>());
// }
// for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){
// statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
// }
//
//// System.out.println("asd");
// }
//
// return null;
// }
// };
//
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
// task.setOnSucceeded(e -> {
// statistic.updateMinimalRelFre(statisticsMinRelFre.getMinimalRelFreNgrams(), statisticsMinRelFre.getMinimalRelFre1grams());
// final Task<Void> taskCollocability = prepareMainTask(statistic);
// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
// thread_collocability.setDaemon(true);
// thread_collocability.start();
// });
//
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// // ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// // When cancel button is pressed cancel analysis
// cancel.setOnAction(e -> {
// task.cancel();
// logger.info("cancel button");
// });
//
// return task;
// }catch(CloneNotSupportedException c){ return null; }
// }
//
// private final Task<Void> prepareMainTask(StatisticsNew statistic) {
// Filter f = statistic.getFilter();
// logger.info("Started execution: ", f);
// Task<Void> task_collocability = null;
//
// Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
//
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
// if(multipleFiles){
// cancel.setVisible(true);
// }
//
//
//// int i = corpusFiles.size();
//// Date startTime = new Date();
//// Date previousTime = new Date();
//// int remainingSeconds = -1;
//// int corpusSize;
//// if (statistic.getFilter().getCollocability().size() > 0) {
//// corpusSize = corpusFiles.size() * 2;
//// } else {
//// corpusSize = corpusFiles.size();
//// }
//
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
// int corpusSize;
// int i;
// int taskIndex = 0;
// if(statistic.getFilter().getCollocability().size() > 0 && statistic.getFilter().getMinimalRelFre() > 1){
// i = corpusFiles.size();
// corpusSize = corpusFiles.size() * 3;
// } else if (statistic.getFilter().getMinimalRelFre() > 1) {
// i = corpusFiles.size();
// corpusSize = corpusFiles.size() * 2;
// } else if (statistic.getFilter().getCollocability().size() > 0) {
// i = 0;
// corpusSize = corpusFiles.size() * 2;
// } else {
// i = 0;
// corpusSize = corpusFiles.size();
// }
// for (File f : corpusFiles) {
// final int iFinal = i;
// XML_processing xml_processing = new XML_processing();
// xml_processing.isCancelled = false;
// i++;
// taskIndex++;
// if(xml_processing.progressBarListener != null) {
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
// }
// if (multipleFiles) {
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000);
// previousTime = new Date();
// }
// this.updateProgress(i, corpusSize);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
//
//// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
//// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
//// previousTime = new Date();
//// }
//// this.updateProgress(i, corpusSize);
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
//
// } else {
// xml_processing.progressBarListener = new InvalidationListener() {
// int remainingSeconds = -1;
// Date previousTime = new Date();
// @Override
// public void invalidated(Observable observable) {
// cancel.setVisible(true);
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
//// System.out.println(remainingSeconds);
// previousTime = new Date();
// }
// xml_processing.isCancelled = isCancelled();
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
// }
// };
//
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
// }
// xml_processing.readXML(f.toString(), statistic);
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// if(!(multipleFiles)){
// cancel.setVisible(false);
// }
//// readXML(f.toString(), statistic);
//// i++;
//// if (isCancelled()) {
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
//// break;
//// }
//// if (statistic.getFilter().getCollocability().size() > 0) {
//// this.updateProgress(i, corpusFiles.size() * 2);
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
//// } else {
//// this.updateProgress(i, corpusFiles.size());
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
//// }
////// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
// }
// // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing
// if (statistic.getFilter().getMinimalRelFre() > 1){
//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
// long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
// double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
//
//
// for(Map.Entry<MultipleHMKeys, AtomicLong> entry : statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()){
// if(entry.getValue().longValue() < absToRelFactor){
// statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).remove(entry.getKey());
// }
// }
// statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor);
// }
//
// return null;
// }
// };
//
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
// task.setOnSucceeded(e -> {
// if (f.getCollocability().size() > 0) {
// try{
// Filter f2 = (Filter) f.clone();
// f2.setNgramValue(1);
// StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
// final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
// thread_collocability.setDaemon(true);
// thread_collocability.start();
// }catch(CloneNotSupportedException c){}
//
//
//
// } else {
// try {
//// System.out.print(statistics);
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
// } else {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
// logger.error("Error while saving", e1);
// } catch (OutOfMemoryError e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
// logger.error("Out of memory error", e1);
// }
// ngramProgressBar.progressProperty().unbind();
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// }
//
//
// });
//
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// // When cancel button is pressed cancel analysis
// cancel.setOnAction(e -> {
// task.cancel();
// logger.info("cancel button");
// });
//
// return task;
// }
//
// private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
// Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
//
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
// if(multipleFiles){
// cancel.setVisible(true);
// }
//// int i = corpusFiles.size();
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
//// int corpusSize;
//// if (statistic.getFilter().getCollocability().size() > 0) {
//// corpusSize = corpusFiles.size() * 2;
//// } else {
//// corpusSize = corpusFiles.size();
//// }
//
//
// int corpusSize;
// int i;
// int taskIndex = 0;
// if(statistic.getFilter().getMinimalRelFre() > 1){
// i = corpusFiles.size() * 2;
// corpusSize = corpusFiles.size() * 3;
// } else {
// i = corpusFiles.size();
// corpusSize = corpusFiles.size() * 2;
// }
//
//
//
// for (File f : corpusFiles) {
// final int iFinal = i;
// XML_processing xml_processing = new XML_processing();
// i++;
// taskIndex++;
// if(xml_processing.progressBarListener != null) {
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
// }
// if (multipleFiles) {
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000);
// previousTime = new Date();
// }
// this.updateProgress(i, corpusSize);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
//// if (isCancelled()) {
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
//// break;
//// }
// } else {
// xml_processing.progressBarListener = new InvalidationListener() {
// int remainingSeconds = -1;
// Date previousTime = new Date();
// @Override
// public void invalidated(Observable observable) {
// cancel.setVisible(true);
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)));
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
//// System.out.println(remainingSeconds);
// previousTime = new Date();
// }
// xml_processing.isCancelled = isCancelled();
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
// }
// };
//
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
// }
// xml_processing.isCollocability = true;
// xml_processing.readXML(f.toString(), statisticsOneGrams);
// xml_processing.isCollocability = false;
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
//// readXML(f.toString(), statisticsOneGrams);
//// i++;
//// this.updateProgress(i, corpusFiles.size() * 2);
//// if (statistic.getFilter().getCollocability().size() > 0) {
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
//// } else {
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
//// }
// }
//
// return null;
// }
// };
//
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
//
// task.setOnSucceeded(e -> {
// try {
// System.out.print(statistic);
//// calculate_collocabilities(statistic, statisticsOneGrams);
// statistic.updateCalculateCollocabilities(statisticsOneGrams);
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
// } else {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
// logger.error("Error while saving", e1);
// } catch (OutOfMemoryError e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
// logger.error("Out of memory error", e1);
// }
//// try {
//// boolean successullySaved = statistic.saveResultToDisk();
//// if (successullySaved) {
//// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
//// } else {
//// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
//// }
//// } catch (UnsupportedEncodingException e1) {
//// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
//// logger.error("Error while saving", e1);
//// } catch (OutOfMemoryError e1){
//// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
//// logger.error("Out of memory error", e1);
//// }
////
// ngramProgressBar.progressProperty().unbind();
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// // When cancel button is pressed cancel analysis
// cancel.setOnAction(e -> {
// task.cancel();
//// logger.info("cancel button");
// });
// return task;
// }
return null;
}
};
/**
 * Runs the analysis for {@code statistic} on background daemon threads.
 *
 * <p>Binds the progress bar and progress label to {@code task}, registers the
 * succeeded / failed / cancelled handlers and the cancel-button action, and starts the task.
 * It then builds a {@link Tasks} helper and starts either the minimal-relative-frequency
 * task (when the filter's minimal relative frequency is greater than 1) or the main task.
 *
 * @param statistic statistics object whose filter configures the run and whose results are
 *                  saved to disk when the task succeeds
 */
private void execute(StatisticsNew statistic) {
    Filter f = statistic.getFilter();
    // The "{}" placeholder is required: without it the logger silently drops the argument.
    logger.info("Started execution: {}", f);

    // NOTE(review): `task` is not declared inside this method, and `thread` is declared here
    // and again in both branches at the bottom. This looks like a leftover of the body that
    // predates the move to util.Tasks - confirm which half is meant to stay.
    ngramProgressBar.progressProperty().bind(task.progressProperty());
    progressLabel.textProperty().bind(task.messageProperty());

    task.setOnSucceeded(e -> {
        if (f.getCollocability().size() > 0) {
            // Second pass: a copy of the filter with n-gram value 1 feeds the collocability task.
            try {
                Filter f2 = (Filter) f.clone();
                f2.setNgramValue(1);
                StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
                final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
                final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
                thread_collocability.setDaemon(true);
                thread_collocability.start();
            } catch (CloneNotSupportedException c) {
                // Was swallowed before: the second pass never started, nothing was saved and
                // the progress UI stayed bound without any feedback to the user.
                logger.error("Could not clone filter for collocability pass", c);
                showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
                ngramProgressBar.progressProperty().unbind();
                ngramProgressBar.setProgress(0.0);
                progressLabel.textProperty().unbind();
                progressLabel.setText("");
                cancel.setVisible(false);
            }
        } else {
            try {
                boolean successullySaved = statistic.saveResultToDisk();
                if (successullySaved) {
                    showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
                } else {
                    showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
                }
            } catch (UnsupportedEncodingException e1) {
                showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
                logger.error("Error while saving", e1);
            } catch (OutOfMemoryError e1) {
                showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
                logger.error("Out of memory error", e1);
            }
            // Release the progress controls now that the run is finished.
            ngramProgressBar.progressProperty().unbind();
            progressLabel.textProperty().unbind();
            progressLabel.setText("");
            cancel.setVisible(false);
        }
    });

    task.setOnFailed(e -> {
        showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
        // Log the exception thrown by the worker; the event object itself is not a Throwable
        // and carries no stack trace.
        logger.error("Error while executing", e.getSource().getException());
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    });

    task.setOnCancelled(e -> {
        showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    });

    // When cancel button is pressed cancel analysis
    cancel.setOnAction(e -> {
        task.cancel();
        logger.info("cancel button");
    });

    final Thread thread = new Thread(task, "task");
    thread.setDaemon(true);
    thread.start();

    Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
    if (f.getMinimalRelFre() > 1){
        final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
        final Thread thread = new Thread(mainTask, "task");
        thread.setDaemon(true);
        thread.start();
    } else {
        final Task<Void> mainTask = t.prepareMainTask(statistic);
        final Thread thread = new Thread(mainTask, "task");
        thread.setDaemon(true);
        thread.start();
    }
}
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {

View File

@ -20,6 +20,7 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;
import util.Tasks;
import java.io.File;
import java.io.UnsupportedEncodingException;
@ -85,6 +86,9 @@ public class WordLevelTab {
@FXML
public Label minimalTaxonomyL;
@FXML
public Label minimalRelFreL;
@FXML
public Label taxonomySetOperationL;
@ -122,6 +126,9 @@ public class WordLevelTab {
@FXML
public ImageView minimalTaxonomyI;
@FXML
public ImageView minimalRelFreI;
@FXML
public ImageView taxonomySetOperationI;
@ -174,6 +181,10 @@ public class WordLevelTab {
private TextField minimalTaxonomyTF;
private Integer minimalTaxonomy;
@FXML
private TextField minimalRelFreTF;
private Integer minimalRelFre;
@FXML
private ComboBox<String> taxonomySetOperationCB;
private String taxonomySetOperation;
@ -669,6 +680,29 @@ public class WordLevelTab {
}
});
// Minimal relative frequency: start at 1 and re-validate the text field whenever it loses focus.
minimalRelFreTF.setText("1");
minimalRelFre = 1;
minimalRelFreTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
    if (!newValue) {
        // focus lost
        String value = minimalRelFreTF.getText();
        if (!ValidationUtil.isEmpty(value)) {
            boolean parsed = false;
            if (ValidationUtil.isNumber(value)) {
                try {
                    minimalRelFre = Integer.parseInt(value);
                    parsed = true;
                } catch (NumberFormatException nfe) {
                    // ValidationUtil.isNumber is not visible here; guard in case it accepts
                    // input that does not fit an int (e.g. a very long digit string), which
                    // would otherwise throw out of this listener. The previous value is kept.
                }
            }
            if (!parsed) {
                logAlert("minimalRelFreTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
                GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
            }
        } else {
            // An empty field falls back to the default.
            minimalRelFreTF.setText("1");
            minimalRelFre = 1;
        }
    }
});
changeLanguageB.setOnAction(e -> {
if (I18N.getLocale() == new Locale.Builder().setLanguage("sl").setRegion("SI").build()){
I18N.setLocale(Locale.ENGLISH);
@ -798,6 +832,7 @@ public class WordLevelTab {
taxonomyL.textProperty().bind(I18N.createStringBinding("label.taxonomy"));
minimalOccurrencesL.textProperty().bind(I18N.createStringBinding("label.minimalOccurrences"));
minimalTaxonomyL.textProperty().bind(I18N.createStringBinding("label.minimalTaxonomy"));
minimalRelFreL.textProperty().bind(I18N.createStringBinding("label.minimalRelFre"));
solarFilters.textProperty().bind(I18N.createStringBinding("label.solarFilters"));
taxonomySetOperationL.textProperty().bind(I18N.createStringBinding("label.taxonomySetOperation"));
@ -814,6 +849,7 @@ public class WordLevelTab {
addTooltipToImage(taxonomyI, I18N.createStringBinding("label.wordPart.taxonomyH"));
addTooltipToImage(minimalOccurrencesI, I18N.createStringBinding("label.wordPart.minimalOccurrencesH"));
addTooltipToImage(minimalTaxonomyI, I18N.createStringBinding("label.wordPart.minimalTaxonomyH"));
addTooltipToImage(minimalRelFreI, I18N.createStringBinding("label.wordPart.minimalRelFreH"));
addTooltipToImage(taxonomySetOperationI, I18N.createStringBinding("label.letter.taxonomySetOperationH"));
taxonomySetOperationCB.itemsProperty().bind(I18N.createObjectBinding(TAXONOMY_SET_OPERATION));
@ -873,6 +909,7 @@ public class WordLevelTab {
filter.setMsd(msd);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
filter.setMinimalRelFre(minimalRelFre);
filter.setPrefixLength(prefixLength);
filter.setSuffixLength(suffixLength);
filter.setPrefixList(prefixList);
@ -930,122 +967,136 @@ public class WordLevelTab {
// Files of the corpus attached to this statistic; each one is read in turn by the task below.
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
// Background task: reads every corpus file into `statistic` while publishing progress,
// a status message and an estimate of the remaining time.
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
// Multi-file corpus types report progress per file; other types report it from inside the reader.
final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
if(multipleFiles){
// NOTE(review): this touches a UI control from inside call(); confirm which thread runs
// the task and whether the change should be posted to the JavaFX application thread.
cancel.setVisible(true);
}
// i counts files started so far (1-based once incremented inside the loop).
int i = 0;
Date startTime = new Date();
Date previousTime = new Date();
// -1 marks "no estimate yet" and forces the first computation.
int remainingSeconds = -1;
for (File f : corpusFiles) {
// Zero-based index of the current file, captured for use inside the listener.
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
xml_processing.isCancelled = false;
i++;
// Stop before starting another file once cancellation has been requested.
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
// NOTE(review): xml_processing was created just above, so this only fires if
// XML_processing assigns progressBarListener itself - not visible here, confirm.
if(xml_processing.progressBarListener != null) {
xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
}
if (multipleFiles) {
// Refresh the estimate at most every 500 ms:
// elapsed ms * (1 / files started) * files left, converted to seconds.
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
} else {
// Progress comes from the reader: listen to its progress property and translate
// each change into task progress and a status message.
xml_processing.progressBarListener = new InvalidationListener() {
// Listener-local estimate state; these shadow the variables of the same name in call().
int remainingSeconds = -1;
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
cancel.setVisible(true);
// Same 500 ms throttle as above. The estimate is
// elapsed ms since xml_processing.startTime * (1 / units done) * units left / 1000,
// counting 100 units per file - presumably the reader reports progress on a
// 0-100 scale; TODO confirm.
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
previousTime = new Date();
}
// Hand the task's cancellation state to the reader.
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
}
};
xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
}
// Read the file; results accumulate in `statistic`.
xml_processing.readXML(f.toString(), statistic);
// Re-check after the read so a cancel issued during it stops the loop.
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
}
return null;
}
};
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
try {
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
} else {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
logger.error("Error while saving", e1);
}
ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnFailed(e -> {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
logger.error("Error while executing", e);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnCancelled(e -> {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
logger.info("cancel button");
});
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
// if(multipleFiles){
// cancel.setVisible(true);
// }
// int i = 0;
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
// for (File f : corpusFiles) {
// final int iFinal = i;
// XML_processing xml_processing = new XML_processing();
// xml_processing.isCancelled = false;
// i++;
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// if(xml_processing.progressBarListener != null) {
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
// }
// if (multipleFiles) {
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
// previousTime = new Date();
// }
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
// } else {
// xml_processing.progressBarListener = new InvalidationListener() {
// int remainingSeconds = -1;
// Date previousTime = new Date();
// @Override
// public void invalidated(Observable observable) {
// cancel.setVisible(true);
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// previousTime = new Date();
// }
// xml_processing.isCancelled = isCancelled();
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
// }
// };
//
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
// }
// xml_processing.readXML(f.toString(), statistic);
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// }
//
// return null;
// }
// };
//
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
//
// task.setOnSucceeded(e -> {
// try {
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
// } else {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
// logger.error("Error while saving", e1);
// }
//
// ngramProgressBar.progressProperty().unbind();
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
//
// // When cancel button is pressed cancel analysis
// cancel.setOnAction(e -> {
// task.cancel();
// logger.info("cancel button");
// });
//
// final Thread thread = new Thread(task, "task");
// thread.setDaemon(true);
// thread.start();
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
if (statistic.getFilter().getMinimalRelFre() > 1){
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
final Thread thread = new Thread(mainTask, "task");
thread.setDaemon(true);
thread.start();
} else {
final Task<Void> mainTask = t.prepareMainTask(statistic);
// final Task<Void> mainTask = prepareMainTask(statistic);
final Thread thread = new Thread(mainTask, "task");
thread.setDaemon(true);
thread.start();
}
}
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {

View File

@ -0,0 +1,585 @@
package util;
import alg.XML_processing;
import data.*;
import gui.I18N;
import gui.StringAnalysisTabNew2;
import javafx.beans.InvalidationListener;
import javafx.beans.Observable;
import javafx.beans.property.ReadOnlyDoubleWrapper;
import javafx.fxml.FXML;
import javafx.scene.control.Alert;
import javafx.scene.control.Button;
import javafx.scene.control.Label;
import javafx.scene.control.ProgressBar;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.Collection;
import java.util.Date;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import static gui.GUIController.showAlert;
public class Tasks {
/** Logger for this class (previously registered under StringAnalysisTabNew2 by mistake). */
public final static Logger logger = LogManager.getLogger(Tasks.class);

// Corpus and storage flag passed on to every StatisticsNew instance created by this class.
private Corpus corpus;
private boolean useDb;

// UI controls of the calling tab. They are supplied through the constructor below;
// NOTE(review): the @FXML annotations look unnecessary here because nothing in this
// class is shown to be loaded by an FXMLLoader - confirm before removing.
@FXML
private Button cancel;
@FXML
public ProgressBar ngramProgressBar;
@FXML
public Label progressLabel;

/**
 * Creates a task factory that reports progress through the given controls.
 *
 * @param corpus           corpus used when additional statistics objects are created
 * @param useDb            storage flag passed on to those statistics objects
 * @param cancel           button that is shown while a task runs and cancels it when pressed
 * @param ngramProgressBar progress bar that gets bound to the running task's progress
 * @param progressLabel    label that gets bound to the running task's message
 */
public Tasks(Corpus corpus, boolean useDb, Button cancel, ProgressBar ngramProgressBar, Label progressLabel) {
    this.corpus = corpus;
    this.useDb = useDb;
    this.cancel = cancel;
    this.ngramProgressBar = ngramProgressBar;
    this.progressLabel = progressLabel;
}
/**
 * Builds the first-pass task that is run when a minimal relative frequency filter is set.
 *
 * The task reads every detected corpus file into a separate statistics object whose
 * filter is a clone of the caller's filter with the "minimal relative frequency scraper"
 * flag switched on. When the task succeeds, the collected minimal-frequency data is
 * copied into {@code statistic} and the main task ({@link #prepareMainTask}) is started
 * on a new daemon thread.
 *
 * The returned task is already bound to the progress bar and the progress label, and the
 * cancel button is wired to cancel it. The caller only has to start it on a thread.
 *
 * NOTE(review): {@code cancel.setVisible(...)} is called from inside {@code call()}, which
 * presumably runs off the JavaFX application thread - confirm whether it should be wrapped
 * in {@code Platform.runLater}.
 *
 * @param statistic statistics object of the main analysis; receives the first-pass results
 * @return the prepared task, or {@code null} if the filter could not be cloned
 */
public final javafx.concurrent.Task<Void> prepareTaskForMinRelFre(StatisticsNew statistic) {
    Filter f = statistic.getFilter();
    // the "{}" placeholder is required, otherwise log4j drops the argument
    logger.info("Started execution: {}", f);

    final StatisticsNew statisticsMinRelFre;
    try {
        Filter f2 = (Filter) f.clone();
        f2.setIsMinimalRelFreScraper(true);
        statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb);
    } catch (CloneNotSupportedException ex) {
        // null is kept as the failure result for backward compatibility, but the cause is no longer hidden
        logger.error("Could not clone the filter for the minimal relative frequency pass", ex);
        return null;
    }

    final Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();

    final javafx.concurrent.Task<Void> task = new javafx.concurrent.Task<Void>() {
        @SuppressWarnings("Duplicates")
        @Override
        protected Void call() throws Exception {
            final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType());
            if (multipleFiles) {
                cancel.setVisible(true);
            }

            Date startTime = new Date();
            Date previousTime = new Date();
            int remainingSeconds = -1;

            // Progress is reported over all passes: this pass and the main pass,
            // plus one more pass when collocability is requested.
            final int passes = statistic.getFilter().getCollocability().size() > 0 ? 3 : 2;
            final int corpusSize = corpusFiles.size() * passes;
            int i = 0;

            for (File file : corpusFiles) {
                final int iFinal = i;
                final XML_processing xml_processing = new XML_processing();
                xml_processing.isCancelled = false;
                i++;

                if (xml_processing.progressBarListener != null) {
                    xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
                }

                if (multipleFiles) {
                    // refresh the estimate at most every 500 ms (and on the first file)
                    if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                        remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0 / i) * (corpusSize - i) / 1000);
                        previousTime = new Date();
                    }
                    this.updateProgress(i, corpusSize);
                    this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, file.getName(), remainingSeconds));
                } else {
                    // single-file corpus: follow the reader's own progress property
                    xml_processing.progressBarListener = new InvalidationListener() {
                        int remainingSeconds = -1;
                        Date previousTime = new Date();

                        @Override
                        public void invalidated(Observable observable) {
                            cancel.setVisible(true);
                            final double fileProgress = ((ReadOnlyDoubleWrapper) observable).get();
                            if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                                remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
                                        (1.0 / (iFinal * 100 + fileProgress + 1)) *
                                        ((corpusSize - iFinal - 1) * 100 + 100 - fileProgress) / 1000);
                                previousTime = new Date();
                            }
                            // propagate cancellation into the reader
                            xml_processing.isCancelled = isCancelled();
                            updateProgress((iFinal * 100) + fileProgress + 1, corpusSize * 100);
                            updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, file.getName(), remainingSeconds));
                        }
                    };
                    xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
                }

                xml_processing.readXML(file.toString(), statisticsMinRelFre);

                if (isCancelled()) {
                    updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
                    break;
                }
                if (!multipleFiles) {
                    cancel.setVisible(false);
                }
            }

            // add remaining minRelFre results
            if (statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
                long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
                // absolute occurrence threshold that corresponds to the requested per-million frequency
                double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;

                statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor);

                // reset all values
                for (Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()) {
                    statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>());
                }
                for (Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()) {
                    statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
                }
            }

            return null;
        }
    };

    ngramProgressBar.progressProperty().bind(task.progressProperty());
    progressLabel.textProperty().bind(task.messageProperty());

    task.setOnSucceeded(e -> {
        // hand the first-pass results over and continue with the main pass
        statistic.updateMinimalRelFre(statisticsMinRelFre.getMinimalRelFreNgrams(), statisticsMinRelFre.getMinimalRelFre1grams());
        final javafx.concurrent.Task<Void> taskCollocability = prepareMainTask(statistic);
        final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
        thread_collocability.setDaemon(true);
        thread_collocability.start();
    });

    task.setOnFailed(e -> {
        showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
        // log the exception thrown by call(), not the state event, so the stack trace is kept
        logger.error("Error while executing", task.getException());
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    });

    task.setOnCancelled(e -> {
        showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    });

    // When cancel button is pressed cancel analysis
    cancel.setOnAction(e -> {
        task.cancel();
        logger.info("cancel button");
    });

    return task;
}
/**
 * Builds the main analysis task, which reads every detected corpus file into
 * {@code statistic}.
 *
 * If the filter's minimal relative frequency is greater than 1, entries of the "total"
 * taxonomy result whose count is below the corresponding absolute threshold are removed
 * once all files have been read. On success the task either starts the collocability
 * pass (when the filter requests collocability) or saves the result to disk and resets
 * the progress controls.
 *
 * The returned task is already bound to the progress bar and the progress label, and the
 * cancel button is wired to cancel it. The caller only has to start it on a thread.
 *
 * NOTE(review): {@code cancel.setVisible(...)} is called from inside {@code call()}, which
 * presumably runs off the JavaFX application thread - confirm whether it should be wrapped
 * in {@code Platform.runLater}.
 *
 * @param statistic statistics object that receives the results
 * @return the prepared, not yet started task
 */
public final javafx.concurrent.Task<Void> prepareMainTask(StatisticsNew statistic) {
    Filter f = statistic.getFilter();
    // the "{}" placeholder is required, otherwise log4j drops the argument
    logger.info("Started execution: {}", f);

    final Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();

    final javafx.concurrent.Task<Void> task = new javafx.concurrent.Task<Void>() {
        @SuppressWarnings("Duplicates")
        @Override
        protected Void call() throws Exception {
            final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
            if (multipleFiles) {
                cancel.setVisible(true);
            }

            Date startTime = new Date();
            Date previousTime = new Date();
            int remainingSeconds = -1;

            final boolean withCollocability = statistic.getFilter().getCollocability().size() > 0;
            final boolean withMinRelFre = statistic.getFilter().getMinimalRelFre() > 1;

            // Progress spans all passes. When the minimal relative frequency pass ran first,
            // this pass continues counting after it; a collocability pass may still follow.
            final int passes = 1 + (withMinRelFre ? 1 : 0) + (withCollocability ? 1 : 0);
            final int corpusSize = corpusFiles.size() * passes;
            int i = withMinRelFre ? corpusFiles.size() : 0;
            // number of files handled by this task only, used for the time estimate
            int taskIndex = 0;

            for (File file : corpusFiles) {
                final int iFinal = i;
                final XML_processing xml_processing = new XML_processing();
                xml_processing.isCancelled = false;
                i++;
                taskIndex++;

                if (xml_processing.progressBarListener != null) {
                    xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
                }

                if (multipleFiles) {
                    // refresh the estimate at most every 500 ms (and on the first file)
                    if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                        remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0 / taskIndex) * (corpusSize - i) / 1000);
                        previousTime = new Date();
                    }
                    this.updateProgress(i, corpusSize);
                    this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, file.getName(), remainingSeconds));
                } else {
                    // single-file corpus: follow the reader's own progress property
                    xml_processing.progressBarListener = new InvalidationListener() {
                        int remainingSeconds = -1;
                        Date previousTime = new Date();

                        @Override
                        public void invalidated(Observable observable) {
                            cancel.setVisible(true);
                            final double fileProgress = ((ReadOnlyDoubleWrapper) observable).get();
                            if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                                remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
                                        (1.0 / (iFinal * 100 + fileProgress + 1)) *
                                        ((corpusSize - iFinal - 1) * 100 + 100 - fileProgress) / 1000);
                                previousTime = new Date();
                            }
                            // propagate cancellation into the reader
                            xml_processing.isCancelled = isCancelled();
                            updateProgress((iFinal * 100) + fileProgress + 1, corpusSize * 100);
                            updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, file.getName(), remainingSeconds));
                        }
                    };
                    xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
                }

                xml_processing.readXML(file.toString(), statistic);

                if (isCancelled()) {
                    updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
                    break;
                }
                if (!multipleFiles) {
                    cancel.setVisible(false);
                }
            }

            // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing
            if (statistic.getFilter().getMinimalRelFre() > 1) {
                long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
                // absolute occurrence threshold that corresponds to the requested per-million frequency
                final double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;

                // removeIf deletes through the entry set itself, so it is safe for any Map
                // implementation and avoids a second lookup per removed key
                statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()
                        .removeIf(entry -> entry.getValue().longValue() < absToRelFactor);

                statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor);
            }

            return null;
        }
    };

    ngramProgressBar.progressProperty().bind(task.progressProperty());
    progressLabel.textProperty().bind(task.messageProperty());

    task.setOnSucceeded(e -> {
        if (f.getCollocability().size() > 0) {
            // collocability needs 1-gram counts, gathered by one more pass over the corpus
            try {
                Filter oneGramFilter = (Filter) f.clone();
                oneGramFilter.setNgramValue(1);
                StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, oneGramFilter, useDb);
                final javafx.concurrent.Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
                final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
                thread_collocability.setDaemon(true);
                thread_collocability.start();
            } catch (CloneNotSupportedException c) {
                // previously swallowed: the UI stayed bound and the user got no feedback
                showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
                logger.error("Could not clone the filter for the collocability pass", c);
                ngramProgressBar.progressProperty().unbind();
                ngramProgressBar.setProgress(0.0);
                progressLabel.textProperty().unbind();
                progressLabel.setText("");
                cancel.setVisible(false);
            }
        } else {
            try {
                boolean successullySaved = statistic.saveResultToDisk();
                if (successullySaved) {
                    showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
                } else {
                    showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
                }
            } catch (UnsupportedEncodingException e1) {
                showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
                logger.error("Error while saving", e1);
            } catch (OutOfMemoryError e1) {
                showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
                logger.error("Out of memory error", e1);
            }

            ngramProgressBar.progressProperty().unbind();
            progressLabel.textProperty().unbind();
            progressLabel.setText("");
            cancel.setVisible(false);
        }
    });

    task.setOnFailed(e -> {
        showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
        // log the exception thrown by call(), not the state event, so the stack trace is kept
        logger.error("Error while executing", task.getException());
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    });

    task.setOnCancelled(e -> {
        showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    });

    // When cancel button is pressed cancel analysis
    cancel.setOnAction(e -> {
        task.cancel();
        logger.info("cancel button");
    });

    return task;
}
/**
 * Builds the collocability pass, which re-reads every detected corpus file into
 * {@code statisticsOneGrams} with the reader's {@code isCollocability} flag set.
 *
 * On success the collocability values of {@code statistic} are updated from
 * {@code statisticsOneGrams}, the result is saved to disk and the progress controls are
 * reset.
 *
 * The returned task is already bound to the progress bar and the progress label, and the
 * cancel button is wired to cancel it. The caller only has to start it on a thread.
 *
 * NOTE(review): {@code cancel.setVisible(...)} is called from inside {@code call()}, which
 * presumably runs off the JavaFX application thread - confirm whether it should be wrapped
 * in {@code Platform.runLater}.
 *
 * @param statistic          statistics of the main analysis; updated and saved on success
 * @param statisticsOneGrams statistics object that collects the data read in this pass
 * @return the prepared, not yet started task
 */
public final javafx.concurrent.Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
    final Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();

    final javafx.concurrent.Task<Void> task = new javafx.concurrent.Task<Void>() {
        @SuppressWarnings("Duplicates")
        @Override
        protected Void call() throws Exception {
            final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
            if (multipleFiles) {
                cancel.setVisible(true);
            }

            Date startTime = new Date();
            Date previousTime = new Date();
            int remainingSeconds = -1;

            // This is the last pass: it follows the main pass and, when a minimal relative
            // frequency is set, the scraper pass as well. Progress continues from there.
            final boolean withMinRelFre = statistic.getFilter().getMinimalRelFre() > 1;
            final int corpusSize = corpusFiles.size() * (withMinRelFre ? 3 : 2);
            int i = corpusFiles.size() * (withMinRelFre ? 2 : 1);
            // number of files handled by this task only, used for the time estimate
            int taskIndex = 0;

            for (File file : corpusFiles) {
                final int iFinal = i;
                final XML_processing xml_processing = new XML_processing();
                i++;
                taskIndex++;

                if (xml_processing.progressBarListener != null) {
                    xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
                }

                if (multipleFiles) {
                    // refresh the estimate at most every 500 ms (and on the first file)
                    if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                        remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0 / taskIndex) * (corpusSize - i) / 1000);
                        previousTime = new Date();
                    }
                    this.updateProgress(i, corpusSize);
                    this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, file.getName(), remainingSeconds));
                } else {
                    // single-file corpus: follow the reader's own progress property
                    xml_processing.progressBarListener = new InvalidationListener() {
                        int remainingSeconds = -1;
                        Date previousTime = new Date();

                        @Override
                        public void invalidated(Observable observable) {
                            cancel.setVisible(true);
                            final double fileProgress = ((ReadOnlyDoubleWrapper) observable).get();
                            if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                                remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
                                        (1.0 / (iFinal * 100 + fileProgress + 1)) *
                                        ((corpusSize - iFinal - 1) * 100 + 100 - fileProgress) / 1000);
                                previousTime = new Date();
                            }
                            // propagate cancellation into the reader
                            xml_processing.isCancelled = isCancelled();
                            updateProgress((iFinal * 100) + fileProgress + 1, corpusSize * 100);
                            updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, file.getName(), remainingSeconds));
                        }
                    };
                    xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
                }

                xml_processing.isCollocability = true;
                xml_processing.readXML(file.toString(), statisticsOneGrams);
                xml_processing.isCollocability = false;

                if (isCancelled()) {
                    updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
                    break;
                }
            }

            return null;
        }
    };

    ngramProgressBar.progressProperty().bind(task.progressProperty());
    progressLabel.textProperty().bind(task.messageProperty());

    task.setOnSucceeded(e -> {
        try {
            // the leftover System.out dump of the whole statistics object was removed here
            statistic.updateCalculateCollocabilities(statisticsOneGrams);
            boolean successullySaved = statistic.saveResultToDisk();
            if (successullySaved) {
                showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
            } else {
                showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
            }
        } catch (UnsupportedEncodingException e1) {
            showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
            logger.error("Error while saving", e1);
        } catch (OutOfMemoryError e1) {
            showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
            logger.error("Out of memory error", e1);
        }

        ngramProgressBar.progressProperty().unbind();
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    });

    task.setOnFailed(e -> {
        showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
        // log the exception thrown by call(), not the state event, so the stack trace is kept
        logger.error("Error while executing", task.getException());
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    });

    task.setOnCancelled(e -> {
        showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    });

    // When cancel button is pressed cancel analysis
    cancel.setOnAction(e -> {
        task.cancel();
    });

    return task;
}
}

View File

@ -99,9 +99,15 @@
<Image url="questionmark.png" backgroundLoading="true"/>
</ImageView>
<Label fx:id="minimalRelFreL" layoutX="10.0" layoutY="300.0" prefHeight="25.0" text="Min. rel. št. pojavitev" />
<TextField fx:id="minimalRelFreTF" layoutX="225.0" layoutY="300.0" prefWidth="140.0" />
<ImageView fx:id="minimalRelFreI" layoutX="370.0" layoutY="307.5" pickOnBounds="true" preserveRatio="true">
<Image url="questionmark.png" backgroundLoading="true"/>
</ImageView>
<Label fx:id="solarFilters" layoutX="10.0" layoutY="320.0" text="Izbrani filtri:" />
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="360.0" prefHeight="115.0" maxHeight="115.0" prefWidth="360.0" text=" " wrapText="true" editable="false"/>
<Label fx:id="solarFilters" layoutX="10.0" layoutY="340.0" text="Izbrani filtri:" />
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="380.0" prefHeight="95.0" maxHeight="95.0" prefWidth="360.0" text=" " wrapText="true" editable="false"/>
</Pane>
<!--<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">-->

View File

@ -141,9 +141,15 @@
<Image url="questionmark.png" backgroundLoading="true"/>
</ImageView>
<Label fx:id="minimalRelFreL" layoutX="10.0" layoutY="300.0" prefHeight="25.0" text="Min. rel. št. pojavitev" />
<TextField fx:id="minimalRelFreTF" layoutX="225.0" layoutY="300.0" prefWidth="140.0" />
<ImageView fx:id="minimalRelFreI" layoutX="370.0" layoutY="307.5" pickOnBounds="true" preserveRatio="true">
<Image url="questionmark.png" backgroundLoading="true"/>
</ImageView>
<Label fx:id="solarFilters" layoutX="10.0" layoutY="320.0" text="Izbrani filtri:" />
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="360.0" prefHeight="115.0" maxHeight="115.0" prefWidth="360.0" text=" " wrapText="true" editable="false"/>
<Label fx:id="solarFilters" layoutX="10.0" layoutY="340.0" text="Izbrani filtri:" />
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="380.0" prefHeight="95.0" maxHeight="95.0" prefWidth="360.0" text=" " wrapText="true" editable="false"/>
</Pane>
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />

View File

@ -144,9 +144,15 @@
<Image url="questionmark.png" backgroundLoading="true"/>
</ImageView>
<Label fx:id="minimalRelFreL" layoutX="10.0" layoutY="300.0" prefHeight="25.0" text="Min. rel. št. pojavitev" />
<TextField fx:id="minimalRelFreTF" layoutX="225.0" layoutY="300.0" prefWidth="140.0" />
<ImageView fx:id="minimalRelFreI" layoutX="370.0" layoutY="307.5" pickOnBounds="true" preserveRatio="true">
<Image url="questionmark.png" backgroundLoading="true"/>
</ImageView>
<Label fx:id="solarFilters" layoutX="10.0" layoutY="320.0" text="Izbrani filtri:" />
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="360.0" prefHeight="115.0" maxHeight="115.0" prefWidth="360.0" text=" " wrapText="true" editable="false"/>
<Label fx:id="solarFilters" layoutX="10.0" layoutY="340.0" text="Izbrani filtri:" />
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="380.0" prefHeight="95.0" maxHeight="95.0" prefWidth="360.0" text=" " wrapText="true" editable="false"/>
</Pane>
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />

View File

@ -39,6 +39,7 @@ label.msd=Morphosyntactic tag
label.taxonomy=Filter by taxonomy
label.minimalOccurrences=Min. nr. occurrences
label.minimalTaxonomy=Min. nr. tax. branches
label.minimalRelFre=Min. rel. frequency
label.taxonomySetOperation=Filter taxonomy by
label.solarFilters=Selected filters:
string.lemma=lemma
@ -73,6 +74,7 @@ label.wordPart.msdH=Word parts will only be counted in words with the specified
label.wordPart.taxonomyH=Word parts will only be counted in the selected text types.
label.wordPart.minimalOccurrencesH=Units with the specified word part that occur fewer times will not be included in the output.
label.wordPart.minimalTaxonomyH=Units with the specified word part that are present in fewer taxonomy branches will not be included in the output.
label.wordPart.minimalRelFreH=Units with the specified word part whose relative frequency (occurrences per million) is lower will not be included in the output.
# word tab
label.writeMsdAtTheEnd=Split the morphosyntactic tag

View File

@ -39,6 +39,7 @@ label.msd=Oblikoskladenjska oznaka
label.taxonomy=Filtriranje po taksonomiji
label.minimalOccurrences=Min. št. pojavitev
label.minimalTaxonomy=Min. št. taksonomskih vej
label.minimalRelFre=Min. rel. št. pojavitev
label.taxonomySetOperation=Filtriraj taksonomijo po
label.solarFilters=Izbrani filtri:
string.lemma=lema
@ -73,6 +74,7 @@ label.wordPart.msdH=Besedni deli bodo prešteti samo v besedah z določeno oznak
label.wordPart.taxonomyH=Besedni deli bodo prešteti samo v izbranih vrstah besedil.
label.wordPart.minimalOccurrencesH=Enote z iskanim besednim delom, ki se pojavijo redkeje, ne bodo vključene v izpis.
label.wordPart.minimalTaxonomyH=Enote z iskanim besednim delom, ki so prisotne v manj vejah, ne bodo vključene v izpis.
label.wordPart.minimalRelFreH=Minimalno relativno število pojavitev na milijon.
# word tab
label.writeMsdAtTheEnd=Razbij oblikoskladenjsko oznako