Added initial functionality for word parts implementation
This commit is contained in:
parent
f9ce74d7b8
commit
abc15360d3
|
@ -52,6 +52,10 @@ public class Ngrams {
|
||||||
// generate proper MultipleHMKeys depending on filter data
|
// generate proper MultipleHMKeys depending on filter data
|
||||||
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts());
|
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts());
|
||||||
|
|
||||||
|
if(key.length() < stats.getFilter().getPrefixLength() + stats.getFilter().getSuffixLength()){
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// if last letter is ',' erase it
|
// if last letter is ',' erase it
|
||||||
|
|
||||||
// if (key.equals("")){
|
// if (key.equals("")){
|
||||||
|
|
|
@ -29,7 +29,11 @@ public class Filter implements Cloneable {
|
||||||
NOTE_PUNCTUATIONS,
|
NOTE_PUNCTUATIONS,
|
||||||
MINIMAL_OCCURRENCES,
|
MINIMAL_OCCURRENCES,
|
||||||
MINIMAL_TAXONOMY,
|
MINIMAL_TAXONOMY,
|
||||||
COLLOCABILITY
|
COLLOCABILITY,
|
||||||
|
PREFIX_LENGTH,
|
||||||
|
SUFFIX_LENGTH,
|
||||||
|
PREFIX_LIST,
|
||||||
|
SUFFIX_LIST
|
||||||
}
|
}
|
||||||
|
|
||||||
public Filter() {
|
public Filter() {
|
||||||
|
@ -245,6 +249,43 @@ public class Filter implements Cloneable {
|
||||||
return (Integer) filter.get(MINIMAL_TAXONOMY);
|
return (Integer) filter.get(MINIMAL_TAXONOMY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PREFIX_LENGTH,
|
||||||
|
// SUFFIX_LENGTH,
|
||||||
|
// PREFIX_LIST,
|
||||||
|
// SUFFIX_LIST
|
||||||
|
|
||||||
|
public void setPrefixLength(Integer v) {
|
||||||
|
filter.put(PREFIX_LENGTH, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getPrefixLength() {
|
||||||
|
return (Integer) filter.get(PREFIX_LENGTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSuffixLength(Integer v) {
|
||||||
|
filter.put(SUFFIX_LENGTH, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getSuffixLength() {
|
||||||
|
return (Integer) filter.get(SUFFIX_LENGTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPrefixList(ArrayList<String> v) {
|
||||||
|
filter.put(PREFIX_LIST, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<String> getPrefixList() {
|
||||||
|
return (ArrayList<String>) filter.get(PREFIX_LIST);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSuffixList(ArrayList<String> v) {
|
||||||
|
filter.put(SUFFIX_LIST, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<String> getSuffixList() {
|
||||||
|
return (ArrayList<String>) filter.get(SUFFIX_LIST);
|
||||||
|
}
|
||||||
|
|
||||||
private void addWordPart(CalculateFor wp){
|
private void addWordPart(CalculateFor wp){
|
||||||
ArrayList<CalculateFor> oldWp = ((ArrayList<CalculateFor>) filter.get(WORD_PARTS));
|
ArrayList<CalculateFor> oldWp = ((ArrayList<CalculateFor>) filter.get(WORD_PARTS));
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package data;
|
package data;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -12,6 +13,8 @@ public interface MultipleHMKeys {
|
||||||
default String getK4(){ return null; }
|
default String getK4(){ return null; }
|
||||||
default String getK5(){ return null; }
|
default String getK5(){ return null; }
|
||||||
|
|
||||||
|
/**
 * Splits a multi-word key into per-word keys.
 *
 * <p>This default returns {@code null}; the {@code MultipleHMKeys1} to
 * {@code MultipleHMKeys5} classes override it to return one key per
 * whitespace-separated token.
 *
 * @return {@code null} unless overridden
 */
default ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys() {
    return null;
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
int hashCode();
|
int hashCode();
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package data;
|
package data;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -16,6 +17,16 @@ public final class MultipleHMKeys1 implements MultipleHMKeys {
|
||||||
return k1;
|
return k1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
|
||||||
|
ArrayList<MultipleHMKeys> r = new ArrayList<>();
|
||||||
|
String[] splitedK1 = k1.split("\\s+");
|
||||||
|
for(int i = 0; i < splitedK1.length; i ++){
|
||||||
|
MultipleHMKeys search = new MultipleHMKeys1(splitedK1[i]);
|
||||||
|
r.add(search);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return k1.hashCode();
|
return k1.hashCode();
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package data;
|
package data;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -21,6 +22,17 @@ public final class MultipleHMKeys2 implements MultipleHMKeys {
|
||||||
return k2;
|
return k2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
|
||||||
|
ArrayList<MultipleHMKeys> r = new ArrayList<>();
|
||||||
|
String[] splitedK1 = k1.split("\\s+");
|
||||||
|
String[] splitedK2 = k2.split("\\s+");
|
||||||
|
for(int i = 0; i < splitedK1.length; i ++){
|
||||||
|
MultipleHMKeys search = new MultipleHMKeys2(splitedK1[i], splitedK2[i]);
|
||||||
|
r.add(search);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(k1, k2);
|
return Objects.hash(k1, k2);
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package data;
|
package data;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -26,6 +27,18 @@ public final class MultipleHMKeys3 implements MultipleHMKeys {
|
||||||
return k3;
|
return k3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
|
||||||
|
ArrayList<MultipleHMKeys> r = new ArrayList<>();
|
||||||
|
String[] splitedK1 = k1.split("\\s+");
|
||||||
|
String[] splitedK2 = k2.split("\\s+");
|
||||||
|
String[] splitedK3 = k3.split("\\s+");
|
||||||
|
for(int i = 0; i < splitedK1.length; i ++){
|
||||||
|
MultipleHMKeys search = new MultipleHMKeys3(splitedK1[i], splitedK2[i], splitedK3[i]);
|
||||||
|
r.add(search);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(k1, k2, k3);
|
return Objects.hash(k1, k2, k3);
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package data;
|
package data;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -31,6 +32,20 @@ public final class MultipleHMKeys4 implements MultipleHMKeys {
|
||||||
return k4;
|
return k4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
|
||||||
|
ArrayList<MultipleHMKeys> r = new ArrayList<>();
|
||||||
|
String[] splitedK1 = k1.split("\\s+");
|
||||||
|
String[] splitedK2 = k2.split("\\s+");
|
||||||
|
String[] splitedK3 = k3.split("\\s+");
|
||||||
|
String[] splitedK4 = k4.split("\\s+");
|
||||||
|
for(int i = 0; i < splitedK1.length; i ++){
|
||||||
|
MultipleHMKeys search = new MultipleHMKeys4(splitedK1[i], splitedK2[i], splitedK3[i], splitedK4[i]);
|
||||||
|
r.add(search);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(k1, k2, k3, k4);
|
return Objects.hash(k1, k2, k3, k4);
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package data;
|
package data;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -36,6 +37,22 @@ public final class MultipleHMKeys5 implements MultipleHMKeys {
|
||||||
return k5;
|
return k5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
|
||||||
|
ArrayList<MultipleHMKeys> r = new ArrayList<>();
|
||||||
|
String[] splitedK1 = k1.split("\\s+");
|
||||||
|
String[] splitedK2 = k2.split("\\s+");
|
||||||
|
String[] splitedK3 = k3.split("\\s+");
|
||||||
|
String[] splitedK4 = k4.split("\\s+");
|
||||||
|
String[] splitedK5 = k5.split("\\s+");
|
||||||
|
for(int i = 0; i < splitedK1.length; i ++){
|
||||||
|
MultipleHMKeys search = new MultipleHMKeys5(splitedK1[i], splitedK2[i], splitedK3[i], splitedK4[i], splitedK5[i]);
|
||||||
|
r.add(search);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(k1, k2, k3, k4, k5);
|
return Objects.hash(k1, k2, k3, k4, k5);
|
||||||
|
|
|
@ -495,12 +495,13 @@ public class StatisticsNew {
|
||||||
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
|
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
|
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
|
||||||
String[] splitedString = hmKey.getK1().split("\\s+");
|
// String[] splitedString = hmKey.getK1().split("\\s+");
|
||||||
|
|
||||||
long sum_fwi =0L;
|
long sum_fwi =0L;
|
||||||
for(String s : splitedString){
|
|
||||||
MultipleHMKeys search = new MultipleHMKeys1(s);
|
for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){
|
||||||
sum_fwi += oneWordTaxonomyResult.get("Total").get(search).longValue();
|
System.out.println(smallHmKey.getK1());
|
||||||
|
sum_fwi += oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue();
|
||||||
}
|
}
|
||||||
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
|
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
|
||||||
collocabilityMap.put(hmKey, dice_value);
|
collocabilityMap.put(hmKey, dice_value);
|
||||||
|
|
|
@ -622,20 +622,20 @@ public class StringAnalysisTabNew2 {
|
||||||
@SuppressWarnings("Duplicates")
|
@SuppressWarnings("Duplicates")
|
||||||
@Override
|
@Override
|
||||||
protected Void call() throws Exception {
|
protected Void call() throws Exception {
|
||||||
long i = 0;
|
long i = corpusFiles.size();
|
||||||
for (File f : corpusFiles) {
|
for (File f : corpusFiles) {
|
||||||
readXML(f.toString(), statisticsOneGrams);
|
readXML(f.toString(), statisticsOneGrams);
|
||||||
// i++;
|
i++;
|
||||||
// this.updateProgress(i, corpusFiles.size());
|
this.updateProgress(i, corpusFiles.size() * 2);
|
||||||
// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||||
// progressLabel.textProperty().bind(task.messageProperty());
|
progressLabel.textProperty().bind(task.messageProperty());
|
||||||
|
|
||||||
task.setOnSucceeded(e -> {
|
task.setOnSucceeded(e -> {
|
||||||
System.out.print("test");
|
System.out.print("test");
|
||||||
|
@ -671,32 +671,32 @@ public class StringAnalysisTabNew2 {
|
||||||
// logger.error("Out of memory error", e1);
|
// logger.error("Out of memory error", e1);
|
||||||
// }
|
// }
|
||||||
//
|
//
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
// progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
// progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
// cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
task.setOnFailed(e -> {
|
task.setOnFailed(e -> {
|
||||||
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
||||||
// logger.error("Error while executing", e);
|
logger.error("Error while executing", e);
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
// ngramProgressBar.setProgress(0.0);
|
ngramProgressBar.setProgress(0.0);
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||||
// progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
// progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
// cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
task.setOnCancelled(e -> {
|
task.setOnCancelled(e -> {
|
||||||
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
// ngramProgressBar.setProgress(0.0);
|
ngramProgressBar.setProgress(0.0);
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
// progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
// progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
// cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
// When cancel button is pressed cancel analysis
|
// When cancel button is pressed cancel analysis
|
||||||
|
@ -729,9 +729,15 @@ public class StringAnalysisTabNew2 {
|
||||||
updateMessage(CANCELING_NOTIFICATION);
|
updateMessage(CANCELING_NOTIFICATION);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (statistic.getFilter().getCollocability().size() > 0) {
|
||||||
|
this.updateProgress(i, corpusFiles.size() * 2);
|
||||||
|
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
|
||||||
|
} else {
|
||||||
this.updateProgress(i, corpusFiles.size());
|
this.updateProgress(i, corpusFiles.size());
|
||||||
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||||
}
|
}
|
||||||
|
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -769,13 +775,14 @@ public class StringAnalysisTabNew2 {
|
||||||
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
||||||
logger.error("Out of memory error", e1);
|
logger.error("Out of memory error", e1);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
task.setOnFailed(e -> {
|
task.setOnFailed(e -> {
|
||||||
|
|
|
@ -1,45 +1,78 @@
|
||||||
package gui;
|
package gui;
|
||||||
|
|
||||||
import static alg.XML_processing.*;
|
import data.*;
|
||||||
import static gui.GUIController.*;
|
|
||||||
import static gui.Messages.*;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
|
|
||||||
import javafx.application.HostServices;
|
import javafx.application.HostServices;
|
||||||
|
import javafx.collections.FXCollections;
|
||||||
|
import javafx.collections.ListChangeListener;
|
||||||
|
import javafx.collections.ObservableList;
|
||||||
|
import javafx.concurrent.Task;
|
||||||
|
import javafx.fxml.FXML;
|
||||||
import javafx.scene.control.*;
|
import javafx.scene.control.*;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.logging.log4j.LogManager;
|
import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.controlsfx.control.CheckComboBox;
|
import org.controlsfx.control.CheckComboBox;
|
||||||
|
|
||||||
import data.*;
|
import java.io.File;
|
||||||
import javafx.collections.ListChangeListener;
|
import java.io.UnsupportedEncodingException;
|
||||||
import javafx.collections.ObservableList;
|
import java.util.*;
|
||||||
import javafx.concurrent.Task;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import javafx.fxml.FXML;
|
import java.util.regex.Pattern;
|
||||||
import javafx.scene.layout.AnchorPane;
|
|
||||||
|
import static alg.XML_processing.readXML;
|
||||||
|
import static gui.GUIController.showAlert;
|
||||||
|
import static gui.Messages.*;
|
||||||
|
|
||||||
@SuppressWarnings("Duplicates")
|
@SuppressWarnings("Duplicates")
|
||||||
public class WordLevelTab {
|
public class WordLevelTab {
|
||||||
public final static Logger logger = LogManager.getLogger(WordLevelTab.class);
|
public final static Logger logger = LogManager.getLogger(OneWordAnalysisTab.class);
|
||||||
|
|
||||||
public AnchorPane wordLevelAnalysisTabPane;
|
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
public Label selectedFiltersLabel;
|
public Label selectedFiltersLabel;
|
||||||
@FXML
|
@FXML
|
||||||
public Label solarFilters;
|
public Label solarFilters;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private TextField msdTF;
|
||||||
|
private ArrayList<Pattern> msd;
|
||||||
|
private ArrayList<String> msdStrings;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private CheckComboBox<String> alsoVisualizeCCB;
|
||||||
|
private ArrayList<String> alsoVisualize;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private CheckComboBox<String> taxonomyCCB;
|
private CheckComboBox<String> taxonomyCCB;
|
||||||
private ArrayList<String> taxonomy;
|
private ArrayList<String> taxonomy;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private CheckBox displayTaxonomyChB;
|
||||||
|
private boolean displayTaxonomy;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private ComboBox<String> prefixLengthCB;
|
||||||
|
private Integer prefixLength;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private ComboBox<String> suffixLengthCB;
|
||||||
|
private Integer suffixLength;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private TextField prefixListTF;
|
||||||
|
private ArrayList<String> prefixList;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private TextField suffixListTF;
|
||||||
|
private ArrayList<String> suffixList;
|
||||||
|
|
||||||
|
// @FXML
|
||||||
|
// private CheckBox writeMsdAtTheEndChB;
|
||||||
|
// private boolean writeMsdAtTheEnd;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private ComboBox<String> calculateForCB;
|
||||||
|
private CalculateFor calculateFor;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private TextField minimalOccurrencesTF;
|
private TextField minimalOccurrencesTF;
|
||||||
private Integer minimalOccurrences;
|
private Integer minimalOccurrences;
|
||||||
|
@ -49,7 +82,10 @@ public class WordLevelTab {
|
||||||
private Integer minimalTaxonomy;
|
private Integer minimalTaxonomy;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private Button computeB;
|
private Button computeNgramsB;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private Button cancel;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
public ProgressBar ngramProgressBar;
|
public ProgressBar ngramProgressBar;
|
||||||
|
@ -59,17 +95,242 @@ public class WordLevelTab {
|
||||||
@FXML
|
@FXML
|
||||||
private Hyperlink helpH;
|
private Hyperlink helpH;
|
||||||
|
|
||||||
|
private enum MODE {
|
||||||
|
LETTER,
|
||||||
|
WORD
|
||||||
|
}
|
||||||
|
|
||||||
|
private MODE currentMode;
|
||||||
|
|
||||||
private Corpus corpus;
|
private Corpus corpus;
|
||||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||||
|
private Filter filter;
|
||||||
|
private boolean useDb;
|
||||||
private HostServices hostService;
|
private HostServices hostService;
|
||||||
|
|
||||||
|
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
|
||||||
|
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||||
|
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||||
|
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||||
|
private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
||||||
|
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||||
|
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||||
|
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||||
|
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
||||||
|
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||||
|
|
||||||
|
// TODO: pass observables for taxonomy based on header scan
|
||||||
// after header scan
|
// after header scan
|
||||||
private ObservableList<String> taxonomyCCBValues;
|
private ObservableList<String> taxonomyCCBValues;
|
||||||
private CorpusType currentCorpusType;
|
private CorpusType currentCorpusType;
|
||||||
private boolean useDb;
|
|
||||||
|
|
||||||
|
|
||||||
public void init() {
|
public void init() {
|
||||||
|
currentMode = MODE.WORD;
|
||||||
|
toggleMode(currentMode);
|
||||||
|
|
||||||
|
AtomicBoolean writeMsdAtTheEndEnableCalculateFor = new AtomicBoolean(false);
|
||||||
|
|
||||||
|
// calculateForCB
|
||||||
|
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
calculateFor = CalculateFor.factory(newValue);
|
||||||
|
|
||||||
|
alsoVisualizeCCB.getItems().removeAll();
|
||||||
|
if(newValue.equals("lema")){
|
||||||
|
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
|
||||||
|
} else if(newValue.equals("različnica")) {
|
||||||
|
if (corpus.getCorpusType() == CorpusType.GOS)
|
||||||
|
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWordGos);
|
||||||
|
else
|
||||||
|
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
|
||||||
|
} else if(newValue.equals("normalizirana različnica")) {
|
||||||
|
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
|
||||||
|
} else if(newValue.equals("oblikoskladenjska oznaka")) {
|
||||||
|
// writeMsdAtTheEndEnableCalculateFor.set(true);
|
||||||
|
// writeMsdAtTheEndChB.setDisable(false);
|
||||||
|
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd);
|
||||||
|
}else {
|
||||||
|
|
||||||
|
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
|
||||||
|
}
|
||||||
|
|
||||||
|
// if (!newValue.equals("oblikoskladenjska oznaka")){
|
||||||
|
// writeMsdAtTheEnd = false;
|
||||||
|
// writeMsdAtTheEndChB.setSelected(false);
|
||||||
|
// writeMsdAtTheEndChB.setDisable(true);
|
||||||
|
// writeMsdAtTheEndEnableCalculateFor.set(false);
|
||||||
|
// }
|
||||||
|
|
||||||
|
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||||
|
alsoVisualize = new ArrayList<>();
|
||||||
|
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
|
||||||
|
alsoVisualize.addAll(checkedItems);
|
||||||
|
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||||
|
});
|
||||||
|
alsoVisualizeCCB.getCheckModel().clearChecks();
|
||||||
|
|
||||||
|
logger.info("calculateForCB:", calculateFor.toString());
|
||||||
|
});
|
||||||
|
|
||||||
|
calculateForCB.getSelectionModel().select(0);
|
||||||
|
|
||||||
|
prefixLengthCB.getSelectionModel().select(0);
|
||||||
|
prefixLength = 0;
|
||||||
|
|
||||||
|
prefixLengthCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
prefixLength = Integer.valueOf(newValue);
|
||||||
|
if(prefixLength > 0){
|
||||||
|
prefixListTF.setDisable(true);
|
||||||
|
suffixListTF.setDisable(true);
|
||||||
|
} else if(prefixLength == 0 && suffixLength == 0){
|
||||||
|
prefixListTF.setDisable(false);
|
||||||
|
suffixListTF.setDisable(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) {
|
||||||
|
computeNgramsB.setDisable(false);
|
||||||
|
} else {
|
||||||
|
computeNgramsB.setDisable(true);
|
||||||
|
}
|
||||||
|
logger.info("Prefix length " + prefixLength);
|
||||||
|
});
|
||||||
|
|
||||||
|
suffixLengthCB.getSelectionModel().select(0);
|
||||||
|
suffixLength = 0;
|
||||||
|
|
||||||
|
|
||||||
|
suffixLengthCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
suffixLength = Integer.valueOf(newValue);
|
||||||
|
if(suffixLength > 0){
|
||||||
|
prefixListTF.setDisable(true);
|
||||||
|
suffixListTF.setDisable(true);
|
||||||
|
} else if(prefixLength == 0 && suffixLength == 0){
|
||||||
|
prefixListTF.setDisable(false);
|
||||||
|
suffixListTF.setDisable(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) {
|
||||||
|
computeNgramsB.setDisable(false);
|
||||||
|
} else {
|
||||||
|
computeNgramsB.setDisable(true);
|
||||||
|
}
|
||||||
|
logger.info("Prefix length " + suffixLength);
|
||||||
|
});
|
||||||
|
|
||||||
|
prefixList = new ArrayList<>();
|
||||||
|
|
||||||
|
prefixListTF.textProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
String value = prefixListTF.getText();
|
||||||
|
prefixList = new ArrayList<>();
|
||||||
|
if (!ValidationUtil.isEmpty(value)) {
|
||||||
|
for (String w : value.split(";")){
|
||||||
|
w = w.replaceAll("\\s+","");
|
||||||
|
if (!w.equals("")){
|
||||||
|
prefixList.add(w);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// suffixList = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println(prefixList);
|
||||||
|
|
||||||
|
if(prefixList.size() > 0){
|
||||||
|
prefixLengthCB.setDisable(true);
|
||||||
|
suffixLengthCB.setDisable(true);
|
||||||
|
} else if(suffixList.size() == 0){
|
||||||
|
prefixLengthCB.setDisable(false);
|
||||||
|
suffixLengthCB.setDisable(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) {
|
||||||
|
computeNgramsB.setDisable(false);
|
||||||
|
} else {
|
||||||
|
computeNgramsB.setDisable(true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
suffixList = new ArrayList<>();
|
||||||
|
|
||||||
|
suffixListTF.textProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
String value = suffixListTF.getText();
|
||||||
|
suffixList = new ArrayList<>();
|
||||||
|
if (!ValidationUtil.isEmpty(value)) {
|
||||||
|
for (String w : value.split(";")){
|
||||||
|
w = w.replaceAll("\\s+","");
|
||||||
|
if (!w.equals("")){
|
||||||
|
suffixList.add(w);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// suffixList = value;
|
||||||
|
}
|
||||||
|
System.out.println(suffixList);
|
||||||
|
if(suffixList.size() > 0){
|
||||||
|
prefixLengthCB.setDisable(true);
|
||||||
|
suffixLengthCB.setDisable(true);
|
||||||
|
} else if(prefixList.size() == 0){
|
||||||
|
prefixLengthCB.setDisable(false);
|
||||||
|
suffixLengthCB.setDisable(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) {
|
||||||
|
computeNgramsB.setDisable(false);
|
||||||
|
} else {
|
||||||
|
computeNgramsB.setDisable(true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
// prefixLengthCB.setDisable(true);
|
||||||
|
|
||||||
|
|
||||||
|
// msd
|
||||||
|
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
if (!newValue) {
|
||||||
|
// focus lost
|
||||||
|
String value = msdTF.getText();
|
||||||
|
logger.info("msdTf: ", value);
|
||||||
|
|
||||||
|
if (!ValidationUtil.isEmpty(value)) {
|
||||||
|
ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
|
||||||
|
|
||||||
|
int nOfRequiredMsdTokens = 1;
|
||||||
|
if (msdTmp.size() != nOfRequiredMsdTokens) {
|
||||||
|
String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
|
||||||
|
logAlert(msg);
|
||||||
|
showAlert(Alert.AlertType.ERROR, msg);
|
||||||
|
}
|
||||||
|
msd = new ArrayList<>();
|
||||||
|
msdStrings = new ArrayList<>();
|
||||||
|
for (String msdToken : msdTmp) {
|
||||||
|
msd.add(Pattern.compile(msdToken));
|
||||||
|
msdStrings.add(msdToken);
|
||||||
|
}
|
||||||
|
logger.info(String.format("msd accepted (%d)", msd.size()));
|
||||||
|
|
||||||
|
} else if (!ValidationUtil.isEmpty(newValue)) {
|
||||||
|
msd = new ArrayList<>();
|
||||||
|
msdStrings = new ArrayList<>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
msdTF.setText("");
|
||||||
|
msd = new ArrayList<>();
|
||||||
|
|
||||||
|
alsoVisualizeCCB.getItems().removeAll();
|
||||||
|
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
|
||||||
|
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||||
|
alsoVisualize = new ArrayList<>();
|
||||||
|
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
|
||||||
|
alsoVisualize.addAll(checkedItems);
|
||||||
|
// if (checkedItems.contains("oblikoskladenjska oznaka") || writeMsdAtTheEndEnableCalculateFor.get()){
|
||||||
|
// writeMsdAtTheEndChB.setDisable(false);
|
||||||
|
// } else {
|
||||||
|
// writeMsdAtTheEnd = false;
|
||||||
|
// writeMsdAtTheEndChB.setSelected(false);
|
||||||
|
// writeMsdAtTheEndChB.setDisable(true);
|
||||||
|
// }
|
||||||
|
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||||
|
});
|
||||||
|
alsoVisualizeCCB.getCheckModel().clearChecks();
|
||||||
|
|
||||||
// taxonomy
|
// taxonomy
|
||||||
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
||||||
taxonomyCCB.getItems().removeAll();
|
taxonomyCCB.getItems().removeAll();
|
||||||
|
@ -85,6 +346,23 @@ public class WordLevelTab {
|
||||||
taxonomyCCB.setDisable(true);
|
taxonomyCCB.setDisable(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
displayTaxonomy = false;
|
||||||
|
// set
|
||||||
|
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
displayTaxonomy = newValue;
|
||||||
|
logger.info("display taxonomy: ", displayTaxonomy);
|
||||||
|
});
|
||||||
|
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
|
||||||
|
|
||||||
|
// writeMsdAtTheEnd = false;
|
||||||
|
// writeMsdAtTheEndChB.setDisable(true);
|
||||||
|
// // set
|
||||||
|
// writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
// writeMsdAtTheEnd = newValue;
|
||||||
|
// logger.info("write msd at the end: ", writeMsdAtTheEnd);
|
||||||
|
// });
|
||||||
|
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
|
||||||
|
|
||||||
// set default values
|
// set default values
|
||||||
minimalOccurrencesTF.setText("1");
|
minimalOccurrencesTF.setText("1");
|
||||||
minimalOccurrences = 1;
|
minimalOccurrences = 1;
|
||||||
|
@ -128,29 +406,158 @@ public class WordLevelTab {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
computeB.setOnAction(e -> {
|
computeNgramsB.setDisable(true);
|
||||||
|
|
||||||
|
computeNgramsB.setOnAction(e -> {
|
||||||
compute();
|
compute();
|
||||||
logger.info("compute button");
|
logger.info("compute button");
|
||||||
});
|
});
|
||||||
|
|
||||||
helpH.setOnAction(e -> openHelpWebsite());
|
helpH.setOnAction(e -> openHelpWebsite());
|
||||||
|
|
||||||
|
cancel.setVisible(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void openHelpWebsite(){
|
/**
|
||||||
hostService.showDocument(Messages.HELP_URL);
|
* case a: values for combo boxes can change after a corpus change
|
||||||
|
* <ul>
|
||||||
|
* <li>different corpus type - reset all fields so no old values remain</li>
|
||||||
|
* <li>same corpus type, different subset - keep</li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* case b: values for combo boxes can change after a header scan
|
||||||
|
* <ul>
|
||||||
|
* <li>at first, fields are populated by corpus type defaults</li>
|
||||||
|
* <li>after, with gathered data</li>
|
||||||
|
* </ul>
|
||||||
|
* <p></p>
|
||||||
|
* ngrams: 1
|
||||||
|
* calculateFor: word
|
||||||
|
* msd:
|
||||||
|
* taxonomy:
|
||||||
|
* skip: 0
|
||||||
|
* iscvv: false
|
||||||
|
* string length: 1
|
||||||
|
*/
|
||||||
|
public void populateFields() {
|
||||||
|
// corpus changed if: current one is null (this is first run of the app)
|
||||||
|
// or if currentCorpus != gui's corpus
|
||||||
|
boolean corpusChanged = currentCorpusType == null
|
||||||
|
|| currentCorpusType != corpus.getCorpusType();
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: check for GOS, GIGAFIDA, SOLAR...
|
||||||
|
// refresh and:
|
||||||
|
// TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
||||||
|
if (calculateFor == null) {
|
||||||
|
calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
|
||||||
|
calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!filter.hasMsd()) {
|
||||||
|
// if current corpus doesn't have msd data, disable this field
|
||||||
|
msd = new ArrayList<>();
|
||||||
|
msdTF.setText("");
|
||||||
|
msdTF.setDisable(true);
|
||||||
|
logger.info("no msd data");
|
||||||
|
} else {
|
||||||
|
if (ValidationUtil.isEmpty(msd)
|
||||||
|
|| (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
||||||
|
// msd has not been set previously
|
||||||
|
// or msd has been set but the corpus changed -> reset
|
||||||
|
msd = new ArrayList<>();
|
||||||
|
msdTF.setText("");
|
||||||
|
msdTF.setDisable(false);
|
||||||
|
logger.info("msd reset");
|
||||||
|
} else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
||||||
|
// if msd has been set, but corpus type remained the same, we can keep any set msd value
|
||||||
|
msdTF.setText(StringUtils.join(msdStrings, " "));
|
||||||
|
msdTF.setDisable(false);
|
||||||
|
logger.info("msd kept");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: trigger on rescan
|
||||||
|
if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
||||||
|
// user changed corpus (by type) or by selection & triggered a rescan of headers
|
||||||
|
// see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||||
|
ObservableList<String> tax = corpus.getTaxonomy();
|
||||||
|
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||||
|
|
||||||
|
currentCorpusType = corpus.getCorpusType();
|
||||||
|
// setTaxonomyIsDirty(false);
|
||||||
|
} else {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||||
|
ObservableList<String> tax = corpus.getTaxonomy();
|
||||||
|
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||||
|
taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
|
||||||
|
* sets combobox values to what is applicable ...
|
||||||
|
*
|
||||||
|
* @param mode
|
||||||
|
*/
|
||||||
|
public void toggleMode(MODE mode) {
|
||||||
|
if (mode == null) {
|
||||||
|
mode = currentMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info("mode: ", mode.toString());
|
||||||
|
|
||||||
|
if (mode == MODE.WORD) {
|
||||||
|
if (corpus.getCorpusType() == CorpusType.GOS)
|
||||||
|
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
|
||||||
|
else
|
||||||
|
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
|
||||||
|
} else if (mode == MODE.LETTER) {
|
||||||
|
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);
|
||||||
|
|
||||||
|
|
||||||
|
// if calculateFor was selected for something other than a word or a lemma -> reset
|
||||||
|
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
|
||||||
|
// if the user selected something else before selecting ngram for letters, reset that choice
|
||||||
|
calculateFor = CalculateFor.WORD;
|
||||||
|
calculateForCB.getSelectionModel().select("različnica");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// override if orth mode, allow only word
|
||||||
|
if (corpus.isGosOrthMode()) {
|
||||||
|
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
|
||||||
|
msdTF.setDisable(true);
|
||||||
|
} else {
|
||||||
|
msdTF.setDisable(false);
|
||||||
|
}
|
||||||
|
calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
||||||
|
}
|
||||||
|
|
||||||
private void compute() {
|
private void compute() {
|
||||||
Filter filter = new Filter();
|
Filter filter = new Filter();
|
||||||
filter.setNgramValue(1);
|
filter.setNgramValue(1);
|
||||||
filter.setCalculateFor(CalculateFor.WORD);
|
filter.setCalculateFor(calculateFor);
|
||||||
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
|
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
|
||||||
filter.setAl(AnalysisLevel.WORD_LEVEL);
|
filter.setDisplayTaxonomy(displayTaxonomy);
|
||||||
|
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||||
filter.setSkipValue(0);
|
filter.setSkipValue(0);
|
||||||
filter.setMsd(new ArrayList<>());
|
|
||||||
filter.setIsCvv(false);
|
filter.setIsCvv(false);
|
||||||
filter.setSolarFilters(solarFiltersMap);
|
filter.setSolarFilters(solarFiltersMap);
|
||||||
|
filter.setStringLength(1);
|
||||||
|
filter.setMultipleKeys(alsoVisualize);
|
||||||
|
|
||||||
|
// setMsd must be behind alsoVisualize
|
||||||
|
filter.setMsd(msd);
|
||||||
filter.setMinimalOccurrences(minimalOccurrences);
|
filter.setMinimalOccurrences(minimalOccurrences);
|
||||||
filter.setMinimalTaxonomy(minimalTaxonomy);
|
filter.setMinimalTaxonomy(minimalTaxonomy);
|
||||||
|
filter.setPrefixLength(prefixLength);
|
||||||
|
filter.setSuffixLength(suffixLength);
|
||||||
|
filter.setPrefixList(prefixList);
|
||||||
|
filter.setSuffixList(suffixList);
|
||||||
|
// filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
|
||||||
|
|
||||||
String message = Validation.validateForStringLevel(filter);
|
String message = Validation.validateForStringLevel(filter);
|
||||||
if (message == null) {
|
if (message == null) {
|
||||||
|
@ -164,70 +571,17 @@ public class WordLevelTab {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void execute(StatisticsNew statistic) {
|
|
||||||
logger.info("Started execution: ", statistic.getFilter());
|
|
||||||
|
|
||||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
|
||||||
|
|
||||||
final Task<Void> task = new Task<Void>() {
|
|
||||||
@SuppressWarnings("Duplicates")
|
|
||||||
@Override
|
|
||||||
protected Void call() throws Exception {
|
|
||||||
long i = 0;
|
|
||||||
for (File f : corpusFiles) {
|
|
||||||
readXML(f.toString(), statistic);
|
|
||||||
i++;
|
|
||||||
this.updateProgress(i, corpusFiles.size());
|
|
||||||
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
|
||||||
progressLabel.textProperty().bind(task.messageProperty());
|
|
||||||
|
|
||||||
task.setOnSucceeded(e -> {
|
|
||||||
try {
|
|
||||||
// first, we have to recalculate all occurrences to detailed statistics
|
|
||||||
boolean successullySaved = statistic.saveResultNestedToDisk();
|
|
||||||
|
|
||||||
if (successullySaved) {
|
|
||||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
|
||||||
} else {
|
|
||||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
|
||||||
}
|
|
||||||
} catch (UnsupportedEncodingException e1) {
|
|
||||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
|
||||||
logger.error("Error while saving", e1);
|
|
||||||
}
|
|
||||||
|
|
||||||
ngramProgressBar.progressProperty().unbind();
|
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
progressLabel.textProperty().unbind();
|
|
||||||
progressLabel.setText("");
|
|
||||||
});
|
|
||||||
|
|
||||||
task.setOnFailed(e -> {
|
|
||||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
|
||||||
logger.error("Error while executing", e);
|
|
||||||
ngramProgressBar.progressProperty().unbind();
|
|
||||||
ngramProgressBar.setProgress(0.0);
|
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
progressLabel.textProperty().unbind();
|
|
||||||
progressLabel.setText("");
|
|
||||||
});
|
|
||||||
|
|
||||||
final Thread thread = new Thread(task, "task");
|
|
||||||
thread.setDaemon(true);
|
|
||||||
thread.start();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void logAlert(String alert) {
|
private void logAlert(String alert) {
|
||||||
logger.info("alert: " + alert);
|
logger.info("alert: " + alert);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Opens the help page ({@code Messages.HELP_URL}) through the stored host services. */
private void openHelpWebsite(){
|
||||||
|
hostService.showDocument(Messages.HELP_URL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the corpus currently held by this tab. */
public Corpus getCorpus() {
|
||||||
|
return corpus;
|
||||||
|
}
|
||||||
|
|
||||||
public void setCorpus(Corpus corpus) {
|
public void setCorpus(Corpus corpus) {
|
||||||
this.corpus = corpus;
|
this.corpus = corpus;
|
||||||
|
@ -250,11 +604,93 @@ public class WordLevelTab {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void execute(StatisticsNew statistic) {
|
||||||
|
logger.info("Started execution: ", statistic.getFilter());
|
||||||
|
|
||||||
|
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||||
|
boolean corpusIsSplit = corpusFiles.size() > 1;
|
||||||
|
|
||||||
|
final Task<Void> task = new Task<Void>() {
|
||||||
|
@SuppressWarnings("Duplicates")
|
||||||
|
@Override
|
||||||
|
protected Void call() throws Exception {
|
||||||
|
long i = 0;
|
||||||
|
for (File f : corpusFiles) {
|
||||||
|
readXML(f.toString(), statistic);
|
||||||
|
i++;
|
||||||
|
if (isCancelled()) {
|
||||||
|
updateMessage(CANCELING_NOTIFICATION);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
this.updateProgress(i, corpusFiles.size());
|
||||||
|
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||||
|
progressLabel.textProperty().bind(task.messageProperty());
|
||||||
|
|
||||||
|
task.setOnSucceeded(e -> {
|
||||||
|
try {
|
||||||
|
boolean successullySaved = statistic.saveResultToDisk();
|
||||||
|
if (successullySaved) {
|
||||||
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||||
|
} else {
|
||||||
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||||
|
}
|
||||||
|
} catch (UnsupportedEncodingException e1) {
|
||||||
|
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||||
|
logger.error("Error while saving", e1);
|
||||||
|
}
|
||||||
|
|
||||||
|
ngramProgressBar.progressProperty().unbind();
|
||||||
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
|
progressLabel.textProperty().unbind();
|
||||||
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
task.setOnFailed(e -> {
|
||||||
|
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
||||||
|
logger.error("Error while executing", e);
|
||||||
|
ngramProgressBar.progressProperty().unbind();
|
||||||
|
ngramProgressBar.setProgress(0.0);
|
||||||
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||||
|
progressLabel.textProperty().unbind();
|
||||||
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
task.setOnCancelled(e -> {
|
||||||
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
|
||||||
|
ngramProgressBar.progressProperty().unbind();
|
||||||
|
ngramProgressBar.setProgress(0.0);
|
||||||
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
|
progressLabel.textProperty().unbind();
|
||||||
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
// When cancel button is pressed cancel analysis
|
||||||
|
cancel.setOnAction(e -> {
|
||||||
|
task.cancel();
|
||||||
|
logger.info("cancel button");
|
||||||
|
});
|
||||||
|
|
||||||
|
cancel.setVisible(true);
|
||||||
|
final Thread thread = new Thread(task, "task");
|
||||||
|
thread.setDaemon(true);
|
||||||
|
thread.start();
|
||||||
|
}
|
||||||
|
|
||||||
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
||||||
this.solarFiltersMap = solarFiltersMap;
|
this.solarFiltersMap = solarFiltersMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setHostServices(HostServices hostServices){
|
public void setHostServices(HostServices hostServices){
|
||||||
this.hostService = hostServices;
|
this.hostService = hostServices;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,30 +1,113 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
|
||||||
<?import org.controlsfx.control.CheckComboBox?>
|
<?import java.lang.String?>
|
||||||
<?import javafx.scene.control.*?>
|
<?import javafx.collections.FXCollections?>
|
||||||
|
<?import javafx.scene.control.Button?>
|
||||||
|
<?import javafx.scene.control.CheckBox?>
|
||||||
|
<?import javafx.scene.control.Hyperlink?>
|
||||||
|
<?import javafx.scene.control.ComboBox?>
|
||||||
|
<?import javafx.scene.control.Label?>
|
||||||
|
<?import javafx.scene.control.ProgressBar?>
|
||||||
|
<?import javafx.scene.control.TextField?>
|
||||||
<?import javafx.scene.layout.AnchorPane?>
|
<?import javafx.scene.layout.AnchorPane?>
|
||||||
<?import javafx.scene.layout.Pane?>
|
<?import javafx.scene.layout.Pane?>
|
||||||
|
<?import org.controlsfx.control.CheckComboBox?>
|
||||||
|
|
||||||
<AnchorPane fx:id="wordLevelAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
<AnchorPane fx:id="wordLevelAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
||||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordLevelTab">
|
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordLevelTab">
|
||||||
<Pane>
|
<Pane>
|
||||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
|
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za"/>
|
||||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
|
<ComboBox fx:id="calculateForCB" layoutX="185.0" layoutY="20.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
|
||||||
|
visibleRowCount="5">
|
||||||
|
<items>
|
||||||
|
<FXCollections fx:factory="observableArrayList">
|
||||||
|
<String fx:value="lema"/>
|
||||||
|
<String fx:value="različnica"/>
|
||||||
|
<String fx:value="oblikoskladenjska oznaka"/>
|
||||||
|
<String fx:value="oblikoskladenjska lastnost"/>
|
||||||
|
<String fx:value="besedna vrsta"/>
|
||||||
|
</FXCollections>
|
||||||
|
</items>
|
||||||
|
</ComboBox>
|
||||||
|
|
||||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
|
||||||
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
|
<CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||||
|
|
||||||
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
|
||||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0" />
|
|
||||||
|
|
||||||
<Button fx:id="computeB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false"
|
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
|
||||||
|
<CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
|
||||||
|
|
||||||
|
<!--<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Izpiši razbit MSD" />-->
|
||||||
|
<!--<CheckBox fx:id="writeMsdAtTheEndChB" layoutX="263.0" layoutY="145.0" selected="false" />-->
|
||||||
|
|
||||||
|
<!-- MSD and Taxonomy separated -->
|
||||||
|
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Iskanje besednih delov preko podane dolžine" />
|
||||||
|
|
||||||
|
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Predpona je dolga"/>
|
||||||
|
<ComboBox fx:id="prefixLengthCB" layoutX="185.0" layoutY="200.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
|
||||||
|
<items>
|
||||||
|
<FXCollections fx:factory="observableArrayList">
|
||||||
|
<String fx:value="0" />
|
||||||
|
<String fx:value="1" />
|
||||||
|
<String fx:value="2" />
|
||||||
|
<String fx:value="3" />
|
||||||
|
<String fx:value="4" />
|
||||||
|
<String fx:value="5" />
|
||||||
|
</FXCollections>
|
||||||
|
</items>
|
||||||
|
</ComboBox>
|
||||||
|
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Pripona je dolga"/>
|
||||||
|
<ComboBox fx:id="suffixLengthCB" layoutX="185.0" layoutY="240.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
|
||||||
|
<items>
|
||||||
|
<FXCollections fx:factory="observableArrayList">
|
||||||
|
<String fx:value="0" />
|
||||||
|
<String fx:value="1" />
|
||||||
|
<String fx:value="2" />
|
||||||
|
<String fx:value="3" />
|
||||||
|
<String fx:value="4" />
|
||||||
|
<String fx:value="5" />
|
||||||
|
</FXCollections>
|
||||||
|
</items>
|
||||||
|
</ComboBox>
|
||||||
|
|
||||||
|
<Label layoutX="10.0" layoutY="300.0" prefHeight="25.0" text="Iskanje besednih delov preko podanih predpon in pripon" />
|
||||||
|
|
||||||
|
<Label layoutX="10.0" layoutY="340.0" prefHeight="25.0" text="Seznam predpon"/>
|
||||||
|
<TextField fx:id="prefixListTF" layoutX="185.0" layoutY="340.0" prefWidth="180.0" />
|
||||||
|
|
||||||
|
<Label layoutX="10.0" layoutY="380.0" prefHeight="25.0" text="Seznam pripon"/>
|
||||||
|
<TextField fx:id="suffixListTF" layoutX="185.0" layoutY="380.0" prefWidth="180.0" />
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
|
||||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||||
</Pane>
|
</Pane>
|
||||||
|
|
||||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
|
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
|
||||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
|
<!-- MSD and Taxonomy separated -->
|
||||||
text=" " wrapText="true"/>
|
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Omejitev podatkov" />
|
||||||
|
|
||||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
|
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Oznaka MSD"/>
|
||||||
|
<TextField fx:id="msdTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0"/>
|
||||||
|
<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Taksonomija"/>
|
||||||
|
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="140.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||||
|
|
||||||
|
<Label layoutX="10.0" layoutY="180.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
||||||
|
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="180.0" prefWidth="180.0" />
|
||||||
|
|
||||||
|
<Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||||
|
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="220.0" prefWidth="180.0" />
|
||||||
|
|
||||||
|
|
||||||
|
<Label fx:id="solarFilters" layoutX="10.0" layoutY="280.0" text="Izbrani filtri:" />
|
||||||
|
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="320.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
|
||||||
|
</Pane>
|
||||||
|
|
||||||
|
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
|
||||||
|
|
||||||
|
<Button fx:id="cancel" layoutX="540.0" layoutY="482.0" mnemonicParsing="false"
|
||||||
|
prefHeight="25.0" prefWidth="250.0" text="Prekini"/>
|
||||||
|
|
||||||
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
|
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
|
||||||
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
|
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
|
||||||
|
|
Loading…
Reference in New Issue
Block a user