Added initial functionality for word parts implementation

master
Luka 6 years ago
parent f9ce74d7b8
commit abc15360d3

@ -52,6 +52,10 @@ public class Ngrams {
// generate proper MultipleHMKeys depending on filter data
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts());
if(key.length() < stats.getFilter().getPrefixLength() + stats.getFilter().getSuffixLength()){
continue;
}
// if last letter is ',' erase it
// if (key.equals("")){

@ -29,7 +29,11 @@ public class Filter implements Cloneable {
NOTE_PUNCTUATIONS,
MINIMAL_OCCURRENCES,
MINIMAL_TAXONOMY,
COLLOCABILITY
COLLOCABILITY,
PREFIX_LENGTH,
SUFFIX_LENGTH,
PREFIX_LIST,
SUFFIX_LIST
}
public Filter() {
@ -245,6 +249,43 @@ public class Filter implements Cloneable {
return (Integer) filter.get(MINIMAL_TAXONOMY);
}
// PREFIX_LENGTH,
// SUFFIX_LENGTH,
// PREFIX_LIST,
// SUFFIX_LIST
/**
 * Stores the given prefix length in the filter under the {@code PREFIX_LENGTH} key.
 *
 * @param v prefix length to store
 */
public void setPrefixLength(Integer v) {
filter.put(PREFIX_LENGTH, v);
}
/**
 * Returns the value stored under the {@code PREFIX_LENGTH} key, cast to {@code Integer}.
 * <p>
 * NOTE(review): presumably {@code null} when no prefix length was ever stored (depends on
 * the type of {@code filter} and on constructor defaults, neither visible here) - callers
 * that unbox the result should confirm a default exists.
 *
 * @return the stored prefix length
 */
public Integer getPrefixLength() {
return (Integer) filter.get(PREFIX_LENGTH);
}
/**
 * Stores the given suffix length in the filter under the {@code SUFFIX_LENGTH} key.
 *
 * @param v suffix length to store
 */
public void setSuffixLength(Integer v) {
filter.put(SUFFIX_LENGTH, v);
}
/**
 * Returns the value stored under the {@code SUFFIX_LENGTH} key, cast to {@code Integer}.
 * <p>
 * NOTE(review): presumably {@code null} when no suffix length was ever stored (depends on
 * the type of {@code filter} and on constructor defaults, neither visible here) - callers
 * that unbox the result should confirm a default exists.
 *
 * @return the stored suffix length
 */
public Integer getSuffixLength() {
return (Integer) filter.get(SUFFIX_LENGTH);
}
/**
 * Stores the given list of prefixes in the filter under the {@code PREFIX_LIST} key.
 * The list is stored by reference; no copy is made.
 *
 * @param v prefixes to store
 */
public void setPrefixList(ArrayList<String> v) {
filter.put(PREFIX_LIST, v);
}
/**
 * Returns the value stored under the {@code PREFIX_LIST} key via an unchecked cast
 * to {@code ArrayList<String>}.
 * <p>
 * NOTE(review): presumably {@code null} when no list was ever stored - confirm against
 * the constructor defaults.
 *
 * @return the stored prefix list
 */
public ArrayList<String> getPrefixList() {
return (ArrayList<String>) filter.get(PREFIX_LIST);
}
/**
 * Stores the given list of suffixes in the filter under the {@code SUFFIX_LIST} key.
 * The list is stored by reference; no copy is made.
 *
 * @param v suffixes to store
 */
public void setSuffixList(ArrayList<String> v) {
filter.put(SUFFIX_LIST, v);
}
/**
 * Returns the value stored under the {@code SUFFIX_LIST} key via an unchecked cast
 * to {@code ArrayList<String>}.
 * <p>
 * NOTE(review): presumably {@code null} when no list was ever stored - confirm against
 * the constructor defaults.
 *
 * @return the stored suffix list
 */
public ArrayList<String> getSuffixList() {
return (ArrayList<String>) filter.get(SUFFIX_LIST);
}
private void addWordPart(CalculateFor wp){
ArrayList<CalculateFor> oldWp = ((ArrayList<CalculateFor>) filter.get(WORD_PARTS));

@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@ -12,6 +13,8 @@ public interface MultipleHMKeys {
default String getK4(){ return null; }
default String getK5(){ return null; }
default ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){ return null; }
@Override
int hashCode();

@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@ -16,6 +17,16 @@ public final class MultipleHMKeys1 implements MultipleHMKeys {
return k1;
}
/**
 * Splits {@code k1} on runs of whitespace and wraps every resulting token in its own
 * {@link MultipleHMKeys1}.
 *
 * @return one single-token key per token, in the order the tokens appear in {@code k1}
 */
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
    ArrayList<MultipleHMKeys> parts = new ArrayList<>();
    for (String token : k1.split("\\s+")) {
        parts.add(new MultipleHMKeys1(token));
    }
    return parts;
}
@Override
public int hashCode() {
return k1.hashCode();

@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@ -21,6 +22,17 @@ public final class MultipleHMKeys2 implements MultipleHMKeys {
return k2;
}
/**
 * Splits {@code k1} and {@code k2} on runs of whitespace and pairs the tokens by position,
 * producing one {@link MultipleHMKeys2} per token of {@code k1}.
 * <p>
 * The number of results is driven by {@code k1} alone: if {@code k2} yields fewer tokens
 * the positional lookup throws {@link ArrayIndexOutOfBoundsException}; surplus tokens of
 * {@code k2} are ignored.
 *
 * @return the per-position keys, in token order
 */
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
    ArrayList<MultipleHMKeys> parts = new ArrayList<>();
    final String[] firstTokens = k1.split("\\s+");
    final String[] secondTokens = k2.split("\\s+");
    int position = 0;
    while (position < firstTokens.length) {
        parts.add(new MultipleHMKeys2(firstTokens[position], secondTokens[position]));
        position++;
    }
    return parts;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2);

@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@ -26,6 +27,18 @@ public final class MultipleHMKeys3 implements MultipleHMKeys {
return k3;
}
/**
 * Splits {@code k1}, {@code k2} and {@code k3} on runs of whitespace and groups the tokens
 * by position, producing one {@link MultipleHMKeys3} per token of {@code k1}.
 * <p>
 * The number of results is driven by {@code k1} alone: if another key yields fewer tokens
 * the positional lookup throws {@link ArrayIndexOutOfBoundsException}; surplus tokens of
 * the other keys are ignored.
 *
 * @return the per-position keys, in token order
 */
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
    ArrayList<MultipleHMKeys> parts = new ArrayList<>();
    final String[] firstTokens = k1.split("\\s+");
    final String[] secondTokens = k2.split("\\s+");
    final String[] thirdTokens = k3.split("\\s+");
    int position = 0;
    while (position < firstTokens.length) {
        parts.add(new MultipleHMKeys3(
                firstTokens[position], secondTokens[position], thirdTokens[position]));
        position++;
    }
    return parts;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2, k3);

@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@ -31,6 +32,20 @@ public final class MultipleHMKeys4 implements MultipleHMKeys {
return k4;
}
/**
 * Splits {@code k1} through {@code k4} on runs of whitespace and groups the tokens by
 * position, producing one {@link MultipleHMKeys4} per token of {@code k1}.
 * <p>
 * The number of results is driven by {@code k1} alone: if another key yields fewer tokens
 * the positional lookup throws {@link ArrayIndexOutOfBoundsException}; surplus tokens of
 * the other keys are ignored.
 *
 * @return the per-position keys, in token order
 */
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
    ArrayList<MultipleHMKeys> parts = new ArrayList<>();
    final String[] firstTokens = k1.split("\\s+");
    final String[] secondTokens = k2.split("\\s+");
    final String[] thirdTokens = k3.split("\\s+");
    final String[] fourthTokens = k4.split("\\s+");
    int position = 0;
    while (position < firstTokens.length) {
        parts.add(new MultipleHMKeys4(
                firstTokens[position], secondTokens[position],
                thirdTokens[position], fourthTokens[position]));
        position++;
    }
    return parts;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2, k3, k4);

@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@ -36,6 +37,22 @@ public final class MultipleHMKeys5 implements MultipleHMKeys {
return k5;
}
/**
 * Splits {@code k1} through {@code k5} on runs of whitespace and groups the tokens by
 * position, producing one {@link MultipleHMKeys5} per token of {@code k1}.
 * <p>
 * The number of results is driven by {@code k1} alone: if another key yields fewer tokens
 * the positional lookup throws {@link ArrayIndexOutOfBoundsException}; surplus tokens of
 * the other keys are ignored.
 *
 * @return the per-position keys, in token order
 */
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
    ArrayList<MultipleHMKeys> parts = new ArrayList<>();
    final String[] firstTokens = k1.split("\\s+");
    final String[] secondTokens = k2.split("\\s+");
    final String[] thirdTokens = k3.split("\\s+");
    final String[] fourthTokens = k4.split("\\s+");
    final String[] fifthTokens = k5.split("\\s+");
    int position = 0;
    while (position < firstTokens.length) {
        parts.add(new MultipleHMKeys5(
                firstTokens[position], secondTokens[position], thirdTokens[position],
                fourthTokens[position], fifthTokens[position]));
        position++;
    }
    return parts;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2, k3, k4, k5);

@ -495,12 +495,13 @@ public class StatisticsNew {
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
String[] splitedString = hmKey.getK1().split("\\s+");
// String[] splitedString = hmKey.getK1().split("\\s+");
long sum_fwi =0L;
for(String s : splitedString){
MultipleHMKeys search = new MultipleHMKeys1(s);
sum_fwi += oneWordTaxonomyResult.get("Total").get(search).longValue();
for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){
System.out.println(smallHmKey.getK1());
sum_fwi += oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue();
}
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
collocabilityMap.put(hmKey, dice_value);

@ -622,20 +622,20 @@ public class StringAnalysisTabNew2 {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
long i = corpusFiles.size();
for (File f : corpusFiles) {
readXML(f.toString(), statisticsOneGrams);
// i++;
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
i++;
this.updateProgress(i, corpusFiles.size() * 2);
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
}
return null;
}
};
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
System.out.print("test");
@ -671,32 +671,32 @@ public class StringAnalysisTabNew2 {
// logger.error("Out of memory error", e1);
// }
//
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
logger.error("Error while executing", e);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
@ -729,8 +729,14 @@ public class StringAnalysisTabNew2 {
updateMessage(CANCELING_NOTIFICATION);
break;
}
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
if (statistic.getFilter().getCollocability().size() > 0) {
this.updateProgress(i, corpusFiles.size() * 2);
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
} else {
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
}
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
}
return null;
@ -769,13 +775,14 @@ public class StringAnalysisTabNew2 {
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
}
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
}
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnFailed(e -> {

@ -1,45 +1,78 @@
package gui;
import static alg.XML_processing.*;
import static gui.GUIController.*;
import static gui.Messages.*;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import data.*;
import javafx.application.HostServices;
import javafx.collections.FXCollections;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;
import data.*;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.layout.AnchorPane;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;
import static alg.XML_processing.readXML;
import static gui.GUIController.showAlert;
import static gui.Messages.*;
@SuppressWarnings("Duplicates")
public class WordLevelTab {
public final static Logger logger = LogManager.getLogger(WordLevelTab.class);
public AnchorPane wordLevelAnalysisTabPane;
public final static Logger logger = LogManager.getLogger(OneWordAnalysisTab.class);
@FXML
public Label selectedFiltersLabel;
@FXML
public Label solarFilters;
@FXML
private TextField msdTF;
private ArrayList<Pattern> msd;
private ArrayList<String> msdStrings;
@FXML
private CheckComboBox<String> alsoVisualizeCCB;
private ArrayList<String> alsoVisualize;
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
@FXML
private CheckBox displayTaxonomyChB;
private boolean displayTaxonomy;
@FXML
private ComboBox<String> prefixLengthCB;
private Integer prefixLength;
@FXML
private ComboBox<String> suffixLengthCB;
private Integer suffixLength;
@FXML
private TextField prefixListTF;
private ArrayList<String> prefixList;
@FXML
private TextField suffixListTF;
private ArrayList<String> suffixList;
// @FXML
// private CheckBox writeMsdAtTheEndChB;
// private boolean writeMsdAtTheEnd;
@FXML
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
@FXML
private TextField minimalOccurrencesTF;
private Integer minimalOccurrences;
@ -49,7 +82,10 @@ public class WordLevelTab {
private Integer minimalTaxonomy;
@FXML
private Button computeB;
private Button computeNgramsB;
@FXML
private Button cancel;
@FXML
public ProgressBar ngramProgressBar;
@ -59,17 +95,242 @@ public class WordLevelTab {
@FXML
private Hyperlink helpH;
private enum MODE {
LETTER,
WORD
}
private MODE currentMode;
private Corpus corpus;
private HashMap<String, HashSet<String>> solarFiltersMap;
private Filter filter;
private boolean useDb;
private HostServices hostService;
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// TODO: pass observables for taxonomy based on header scan
// after header scan
private ObservableList<String> taxonomyCCBValues;
private CorpusType currentCorpusType;
private boolean useDb;
public void init() {
currentMode = MODE.WORD;
toggleMode(currentMode);
AtomicBoolean writeMsdAtTheEndEnableCalculateFor = new AtomicBoolean(false);
// calculateForCB
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
calculateFor = CalculateFor.factory(newValue);
alsoVisualizeCCB.getItems().removeAll();
if(newValue.equals("lema")){
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
} else if(newValue.equals("različnica")) {
if (corpus.getCorpusType() == CorpusType.GOS)
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWordGos);
else
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
} else if(newValue.equals("normalizirana različnica")) {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
} else if(newValue.equals("oblikoskladenjska oznaka")) {
// writeMsdAtTheEndEnableCalculateFor.set(true);
// writeMsdAtTheEndChB.setDisable(false);
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd);
}else {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
}
// if (!newValue.equals("oblikoskladenjska oznaka")){
// writeMsdAtTheEnd = false;
// writeMsdAtTheEndChB.setSelected(false);
// writeMsdAtTheEndChB.setDisable(true);
// writeMsdAtTheEndEnableCalculateFor.set(false);
// }
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
alsoVisualize = new ArrayList<>();
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
alsoVisualize.addAll(checkedItems);
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
});
alsoVisualizeCCB.getCheckModel().clearChecks();
logger.info("calculateForCB:", calculateFor.toString());
});
calculateForCB.getSelectionModel().select(0);
prefixLengthCB.getSelectionModel().select(0);
prefixLength = 0;
prefixLengthCB.valueProperty().addListener((observable, oldValue, newValue) -> {
prefixLength = Integer.valueOf(newValue);
if(prefixLength > 0){
prefixListTF.setDisable(true);
suffixListTF.setDisable(true);
} else if(prefixLength == 0 && suffixLength == 0){
prefixListTF.setDisable(false);
suffixListTF.setDisable(false);
}
if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) {
computeNgramsB.setDisable(false);
} else {
computeNgramsB.setDisable(true);
}
logger.info("Prefix length " + prefixLength);
});
suffixLengthCB.getSelectionModel().select(0);
suffixLength = 0;
suffixLengthCB.valueProperty().addListener((observable, oldValue, newValue) -> {
suffixLength = Integer.valueOf(newValue);
if(suffixLength > 0){
prefixListTF.setDisable(true);
suffixListTF.setDisable(true);
} else if(prefixLength == 0 && suffixLength == 0){
prefixListTF.setDisable(false);
suffixListTF.setDisable(false);
}
if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) {
computeNgramsB.setDisable(false);
} else {
computeNgramsB.setDisable(true);
}
logger.info("Prefix length " + suffixLength);
});
prefixList = new ArrayList<>();
prefixListTF.textProperty().addListener((observable, oldValue, newValue) -> {
String value = prefixListTF.getText();
prefixList = new ArrayList<>();
if (!ValidationUtil.isEmpty(value)) {
for (String w : value.split(";")){
w = w.replaceAll("\\s+","");
if (!w.equals("")){
prefixList.add(w);
}
}
// suffixList = value;
}
System.out.println(prefixList);
if(prefixList.size() > 0){
prefixLengthCB.setDisable(true);
suffixLengthCB.setDisable(true);
} else if(suffixList.size() == 0){
prefixLengthCB.setDisable(false);
suffixLengthCB.setDisable(false);
}
if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) {
computeNgramsB.setDisable(false);
} else {
computeNgramsB.setDisable(true);
}
});
suffixList = new ArrayList<>();
suffixListTF.textProperty().addListener((observable, oldValue, newValue) -> {
String value = suffixListTF.getText();
suffixList = new ArrayList<>();
if (!ValidationUtil.isEmpty(value)) {
for (String w : value.split(";")){
w = w.replaceAll("\\s+","");
if (!w.equals("")){
suffixList.add(w);
}
}
// suffixList = value;
}
System.out.println(suffixList);
if(suffixList.size() > 0){
prefixLengthCB.setDisable(true);
suffixLengthCB.setDisable(true);
} else if(prefixList.size() == 0){
prefixLengthCB.setDisable(false);
suffixLengthCB.setDisable(false);
}
if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) {
computeNgramsB.setDisable(false);
} else {
computeNgramsB.setDisable(true);
}
});
// prefixLengthCB.setDisable(true);
// msd
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = msdTF.getText();
logger.info("msdTf: ", value);
if (!ValidationUtil.isEmpty(value)) {
ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
int nOfRequiredMsdTokens = 1;
if (msdTmp.size() != nOfRequiredMsdTokens) {
String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
logAlert(msg);
showAlert(Alert.AlertType.ERROR, msg);
}
msd = new ArrayList<>();
msdStrings = new ArrayList<>();
for (String msdToken : msdTmp) {
msd.add(Pattern.compile(msdToken));
msdStrings.add(msdToken);
}
logger.info(String.format("msd accepted (%d)", msd.size()));
} else if (!ValidationUtil.isEmpty(newValue)) {
msd = new ArrayList<>();
msdStrings = new ArrayList<>();
}
}
});
msdTF.setText("");
msd = new ArrayList<>();
alsoVisualizeCCB.getItems().removeAll();
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
alsoVisualize = new ArrayList<>();
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
alsoVisualize.addAll(checkedItems);
// if (checkedItems.contains("oblikoskladenjska oznaka") || writeMsdAtTheEndEnableCalculateFor.get()){
// writeMsdAtTheEndChB.setDisable(false);
// } else {
// writeMsdAtTheEnd = false;
// writeMsdAtTheEndChB.setSelected(false);
// writeMsdAtTheEndChB.setDisable(true);
// }
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
});
alsoVisualizeCCB.getCheckModel().clearChecks();
// taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll();
@ -85,12 +346,29 @@ public class WordLevelTab {
taxonomyCCB.setDisable(true);
}
// set default values
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
displayTaxonomy = false;
// set
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
displayTaxonomy = newValue;
logger.info("display taxonomy: ", displayTaxonomy);
});
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
// writeMsdAtTheEnd = false;
// writeMsdAtTheEndChB.setDisable(true);
// // set
// writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
// writeMsdAtTheEnd = newValue;
// logger.info("write msd at the end: ", writeMsdAtTheEnd);
// });
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
// set default values
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
@ -128,29 +406,158 @@ public class WordLevelTab {
}
});
computeB.setOnAction(e -> {
computeNgramsB.setDisable(true);
computeNgramsB.setOnAction(e -> {
compute();
logger.info("compute button");
});
helpH.setOnAction(e -> openHelpWebsite());
cancel.setVisible(false);
}
private void openHelpWebsite(){
hostService.showDocument(Messages.HELP_URL);
/**
 * Refreshes the tab's input fields for the corpus currently set on this tab.
 * <p>
 * Case a: values can change after a corpus change
 * <ul>
 * <li>different corpus type - the msd field is reset so no old value remains</li>
 * <li>same corpus type - a previously entered msd value is kept</li>
 * </ul>
 * <p>
 * Case b: values can change after a header scan
 * <ul>
 * <li>taxonomy read from the corpus is used when it is available</li>
 * <li>otherwise the defaults for the corpus type are used</li>
 * </ul>
 * <p>
 * The corpus type seen by this call is recorded at the end, so the next call can tell
 * whether the corpus type actually changed.
 */
public void populateFields() {
    // The corpus counts as changed on the very first run (no type recorded yet)
    // or when the recorded type differs from the type of the current corpus.
    boolean corpusChanged = currentCorpusType == null
            || currentCorpusType != corpus.getCorpusType();

    // TODO: check for GOS, GIGAFIDA, SOLAR...
    // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
    if (calculateFor == null) {
        calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
        calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
    }

    // NOTE(review): this reads the `filter` field, while compute() builds its own local
    // Filter - confirm the field is assigned before this method is called.
    if (!filter.hasMsd()) {
        // the current corpus has no msd data, so the field is cleared and disabled
        msd = new ArrayList<>();
        msdTF.setText("");
        msdTF.setDisable(true);
        logger.info("no msd data");
    } else if (ValidationUtil.isEmpty(msd) || corpusChanged) {
        // msd was never set, or it was set for a different corpus type -> reset
        msd = new ArrayList<>();
        msdTF.setText("");
        msdTF.setDisable(false);
        logger.info("msd reset");
    } else {
        // msd was set and the corpus type is unchanged -> keep the entered value
        msdTF.setText(StringUtils.join(msdStrings, " "));
        msdTF.setDisable(false);
        logger.info("msd kept");
    }

    // TODO: trigger on rescan
    // Use the taxonomy read from the headers when present, otherwise the corpus-type defaults.
    ObservableList<String> tax = corpus.getTaxonomy();
    taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
    // NOTE(review): addAll appends to whatever the combo box already holds, so repeated
    // calls accumulate entries unless the items are cleared elsewhere - confirm.
    taxonomyCCB.getItems().addAll(taxonomyCCBValues);

    // Record the type unconditionally. Previously it was only updated when it was already
    // non-null, so it stayed null forever and every call was treated as a corpus change.
    currentCorpusType = corpus.getCorpusType();
}
/**
 * Fills the "calculate for" combo box with the options applicable to the given mode and
 * to the current corpus, enables or disables the msd field, and finally sets
 * {@code calculateFor} to the first option of the combo box.
 *
 * @param mode mode to apply; when {@code null} the current mode is used
 */
public void toggleMode(MODE mode) {
    if (mode == null) {
        mode = currentMode;
    }

    // Parameterized message: without the {} placeholder the logger drops the argument
    // and only the text "mode: " is written.
    logger.info("mode: {}", mode);

    if (mode == MODE.WORD) {
        if (corpus.getCorpusType() == CorpusType.GOS) {
            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
        } else {
            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
        }
    } else if (mode == MODE.LETTER) {
        calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);

        // if calculateFor was selected for something other than a word or a lemma -> reset
        if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
            // if the user selected something else before selecting ngram for letters, reset that choice
            calculateFor = CalculateFor.WORD;
            calculateForCB.getSelectionModel().select("različnica");
        }
    }

    // override if orth mode, allow only word
    if (corpus.isGosOrthMode()) {
        calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
        msdTF.setDisable(true);
    } else {
        msdTF.setDisable(false);
    }

    // NOTE(review): this overrides any value assigned in the LETTER branch above with the
    // first combo box entry - confirm that is intended.
    calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
}
private void compute() {
Filter filter = new Filter();
filter.setNgramValue(1);
filter.setCalculateFor(CalculateFor.WORD);
filter.setCalculateFor(calculateFor);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setAl(AnalysisLevel.WORD_LEVEL);
filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0);
filter.setMsd(new ArrayList<>());
filter.setIsCvv(false);
filter.setSolarFilters(solarFiltersMap);
filter.setStringLength(1);
filter.setMultipleKeys(alsoVisualize);
// setMsd must be behind alsoVisualize
filter.setMsd(msd);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
filter.setPrefixLength(prefixLength);
filter.setSuffixLength(suffixLength);
filter.setPrefixList(prefixList);
filter.setSuffixList(suffixList);
// filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
String message = Validation.validateForStringLevel(filter);
if (message == null) {
@ -164,10 +571,44 @@ public class WordLevelTab {
}
}
/**
 * Writes the given alert text to the log at info level, prefixed with "alert: ".
 *
 * @param alert text of the alert to record
 */
private void logAlert(String alert) {
    final String entry = "alert: " + alert;
    logger.info(entry);
}
/**
 * Asks the host services to show the document at {@code Messages.HELP_URL}.
 */
private void openHelpWebsite(){
hostService.showDocument(Messages.HELP_URL);
}
/**
 * Returns the corpus currently assigned to this tab.
 *
 * @return the corpus held in the {@code corpus} field
 */
public Corpus getCorpus() {
return corpus;
}
/**
 * Assigns the corpus this tab works on and updates the selected-filters label:
 * for a SOLAR corpus the label is shown with the placeholder "/", for any other
 * corpus type it is hidden.
 *
 * @param corpus corpus to use
 */
public void setCorpus(Corpus corpus) {
    this.corpus = corpus;

    final boolean isSolar = corpus.getCorpusType() == CorpusType.SOLAR;
    setSelectedFiltersLabel(isSolar ? "/" : null);
}
/**
 * Shows or hides the solar-filters caption together with the selected-filters label.
 * Both are visible exactly when {@code content} is non-null, in which case the label
 * text is set to {@code content}.
 *
 * @param content text to display, or {@code null} to hide both labels
 */
public void setSelectedFiltersLabel(String content) {
    final boolean hasContent = content != null;
    solarFilters.setVisible(hasContent);
    selectedFiltersLabel.setVisible(hasContent);
    if (hasContent) {
        selectedFiltersLabel.setText(content);
    }
}
private void execute(StatisticsNew statistic) {
logger.info("Started execution: ", statistic.getFilter());
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
boolean corpusIsSplit = corpusFiles.size() > 1;
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@ -177,6 +618,10 @@ public class WordLevelTab {
for (File f : corpusFiles) {
readXML(f.toString(), statistic);
i++;
if (isCancelled()) {
updateMessage(CANCELING_NOTIFICATION);
break;
}
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
}
@ -190,9 +635,7 @@ public class WordLevelTab {
task.setOnSucceeded(e -> {
try {
// first, we have to recalculate all occurrences to detailed statistics
boolean successullySaved = statistic.saveResultNestedToDisk();
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
} else {
@ -207,6 +650,7 @@ public class WordLevelTab {
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnFailed(e -> {
@ -217,44 +661,36 @@ public class WordLevelTab {
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
task.setOnCancelled(e -> {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
logger.info("cancel button");
});
cancel.setVisible(true);
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();
}
/**
 * Writes the given alert text to the log at info level, prefixed with "alert: ".
 *
 * @param alert text of the alert to record
 */
private void logAlert(String alert) {
    final String entry = "alert: " + alert;
    logger.info(entry);
}
/**
 * Assigns the corpus this tab works on and updates the selected-filters label:
 * for a SOLAR corpus the label is shown with the placeholder "/", for any other
 * corpus type it is hidden.
 *
 * @param corpus corpus to use
 */
public void setCorpus(Corpus corpus) {
    this.corpus = corpus;

    final boolean isSolar = corpus.getCorpusType() == CorpusType.SOLAR;
    setSelectedFiltersLabel(isSolar ? "/" : null);
}
/**
 * Shows or hides the solar-filters caption together with the selected-filters label.
 * Both are visible exactly when {@code content} is non-null, in which case the label
 * text is set to {@code content}.
 *
 * @param content text to display, or {@code null} to hide both labels
 */
public void setSelectedFiltersLabel(String content) {
    final boolean hasContent = content != null;
    solarFilters.setVisible(hasContent);
    selectedFiltersLabel.setVisible(hasContent);
    if (hasContent) {
        selectedFiltersLabel.setText(content);
    }
}
/**
 * Stores the given solar filters map on this tab; {@code compute()} later passes it
 * to the filter via {@code setSolarFilters}. The map is kept by reference.
 *
 * @param solarFiltersMap map from a filter name to its set of values
 *                        (presumably the selected values - verify against the caller)
 */
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
this.solarFiltersMap = solarFiltersMap;
}
/**
 * Stores the host services instance used by {@code openHelpWebsite()} to show the help page.
 *
 * @param hostServices host services to use
 */
public void setHostServices(HostServices hostServices){
this.hostService = hostServices;
}
}

@ -1,32 +1,115 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import org.controlsfx.control.CheckComboBox?>
<?import javafx.scene.control.*?>
<?import java.lang.String?>
<?import javafx.collections.FXCollections?>
<?import javafx.scene.control.Button?>
<?import javafx.scene.control.CheckBox?>
<?import javafx.scene.control.Hyperlink?>
<?import javafx.scene.control.ComboBox?>
<?import javafx.scene.control.Label?>
<?import javafx.scene.control.ProgressBar?>
<?import javafx.scene.control.TextField?>
<?import javafx.scene.layout.AnchorPane?>
<?import javafx.scene.layout.Pane?>
<?import org.controlsfx.control.CheckComboBox?>
<AnchorPane fx:id="wordLevelAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordLevelTab">
<Pane>
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
<Pane>
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za"/>
<!-- Combo box injected as fx:id "calculateForCB"; it offers five fixed string items.
     The item strings are runtime values: presumably the controller compares against them,
     so verify gui.WordLevelTab before renaming any of them.
     NOTE(review): minWidth (180) is larger than prefWidth (150); confirm this is intended. -->
<ComboBox fx:id="calculateForCB" layoutX="185.0" layoutY="20.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
visibleRowCount="5">
<items>
<FXCollections fx:factory="observableArrayList">
<String fx:value="lema"/>
<String fx:value="različnica"/>
<String fx:value="oblikoskladenjska oznaka"/>
<String fx:value="oblikoskladenjska lastnost"/>
<String fx:value="besedna vrsta"/>
</FXCollections>
</items>
</ComboBox>
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
<CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
<CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
<!--<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Izpiši razbit MSD" />-->
<!--<CheckBox fx:id="writeMsdAtTheEndChB" layoutX="263.0" layoutY="145.0" selected="false" />-->
<!-- MSD and Taxonomy separated -->
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Iskanje besednih delov preko podane dolžine" />
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Predpona je dolga"/>
<!-- Combo box injected as fx:id "prefixLengthCB"; the selectable values are the strings "0" to "5".
     The items are String elements, not numbers, so any numeric use requires parsing on the Java side.
     Six items are declared while visibleRowCount is 5. -->
<ComboBox fx:id="prefixLengthCB" layoutX="185.0" layoutY="200.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
<items>
<FXCollections fx:factory="observableArrayList">
<String fx:value="0" />
<String fx:value="1" />
<String fx:value="2" />
<String fx:value="3" />
<String fx:value="4" />
<String fx:value="5" />
</FXCollections>
</items>
</ComboBox>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Pripona je dolga"/>
<!-- Combo box injected as fx:id "suffixLengthCB"; the selectable values are the strings "0" to "5".
     The items are String elements, not numbers, so any numeric use requires parsing on the Java side.
     Six items are declared while visibleRowCount is 5. -->
<ComboBox fx:id="suffixLengthCB" layoutX="185.0" layoutY="240.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
<items>
<FXCollections fx:factory="observableArrayList">
<String fx:value="0" />
<String fx:value="1" />
<String fx:value="2" />
<String fx:value="3" />
<String fx:value="4" />
<String fx:value="5" />
</FXCollections>
</items>
</ComboBox>
<Label layoutX="10.0" layoutY="300.0" prefHeight="25.0" text="Iskanje besednih delov preko podanih predpon in pripon" />
<Label layoutX="10.0" layoutY="340.0" prefHeight="25.0" text="Seznam predpon"/>
<TextField fx:id="prefixListTF" layoutX="185.0" layoutY="340.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="380.0" prefHeight="25.0" text="Seznam pripon"/>
<TextField fx:id="suffixListTF" layoutX="185.0" layoutY="380.0" prefWidth="180.0" />
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
</Pane>
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
<!-- MSD and Taxonomy separated -->
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Omejitev podatkov" />
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="140.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="180.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="180.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="220.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0" />
<Button fx:id="computeB" layoutX="14.0" layoutY="422.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
</Pane>
<Label fx:id="solarFilters" layoutX="10.0" layoutY="280.0" text="Izbrani filtri:" />
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="320.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />
</Pane>
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
text=" " wrapText="true"/>
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
<Button fx:id="cancel" layoutX="540.0" layoutY="482.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Prekini"/>
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
</AnchorPane>

Loading…
Cancel
Save