list/src/main/java/gui/StringAnalysisTabNew2.java

675 lines
25 KiB
Java
Raw Normal View History

2018-06-19 07:15:37 +00:00
package gui;
import static alg.XML_processing.*;
import static gui.GUIController.*;
import static gui.Messages.*;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.regex.Pattern;
import javafx.application.HostServices;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;
import data.*;
import javafx.collections.FXCollections;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
import javafx.scene.layout.Pane;
@SuppressWarnings("Duplicates")
public class StringAnalysisTabNew2 {
public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class);
@FXML
public Label selectedFiltersLabel;
@FXML
public Label solarFilters;
@FXML
private TextField msdTF;
private ArrayList<Pattern> msd;
private ArrayList<String> msdStrings;
@FXML
private CheckComboBox<String> alsoVisualizeCCB;
private ArrayList<String> alsoVisualize;
2018-06-19 07:15:37 +00:00
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
@FXML
private CheckBox calculatecvvCB;
private boolean calculateCvv;
@FXML
private TextField stringLengthTF;
private Integer stringLength;
@FXML
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
@FXML
private ComboBox<String> ngramValueCB;
private Integer ngramValue;
@FXML
private ComboBox<String> skipValueCB;
private Integer skipValue;
2018-07-23 07:14:46 +00:00
@FXML
private CheckBox notePunctuationsChB;
private boolean notePunctuations;
@FXML
private CheckBox displayTaxonomyChB;
private boolean displayTaxonomy;
@FXML
private TextField minimalOccurrencesTF;
private Integer minimalOccurrences;
@FXML
private TextField minimalTaxonomyTF;
private Integer minimalTaxonomy;
2018-06-19 07:15:37 +00:00
@FXML
private Pane paneWords;
@FXML
private Pane paneLetters;
@FXML
private Button computeNgramsB;
2018-06-29 10:53:29 +00:00
@FXML
private Button cancel;
2018-06-19 07:15:37 +00:00
@FXML
public ProgressBar ngramProgressBar;
@FXML
public Label progressLabel;
@FXML
private Hyperlink helpH;
private enum MODE {
LETTER,
WORD
}
private MODE currentMode;
private Corpus corpus;
private HashMap<String, HashSet<String>> solarFiltersMap;
private Filter filter;
private boolean useDb;
private HostServices hostService;
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
2018-06-19 07:15:37 +00:00
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
2018-06-19 07:15:37 +00:00
// TODO: pass observables for taxonomy based on header scan
// after header scan
private ObservableList<String> taxonomyCCBValues;
private CorpusType currentCorpusType;
public void init() {
currentMode = MODE.WORD;
toggleMode(currentMode);
// ngram value CB
ngramValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
if (newValue.equals("nivo črk")) {
ngramValue = 0;
toggleMode(MODE.LETTER);
} else {
ngramValue = Integer.valueOf(newValue);
toggleMode(MODE.WORD);
}
// skip only on ngrams of more than one word
if (ngramValue > 1) {
skipValueCB.setDisable(false);
} else {
skipValueCB.getSelectionModel().select(0);
skipValue = 0;
skipValueCB.setDisable(true);
}
logger.info("ngramValueCB:", ngramValue);
});
// set first n-gram value to 2 at index 0
ngramValueCB.getSelectionModel().select(0); // selected index
ngramValue = 2; // actual value at that index
// set default values
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
2018-07-23 07:14:46 +00:00
notePunctuations = true;
// set
notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
notePunctuations = newValue;
logger.info("note punctuations: ", notePunctuations);
});
notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB));
displayTaxonomy = false;
// set
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
displayTaxonomy = newValue;
logger.info("display taxonomy: ", displayTaxonomy);
});
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
2018-06-19 07:15:37 +00:00
// calculateForCB
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
calculateFor = CalculateFor.factory(newValue);
alsoVisualizeCCB.getItems().removeAll();
if(newValue.equals("lema")){
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
} else if(newValue.equals("različnica")) {
if (corpus.getCorpusType() == CorpusType.GOS)
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWordGos);
else
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
} else if(newValue.equals("normalizirana različnica")) {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
}else {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
}
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
alsoVisualize = new ArrayList<>();
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
alsoVisualize.addAll(checkedItems);
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
});
alsoVisualizeCCB.getCheckModel().clearChecks();
2018-06-19 07:15:37 +00:00
logger.info("calculateForCB:", calculateFor.toString());
});
calculateForCB.getSelectionModel().select(0);
// msd
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = msdTF.getText();
logger.info("msdTf: ", value);
if (!ValidationUtil.isEmpty(value)) {
ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
int nOfRequiredMsdTokens = ngramValue == 0 ? 1 : ngramValue;
if (msdTmp.size() != nOfRequiredMsdTokens) {
String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
logAlert(msg);
showAlert(Alert.AlertType.ERROR, msg);
}
msd = new ArrayList<>();
msdStrings = new ArrayList<>();
for (String msdToken : msdTmp) {
msd.add(Pattern.compile(msdToken));
msdStrings.add(msdToken);
}
logger.info(String.format("msd accepted (%d)", msd.size()));
} else if (!ValidationUtil.isEmpty(newValue)) {
msd = new ArrayList<>();
msdStrings = new ArrayList<>();
}
}
});
msdTF.setText("");
msd = new ArrayList<>();
alsoVisualizeCCB.getItems().removeAll();
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
alsoVisualize = new ArrayList<>();
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
alsoVisualize.addAll(checkedItems);
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
});
alsoVisualizeCCB.getCheckModel().clearChecks();
2018-06-19 07:15:37 +00:00
// taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
taxonomy = new ArrayList<>();
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems);
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
});
taxonomyCCB.getCheckModel().clearChecks();
} else {
taxonomyCCB.setDisable(true);
}
// skip
skipValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
skipValue = Integer.valueOf(newValue);
logger.info("Skip " + skipValue);
});
skipValueCB.getSelectionModel().select(0);
skipValue = 0;
// cvv
calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
calculateCvv = newValue;
logger.info("calculate cvv: " + calculateCvv);
});
calculatecvvCB.setSelected(false);
// string length
stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = stringLengthTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("stringlengthTf: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
}
stringLength = Integer.parseInt(value);
} else {
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_MISSING_STRING_LENGTH);
stringLengthTF.setText("1");
logAlert(WARNING_MISSING_STRING_LENGTH);
}
}
});
minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalOccurrencesTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalOccurrencesTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalOccurrences = Integer.parseInt(value);
}
} else {
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
}
}
});
minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalTaxonomyTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalTaxonomyTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalTaxonomy = Integer.parseInt(value);
}
} else {
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
}
}
});
2018-06-19 07:15:37 +00:00
computeNgramsB.setOnAction(e -> {
compute();
logger.info("compute button");
});
helpH.setOnAction(e -> openHelpWebsite());
2018-06-29 10:53:29 +00:00
cancel.setVisible(false);
2018-06-19 07:15:37 +00:00
}
/**
* case a: values for combo boxes can change after a corpus change
* <ul>
* <li>different corpus type - reset all fields so no old values remain</li>
* <li>same corpus type, different subset - keep</li>
* </ul>
* <p>
* case b: values for combo boxes can change after a header scan
* <ul>
* <li>at first, fields are populated by corpus type defaults</li>
* <li>after, with gathered data</li>
* </ul>
* <p></p>
* ngrams: 1
* calculateFor: word
* msd:
* taxonomy:
* skip: 0
* iscvv: false
* string length: 1
*/
public void populateFields() {
// corpus changed if: current one is null (this is first run of the app)
// or if currentCorpus != gui's corpus
boolean corpusChanged = currentCorpusType == null
|| currentCorpusType != corpus.getCorpusType();
// keep ngram value if set
if (ngramValue == null) {
ngramValueCB.getSelectionModel().select("1");
ngramValue = 1;
}
// TODO: check for GOS, GIGAFIDA, SOLAR...
// refresh and:
// TODO if current value != null && is in new calculateFor ? keep : otherwise reset
if (calculateFor == null) {
calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
}
if (!filter.hasMsd()) {
// if current corpus doesn't have msd data, disable this field
msd = new ArrayList<>();
msdTF.setText("");
msdTF.setDisable(true);
logger.info("no msd data");
} else {
if (ValidationUtil.isEmpty(msd)
|| (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// msd has not been set previously
// or msd has been set but the corpus changed -> reset
msd = new ArrayList<>();
msdTF.setText("");
msdTF.setDisable(false);
logger.info("msd reset");
} else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// if msd has been set, but corpus type remained the same, we can keep any set msd value
msdTF.setText(StringUtils.join(msdStrings, " "));
msdTF.setDisable(false);
logger.info("msd kept");
}
}
// TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
// keep skip value
if (skipValue == null) {
skipValueCB.getSelectionModel().select("0");
skipValue = 0;
}
// keep calculateCvv
calculatecvvCB.setSelected(calculateCvv);
// keep string length if set
if (stringLength != null) {
stringLengthTF.setText(String.valueOf(stringLength));
} else {
stringLengthTF.setText("1");
stringLength = 1;
}
// TODO: trigger on rescan
if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// user changed corpus (by type) or by selection & triggered a rescan of headers
// see if we read taxonomy from headers, otherwise use default values for given corpus
ObservableList<String> tax = corpus.getTaxonomy();
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
currentCorpusType = corpus.getCorpusType();
// setTaxonomyIsDirty(false);
} else {
}
// see if we read taxonomy from headers, otherwise use default values for given corpus
ObservableList<String> tax = corpus.getTaxonomy();
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
taxonomyCCB.getItems().addAll(taxonomyCCBValues);
}
/**
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
* sets combobox values to what is applicable ...
*
* @param mode
*/
public void toggleMode(MODE mode) {
if (mode == null) {
mode = currentMode;
}
logger.info("mode: ", mode.toString());
if (mode == MODE.WORD) {
paneWords.setVisible(true);
paneLetters.setVisible(false);
// if (corpus.getCorpusType() == CorpusType.GOS)
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
// else
if (corpus.getCorpusType() == CorpusType.GOS)
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
else
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
2018-06-19 07:15:37 +00:00
} else if (mode == MODE.LETTER) {
paneWords.setVisible(false);
paneLetters.setVisible(true);
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);
// populate with default cvv length value
if (stringLength == null) {
stringLengthTF.setText("1");
stringLength = 1;
} else {
stringLengthTF.setText(String.valueOf(stringLength));
}
// if calculateFor was selected for something other than a word or a lemma -> reset
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
// if the user selected something else before selecting ngram for letters, reset that choice
calculateFor = CalculateFor.WORD;
calculateForCB.getSelectionModel().select("različnica");
}
}
// override if orth mode, allow only word
if (corpus.isGosOrthMode()) {
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
msdTF.setDisable(true);
} else {
msdTF.setDisable(false);
}
}
private void compute() {
Filter filter = new Filter();
filter.setNgramValue(ngramValue);
filter.setCalculateFor(calculateFor);
filter.setMsd(msd);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setDisplayTaxonomy(displayTaxonomy);
2018-06-19 07:15:37 +00:00
filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(skipValue);
filter.setIsCvv(calculateCvv);
filter.setSolarFilters(solarFiltersMap);
2018-07-23 07:14:46 +00:00
filter.setNotePunctuations(notePunctuations);
filter.setMultipleKeys(alsoVisualize);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
2018-06-19 07:15:37 +00:00
if (ngramValue != null && ngramValue == 0) {
filter.setStringLength(stringLength);
}
String message = Validation.validateForStringLevel(filter);
if (message == null) {
// no errors
logger.info("Executing: ", filter.toString());
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
execute(statistic);
} else {
logAlert(message);
showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
}
}
private void logAlert(String alert) {
logger.info("alert: " + alert);
}
private void openHelpWebsite(){
hostService.showDocument(Messages.HELP_URL);
}
public Corpus getCorpus() {
return corpus;
}
public void setCorpus(Corpus corpus) {
this.corpus = corpus;
if (corpus.getCorpusType() != CorpusType.SOLAR) {
setSelectedFiltersLabel(null);
} else {
setSelectedFiltersLabel("/");
}
}
public void setSelectedFiltersLabel(String content) {
if (content != null) {
solarFilters.setVisible(true);
selectedFiltersLabel.setVisible(true);
selectedFiltersLabel.setText(content);
} else {
solarFilters.setVisible(false);
selectedFiltersLabel.setVisible(false);
}
}
private void execute(StatisticsNew statistic) {
logger.info("Started execution: ", statistic.getFilter());
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
boolean corpusIsSplit = corpusFiles.size() > 1;
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
for (File f : corpusFiles) {
readXML(f.toString(), statistic);
i++;
2018-06-29 10:53:29 +00:00
if (isCancelled()) {
updateMessage(CANCELING_NOTIFICATION);
break;
}
2018-06-19 07:15:37 +00:00
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
}
return null;
}
};
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
try {
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
} else {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1);
2018-07-23 07:14:46 +00:00
} catch (OutOfMemoryError e1){
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
2018-06-19 07:15:37 +00:00
}
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
2018-06-29 10:53:29 +00:00
cancel.setVisible(false);
2018-06-19 07:15:37 +00:00
});
task.setOnFailed(e -> {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
logger.error("Error while executing", e);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
2018-06-29 10:53:29 +00:00
cancel.setVisible(false);
});
task.setOnCancelled(e -> {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
cancel.setVisible(false);
2018-06-19 07:15:37 +00:00
});
2018-06-29 10:53:29 +00:00
// When cancel button is pressed cancel analysis
cancel.setOnAction(e -> {
task.cancel();
logger.info("cancel button");
});
cancel.setVisible(true);
2018-06-19 07:15:37 +00:00
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();
}
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
this.solarFiltersMap = solarFiltersMap;
}
public void setHostServices(HostServices hostServices){
this.hostService = hostServices;
}
}