512 lines
18 KiB
Java
Executable File
512 lines
18 KiB
Java
Executable File
package gui;
|
|
|
|
import static alg.XML_processing.*;
|
|
import static gui.GUIController.*;
|
|
import static gui.Messages.*;
|
|
|
|
import java.io.File;
|
|
import java.io.UnsupportedEncodingException;
|
|
import java.util.*;
|
|
import java.util.regex.Pattern;
|
|
|
|
import javafx.application.HostServices;
|
|
import org.apache.commons.lang3.StringUtils;
|
|
import org.apache.logging.log4j.LogManager;
|
|
import org.apache.logging.log4j.Logger;
|
|
import org.controlsfx.control.CheckComboBox;
|
|
|
|
import data.*;
|
|
import javafx.collections.FXCollections;
|
|
import javafx.collections.ListChangeListener;
|
|
import javafx.collections.ObservableList;
|
|
import javafx.concurrent.Task;
|
|
import javafx.fxml.FXML;
|
|
import javafx.scene.control.*;
|
|
import javafx.scene.layout.Pane;
|
|
|
|
@SuppressWarnings("Duplicates")
|
|
public class StringAnalysisTabNew2 {
|
|
public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class);
|
|
|
|
@FXML
|
|
public Label selectedFiltersLabel;
|
|
@FXML
|
|
public Label solarFilters;
|
|
|
|
@FXML
|
|
private TextField msdTF;
|
|
private ArrayList<Pattern> msd;
|
|
private ArrayList<String> msdStrings;
|
|
|
|
@FXML
|
|
private CheckComboBox<String> taxonomyCCB;
|
|
private ArrayList<String> taxonomy;
|
|
|
|
@FXML
|
|
private CheckBox calculatecvvCB;
|
|
private boolean calculateCvv;
|
|
|
|
@FXML
|
|
private TextField stringLengthTF;
|
|
private Integer stringLength;
|
|
|
|
@FXML
|
|
private ComboBox<String> calculateForCB;
|
|
private CalculateFor calculateFor;
|
|
|
|
@FXML
|
|
private ComboBox<String> ngramValueCB;
|
|
private Integer ngramValue;
|
|
|
|
@FXML
|
|
private ComboBox<String> skipValueCB;
|
|
private Integer skipValue;
|
|
|
|
@FXML
|
|
private Pane paneWords;
|
|
|
|
@FXML
|
|
private Pane paneLetters;
|
|
|
|
@FXML
|
|
private Button computeNgramsB;
|
|
|
|
@FXML
|
|
public ProgressBar ngramProgressBar;
|
|
@FXML
|
|
public Label progressLabel;
|
|
|
|
@FXML
|
|
private Hyperlink helpH;
|
|
|
|
private enum MODE {
|
|
LETTER,
|
|
WORD
|
|
}
|
|
|
|
private MODE currentMode;
|
|
|
|
private Corpus corpus;
|
|
private HashMap<String, HashSet<String>> solarFiltersMap;
|
|
private Filter filter;
|
|
private boolean useDb;
|
|
private HostServices hostService;
|
|
|
|
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
|
|
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
|
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
|
|
|
|
|
// TODO: pass observables for taxonomy based on header scan
|
|
// after header scan
|
|
private ObservableList<String> taxonomyCCBValues;
|
|
private CorpusType currentCorpusType;
|
|
|
|
public void init() {
|
|
currentMode = MODE.WORD;
|
|
toggleMode(currentMode);
|
|
|
|
// ngram value CB
|
|
ngramValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
|
if (newValue.equals("nivo črk")) {
|
|
ngramValue = 0;
|
|
toggleMode(MODE.LETTER);
|
|
} else {
|
|
ngramValue = Integer.valueOf(newValue);
|
|
toggleMode(MODE.WORD);
|
|
}
|
|
|
|
// skip only on ngrams of more than one word
|
|
if (ngramValue > 1) {
|
|
skipValueCB.setDisable(false);
|
|
} else {
|
|
skipValueCB.getSelectionModel().select(0);
|
|
skipValue = 0;
|
|
skipValueCB.setDisable(true);
|
|
}
|
|
|
|
logger.info("ngramValueCB:", ngramValue);
|
|
});
|
|
|
|
// set first n-gram value to 2 at index 0
|
|
ngramValueCB.getSelectionModel().select(0); // selected index
|
|
ngramValue = 2; // actual value at that index
|
|
|
|
// calculateForCB
|
|
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
|
calculateFor = CalculateFor.factory(newValue);
|
|
logger.info("calculateForCB:", calculateFor.toString());
|
|
});
|
|
|
|
calculateForCB.getSelectionModel().select(0);
|
|
|
|
// msd
|
|
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
|
if (!newValue) {
|
|
// focus lost
|
|
String value = msdTF.getText();
|
|
logger.info("msdTf: ", value);
|
|
|
|
if (!ValidationUtil.isEmpty(value)) {
|
|
ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
|
|
|
|
int nOfRequiredMsdTokens = ngramValue == 0 ? 1 : ngramValue;
|
|
if (msdTmp.size() != nOfRequiredMsdTokens) {
|
|
String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
|
|
logAlert(msg);
|
|
showAlert(Alert.AlertType.ERROR, msg);
|
|
}
|
|
msd = new ArrayList<>();
|
|
msdStrings = new ArrayList<>();
|
|
for (String msdToken : msdTmp) {
|
|
msd.add(Pattern.compile(msdToken));
|
|
msdStrings.add(msdToken);
|
|
}
|
|
logger.info(String.format("msd accepted (%d)", msd.size()));
|
|
|
|
} else if (!ValidationUtil.isEmpty(newValue)) {
|
|
msd = new ArrayList<>();
|
|
msdStrings = new ArrayList<>();
|
|
}
|
|
}
|
|
});
|
|
|
|
msdTF.setText("");
|
|
msd = new ArrayList<>();
|
|
|
|
// taxonomy
|
|
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
|
taxonomyCCB.getItems().removeAll();
|
|
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
|
|
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
|
taxonomy = new ArrayList<>();
|
|
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
|
taxonomy.addAll(checkedItems);
|
|
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
|
|
});
|
|
taxonomyCCB.getCheckModel().clearChecks();
|
|
} else {
|
|
taxonomyCCB.setDisable(true);
|
|
}
|
|
|
|
// skip
|
|
skipValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
|
skipValue = Integer.valueOf(newValue);
|
|
logger.info("Skip " + skipValue);
|
|
});
|
|
|
|
skipValueCB.getSelectionModel().select(0);
|
|
skipValue = 0;
|
|
|
|
// cvv
|
|
calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
|
calculateCvv = newValue;
|
|
logger.info("calculate cvv: " + calculateCvv);
|
|
});
|
|
|
|
calculatecvvCB.setSelected(false);
|
|
|
|
// string length
|
|
stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
|
if (!newValue) {
|
|
// focus lost
|
|
String value = stringLengthTF.getText();
|
|
if (!ValidationUtil.isEmpty(value)) {
|
|
if (!ValidationUtil.isNumber(value)) {
|
|
logAlert("stringlengthTf: " + WARNING_ONLY_NUMBERS_ALLOWED);
|
|
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
|
|
}
|
|
stringLength = Integer.parseInt(value);
|
|
} else {
|
|
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_MISSING_STRING_LENGTH);
|
|
stringLengthTF.setText("1");
|
|
logAlert(WARNING_MISSING_STRING_LENGTH);
|
|
}
|
|
}
|
|
});
|
|
|
|
computeNgramsB.setOnAction(e -> {
|
|
compute();
|
|
logger.info("compute button");
|
|
});
|
|
|
|
helpH.setOnAction(e -> openHelpWebsite());
|
|
}
|
|
|
|
/**
|
|
* case a: values for combo boxes can change after a corpus change
|
|
* <ul>
|
|
* <li>different corpus type - reset all fields so no old values remain</li>
|
|
* <li>same corpus type, different subset - keep</li>
|
|
* </ul>
|
|
* <p>
|
|
* case b: values for combo boxes can change after a header scan
|
|
* <ul>
|
|
* <li>at first, fields are populated by corpus type defaults</li>
|
|
* <li>after, with gathered data</li>
|
|
* </ul>
|
|
* <p></p>
|
|
* ngrams: 1
|
|
* calculateFor: word
|
|
* msd:
|
|
* taxonomy:
|
|
* skip: 0
|
|
* iscvv: false
|
|
* string length: 1
|
|
*/
|
|
public void populateFields() {
|
|
// corpus changed if: current one is null (this is first run of the app)
|
|
// or if currentCorpus != gui's corpus
|
|
boolean corpusChanged = currentCorpusType == null
|
|
|| currentCorpusType != corpus.getCorpusType();
|
|
|
|
// keep ngram value if set
|
|
if (ngramValue == null) {
|
|
ngramValueCB.getSelectionModel().select("1");
|
|
ngramValue = 1;
|
|
}
|
|
|
|
// TODO: check for GOS, GIGAFIDA, SOLAR...
|
|
// refresh and:
|
|
// TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
|
if (calculateFor == null) {
|
|
calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
|
|
calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
|
}
|
|
|
|
if (!filter.hasMsd()) {
|
|
// if current corpus doesn't have msd data, disable this field
|
|
msd = new ArrayList<>();
|
|
msdTF.setText("");
|
|
msdTF.setDisable(true);
|
|
logger.info("no msd data");
|
|
} else {
|
|
if (ValidationUtil.isEmpty(msd)
|
|
|| (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
|
// msd has not been set previously
|
|
// or msd has been set but the corpus changed -> reset
|
|
msd = new ArrayList<>();
|
|
msdTF.setText("");
|
|
msdTF.setDisable(false);
|
|
logger.info("msd reset");
|
|
} else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
|
// if msd has been set, but corpus type remained the same, we can keep any set msd value
|
|
msdTF.setText(StringUtils.join(msdStrings, " "));
|
|
msdTF.setDisable(false);
|
|
logger.info("msd kept");
|
|
}
|
|
}
|
|
|
|
// TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
|
|
|
|
// keep skip value
|
|
if (skipValue == null) {
|
|
skipValueCB.getSelectionModel().select("0");
|
|
skipValue = 0;
|
|
}
|
|
|
|
// keep calculateCvv
|
|
calculatecvvCB.setSelected(calculateCvv);
|
|
|
|
// keep string length if set
|
|
if (stringLength != null) {
|
|
stringLengthTF.setText(String.valueOf(stringLength));
|
|
} else {
|
|
stringLengthTF.setText("1");
|
|
stringLength = 1;
|
|
}
|
|
|
|
// TODO: trigger on rescan
|
|
if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
|
// user changed corpus (by type) or by selection & triggered a rescan of headers
|
|
// see if we read taxonomy from headers, otherwise use default values for given corpus
|
|
ObservableList<String> tax = corpus.getTaxonomy();
|
|
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
|
|
|
currentCorpusType = corpus.getCorpusType();
|
|
// setTaxonomyIsDirty(false);
|
|
} else {
|
|
|
|
}
|
|
|
|
// see if we read taxonomy from headers, otherwise use default values for given corpus
|
|
ObservableList<String> tax = corpus.getTaxonomy();
|
|
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
|
taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
|
|
|
}
|
|
|
|
/**
|
|
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
|
|
* sets combobox values to what is applicable ...
|
|
*
|
|
* @param mode
|
|
*/
|
|
public void toggleMode(MODE mode) {
|
|
if (mode == null) {
|
|
mode = currentMode;
|
|
}
|
|
|
|
logger.info("mode: ", mode.toString());
|
|
|
|
if (mode == MODE.WORD) {
|
|
paneWords.setVisible(true);
|
|
paneLetters.setVisible(false);
|
|
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
|
|
} else if (mode == MODE.LETTER) {
|
|
paneWords.setVisible(false);
|
|
paneLetters.setVisible(true);
|
|
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);
|
|
|
|
// populate with default cvv length value
|
|
if (stringLength == null) {
|
|
stringLengthTF.setText("1");
|
|
stringLength = 1;
|
|
} else {
|
|
stringLengthTF.setText(String.valueOf(stringLength));
|
|
}
|
|
|
|
// if calculateFor was selected for something other than a word or a lemma -> reset
|
|
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
|
|
// if the user selected something else before selecting ngram for letters, reset that choice
|
|
calculateFor = CalculateFor.WORD;
|
|
calculateForCB.getSelectionModel().select("različnica");
|
|
}
|
|
}
|
|
|
|
// override if orth mode, allow only word
|
|
if (corpus.isGosOrthMode()) {
|
|
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
|
|
msdTF.setDisable(true);
|
|
} else {
|
|
msdTF.setDisable(false);
|
|
}
|
|
}
|
|
|
|
private void compute() {
|
|
Filter filter = new Filter();
|
|
filter.setNgramValue(ngramValue);
|
|
filter.setCalculateFor(calculateFor);
|
|
filter.setMsd(msd);
|
|
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
|
|
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
|
filter.setSkipValue(skipValue);
|
|
filter.setIsCvv(calculateCvv);
|
|
filter.setSolarFilters(solarFiltersMap);
|
|
|
|
if (ngramValue != null && ngramValue == 0) {
|
|
filter.setStringLength(stringLength);
|
|
}
|
|
|
|
String message = Validation.validateForStringLevel(filter);
|
|
if (message == null) {
|
|
// no errors
|
|
logger.info("Executing: ", filter.toString());
|
|
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
|
execute(statistic);
|
|
} else {
|
|
logAlert(message);
|
|
showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
|
|
}
|
|
}
|
|
|
|
private void logAlert(String alert) {
|
|
logger.info("alert: " + alert);
|
|
}
|
|
|
|
private void openHelpWebsite(){
|
|
hostService.showDocument(Messages.HELP_URL);
|
|
}
|
|
|
|
public Corpus getCorpus() {
|
|
return corpus;
|
|
}
|
|
|
|
public void setCorpus(Corpus corpus) {
|
|
this.corpus = corpus;
|
|
|
|
if (corpus.getCorpusType() != CorpusType.SOLAR) {
|
|
setSelectedFiltersLabel(null);
|
|
} else {
|
|
setSelectedFiltersLabel("/");
|
|
}
|
|
}
|
|
|
|
public void setSelectedFiltersLabel(String content) {
|
|
if (content != null) {
|
|
solarFilters.setVisible(true);
|
|
selectedFiltersLabel.setVisible(true);
|
|
selectedFiltersLabel.setText(content);
|
|
} else {
|
|
solarFilters.setVisible(false);
|
|
selectedFiltersLabel.setVisible(false);
|
|
}
|
|
}
|
|
|
|
private void execute(StatisticsNew statistic) {
|
|
logger.info("Started execution: ", statistic.getFilter());
|
|
|
|
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
|
boolean corpusIsSplit = corpusFiles.size() > 1;
|
|
|
|
final Task<Void> task = new Task<Void>() {
|
|
@SuppressWarnings("Duplicates")
|
|
@Override
|
|
protected Void call() throws Exception {
|
|
long i = 0;
|
|
for (File f : corpusFiles) {
|
|
readXML(f.toString(), statistic);
|
|
i++;
|
|
this.updateProgress(i, corpusFiles.size());
|
|
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
|
}
|
|
|
|
return null;
|
|
}
|
|
};
|
|
|
|
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
|
progressLabel.textProperty().bind(task.messageProperty());
|
|
|
|
task.setOnSucceeded(e -> {
|
|
try {
|
|
boolean successullySaved = statistic.saveResultToDisk();
|
|
if (successullySaved) {
|
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
|
} else {
|
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
|
}
|
|
} catch (UnsupportedEncodingException e1) {
|
|
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
|
logger.error("Error while saving", e1);
|
|
}
|
|
|
|
ngramProgressBar.progressProperty().unbind();
|
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
progressLabel.textProperty().unbind();
|
|
progressLabel.setText("");
|
|
});
|
|
|
|
task.setOnFailed(e -> {
|
|
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
|
logger.error("Error while executing", e);
|
|
ngramProgressBar.progressProperty().unbind();
|
|
ngramProgressBar.setProgress(0.0);
|
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
progressLabel.textProperty().unbind();
|
|
progressLabel.setText("");
|
|
});
|
|
|
|
final Thread thread = new Thread(task, "task");
|
|
thread.setDaemon(true);
|
|
thread.start();
|
|
}
|
|
|
|
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
|
this.solarFiltersMap = solarFiltersMap;
|
|
}
|
|
public void setHostServices(HostServices hostServices){
|
|
this.hostService = hostServices;
|
|
}
|
|
}
|