package gui;

import static alg.XML_processing.*;
import static gui.GUIController.*;
import static gui.Messages.*;

import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.regex.Pattern;

import javafx.application.HostServices;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;

import data.*;
import javafx.collections.FXCollections;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
import javafx.scene.layout.Pane;

/**
 * Controller for the string-level (n-gram / letter-level) analysis tab.
 *
 * <p>Collects the filter settings entered in the tab (n-gram size, what to calculate for, msd
 * patterns, taxonomy, skip value, cvv flag, string length), builds a {@link Filter} from them and
 * runs the analysis over the corpus files on a background {@link Task}.
 *
 * <p>{@link #setCorpus(Corpus)} must be called before {@link #init()}, because {@code init()}
 * reads the corpus type and the orth-mode flag.
 */
@SuppressWarnings("Duplicates")
public class StringAnalysisTabNew2 {
    public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class);

    @FXML
    public Label selectedFiltersLabel;
    @FXML
    public Label solarFilters;

    @FXML
    private TextField msdTF;
    /** Compiled msd patterns, one per whitespace-separated token typed into {@link #msdTF}. */
    private ArrayList<Pattern> msd;
    /** The raw msd tokens, kept so the text field can be repopulated. */
    private ArrayList<String> msdStrings;

    @FXML
    private CheckComboBox<String> taxonomyCCB;
    private ArrayList<String> taxonomy;

    @FXML
    private CheckBox calculatecvvCB;
    private boolean calculateCvv;

    @FXML
    private TextField stringLengthTF;
    private Integer stringLength;

    @FXML
    private ComboBox<String> calculateForCB;
    private CalculateFor calculateFor;

    @FXML
    private ComboBox<String> ngramValueCB;
    /** Selected n-gram size; {@code 0} stands for letter-level analysis. */
    private Integer ngramValue;

    @FXML
    private ComboBox<String> skipValueCB;
    private Integer skipValue;

    @FXML
    private Pane paneWords;
    @FXML
    private Pane paneLetters;

    @FXML
    private Button computeNgramsB;
    @FXML
    private Button cancel;

    @FXML
    public ProgressBar ngramProgressBar;
    @FXML
    public Label progressLabel;

    @FXML
    private Hyperlink helpH;

    private enum MODE {
        LETTER,
        WORD
    }

    private MODE currentMode;

    private Corpus corpus;
    // NOTE(review): generic arguments reconstructed (the extracted source read "HashMap>");
    // confirm the value type against Filter.setSolarFilters and the callers of setSolarFiltersMap.
    private HashMap<String, HashSet<String>> solarFiltersMap;
    // NOTE(review): this field is never assigned anywhere in this file (compute() uses a local
    // Filter), yet populateFields() dereferences it -- confirm where it is supposed to be set.
    private Filter filter;
    private boolean useDb;
    private HostServices hostService;

    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS =
            FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS =
            FXCollections.observableArrayList("lema", "različnica");
    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH =
            FXCollections.observableArrayList("različnica");

    // TODO: pass observables for taxonomy based on header scan
    // after header scan
    private ObservableList<String> taxonomyCCBValues;
    private CorpusType currentCorpusType;

    /**
     * Wires up all listeners of the tab and puts every control into its initial state.
     */
    public void init() {
        currentMode = MODE.WORD;
        toggleMode(currentMode);

        // ngram value CB
        ngramValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
            if (newValue == null) {
                // selection cleared; nothing to parse
                return;
            }

            if (newValue.equals("nivo črk")) {
                ngramValue = 0;
                toggleMode(MODE.LETTER);
            } else {
                ngramValue = Integer.valueOf(newValue);
                toggleMode(MODE.WORD);
            }

            // skip only on ngrams of more than one word
            if (ngramValue > 1) {
                skipValueCB.setDisable(false);
            } else {
                skipValueCB.getSelectionModel().select(0);
                skipValue = 0;
                skipValueCB.setDisable(true);
            }

            logger.info("ngramValueCB: {}", ngramValue);
        });

        // set first n-gram value to 2 at index 0
        ngramValueCB.getSelectionModel().select(0); // selected index
        ngramValue = 2; // actual value at that index

        // calculateForCB
        calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
            calculateFor = CalculateFor.factory(newValue);
            logger.info("calculateForCB: {}", calculateFor);
        });
        calculateForCB.getSelectionModel().select(0);

        // msd
        msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
            if (newValue) {
                return; // only react when focus is lost
            }

            String value = msdTF.getText();
            logger.info("msdTf: {}", value);

            if (!ValidationUtil.isEmpty(value)) {
                ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));

                // letter level (0) still expects exactly one msd token
                int nOfRequiredMsdTokens = ngramValue == 0 ? 1 : ngramValue;
                if (msdTmp.size() != nOfRequiredMsdTokens) {
                    String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES,
                            nOfRequiredMsdTokens, msdTmp.size());
                    logAlert(msg);
                    showAlert(Alert.AlertType.ERROR, msg);
                }

                // the tokens are accepted even when the count mismatches (the user was warned above)
                msd = new ArrayList<>();
                msdStrings = new ArrayList<>();
                for (String msdToken : msdTmp) {
                    msd.add(Pattern.compile(msdToken));
                    msdStrings.add(msdToken);
                }
                logger.info(String.format("msd accepted (%d)", msd.size()));
            } else if (!ValidationUtil.isEmpty(newValue)) {
                // NOTE(review): newValue is the (false) focus flag here, not the text -- presumably
                // this is meant to clear msd whenever the field is left empty; confirm.
                msd = new ArrayList<>();
                msdStrings = new ArrayList<>();
            }
        });

        msdTF.setText("");
        msd = new ArrayList<>();
        msdStrings = new ArrayList<>();

        // taxonomy
        if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
            taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
            taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
                taxonomy = new ArrayList<>();
                ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
                taxonomy.addAll(checkedItems);
                logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
            });
            taxonomyCCB.getCheckModel().clearChecks();
        } else {
            taxonomyCCB.setDisable(true);
        }

        // skip
        skipValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
            if (newValue == null) {
                // selection cleared; keep the previous skip value
                return;
            }
            skipValue = Integer.valueOf(newValue);
            logger.info("Skip {}", skipValue);
        });
        skipValueCB.getSelectionModel().select(0);
        skipValue = 0;

        // cvv
        calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
            calculateCvv = newValue;
            logger.info("calculate cvv: {}", calculateCvv);
        });
        calculatecvvCB.setSelected(false);

        // string length
        stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
            if (newValue) {
                return; // only react when focus is lost
            }

            String value = stringLengthTF.getText();
            if (ValidationUtil.isEmpty(value)) {
                GUIController.showAlert(Alert.AlertType.ERROR, WARNING_MISSING_STRING_LENGTH);
                stringLengthTF.setText("1");
                logAlert(WARNING_MISSING_STRING_LENGTH);
            } else if (!ValidationUtil.isNumber(value)) {
                // do not parse: the previous stringLength is kept until a valid number is entered
                logAlert("stringlengthTf: " + WARNING_ONLY_NUMBERS_ALLOWED);
                GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
            } else {
                stringLength = Integer.parseInt(value);
            }
        });

        computeNgramsB.setOnAction(e -> {
            compute();
            logger.info("compute button");
        });

        helpH.setOnAction(e -> openHelpWebsite());

        cancel.setVisible(false);
    }

    /**
     * Populates the fields of the tab, keeping previously entered values where possible.
     *
     * <p>case a: values for combo boxes can change after a corpus change
     * <ul>
     * <li>different corpus type - reset all fields so no old values remain</li>
     * <li>same corpus type, different subset - keep</li>
     * </ul>
     *
     * <p>case b: values for combo boxes can change after a header scan
     * <ul>
     * <li>at first, fields are populated by corpus type defaults</li>
     * <li>after, with gathered data</li>
     * </ul>
     *
     * <p>Defaults:
     * <pre>
     * ngrams: 1
     * calculateFor: word
     * msd:
     * taxonomy:
     * skip: 0
     * iscvv: false
     * string length: 1
     * </pre>
     */
    public void populateFields() {
        // corpus changed if: current one is null (this is first run of the app)
        // or if currentCorpus != gui's corpus
        CorpusType newCorpusType = corpus.getCorpusType();
        boolean corpusChanged = currentCorpusType == null || currentCorpusType != newCorpusType;

        // keep ngram value if set
        if (ngramValue == null) {
            ngramValueCB.getSelectionModel().select("1");
            ngramValue = 1;
        }

        // TODO: check for GOS, GIGAFIDA, SOLAR...
        // refresh and:
        // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
        if (calculateFor == null) {
            String firstOption = calculateForCB.getItems().get(0);
            calculateForCB.getSelectionModel().select(firstOption);
            calculateFor = CalculateFor.factory(firstOption);
        }

        if (!filter.hasMsd()) {
            // if current corpus doesn't have msd data, disable this field
            msd = new ArrayList<>();
            msdTF.setText("");
            msdTF.setDisable(true);
            logger.info("no msd data");
        } else if (ValidationUtil.isEmpty(msd) || corpusChanged) {
            // msd has not been set previously
            // or msd has been set but the corpus changed -> reset
            msd = new ArrayList<>();
            msdTF.setText("");
            msdTF.setDisable(false);
            logger.info("msd reset");
        } else {
            // msd has been set and the corpus type remained the same, so the value can be kept
            msdTF.setText(StringUtils.join(msdStrings, " "));
            msdTF.setDisable(false);
            logger.info("msd kept");
        }

        // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)

        // keep skip value
        if (skipValue == null) {
            skipValueCB.getSelectionModel().select("0");
            skipValue = 0;
        }

        // keep calculateCvv
        calculatecvvCB.setSelected(calculateCvv);

        // keep string length if set
        if (stringLength == null) {
            stringLength = 1;
        }
        stringLengthTF.setText(String.valueOf(stringLength));

        // TODO: trigger on rescan
        // see if we read taxonomy from headers, otherwise use default values for given corpus
        ObservableList<String> tax = corpus.getTaxonomy();
        taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(newCorpusType);

        // replace (never append) so repeated calls cannot duplicate entries; skip the replacement
        // when nothing changed so the items list is left untouched
        if (!taxonomyCCB.getItems().equals(taxonomyCCBValues)) {
            taxonomyCCB.getItems().setAll(taxonomyCCBValues);
        }

        // remember the type so that the next call can tell whether the corpus changed
        currentCorpusType = newCorpusType;
    }

    /**
     * Toggles visibility for panes which hold fields for skipgram value (not applicable when
     * calculating for letters) etc., and sets combobox values to what is applicable for the mode.
     *
     * @param mode mode to switch to; {@code null} re-applies the current mode
     */
    public void toggleMode(MODE mode) {
        if (mode == null) {
            mode = currentMode;
        }
        currentMode = mode;

        logger.info("mode: {}", mode);

        if (mode == MODE.WORD) {
            paneWords.setVisible(true);
            paneLetters.setVisible(false);
            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
        } else if (mode == MODE.LETTER) {
            paneWords.setVisible(false);
            paneLetters.setVisible(true);
            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);

            // populate with default cvv length value
            if (stringLength == null) {
                stringLength = 1;
            }
            stringLengthTF.setText(String.valueOf(stringLength));

            // if the user selected something other than a word or a lemma before switching to
            // letters, reset that choice
            if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
                calculateFor = CalculateFor.WORD;
                calculateForCB.getSelectionModel().select("različnica");
            }
        }

        // override if orth mode, allow only word
        if (corpus.isGosOrthMode()) {
            calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
            msdTF.setDisable(true);
        } else {
            msdTF.setDisable(false);
        }
    }

    /**
     * Builds a {@link Filter} from the current field values, validates it and, when valid, starts
     * the analysis; otherwise shows the validation message to the user.
     */
    private void compute() {
        Filter filter = new Filter();
        filter.setNgramValue(ngramValue);
        filter.setCalculateFor(calculateFor);
        filter.setMsd(msd);
        filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
        filter.setAl(AnalysisLevel.STRING_LEVEL);
        filter.setSkipValue(skipValue);
        filter.setIsCvv(calculateCvv);
        filter.setSolarFilters(solarFiltersMap);

        // string length only applies to letter-level analysis (ngramValue == 0)
        if (ngramValue != null && ngramValue == 0) {
            filter.setStringLength(stringLength);
        }

        String message = Validation.validateForStringLevel(filter);
        if (message == null) {
            // no errors
            logger.info("Executing: {}", filter);
            StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
            execute(statistic);
        } else {
            logAlert(message);
            showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
        }
    }

    private void logAlert(String alert) {
        logger.info("alert: " + alert);
    }

    private void openHelpWebsite() {
        hostService.showDocument(Messages.HELP_URL);
    }

    public Corpus getCorpus() {
        return corpus;
    }

    /**
     * Sets the corpus and shows the solar-filter labels only for {@code CorpusType.SOLAR}.
     *
     * @param corpus corpus to analyse
     */
    public void setCorpus(Corpus corpus) {
        this.corpus = corpus;

        if (corpus.getCorpusType() != CorpusType.SOLAR) {
            setSelectedFiltersLabel(null);
        } else {
            setSelectedFiltersLabel("/");
        }
    }

    /**
     * Shows the selected-filters labels with the given text, or hides them.
     *
     * @param content text to show; {@code null} hides both labels
     */
    public void setSelectedFiltersLabel(String content) {
        if (content != null) {
            solarFilters.setVisible(true);
            selectedFiltersLabel.setVisible(true);
            selectedFiltersLabel.setText(content);
        } else {
            solarFilters.setVisible(false);
            selectedFiltersLabel.setVisible(false);
        }
    }

    /**
     * Runs the analysis for every detected corpus file on a daemon background thread, binding the
     * progress bar and progress label to the task and wiring the cancel button to it.
     *
     * @param statistic statistics object that accumulates the results and is saved on success
     */
    private void execute(StatisticsNew statistic) {
        logger.info("Started execution: {}", statistic.getFilter());

        Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();

        final Task<Void> task = new Task<Void>() {
            @SuppressWarnings("Duplicates")
            @Override
            protected Void call() throws Exception {
                long i = 0;
                for (File f : corpusFiles) {
                    readXML(f.toString(), statistic);
                    i++;

                    // cancellation is checked after each file has been read
                    if (isCancelled()) {
                        updateMessage(CANCELING_NOTIFICATION);
                        break;
                    }

                    this.updateProgress(i, corpusFiles.size());
                    this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y,
                            i, corpusFiles.size(), f.getName()));
                }
                return null;
            }
        };

        ngramProgressBar.progressProperty().bind(task.progressProperty());
        progressLabel.textProperty().bind(task.messageProperty());

        task.setOnSucceeded(e -> {
            try {
                boolean successfullySaved = statistic.saveResultToDisk();
                if (successfullySaved) {
                    showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
                } else {
                    showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
                }
            } catch (UnsupportedEncodingException e1) {
                showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
                logger.error("Error while saving", e1);
            }

            // on success the progress value is left as is (not reset to 0)
            restoreIdleUi(Settings.FX_ACCENT_OK, false);
        });

        task.setOnFailed(e -> {
            showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
            // log the exception thrown by the task, not the state-change event
            logger.error("Error while executing", task.getException());
            restoreIdleUi(Settings.FX_ACCENT_NOK, true);
        });

        task.setOnCancelled(e -> {
            showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
            restoreIdleUi(Settings.FX_ACCENT_OK, true);
        });

        // When cancel button is pressed cancel analysis
        cancel.setOnAction(e -> {
            task.cancel();
            logger.info("cancel button");
        });

        cancel.setVisible(true);

        final Thread thread = new Thread(task, "task");
        thread.setDaemon(true);
        thread.start();
    }

    /**
     * Unbinds the progress controls from the finished task and returns them to the idle state.
     *
     * @param accentStyle   style applied to the progress bar
     * @param resetProgress whether the progress bar is reset to 0
     */
    private void restoreIdleUi(String accentStyle, boolean resetProgress) {
        ngramProgressBar.progressProperty().unbind();
        if (resetProgress) {
            ngramProgressBar.setProgress(0.0);
        }
        ngramProgressBar.setStyle(accentStyle);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    }

    public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
        this.solarFiltersMap = solarFiltersMap;
    }

    public void setHostServices(HostServices hostServices) {
        this.hostService = hostServices;
    }
}