package gui; import static alg.XML_processing.*; import static gui.GUIController.*; import static gui.Messages.*; import java.io.File; import java.io.UnsupportedEncodingException; import java.util.*; import java.util.regex.Pattern; import javafx.application.HostServices; import javafx.collections.transformation.SortedList; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.controlsfx.control.CheckComboBox; import data.*; import javafx.collections.FXCollections; import javafx.collections.ListChangeListener; import javafx.collections.ObservableList; import javafx.concurrent.Task; import javafx.fxml.FXML; import javafx.scene.control.*; import javafx.scene.layout.Pane; @SuppressWarnings("Duplicates") public class StringAnalysisTabNew2 { public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class); @FXML public Label selectedFiltersLabel; @FXML public Label solarFilters; @FXML private TextField msdTF; private ArrayList msd; private ArrayList msdStrings; @FXML private CheckComboBox alsoVisualizeCCB; private ArrayList alsoVisualize; @FXML private CheckComboBox taxonomyCCB; private ArrayList taxonomy; @FXML private CheckBox calculatecvvCB; private boolean calculateCvv; @FXML private TextField stringLengthTF; private Integer stringLength; @FXML private ComboBox calculateForCB; private CalculateFor calculateFor; @FXML private ComboBox ngramValueCB; private Integer ngramValue; @FXML private ComboBox skipValueCB; private Integer skipValue; @FXML private CheckBox notePunctuationsChB; private boolean notePunctuations; @FXML private CheckBox displayTaxonomyChB; private boolean displayTaxonomy; @FXML private TextField minimalOccurrencesTF; private Integer minimalOccurrences; @FXML private TextField minimalTaxonomyTF; private Integer minimalTaxonomy; @FXML private Pane paneWords; @FXML private Pane paneLetters; @FXML private Button computeNgramsB; @FXML private Button cancel; @FXML public ProgressBar ngramProgressBar; @FXML public Label progressLabel; @FXML private Hyperlink helpH; private enum MODE { LETTER, WORD } private MODE currentMode; private Corpus corpus; private HashMap> solarFiltersMap; private Filter filter; private boolean useDb; private HostServices hostService; private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka"); // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); // TODO: pass observables for taxonomy based on header scan // after header scan private ObservableList taxonomyCCBValues; private CorpusType currentCorpusType; public void init() { currentMode = MODE.WORD; toggleMode(currentMode); // ngram value CB ngramValueCB.valueProperty().addListener((observable, oldValue, newValue) -> { if (newValue.equals("nivo črk")) { ngramValue = 0; toggleMode(MODE.LETTER); } else { ngramValue = Integer.valueOf(newValue); toggleMode(MODE.WORD); } // skip only on ngrams of more than one word if (ngramValue > 1) { skipValueCB.setDisable(false); } else { skipValueCB.getSelectionModel().select(0); skipValue = 0; skipValueCB.setDisable(true); } logger.info("ngramValueCB:", ngramValue); }); // set first n-gram value to 2 at index 0 ngramValueCB.getSelectionModel().select(0); // selected index ngramValue = 2; // actual value at that index // set default values minimalOccurrencesTF.setText("1"); minimalOccurrences = 1; minimalTaxonomyTF.setText("1"); minimalTaxonomy = 1; notePunctuations = false; // set notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> { notePunctuations = newValue; logger.info("note punctuations: ", notePunctuations); }); notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB)); displayTaxonomy = false; // set displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; logger.info("display taxonomy: ", displayTaxonomy); }); displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); // calculateForCB calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> { calculateFor = CalculateFor.factory(newValue); alsoVisualizeCCB.getItems().removeAll(); if(newValue.equals("lema")){ alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma); } else if(newValue.equals("različnica")) { if (corpus.getCorpusType() == CorpusType.GOS) alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWordGos); else alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord); } else if(newValue.equals("normalizirana različnica")) { alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord); }else if(newValue.equals("oblikoskladenjska oznaka")) { alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd); }else { alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty); } alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { alsoVisualize = new ArrayList<>(); ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); alsoVisualize.addAll(checkedItems); logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); }); alsoVisualizeCCB.getCheckModel().clearChecks(); logger.info("calculateForCB:", calculateFor.toString()); }); calculateForCB.getSelectionModel().select(0); // msd msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { // focus lost String value = msdTF.getText(); logger.info("msdTf: ", value); if (!ValidationUtil.isEmpty(value)) { ArrayList msdTmp = new ArrayList<>(Arrays.asList(value.split(" "))); int nOfRequiredMsdTokens = ngramValue == 0 ? 1 : ngramValue; if (msdTmp.size() != nOfRequiredMsdTokens) { String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size()); logAlert(msg); showAlert(Alert.AlertType.ERROR, msg); } msd = new ArrayList<>(); msdStrings = new ArrayList<>(); for (String msdToken : msdTmp) { msd.add(Pattern.compile(msdToken)); msdStrings.add(msdToken); } logger.info(String.format("msd accepted (%d)", msd.size())); } else if (!ValidationUtil.isEmpty(newValue)) { msd = new ArrayList<>(); msdStrings = new ArrayList<>(); } } }); msdTF.setText(""); msd = new ArrayList<>(); alsoVisualizeCCB.getItems().removeAll(); alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma); alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { alsoVisualize = new ArrayList<>(); ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); alsoVisualize.addAll(checkedItems); logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); }); alsoVisualizeCCB.getCheckModel().clearChecks(); // taxonomy if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { taxonomy = new ArrayList<>(); ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); taxonomy.addAll(checkedItems); logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); }); taxonomyCCB.getCheckModel().clearChecks(); } else { taxonomyCCB.setDisable(true); } // skip skipValueCB.valueProperty().addListener((observable, oldValue, newValue) -> { skipValue = Integer.valueOf(newValue); logger.info("Skip " + skipValue); }); skipValueCB.getSelectionModel().select(0); skipValue = 0; // cvv calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> { calculateCvv = newValue; logger.info("calculate cvv: " + calculateCvv); }); calculatecvvCB.setSelected(false); // string length stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { // focus lost String value = stringLengthTF.getText(); if (!ValidationUtil.isEmpty(value)) { if (!ValidationUtil.isNumber(value)) { logAlert("stringlengthTf: " + WARNING_ONLY_NUMBERS_ALLOWED); GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED); } stringLength = Integer.parseInt(value); } else { GUIController.showAlert(Alert.AlertType.ERROR, WARNING_MISSING_STRING_LENGTH); stringLengthTF.setText("1"); logAlert(WARNING_MISSING_STRING_LENGTH); } } }); minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { // focus lost String value = minimalOccurrencesTF.getText(); if (!ValidationUtil.isEmpty(value)) { if (!ValidationUtil.isNumber(value)) { logAlert("minimalOccurrencesTF: " + WARNING_ONLY_NUMBERS_ALLOWED); GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED); } else { minimalOccurrences = Integer.parseInt(value); } } else { minimalOccurrencesTF.setText("1"); minimalOccurrences = 1; } } }); minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { // focus lost String value = minimalTaxonomyTF.getText(); if (!ValidationUtil.isEmpty(value)) { if (!ValidationUtil.isNumber(value)) { logAlert("minimalTaxonomyTF: " + WARNING_ONLY_NUMBERS_ALLOWED); GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED); } else { minimalTaxonomy = Integer.parseInt(value); } } else { minimalTaxonomyTF.setText("1"); minimalTaxonomy = 1; } } }); computeNgramsB.setOnAction(e -> { compute(); logger.info("compute button"); }); helpH.setOnAction(e -> openHelpWebsite()); cancel.setVisible(false); } /** * case a: values for combo boxes can change after a corpus change *
    *
  • different corpus type - reset all fields so no old values remain
  • *
  • same corpus type, different subset - keep
  • *
*

* case b: values for combo boxes can change after a header scan *

    *
  • at first, fields are populated by corpus type defaults
  • *
  • after, with gathered data
  • *
*

* ngrams: 1 * calculateFor: word * msd: * taxonomy: * skip: 0 * iscvv: false * string length: 1 */ // public void populateFields() { // // corpus changed if: current one is null (this is first run of the app) // // or if currentCorpus != gui's corpus // boolean corpusChanged = currentCorpusType == null // || currentCorpusType != corpus.getCorpusType(); // // // keep ngram value if set // if (ngramValue == null) { // ngramValueCB.getSelectionModel().select("1"); // ngramValue = 1; // } // // // TODO: check for GOS, GIGAFIDA, SOLAR... // // refresh and: // // TODO if current value != null && is in new calculateFor ? keep : otherwise reset // if (calculateFor == null) { // calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); // calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); // } // // if (!filter.hasMsd()) { // // if current corpus doesn't have msd data, disable this field // msd = new ArrayList<>(); // msdTF.setText(""); // msdTF.setDisable(true); // logger.info("no msd data"); // } else { // if (ValidationUtil.isEmpty(msd) // || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { // // msd has not been set previously // // or msd has been set but the corpus changed -> reset // msd = new ArrayList<>(); // msdTF.setText(""); // msdTF.setDisable(false); // logger.info("msd reset"); // } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { // // if msd has been set, but corpus type remained the same, we can keep any set msd value // msdTF.setText(StringUtils.join(msdStrings, " ")); // msdTF.setDisable(false); // logger.info("msd kept"); // } // } // // // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection) // // // keep skip value // if (skipValue == null) { // skipValueCB.getSelectionModel().select("0"); // skipValue = 0; // } // // // keep calculateCvv // calculatecvvCB.setSelected(calculateCvv); // // // keep string length if set // if (stringLength != null) { // stringLengthTF.setText(String.valueOf(stringLength)); // } else { // stringLengthTF.setText("1"); // stringLength = 1; // } // // // TODO: trigger on rescan // if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { // // user changed corpus (by type) or by selection & triggered a rescan of headers // // see if we read taxonomy from headers, otherwise use default values for given corpus // ObservableList tax = corpus.getTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // // currentCorpusType = corpus.getCorpusType(); // // setTaxonomyIsDirty(false); // } else { // // } // // // see if we read taxonomy from headers, otherwise use default values for given corpus // ObservableList tax = corpus.getTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCB.getItems().addAll(taxonomyCCBValues); // // } /** * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc., * sets combobox values to what is applicable ... * * @param mode */ public void toggleMode(MODE mode) { if (mode == null) { mode = currentMode; } logger.info("mode: ", mode.toString()); if (mode == MODE.WORD) { paneWords.setVisible(true); paneLetters.setVisible(false); // if (corpus.getCorpusType() == CorpusType.GOS) // calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS); // else if (corpus.getCorpusType() == CorpusType.GOS) calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS); else calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS); // calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS); } else if (mode == MODE.LETTER) { paneWords.setVisible(false); paneLetters.setVisible(true); calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS); // populate with default cvv length value if (stringLength == null) { stringLengthTF.setText("1"); stringLength = 1; } else { stringLengthTF.setText(String.valueOf(stringLength)); } // if calculateFor was selected for something other than a word or a lemma -> reset if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) { // if the user selected something else before selecting ngram for letters, reset that choice calculateFor = CalculateFor.WORD; calculateForCB.getSelectionModel().select("različnica"); } } // override if orth mode, allow only word if (corpus.isGosOrthMode()) { calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH); msdTF.setDisable(true); } else { msdTF.setDisable(false); } } private void compute() { Filter filter = new Filter(); filter.setNgramValue(ngramValue); filter.setCalculateFor(calculateFor); filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); filter.setDisplayTaxonomy(displayTaxonomy); filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setSkipValue(skipValue); filter.setIsCvv(calculateCvv); filter.setSolarFilters(solarFiltersMap); filter.setNotePunctuations(notePunctuations); filter.setMultipleKeys(alsoVisualize); // setMsd must be behind alsoVisualize filter.setMsd(msd); filter.setMinimalOccurrences(minimalOccurrences); filter.setMinimalTaxonomy(minimalTaxonomy); if (ngramValue != null && ngramValue == 0) { filter.setStringLength(stringLength); } String message = Validation.validateForStringLevel(filter); if (message == null) { // no errors logger.info("Executing: ", filter.toString()); StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb); execute(statistic); } else { logAlert(message); showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message); } } private void logAlert(String alert) { logger.info("alert: " + alert); } private void openHelpWebsite(){ hostService.showDocument(Messages.HELP_URL); } public Corpus getCorpus() { return corpus; } public void setCorpus(Corpus corpus) { this.corpus = corpus; if (corpus.getCorpusType() != CorpusType.SOLAR) { setSelectedFiltersLabel(null); } else { setSelectedFiltersLabel("/"); } } public void setSelectedFiltersLabel(String content) { if (content != null) { solarFilters.setVisible(true); selectedFiltersLabel.setVisible(true); selectedFiltersLabel.setText(content); } else { solarFilters.setVisible(false); selectedFiltersLabel.setVisible(false); } } private void execute(StatisticsNew statistic) { logger.info("Started execution: ", statistic.getFilter()); Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); boolean corpusIsSplit = corpusFiles.size() > 1; final Task task = new Task() { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { long i = 0; for (File f : corpusFiles) { readXML(f.toString(), statistic); i++; if (isCancelled()) { updateMessage(CANCELING_NOTIFICATION); break; } this.updateProgress(i, corpusFiles.size()); this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName())); } return null; } }; ngramProgressBar.progressProperty().bind(task.progressProperty()); progressLabel.textProperty().bind(task.messageProperty()); task.setOnSucceeded(e -> { try { boolean successullySaved = statistic.saveResultToDisk(); if (successullySaved) { showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED); } else { showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS); } } catch (UnsupportedEncodingException e1) { showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV); logger.error("Error while saving", e1); } catch (OutOfMemoryError e1){ showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY); logger.error("Out of memory error", e1); } ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); task.setOnFailed(e -> { showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING); logger.error("Error while executing", e); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); task.setOnCancelled(e -> { showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); // When cancel button is pressed cancel analysis cancel.setOnAction(e -> { task.cancel(); logger.info("cancel button"); }); cancel.setVisible(true); final Thread thread = new Thread(task, "task"); thread.setDaemon(true); thread.start(); } public void setSolarFiltersMap(HashMap> solarFiltersMap) { this.solarFiltersMap = solarFiltersMap; } public void setHostServices(HostServices hostServices){ this.hostService = hostServices; } }