package gui; import static alg.XML_processing.*; import static gui.GUIController.*; import java.io.File; import java.io.UnsupportedEncodingException; import java.util.*; import java.util.regex.Pattern; import alg.XML_processing; import javafx.application.HostServices; import javafx.beans.InvalidationListener; import javafx.beans.Observable; import javafx.beans.property.ReadOnlyDoubleWrapper; import javafx.beans.value.ChangeListener; import javafx.beans.value.ObservableValue; import javafx.scene.layout.AnchorPane; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.controlsfx.control.CheckComboBox; import data.*; import javafx.collections.FXCollections; import javafx.collections.ListChangeListener; import javafx.collections.ObservableList; import javafx.concurrent.Task; import javafx.fxml.FXML; import javafx.scene.control.*; import javafx.scene.layout.Pane; @SuppressWarnings("Duplicates") public class StringAnalysisTabNew2 { public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class); @FXML public AnchorPane stringAnalysisTabPaneNew2; @FXML public Label selectedFiltersLabel; @FXML public Label solarFilters; @FXML public Label calculateForL; @FXML public Label alsoVisualizeL; @FXML public Label displayTaxonomyL; @FXML public Label dataLimitL; @FXML public Label msdL; @FXML public Label taxonomyL; @FXML public Label minimalOccurrencesL; @FXML public Label minimalTaxonomyL; @FXML public Label skipValueL; @FXML public Label slowSpeedWarning1L; @FXML public Label slowSpeedWarning2L; @FXML public Label ngramValueL; @FXML public Label notePunctuationsL; @FXML public Label collocabilityL; @FXML public Label calculateForLH; @FXML public Label alsoVisualizeLH; @FXML public Label displayTaxonomyLH; @FXML public Label msdLH; @FXML public Label taxonomyLH; @FXML public Label minimalOccurrencesLH; @FXML public Label minimalTaxonomyLH; @FXML public Label skipValueLH; 
@FXML public Label ngramValueLH; @FXML public Label notePunctuationsLH; @FXML public Label collocabilityLH; @FXML private TextField msdTF; private ArrayList msd; private ArrayList msdStrings; @FXML private CheckComboBox alsoVisualizeCCB; private ArrayList alsoVisualize; @FXML private CheckComboBox taxonomyCCB; private ArrayList taxonomy; // // @FXML // private CheckBox calculatecvvCB; // private boolean calculateCvv; // @FXML // private TextField stringLengthTF; // private Integer stringLength; @FXML private ComboBox calculateForCB; private CalculateFor calculateFor; @FXML private CheckComboBox collocabilityCCB; private ArrayList collocability; @FXML private ComboBox ngramValueCB; private Integer ngramValue; @FXML private ComboBox skipValueCB; private Integer skipValue; @FXML private CheckBox notePunctuationsChB; private boolean notePunctuations; @FXML private CheckBox displayTaxonomyChB; private boolean displayTaxonomy; @FXML private TextField minimalOccurrencesTF; private Integer minimalOccurrences; @FXML private TextField minimalTaxonomyTF; private Integer minimalTaxonomy; @FXML private Pane paneWords; @FXML private Button computeNgramsB; @FXML private Button changeLanguageB; @FXML private Button cancel; @FXML public ProgressBar ngramProgressBar; @FXML public Label progressLabel; @FXML private Hyperlink helpH; private enum MODE { LETTER, WORD } private MODE currentMode; private Corpus corpus; private HashMap> solarFiltersMap; private Filter filter; private boolean useDb; private HostServices hostService; private ListChangeListener taxonomyListener; private ListChangeListener alsoVisualizeListener; private ListChangeListener collocabilityListener; private ChangeListener calculateForListener; // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka"); // private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); 
// private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica"); // private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); // private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); // private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); // private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); // private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final ObservableList COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice", "t-score", "MI", "MI3", "logDice", "simple LL"); // private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); // private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY)); // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_ORTH = new 
ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY)); // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY)); // private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY)); // private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY)); // private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY)); // private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna 
vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY)); // private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY)); // private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {}; private static final ArrayList ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY)); // TODO: pass observables for taxonomy based on header scan // after header scan private ObservableList taxonomyCCBValues; private CorpusType currentCorpusType; public void init() { // add CSS style stringAnalysisTabPaneNew2.getStylesheets().add("style.css"); stringAnalysisTabPaneNew2.getStyleClass().add("root"); manageTranslations(); currentMode = MODE.WORD; toggleMode(currentMode); // ngram value CB ngramValueCB.valueProperty().addListener((observable, oldValue, newValue) -> { if (newValue.equals("nivo črk")) { ngramValue = 0; toggleMode(MODE.LETTER); } else { ngramValue = Integer.valueOf(newValue); toggleMode(MODE.WORD); } // skip only on ngrams of more than one word if (ngramValue > 1) { skipValueCB.setDisable(false); } else { skipValueCB.getSelectionModel().select(0); skipValue = 0; skipValueCB.setDisable(true); } logger.info("ngramValueCB:", ngramValue); }); // set first n-gram value to 2 at index 0 ngramValueCB.getSelectionModel().select(0); // selected index ngramValue = 2; // actual value at that 
index // set default values minimalOccurrencesTF.setText("1"); minimalOccurrences = 1; minimalTaxonomyTF.setText("1"); minimalTaxonomy = 1; minimalTaxonomyTF.setDisable(true); notePunctuations = false; // set notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> { notePunctuations = newValue; logger.info("note punctuations: ", notePunctuations); }); notePunctuationsChB.setSelected(false); notePunctuationsChB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_readNotePunctuationsChB"))); displayTaxonomy = false; displayTaxonomyChB.setSelected(false); // set if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) { displayTaxonomyChB.setDisable(false); displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; if (displayTaxonomy) { minimalTaxonomyTF.setDisable(false); } else { minimalTaxonomyTF.setDisable(true); minimalTaxonomyTF.setText("1"); minimalTaxonomy = 1; } logger.info("display taxonomy: ", displayTaxonomy); }); displayTaxonomyChB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_readDisplayTaxonomyChB"))); } else { displayTaxonomyChB.setDisable(true); } if (calculateForListener != null){ calculateForCB.valueProperty().removeListener(calculateForListener); } // calculateForCB calculateForListener = new ChangeListener() { boolean ignoreCode = false; @Override public void changed(ObservableValue observable, String oldValue, String newValue) { if (ignoreCode) { return; } boolean languageChanged = newValue == null; ArrayList checkedIndices = new ArrayList<>(); checkedIndices.addAll(alsoVisualizeCCB.getCheckModel().getCheckedIndices()); if (languageChanged) { ignoreCode = true; if (corpus.getCorpusType() == CorpusType.GOS) { newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_WORDS_GOS); calculateForCB.getSelectionModel().select(newValue); } else { newValue = I18N.getTranslatedValue(oldValue, 
N_GRAM_COMPUTE_FOR_WORDS); calculateForCB.getSelectionModel().select(newValue); } // override if orth mode, allow only word if (corpus.isGosOrthMode()) { newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_WORDS_ORTH); calculateForCB.getSelectionModel().select(newValue); } ignoreCode = false; } calculateFor = CalculateFor.factory(newValue); if (alsoVisualizeListener != null) { alsoVisualizeCCB.getCheckModel().getCheckedItems().removeListener(alsoVisualizeListener); } alsoVisualizeCCB.getCheckModel().clearChecks(); alsoVisualizeCCB.getItems().removeAll(); if (newValue.equals(CalculateFor.LEMMA.toString())) { alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA)); } else if (newValue.equals(CalculateFor.WORD.toString())) { if (corpus.getCorpusType() == CorpusType.GOS) alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS)); else alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS)); } else if (newValue.equals(CalculateFor.NORMALIZED_WORD.toString())) { alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS)); } else if (newValue.equals(CalculateFor.MORPHOSYNTACTIC_SPECS.toString())) { alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_MSD)); } else { alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_EMPTY)); } // alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { // alsoVisualize = new ArrayList<>(); // ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); // alsoVisualize.addAll(checkedItems); // logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); // }); // alsoVisualizeCCB.getCheckModel().clearChecks(); alsoVisualizeListener = new ListChangeListener() { @Override public void onChanged(Change c) { 
alsoVisualize = new ArrayList<>(); ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); alsoVisualize.addAll(checkedItems); logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); } }; // alsoVisualizeCCB.getCheckModel().clearChecks(); // alsoVisualizeCCB.getItems().removeAll(); // alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS)); alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); if (languageChanged) { for (int i : checkedIndices) { alsoVisualizeCCB.getCheckModel().check(i); } } logger.info("calculateForCB:", calculateFor.toString()); } }; calculateForCB.valueProperty().addListener(calculateForListener); calculateForCB.getSelectionModel().select(0); // collocabilityCCB if (collocabilityListener != null){ collocabilityCCB.getCheckModel().getCheckedItems().removeListener(collocabilityListener); } collocabilityListener = new ListChangeListener() { @Override public void onChanged(Change c) { collocability = new ArrayList<>(); ObservableList checkedItems = FXCollections.observableArrayList(); for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) { checkedItems.add(Collocability.factory(el)); } collocability.addAll(checkedItems); logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ","))); } }; collocabilityCCB.getCheckModel().clearChecks(); collocabilityCCB.getItems().removeAll(); collocabilityCCB.getItems().setAll(FXCollections.observableArrayList(COLLOCABILITY_ITEMS)); collocabilityCCB.getCheckModel().getCheckedItems().addListener(collocabilityListener); // collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { // collocability = new ArrayList<>(); // ObservableList checkedItems = FXCollections.observableArrayList(); // for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) { 
// checkedItems.add(Collocability.factory(el)); // } // collocability.addAll(checkedItems); // logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ","))); // }); // msd msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { // focus lost String value = msdTF.getText(); logger.info("msdTf: ", value); if (!ValidationUtil.isEmpty(value)) { ArrayList msdTmp = new ArrayList<>(Arrays.asList(value.split(" "))); int nOfRequiredMsdTokens = ngramValue == 0 ? 1 : ngramValue; if (msdTmp.size() != nOfRequiredMsdTokens) { String msg = String.format(I18N.get("message.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES"), nOfRequiredMsdTokens, msdTmp.size()); logAlert(msg); showAlert(Alert.AlertType.ERROR, msg); } msd = new ArrayList<>(); msdStrings = new ArrayList<>(); for (String msdToken : msdTmp) { msd.add(Pattern.compile(msdToken)); msdStrings.add(msdToken); } logger.info(String.format("msd accepted (%d)", msd.size())); } else if (!ValidationUtil.isEmpty(newValue)) { msd = new ArrayList<>(); msdStrings = new ArrayList<>(); } } }); msdTF.setText(""); msd = new ArrayList<>(); if (alsoVisualizeListener != null){ alsoVisualizeCCB.getCheckModel().getCheckedItems().removeListener(alsoVisualizeListener); } alsoVisualizeListener = new ListChangeListener() { @Override public void onChanged(Change c) { alsoVisualize = new ArrayList<>(); ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); alsoVisualize.addAll(checkedItems); logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); } }; alsoVisualizeCCB.getCheckModel().clearChecks(); alsoVisualizeCCB.getItems().removeAll(); if (corpus.getCorpusType() != CorpusType.GOS){ alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS)); } else { 
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS)); } alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); // taxonomy if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) { taxonomyCCB.setDisable(false); } else { taxonomyCCB.setDisable(true); } if (taxonomyListener != null){ taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); } taxonomyListener = new ListChangeListener() { boolean changing = true; @Override public void onChanged(ListChangeListener.Change c){ if(changing) { ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); // ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); // // Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); ArrayList checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus); taxonomy = new ArrayList<>(); taxonomy.addAll(checkedItemsTaxonomy); taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); // taxonomyCCB.getCheckModel().clearChecks(); changing = false; taxonomyCCB.getCheckModel().clearChecks(); for (Taxonomy t : checkedItemsTaxonomy) { taxonomyCCB.getCheckModel().check(t.toLongNameString()); } changing = true; logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); } } }; taxonomyCCB.getCheckModel().clearChecks(); taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener); // skip skipValueCB.valueProperty().addListener((observable, oldValue, newValue) -> { skipValue = Integer.valueOf(newValue); logger.info("Skip " + skipValue); }); skipValueCB.getSelectionModel().select(0); skipValue = 0; // cvv // calculatecvvCB.selectedProperty().addListener((observable, 
oldValue, newValue) -> { // calculateCvv = newValue; // logger.info("calculate cvv: " + calculateCvv); // }); // calculatecvvCB.setSelected(false); // string length // stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> { // if (!newValue) { // // focus lost // String value = stringLengthTF.getText(); // if (!ValidationUtil.isEmpty(value)) { // if (!ValidationUtil.isNumber(value)) { // logAlert("stringlengthTf: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); // GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); // } // stringLength = Integer.parseInt(value); // } else { // GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_MISSING_STRING_LENGTH")); // stringLengthTF.setText("1"); // logAlert(I18N.get("message.WARNING_MISSING_STRING_LENGTH")); // } // } // }); minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { // focus lost String value = minimalOccurrencesTF.getText(); if (!ValidationUtil.isEmpty(value)) { if (!ValidationUtil.isNumber(value)) { logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); } else { minimalOccurrences = Integer.parseInt(value); } } else { minimalOccurrencesTF.setText("1"); minimalOccurrences = 1; } } }); minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { // focus lost String value = minimalTaxonomyTF.getText(); if (!ValidationUtil.isEmpty(value)) { if (!ValidationUtil.isNumber(value)) { logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); } else { minimalTaxonomy = Integer.parseInt(value); } } else { minimalTaxonomyTF.setText("1"); minimalTaxonomy = 1; } } }); 
changeLanguageB.setOnAction(e -> { if (I18N.getLocale() == new Locale.Builder().setLanguage("sl").setRegion("SI").build()){ I18N.setLocale(Locale.ENGLISH); } else { I18N.setLocale(new Locale.Builder().setLanguage("sl").setRegion("SI").build()); } Messages.reload(); Messages.updateChooseCorpusL(); logger.info("change language"); }); computeNgramsB.setOnAction(e -> { compute(); logger.info("compute button"); }); helpH.setOnAction(e -> openHelpWebsite()); cancel.setVisible(false); } /** * case a: values for combo boxes can change after a corpus change *
    * <ul>
    * <li>different corpus type - reset all fields so no old values remain</li>
    * <li>same corpus type, different subset - keep</li>
    * </ul>
    * <p>
    * case b: values for combo boxes can change after a header scan
    * <ul>
    * <li>at first, fields are populated by corpus type defaults</li>
    * <li>after, with gathered data</li>
    * </ul>
    * <p>
* ngrams: 1 * calculateFor: word * msd: * taxonomy: * skip: 0 * iscvv: false * string length: 1 */ // public void populateFields() { // // corpus changed if: current one is null (this is first run of the app) // // or if currentCorpus != gui's corpus // boolean corpusChanged = currentCorpusType == null // || currentCorpusType != corpus.getCorpusType(); // // // keep ngram value if set // if (ngramValue == null) { // ngramValueCB.getSelectionModel().select("1"); // ngramValue = 1; // } // // // TODO: check for GOS, GIGAFIDA, SOLAR... // // refresh and: // // TODO if current value != null && is in new calculateFor ? keep : otherwise reset // if (calculateFor == null) { // calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); // calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); // } // // if (!filter.hasMsd()) { // // if current corpus doesn't have msd data, disable this field // msd = new ArrayList<>(); // msdTF.setText(""); // msdTF.setDisable(true); // logger.info("no msd data"); // } else { // if (ValidationUtil.isEmpty(msd) // || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { // // msd has not been set previously // // or msd has been set but the corpus changed -> reset // msd = new ArrayList<>(); // msdTF.setText(""); // msdTF.setDisable(false); // logger.info("msd reset"); // } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { // // if msd has been set, but corpus type remained the same, we can keep any set msd value // msdTF.setText(StringUtils.join(msdStrings, " ")); // msdTF.setDisable(false); // logger.info("msd kept"); // } // } // // // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection) // // // keep skip value // if (skipValue == null) { // skipValueCB.getSelectionModel().select("0"); // skipValue = 0; // } // // // keep calculateCvv // calculatecvvCB.setSelected(calculateCvv); // // // keep string length if set // if (stringLength != null) { // 
stringLengthTF.setText(String.valueOf(stringLength)); // } else { // stringLengthTF.setText("1"); // stringLength = 1; // } // // // TODO: trigger on rescan // if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { // // user changed corpus (by type) or by selection & triggered a rescan of headers // // see if we read taxonomy from headers, otherwise use default values for given corpus // ObservableList tax = corpus.getObservableListTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // // currentCorpusType = corpus.getCorpusType(); // // setTaxonomyIsDirty(false); // } else { // // } // // // see if we read taxonomy from headers, otherwise use default values for given corpus // ObservableList tax = corpus.getObservableListTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCB.getItems().addAll(taxonomyCCBValues); // // } private void manageTranslations(){ helpH.textProperty().bind(I18N.createStringBinding("hyperlink.help")); changeLanguageB.textProperty().bind(I18N.createStringBinding("button.language")); computeNgramsB.textProperty().bind(I18N.createStringBinding("button.computeNgrams")); cancel.textProperty().bind(I18N.createStringBinding("button.cancel")); calculateForL.textProperty().bind(I18N.createStringBinding("label.calculateFor")); alsoVisualizeL.textProperty().bind(I18N.createStringBinding("label.alsoVisualize")); displayTaxonomyL.textProperty().bind(I18N.createStringBinding("label.displayTaxonomy")); // writeMsdAtTheEndL.textProperty().bind(I18N.createStringBinding("label.writeMsdAtTheEnd")); skipValueL.textProperty().bind(I18N.createStringBinding("label.skipValue")); slowSpeedWarning1L.textProperty().bind(I18N.createStringBinding("label.slowSpeedWarning")); slowSpeedWarning2L.textProperty().bind(I18N.createStringBinding("label.slowSpeedWarning")); 
ngramValueL.textProperty().bind(I18N.createStringBinding("label.ngramValue")); notePunctuationsL.textProperty().bind(I18N.createStringBinding("label.notePunctuations")); collocabilityL.textProperty().bind(I18N.createStringBinding("label.collocability")); dataLimitL.textProperty().bind(I18N.createStringBinding("label.dataLimit")); msdL.textProperty().bind(I18N.createStringBinding("label.msd")); taxonomyL.textProperty().bind(I18N.createStringBinding("label.taxonomy")); minimalOccurrencesL.textProperty().bind(I18N.createStringBinding("label.minimalOccurrences")); minimalTaxonomyL.textProperty().bind(I18N.createStringBinding("label.minimalTaxonomy")); solarFilters.textProperty().bind(I18N.createStringBinding("label.solarFilters")); calculateForLH.textProperty().bind(I18N.createStringBinding("label.wordSet.calculateForH")); alsoVisualizeLH.textProperty().bind(I18N.createStringBinding("label.wordSet.alsoVisualizeH")); displayTaxonomyLH.textProperty().bind(I18N.createStringBinding("label.wordSet.displayTaxonomyH")); skipValueLH.textProperty().bind(I18N.createStringBinding("label.wordSet.skipValueH")); ngramValueLH.textProperty().bind(I18N.createStringBinding("label.wordSet.ngramValueH")); notePunctuationsLH.textProperty().bind(I18N.createStringBinding("label.wordSet.notePunctuationsH")); collocabilityLH.textProperty().bind(I18N.createStringBinding("label.wordSet.collocabilityH")); msdLH.textProperty().bind(I18N.createStringBinding("label.wordSet.msdH")); taxonomyLH.textProperty().bind(I18N.createStringBinding("label.wordSet.taxonomyH")); minimalOccurrencesLH.textProperty().bind(I18N.createStringBinding("label.wordSet.minimalOccurrencesH")); minimalTaxonomyLH.textProperty().bind(I18N.createStringBinding("label.wordSet.minimalTaxonomyH")); } /** * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc., * sets combobox values to what is applicable ... 
*
     * @param mode the mode to switch to; {@code null} re-applies {@code currentMode}
     */
    public void toggleMode(MODE mode) {
        if (mode == null) {
            mode = currentMode;
        }

        // The "{}" placeholder is required; without it log4j2 silently drops the argument.
        // Passing the object (not mode.toString()) also avoids an NPE when no mode is set yet.
        logger.info("mode: {}", mode);

        if (mode == MODE.WORD) {
            paneWords.setVisible(true);

            // Rebind the "calculate for" choices to the list matching the corpus type.
            if (corpus.getCorpusType() == CorpusType.GOS) {
                calculateForCB.itemsProperty().unbind();
                calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_GOS));
            } else {
                calculateForCB.itemsProperty().unbind();
                calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS));
            }
        }

        // override if orth mode, allow only word
        if (corpus.isGosOrthMode()) {
            calculateForCB.itemsProperty().unbind();
            calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_ORTH));
            msdTF.setDisable(true);
        } else {
            msdTF.setDisable(false);
        }
    }

    /**
     * Builds a {@link Filter} from the values currently held by this controller, validates it
     * for string-level analysis and, when valid, starts the analysis via
     * {@link #execute(StatisticsNew)}. When validation returns a message, the message is logged
     * and shown in an error alert instead.
     */
    private void compute() {
        Filter filter = new Filter();
        filter.setNgramValue(ngramValue);
        filter.setCalculateFor(calculateFor);
        filter.setTaxonomy(taxonomy);
        filter.setDisplayTaxonomy(displayTaxonomy);
        filter.setAl(AnalysisLevel.STRING_LEVEL);
        filter.setSkipValue(skipValue);
        filter.setSolarFilters(solarFiltersMap);
        filter.setNotePunctuations(notePunctuations);
        filter.setMultipleKeys(alsoVisualize);

        // setMsd must be behind alsoVisualize
        filter.setMsd(msd);
        filter.setMinimalOccurrences(minimalOccurrences);
        filter.setMinimalTaxonomy(minimalTaxonomy);
        filter.setCollocability(collocability);

        String message = Validation.validateForStringLevel(filter);
        if (message == null) {
            // no errors
            logger.info("Executing: {}", filter);
            StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
            execute(statistic);
        } else {
            logAlert(message);
            showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
        }
    }

    /**
     * Writes an alert message to the log at info level.
     *
     * @param alert the alert text to log
     */
    private void logAlert(String alert) {
        logger.info("alert: " + alert);
    }

    /**
     * Opens {@code Messages.HELP_URL} through the configured {@link HostServices}.
     */
    private void openHelpWebsite() {
        hostService.showDocument(Messages.HELP_URL);
    }

    /**
     * @return the corpus currently assigned to this tab
     */
    public Corpus getCorpus() {
        return corpus;
    }

    /**
     * Assigns the corpus and updates the selected-filters label: the label is hidden for every
     * corpus type except {@code CorpusType.SOLAR}, for which it is shown with the text "/".
     *
     * @param corpus the corpus to analyse
     */
    public void setCorpus(Corpus corpus) {
        this.corpus = corpus;
        if (corpus.getCorpusType() != CorpusType.SOLAR) {
            setSelectedFiltersLabel(null);
        } else {
            setSelectedFiltersLabel("/");
        }
    }

    /**
     * Shows the solar-filter labels with the given text, or hides them.
     *
     * @param content text to display; {@code null} hides both labels
     */
    public void setSelectedFiltersLabel(String content) {
        if (content != null) {
            solarFilters.setVisible(true);
            selectedFiltersLabel.setVisible(true);
            selectedFiltersLabel.setText(content);
        } else {
            solarFilters.setVisible(false);
            selectedFiltersLabel.setVisible(false);
        }
    }

    /**
     * Unbinds the progress bar and progress label from the finished task, resets the bar to
     * zero with the given accent style, clears the label and hides the cancel button.
     *
     * @param accentStyle style string applied to the progress bar
     */
    private void resetProgressUi(String accentStyle) {
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        ngramProgressBar.setStyle(accentStyle);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
        cancel.setVisible(false);
    }

    /**
     * Creates (but does not start) the task for the second pass over the corpus files, which
     * reads every file into {@code statisticsOneGrams}. On success the collocability values of
     * {@code statistic} are updated from the one-gram statistics and the result is saved.
     * The progress bar, progress label and cancel button are bound to the returned task.
     *
     * @param statistic          statistics of the main analysis, already filled by the first pass
     * @param statisticsOneGrams statistics object that the second pass fills
     * @return the configured task; the caller is responsible for running it
     */
    private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
        Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();

        final Task<Void> task = new Task<Void>() {
            @SuppressWarnings("Duplicates")
            @Override
            protected Void call() throws Exception {
                final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
                // NOTE(review): cancel is a scene-graph node and this method runs on the worker
                // thread; JavaFX expects such changes on the FX application thread - confirm.
                if (multipleFiles) {
                    cancel.setVisible(true);
                }

                // The counter starts at the file count because this is the second pass:
                // progress continues from the halfway point of the doubled total.
                int i = corpusFiles.size();
                Date startTime = new Date();
                Date previousTime = new Date();
                int remainingSeconds = -1;
                int corpusSize;
                if (statistic.getFilter().getCollocability().size() > 0) {
                    corpusSize = corpusFiles.size() * 2;
                } else {
                    corpusSize = corpusFiles.size();
                }

                for (File file : corpusFiles) {
                    final int iFinal = i;
                    XML_processing xml_processing = new XML_processing();
                    i++;
                    if (xml_processing.progressBarListener != null) {
                        xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
                    }

                    if (multipleFiles) {
                        // Progress per file; the time estimate is refreshed at most every 500 ms.
                        if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                            remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
                            previousTime = new Date();
                        }
                        this.updateProgress(i, corpusSize);
                        this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, file.getName(), remainingSeconds));
                    } else {
                        // Progress within the file, driven by the reader's own progress property.
                        xml_processing.progressBarListener = new InvalidationListener() {
                            int remainingSeconds = -1;
                            Date previousTime = new Date();

                            @Override
                            public void invalidated(Observable observable) {
                                cancel.setVisible(true);
                                final double fileProgress = ((ReadOnlyDoubleWrapper) observable).get();
                                if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                                    remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime())
                                            * (1.0/(iFinal * 100 + fileProgress + 1))
                                            * ((corpusSize - iFinal - 1) * 100 + 100 - fileProgress) / 1000);
                                    previousTime = new Date();
                                }
                                // Forward the cancellation state so the reader can stop early.
                                xml_processing.isCancelled = isCancelled();
                                updateProgress((iFinal * 100) + fileProgress + 1, corpusSize * 100);
                                updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusSize, file.getName(), remainingSeconds));
                            }
                        };
                        xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
                    }

                    xml_processing.isCollocability = true;
                    xml_processing.readXML(file.toString(), statisticsOneGrams);
                    xml_processing.isCollocability = false;

                    if (isCancelled()) {
                        updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
                        break;
                    }
                }

                return null;
            }
        };

        ngramProgressBar.progressProperty().bind(task.progressProperty());
        progressLabel.textProperty().bind(task.messageProperty());

        task.setOnSucceeded(e -> {
            try {
                logger.debug("Statistics before collocability update: {}", statistic);
                statistic.updateCalculateCollocabilities(statisticsOneGrams);
                boolean successfullySaved = statistic.saveResultToDisk();
                if (successfullySaved) {
                    showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
                } else {
                    showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
                }
            } catch (UnsupportedEncodingException e1) {
                showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
                logger.error("Error while saving", e1);
            } catch (OutOfMemoryError e1) {
                showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
                logger.error("Out of memory error", e1);
            }

            // The progress bar is intentionally left bound here, as in the original code.
            ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
            progressLabel.textProperty().unbind();
            progressLabel.setText("");
            cancel.setVisible(false);
        });

        task.setOnFailed(e -> {
            showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
            // Log the task's exception; the event object itself carries no stack trace.
            logger.error("Error while executing", task.getException());
            resetProgressUi(Settings.FX_ACCENT_NOK);
        });

        task.setOnCancelled(e -> {
            showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
            resetProgressUi(Settings.FX_ACCENT_OK);
        });

        // When cancel button is pressed cancel analysis
        cancel.setOnAction(e -> task.cancel());

        return task;
    }

    /**
     * Runs the analysis described by {@code statistic} on a daemon thread, reading every detected
     * corpus file into it. When the filter requests collocability measures, a second task
     * (see {@link #prepareTaskForCollocability(StatisticsNew, StatisticsNew)}) is started after
     * the first succeeds; otherwise the result is saved immediately. The progress bar, progress
     * label and cancel button are bound to the running task.
     *
     * @param statistic statistics object to fill; also supplies the corpus and the filter
     */
    private void execute(StatisticsNew statistic) {
        Filter f = statistic.getFilter();
        logger.info("Started execution: {}", f);

        Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();

        final Task<Void> task = new Task<Void>() {
            @SuppressWarnings("Duplicates")
            @Override
            protected Void call() throws Exception {
                final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
                // NOTE(review): cancel is a scene-graph node and this method runs on the worker
                // thread; JavaFX expects such changes on the FX application thread - confirm.
                if (multipleFiles) {
                    cancel.setVisible(true);
                }

                int i = 0;
                Date startTime = new Date();
                Date previousTime = new Date();
                int remainingSeconds = -1;
                // With collocability a second pass follows, so the total is doubled.
                int corpusSize;
                if (statistic.getFilter().getCollocability().size() > 0) {
                    corpusSize = corpusFiles.size() * 2;
                } else {
                    corpusSize = corpusFiles.size();
                }

                for (File file : corpusFiles) {
                    final int iFinal = i;
                    XML_processing xml_processing = new XML_processing();
                    xml_processing.isCancelled = false;
                    i++;
                    if (xml_processing.progressBarListener != null) {
                        xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
                    }

                    if (multipleFiles) {
                        // Progress per file; the time estimate is refreshed at most every 500 ms.
                        if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                            remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
                            previousTime = new Date();
                        }
                        this.updateProgress(i, corpusSize);
                        this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, file.getName(), remainingSeconds));
                    } else {
                        // Progress within the file, driven by the reader's own progress property.
                        xml_processing.progressBarListener = new InvalidationListener() {
                            int remainingSeconds = -1;
                            Date previousTime = new Date();

                            @Override
                            public void invalidated(Observable observable) {
                                cancel.setVisible(true);
                                final double fileProgress = ((ReadOnlyDoubleWrapper) observable).get();
                                if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                                    remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime())
                                            * (1.0/(iFinal * 100 + fileProgress + 1))
                                            * ((corpusSize - iFinal - 1) * 100 + 100 - fileProgress) / 1000);
                                    previousTime = new Date();
                                }
                                // Forward the cancellation state so the reader can stop early.
                                xml_processing.isCancelled = isCancelled();
                                updateProgress((iFinal * 100) + fileProgress + 1, corpusSize * 100);
                                updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusSize, file.getName(), remainingSeconds));
                            }
                        };
                        xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
                    }

                    xml_processing.readXML(file.toString(), statistic);

                    if (isCancelled()) {
                        updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
                        break;
                    }
                    if (!multipleFiles) {
                        cancel.setVisible(false);
                    }
                }

                return null;
            }
        };

        ngramProgressBar.progressProperty().bind(task.progressProperty());
        progressLabel.textProperty().bind(task.messageProperty());

        task.setOnSucceeded(e -> {
            if (f.getCollocability().size() > 0) {
                try {
                    // Second pass: same filter, but counting single words (n-gram value 1).
                    Filter f2 = (Filter) f.clone();
                    f2.setNgramValue(1);
                    StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
                    final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
                    final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
                    thread_collocability.setDaemon(true);
                    thread_collocability.start();
                } catch (CloneNotSupportedException c) {
                    // Previously swallowed: the UI stayed bound with no feedback to the user.
                    showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
                    logger.error("Could not clone filter for collocability pass", c);
                    resetProgressUi(Settings.FX_ACCENT_NOK);
                }
            } else {
                try {
                    boolean successfullySaved = statistic.saveResultToDisk();
                    if (successfullySaved) {
                        showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
                    } else {
                        showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
                    }
                } catch (UnsupportedEncodingException e1) {
                    showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
                    logger.error("Error while saving", e1);
                } catch (OutOfMemoryError e1) {
                    showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
                    logger.error("Out of memory error", e1);
                }

                // Progress value is kept (not reset to zero) after a successful run.
                ngramProgressBar.progressProperty().unbind();
                ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
                progressLabel.textProperty().unbind();
                progressLabel.setText("");
                cancel.setVisible(false);
            }
        });

        task.setOnFailed(e -> {
            showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
            // Log the task's exception; the event object itself carries no stack trace.
            logger.error("Error while executing", task.getException());
            resetProgressUi(Settings.FX_ACCENT_NOK);
        });

        task.setOnCancelled(e -> {
            showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
            resetProgressUi(Settings.FX_ACCENT_OK);
        });

        // When cancel button is pressed cancel analysis
        cancel.setOnAction(e -> {
            task.cancel();
            logger.info("cancel button");
        });

        final Thread thread = new Thread(task, "task");
        thread.setDaemon(true);
        thread.start();
    }

    /**
     * @param solarFiltersMap the solar filters passed on to the analysis {@link Filter}
     */
    public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
        this.solarFiltersMap = solarFiltersMap;
    }

    /**
     * @param hostServices host services used to open the help page
     */
    public void setHostServices(HostServices hostServices) {
        this.hostService = hostServices;
    }
}