package gui; import data.*; import javafx.application.HostServices; import javafx.collections.FXCollections; import javafx.collections.ListChangeListener; import javafx.collections.ObservableList; import javafx.concurrent.Task; import javafx.fxml.FXML; import javafx.scene.control.*; import javafx.scene.layout.AnchorPane; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.controlsfx.control.CheckComboBox; import java.io.File; import java.io.UnsupportedEncodingException; import java.util.*; import java.util.concurrent.atomic.AtomicBoolean; import java.util.regex.Pattern; import static alg.XML_processing.readXML; import static gui.GUIController.showAlert; import static gui.Messages.*; @SuppressWarnings("Duplicates") public class WordLevelTab { public final static Logger logger = LogManager.getLogger(OneWordAnalysisTab.class); @FXML public AnchorPane wordLevelAnalysisTabPane; @FXML public Label selectedFiltersLabel; @FXML public Label solarFilters; @FXML private TextField msdTF; private ArrayList msd; private ArrayList msdStrings; @FXML private CheckComboBox alsoVisualizeCCB; private ArrayList alsoVisualize; @FXML private CheckComboBox taxonomyCCB; private ArrayList taxonomy; @FXML private CheckBox displayTaxonomyChB; private boolean displayTaxonomy; @FXML private ComboBox prefixLengthCB; private Integer prefixLength; @FXML private ComboBox suffixLengthCB; private Integer suffixLength; @FXML private TextField prefixListTF; private ArrayList prefixList; @FXML private TextField suffixListTF; private ArrayList suffixList; // @FXML // private CheckBox writeMsdAtTheEndChB; // private boolean writeMsdAtTheEnd; @FXML private ComboBox calculateForCB; private CalculateFor calculateFor; @FXML private TextField minimalOccurrencesTF; private Integer minimalOccurrences; @FXML private TextField minimalTaxonomyTF; private Integer minimalTaxonomy; @FXML private Button computeNgramsB; @FXML private Button cancel; @FXML public ProgressBar ngramProgressBar; @FXML public Label progressLabel; @FXML private Hyperlink helpH; private enum MODE { LETTER, WORD } private MODE currentMode; private Corpus corpus; private HashMap> solarFiltersMap; private Filter filter; private boolean useDb; private HostServices hostService; private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica"); private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); // TODO: pass observables for taxonomy based on header scan // after header scan private ObservableList taxonomyCCBValues; private CorpusType currentCorpusType; public void init() { // add CSS style wordLevelAnalysisTabPane.getStylesheets().add("style.css"); wordLevelAnalysisTabPane.getStyleClass().add("root"); currentMode = MODE.WORD; toggleMode(currentMode); AtomicBoolean writeMsdAtTheEndEnableCalculateFor = new AtomicBoolean(false); // calculateForCB calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> { calculateFor = CalculateFor.factory(newValue); alsoVisualizeCCB.getItems().removeAll(); if(newValue.equals("lema")){ alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma); } else if(newValue.equals("različnica")) { if (corpus.getCorpusType() == CorpusType.GOS) alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWordGos); else alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord); } else if(newValue.equals("normalizirana različnica")) { alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord); } else if(newValue.equals("oblikoskladenjska oznaka")) { // writeMsdAtTheEndEnableCalculateFor.set(true); // writeMsdAtTheEndChB.setDisable(false); alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd); }else { alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty); } // if (!newValue.equals("oblikoskladenjska oznaka")){ // writeMsdAtTheEnd = false; // writeMsdAtTheEndChB.setSelected(false); // writeMsdAtTheEndChB.setDisable(true); // writeMsdAtTheEndEnableCalculateFor.set(false); // } alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { alsoVisualize = new ArrayList<>(); ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); alsoVisualize.addAll(checkedItems); logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); }); alsoVisualizeCCB.getCheckModel().clearChecks(); logger.info("calculateForCB:", calculateFor.toString()); }); calculateForCB.getSelectionModel().select(0); prefixLengthCB.getSelectionModel().select(0); prefixLength = 0; prefixLengthCB.valueProperty().addListener((observable, oldValue, newValue) -> { prefixLength = Integer.valueOf(newValue); if(prefixLength > 0){ prefixListTF.setDisable(true); suffixListTF.setDisable(true); } else if(prefixLength == 0 && suffixLength == 0){ prefixListTF.setDisable(false); suffixListTF.setDisable(false); } if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) { computeNgramsB.setDisable(false); } else { computeNgramsB.setDisable(true); } logger.info("Prefix length " + prefixLength); }); suffixLengthCB.getSelectionModel().select(0); suffixLength = 0; suffixLengthCB.valueProperty().addListener((observable, oldValue, newValue) -> { suffixLength = Integer.valueOf(newValue); if(suffixLength > 0){ prefixListTF.setDisable(true); suffixListTF.setDisable(true); } else if(prefixLength == 0 && suffixLength == 0){ prefixListTF.setDisable(false); suffixListTF.setDisable(false); } if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) { computeNgramsB.setDisable(false); } else { computeNgramsB.setDisable(true); } logger.info("Prefix length " + suffixLength); }); prefixListTF.setText(""); prefixList = new ArrayList<>(); prefixListTF.textProperty().addListener((observable, oldValue, newValue) -> { String value = prefixListTF.getText(); prefixList = new ArrayList<>(); if (!ValidationUtil.isEmpty(value)) { for (String w : value.split(";")){ w = w.replaceAll("\\s+",""); if (!w.equals("")){ prefixList.add(w); } } // suffixList = value; } System.out.println(prefixList); if(prefixList.size() > 0){ prefixLengthCB.setDisable(true); suffixLengthCB.setDisable(true); } else if(suffixList.size() == 0){ prefixLengthCB.setDisable(false); suffixLengthCB.setDisable(false); } if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) { computeNgramsB.setDisable(false); } else { computeNgramsB.setDisable(true); } }); suffixListTF.setText(""); suffixList = new ArrayList<>(); suffixListTF.textProperty().addListener((observable, oldValue, newValue) -> { String value = suffixListTF.getText(); suffixList = new ArrayList<>(); if (!ValidationUtil.isEmpty(value)) { for (String w : value.split(";")){ w = w.replaceAll("\\s+",""); if (!w.equals("")){ suffixList.add(w); } } // suffixList = value; } System.out.println(suffixList); if(suffixList.size() > 0){ prefixLengthCB.setDisable(true); suffixLengthCB.setDisable(true); } else if(prefixList.size() == 0){ prefixLengthCB.setDisable(false); suffixLengthCB.setDisable(false); } if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) { computeNgramsB.setDisable(false); } else { computeNgramsB.setDisable(true); } }); // prefixLengthCB.setDisable(true); // msd msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { // focus lost String value = msdTF.getText(); logger.info("msdTf: ", value); if (!ValidationUtil.isEmpty(value)) { ArrayList msdTmp = new ArrayList<>(Arrays.asList(value.split(" "))); int nOfRequiredMsdTokens = 1; if (msdTmp.size() != nOfRequiredMsdTokens) { String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size()); logAlert(msg); showAlert(Alert.AlertType.ERROR, msg); } msd = new ArrayList<>(); msdStrings = new ArrayList<>(); for (String msdToken : msdTmp) { msd.add(Pattern.compile(msdToken)); msdStrings.add(msdToken); } logger.info(String.format("msd accepted (%d)", msd.size())); } else if (!ValidationUtil.isEmpty(newValue)) { msd = new ArrayList<>(); msdStrings = new ArrayList<>(); } } }); msdTF.setText(""); msd = new ArrayList<>(); alsoVisualizeCCB.getItems().removeAll(); alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma); alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { alsoVisualize = new ArrayList<>(); ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); alsoVisualize.addAll(checkedItems); // if (checkedItems.contains("oblikoskladenjska oznaka") || writeMsdAtTheEndEnableCalculateFor.get()){ // writeMsdAtTheEndChB.setDisable(false); // } else { // writeMsdAtTheEnd = false; // writeMsdAtTheEndChB.setSelected(false); // writeMsdAtTheEndChB.setDisable(true); // } logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); }); alsoVisualizeCCB.getCheckModel().clearChecks(); // taxonomy if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener() { boolean changing = true; @Override public void onChanged(ListChangeListener.Change c){ if(changing) { ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); taxonomy = new ArrayList<>(); taxonomy.addAll(checkedItemsTaxonomy); taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); // taxonomyCCB.getCheckModel().clearChecks(); changing = false; taxonomyCCB.getCheckModel().clearChecks(); for (Taxonomy t : checkedItemsTaxonomy) { taxonomyCCB.getCheckModel().check(t.toLongNameString()); } changing = true; logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); } } }); taxonomyCCB.getCheckModel().clearChecks(); } else { taxonomyCCB.setDisable(true); } displayTaxonomy = false; // set displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; if(displayTaxonomy){ minimalTaxonomyTF.setDisable(false); } else { minimalTaxonomyTF.setDisable(true); minimalTaxonomyTF.setText("1"); minimalTaxonomy = 1; } logger.info("display taxonomy: ", displayTaxonomy); }); displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); // writeMsdAtTheEnd = false; // writeMsdAtTheEndChB.setDisable(true); // // set // writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> { // writeMsdAtTheEnd = newValue; // logger.info("write msd at the end: ", writeMsdAtTheEnd); // }); // writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); // set default values minimalOccurrencesTF.setText("1"); minimalOccurrences = 1; minimalTaxonomyTF.setText("1"); minimalTaxonomy = 1; minimalTaxonomyTF.setDisable(true); minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { // focus lost String value = minimalOccurrencesTF.getText(); if (!ValidationUtil.isEmpty(value)) { if (!ValidationUtil.isNumber(value)) { logAlert("minimalOccurrencesTF: " + WARNING_ONLY_NUMBERS_ALLOWED); GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED); } else { minimalOccurrences = Integer.parseInt(value); } } else { minimalOccurrencesTF.setText("1"); minimalOccurrences = 1; } } }); minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { // focus lost String value = minimalTaxonomyTF.getText(); if (!ValidationUtil.isEmpty(value)) { if (!ValidationUtil.isNumber(value)) { logAlert("minimalTaxonomyTF: " + WARNING_ONLY_NUMBERS_ALLOWED); GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED); } else { minimalTaxonomy = Integer.parseInt(value); } } else { minimalTaxonomyTF.setText("1"); minimalTaxonomy = 1; } } }); computeNgramsB.setDisable(true); computeNgramsB.setOnAction(e -> { compute(); logger.info("compute button"); }); helpH.setOnAction(e -> openHelpWebsite()); cancel.setVisible(false); } /** * case a: values for combo boxes can change after a corpus change *
    *
  • different corpus type - reset all fields so no old values remain
  • *
  • same corpus type, different subset - keep
  • *
*

* case b: values for combo boxes can change after a header scan *

    *
  • at first, fields are populated by corpus type defaults
  • *
  • after, with gathered data
  • *
*

* ngrams: 1 * calculateFor: word * msd: * taxonomy: * skip: 0 * iscvv: false * string length: 1 */ // public void populateFields() { // // corpus changed if: current one is null (this is first run of the app) // // or if currentCorpus != gui's corpus // boolean corpusChanged = currentCorpusType == null // || currentCorpusType != corpus.getCorpusType(); // // // // TODO: check for GOS, GIGAFIDA, SOLAR... // // refresh and: // // TODO if current value != null && is in new calculateFor ? keep : otherwise reset // if (calculateFor == null) { // calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); // calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); // } // // if (!filter.hasMsd()) { // // if current corpus doesn't have msd data, disable this field // msd = new ArrayList<>(); // msdTF.setText(""); // msdTF.setDisable(true); // logger.info("no msd data"); // } else { // if (ValidationUtil.isEmpty(msd) // || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { // // msd has not been set previously // // or msd has been set but the corpus changed -> reset // msd = new ArrayList<>(); // msdTF.setText(""); // msdTF.setDisable(false); // logger.info("msd reset"); // } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { // // if msd has been set, but corpus type remained the same, we can keep any set msd value // msdTF.setText(StringUtils.join(msdStrings, " ")); // msdTF.setDisable(false); // logger.info("msd kept"); // } // } // // // TODO: trigger on rescan // if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { // // user changed corpus (by type) or by selection & triggered a rescan of headers // // see if we read taxonomy from headers, otherwise use default values for given corpus // ObservableList tax = corpus.getTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // // currentCorpusType = corpus.getCorpusType(); // // setTaxonomyIsDirty(false); // } else { // // } // // // see if we read taxonomy from headers, otherwise use default values for given corpus // ObservableList tax = corpus.getTaxonomy(); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCB.getItems().addAll(taxonomyCCBValues); // // } /** * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc., * sets combobox values to what is applicable ... * * @param mode */ public void toggleMode(MODE mode) { if (mode == null) { mode = currentMode; } logger.info("mode: ", mode.toString()); if (mode == MODE.WORD) { if (corpus.getCorpusType() == CorpusType.GOS) calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS); else calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS); } else if (mode == MODE.LETTER) { calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS); // if calculateFor was selected for something other than a word or a lemma -> reset if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) { // if the user selected something else before selecting ngram for letters, reset that choice calculateFor = CalculateFor.WORD; calculateForCB.getSelectionModel().select("različnica"); } } // override if orth mode, allow only word if (corpus.isGosOrthMode()) { calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH); msdTF.setDisable(true); } else { msdTF.setDisable(false); } calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); } private void compute() { Filter filter = new Filter(); filter.setNgramValue(1); filter.setCalculateFor(calculateFor); filter.setTaxonomy(taxonomy); filter.setDisplayTaxonomy(displayTaxonomy); filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setSkipValue(0); filter.setIsCvv(false); filter.setSolarFilters(solarFiltersMap); filter.setStringLength(1); filter.setMultipleKeys(alsoVisualize); // setMsd must be behind alsoVisualize filter.setMsd(msd); filter.setMinimalOccurrences(minimalOccurrences); filter.setMinimalTaxonomy(minimalTaxonomy); filter.setPrefixLength(prefixLength); filter.setSuffixLength(suffixLength); filter.setPrefixList(prefixList); filter.setSuffixList(suffixList); // filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd); String message = Validation.validateForStringLevel(filter); if (message == null) { // no errors logger.info("Executing: ", filter.toString()); StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb); execute(statistic); } else { logAlert(message); showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message); } } private void logAlert(String alert) { logger.info("alert: " + alert); } private void openHelpWebsite(){ hostService.showDocument(Messages.HELP_URL); } public Corpus getCorpus() { return corpus; } public void setCorpus(Corpus corpus) { this.corpus = corpus; if (corpus.getCorpusType() != CorpusType.SOLAR) { setSelectedFiltersLabel(null); } else { setSelectedFiltersLabel("/"); } } public void setSelectedFiltersLabel(String content) { if (content != null) { solarFilters.setVisible(true); selectedFiltersLabel.setVisible(true); selectedFiltersLabel.setText(content); } else { solarFilters.setVisible(false); selectedFiltersLabel.setVisible(false); } } private void execute(StatisticsNew statistic) { logger.info("Started execution: ", statistic.getFilter()); Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); boolean corpusIsSplit = corpusFiles.size() > 1; final Task task = new Task() { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { long i = 0; for (File f : corpusFiles) { readXML(f.toString(), statistic); i++; if (isCancelled()) { updateMessage(CANCELING_NOTIFICATION); break; } this.updateProgress(i, corpusFiles.size()); this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName())); } return null; } }; ngramProgressBar.progressProperty().bind(task.progressProperty()); progressLabel.textProperty().bind(task.messageProperty()); task.setOnSucceeded(e -> { try { boolean successullySaved = statistic.saveResultToDisk(); if (successullySaved) { showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED); } else { showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS); } } catch (UnsupportedEncodingException e1) { showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV); logger.error("Error while saving", e1); } ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); task.setOnFailed(e -> { showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING); logger.error("Error while executing", e); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); task.setOnCancelled(e -> { showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); }); // When cancel button is pressed cancel analysis cancel.setOnAction(e -> { task.cancel(); logger.info("cancel button"); }); cancel.setVisible(true); final Thread thread = new Thread(task, "task"); thread.setDaemon(true); thread.start(); } public void setSolarFiltersMap(HashMap> solarFiltersMap) { this.solarFiltersMap = solarFiltersMap; } public void setHostServices(HostServices hostServices){ this.hostService = hostServices; } }