From abc15360d3e63b46c1abf0f70bb307747d6eadb2 Mon Sep 17 00:00:00 2001 From: Luka Date: Mon, 5 Nov 2018 10:30:41 +0100 Subject: [PATCH] Added initial functionality for word parts implementation --- src/main/java/alg/ngram/Ngrams.java | 4 + src/main/java/data/Filter.java | 43 +- src/main/java/data/MultipleHMKeys.java | 3 + src/main/java/data/MultipleHMKeys1.java | 11 + src/main/java/data/MultipleHMKeys2.java | 12 + src/main/java/data/MultipleHMKeys3.java | 13 + src/main/java/data/MultipleHMKeys4.java | 15 + src/main/java/data/MultipleHMKeys5.java | 17 + src/main/java/data/StatisticsNew.java | 9 +- src/main/java/gui/StringAnalysisTabNew2.java | 73 +-- src/main/java/gui/WordLevelTab.java | 566 ++++++++++++++++--- src/main/resources/gui/WordLevelTab.fxml | 119 +++- 12 files changed, 764 insertions(+), 121 deletions(-) diff --git a/src/main/java/alg/ngram/Ngrams.java b/src/main/java/alg/ngram/Ngrams.java index 5f45544..ffd32c6 100755 --- a/src/main/java/alg/ngram/Ngrams.java +++ b/src/main/java/alg/ngram/Ngrams.java @@ -52,6 +52,10 @@ public class Ngrams { // generate proper MultipleHMKeys depending on filter data String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts()); + if(key.length() < stats.getFilter().getPrefixLength() + stats.getFilter().getSuffixLength()){ + continue; + } + // if last letter is ',' erase it // if (key.equals("")){ diff --git a/src/main/java/data/Filter.java b/src/main/java/data/Filter.java index 29b4673..84361de 100755 --- a/src/main/java/data/Filter.java +++ b/src/main/java/data/Filter.java @@ -29,7 +29,11 @@ public class Filter implements Cloneable { NOTE_PUNCTUATIONS, MINIMAL_OCCURRENCES, MINIMAL_TAXONOMY, - COLLOCABILITY + COLLOCABILITY, + PREFIX_LENGTH, + SUFFIX_LENGTH, + PREFIX_LIST, + SUFFIX_LIST } public Filter() { @@ -245,6 +249,43 @@ public class Filter implements Cloneable { return (Integer) filter.get(MINIMAL_TAXONOMY); } +// PREFIX_LENGTH, +// SUFFIX_LENGTH, +// PREFIX_LIST, 
+// SUFFIX_LIST + + public void setPrefixLength(Integer v) { + filter.put(PREFIX_LENGTH, v); + } + + public Integer getPrefixLength() { + return (Integer) filter.get(PREFIX_LENGTH); + } + + public void setSuffixLength(Integer v) { + filter.put(SUFFIX_LENGTH, v); + } + + public Integer getSuffixLength() { + return (Integer) filter.get(SUFFIX_LENGTH); + } + + public void setPrefixList(ArrayList v) { + filter.put(PREFIX_LIST, v); + } + + public ArrayList getPrefixList() { + return (ArrayList) filter.get(PREFIX_LIST); + } + + public void setSuffixList(ArrayList v) { + filter.put(SUFFIX_LIST, v); + } + + public ArrayList getSuffixList() { + return (ArrayList) filter.get(SUFFIX_LIST); + } + private void addWordPart(CalculateFor wp){ ArrayList oldWp = ((ArrayList) filter.get(WORD_PARTS)); diff --git a/src/main/java/data/MultipleHMKeys.java b/src/main/java/data/MultipleHMKeys.java index 18904d5..ff8f2a9 100755 --- a/src/main/java/data/MultipleHMKeys.java +++ b/src/main/java/data/MultipleHMKeys.java @@ -1,5 +1,6 @@ package data; +import java.util.ArrayList; import java.util.Objects; /* @@ -12,6 +13,8 @@ public interface MultipleHMKeys { default String getK4(){ return null; } default String getK5(){ return null; } + default ArrayList getSplittedMultipleHMKeys(){ return null; } + @Override int hashCode(); diff --git a/src/main/java/data/MultipleHMKeys1.java b/src/main/java/data/MultipleHMKeys1.java index 5c0898c..0ca07d9 100755 --- a/src/main/java/data/MultipleHMKeys1.java +++ b/src/main/java/data/MultipleHMKeys1.java @@ -1,5 +1,6 @@ package data; +import java.util.ArrayList; import java.util.Objects; /* @@ -16,6 +17,16 @@ public final class MultipleHMKeys1 implements MultipleHMKeys { return k1; } + public ArrayList getSplittedMultipleHMKeys(){ + ArrayList r = new ArrayList<>(); + String[] splitedK1 = k1.split("\\s+"); + for(int i = 0; i < splitedK1.length; i ++){ + MultipleHMKeys search = new MultipleHMKeys1(splitedK1[i]); + r.add(search); + } + return r; + } + @Override 
public int hashCode() { return k1.hashCode(); diff --git a/src/main/java/data/MultipleHMKeys2.java b/src/main/java/data/MultipleHMKeys2.java index 9456e50..852d2e0 100755 --- a/src/main/java/data/MultipleHMKeys2.java +++ b/src/main/java/data/MultipleHMKeys2.java @@ -1,5 +1,6 @@ package data; +import java.util.ArrayList; import java.util.Objects; /* @@ -21,6 +22,17 @@ public final class MultipleHMKeys2 implements MultipleHMKeys { return k2; } + public ArrayList getSplittedMultipleHMKeys(){ + ArrayList r = new ArrayList<>(); + String[] splitedK1 = k1.split("\\s+"); + String[] splitedK2 = k2.split("\\s+"); + for(int i = 0; i < splitedK1.length; i ++){ + MultipleHMKeys search = new MultipleHMKeys2(splitedK1[i], splitedK2[i]); + r.add(search); + } + return r; + } + @Override public int hashCode() { return Objects.hash(k1, k2); diff --git a/src/main/java/data/MultipleHMKeys3.java b/src/main/java/data/MultipleHMKeys3.java index 8342273..a39e617 100755 --- a/src/main/java/data/MultipleHMKeys3.java +++ b/src/main/java/data/MultipleHMKeys3.java @@ -1,5 +1,6 @@ package data; +import java.util.ArrayList; import java.util.Objects; /* @@ -26,6 +27,18 @@ public final class MultipleHMKeys3 implements MultipleHMKeys { return k3; } + public ArrayList getSplittedMultipleHMKeys(){ + ArrayList r = new ArrayList<>(); + String[] splitedK1 = k1.split("\\s+"); + String[] splitedK2 = k2.split("\\s+"); + String[] splitedK3 = k3.split("\\s+"); + for(int i = 0; i < splitedK1.length; i ++){ + MultipleHMKeys search = new MultipleHMKeys3(splitedK1[i], splitedK2[i], splitedK3[i]); + r.add(search); + } + return r; + } + @Override public int hashCode() { return Objects.hash(k1, k2, k3); diff --git a/src/main/java/data/MultipleHMKeys4.java b/src/main/java/data/MultipleHMKeys4.java index 1d8ccac..3dbc91b 100755 --- a/src/main/java/data/MultipleHMKeys4.java +++ b/src/main/java/data/MultipleHMKeys4.java @@ -1,5 +1,6 @@ package data; +import java.util.ArrayList; import java.util.Objects; /* @@ -31,6 
+32,20 @@ public final class MultipleHMKeys4 implements MultipleHMKeys { return k4; } + public ArrayList getSplittedMultipleHMKeys(){ + ArrayList r = new ArrayList<>(); + String[] splitedK1 = k1.split("\\s+"); + String[] splitedK2 = k2.split("\\s+"); + String[] splitedK3 = k3.split("\\s+"); + String[] splitedK4 = k4.split("\\s+"); + for(int i = 0; i < splitedK1.length; i ++){ + MultipleHMKeys search = new MultipleHMKeys4(splitedK1[i], splitedK2[i], splitedK3[i], splitedK4[i]); + r.add(search); + } + return r; + } + + @Override public int hashCode() { return Objects.hash(k1, k2, k3, k4); diff --git a/src/main/java/data/MultipleHMKeys5.java b/src/main/java/data/MultipleHMKeys5.java index 11d36df..0c9270f 100755 --- a/src/main/java/data/MultipleHMKeys5.java +++ b/src/main/java/data/MultipleHMKeys5.java @@ -1,5 +1,6 @@ package data; +import java.util.ArrayList; import java.util.Objects; /* @@ -36,6 +37,22 @@ public final class MultipleHMKeys5 implements MultipleHMKeys { return k5; } + public ArrayList getSplittedMultipleHMKeys(){ + ArrayList r = new ArrayList<>(); + String[] splitedK1 = k1.split("\\s+"); + String[] splitedK2 = k2.split("\\s+"); + String[] splitedK3 = k3.split("\\s+"); + String[] splitedK4 = k4.split("\\s+"); + String[] splitedK5 = k5.split("\\s+"); + for(int i = 0; i < splitedK1.length; i ++){ + MultipleHMKeys search = new MultipleHMKeys5(splitedK1[i], splitedK2[i], splitedK3[i], splitedK4[i], splitedK5[i]); + r.add(search); + } + return r; + } + + + @Override public int hashCode() { return Objects.hash(k1, k2, k3, k4, k5); diff --git a/src/main/java/data/StatisticsNew.java b/src/main/java/data/StatisticsNew.java index 96028a5..ada8244 100755 --- a/src/main/java/data/StatisticsNew.java +++ b/src/main/java/data/StatisticsNew.java @@ -495,12 +495,13 @@ public class StatisticsNew { Map collocabilityMap = new ConcurrentHashMap<>(); for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) { - String[] splitedString = hmKey.getK1().split("\\s+"); +// 
String[] splitedString = hmKey.getK1().split("\\s+"); long sum_fwi =0L; - for(String s : splitedString){ - MultipleHMKeys search = new MultipleHMKeys1(s); - sum_fwi += oneWordTaxonomyResult.get("Total").get(search).longValue(); + + for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){ + System.out.println(smallHmKey.getK1()); + sum_fwi += oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue(); } double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi; collocabilityMap.put(hmKey, dice_value); diff --git a/src/main/java/gui/StringAnalysisTabNew2.java b/src/main/java/gui/StringAnalysisTabNew2.java index 596f1a8..a183567 100755 --- a/src/main/java/gui/StringAnalysisTabNew2.java +++ b/src/main/java/gui/StringAnalysisTabNew2.java @@ -622,20 +622,20 @@ public class StringAnalysisTabNew2 { @SuppressWarnings("Duplicates") @Override protected Void call() throws Exception { - long i = 0; + long i = corpusFiles.size(); for (File f : corpusFiles) { readXML(f.toString(), statisticsOneGrams); -// i++; -// this.updateProgress(i, corpusFiles.size()); -// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName())); + i++; + this.updateProgress(i, corpusFiles.size() * 2); + this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); } return null; } }; -// ngramProgressBar.progressProperty().bind(task.progressProperty()); -// progressLabel.textProperty().bind(task.messageProperty()); + ngramProgressBar.progressProperty().bind(task.progressProperty()); + progressLabel.textProperty().bind(task.messageProperty()); task.setOnSucceeded(e -> { System.out.print("test"); @@ -671,32 +671,32 @@ public class StringAnalysisTabNew2 { // logger.error("Out of memory error", e1); // } // -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// 
progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); }); task.setOnFailed(e -> { -// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING); -// logger.error("Error while executing", e); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); + showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING); + logger.error("Error while executing", e); + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setProgress(0.0); + ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); }); task.setOnCancelled(e -> { -// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); + showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED); + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setProgress(0.0); + ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); }); // When cancel button is pressed cancel analysis @@ -729,8 +729,14 @@ public class StringAnalysisTabNew2 { updateMessage(CANCELING_NOTIFICATION); break; } - this.updateProgress(i, corpusFiles.size()); - this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, 
corpusFiles.size(), f.getName())); + if (statistic.getFilter().getCollocability().size() > 0) { + this.updateProgress(i, corpusFiles.size() * 2); + this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); + } else { + this.updateProgress(i, corpusFiles.size()); + this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName())); + } + this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); } return null; @@ -769,13 +775,14 @@ public class StringAnalysisTabNew2 { showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY); logger.error("Out of memory error", e1); } + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); } - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - cancel.setVisible(false); + }); task.setOnFailed(e -> { diff --git a/src/main/java/gui/WordLevelTab.java b/src/main/java/gui/WordLevelTab.java index f64241d..560b59d 100755 --- a/src/main/java/gui/WordLevelTab.java +++ b/src/main/java/gui/WordLevelTab.java @@ -1,45 +1,78 @@ package gui; -import static alg.XML_processing.*; -import static gui.GUIController.*; -import static gui.Messages.*; - -import java.io.File; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; - +import data.*; import javafx.application.HostServices; +import javafx.collections.FXCollections; +import javafx.collections.ListChangeListener; +import javafx.collections.ObservableList; +import javafx.concurrent.Task; +import javafx.fxml.FXML; import javafx.scene.control.*; import 
org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.controlsfx.control.CheckComboBox; -import data.*; -import javafx.collections.ListChangeListener; -import javafx.collections.ObservableList; -import javafx.concurrent.Task; -import javafx.fxml.FXML; -import javafx.scene.layout.AnchorPane; +import java.io.File; +import java.io.UnsupportedEncodingException; +import java.util.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.regex.Pattern; + +import static alg.XML_processing.readXML; +import static gui.GUIController.showAlert; +import static gui.Messages.*; @SuppressWarnings("Duplicates") public class WordLevelTab { - public final static Logger logger = LogManager.getLogger(WordLevelTab.class); - - public AnchorPane wordLevelAnalysisTabPane; + public final static Logger logger = LogManager.getLogger(OneWordAnalysisTab.class); @FXML public Label selectedFiltersLabel; @FXML public Label solarFilters; + @FXML + private TextField msdTF; + private ArrayList msd; + private ArrayList msdStrings; + + @FXML + private CheckComboBox alsoVisualizeCCB; + private ArrayList alsoVisualize; + @FXML private CheckComboBox taxonomyCCB; private ArrayList taxonomy; + @FXML + private CheckBox displayTaxonomyChB; + private boolean displayTaxonomy; + + @FXML + private ComboBox prefixLengthCB; + private Integer prefixLength; + + @FXML + private ComboBox suffixLengthCB; + private Integer suffixLength; + + @FXML + private TextField prefixListTF; + private ArrayList prefixList; + + @FXML + private TextField suffixListTF; + private ArrayList suffixList; + +// @FXML +// private CheckBox writeMsdAtTheEndChB; +// private boolean writeMsdAtTheEnd; + + @FXML + private ComboBox calculateForCB; + private CalculateFor calculateFor; + @FXML private TextField minimalOccurrencesTF; private Integer minimalOccurrences; @@ -49,7 +82,10 @@ public class WordLevelTab { private Integer minimalTaxonomy; @FXML - 
private Button computeB; + private Button computeNgramsB; + + @FXML + private Button cancel; @FXML public ProgressBar ngramProgressBar; @@ -59,17 +95,242 @@ public class WordLevelTab { @FXML private Hyperlink helpH; + private enum MODE { + LETTER, + WORD + } + + private MODE currentMode; + private Corpus corpus; private HashMap> solarFiltersMap; + private Filter filter; + private boolean useDb; private HostServices hostService; + private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka"); + private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); + private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); + private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica"); + private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); + private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); + private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); + private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); + private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); + private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); + + // TODO: pass observables for taxonomy based on header scan // after header scan private ObservableList taxonomyCCBValues; private 
CorpusType currentCorpusType; - private boolean useDb; - public void init() { + currentMode = MODE.WORD; + toggleMode(currentMode); + + AtomicBoolean writeMsdAtTheEndEnableCalculateFor = new AtomicBoolean(false); + + // calculateForCB + calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> { + calculateFor = CalculateFor.factory(newValue); + + alsoVisualizeCCB.getItems().removeAll(); + if(newValue.equals("lema")){ + alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma); + } else if(newValue.equals("različnica")) { + if (corpus.getCorpusType() == CorpusType.GOS) + alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWordGos); + else + alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord); + } else if(newValue.equals("normalizirana različnica")) { + alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord); + } else if(newValue.equals("oblikoskladenjska oznaka")) { +// writeMsdAtTheEndEnableCalculateFor.set(true); +// writeMsdAtTheEndChB.setDisable(false); + alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd); + }else { + + alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty); + } + +// if (!newValue.equals("oblikoskladenjska oznaka")){ +// writeMsdAtTheEnd = false; +// writeMsdAtTheEndChB.setSelected(false); +// writeMsdAtTheEndChB.setDisable(true); +// writeMsdAtTheEndEnableCalculateFor.set(false); +// } + + alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { + alsoVisualize = new ArrayList<>(); + ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); + alsoVisualize.addAll(checkedItems); + logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); + }); + alsoVisualizeCCB.getCheckModel().clearChecks(); + + logger.info("calculateForCB:", calculateFor.toString()); + }); + + calculateForCB.getSelectionModel().select(0); + + prefixLengthCB.getSelectionModel().select(0); + prefixLength = 0; + + 
prefixLengthCB.valueProperty().addListener((observable, oldValue, newValue) -> { + prefixLength = Integer.valueOf(newValue); + if(prefixLength > 0){ + prefixListTF.setDisable(true); + suffixListTF.setDisable(true); + } else if(prefixLength == 0 && suffixLength == 0){ + prefixListTF.setDisable(false); + suffixListTF.setDisable(false); + } + + if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) { + computeNgramsB.setDisable(false); + } else { + computeNgramsB.setDisable(true); + } + logger.info("Prefix length " + prefixLength); + }); + + suffixLengthCB.getSelectionModel().select(0); + suffixLength = 0; + + + suffixLengthCB.valueProperty().addListener((observable, oldValue, newValue) -> { + suffixLength = Integer.valueOf(newValue); + if(suffixLength > 0){ + prefixListTF.setDisable(true); + suffixListTF.setDisable(true); + } else if(prefixLength == 0 && suffixLength == 0){ + prefixListTF.setDisable(false); + suffixListTF.setDisable(false); + } + + if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) { + computeNgramsB.setDisable(false); + } else { + computeNgramsB.setDisable(true); + } + logger.info("Prefix length " + suffixLength); + }); + + prefixList = new ArrayList<>(); + + prefixListTF.textProperty().addListener((observable, oldValue, newValue) -> { + String value = prefixListTF.getText(); + prefixList = new ArrayList<>(); + if (!ValidationUtil.isEmpty(value)) { + for (String w : value.split(";")){ + w = w.replaceAll("\\s+",""); + if (!w.equals("")){ + prefixList.add(w); + } + } +// suffixList = value; + } + + System.out.println(prefixList); + + if(prefixList.size() > 0){ + prefixLengthCB.setDisable(true); + suffixLengthCB.setDisable(true); + } else if(suffixList.size() == 0){ + prefixLengthCB.setDisable(false); + suffixLengthCB.setDisable(false); + } + + if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) { + computeNgramsB.setDisable(false); + } 
else { + computeNgramsB.setDisable(true); + } + }); + + suffixList = new ArrayList<>(); + + suffixListTF.textProperty().addListener((observable, oldValue, newValue) -> { + String value = suffixListTF.getText(); + suffixList = new ArrayList<>(); + if (!ValidationUtil.isEmpty(value)) { + for (String w : value.split(";")){ + w = w.replaceAll("\\s+",""); + if (!w.equals("")){ + suffixList.add(w); + } + } +// suffixList = value; + } + System.out.println(suffixList); + if(suffixList.size() > 0){ + prefixLengthCB.setDisable(true); + suffixLengthCB.setDisable(true); + } else if(prefixList.size() == 0){ + prefixLengthCB.setDisable(false); + suffixLengthCB.setDisable(false); + } + + if(prefixLength > 0 || suffixLength > 0 || prefixList.size() > 0 || suffixList.size() > 0) { + computeNgramsB.setDisable(false); + } else { + computeNgramsB.setDisable(true); + } + }); +// prefixLengthCB.setDisable(true); + + + // msd + msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> { + if (!newValue) { + // focus lost + String value = msdTF.getText(); + logger.info("msdTf: ", value); + + if (!ValidationUtil.isEmpty(value)) { + ArrayList msdTmp = new ArrayList<>(Arrays.asList(value.split(" "))); + + int nOfRequiredMsdTokens = 1; + if (msdTmp.size() != nOfRequiredMsdTokens) { + String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size()); + logAlert(msg); + showAlert(Alert.AlertType.ERROR, msg); + } + msd = new ArrayList<>(); + msdStrings = new ArrayList<>(); + for (String msdToken : msdTmp) { + msd.add(Pattern.compile(msdToken)); + msdStrings.add(msdToken); + } + logger.info(String.format("msd accepted (%d)", msd.size())); + + } else if (!ValidationUtil.isEmpty(newValue)) { + msd = new ArrayList<>(); + msdStrings = new ArrayList<>(); + } + } + }); + + msdTF.setText(""); + msd = new ArrayList<>(); + + alsoVisualizeCCB.getItems().removeAll(); + alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma); + 
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { + alsoVisualize = new ArrayList<>(); + ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); + alsoVisualize.addAll(checkedItems); +// if (checkedItems.contains("oblikoskladenjska oznaka") || writeMsdAtTheEndEnableCalculateFor.get()){ +// writeMsdAtTheEndChB.setDisable(false); +// } else { +// writeMsdAtTheEnd = false; +// writeMsdAtTheEndChB.setSelected(false); +// writeMsdAtTheEndChB.setDisable(true); +// } + logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); + }); + alsoVisualizeCCB.getCheckModel().clearChecks(); + // taxonomy if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { taxonomyCCB.getItems().removeAll(); @@ -85,12 +346,29 @@ public class WordLevelTab { taxonomyCCB.setDisable(true); } - // set default values - minimalOccurrencesTF.setText("1"); - minimalOccurrences = 1; + displayTaxonomy = false; + // set + displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { + displayTaxonomy = newValue; + logger.info("display taxonomy: ", displayTaxonomy); + }); + displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); + +// writeMsdAtTheEnd = false; +// writeMsdAtTheEndChB.setDisable(true); +// // set +// writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> { +// writeMsdAtTheEnd = newValue; +// logger.info("write msd at the end: ", writeMsdAtTheEnd); +// }); +// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); - minimalTaxonomyTF.setText("1"); - minimalTaxonomy = 1; + // set default values + minimalOccurrencesTF.setText("1"); + minimalOccurrences = 1; + + minimalTaxonomyTF.setText("1"); + minimalTaxonomy = 1; minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> { if (!newValue) { @@ -128,29 +406,158 @@ public class WordLevelTab { } 
}); - computeB.setOnAction(e -> { + computeNgramsB.setDisable(true); + + computeNgramsB.setOnAction(e -> { compute(); logger.info("compute button"); }); - helpH.setOnAction(e -> openHelpWebsite()); + + cancel.setVisible(false); } - private void openHelpWebsite(){ - hostService.showDocument(Messages.HELP_URL); + /** + * case a: values for combo boxes can change after a corpus change + *
<ul> + * <li>different corpus type - reset all fields so no old values remain</li> + * <li>same corpus type, different subset - keep</li> + * </ul> + * <p>
 + * case b: values for combo boxes can change after a header scan + * <ul> + * <li>at first, fields are populated by corpus type defaults</li> + * <li>after, with gathered data</li> + * </ul> + * <p>
+ * ngrams: 1 + * calculateFor: word + * msd: + * taxonomy: + * skip: 0 + * iscvv: false + * string length: 1 + */ + public void populateFields() { + // corpus changed if: current one is null (this is first run of the app) + // or if currentCorpus != gui's corpus + boolean corpusChanged = currentCorpusType == null + || currentCorpusType != corpus.getCorpusType(); + + + // TODO: check for GOS, GIGAFIDA, SOLAR... + // refresh and: + // TODO if current value != null && is in new calculateFor ? keep : otherwise reset + if (calculateFor == null) { + calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); + calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); + } + + if (!filter.hasMsd()) { + // if current corpus doesn't have msd data, disable this field + msd = new ArrayList<>(); + msdTF.setText(""); + msdTF.setDisable(true); + logger.info("no msd data"); + } else { + if (ValidationUtil.isEmpty(msd) + || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { + // msd has not been set previously + // or msd has been set but the corpus changed -> reset + msd = new ArrayList<>(); + msdTF.setText(""); + msdTF.setDisable(false); + logger.info("msd reset"); + } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { + // if msd has been set, but corpus type remained the same, we can keep any set msd value + msdTF.setText(StringUtils.join(msdStrings, " ")); + msdTF.setDisable(false); + logger.info("msd kept"); + } + } + + // TODO: trigger on rescan + if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { + // user changed corpus (by type) or by selection & triggered a rescan of headers + // see if we read taxonomy from headers, otherwise use default values for given corpus + ObservableList tax = corpus.getTaxonomy(); + taxonomyCCBValues = tax != null ? 
tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); + + currentCorpusType = corpus.getCorpusType(); + // setTaxonomyIsDirty(false); + } else { + + } + + // see if we read taxonomy from headers, otherwise use default values for given corpus + ObservableList tax = corpus.getTaxonomy(); + taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); + taxonomyCCB.getItems().addAll(taxonomyCCBValues); + } + + /** + * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc., + * sets combobox values to what is applicable ... + * + * @param mode + */ + public void toggleMode(MODE mode) { + if (mode == null) { + mode = currentMode; + } + + logger.info("mode: ", mode.toString()); + + if (mode == MODE.WORD) { + if (corpus.getCorpusType() == CorpusType.GOS) + calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS); + else + calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS); + } else if (mode == MODE.LETTER) { + calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS); + + + // if calculateFor was selected for something other than a word or a lemma -> reset + if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) { + // if the user selected something else before selecting ngram for letters, reset that choice + calculateFor = CalculateFor.WORD; + calculateForCB.getSelectionModel().select("različnica"); + } + } + + // override if orth mode, allow only word + if (corpus.isGosOrthMode()) { + calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH); + msdTF.setDisable(true); + } else { + msdTF.setDisable(false); + } + calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); + } + private void compute() { Filter filter = new Filter(); filter.setNgramValue(1); - filter.setCalculateFor(CalculateFor.WORD); + filter.setCalculateFor(calculateFor); filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, 
corpus.getCorpusType())); - filter.setAl(AnalysisLevel.WORD_LEVEL); + filter.setDisplayTaxonomy(displayTaxonomy); + filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setSkipValue(0); - filter.setMsd(new ArrayList<>()); filter.setIsCvv(false); filter.setSolarFilters(solarFiltersMap); + filter.setStringLength(1); + filter.setMultipleKeys(alsoVisualize); + + // setMsd must be behind alsoVisualize + filter.setMsd(msd); filter.setMinimalOccurrences(minimalOccurrences); filter.setMinimalTaxonomy(minimalTaxonomy); + filter.setPrefixLength(prefixLength); + filter.setSuffixLength(suffixLength); + filter.setPrefixList(prefixList); + filter.setSuffixList(suffixList); +// filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd); String message = Validation.validateForStringLevel(filter); if (message == null) { @@ -164,10 +571,44 @@ public class WordLevelTab { } } + private void logAlert(String alert) { + logger.info("alert: " + alert); + } + + private void openHelpWebsite(){ + hostService.showDocument(Messages.HELP_URL); + } + + public Corpus getCorpus() { + return corpus; + } + + public void setCorpus(Corpus corpus) { + this.corpus = corpus; + + if (corpus.getCorpusType() != CorpusType.SOLAR) { + setSelectedFiltersLabel(null); + } else { + setSelectedFiltersLabel("/"); + } + } + + public void setSelectedFiltersLabel(String content) { + if (content != null) { + solarFilters.setVisible(true); + selectedFiltersLabel.setVisible(true); + selectedFiltersLabel.setText(content); + } else { + solarFilters.setVisible(false); + selectedFiltersLabel.setVisible(false); + } + } + private void execute(StatisticsNew statistic) { logger.info("Started execution: ", statistic.getFilter()); Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); + boolean corpusIsSplit = corpusFiles.size() > 1; final Task task = new Task() { @SuppressWarnings("Duplicates") @@ -177,6 +618,10 @@ public class WordLevelTab { for (File f : corpusFiles) { readXML(f.toString(), statistic); i++; + if (isCancelled()) 
{ + updateMessage(CANCELING_NOTIFICATION); + break; + } this.updateProgress(i, corpusFiles.size()); this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName())); } @@ -190,9 +635,7 @@ public class WordLevelTab { task.setOnSucceeded(e -> { try { - // first, we have to recalculate all occurrences to detailed statistics - boolean successullySaved = statistic.saveResultNestedToDisk(); - + boolean successullySaved = statistic.saveResultToDisk(); if (successullySaved) { showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED); } else { @@ -207,6 +650,7 @@ public class WordLevelTab { ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); + cancel.setVisible(false); }); task.setOnFailed(e -> { @@ -217,44 +661,36 @@ public class WordLevelTab { ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); progressLabel.textProperty().unbind(); progressLabel.setText(""); + cancel.setVisible(false); }); + task.setOnCancelled(e -> { + showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED); + ngramProgressBar.progressProperty().unbind(); + ngramProgressBar.setProgress(0.0); + ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); + progressLabel.textProperty().unbind(); + progressLabel.setText(""); + cancel.setVisible(false); + }); + + // When cancel button is pressed cancel analysis + cancel.setOnAction(e -> { + task.cancel(); + logger.info("cancel button"); + }); + + cancel.setVisible(true); final Thread thread = new Thread(task, "task"); thread.setDaemon(true); thread.start(); } - private void logAlert(String alert) { - logger.info("alert: " + alert); - } - - - public void setCorpus(Corpus corpus) { - this.corpus = corpus; - - if (corpus.getCorpusType() != CorpusType.SOLAR) { - setSelectedFiltersLabel(null); - } else { - setSelectedFiltersLabel("/"); - } - } - - public void setSelectedFiltersLabel(String content) { - if (content != 
null) { - solarFilters.setVisible(true); - selectedFiltersLabel.setVisible(true); - selectedFiltersLabel.setText(content); - } else { - solarFilters.setVisible(false); - selectedFiltersLabel.setVisible(false); - } - } - public void setSolarFiltersMap(HashMap> solarFiltersMap) { this.solarFiltersMap = solarFiltersMap; } - public void setHostServices(HostServices hostServices){ this.hostService = hostServices; } + } diff --git a/src/main/resources/gui/WordLevelTab.fxml b/src/main/resources/gui/WordLevelTab.fxml index 6a1b7a4..7684b86 100755 --- a/src/main/resources/gui/WordLevelTab.fxml +++ b/src/main/resources/gui/WordLevelTab.fxml @@ -1,32 +1,115 @@ - - + + + + + + + + + + + - -