Some functionality from OneWord copied to StringAnalysis and fixed

This commit is contained in:
Luka 2018-08-22 09:11:14 +02:00
parent e140a9538b
commit a8d147de52
12 changed files with 289 additions and 89 deletions

View File

@ -67,22 +67,43 @@ public class Ngrams {
multipleKeys = new MultipleHMKeys1(key); multipleKeys = new MultipleHMKeys1(key);
break; break;
case 1: case 1:
multipleKeys = new MultipleHMKeys2(key, wordToString(ngramCandidate, otherKeys.get(0))); String k1_2 = wordToString(ngramCandidate, otherKeys.get(0));
if (stats.getFilter().getNotePunctuations())
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
multipleKeys = new MultipleHMKeys2(key, k1_2);
break; break;
case 2: case 2:
multipleKeys = new MultipleHMKeys3(key, wordToString(ngramCandidate, otherKeys.get(0)), String k2_2 = wordToString(ngramCandidate, otherKeys.get(0));
wordToString(ngramCandidate, otherKeys.get(1))); String k2_3 = wordToString(ngramCandidate, otherKeys.get(1));
if (stats.getFilter().getNotePunctuations()) {
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
}
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
break; break;
case 3: case 3:
multipleKeys = new MultipleHMKeys4(key, wordToString(ngramCandidate, otherKeys.get(0)), String k3_2 = wordToString(ngramCandidate, otherKeys.get(0));
wordToString(ngramCandidate, otherKeys.get(1)), String k3_3 = wordToString(ngramCandidate, otherKeys.get(1));
wordToString(ngramCandidate, otherKeys.get(2))); String k3_4 = wordToString(ngramCandidate, otherKeys.get(2));
if (stats.getFilter().getNotePunctuations()) {
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
}
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
break; break;
case 4: case 4:
multipleKeys = new MultipleHMKeys5(key, wordToString(ngramCandidate, otherKeys.get(0)), String k4_2 = wordToString(ngramCandidate, otherKeys.get(0));
wordToString(ngramCandidate, otherKeys.get(1)), String k4_3 = wordToString(ngramCandidate, otherKeys.get(1));
wordToString(ngramCandidate, otherKeys.get(2)), String k4_4 = wordToString(ngramCandidate, otherKeys.get(2));
wordToString(ngramCandidate, otherKeys.get(3))); String k4_5 = wordToString(ngramCandidate, otherKeys.get(3));
if (stats.getFilter().getNotePunctuations()) {
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
}
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
break; break;
default: default:
multipleKeys = null; multipleKeys = null;
@ -265,7 +286,7 @@ public class Ngrams {
currentLoop.add(checkAndModifySkipgramPunctuation(sentence, i, j, stats)); currentLoop.add(checkAndModifySkipgramPunctuation(sentence, i, j, stats));
currentLoop.add(sentence.get(j)); currentLoop.add(sentence.get(j));
validateAndCountSkipgramCandidate(currentLoop, stats); validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
} else { } else {
for (int k = j + 1; k <= j + 1 + skip; k++) { // 3gram for (int k = j + 1; k <= j + 1 + skip; k++) { // 3gram
if (ngram == 3 && k < sentence.size()) { if (ngram == 3 && k < sentence.size()) {
@ -274,7 +295,7 @@ public class Ngrams {
currentLoop.add(checkAndModifySkipgramPunctuation(sentence, j, k, stats)); currentLoop.add(checkAndModifySkipgramPunctuation(sentence, j, k, stats));
currentLoop.add(sentence.get(k)); currentLoop.add(sentence.get(k));
validateAndCountSkipgramCandidate(currentLoop, stats); validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
} else { } else {
for (int l = k + 1; l <= k + 1 + skip; l++) { // 4gram for (int l = k + 1; l <= k + 1 + skip; l++) { // 4gram
if (ngram == 4 && l < sentence.size()) { if (ngram == 4 && l < sentence.size()) {
@ -284,7 +305,7 @@ public class Ngrams {
currentLoop.add(checkAndModifySkipgramPunctuation(sentence, k, l, stats)); currentLoop.add(checkAndModifySkipgramPunctuation(sentence, k, l, stats));
currentLoop.add(sentence.get(l)); currentLoop.add(sentence.get(l));
validateAndCountSkipgramCandidate(currentLoop, stats); validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
} else { } else {
for (int m = l + 1; m <= l + 1 + skip; m++) { // 5gram for (int m = l + 1; m <= l + 1 + skip; m++) { // 5gram
if (ngram == 5 && m < sentence.size()) { if (ngram == 5 && m < sentence.size()) {
@ -295,7 +316,7 @@ public class Ngrams {
currentLoop.add(checkAndModifySkipgramPunctuation(sentence, l, m, stats)); currentLoop.add(checkAndModifySkipgramPunctuation(sentence, l, m, stats));
currentLoop.add(sentence.get(m)); currentLoop.add(sentence.get(m));
validateAndCountSkipgramCandidate(currentLoop, stats); validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
} }
} }
} }
@ -308,13 +329,80 @@ public class Ngrams {
} }
} }
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats) { private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<String> taxonomy) {
// count if no regex is set or if it is & candidate passes it // count if no regex is set or if it is & candidate passes it
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) { if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) {
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor()); // String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; // key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
stats.updateTaxonomyResults(new MultipleHMKeys1(key), // stats.updateTaxonomyResults(new MultipleHMKeys1(key),
stats.getCorpus().getTaxonomy()); // stats.getCorpus().getTaxonomy());
ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
// if last letter is ',' erase it
// if (key.equals("")){
// String test = key;
// }
if (stats.getFilter().getNotePunctuations())
key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
MultipleHMKeys multipleKeys;
// create MultipleHMKeys for different amount of other keys
switch (otherKeys.size()) {
case 0:
multipleKeys = new MultipleHMKeys1(key);
break;
case 1:
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0));
if (stats.getFilter().getNotePunctuations())
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
multipleKeys = new MultipleHMKeys2(key, k1_2);
break;
case 2:
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0));
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1));
if (stats.getFilter().getNotePunctuations()) {
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
}
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
break;
case 3:
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0));
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1));
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2));
if (stats.getFilter().getNotePunctuations()) {
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
}
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
break;
case 4:
String k4_2 = wordToString(skipgramCandidate, otherKeys.get(0));
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1));
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2));
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3));
if (stats.getFilter().getNotePunctuations()) {
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
}
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
break;
default:
multipleKeys = null;
}
stats.updateTaxonomyResults(multipleKeys, taxonomy);
} }
} }
} }

View File

@ -22,6 +22,7 @@ public class Filter {
IS_CVV, IS_CVV,
STRING_LENGTH, STRING_LENGTH,
TAXONOMY, TAXONOMY,
DISPLAY_TAXONOMY,
MSD, MSD,
HAS_MSD, HAS_MSD,
SOLAR_FILTERS, SOLAR_FILTERS,
@ -102,6 +103,14 @@ public class Filter {
} }
} }
/**
 * Stores the "display taxonomy" flag in the filter map under {@code DISPLAY_TAXONOMY}.
 *
 * @param displayTaxonomy value to store
 */
public void setDisplayTaxonomy(boolean displayTaxonomy) {
    final Boolean flag = Boolean.valueOf(displayTaxonomy);
    filter.put(DISPLAY_TAXONOMY, flag);
}
/**
 * Reads the "display taxonomy" flag stored under {@code DISPLAY_TAXONOMY}.
 *
 * @return the stored flag, or {@code false} when no value has been stored yet
 */
public boolean getDisplayTaxonomy() {
    // A missing entry comes back as null; unboxing it directly would throw a
    // NullPointerException, so an unset flag is treated as "do not display".
    Object stored = filter.get(DISPLAY_TAXONOMY);
    return Boolean.TRUE.equals(stored);
}
public void setMsd(ArrayList<Pattern> msd) { public void setMsd(ArrayList<Pattern> msd) {
filter.put(MSD, msd); filter.put(MSD, msd);
if (!ValidationUtil.isEmpty(msd)) { if (!ValidationUtil.isEmpty(msd)) {

View File

@ -48,7 +48,7 @@ public class StatisticsNew {
this.taxonomyResult.put("Total", new ConcurrentHashMap<>()); this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
// create table for counting word occurrences per taxonomy // create table for counting word occurrences per taxonomy
if (this.corpus.getTaxonomy() != null) { if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
if (this.filter.getTaxonomy().isEmpty()) { if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) { for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>()); this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());

View File

@ -43,6 +43,10 @@ public class CharacterAnalysisTab {
private CheckComboBox<String> taxonomyCCB; private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy; private ArrayList<String> taxonomy;
@FXML
private CheckBox displayTaxonomyChB;
private boolean displayTaxonomy;
@FXML @FXML
private CheckBox calculatecvvCB; private CheckBox calculatecvvCB;
private boolean calculateCvv; private boolean calculateCvv;
@ -171,6 +175,14 @@ public class CharacterAnalysisTab {
taxonomyCCB.setDisable(true); taxonomyCCB.setDisable(true);
} }
displayTaxonomy = false;
// set
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
displayTaxonomy = newValue;
logger.info("display taxonomy: ", displayTaxonomy);
});
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
// cvv // cvv
calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> { calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
calculateCvv = newValue; calculateCvv = newValue;
@ -390,6 +402,7 @@ public class CharacterAnalysisTab {
filter.setCalculateFor(calculateFor); filter.setCalculateFor(calculateFor);
filter.setMsd(msd); filter.setMsd(msd);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0); filter.setSkipValue(0);
filter.setIsCvv(calculateCvv); filter.setIsCvv(calculateCvv);

View File

@ -46,9 +46,9 @@ public class CorpusTab {
private CheckBox readHeaderInfoChB; private CheckBox readHeaderInfoChB;
private boolean readHeaderInfo; private boolean readHeaderInfo;
@FXML // @FXML
private CheckBox gosUseOrthChB; // private CheckBox gosUseOrthChB;
private boolean gosUseOrth; // private boolean gosUseOrth;
@FXML @FXML
private Button chooseResultsLocationB; private Button chooseResultsLocationB;
@ -102,21 +102,21 @@ public class CorpusTab {
}); });
readHeaderInfoChB.setTooltip(new Tooltip(TOOLTIP_readHeaderInfoChB)); readHeaderInfoChB.setTooltip(new Tooltip(TOOLTIP_readHeaderInfoChB));
gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> { // gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
gosUseOrth = newValue; // gosUseOrth = newValue;
corpus.setGosOrthMode(gosUseOrth); // corpus.setGosOrthMode(gosUseOrth);
// wordFormationTab.setDisable(gosUseOrth); //// wordFormationTab.setDisable(gosUseOrth);
satNew2Controller.toggleMode(null); // satNew2Controller.toggleMode(null);
oneWordTabController.toggleMode(null); // oneWordTabController.toggleMode(null);
catController.toggleMode(null); // catController.toggleMode(null);
//
logger.info("gosUseOrth: ", gosUseOrth); // logger.info("gosUseOrth: ", gosUseOrth);
}); // });
chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null)); chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null));
// set labels and toggle visibility // set labels and toggle visibility
toggleGosChBVisibility(); // toggleGosChBVisibility();
chooseCorpusLabelContent = Messages.LABEL_CORPUS_LOCATION_NOT_SET; chooseCorpusLabelContent = Messages.LABEL_CORPUS_LOCATION_NOT_SET;
chooseCorpusL.setText(chooseCorpusLabelContent); chooseCorpusL.setText(chooseCorpusLabelContent);
@ -227,7 +227,7 @@ public class CorpusTab {
private void setResults() { private void setResults() {
// if everything is ok // if everything is ok
// check and enable checkbox if GOS // check and enable checkbox if GOS
toggleGosChBVisibility(); // toggleGosChBVisibility();
// set default results location // set default results location
String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath(); String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath();
@ -420,9 +420,9 @@ public class CorpusTab {
/** /**
* Hides GOS related checkbox until needed. * Hides GOS related checkbox until needed.
*/ */
private void toggleGosChBVisibility() { // private void toggleGosChBVisibility() {
gosUseOrthChB.setVisible(corpus != null && corpus.getCorpusType() != null && corpus.getCorpusType() == CorpusType.GOS); // gosUseOrthChB.setVisible(corpus != null && corpus.getCorpusType() != null && corpus.getCorpusType() == CorpusType.GOS);
} // }
private String detectCorpusType(Collection<File> corpusFiles, String corpusLocation) { private String detectCorpusType(Collection<File> corpusFiles, String corpusLocation) {
// check that we recognize this corpus // check that we recognize this corpus

View File

@ -55,6 +55,7 @@ public class Messages {
public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa."; public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.";
public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki."; public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.";
public static final String TOOLTIP_readNotePunctuationsChB = "Ločila med povedmi se upoštevajo v vsakem primeru."; public static final String TOOLTIP_readNotePunctuationsChB = "Ločila med povedmi se upoštevajo v vsakem primeru.";
public static final String TOOLTIP_readDisplayTaxonomyChB = "V izhodni datoteki bodo prikazane tudi statistike po taksonomijah.";

View File

@ -45,6 +45,10 @@ public class OneWordAnalysisTab {
private CheckComboBox<String> taxonomyCCB; private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy; private ArrayList<String> taxonomy;
@FXML
private CheckBox displayTaxonomyChB;
private boolean displayTaxonomy;
@FXML @FXML
private ComboBox<String> calculateForCB; private ComboBox<String> calculateForCB;
private CalculateFor calculateFor; private CalculateFor calculateFor;
@ -91,7 +95,7 @@ public class OneWordAnalysisTab {
private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "različnica"); private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// TODO: pass observables for taxonomy based on header scan // TODO: pass observables for taxonomy based on header scan
@ -192,6 +196,14 @@ public class OneWordAnalysisTab {
taxonomyCCB.setDisable(true); taxonomyCCB.setDisable(true);
} }
displayTaxonomy = false;
// set
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
displayTaxonomy = newValue;
logger.info("display taxonomy: ", displayTaxonomy);
});
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
// set default values // set default values
minimalOccurrencesTF.setText("1"); minimalOccurrencesTF.setText("1");
minimalOccurrences = 1; minimalOccurrences = 1;
@ -369,6 +381,7 @@ public class OneWordAnalysisTab {
filter.setCalculateFor(calculateFor); filter.setCalculateFor(calculateFor);
filter.setMsd(msd); filter.setMsd(msd);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0); filter.setSkipValue(0);
filter.setIsCvv(false); filter.setIsCvv(false);

View File

@ -38,6 +38,10 @@ public class StringAnalysisTabNew2 {
private ArrayList<Pattern> msd; private ArrayList<Pattern> msd;
private ArrayList<String> msdStrings; private ArrayList<String> msdStrings;
@FXML
private CheckComboBox<String> alsoVisualizeCCB;
private ArrayList<String> alsoVisualize;
@FXML @FXML
private CheckComboBox<String> taxonomyCCB; private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy; private ArrayList<String> taxonomy;
@ -66,6 +70,10 @@ public class StringAnalysisTabNew2 {
private CheckBox notePunctuationsChB; private CheckBox notePunctuationsChB;
private boolean notePunctuations; private boolean notePunctuations;
@FXML
private CheckBox displayTaxonomyChB;
private boolean displayTaxonomy;
@FXML @FXML
private TextField minimalOccurrencesTF; private TextField minimalOccurrencesTF;
private Integer minimalOccurrences; private Integer minimalOccurrences;
@ -111,6 +119,12 @@ public class StringAnalysisTabNew2 {
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// TODO: pass observables for taxonomy based on header scan // TODO: pass observables for taxonomy based on header scan
@ -163,9 +177,39 @@ public class StringAnalysisTabNew2 {
}); });
notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB)); notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB));
displayTaxonomy = false;
// set
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
displayTaxonomy = newValue;
logger.info("display taxonomy: ", displayTaxonomy);
});
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
// calculateForCB // calculateForCB
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> { calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
calculateFor = CalculateFor.factory(newValue); calculateFor = CalculateFor.factory(newValue);
alsoVisualizeCCB.getItems().removeAll();
if(newValue.equals("lema")){
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
} else if(newValue.equals("različnica")) {
if (corpus.getCorpusType() == CorpusType.GOS)
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWordGos);
else
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
} else if(newValue.equals("normalizirana različnica")) {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
}else {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
}
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
alsoVisualize = new ArrayList<>();
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
alsoVisualize.addAll(checkedItems);
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
});
alsoVisualizeCCB.getCheckModel().clearChecks();
logger.info("calculateForCB:", calculateFor.toString()); logger.info("calculateForCB:", calculateFor.toString());
}); });
@ -205,6 +249,16 @@ public class StringAnalysisTabNew2 {
msdTF.setText(""); msdTF.setText("");
msd = new ArrayList<>(); msd = new ArrayList<>();
alsoVisualizeCCB.getItems().removeAll();
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsLemma);
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
alsoVisualize = new ArrayList<>();
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
alsoVisualize.addAll(checkedItems);
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
});
alsoVisualizeCCB.getCheckModel().clearChecks();
// taxonomy // taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().removeAll();
@ -424,7 +478,11 @@ public class StringAnalysisTabNew2 {
// if (corpus.getCorpusType() == CorpusType.GOS) // if (corpus.getCorpusType() == CorpusType.GOS)
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS); // calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
// else // else
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS); if (corpus.getCorpusType() == CorpusType.GOS)
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
else
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
} else if (mode == MODE.LETTER) { } else if (mode == MODE.LETTER) {
paneWords.setVisible(false); paneWords.setVisible(false);
@ -462,11 +520,13 @@ public class StringAnalysisTabNew2 {
filter.setCalculateFor(calculateFor); filter.setCalculateFor(calculateFor);
filter.setMsd(msd); filter.setMsd(msd);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(skipValue); filter.setSkipValue(skipValue);
filter.setIsCvv(calculateCvv); filter.setIsCvv(calculateCvv);
filter.setSolarFilters(solarFiltersMap); filter.setSolarFilters(solarFiltersMap);
filter.setNotePunctuations(notePunctuations); filter.setNotePunctuations(notePunctuations);
filter.setMultipleKeys(alsoVisualize);
filter.setMinimalOccurrences(minimalOccurrences); filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy); filter.setMinimalTaxonomy(minimalTaxonomy);

View File

@ -27,17 +27,21 @@
<RadioButton fx:id="varietyRB" mnemonicParsing="false" text="različnica" toggleGroup="$calculateForRB" /> <RadioButton fx:id="varietyRB" mnemonicParsing="false" text="različnica" toggleGroup="$calculateForRB" />
</children> </children>
</HBox> </HBox>
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" />
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD" />
<TextField fx:id="msdTF" layoutX="185.0" layoutY="160.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija" />
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Min. št. pojavitev" /> <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" /> <CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. taksonomij" /> <Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" /> <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD" />
<TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija" />
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0"> <Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
<children> <children>
@ -45,7 +49,7 @@
</children> </children>
</Pane> </Pane>
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="422.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" /> <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
</Pane> </Pane>
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:" /> <Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:" />

View File

@ -19,7 +19,7 @@
<Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0"> <Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0">
<children> <children>
<Label fx:id="chooseCorpusL" prefHeight="50.0" prefWidth="704.0" text="Label"/> <Label fx:id="chooseCorpusL" prefHeight="50.0" prefWidth="704.0" text="Label"/>
<CheckBox fx:id="gosUseOrthChB" layoutY="65.0" mnemonicParsing="false" text="Uporabi pogovorni zapis"/> <!--<CheckBox fx:id="gosUseOrthChB" layoutY="65.0" mnemonicParsing="false" text="Uporabi pogovorni zapis"/>-->
</children> </children>
</Pane> </Pane>
<ProgressIndicator fx:id="locationScanPI" layoutX="10.0" layoutY="60.0" prefHeight="50.0" progress="0.0"/> <ProgressIndicator fx:id="locationScanPI" layoutX="10.0" layoutY="60.0" prefHeight="50.0" progress="0.0"/>

View File

@ -32,20 +32,24 @@
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" /> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
<CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
<CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
<!-- MSD and Taxonomy separated --> <!-- MSD and Taxonomy separated -->
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" /> <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD"/> <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="msdTF" layoutX="185.0" layoutY="160.0" prefWidth="180.0"/> <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Min. št. pojavitev" /> <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" /> <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/> prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>

View File

@ -19,8 +19,8 @@
<Pane fx:id="paneWords"> <Pane fx:id="paneWords">
<children> <children>
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Preskok besed" /> <Label layoutX="10.0" layoutY="180.0" prefHeight="25.0" text="Preskok besed" />
<ComboBox fx:id="skipValueCB" layoutX="185.0" layoutY="100.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5"> <ComboBox fx:id="skipValueCB" layoutX="185.0" layoutY="180.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
<items> <items>
<FXCollections fx:factory="observableArrayList"> <FXCollections fx:factory="observableArrayList">
<String fx:value="0" /> <String fx:value="0" />
@ -37,17 +37,6 @@
</children> </children>
</Pane> </Pane>
<!-- for some reason following two ComboBoxes have to be below paneWords --> <!-- for some reason following two ComboBoxes have to be below paneWords -->
<ComboBox fx:id="ngramValueCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
<items>
<FXCollections fx:factory="observableArrayList">
<String fx:value="2" />
<String fx:value="3" />
<String fx:value="4" />
<String fx:value="5" />
</FXCollections>
</items>
</ComboBox>
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za" /> <Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za" />
<ComboBox fx:id="calculateForCB" layoutX="185.0" layoutY="20.0" minWidth="180.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5"> <ComboBox fx:id="calculateForCB" layoutX="185.0" layoutY="20.0" minWidth="180.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
<items> <items>
@ -61,34 +50,53 @@
</items> </items>
</ComboBox> </ComboBox>
<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Upoštevaj ločila" /> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" />
<CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="145.0" selected="true" /> <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
<CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="N-gram nivo" />
<ComboBox fx:id="ngramValueCB" layoutX="185.0" layoutY="140.0" prefHeight="25.0" prefWidth="180.0" promptText="izberi" visibleRowCount="5">
<items>
<FXCollections fx:factory="observableArrayList">
<String fx:value="2" />
<String fx:value="3" />
<String fx:value="4" />
<String fx:value="5" />
</FXCollections>
</items>
</ComboBox>
<Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Upoštevaj ločila" />
<CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="true" />
<!-- MSD and Taxonomy separated --> <!-- MSD and Taxonomy separated -->
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" /> <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Omejitev podatkov" />
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD" /> <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Oznaka MSD" />
<TextField fx:id="msdTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0" /> <TextField fx:id="msdTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija" /> <Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Taksonomija" />
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0" /> <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="360.0" prefHeight="25.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="400.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="400.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="360.0" prefWidth="180.0" />
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="N-gram nivo" />
</Pane> </Pane>
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0"> <Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" /> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" />
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" /> <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
<Label fx:id="solarFilters" layoutX="10.0" layoutY="100.0" text="Izbrani filtri:" />
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="300.0" prefWidth="275.0" text=" " wrapText="true" />
<!-- samoglasniki/soglasniki --> <!-- samoglasniki/soglasniki -->
<Pane fx:id="paneLetters"> <Pane fx:id="paneLetters">
<children> <children>