From 1c00f1a283bc1a5b553485d9d07379c1756e5013 Mon Sep 17 00:00:00 2001 From: Luka Date: Tue, 28 Aug 2018 11:41:19 +0200 Subject: [PATCH] Reimplementation of other signs (,/*() etc.) in ngrams. --- src/main/java/alg/XML_processing.java | 38 +++--- src/main/java/alg/ngram/Ngrams.java | 108 +++++++++--------- src/main/java/data/CalculateFor.java | 23 ++++ src/main/java/data/Filter.java | 11 ++ src/main/java/gui/OneWordAnalysisTab.java | 39 ++++++- src/main/java/gui/StringAnalysisTabNew2.java | 5 +- src/main/java/util/Export.java | 49 +++++++- .../resources/gui/OneWordAnalysisTab.fxml | 21 ++-- .../resources/gui/StringAnalysisTabNew2.fxml | 2 +- 9 files changed, 204 insertions(+), 92 deletions(-) diff --git a/src/main/java/alg/XML_processing.java b/src/main/java/alg/XML_processing.java index 44f2dc8..6f1386f 100755 --- a/src/main/java/alg/XML_processing.java +++ b/src/main/java/alg/XML_processing.java @@ -260,6 +260,12 @@ public class XML_processing { } else if (qName.equals("c3")) { String c3Content = eventReader.nextEvent().asCharacters().getData(); + if(stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && + stavek.size() > 0){ + stavek.add(new Word(c3Content, c3Content, "/")); + + } + if (c3Content.equals(".") && includeThisBlock) { // add sentence to corpus corpus.add(new Sentence(stavek, null)); @@ -277,9 +283,6 @@ public class XML_processing { corpus.clear(); } } - else if(includeThisBlock){ - inPunctuation = true; - } } else if (headTags.contains(qName)) { String tagContent = eventReader.nextEvent().asCharacters().getData(); headBlock.put(qName, tagContent); @@ -296,17 +299,7 @@ public class XML_processing { if (in_word) { stavek.add(new Word(characters.getData(), lemma, msd)); in_word = false; - } else if(inPunctuation){ - String punctuation = ","; - - if (stavek.size() > 0){ - stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation); - stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation); - stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation); - } - - inPunctuation = false; - } + } break; case XMLStreamConstants.END_ELEMENT: @@ -548,13 +541,16 @@ public class XML_processing { inWord = false; } if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { -// String punctuation = characters.getData(); - String punctuation = ","; - - sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation); - sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation); - sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation); - inPunctuation = false; + String punctuation = characters.getData(); + sentence.add(new Word(punctuation, punctuation, "/")); + inPunctuation = false; + +// String punctuation = ","; +// +// sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation); +// sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation); +// sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation); +// inPunctuation = false; } break; diff --git a/src/main/java/alg/ngram/Ngrams.java b/src/main/java/alg/ngram/Ngrams.java index 20a61ca..c0c0155 100755 --- a/src/main/java/alg/ngram/Ngrams.java +++ b/src/main/java/alg/ngram/Ngrams.java @@ -56,8 +56,8 @@ public class Ngrams { // String test = key; // } - if (stats.getFilter().getNotePunctuations()) - key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; +// if (stats.getFilter().getNotePunctuations()) +// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; MultipleHMKeys multipleKeys; @@ -68,28 +68,28 @@ public class Ngrams { break; case 1: String k1_2 = wordToString(ngramCandidate, otherKeys.get(0)); - if (stats.getFilter().getNotePunctuations()) - k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2; +// if (stats.getFilter().getNotePunctuations()) +// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2; multipleKeys = new MultipleHMKeys2(key, k1_2); break; case 2: String k2_2 = wordToString(ngramCandidate, otherKeys.get(0)); String k2_3 = wordToString(ngramCandidate, otherKeys.get(1)); - if (stats.getFilter().getNotePunctuations()) { - k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2; - k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3; - } +// if (stats.getFilter().getNotePunctuations()) { +// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2; +// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3; +// } multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3); break; case 3: String k3_2 = wordToString(ngramCandidate, otherKeys.get(0)); String k3_3 = wordToString(ngramCandidate, otherKeys.get(1)); String k3_4 = wordToString(ngramCandidate, otherKeys.get(2)); - if (stats.getFilter().getNotePunctuations()) { - k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2; - k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3; - k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4; - } +// if (stats.getFilter().getNotePunctuations()) { +// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2; +// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3; +// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4; +// } multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4); break; case 4: @@ -97,12 +97,12 @@ public class Ngrams { String k4_3 = wordToString(ngramCandidate, otherKeys.get(1)); String k4_4 = wordToString(ngramCandidate, otherKeys.get(2)); String k4_5 = wordToString(ngramCandidate, otherKeys.get(3)); - if (stats.getFilter().getNotePunctuations()) { - k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2; - k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3; - k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4; - k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5; - } +// if (stats.getFilter().getNotePunctuations()) { +// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2; +// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3; +// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4; +// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5; +// } multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5); break; default: @@ -241,22 +241,22 @@ public class Ngrams { */ private static Word checkAndModifySkipgramPunctuation(List sentence, int i, int j, StatisticsNew stats){ // if punctuation checkbox selected and there words at indexes i and j are not next to each other - if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){ - boolean middleWordsHavePunctuation = false; - for (int n = i + 1; n < j; n++){ - if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){ - middleWordsHavePunctuation = true; - break; - } - } - if (middleWordsHavePunctuation){ - - String punctuation = ","; - return new Word(sentence.get(i).getWord() + punctuation, - sentence.get(i).getLemma() + punctuation, - sentence.get(i).getMsd() + punctuation); - } - } +// if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){ +// boolean middleWordsHavePunctuation = false; +// for (int n = i + 1; n < j; n++){ +// if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){ +// middleWordsHavePunctuation = true; +// break; +// } +// } +// if (middleWordsHavePunctuation){ +// +// String punctuation = ","; +// return new Word(sentence.get(i).getWord() + punctuation, +// sentence.get(i).getLemma() + punctuation, +// sentence.get(i).getMsd() + punctuation); +// } +// } return sentence.get(i); } @@ -348,8 +348,8 @@ public class Ngrams { // String test = key; // } - if (stats.getFilter().getNotePunctuations()) - key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; +// if (stats.getFilter().getNotePunctuations()) +// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; MultipleHMKeys multipleKeys; @@ -360,28 +360,28 @@ public class Ngrams { break; case 1: String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0)); - if (stats.getFilter().getNotePunctuations()) - k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2; +// if (stats.getFilter().getNotePunctuations()) +// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2; multipleKeys = new MultipleHMKeys2(key, k1_2); break; case 2: String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0)); String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1)); - if (stats.getFilter().getNotePunctuations()) { - k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2; - k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3; - } +// if (stats.getFilter().getNotePunctuations()) { +// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2; +// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3; +// } multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3); break; case 3: String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0)); String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1)); String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2)); - if (stats.getFilter().getNotePunctuations()) { - k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2; - k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3; - k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4; - } +// if (stats.getFilter().getNotePunctuations()) { +// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2; +// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3; +// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4; +// } multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4); break; case 4: @@ -389,12 +389,12 @@ public class Ngrams { String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1)); String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2)); String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3)); - if (stats.getFilter().getNotePunctuations()) { - k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2; - k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3; - k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4; - k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5; - } +// if (stats.getFilter().getNotePunctuations()) { +// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2; +// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3; +// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4; +// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5; +// } multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5); break; default: diff --git a/src/main/java/data/CalculateFor.java b/src/main/java/data/CalculateFor.java index 1dde1b2..5ff938e 100755 --- a/src/main/java/data/CalculateFor.java +++ b/src/main/java/data/CalculateFor.java @@ -45,6 +45,29 @@ public enum CalculateFor { return null; } + public String toMetadataString() { + switch(this){ + case WORD: + return "Skupna vsota vseh različnic:"; + case NORMALIZED_WORD: + return "Skupna vsota vseh normaliziranih različnic:"; + case LEMMA: + return "Skupna vsota vseh lem:"; + case MORPHOSYNTACTIC_SPECS: + return "Skupna vsota vseh oblikoskladenjskih oznak:"; + case MORPHOSYNTACTIC_PROPERTY: + return "Skupna vsota vseh oblikoskladenjskih lastnosti:"; + case WORD_TYPE: + return "Skupna vsota vseh besednih vrst:"; + case DIST_WORDS: + return "Skupna vsota vseh različnic:"; + case DIST_LEMMAS: + return "Skupna vsota vseh lem:"; + default: + return null; + } + } + public String toHeaderString() { switch(this){ case WORD: diff --git a/src/main/java/data/Filter.java b/src/main/java/data/Filter.java index ee87823..0f5ee59 100755 --- a/src/main/java/data/Filter.java +++ b/src/main/java/data/Filter.java @@ -25,6 +25,7 @@ public class Filter { DISPLAY_TAXONOMY, MSD, HAS_MSD, + WRITE_MSD_AT_THE_END, SOLAR_FILTERS, MULTIPLE_KEYS, NOTE_PUNCTUATIONS, @@ -34,6 +35,7 @@ public class Filter { public Filter() { filter = new HashMap<>(); + filter.put(WRITE_MSD_AT_THE_END, false); } public Filter(AnalysisLevel al, CalculateFor cf) { @@ -41,6 +43,7 @@ public class Filter { filter.put(ANALYSIS_LEVEL, al); filter.put(CALCULATE_FOR, cf); + filter.put(WRITE_MSD_AT_THE_END, false); } public void setAl(AnalysisLevel al) { @@ -124,6 +127,14 @@ public class Filter { return (ArrayList) filter.get(MSD); } + public void setWriteMsdAtTheEnd(boolean writeMsdAtTheEnd) { + filter.put(WRITE_MSD_AT_THE_END, writeMsdAtTheEnd); + } + + public boolean getWriteMsdAtTheEnd() { + return (boolean) filter.get(WRITE_MSD_AT_THE_END); + } + public void setHasMsd(boolean hasMsd) { filter.put(HAS_MSD, hasMsd); } diff --git a/src/main/java/gui/OneWordAnalysisTab.java b/src/main/java/gui/OneWordAnalysisTab.java index 8c40595..65c55fe 100755 --- a/src/main/java/gui/OneWordAnalysisTab.java +++ b/src/main/java/gui/OneWordAnalysisTab.java @@ -8,7 +8,6 @@ import javafx.collections.ObservableList; import javafx.concurrent.Task; import javafx.fxml.FXML; import javafx.scene.control.*; -import javafx.scene.layout.Pane; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -17,6 +16,7 @@ import org.controlsfx.control.CheckComboBox; import java.io.File; import java.io.UnsupportedEncodingException; import java.util.*; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.regex.Pattern; import static alg.XML_processing.readXML; @@ -49,6 +49,10 @@ public class OneWordAnalysisTab { private CheckBox displayTaxonomyChB; private boolean displayTaxonomy; + @FXML + private CheckBox writeMsdAtTheEndChB; + private boolean writeMsdAtTheEnd; + @FXML private ComboBox calculateForCB; private CalculateFor calculateFor; @@ -96,6 +100,7 @@ public class OneWordAnalysisTab { private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); + private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); // TODO: pass observables for taxonomy based on header scan @@ -107,6 +112,8 @@ public class OneWordAnalysisTab { currentMode = MODE.WORD; toggleMode(currentMode); + AtomicBoolean writeMsdAtTheEndEnableCalculateFor = new AtomicBoolean(false); + // calculateForCB calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> { calculateFor = CalculateFor.factory(newValue); @@ -121,9 +128,22 @@ public class OneWordAnalysisTab { alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord); } else if(newValue.equals("normalizirana različnica")) { alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord); + } else if(newValue.equals("oblikoskladenjska oznaka")) { + writeMsdAtTheEndEnableCalculateFor.set(true); + writeMsdAtTheEndChB.setDisable(false); + alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd); }else { + alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty); } + + if (!newValue.equals("oblikoskladenjska oznaka")){ + writeMsdAtTheEnd = false; + writeMsdAtTheEndChB.setSelected(false); + writeMsdAtTheEndChB.setDisable(true); + writeMsdAtTheEndEnableCalculateFor.set(false); + } + alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { alsoVisualize = new ArrayList<>(); ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); @@ -177,6 +197,13 @@ public class OneWordAnalysisTab { alsoVisualize = new ArrayList<>(); ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); alsoVisualize.addAll(checkedItems); + if (checkedItems.contains("oblikoskladenjska oznaka") || writeMsdAtTheEndEnableCalculateFor.get()){ + writeMsdAtTheEndChB.setDisable(false); + } else { + writeMsdAtTheEnd = false; + writeMsdAtTheEndChB.setSelected(false); + writeMsdAtTheEndChB.setDisable(true); + } logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); }); alsoVisualizeCCB.getCheckModel().clearChecks(); @@ -204,6 +231,15 @@ public class OneWordAnalysisTab { }); displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); + writeMsdAtTheEnd = false; + writeMsdAtTheEndChB.setDisable(true); + // set + writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> { + writeMsdAtTheEnd = newValue; + logger.info("write msd at the end: ", writeMsdAtTheEnd); + }); +// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); + // set default values minimalOccurrencesTF.setText("1"); minimalOccurrences = 1; @@ -390,6 +426,7 @@ public class OneWordAnalysisTab { filter.setMultipleKeys(alsoVisualize); filter.setMinimalOccurrences(minimalOccurrences); filter.setMinimalTaxonomy(minimalTaxonomy); + filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd); String message = Validation.validateForStringLevel(filter); if (message == null) { diff --git a/src/main/java/gui/StringAnalysisTabNew2.java b/src/main/java/gui/StringAnalysisTabNew2.java index 2c0445e..6bcf801 100755 --- a/src/main/java/gui/StringAnalysisTabNew2.java +++ b/src/main/java/gui/StringAnalysisTabNew2.java @@ -124,6 +124,7 @@ public class StringAnalysisTabNew2 { private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); + private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); @@ -169,7 +170,7 @@ public class StringAnalysisTabNew2 { minimalTaxonomyTF.setText("1"); minimalTaxonomy = 1; - notePunctuations = true; + notePunctuations = false; // set notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> { notePunctuations = newValue; @@ -199,6 +200,8 @@ public class StringAnalysisTabNew2 { alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord); } else if(newValue.equals("normalizirana različnica")) { alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord); + }else if(newValue.equals("oblikoskladenjska oznaka")) { + alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd); }else { alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty); } diff --git a/src/main/java/util/Export.java b/src/main/java/util/Export.java index 704ba9a..0b9f812 100755 --- a/src/main/java/util/Export.java +++ b/src/main/java/util/Export.java @@ -90,6 +90,7 @@ public class Export { FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString()); if (filter.getCalculateFor().equals(CalculateFor.LEMMA)) FILE_HEADER_AL.add("Lema male črke"); + headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies)); // if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) { // if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) { @@ -161,7 +162,7 @@ public class Export { // } else { // FILE_HEADER_AL.add("Delež glede na vse leme"); // } - FILE_HEADER_AL.add("Skupna relativna pogostost"); + FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)"); for (String key : taxonomyResults.keySet()) { if(!key.equals("Total")) { FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]"); @@ -213,8 +214,7 @@ public class Export { for (Map.Entry e : map.entrySet()) { List dataEntry = new ArrayList<>(); dataEntry.add(e.getKey().getK1()); - if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi")) && - headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")){ + if (filter.getCalculateFor().equals(CalculateFor.LEMMA)){ dataEntry.add(e.getKey().getK1().toLowerCase()); } @@ -255,16 +255,55 @@ public class Export { // } dataEntry.add(e.getValue().toString()); dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies)); - dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies)); + dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies)); for (String key : taxonomyResults.keySet()){ if(!key.equals("Total")) { AtomicLong frequency = taxonomyResults.get(key).get(e.getKey()); dataEntry.add(frequency.toString()); dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key))); - dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key))); + dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key))); } } + // Write msd separated per letters at the end of each line in csv + if (filter.getWriteMsdAtTheEnd()) { + String msd = ""; + if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){ + msd = e.getKey().getK1(); + } else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) { + i = 0; + for (CalculateFor otherKey : filter.getMultipleKeys()){ + switch(i){ + case 0: + if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){ + msd = e.getKey().getK2(); + } + break; + case 1: + if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){ + msd = e.getKey().getK3(); + } + break; + case 2: + if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){ + msd = e.getKey().getK4(); + } + break; + case 3: + if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){ + msd = e.getKey().getK5(); + } + break; + } + + i++; + } + } + String [] charArray = msd.split("(?!^)"); + dataEntry.addAll(Arrays.asList(charArray)); + + } + csvFilePrinter.printRecord(dataEntry); } } catch (Exception e) { diff --git a/src/main/resources/gui/OneWordAnalysisTab.fxml b/src/main/resources/gui/OneWordAnalysisTab.fxml index 2d396ae..34ec208 100755 --- a/src/main/resources/gui/OneWordAnalysisTab.fxml +++ b/src/main/resources/gui/OneWordAnalysisTab.fxml @@ -37,19 +37,22 @@