From 10666b44536b3030cb43ecc2f4f5fdb6f814e478 Mon Sep 17 00:00:00 2001 From: Luka Date: Tue, 19 Mar 2019 14:06:39 +0100 Subject: [PATCH] Added translation fixes + punctuation in export fix --- src/main/java/alg/XML_processing.java | 4 +- src/main/java/alg/ngram/Ngrams.java | 10 + src/main/java/data/CalculateFor.java | 20 + src/main/java/data/Corpus.java | 11 + src/main/java/data/Filter.java | 4 + src/main/java/data/StatisticsNew.java | 49 +- src/main/java/data/Word.java | 10 +- src/main/java/gui/CharacterAnalysisTab.java | 4 +- src/main/java/gui/CorpusTab.java | 40 +- src/main/java/gui/I18N.java | 24 + src/main/java/gui/OneWordAnalysisTab.java | 8 +- src/main/java/gui/StringAnalysisTabNew2.java | 8 +- src/main/java/gui/WordFormationTab.java | 520 +++++++++---------- src/main/java/gui/WordLevelTab.java | 8 +- src/main/java/util/Export.java | 213 ++++---- src/main/java/util/Util.java | 25 +- src/main/resources/gui/CorpusTab.fxml | 7 + src/main/resources/message_en.properties | 21 +- src/main/resources/message_sl.properties | 21 +- 19 files changed, 586 insertions(+), 421 deletions(-) diff --git a/src/main/java/alg/XML_processing.java b/src/main/java/alg/XML_processing.java index ffd14f8..dad180c 100755 --- a/src/main/java/alg/XML_processing.java +++ b/src/main/java/alg/XML_processing.java @@ -1744,7 +1744,7 @@ public class XML_processing { // if we're calculating values for letters, omit words that are shorter than string length if (filter.getNgramValue() == 0) { - sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord(filter.getWordParts()).length() < filter.getStringLength()) + sentence.removeIf(w -> ((filter.getCalculateFor() == CalculateFor.WORD || filter.getCalculateFor() == CalculateFor.LOWERCASE_WORD) && w.getWord(filter.getWordParts()).length() < filter.getStringLength()) || (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma(filter.getWordParts()).length() < filter.getStringLength())); } } @@ -1766,7 +1766,7 @@ public
class XML_processing { public static Word createWord(String word, String lemma, String msd, String normalizedWord, Filter f){ List wString = new ArrayList<>(); - if (f.getWordParts().contains(CalculateFor.WORD)) + if (f.getWordParts().contains(CalculateFor.WORD) || f.getWordParts().contains(CalculateFor.LOWERCASE_WORD)) wString.add(word); if (f.getWordParts().contains(CalculateFor.LEMMA)) wString.add(lemma); diff --git a/src/main/java/alg/ngram/Ngrams.java b/src/main/java/alg/ngram/Ngrams.java index e93dcf9..44658dd 100755 --- a/src/main/java/alg/ngram/Ngrams.java +++ b/src/main/java/alg/ngram/Ngrams.java @@ -222,6 +222,12 @@ public class Ngrams { .map(w -> w.getLemma(wordParts)) .collect(Collectors.toList())); return StringUtils.join(candidate, " "); + case LOWERCASE_WORD: + candidate.addAll(ngramCandidate + .stream() + .map(w -> w.getWord(wordParts).toLowerCase()) + .collect(Collectors.toList())); + return StringUtils.join(candidate, " "); case WORD: candidate.addAll(ngramCandidate .stream() @@ -298,6 +304,10 @@ public class Ngrams { continue; } + if(stats.getFilter().getCalculateFor().equals(CalculateFor.LOWERCASE_WORD)){ + word = word.toLowerCase(); + } + for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) { // TODO: locila? 
diff --git a/src/main/java/data/CalculateFor.java b/src/main/java/data/CalculateFor.java index 3401535..baa1327 100755 --- a/src/main/java/data/CalculateFor.java +++ b/src/main/java/data/CalculateFor.java @@ -22,6 +22,7 @@ public enum CalculateFor { // DIST_LEMMAS("lema"); WORD("calculateFor.WORD"), + LOWERCASE_WORD("calculateFor.LOWERCASE_WORD"), NORMALIZED_WORD("calculateFor.NORMALIZED_WORD"), LEMMA("calculateFor.LEMMA"), MORPHOSYNTACTIC_SPECS("calculateFor.MORPHOSYNTACTIC_SPECS"), @@ -47,6 +48,9 @@ public enum CalculateFor { if (WORD.toString().equals(cf)) { return WORD; } + if (LOWERCASE_WORD.toString().equals(cf)) { + return LOWERCASE_WORD; + } if (LEMMA.toString().equals(cf)) { return LEMMA; } @@ -71,6 +75,8 @@ public enum CalculateFor { switch (this) { case WORD: return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.word"); + case LOWERCASE_WORD: + return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.lowercaseWord"); case NORMALIZED_WORD: return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.normalizedWord"); case LEMMA: @@ -92,6 +98,8 @@ public enum CalculateFor { switch (this) { case WORD: return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.word"); + case LOWERCASE_WORD: + return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.lowercaseWord"); case NORMALIZED_WORD: return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.normalizedWord"); case LEMMA: @@ -118,6 +126,8 @@ public enum CalculateFor { switch (this) { case WORD: return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.word"); + case LOWERCASE_WORD: + return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.lowercaseWord"); case NORMALIZED_WORD: return I18N.get("exportTable.part.totalFoundLetters") + " " + 
I18N.get("exportTable.part.normalizedWord"); case LEMMA: @@ -139,6 +149,8 @@ public enum CalculateFor { switch (this) { case WORD: return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.word"); + case LOWERCASE_WORD: + return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.lowercaseWord"); case NORMALIZED_WORD: return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.normalizedWord"); case LEMMA: @@ -168,6 +180,8 @@ public enum CalculateFor { switch(this){ case WORD: return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.word2"); + case LOWERCASE_WORD: + return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.lowercaseWord2"); case NORMALIZED_WORD: return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.normalizedWord2"); case LEMMA: @@ -194,6 +208,8 @@ public enum CalculateFor { switch(this){ case WORD: return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.word2"); + case LOWERCASE_WORD: + return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.lowercaseWord2"); case NORMALIZED_WORD: return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.normalizedWord2"); case LEMMA: @@ -221,6 +237,8 @@ public enum CalculateFor { case WORD: case DIST_WORDS: return I18N.get("exportTable.part.word3"); + case LOWERCASE_WORD: + return I18N.get("exportTable.part.lowercaseWord3"); case NORMALIZED_WORD: return I18N.get("exportTable.part.normalizedWord3"); case LEMMA: @@ -240,6 +258,8 @@ public enum CalculateFor { case WORD: case DIST_WORDS: return I18N.get("exportTable.part.word3") + " " + I18N.get("exportTable.part.set"); + case LOWERCASE_WORD: + return I18N.get("exportTable.part.lowercaseWord3") + " " + I18N.get("exportTable.part.set"); case NORMALIZED_WORD: return I18N.get("exportTable.part.normalizedWord3") + " " + 
I18N.get("exportTable.part.set"); case LEMMA: diff --git a/src/main/java/data/Corpus.java b/src/main/java/data/Corpus.java index 3a3ff18..6b1e9b4 100755 --- a/src/main/java/data/Corpus.java +++ b/src/main/java/data/Corpus.java @@ -36,6 +36,7 @@ public class Corpus { boolean hasMsdData; private ArrayList validationErrors; private String corpusName = ""; + private String punctuation = "punctuation.COMMA"; public Corpus() { validationErrors = new ArrayList<>(); @@ -52,6 +53,16 @@ public class Corpus { logger.info("Corpus.set: ", corpusName); } + public String getPunctuation() { + return punctuation; + } + + public void setPunctuation(String punctuation) { +// System.out.println(corpusName); + this.punctuation = punctuation; + logger.info("Punctuation.set: ", punctuation); + } + public CorpusType getCorpusType() { return corpusType; } diff --git a/src/main/java/data/Filter.java b/src/main/java/data/Filter.java index f8f6128..90eb0f0 100755 --- a/src/main/java/data/Filter.java +++ b/src/main/java/data/Filter.java @@ -320,6 +320,10 @@ public class Filter implements Cloneable { ArrayList oldWp = ((ArrayList) filter.get(WORD_PARTS)); switch (wp) { + case LOWERCASE_WORD: + if (!oldWp.contains(CalculateFor.LOWERCASE_WORD)) + oldWp.add(CalculateFor.LOWERCASE_WORD); + break; case WORD: case DIST_WORDS: if (!oldWp.contains(CalculateFor.WORD)) diff --git a/src/main/java/data/StatisticsNew.java b/src/main/java/data/StatisticsNew.java index 215e2ba..b670f8f 100755 --- a/src/main/java/data/StatisticsNew.java +++ b/src/main/java/data/StatisticsNew.java @@ -324,28 +324,28 @@ public class StatisticsNew { return true; } - public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException { - filter.setAl(AnalysisLevel.WORD_FORMATION); - resultTitle = generateResultTitle(); - - if (useDB) { - result = db.getDump(); - db.delete(); - } - - // if no results and nothing to save, return false - if (!(result.size() > 0)) { - analysisProducedResults = false; - return false; - 
} else { - analysisProducedResults = true; - } - - WordFormation.calculateStatistics(this); - - Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock()); - return true; - } +// public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException { +// filter.setAl(AnalysisLevel.WORD_FORMATION); +// resultTitle = generateResultTitle(); +// +// if (useDB) { +// result = db.getDump(); +// db.delete(); +// } +// +// // if no results and nothing to save, return false +// if (!(result.size() > 0)) { +// analysisProducedResults = false; +// return false; +// } else { +// analysisProducedResults = true; +// } +// +// WordFormation.calculateStatistics(this); +// +// Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock()); +// return true; +// } private Map> sortNestedMap(Map> nestedMap, int limit) { Map> sorted = new HashMap<>(); @@ -682,6 +682,11 @@ public class StatisticsNew { info.put(I18N.get("exportHeader.minOccurrences"), String.valueOf(filter.getMinimalOccurrences())); info.put(I18N.get("exportHeader.minTaxonomies"), String.valueOf(filter.getMinimalTaxonomy())); + // if not letters extraction + if(filter.getNgramValue() > 0) { + info.put(I18N.get("exportHeader.minRelFre"), String.valueOf(filter.getMinimalRelFre())); + } + if (corpus.getCorpusType() == CorpusType.SOLAR) { HashMap> filters = corpus.getSolarSelectedFilters(); diff --git a/src/main/java/data/Word.java b/src/main/java/data/Word.java index 1dc6ba7..94e1ce0 100755 --- a/src/main/java/data/Word.java +++ b/src/main/java/data/Word.java @@ -28,7 +28,11 @@ public interface Word { } default String getWord(ArrayList wordParts){ - return get(wordParts, CalculateFor.WORD); + String w = get(wordParts, CalculateFor.WORD); + if (w == null){ + return get(wordParts, CalculateFor.LOWERCASE_WORD); + } + return w; } default String getLemma(ArrayList wordParts){ @@ -102,9 +106,9 @@ public interface Word { String returnValue = ""; 
if (cvv) { - returnValue = calculateFor == CalculateFor.WORD ? getCVVWord(cf) : getCVVLemma(cf); + returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getCVVWord(cf) : getCVVLemma(cf); } else { - returnValue = calculateFor == CalculateFor.WORD ? getWord(cf) : getLemma(cf); + returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getWord(cf) : getLemma(cf); } return returnValue; diff --git a/src/main/java/gui/CharacterAnalysisTab.java b/src/main/java/gui/CharacterAnalysisTab.java index 1cd3e80..247a7b2 100755 --- a/src/main/java/gui/CharacterAnalysisTab.java +++ b/src/main/java/gui/CharacterAnalysisTab.java @@ -179,7 +179,7 @@ public class CharacterAnalysisTab { private ChangeListener minimalOccurrencesListener; private ChangeListener minimalTaxonomyListener; - private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA"}; + private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"}; private static final ArrayList N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY)); private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"}; @@ -623,7 +623,7 @@ public class CharacterAnalysisTab { } // if calculateFor was selected for something other than a word or a lemma -> reset - if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) { + if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA || calculateFor == CalculateFor.LOWERCASE_WORD)) { // if the user selected something else before selecting ngram for letters, reset that choice calculateFor = CalculateFor.WORD; diff --git a/src/main/java/gui/CorpusTab.java b/src/main/java/gui/CorpusTab.java index 45df198..7102018 100755 --- 
a/src/main/java/gui/CorpusTab.java +++ b/src/main/java/gui/CorpusTab.java @@ -92,6 +92,9 @@ public class CorpusTab { @FXML public Label outputNameL; + @FXML + public Label punctuationL; + @FXML public ImageView chooseCorpusLocationI; @@ -107,6 +110,9 @@ public class CorpusTab { @FXML public ImageView outputNameI; + @FXML + public ImageView punctuationI; + @FXML public TextField outputNameTF; public String outputName = ""; @@ -115,6 +121,10 @@ public class CorpusTab { public ComboBox selectReaderCB; public String selectReader; + @FXML + public ComboBox punctuationCB; + public String punctuation; + @FXML private ProgressIndicator locationScanPI; @@ -137,7 +147,7 @@ public class CorpusTab { private OneWordAnalysisTab oneWordTabController; private CharacterAnalysisTab catController; private FiltersForSolar ffsController; - private WordFormationTab wfController; +// private WordFormationTab wfController; private WordLevelTab wlController; private HostServices hostService; @@ -146,6 +156,10 @@ public class CorpusTab { private static final String [] SELECT_READER_ARRAY = {"VERT + REGI", "XML (Šolar 1.0)", "XML (GOS 1.0)", "XML (ssj500k 2.1)", "XML (Gigafida 2.0)", "XML (Gigafida 1.0, Kres 1.0)"}; private static final ArrayList SELECT_READER = new ArrayList<>(Arrays.asList(SELECT_READER_ARRAY)); + + private static final String [] PUNCTUATION_ARRAY = {"punctuation.COMMA", "punctuation.POINT"}; + private static final ArrayList PUNCTUATION = new ArrayList<>(Arrays.asList(PUNCTUATION_ARRAY)); + private Collection corpusFiles; private File selectedDirectory; @@ -219,6 +233,23 @@ public class CorpusTab { selectReaderCB.getSelectionModel().select(0); + // comma / point choice + punctuationCB.valueProperty().addListener((observable, oldValue, newValue) -> { + if(newValue == null){ +// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS); + newValue = I18N.getTranslatedValue(oldValue, PUNCTUATION); + punctuationCB.getSelectionModel().select(newValue); + } +// 
System.out.println(oldValue); +// System.out.println(newValue); + punctuation = newValue; + if(corpus != null) { + corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION)); + } + }); + + punctuationCB.getSelectionModel().select(0); + // add listeners chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation()); // chooseCorpusLocationB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_chooseCorpusLocationB"))); @@ -301,12 +332,16 @@ public class CorpusTab { readHeaderInfoL.textProperty().bind(I18N.createStringBinding("label.readHeaderInfo")); selectReaderL.textProperty().bind(I18N.createStringBinding("label.selectReader")); outputNameL.textProperty().bind(I18N.createStringBinding("label.outputName")); + punctuationL.textProperty().bind(I18N.createStringBinding("label.punctuation")); addTooltipToImage(chooseCorpusLocationI, I18N.createStringBinding("label.corpusTab.chooseCorpusLocationH")); addTooltipToImage(readHeaderInfoI, I18N.createStringBinding("label.corpusTab.readHeaderInfoH")); addTooltipToImage(chooseResultsLocationI, I18N.createStringBinding("label.corpusTab.chooseResultsLocationH")); addTooltipToImage(selectReaderI, I18N.createStringBinding("label.corpusTab.selectReaderH")); addTooltipToImage(outputNameI, I18N.createStringBinding("label.corpusTab.outputNameH")); + addTooltipToImage(punctuationI, I18N.createStringBinding("label.corpusTab.punctuationH")); + + punctuationCB.itemsProperty().bind(I18N.createObjectBinding(PUNCTUATION)); } private void togglePiAndSetCorpusWrapper(boolean piIsActive) { @@ -456,8 +491,9 @@ public class CorpusTab { } } } - System.out.println(outputName); +// System.out.println(outputName); corpus.setCorpusName(outputName); + corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION)); } /** diff --git a/src/main/java/gui/I18N.java b/src/main/java/gui/I18N.java index 51c01e0..9a6494f 100644 --- a/src/main/java/gui/I18N.java +++ b/src/main/java/gui/I18N.java @@ -167,6 +167,30 @@ public final class I18N { // return 
MessageFormat.format(bundle.getString(key), args); } + public static String getRootValue(String oldValue, ArrayList nGramComputeForLetters) { + Locale loc; + if(getLocale().equals(Locale.ENGLISH)) { + loc = new Locale.Builder().setLanguage("sl").setRegion("SI").build(); + } else { + loc = Locale.ENGLISH; + } + + for (String el : nGramComputeForLetters){ + if (oldValue.equals(getIndependent(el, loc))){ + return el; + } + } + + // in case translated language doesn't contain specified word, try original language + for (String el : nGramComputeForLetters){ + if (oldValue.equals(get(el))){ + return el; + } + } + + return null; + } + public static String getTranslatedValue(String oldValue, ArrayList nGramComputeForLetters) { Locale loc; if(getLocale().equals(Locale.ENGLISH)) { diff --git a/src/main/java/gui/OneWordAnalysisTab.java b/src/main/java/gui/OneWordAnalysisTab.java index 84306bb..7ceb444 100755 --- a/src/main/java/gui/OneWordAnalysisTab.java +++ b/src/main/java/gui/OneWordAnalysisTab.java @@ -210,13 +210,13 @@ public class OneWordAnalysisTab { // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica"); // private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); - private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"}; + private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY)); // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); - private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"}; + private static final 
String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY)); // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); - private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; + private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY)); // private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; @@ -300,7 +300,7 @@ public class OneWordAnalysisTab { alsoVisualizeCCB.getItems().removeAll(); if (newValue.equals(CalculateFor.LEMMA.toString())) { alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA)); - } else if (newValue.equals(CalculateFor.WORD.toString())) { + } else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) { if (corpus.getCorpusType() == CorpusType.GOS) alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS)); else diff --git a/src/main/java/gui/StringAnalysisTabNew2.java b/src/main/java/gui/StringAnalysisTabNew2.java index 2c2152e..4f4cbbe 100755 --- a/src/main/java/gui/StringAnalysisTabNew2.java +++ 
b/src/main/java/gui/StringAnalysisTabNew2.java @@ -250,13 +250,13 @@ public class StringAnalysisTabNew2 { // private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); // private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); - private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"}; + private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY)); // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); - private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"}; + private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY)); // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); - private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; + private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY)); // private static final ObservableList 
alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; @@ -408,7 +408,7 @@ public class StringAnalysisTabNew2 { alsoVisualizeCCB.getItems().removeAll(); if (newValue.equals(CalculateFor.LEMMA.toString())) { alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA)); - } else if (newValue.equals(CalculateFor.WORD.toString())) { + } else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) { if (corpus.getCorpusType() == CorpusType.GOS) alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS)); else diff --git a/src/main/java/gui/WordFormationTab.java b/src/main/java/gui/WordFormationTab.java index 4ea87a5..bb3e62b 100755 --- a/src/main/java/gui/WordFormationTab.java +++ b/src/main/java/gui/WordFormationTab.java @@ -1,260 +1,260 @@ -package gui; - -import static alg.XML_processing.*; -import static gui.GUIController.*; - -import java.io.File; -import java.io.UnsupportedEncodingException; -import java.util.*; - -import javafx.application.HostServices; -import javafx.scene.control.*; -import org.apache.commons.lang3.StringUtils; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.controlsfx.control.CheckComboBox; - -import data.*; -import javafx.collections.ListChangeListener; -import javafx.collections.ObservableList; -import javafx.concurrent.Task; -import javafx.fxml.FXML; -import javafx.scene.layout.AnchorPane; - -@SuppressWarnings("Duplicates") -public class WordFormationTab { - public final static Logger logger = LogManager.getLogger(WordFormationTab.class); - - public AnchorPane wordAnalysisTabPane; - - @FXML - public Label selectedFiltersLabel; - @FXML - public Label solarFilters; - - @FXML - private 
CheckComboBox taxonomyCCB; - private ArrayList taxonomy; - - @FXML - private TextField minimalOccurrencesTF; - private Integer minimalOccurrences; - - @FXML - private TextField minimalTaxonomyTF; - private Integer minimalTaxonomy; - - @FXML - private Button computeB; - - @FXML - public ProgressBar ngramProgressBar; - @FXML - public Label progressLabel; - - @FXML - private Hyperlink helpH; - - private Corpus corpus; - private HashMap> solarFiltersMap; - private HostServices hostService; - - // after header scan - private ObservableList taxonomyCCBValues; - private CorpusType currentCorpusType; - private boolean useDb; - - - public void init() { - // taxonomy - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { - taxonomyCCB.getItems().removeAll(); - taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); - taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { - taxonomy = new ArrayList<>(); - ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus); - taxonomy.addAll(checkedItemsTaxonomy); - logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); - }); - taxonomyCCB.getCheckModel().clearChecks(); - } else { - taxonomyCCB.setDisable(true); - } - - // set default values - minimalOccurrencesTF.setText("1"); - minimalOccurrences = 1; - - minimalTaxonomyTF.setText("1"); - minimalTaxonomy = 1; - - minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> { - if (!newValue) { - // focus lost - String value = minimalOccurrencesTF.getText(); - if (!ValidationUtil.isEmpty(value)) { - if (!ValidationUtil.isNumber(value)) { - logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); - GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); - } else { - 
minimalOccurrences = Integer.parseInt(value); - } - } else { - minimalOccurrencesTF.setText("1"); - minimalOccurrences = 1; - } - } - }); - - minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> { - if (!newValue) { - // focus lost - String value = minimalTaxonomyTF.getText(); - if (!ValidationUtil.isEmpty(value)) { - if (!ValidationUtil.isNumber(value)) { - logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); - GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); - } else { - minimalTaxonomy = Integer.parseInt(value); - } - } else { - minimalTaxonomyTF.setText("1"); - minimalTaxonomy = 1; - } - } - }); - - computeB.setOnAction(e -> { - compute(); - logger.info("compute button"); - }); - - helpH.setOnAction(e -> openHelpWebsite()); - } - - private void compute() { - Filter filter = new Filter(); - filter.setNgramValue(1); - filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY); - filter.setTaxonomy(taxonomy); - filter.setAl(AnalysisLevel.STRING_LEVEL); - filter.setSkipValue(0); - filter.setMsd(new ArrayList<>()); - filter.setIsCvv(false); - filter.setSolarFilters(solarFiltersMap); - filter.setMinimalOccurrences(minimalOccurrences); - filter.setMinimalTaxonomy(minimalTaxonomy); - - String message = Validation.validateForStringLevel(filter); - if (message == null) { - // no errors - logger.info("Executing: ", filter.toString()); - StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb); - execute(statistic); - } else { - logAlert(message); - showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message); - } - } - - private void openHelpWebsite(){ - hostService.showDocument(Messages.HELP_URL); - } - - private void execute(StatisticsNew statistic) { - logger.info("Started execution: ", statistic.getFilter()); - - Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); - - final Task task = new Task() { - 
@SuppressWarnings("Duplicates") - @Override - protected Void call() throws Exception { - int i = 0; - Date startTime = new Date(); - Date previousTime = new Date(); - for (File f : corpusFiles) { - readXML(f.toString(), statistic); - i++; - this.updateProgress(i, corpusFiles.size()); - this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); - } - - return null; - } - }; - - ngramProgressBar.progressProperty().bind(task.progressProperty()); - progressLabel.textProperty().bind(task.messageProperty()); - - task.setOnSucceeded(e -> { - try { - // first, we have to recalculate all occurrences to detailed statistics - boolean successullySaved = statistic.recalculateAndSaveResultToDisk(); - - if (successullySaved) { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); - } else { - showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); - } - } catch (UnsupportedEncodingException e1) { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); - logger.error("Error while saving", e1); - } - - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - }); - - task.setOnFailed(e -> { - showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); - logger.error("Error while executing", e); - ngramProgressBar.progressProperty().unbind(); - ngramProgressBar.setProgress(0.0); - ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); - progressLabel.textProperty().unbind(); - progressLabel.setText(""); - }); - - final Thread thread = new Thread(task, "task"); - thread.setDaemon(true); - thread.start(); - } - - private void logAlert(String alert) { - logger.info("alert: " + alert); - } - - - public void setCorpus(Corpus corpus) { - this.corpus = corpus; - - if 
(corpus.getCorpusType() != CorpusType.SOLAR) { - setSelectedFiltersLabel(null); - } else { - setSelectedFiltersLabel("/"); - } - } - - public void setSelectedFiltersLabel(String content) { - if (content != null) { - solarFilters.setVisible(true); - selectedFiltersLabel.setVisible(true); - selectedFiltersLabel.setText(content); - } else { - solarFilters.setVisible(false); - selectedFiltersLabel.setVisible(false); - } - } - - public void setSolarFiltersMap(HashMap> solarFiltersMap) { - this.solarFiltersMap = solarFiltersMap; - } - - public void setHostServices(HostServices hostServices){ - this.hostService = hostServices; - } -} +//package gui; +// +//import static alg.XML_processing.*; +//import static gui.GUIController.*; +// +//import java.io.File; +//import java.io.UnsupportedEncodingException; +//import java.util.*; +// +//import javafx.application.HostServices; +//import javafx.scene.control.*; +//import org.apache.commons.lang3.StringUtils; +//import org.apache.logging.log4j.LogManager; +//import org.apache.logging.log4j.Logger; +//import org.controlsfx.control.CheckComboBox; +// +//import data.*; +//import javafx.collections.ListChangeListener; +//import javafx.collections.ObservableList; +//import javafx.concurrent.Task; +//import javafx.fxml.FXML; +//import javafx.scene.layout.AnchorPane; +// +//@SuppressWarnings("Duplicates") +//public class WordFormationTab { +// public final static Logger logger = LogManager.getLogger(WordFormationTab.class); +// +// public AnchorPane wordAnalysisTabPane; +// +// @FXML +// public Label selectedFiltersLabel; +// @FXML +// public Label solarFilters; +// +// @FXML +// private CheckComboBox taxonomyCCB; +// private ArrayList taxonomy; +// +// @FXML +// private TextField minimalOccurrencesTF; +// private Integer minimalOccurrences; +// +// @FXML +// private TextField minimalTaxonomyTF; +// private Integer minimalTaxonomy; +// +// @FXML +// private Button computeB; +// +// @FXML +// public ProgressBar ngramProgressBar; +// 
@FXML +// public Label progressLabel; +// +// @FXML +// private Hyperlink helpH; +// +// private Corpus corpus; +// private HashMap> solarFiltersMap; +// private HostServices hostService; +// +// // after header scan +// private ObservableList taxonomyCCBValues; +// private CorpusType currentCorpusType; +// private boolean useDb; +// +// +// public void init() { +// // taxonomy +// if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { +// taxonomyCCB.getItems().removeAll(); +// taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); +// taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { +// taxonomy = new ArrayList<>(); +// ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); +// ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus); +// taxonomy.addAll(checkedItemsTaxonomy); +// logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); +// }); +// taxonomyCCB.getCheckModel().clearChecks(); +// } else { +// taxonomyCCB.setDisable(true); +// } +// +// // set default values +// minimalOccurrencesTF.setText("1"); +// minimalOccurrences = 1; +// +// minimalTaxonomyTF.setText("1"); +// minimalTaxonomy = 1; +// +// minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> { +// if (!newValue) { +// // focus lost +// String value = minimalOccurrencesTF.getText(); +// if (!ValidationUtil.isEmpty(value)) { +// if (!ValidationUtil.isNumber(value)) { +// logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); +// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); +// } else { +// minimalOccurrences = Integer.parseInt(value); +// } +// } else { +// minimalOccurrencesTF.setText("1"); +// minimalOccurrences = 1; +// } +// } +// }); +// +// minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, 
newValue) -> { +// if (!newValue) { +// // focus lost +// String value = minimalTaxonomyTF.getText(); +// if (!ValidationUtil.isEmpty(value)) { +// if (!ValidationUtil.isNumber(value)) { +// logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); +// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); +// } else { +// minimalTaxonomy = Integer.parseInt(value); +// } +// } else { +// minimalTaxonomyTF.setText("1"); +// minimalTaxonomy = 1; +// } +// } +// }); +// +// computeB.setOnAction(e -> { +// compute(); +// logger.info("compute button"); +// }); +// +// helpH.setOnAction(e -> openHelpWebsite()); +// } +// +// private void compute() { +// Filter filter = new Filter(); +// filter.setNgramValue(1); +// filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY); +// filter.setTaxonomy(taxonomy); +// filter.setAl(AnalysisLevel.STRING_LEVEL); +// filter.setSkipValue(0); +// filter.setMsd(new ArrayList<>()); +// filter.setIsCvv(false); +// filter.setSolarFilters(solarFiltersMap); +// filter.setMinimalOccurrences(minimalOccurrences); +// filter.setMinimalTaxonomy(minimalTaxonomy); +// +// String message = Validation.validateForStringLevel(filter); +// if (message == null) { +// // no errors +// logger.info("Executing: ", filter.toString()); +// StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb); +// execute(statistic); +// } else { +// logAlert(message); +// showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message); +// } +// } +// +// private void openHelpWebsite(){ +// hostService.showDocument(Messages.HELP_URL); +// } +// +// private void execute(StatisticsNew statistic) { +// logger.info("Started execution: ", statistic.getFilter()); +// +// Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); +// +// final Task task = new Task() { +// @SuppressWarnings("Duplicates") +// @Override +// protected Void call() throws Exception { +// int i = 0; 
+// Date startTime = new Date(); +// Date previousTime = new Date(); +// for (File f : corpusFiles) { +// readXML(f.toString(), statistic); +// i++; +// this.updateProgress(i, corpusFiles.size()); +// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); +// } +// +// return null; +// } +// }; +// +// ngramProgressBar.progressProperty().bind(task.progressProperty()); +// progressLabel.textProperty().bind(task.messageProperty()); +// +// task.setOnSucceeded(e -> { +// try { +// // first, we have to recalculate all occurrences to detailed statistics +// boolean successullySaved = statistic.recalculateAndSaveResultToDisk(); +// +// if (successullySaved) { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); +// } else { +// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); +// } +// } catch (UnsupportedEncodingException e1) { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); +// logger.error("Error while saving", e1); +// } +// +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// }); +// +// task.setOnFailed(e -> { +// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); +// logger.error("Error while executing", e); +// ngramProgressBar.progressProperty().unbind(); +// ngramProgressBar.setProgress(0.0); +// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); +// progressLabel.textProperty().unbind(); +// progressLabel.setText(""); +// }); +// +// final Thread thread = new Thread(task, "task"); +// thread.setDaemon(true); +// thread.start(); +// } +// +// private void logAlert(String alert) { +// logger.info("alert: " + alert); +// } +// +// +// public void setCorpus(Corpus corpus) { +// this.corpus = 
corpus; +// +// if (corpus.getCorpusType() != CorpusType.SOLAR) { +// setSelectedFiltersLabel(null); +// } else { +// setSelectedFiltersLabel("/"); +// } +// } +// +// public void setSelectedFiltersLabel(String content) { +// if (content != null) { +// solarFilters.setVisible(true); +// selectedFiltersLabel.setVisible(true); +// selectedFiltersLabel.setText(content); +// } else { +// solarFilters.setVisible(false); +// selectedFiltersLabel.setVisible(false); +// } +// } +// +// public void setSolarFiltersMap(HashMap> solarFiltersMap) { +// this.solarFiltersMap = solarFiltersMap; +// } +// +// public void setHostServices(HostServices hostServices){ +// this.hostService = hostServices; +// } +//} diff --git a/src/main/java/gui/WordLevelTab.java b/src/main/java/gui/WordLevelTab.java index 0df7ec5..187f644 100755 --- a/src/main/java/gui/WordLevelTab.java +++ b/src/main/java/gui/WordLevelTab.java @@ -228,13 +228,13 @@ public class WordLevelTab { // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica"); // private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); - private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA"}; + private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY)); // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); - private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"}; + private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_ORTH = new 
ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY)); // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); - private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"}; + private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY)); // private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; @@ -320,7 +320,7 @@ public class WordLevelTab { alsoVisualizeCCB.getItems().removeAll(); if (newValue.equals(CalculateFor.LEMMA.toString())) { alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA)); - } else if (newValue.equals(CalculateFor.WORD.toString())) { + } else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) { if (corpus.getCorpusType() == CorpusType.GOS) alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS)); else diff --git a/src/main/java/util/Export.java b/src/main/java/util/Export.java index f65f013..f9ce5ef 100755 --- a/src/main/java/util/Export.java +++ b/src/main/java/util/Export.java @@ -23,41 +23,41 @@ import data.Enums.WordLevelType; @SuppressWarnings("unchecked") public class Export { - public static void SetToJSON(Set>> set) { - JSONArray wrapper = new JSONArray(); - - for (Pair> p : set) { - JSONArray data_wrapper = new JSONArray(); - JSONObject metric = new 
JSONObject(); - - String title = p.getLeft(); - Map map = p.getRight(); - - if (map.isEmpty()) - continue; - - long total = Util.mapSumFrequencies(map); - - for (Map.Entry e : map.entrySet()) { - JSONObject data_entry = new JSONObject(); - data_entry.put("word", e.getKey()); - data_entry.put("frequency", e.getValue()); - data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total)); - - data_wrapper.add(data_entry); - } - - metric.put("Title", title); - metric.put("data", data_wrapper); - wrapper.add(metric); - } - - try (FileWriter file = new FileWriter("statistics.json")) { - file.write(wrapper.toJSONString()); - } catch (IOException e) { - e.printStackTrace(); - } - } +// public static void SetToJSON(Set>> set) { +// JSONArray wrapper = new JSONArray(); +// +// for (Pair> p : set) { +// JSONArray data_wrapper = new JSONArray(); +// JSONObject metric = new JSONObject(); +// +// String title = p.getLeft(); +// Map map = p.getRight(); +// +// if (map.isEmpty()) +// continue; +// +// long total = Util.mapSumFrequencies(map); +// +// for (Map.Entry e : map.entrySet()) { +// JSONObject data_entry = new JSONObject(); +// data_entry.put("word", e.getKey()); +// data_entry.put("frequency", e.getValue()); +// data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total)); +// +// data_wrapper.add(data_entry); +// } +// +// metric.put("Title", title); +// metric.put("data", data_wrapper); +// wrapper.add(metric); +// } +// +// try (FileWriter file = new FileWriter("statistics.json")) { +// file.write(wrapper.toJSONString()); +// } catch (IOException e) { +// e.printStackTrace(); +// } +// } public static String SetToCSV(Set>> set, File resultsPath, LinkedHashMap headerInfoBlock, StatisticsNew statistics, Filter filter) { @@ -127,12 +127,6 @@ public class Export { FILE_HEADER_AL.add(I18N.get("exportTable.totalRelativeFrequency")); - if (filter.getCollocability().size() > 0){ - for (Collocability c : filter.getCollocability()) { - 
FILE_HEADER_AL.add(c.toHeaderString()); - } - } - for (Taxonomy key : taxonomyResults.keySet()) { if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { FILE_HEADER_AL.add(I18N.get("exportTable.absoluteFrequency") + " [" + key.toString() + "]"); @@ -141,6 +135,13 @@ public class Export { } } + if (filter.getCollocability().size() > 0){ + for (Collocability c : filter.getCollocability()) { + FILE_HEADER_AL.add(c.toHeaderString()); + } + } + + if (filter.getWriteMsdAtTheEnd()) { String msd = ""; int maxMsdLength = 0; @@ -280,14 +281,14 @@ public class Export { dataEntry.add(e.getValue().toString()); - dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()))); - dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue())); + dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()), statistics.getCorpus().getPunctuation())); + dataEntry.add(formatNumberForExport(((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue(), statistics.getCorpus().getPunctuation())); for (Taxonomy key : taxonomyResults.keySet()){ if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { AtomicLong frequency = taxonomyResults.get(key).get(e.getKey()); dataEntry.add(frequency.toString()); - dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key))); - dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue())); + dataEntry.add(formatNumberAsPercent((double) frequency.get() / 
num_selected_taxonomy_frequencies.get(key), statistics.getCorpus().getPunctuation())); + dataEntry.add(formatNumberForExport(((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue(), statistics.getCorpus().getPunctuation())); // dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences())); // dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences())); } @@ -296,7 +297,7 @@ public class Export { if (filter.getCollocability().size() > 0){ for (Collocability c : filter.getCollocability()) { - dataEntry.add(statistics.getCollocability().get(c).get(e.getKey())); + dataEntry.add(formatNumberForLongExport(statistics.getCollocability().get(c).get(e.getKey()), statistics.getCorpus().getPunctuation())); } } @@ -371,66 +372,66 @@ public class Export { return s; } - public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap headerInfoBlock) { - //Delimiter used in CSV file - String NEW_LINE_SEPARATOR = "\n"; - - //CSV file header - Object[] FILE_HEADER = {"word", "frequency", "percent"}; - - String fileName = ""; - - fileName = title.replace(": ", "-"); - fileName = fileName.replace(" ", "_").concat(".csv"); - - fileName = resultsPath.toString().concat(File.separator).concat(fileName); - - OutputStreamWriter fileWriter = null; - CSVPrinter csvFilePrinter = null; - - //Create the CSVFormat object with "\n" as a record delimiter - CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';'); - - try { - //initialize FileWriter object - fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8); - - //initialize CSVPrinter object - csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat); - - // write info block - printHeaderInfo(csvFilePrinter, headerInfoBlock); - - //Create CSV file header - csvFilePrinter.printRecord(FILE_HEADER); - - for (Object[] 
resultEntry : result) { - List dataEntry = new ArrayList<>(); - dataEntry.add(resultEntry[0]); - dataEntry.add(resultEntry[1]); - dataEntry.add(formatNumberAsPercent(resultEntry[2])); - csvFilePrinter.printRecord(dataEntry); - } - } catch (Exception e) { - System.out.println("Error in CsvFileWriter!"); - e.printStackTrace(); - } finally { - try { - if (fileWriter != null) { - fileWriter.flush(); - fileWriter.close(); - } - if (csvFilePrinter != null) { - csvFilePrinter.close(); - } - } catch (IOException e) { - System.out.println("Error while flushing/closing fileWriter/csvPrinter!"); - e.printStackTrace(); - } - } - - return fileName; - } +// public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap headerInfoBlock) { +// //Delimiter used in CSV file +// String NEW_LINE_SEPARATOR = "\n"; +// +// //CSV file header +// Object[] FILE_HEADER = {"word", "frequency", "percent"}; +// +// String fileName = ""; +// +// fileName = title.replace(": ", "-"); +// fileName = fileName.replace(" ", "_").concat(".csv"); +// +// fileName = resultsPath.toString().concat(File.separator).concat(fileName); +// +// OutputStreamWriter fileWriter = null; +// CSVPrinter csvFilePrinter = null; +// +// //Create the CSVFormat object with "\n" as a record delimiter +// CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';'); +// +// try { +// //initialize FileWriter object +// fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8); +// +// //initialize CSVPrinter object +// csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat); +// +// // write info block +// printHeaderInfo(csvFilePrinter, headerInfoBlock); +// +// //Create CSV file header +// csvFilePrinter.printRecord(FILE_HEADER); +// +// for (Object[] resultEntry : result) { +// List dataEntry = new ArrayList<>(); +// dataEntry.add(resultEntry[0]); +// dataEntry.add(resultEntry[1]); +// 
dataEntry.add(formatNumberAsPercent(resultEntry[2]), statistics.getCorpus().getPunctuation()); +// csvFilePrinter.printRecord(dataEntry); +// } +// } catch (Exception e) { +// System.out.println("Error in CsvFileWriter!"); +// e.printStackTrace(); +// } finally { +// try { +// if (fileWriter != null) { +// fileWriter.flush(); +// fileWriter.close(); +// } +// if (csvFilePrinter != null) { +// csvFilePrinter.close(); +// } +// } catch (IOException e) { +// System.out.println("Error while flushing/closing fileWriter/csvPrinter!"); +// e.printStackTrace(); +// } +// } +// +// return fileName; +// } public static String nestedMapToCSV(String title, Map>> result, File resultsPath, LinkedHashMap headerInfoBlock) { //Delimiter used in CSV file diff --git a/src/main/java/util/Util.java b/src/main/java/util/Util.java index 857051f..0e96c03 100755 --- a/src/main/java/util/Util.java +++ b/src/main/java/util/Util.java @@ -54,10 +54,31 @@ public class Util { return "- invalid input format -"; } - public static String formatNumberAsPercent(Object o) { - return MessageFormat.format("{0,number,#.### %}", o).replace('.', ','); + public static String formatNumberAsPercent(Object o, String punctuation) { + if(punctuation.equals("punctuation.COMMA")) { + return MessageFormat.format("{0,number,#.### %}", o).replace('.', ','); + } else { + return MessageFormat.format("{0,number,#.### %}", o); + } } + public static String formatNumberForExport(Object o, String punctuation) { + if(punctuation.equals("punctuation.COMMA")) { + return MessageFormat.format("{0,number,#.##}", o).replace('.', ','); + } else { + return MessageFormat.format("{0,number,#.##}", o); + } + + } + + public static String formatNumberForLongExport(Object o, String punctuation) { + if(punctuation.equals("punctuation.COMMA")) { + return MessageFormat.format("{0,number,#.########}", o).replace('.', ','); + } else { + return MessageFormat.format("{0,number,#.########}", o); + } + } + private static boolean 
isInstanceOfInteger(Object o) { Set> types = new HashSet<>(); types.add(Byte.class); diff --git a/src/main/resources/gui/CorpusTab.fxml b/src/main/resources/gui/CorpusTab.fxml index 6417578..e0c3788 100755 --- a/src/main/resources/gui/CorpusTab.fxml +++ b/src/main/resources/gui/CorpusTab.fxml @@ -57,6 +57,13 @@ +