From bebc0abbb30a655bfb7e48f94d49202bbea594a2 Mon Sep 17 00:00:00 2001 From: Luka Date: Mon, 23 Jul 2018 09:14:46 +0200 Subject: [PATCH] Computer formatted --- .gitignore | 0 Corpus Analyzer.iml | 0 pom.xml | 0 src/main/java/alg/Common.java | 0 src/main/java/alg/XML_processing.java | 29 +++++++++++++++++-- src/main/java/alg/inflectedJOS/ForkJoin.java | 0 .../alg/inflectedJOS/InflectedJOSCount.java | 0 .../java/alg/inflectedJOS/WordFormation.java | 0 src/main/java/alg/ngram/ForkJoin.java | 0 src/main/java/alg/ngram/Ngrams.java | 3 ++ src/main/java/alg/word/ForkJoin.java | 0 src/main/java/alg/word/WordCount.java | 0 src/main/java/alg/word/WordLevel.java | 0 src/main/java/data/AnalysisLevel.java | 0 src/main/java/data/CalculateFor.java | 0 src/main/java/data/Corpus.java | 0 src/main/java/data/CorpusType.java | 0 .../java/data/Enums/InflectedJosTypes.java | 0 src/main/java/data/Enums/Msd.java | 0 .../data/Enums/WordLevelDefaultValues.java | 0 src/main/java/data/Enums/WordLevelType.java | 0 .../java/data/Enums/solar/SolarFilters.java | 0 src/main/java/data/Filter.java | 11 ++++++- src/main/java/data/GigafidaJosWordType.java | 0 src/main/java/data/GigafidaTaxonomy.java | 0 src/main/java/data/GosTaxonomy.java | 0 src/main/java/data/MultipleHMKeys.java | 0 src/main/java/data/Sentence.java | 0 src/main/java/data/Settings.java | 0 src/main/java/data/Statistics.java | 0 src/main/java/data/StatisticsNew.java | 0 src/main/java/data/Tax.java | 0 src/main/java/data/Taxonomy.java | 0 src/main/java/data/Validation.java | 0 src/main/java/data/Word.java | 4 +++ src/main/java/gui/CharacterAnalysisTab.java | 0 src/main/java/gui/CorpusTab.java | 0 src/main/java/gui/FiltersForSolar.java | 0 src/main/java/gui/GUIController.java | 0 src/main/java/gui/Messages.java | 2 ++ src/main/java/gui/SelectedFiltersPane.java | 0 src/main/java/gui/StringAnalysisTabNew2.java | 16 ++++++++++ src/main/java/gui/ValidationUtil.java | 0 src/main/java/gui/WordFormationTab.java | 0 src/main/java/gui/WordLevelTab.java | 0 src/main/java/man/META-INF/MANIFEST.MF | 0 src/main/java/util/ByteUtils.java | 0 src/main/java/util/Combinations.java | 0 src/main/java/util/Export.java | 5 ++-- src/main/java/util/Key.java | 0 src/main/java/util/TimeWatch.java | 0 src/main/java/util/Util.java | 0 src/main/java/util/db/RDB.java | 0 .../resources/GOS_small/TEI_GOS_small.xml | 0 .../resources/GOS_tax_test/GOS_tax_test.xml | 0 src/main/resources/GUI.fxml | 0 src/main/resources/Gigafida_minimal/gfmin.xml | 0 ...lema_0-gram_0-skip_14.05.2018_06.34.13.csv | 0 ...lema_0-gram_0-skip_14.05.2018_06.37.50.csv | 0 ...lema_0-gram_0-skip_14.05.2018_06.38.17.csv | 0 ...lema_1-gram_0-skip_31.01.2018_05.11.26.csv | 0 ...lema_2-gram_1-skip_31.01.2018_05.11.33.csv | 0 ...nica_1-gram_0-skip_25.01.2018_06.27.41.csv | 0 ...ičnica_2-gram_0-skip_20.01.2018_01.27.csv | 0 ...ičnica_3-gram_0-skip_20.01.2018_01.27.csv | 0 .../resources/Gigafida_subset/F0012405.xml | 0 .../resources/Gigafida_subset/F0016316.xml | 0 .../resources/Gigafida_subset/F0018194.xml | 0 .../resources/Gigafida_subset/F0026709.xml | 0 .../resources/Gigafida_subset/F0030361.xml | 0 .../Gigafida_subset/nested/F0036980.xml | 0 .../Gigafida_subset/nested/F0037258.xml | 0 .../Gigafida_subset/nested/F0037544.xml | 0 .../Gigafida_subset/nested/F0038754.xml | 0 .../Gigafida_subset/nested/F0038920.xml | 0 src/main/resources/Lists/prefixes.txt | 0 src/main/resources/Lists/suffixes.txt | 0 .../resources/gui/CharacterAnalysisTab.fxml | 0 src/main/resources/gui/CorpusTab.fxml | 0 src/main/resources/gui/FiltersForSolar.fxml | 0 .../resources/gui/SelectedFiltersPane.fxml | 0 .../resources/gui/StringAnalysisTabNew2.fxml | 16 +++++----- src/main/resources/gui/WordFormationTab.fxml | 0 src/main/resources/gui/WordLevelTab.fxml | 0 src/main/resources/log4j2.xml | 0 src/test/java/Common.java | 0 src/test/java/CorpusTests.java | 0 src/test/java/DBTest.java | 0 src/test/java/NgramTests.java | 0 src/test/java/WordFormationTest.java | 0 src/test/java/WordLevelTest.java | 0 src/test/java/WordTest.java | 0 92 files changed, 74 insertions(+), 12 deletions(-) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 Corpus Analyzer.iml mode change 100644 => 100755 pom.xml mode change 100644 => 100755 src/main/java/alg/Common.java mode change 100644 => 100755 src/main/java/alg/XML_processing.java mode change 100644 => 100755 src/main/java/alg/inflectedJOS/ForkJoin.java mode change 100644 => 100755 src/main/java/alg/inflectedJOS/InflectedJOSCount.java mode change 100644 => 100755 src/main/java/alg/inflectedJOS/WordFormation.java mode change 100644 => 100755 src/main/java/alg/ngram/ForkJoin.java mode change 100644 => 100755 src/main/java/alg/ngram/Ngrams.java mode change 100644 => 100755 src/main/java/alg/word/ForkJoin.java mode change 100644 => 100755 src/main/java/alg/word/WordCount.java mode change 100644 => 100755 src/main/java/alg/word/WordLevel.java mode change 100644 => 100755 src/main/java/data/AnalysisLevel.java mode change 100644 => 100755 src/main/java/data/CalculateFor.java mode change 100644 => 100755 src/main/java/data/Corpus.java mode change 100644 => 100755 src/main/java/data/CorpusType.java mode change 100644 => 100755 src/main/java/data/Enums/InflectedJosTypes.java mode change 100644 => 100755 src/main/java/data/Enums/Msd.java mode change 100644 => 100755 src/main/java/data/Enums/WordLevelDefaultValues.java mode change 100644 => 100755 src/main/java/data/Enums/WordLevelType.java mode change 100644 => 100755 src/main/java/data/Enums/solar/SolarFilters.java mode change 100644 => 100755 src/main/java/data/Filter.java mode change 100644 => 100755 src/main/java/data/GigafidaJosWordType.java mode change 100644 => 100755 src/main/java/data/GigafidaTaxonomy.java mode change 100644 => 100755 src/main/java/data/GosTaxonomy.java mode change 100644 => 100755 src/main/java/data/MultipleHMKeys.java mode change 100644 => 100755 src/main/java/data/Sentence.java mode change 100644 => 100755 src/main/java/data/Settings.java mode change 100644 => 100755 src/main/java/data/Statistics.java mode change 100644 => 100755 src/main/java/data/StatisticsNew.java mode change 100644 => 100755 src/main/java/data/Tax.java mode change 100644 => 100755 src/main/java/data/Taxonomy.java mode change 100644 => 100755 src/main/java/data/Validation.java mode change 100644 => 100755 src/main/java/data/Word.java mode change 100644 => 100755 src/main/java/gui/CharacterAnalysisTab.java mode change 100644 => 100755 src/main/java/gui/CorpusTab.java mode change 100644 => 100755 src/main/java/gui/FiltersForSolar.java mode change 100644 => 100755 src/main/java/gui/GUIController.java mode change 100644 => 100755 src/main/java/gui/Messages.java mode change 100644 => 100755 src/main/java/gui/SelectedFiltersPane.java mode change 100644 => 100755 src/main/java/gui/ValidationUtil.java mode change 100644 => 100755 src/main/java/gui/WordFormationTab.java mode change 100644 => 100755 src/main/java/gui/WordLevelTab.java mode change 100644 => 100755 src/main/java/man/META-INF/MANIFEST.MF mode change 100644 => 100755 src/main/java/util/ByteUtils.java mode change 100644 => 100755 src/main/java/util/Combinations.java mode change 100644 => 100755 src/main/java/util/Export.java mode change 100644 => 100755 src/main/java/util/Key.java mode change 100644 => 100755 src/main/java/util/TimeWatch.java mode change 100644 => 100755 src/main/java/util/Util.java mode change 100644 => 100755 src/main/java/util/db/RDB.java mode change 100644 => 100755 src/main/resources/GOS_small/TEI_GOS_small.xml mode change 100644 => 100755 src/main/resources/GOS_tax_test/GOS_tax_test.xml mode change 100644 => 100755 src/main/resources/GUI.fxml mode change 100644 => 100755 src/main/resources/Gigafida_minimal/gfmin.xml mode change 100644 => 100755 src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.34.13.csv mode change 100644 => 100755 src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.37.50.csv mode change 100644 => 100755 src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.38.17.csv mode change 100644 => 100755 src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_1-gram_0-skip_31.01.2018_05.11.26.csv mode change 100644 => 100755 src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_2-gram_1-skip_31.01.2018_05.11.33.csv mode change 100644 => 100755 src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_1-gram_0-skip_25.01.2018_06.27.41.csv mode change 100644 => 100755 src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_2-gram_0-skip_20.01.2018_01.27.csv mode change 100644 => 100755 src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_3-gram_0-skip_20.01.2018_01.27.csv mode change 100644 => 100755 src/main/resources/Gigafida_subset/F0012405.xml mode change 100644 => 100755 src/main/resources/Gigafida_subset/F0016316.xml mode change 100644 => 100755 src/main/resources/Gigafida_subset/F0018194.xml mode change 100644 => 100755 src/main/resources/Gigafida_subset/F0026709.xml mode change 100644 => 100755 src/main/resources/Gigafida_subset/F0030361.xml mode change 100644 => 100755 src/main/resources/Gigafida_subset/nested/F0036980.xml mode change 100644 => 100755 src/main/resources/Gigafida_subset/nested/F0037258.xml mode change 100644 => 100755 src/main/resources/Gigafida_subset/nested/F0037544.xml mode change 100644 => 100755 src/main/resources/Gigafida_subset/nested/F0038754.xml mode change 100644 => 100755 src/main/resources/Gigafida_subset/nested/F0038920.xml mode change 100644 => 100755 src/main/resources/Lists/prefixes.txt mode change 100644 => 100755 src/main/resources/Lists/suffixes.txt mode change 100644 => 100755 src/main/resources/gui/CharacterAnalysisTab.fxml mode change 100644 => 100755 src/main/resources/gui/CorpusTab.fxml mode change 100644 => 100755 src/main/resources/gui/FiltersForSolar.fxml mode change 100644 => 100755 src/main/resources/gui/SelectedFiltersPane.fxml mode change 100644 => 100755 src/main/resources/gui/WordFormationTab.fxml mode change 100644 => 100755 src/main/resources/gui/WordLevelTab.fxml mode change 100644 => 100755 src/main/resources/log4j2.xml mode change 100644 => 100755 src/test/java/Common.java mode change 100644 => 100755 src/test/java/CorpusTests.java mode change 100644 => 100755 src/test/java/DBTest.java mode change 100644 => 100755 src/test/java/NgramTests.java mode change 100644 => 100755 src/test/java/WordFormationTest.java mode change 100644 => 100755 src/test/java/WordLevelTest.java mode change 100644 => 100755 src/test/java/WordTest.java diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/Corpus Analyzer.iml b/Corpus Analyzer.iml old mode 100644 new mode 100755 diff --git a/pom.xml b/pom.xml old mode 100644 new mode 100755 diff --git a/src/main/java/alg/Common.java b/src/main/java/alg/Common.java old mode 100644 new mode 100755 diff --git a/src/main/java/alg/XML_processing.java b/src/main/java/alg/XML_processing.java old mode 100644 new mode 100755 index b47c190..90bc913 --- a/src/main/java/alg/XML_processing.java +++ b/src/main/java/alg/XML_processing.java @@ -224,7 +224,8 @@ public class XML_processing { @SuppressWarnings("unused") public static void readXMLSolar(String path, StatisticsNew stats) { boolean in_word = false; - String lemma = ""; + boolean inPunctuation = false; + String lemma = ""; String msd = ""; List stavek = new ArrayList<>(); @@ -275,6 +276,9 @@ public class XML_processing { corpus.clear(); } } + else if(includeThisBlock){ + inPunctuation = true; + } } else if (headTags.contains(qName)) { String tagContent = eventReader.nextEvent().asCharacters().getData(); headBlock.put(qName, tagContent); @@ -291,7 +295,13 @@ public class XML_processing { if (in_word) { stavek.add(new Word(characters.getData(), lemma, msd)); in_word = false; - } + } else if(inPunctuation){ + String punctuation = ","; + stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation); + stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation); + stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation); + inPunctuation = false; + } break; case XMLStreamConstants.END_ELEMENT: @@ -472,6 +482,7 @@ public class XML_processing { @SuppressWarnings("Duplicates") public static boolean readXMLGigafida(String path, StatisticsNew stats) { boolean inWord = false; + boolean inPunctuation = false; ArrayList currentFiletaxonomy = new ArrayList<>(); ArrayList currentFiletaxonomyLong = new ArrayList<>(); String lemma = ""; @@ -501,6 +512,11 @@ public class XML_processing { msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); } + + if (qName.equals("c")){ + inPunctuation = true; + } + // taxonomy node else if (qName.equalsIgnoreCase("catRef")) { // there are some term nodes at the beginning that are of no interest to us @@ -526,6 +542,14 @@ public class XML_processing { sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong)); inWord = false; } +// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { +//// String punctuation = characters.getData(); +// String punctuation = ","; +// sentence.get(sentence.size()-1).setWord(sentence.get(sentence.size()-1).getWord() + punctuation); +// sentence.get(sentence.size()-1).setLemma(sentence.get(sentence.size()-1).getLemma() + punctuation); +// sentence.get(sentence.size()-1).setMsd(sentence.get(sentence.size()-1).getMsd() + punctuation); +// inPunctuation = false; +// } break; case XMLStreamConstants.END_ELEMENT: @@ -604,6 +628,7 @@ public class XML_processing { @SuppressWarnings("Duplicates") public static boolean readXMLGos(String path, StatisticsNew stats) { boolean inWord = false; + boolean inPunctuation = false; boolean inOrthDiv = false; boolean computeForOrth = stats.getCorpus().isGosOrthMode(); ArrayList currentFiletaxonomy = new ArrayList<>(); diff --git a/src/main/java/alg/inflectedJOS/ForkJoin.java b/src/main/java/alg/inflectedJOS/ForkJoin.java old mode 100644 new mode 100755 diff --git a/src/main/java/alg/inflectedJOS/InflectedJOSCount.java b/src/main/java/alg/inflectedJOS/InflectedJOSCount.java old mode 100644 new mode 100755 diff --git a/src/main/java/alg/inflectedJOS/WordFormation.java b/src/main/java/alg/inflectedJOS/WordFormation.java old mode 100644 new mode 100755 diff --git a/src/main/java/alg/ngram/ForkJoin.java b/src/main/java/alg/ngram/ForkJoin.java old mode 100644 new mode 100755 diff --git a/src/main/java/alg/ngram/Ngrams.java b/src/main/java/alg/ngram/Ngrams.java old mode 100644 new mode 100755 index 2080d71..b2c069b --- a/src/main/java/alg/ngram/Ngrams.java +++ b/src/main/java/alg/ngram/Ngrams.java @@ -44,6 +44,7 @@ public class Ngrams { // generate proper MultipleHMKeys depending on filter data String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor()); + key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; // String key = "aaaaaaaaaaaaaaaaaaaaaaa"; String lemma = ""; @@ -60,6 +61,8 @@ public class Ngrams { } } + + MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd); // UPDATE TAXONOMY HERE!!! diff --git a/src/main/java/alg/word/ForkJoin.java b/src/main/java/alg/word/ForkJoin.java old mode 100644 new mode 100755 diff --git a/src/main/java/alg/word/WordCount.java b/src/main/java/alg/word/WordCount.java old mode 100644 new mode 100755 diff --git a/src/main/java/alg/word/WordLevel.java b/src/main/java/alg/word/WordLevel.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/AnalysisLevel.java b/src/main/java/data/AnalysisLevel.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/CalculateFor.java b/src/main/java/data/CalculateFor.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Corpus.java b/src/main/java/data/Corpus.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/CorpusType.java b/src/main/java/data/CorpusType.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Enums/InflectedJosTypes.java b/src/main/java/data/Enums/InflectedJosTypes.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Enums/Msd.java b/src/main/java/data/Enums/Msd.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Enums/WordLevelDefaultValues.java b/src/main/java/data/Enums/WordLevelDefaultValues.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Enums/WordLevelType.java b/src/main/java/data/Enums/WordLevelType.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Enums/solar/SolarFilters.java b/src/main/java/data/Enums/solar/SolarFilters.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Filter.java b/src/main/java/data/Filter.java old mode 100644 new mode 100755 index 14a2cee..7626aa1 --- a/src/main/java/data/Filter.java +++ b/src/main/java/data/Filter.java @@ -25,7 +25,8 @@ public class Filter { MSD, HAS_MSD, SOLAR_FILTERS, - MULTIPLE_KEYS + MULTIPLE_KEYS, + NOTE_PUNCTUATIONS } public Filter() { @@ -161,4 +162,12 @@ public class Filter { return new ArrayList<>(); } } + + public void setNotePunctuations(boolean notePunctuations) { + filter.put(NOTE_PUNCTUATIONS, notePunctuations); + } + + public boolean getNotePunctuations() { + return filter.containsKey(NOTE_PUNCTUATIONS) && (boolean) filter.get(NOTE_PUNCTUATIONS); + } } diff --git a/src/main/java/data/GigafidaJosWordType.java b/src/main/java/data/GigafidaJosWordType.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/GigafidaTaxonomy.java b/src/main/java/data/GigafidaTaxonomy.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/GosTaxonomy.java b/src/main/java/data/GosTaxonomy.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/MultipleHMKeys.java b/src/main/java/data/MultipleHMKeys.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Sentence.java b/src/main/java/data/Sentence.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Settings.java b/src/main/java/data/Settings.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Statistics.java b/src/main/java/data/Statistics.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/StatisticsNew.java b/src/main/java/data/StatisticsNew.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Tax.java b/src/main/java/data/Tax.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Taxonomy.java b/src/main/java/data/Taxonomy.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Validation.java b/src/main/java/data/Validation.java old mode 100644 new mode 100755 diff --git a/src/main/java/data/Word.java b/src/main/java/data/Word.java old mode 100644 new mode 100755 index bd6b3ee..a43de13 --- a/src/main/java/data/Word.java +++ b/src/main/java/data/Word.java @@ -134,6 +134,10 @@ public class Word implements Serializable { return msd; } + public void setMsd(String msd) { + this.msd = msd; + } + public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/src/main/java/gui/CharacterAnalysisTab.java b/src/main/java/gui/CharacterAnalysisTab.java old mode 100644 new mode 100755 diff --git a/src/main/java/gui/CorpusTab.java b/src/main/java/gui/CorpusTab.java old mode 100644 new mode 100755 diff --git a/src/main/java/gui/FiltersForSolar.java b/src/main/java/gui/FiltersForSolar.java old mode 100644 new mode 100755 diff --git a/src/main/java/gui/GUIController.java b/src/main/java/gui/GUIController.java old mode 100644 new mode 100755 diff --git a/src/main/java/gui/Messages.java b/src/main/java/gui/Messages.java old mode 100644 new mode 100755 index 2df16ad..c73926c --- a/src/main/java/gui/Messages.java +++ b/src/main/java/gui/Messages.java @@ -21,6 +21,7 @@ public class Messages { public static final String WARNING_NO_SOLAR_FILTERS_FOUND = "Iz korpusnih datotek ni bilo moč razbrati filtrov. Prosim izberite drugo lokacijo ali korpus."; public static final String ERROR_WHILE_EXECUTING = "Prišlo je do napake med izvajanjem."; public static final String ERROR_WHILE_SAVING_RESULTS_TO_CSV = "Prišlo je do napake med shranjevanje rezultatov."; + public static final String ERROR_NOT_ENOUGH_MEMORY= "Na voljo imate premalo pomnilnika (RAM-a) za analizo takšne količine podatkov."; // missing public static final String MISSING_NGRAM_LEVEL = "N-gram nivo"; @@ -52,6 +53,7 @@ public class Messages { public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa."; public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki."; + public static final String TOOLTIP_readNotePunctuationsChB = "Ločila med povedmi se upoštevajo v vsakem primeru."; diff --git a/src/main/java/gui/SelectedFiltersPane.java b/src/main/java/gui/SelectedFiltersPane.java old mode 100644 new mode 100755 diff --git a/src/main/java/gui/StringAnalysisTabNew2.java b/src/main/java/gui/StringAnalysisTabNew2.java index b861d3b..86e2396 100755 --- a/src/main/java/gui/StringAnalysisTabNew2.java +++ b/src/main/java/gui/StringAnalysisTabNew2.java @@ -62,6 +62,10 @@ public class StringAnalysisTabNew2 { private ComboBox skipValueCB; private Integer skipValue; + @FXML + private CheckBox notePunctuationsChB; + private boolean notePunctuations; + @FXML private Pane paneWords; @@ -135,6 +139,14 @@ public class StringAnalysisTabNew2 { ngramValueCB.getSelectionModel().select(0); // selected index ngramValue = 2; // actual value at that index + notePunctuations = true; + // set + notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> { + notePunctuations = newValue; + logger.info("note punctuations: ", notePunctuations); + }); + notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB)); + // calculateForCB calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> { calculateFor = CalculateFor.factory(newValue); @@ -398,6 +410,7 @@ public class StringAnalysisTabNew2 { filter.setSkipValue(skipValue); filter.setIsCvv(calculateCvv); filter.setSolarFilters(solarFiltersMap); + filter.setNotePunctuations(notePunctuations); if (ngramValue != null && ngramValue == 0) { filter.setStringLength(stringLength); @@ -488,6 +501,9 @@ public class StringAnalysisTabNew2 { } catch (UnsupportedEncodingException e1) { showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV); logger.error("Error while saving", e1); + } catch (OutOfMemoryError e1){ + showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY); + logger.error("Out of memory error", e1); } ngramProgressBar.progressProperty().unbind(); diff --git a/src/main/java/gui/ValidationUtil.java b/src/main/java/gui/ValidationUtil.java old mode 100644 new mode 100755 diff --git a/src/main/java/gui/WordFormationTab.java b/src/main/java/gui/WordFormationTab.java old mode 100644 new mode 100755 diff --git a/src/main/java/gui/WordLevelTab.java b/src/main/java/gui/WordLevelTab.java old mode 100644 new mode 100755 diff --git a/src/main/java/man/META-INF/MANIFEST.MF b/src/main/java/man/META-INF/MANIFEST.MF old mode 100644 new mode 100755 diff --git a/src/main/java/util/ByteUtils.java b/src/main/java/util/ByteUtils.java old mode 100644 new mode 100755 diff --git a/src/main/java/util/Combinations.java b/src/main/java/util/Combinations.java old mode 100644 new mode 100755 diff --git a/src/main/java/util/Export.java b/src/main/java/util/Export.java old mode 100644 new mode 100755 index 9341b23..cfedb39 --- a/src/main/java/util/Export.java +++ b/src/main/java/util/Export.java @@ -13,6 +13,7 @@ import data.Filter; import data.MultipleHMKeys; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.QuoteMode; import org.apache.commons.lang3.tuple.Pair; import org.json.simple.JSONArray; import org.json.simple.JSONObject; @@ -167,8 +168,8 @@ public class Export { OutputStreamWriter fileWriter = null; CSVPrinter csvFilePrinter = null; - //Create the CSVFormat object with "\n" as a record delimiter - CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';'); + //Create the CSVFormat object with "\n" as a record delimiter it puts all words in braces + CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';').withQuoteMode(QuoteMode.ALL); try { //initialize FileWriter object diff --git a/src/main/java/util/Key.java b/src/main/java/util/Key.java old mode 100644 new mode 100755 diff --git a/src/main/java/util/TimeWatch.java b/src/main/java/util/TimeWatch.java old mode 100644 new mode 100755 diff --git a/src/main/java/util/Util.java b/src/main/java/util/Util.java old mode 100644 new mode 100755 diff --git a/src/main/java/util/db/RDB.java b/src/main/java/util/db/RDB.java old mode 100644 new mode 100755 diff --git a/src/main/resources/GOS_small/TEI_GOS_small.xml b/src/main/resources/GOS_small/TEI_GOS_small.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/GOS_tax_test/GOS_tax_test.xml b/src/main/resources/GOS_tax_test/GOS_tax_test.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/GUI.fxml b/src/main/resources/GUI.fxml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_minimal/gfmin.xml b/src/main/resources/Gigafida_minimal/gfmin.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.34.13.csv b/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.34.13.csv old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.37.50.csv b/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.37.50.csv old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.38.17.csv b/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.38.17.csv old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_1-gram_0-skip_31.01.2018_05.11.26.csv b/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_1-gram_0-skip_31.01.2018_05.11.26.csv old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_2-gram_1-skip_31.01.2018_05.11.33.csv b/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_2-gram_1-skip_31.01.2018_05.11.33.csv old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_1-gram_0-skip_25.01.2018_06.27.41.csv b/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_1-gram_0-skip_25.01.2018_06.27.41.csv old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_2-gram_0-skip_20.01.2018_01.27.csv b/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_2-gram_0-skip_20.01.2018_01.27.csv old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_3-gram_0-skip_20.01.2018_01.27.csv b/src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_3-gram_0-skip_20.01.2018_01.27.csv old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/F0012405.xml b/src/main/resources/Gigafida_subset/F0012405.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/F0016316.xml b/src/main/resources/Gigafida_subset/F0016316.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/F0018194.xml b/src/main/resources/Gigafida_subset/F0018194.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/F0026709.xml b/src/main/resources/Gigafida_subset/F0026709.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/F0030361.xml b/src/main/resources/Gigafida_subset/F0030361.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/nested/F0036980.xml b/src/main/resources/Gigafida_subset/nested/F0036980.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/nested/F0037258.xml b/src/main/resources/Gigafida_subset/nested/F0037258.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/nested/F0037544.xml b/src/main/resources/Gigafida_subset/nested/F0037544.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/nested/F0038754.xml b/src/main/resources/Gigafida_subset/nested/F0038754.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Gigafida_subset/nested/F0038920.xml b/src/main/resources/Gigafida_subset/nested/F0038920.xml old mode 100644 new mode 100755 diff --git a/src/main/resources/Lists/prefixes.txt b/src/main/resources/Lists/prefixes.txt old mode 100644 new mode 100755 diff --git a/src/main/resources/Lists/suffixes.txt b/src/main/resources/Lists/suffixes.txt old mode 100644 new mode 100755 diff --git a/src/main/resources/gui/CharacterAnalysisTab.fxml b/src/main/resources/gui/CharacterAnalysisTab.fxml old mode 100644 new mode 100755 diff --git a/src/main/resources/gui/CorpusTab.fxml b/src/main/resources/gui/CorpusTab.fxml old mode 100644 new mode 100755 diff --git a/src/main/resources/gui/FiltersForSolar.fxml b/src/main/resources/gui/FiltersForSolar.fxml old mode 100644 new mode 100755 diff --git a/src/main/resources/gui/SelectedFiltersPane.fxml b/src/main/resources/gui/SelectedFiltersPane.fxml old mode 100644 new mode 100755 diff --git a/src/main/resources/gui/StringAnalysisTabNew2.fxml b/src/main/resources/gui/StringAnalysisTabNew2.fxml index 505e22c..aaa9a99 100755 --- a/src/main/resources/gui/StringAnalysisTabNew2.fxml +++ b/src/main/resources/gui/StringAnalysisTabNew2.fxml @@ -62,19 +62,21 @@ + + - - -