Computer formatted

This commit is contained in:
Luka 2018-07-23 09:14:46 +02:00
parent 84d0086a66
commit bebc0abbb3
92 changed files with 74 additions and 12 deletions

0
.gitignore vendored Normal file → Executable file
View File

0
Corpus Analyzer.iml Normal file → Executable file
View File

0
pom.xml Normal file → Executable file
View File

0
src/main/java/alg/Common.java Normal file → Executable file
View File

25
src/main/java/alg/XML_processing.java Normal file → Executable file
View File

@ -224,6 +224,7 @@ public class XML_processing {
@SuppressWarnings("unused")
public static void readXMLSolar(String path, StatisticsNew stats) {
boolean in_word = false;
boolean inPunctuation = false;
String lemma = "";
String msd = "";
@ -274,6 +275,9 @@ public class XML_processing {
// the data anymore
corpus.clear();
}
}
else if(includeThisBlock){
inPunctuation = true;
}
} else if (headTags.contains(qName)) {
String tagContent = eventReader.nextEvent().asCharacters().getData();
@ -291,6 +295,12 @@ public class XML_processing {
if (in_word) {
stavek.add(new Word(characters.getData(), lemma, msd));
in_word = false;
} else if(inPunctuation){
String punctuation = ",";
stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation);
stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation);
stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation);
inPunctuation = false;
}
break;
@ -472,6 +482,7 @@ public class XML_processing {
@SuppressWarnings("Duplicates")
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
boolean inWord = false;
boolean inPunctuation = false;
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
@ -501,6 +512,11 @@ public class XML_processing {
msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
}
if (qName.equals("c")){
inPunctuation = true;
}
// taxonomy node
else if (qName.equalsIgnoreCase("catRef")) {
// there are some term nodes at the beginning that are of no interest to us
@ -526,6 +542,14 @@ public class XML_processing {
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
inWord = false;
}
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
//// String punctuation = characters.getData();
// String punctuation = ",";
// sentence.get(sentence.size()-1).setWord(sentence.get(sentence.size()-1).getWord() + punctuation);
// sentence.get(sentence.size()-1).setLemma(sentence.get(sentence.size()-1).getLemma() + punctuation);
// sentence.get(sentence.size()-1).setMsd(sentence.get(sentence.size()-1).getMsd() + punctuation);
// inPunctuation = false;
// }
break;
case XMLStreamConstants.END_ELEMENT:
@ -604,6 +628,7 @@ public class XML_processing {
@SuppressWarnings("Duplicates")
public static boolean readXMLGos(String path, StatisticsNew stats) {
boolean inWord = false;
boolean inPunctuation = false;
boolean inOrthDiv = false;
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
ArrayList<String> currentFiletaxonomy = new ArrayList<>();

0
src/main/java/alg/inflectedJOS/ForkJoin.java Normal file → Executable file
View File

0
src/main/java/alg/inflectedJOS/InflectedJOSCount.java Normal file → Executable file
View File

0
src/main/java/alg/inflectedJOS/WordFormation.java Normal file → Executable file
View File

0
src/main/java/alg/ngram/ForkJoin.java Normal file → Executable file
View File

3
src/main/java/alg/ngram/Ngrams.java Normal file → Executable file
View File

@ -44,6 +44,7 @@ public class Ngrams {
// generate proper MultipleHMKeys depending on filter data
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
// Strip a single trailing comma from the key. endsWith is used instead of
// charAt(length - 1) so that an empty key does not throw.
if (key.endsWith(",")) {
    key = key.substring(0, key.length() - 1);
}
// String key = "aaaaaaaaaaaaaaaaaaaaaaa";
String lemma = "";
@ -60,6 +61,8 @@ public class Ngrams {
}
}
MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
// UPDATE TAXONOMY HERE!!!

0
src/main/java/alg/word/ForkJoin.java Normal file → Executable file
View File

0
src/main/java/alg/word/WordCount.java Normal file → Executable file
View File

0
src/main/java/alg/word/WordLevel.java Normal file → Executable file
View File

0
src/main/java/data/AnalysisLevel.java Normal file → Executable file
View File

0
src/main/java/data/CalculateFor.java Normal file → Executable file
View File

0
src/main/java/data/Corpus.java Normal file → Executable file
View File

0
src/main/java/data/CorpusType.java Normal file → Executable file
View File

0
src/main/java/data/Enums/InflectedJosTypes.java Normal file → Executable file
View File

0
src/main/java/data/Enums/Msd.java Normal file → Executable file
View File

0
src/main/java/data/Enums/WordLevelDefaultValues.java Normal file → Executable file
View File

0
src/main/java/data/Enums/WordLevelType.java Normal file → Executable file
View File

0
src/main/java/data/Enums/solar/SolarFilters.java Normal file → Executable file
View File

11
src/main/java/data/Filter.java Normal file → Executable file
View File

@ -25,7 +25,8 @@ public class Filter {
MSD,
HAS_MSD,
SOLAR_FILTERS,
MULTIPLE_KEYS
MULTIPLE_KEYS,
NOTE_PUNCTUATIONS
}
public Filter() {
@ -161,4 +162,12 @@ public class Filter {
return new ArrayList<>();
}
}
/**
 * Stores the "note punctuations" flag in this filter under the
 * {@code NOTE_PUNCTUATIONS} key, replacing any previously stored value.
 *
 * @param notePunctuations the flag value to store
 */
public void setNotePunctuations(boolean notePunctuations) {
    filter.put(NOTE_PUNCTUATIONS, Boolean.valueOf(notePunctuations));
}
/**
 * Reads the "note punctuations" flag stored under the
 * {@code NOTE_PUNCTUATIONS} key.
 *
 * @return the stored flag, or {@code false} when no value has been stored
 */
public boolean getNotePunctuations() {
    if (!filter.containsKey(NOTE_PUNCTUATIONS)) {
        return false;
    }
    return (boolean) filter.get(NOTE_PUNCTUATIONS);
}
}

0
src/main/java/data/GigafidaJosWordType.java Normal file → Executable file
View File

0
src/main/java/data/GigafidaTaxonomy.java Normal file → Executable file
View File

0
src/main/java/data/GosTaxonomy.java Normal file → Executable file
View File

0
src/main/java/data/MultipleHMKeys.java Normal file → Executable file
View File

0
src/main/java/data/Sentence.java Normal file → Executable file
View File

0
src/main/java/data/Settings.java Normal file → Executable file
View File

0
src/main/java/data/Statistics.java Normal file → Executable file
View File

0
src/main/java/data/StatisticsNew.java Normal file → Executable file
View File

0
src/main/java/data/Tax.java Normal file → Executable file
View File

0
src/main/java/data/Taxonomy.java Normal file → Executable file
View File

0
src/main/java/data/Validation.java Normal file → Executable file
View File

4
src/main/java/data/Word.java Normal file → Executable file
View File

@ -134,6 +134,10 @@ public class Word implements Serializable {
return msd;
}
/**
 * Replaces the msd value held by this word.
 *
 * @param msd the new msd string; stored as given, without validation
 */
public void setMsd(String msd) {
this.msd = msd;
}
public String toString() {
StringBuilder sb = new StringBuilder();

0
src/main/java/gui/CharacterAnalysisTab.java Normal file → Executable file
View File

0
src/main/java/gui/CorpusTab.java Normal file → Executable file
View File

0
src/main/java/gui/FiltersForSolar.java Normal file → Executable file
View File

0
src/main/java/gui/GUIController.java Normal file → Executable file
View File

2
src/main/java/gui/Messages.java Normal file → Executable file
View File

@ -21,6 +21,7 @@ public class Messages {
public static final String WARNING_NO_SOLAR_FILTERS_FOUND = "Iz korpusnih datotek ni bilo moč razbrati filtrov. Prosim izberite drugo lokacijo ali korpus.";
public static final String ERROR_WHILE_EXECUTING = "Prišlo je do napake med izvajanjem.";
public static final String ERROR_WHILE_SAVING_RESULTS_TO_CSV = "Prišlo je do napake med shranjevanje rezultatov.";
public static final String ERROR_NOT_ENOUGH_MEMORY= "Na voljo imate premalo pomnilnika (RAM-a) za analizo takšne količine podatkov.";
// missing
public static final String MISSING_NGRAM_LEVEL = "N-gram nivo";
@ -52,6 +53,7 @@ public class Messages {
public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.";
public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.";
public static final String TOOLTIP_readNotePunctuationsChB = "Ločila med povedmi se upoštevajo v vsakem primeru.";

0
src/main/java/gui/SelectedFiltersPane.java Normal file → Executable file
View File

View File

@ -62,6 +62,10 @@ public class StringAnalysisTabNew2 {
private ComboBox<String> skipValueCB;
private Integer skipValue;
@FXML
private CheckBox notePunctuationsChB;
private boolean notePunctuations;
@FXML
private Pane paneWords;
@ -135,6 +139,14 @@ public class StringAnalysisTabNew2 {
ngramValueCB.getSelectionModel().select(0); // selected index
ngramValue = 2; // actual value at that index
notePunctuations = true;
// set
// Keep the notePunctuations field in sync with the checkbox selection.
notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
    notePunctuations = newValue;
    // The "{}" placeholder is required; without it the parameterized logger
    // drops the argument and only the label is written.
    logger.info("note punctuations: {}", notePunctuations);
});
notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB));
// calculateForCB
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
calculateFor = CalculateFor.factory(newValue);
@ -398,6 +410,7 @@ public class StringAnalysisTabNew2 {
filter.setSkipValue(skipValue);
filter.setIsCvv(calculateCvv);
filter.setSolarFilters(solarFiltersMap);
filter.setNotePunctuations(notePunctuations);
if (ngramValue != null && ngramValue == 0) {
filter.setStringLength(stringLength);
@ -488,6 +501,9 @@ public class StringAnalysisTabNew2 {
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1){
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
}
ngramProgressBar.progressProperty().unbind();

0
src/main/java/gui/ValidationUtil.java Normal file → Executable file
View File

0
src/main/java/gui/WordFormationTab.java Normal file → Executable file
View File

0
src/main/java/gui/WordLevelTab.java Normal file → Executable file
View File

0
src/main/java/man/META-INF/MANIFEST.MF Normal file → Executable file
View File

0
src/main/java/util/ByteUtils.java Normal file → Executable file
View File

0
src/main/java/util/Combinations.java Normal file → Executable file
View File

5
src/main/java/util/Export.java Normal file → Executable file
View File

@ -13,6 +13,7 @@ import data.Filter;
import data.MultipleHMKeys;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.QuoteMode;
import org.apache.commons.lang3.tuple.Pair;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
@ -167,8 +168,8 @@ public class Export {
OutputStreamWriter fileWriter = null;
CSVPrinter csvFilePrinter = null;
//Create the CSVFormat object with "\n" as a record delimiter
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
//Create the CSVFormat object with "\n" as a record delimiter; QuoteMode.ALL wraps every value in quotes
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';').withQuoteMode(QuoteMode.ALL);
try {
//initialize FileWriter object

0
src/main/java/util/Key.java Normal file → Executable file
View File

0
src/main/java/util/TimeWatch.java Normal file → Executable file
View File

0
src/main/java/util/Util.java Normal file → Executable file
View File

0
src/main/java/util/db/RDB.java Normal file → Executable file
View File

0
src/main/resources/GOS_small/TEI_GOS_small.xml Normal file → Executable file
View File

0
src/main/resources/GOS_tax_test/GOS_tax_test.xml Normal file → Executable file
View File

0
src/main/resources/GUI.fxml Normal file → Executable file
View File

0
src/main/resources/Gigafida_minimal/gfmin.xml Normal file → Executable file
View File

View File

Can't render this file because it has a wrong number of fields in line 11.

View File

Can't render this file because it has a wrong number of fields in line 11.

View File

Can't render this file because it has a wrong number of fields in line 11.

View File

Can't render this file because it has a wrong number of fields in line 9.

View File

Can't render this file because it has a wrong number of fields in line 9.

View File

Can't render this file because it has a wrong number of fields in line 9.

0
src/main/resources/Gigafida_subset/F0012405.xml Normal file → Executable file
View File

0
src/main/resources/Gigafida_subset/F0016316.xml Normal file → Executable file
View File

0
src/main/resources/Gigafida_subset/F0018194.xml Normal file → Executable file
View File

0
src/main/resources/Gigafida_subset/F0026709.xml Normal file → Executable file
View File

0
src/main/resources/Gigafida_subset/F0030361.xml Normal file → Executable file
View File

0
src/main/resources/Gigafida_subset/nested/F0036980.xml Normal file → Executable file
View File

0
src/main/resources/Gigafida_subset/nested/F0037258.xml Normal file → Executable file
View File

0
src/main/resources/Gigafida_subset/nested/F0037544.xml Normal file → Executable file
View File

0
src/main/resources/Gigafida_subset/nested/F0038754.xml Normal file → Executable file
View File

0
src/main/resources/Gigafida_subset/nested/F0038920.xml Normal file → Executable file
View File

0
src/main/resources/Lists/prefixes.txt Normal file → Executable file
View File

0
src/main/resources/Lists/suffixes.txt Normal file → Executable file
View File

0
src/main/resources/gui/CharacterAnalysisTab.fxml Normal file → Executable file
View File

0
src/main/resources/gui/CorpusTab.fxml Normal file → Executable file
View File

0
src/main/resources/gui/FiltersForSolar.fxml Normal file → Executable file
View File

0
src/main/resources/gui/SelectedFiltersPane.fxml Normal file → Executable file
View File

View File

@ -62,19 +62,21 @@
</items>
</ComboBox>
</children>
<children>
<Label layoutX="10.0" layoutY="40.0" prefHeight="25.0" text="Upoštevaj ločila"/>
<CheckBox fx:id="notePunctuationsChB" layoutX="176.0" layoutY="45.0" selected="true"/>
</children>
</Pane>
<!-- MSD and Taxonomy separated -->
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov"/>
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov"/>
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="100.0" layoutY="200.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="100.0" layoutY="240.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>
<!-- samoglasniki/soglasniki -->

0
src/main/resources/gui/WordFormationTab.fxml Normal file → Executable file
View File

0
src/main/resources/gui/WordLevelTab.fxml Normal file → Executable file
View File

0
src/main/resources/log4j2.xml Normal file → Executable file
View File

0
src/test/java/Common.java Normal file → Executable file
View File

0
src/test/java/CorpusTests.java Normal file → Executable file
View File

0
src/test/java/DBTest.java Normal file → Executable file
View File

0
src/test/java/NgramTests.java Normal file → Executable file
View File

0
src/test/java/WordFormationTest.java Normal file → Executable file
View File

0
src/test/java/WordLevelTest.java Normal file → Executable file
View File

0
src/test/java/WordTest.java Normal file → Executable file
View File