Computer formatted

This commit is contained in:
2018-07-23 09:14:46 +02:00
parent 84d0086a66
commit bebc0abbb3
92 changed files with 74 additions and 12 deletions
Regular → Executable
View File
Regular → Executable
View File
Regular → Executable
View File
Regular → Executable
View File
+27 -2
View File
@@ -224,7 +224,8 @@ public class XML_processing {
@SuppressWarnings("unused") @SuppressWarnings("unused")
public static void readXMLSolar(String path, StatisticsNew stats) { public static void readXMLSolar(String path, StatisticsNew stats) {
boolean in_word = false; boolean in_word = false;
String lemma = ""; boolean inPunctuation = false;
String lemma = "";
String msd = ""; String msd = "";
List<Word> stavek = new ArrayList<>(); List<Word> stavek = new ArrayList<>();
@@ -275,6 +276,9 @@ public class XML_processing {
corpus.clear(); corpus.clear();
} }
} }
else if(includeThisBlock){
inPunctuation = true;
}
} else if (headTags.contains(qName)) { } else if (headTags.contains(qName)) {
String tagContent = eventReader.nextEvent().asCharacters().getData(); String tagContent = eventReader.nextEvent().asCharacters().getData();
headBlock.put(qName, tagContent); headBlock.put(qName, tagContent);
@@ -291,7 +295,13 @@ public class XML_processing {
if (in_word) { if (in_word) {
stavek.add(new Word(characters.getData(), lemma, msd)); stavek.add(new Word(characters.getData(), lemma, msd));
in_word = false; in_word = false;
} } else if(inPunctuation){
String punctuation = ",";
stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation);
stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation);
stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation);
inPunctuation = false;
}
break; break;
case XMLStreamConstants.END_ELEMENT: case XMLStreamConstants.END_ELEMENT:
@@ -472,6 +482,7 @@ public class XML_processing {
@SuppressWarnings("Duplicates") @SuppressWarnings("Duplicates")
public static boolean readXMLGigafida(String path, StatisticsNew stats) { public static boolean readXMLGigafida(String path, StatisticsNew stats) {
boolean inWord = false; boolean inWord = false;
boolean inPunctuation = false;
ArrayList<String> currentFiletaxonomy = new ArrayList<>(); ArrayList<String> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>(); ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = ""; String lemma = "";
@@ -501,6 +512,11 @@ public class XML_processing {
msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
} }
if (qName.equals("c")){
inPunctuation = true;
}
// taxonomy node // taxonomy node
else if (qName.equalsIgnoreCase("catRef")) { else if (qName.equalsIgnoreCase("catRef")) {
// there are some term nodes at the beginning that are of no interest to us // there are some term nodes at the beginning that are of no interest to us
@@ -526,6 +542,14 @@ public class XML_processing {
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong)); sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
inWord = false; inWord = false;
} }
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
//// String punctuation = characters.getData();
// String punctuation = ",";
// sentence.get(sentence.size()-1).setWord(sentence.get(sentence.size()-1).getWord() + punctuation);
// sentence.get(sentence.size()-1).setLemma(sentence.get(sentence.size()-1).getLemma() + punctuation);
// sentence.get(sentence.size()-1).setMsd(sentence.get(sentence.size()-1).getMsd() + punctuation);
// inPunctuation = false;
// }
break; break;
case XMLStreamConstants.END_ELEMENT: case XMLStreamConstants.END_ELEMENT:
@@ -604,6 +628,7 @@ public class XML_processing {
@SuppressWarnings("Duplicates") @SuppressWarnings("Duplicates")
public static boolean readXMLGos(String path, StatisticsNew stats) { public static boolean readXMLGos(String path, StatisticsNew stats) {
boolean inWord = false; boolean inWord = false;
boolean inPunctuation = false;
boolean inOrthDiv = false; boolean inOrthDiv = false;
boolean computeForOrth = stats.getCorpus().isGosOrthMode(); boolean computeForOrth = stats.getCorpus().isGosOrthMode();
ArrayList<String> currentFiletaxonomy = new ArrayList<>(); ArrayList<String> currentFiletaxonomy = new ArrayList<>();
View File
View File
View File
View File
+3
View File
@@ -44,6 +44,7 @@ public class Ngrams {
// generate proper MultipleHMKeys depending on filter data // generate proper MultipleHMKeys depending on filter data
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor()); String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
// String key = "aaaaaaaaaaaaaaaaaaaaaaa"; // String key = "aaaaaaaaaaaaaaaaaaaaaaa";
String lemma = ""; String lemma = "";
@@ -60,6 +61,8 @@ public class Ngrams {
} }
} }
MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd); MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
// UPDATE TAXONOMY HERE!!! // UPDATE TAXONOMY HERE!!!
View File
View File
View File
View File
View File
Regular → Executable
View File
View File
View File
View File
View File
View File
View File
Regular → Executable
+10 -1
View File
@@ -25,7 +25,8 @@ public class Filter {
MSD, MSD,
HAS_MSD, HAS_MSD,
SOLAR_FILTERS, SOLAR_FILTERS,
MULTIPLE_KEYS MULTIPLE_KEYS,
NOTE_PUNCTUATIONS
} }
public Filter() { public Filter() {
@@ -161,4 +162,12 @@ public class Filter {
return new ArrayList<>(); return new ArrayList<>();
} }
} }
/**
 * Stores the "note punctuations" flag in this filter under the
 * {@code NOTE_PUNCTUATIONS} key, overwriting any previously stored value.
 *
 * @param notePunctuations the flag value to store
 */
public void setNotePunctuations(boolean notePunctuations) {
filter.put(NOTE_PUNCTUATIONS, notePunctuations);
}
/**
 * Reads the "note punctuations" flag from this filter.
 *
 * @return {@code false} when no value is stored under the
 *         {@code NOTE_PUNCTUATIONS} key; otherwise the stored value cast to
 *         {@code boolean}
 */
public boolean getNotePunctuations() {
    // An absent key means the flag was never set, which is treated as "off".
    if (!filter.containsKey(NOTE_PUNCTUATIONS)) {
        return false;
    }
    return (boolean) filter.get(NOTE_PUNCTUATIONS);
}
} }
View File
View File
View File
View File
Regular → Executable
View File
Regular → Executable
View File
View File
View File
Regular → Executable
View File
Regular → Executable
View File
View File
Regular → Executable
+4
View File
@@ -134,6 +134,10 @@ public class Word implements Serializable {
return msd; return msd;
} }
/**
 * Replaces the {@code msd} value held by this word.
 *
 * @param msd the new value; it is assigned as given, without validation
 */
public void setMsd(String msd) {
this.msd = msd;
}
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
View File
Regular → Executable
View File
View File
View File
Regular → Executable
+2
View File
@@ -21,6 +21,7 @@ public class Messages {
public static final String WARNING_NO_SOLAR_FILTERS_FOUND = "Iz korpusnih datotek ni bilo moč razbrati filtrov. Prosim izberite drugo lokacijo ali korpus."; public static final String WARNING_NO_SOLAR_FILTERS_FOUND = "Iz korpusnih datotek ni bilo moč razbrati filtrov. Prosim izberite drugo lokacijo ali korpus.";
public static final String ERROR_WHILE_EXECUTING = "Prišlo je do napake med izvajanjem."; public static final String ERROR_WHILE_EXECUTING = "Prišlo je do napake med izvajanjem.";
public static final String ERROR_WHILE_SAVING_RESULTS_TO_CSV = "Prišlo je do napake med shranjevanje rezultatov."; public static final String ERROR_WHILE_SAVING_RESULTS_TO_CSV = "Prišlo je do napake med shranjevanje rezultatov.";
public static final String ERROR_NOT_ENOUGH_MEMORY= "Na voljo imate premalo pomnilnika (RAM-a) za analizo takšne količine podatkov.";
// missing // missing
public static final String MISSING_NGRAM_LEVEL = "N-gram nivo"; public static final String MISSING_NGRAM_LEVEL = "N-gram nivo";
@@ -52,6 +53,7 @@ public class Messages {
public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa."; public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.";
public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki."; public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.";
public static final String TOOLTIP_readNotePunctuationsChB = "Ločila med povedmi se upoštevajo v vsakem primeru.";
View File
@@ -62,6 +62,10 @@ public class StringAnalysisTabNew2 {
private ComboBox<String> skipValueCB; private ComboBox<String> skipValueCB;
private Integer skipValue; private Integer skipValue;
@FXML
private CheckBox notePunctuationsChB;
private boolean notePunctuations;
@FXML @FXML
private Pane paneWords; private Pane paneWords;
@@ -135,6 +139,14 @@ public class StringAnalysisTabNew2 {
ngramValueCB.getSelectionModel().select(0); // selected index ngramValueCB.getSelectionModel().select(0); // selected index
ngramValue = 2; // actual value at that index ngramValue = 2; // actual value at that index
notePunctuations = true;
// set
notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
notePunctuations = newValue;
logger.info("note punctuations: ", notePunctuations);
});
notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB));
// calculateForCB // calculateForCB
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> { calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
calculateFor = CalculateFor.factory(newValue); calculateFor = CalculateFor.factory(newValue);
@@ -398,6 +410,7 @@ public class StringAnalysisTabNew2 {
filter.setSkipValue(skipValue); filter.setSkipValue(skipValue);
filter.setIsCvv(calculateCvv); filter.setIsCvv(calculateCvv);
filter.setSolarFilters(solarFiltersMap); filter.setSolarFilters(solarFiltersMap);
filter.setNotePunctuations(notePunctuations);
if (ngramValue != null && ngramValue == 0) { if (ngramValue != null && ngramValue == 0) {
filter.setStringLength(stringLength); filter.setStringLength(stringLength);
@@ -488,6 +501,9 @@ public class StringAnalysisTabNew2 {
} catch (UnsupportedEncodingException e1) { } catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV); showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1); logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1){
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
} }
ngramProgressBar.progressProperty().unbind(); ngramProgressBar.progressProperty().unbind();
View File
View File
View File
View File
View File
View File
Regular → Executable
+3 -2
View File
@@ -13,6 +13,7 @@ import data.Filter;
import data.MultipleHMKeys; import data.MultipleHMKeys;
import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter; import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.QuoteMode;
import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.tuple.Pair;
import org.json.simple.JSONArray; import org.json.simple.JSONArray;
import org.json.simple.JSONObject; import org.json.simple.JSONObject;
@@ -167,8 +168,8 @@ public class Export {
OutputStreamWriter fileWriter = null; OutputStreamWriter fileWriter = null;
CSVPrinter csvFilePrinter = null; CSVPrinter csvFilePrinter = null;
//Create the CSVFormat object with "\n" as a record delimiter //Create the CSVFormat object with "\n" as a record delimiter it puts all words in braces
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';'); CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';').withQuoteMode(QuoteMode.ALL);
try { try {
//initialize FileWriter object //initialize FileWriter object
Regular → Executable
View File
View File
Regular → Executable
View File
Regular → Executable
View File
View File
View File
Regular → Executable
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
@@ -62,19 +62,21 @@
</items> </items>
</ComboBox> </ComboBox>
</children> </children>
<children>
<Label layoutX="10.0" layoutY="40.0" prefHeight="25.0" text="Upoštevaj ločila"/>
<CheckBox fx:id="notePunctuationsChB" layoutX="176.0" layoutY="45.0" selected="true"/>
</children>
</Pane> </Pane>
<!-- MSD and Taxonomy separated --> <!-- MSD and Taxonomy separated -->
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov"/> <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov"/>
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/> <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="100.0" layoutY="200.0" prefWidth="180.0"/> <TextField fx:id="msdTF" layoutX="100.0" layoutY="240.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/> <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>
<!-- samoglasniki/soglasniki --> <!-- samoglasniki/soglasniki -->
View File
View File
Regular → Executable
View File
Regular → Executable
View File
Regular → Executable
View File
Regular → Executable
View File
Regular → Executable
View File
View File
Regular → Executable
View File
Regular → Executable
View File