Computer formatted

This commit is contained in:
2018-07-23 09:14:46 +02:00
parent 84d0086a66
commit bebc0abbb3
92 changed files with 74 additions and 12 deletions
Regular → Executable
View File
+27 -2
View File
@@ -224,7 +224,8 @@ public class XML_processing {
@SuppressWarnings("unused")
public static void readXMLSolar(String path, StatisticsNew stats) {
boolean in_word = false;
String lemma = "";
boolean inPunctuation = false;
String lemma = "";
String msd = "";
List<Word> stavek = new ArrayList<>();
@@ -275,6 +276,9 @@ public class XML_processing {
corpus.clear();
}
}
else if(includeThisBlock){
inPunctuation = true;
}
} else if (headTags.contains(qName)) {
String tagContent = eventReader.nextEvent().asCharacters().getData();
headBlock.put(qName, tagContent);
@@ -291,7 +295,13 @@ public class XML_processing {
if (in_word) {
stavek.add(new Word(characters.getData(), lemma, msd));
in_word = false;
}
} else if(inPunctuation){
String punctuation = ",";
stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation);
stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation);
stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation);
inPunctuation = false;
}
break;
case XMLStreamConstants.END_ELEMENT:
@@ -472,6 +482,7 @@ public class XML_processing {
@SuppressWarnings("Duplicates")
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
boolean inWord = false;
boolean inPunctuation = false;
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
@@ -501,6 +512,11 @@ public class XML_processing {
msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
}
if (qName.equals("c")){
inPunctuation = true;
}
// taxonomy node
else if (qName.equalsIgnoreCase("catRef")) {
// there are some term nodes at the beginning that are of no interest to us
@@ -526,6 +542,14 @@ public class XML_processing {
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
inWord = false;
}
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
//// String punctuation = characters.getData();
// String punctuation = ",";
// sentence.get(sentence.size()-1).setWord(sentence.get(sentence.size()-1).getWord() + punctuation);
// sentence.get(sentence.size()-1).setLemma(sentence.get(sentence.size()-1).getLemma() + punctuation);
// sentence.get(sentence.size()-1).setMsd(sentence.get(sentence.size()-1).getMsd() + punctuation);
// inPunctuation = false;
// }
break;
case XMLStreamConstants.END_ELEMENT:
@@ -604,6 +628,7 @@ public class XML_processing {
@SuppressWarnings("Duplicates")
public static boolean readXMLGos(String path, StatisticsNew stats) {
boolean inWord = false;
boolean inPunctuation = false;
boolean inOrthDiv = false;
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
View File
View File
View File
View File
+3
View File
@@ -44,6 +44,7 @@ public class Ngrams {
// generate proper MultipleHMKeys depending on filter data
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
// String key = "aaaaaaaaaaaaaaaaaaaaaaa";
String lemma = "";
@@ -60,6 +61,8 @@ public class Ngrams {
}
}
MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
// UPDATE TAXONOMY HERE!!!
View File
View File
View File
View File
View File
Regular → Executable
View File
View File
View File
View File
View File
View File
View File
Regular → Executable
+10 -1
View File
@@ -25,7 +25,8 @@ public class Filter {
MSD,
HAS_MSD,
SOLAR_FILTERS,
MULTIPLE_KEYS
MULTIPLE_KEYS,
NOTE_PUNCTUATIONS
}
public Filter() {
@@ -161,4 +162,12 @@ public class Filter {
return new ArrayList<>();
}
}
/**
 * Stores the "note punctuations" flag in {@code filter} under the
 * {@code NOTE_PUNCTUATIONS} key, replacing any value previously stored there.
 *
 * @param notePunctuations the flag value to store
 */
public void setNotePunctuations(boolean notePunctuations) {
filter.put(NOTE_PUNCTUATIONS, notePunctuations);
}
/**
 * Reports the "note punctuations" flag held in {@code filter}.
 *
 * @return {@code false} when nothing is stored under {@code NOTE_PUNCTUATIONS};
 *         otherwise the stored value cast to {@code boolean}
 */
public boolean getNotePunctuations() {
    // An absent key means the option was never set, which counts as "off".
    if (!filter.containsKey(NOTE_PUNCTUATIONS)) {
        return false;
    }
    return (boolean) filter.get(NOTE_PUNCTUATIONS);
}
}
View File
View File
View File
View File
Regular → Executable
View File
Regular → Executable
View File
View File
View File
Regular → Executable
View File
Regular → Executable
View File
View File
Regular → Executable
+4
View File
@@ -134,6 +134,10 @@ public class Word implements Serializable {
return msd;
}
/**
 * Replaces this word's {@code msd} value.
 *
 * @param msd the new value; stored as given, with no validation
 */
public void setMsd(String msd) {
this.msd = msd;
}
public String toString() {
StringBuilder sb = new StringBuilder();
View File
Regular → Executable
View File
View File
View File
Regular → Executable
+2
View File
@@ -21,6 +21,7 @@ public class Messages {
public static final String WARNING_NO_SOLAR_FILTERS_FOUND = "Iz korpusnih datotek ni bilo moč razbrati filtrov. Prosim izberite drugo lokacijo ali korpus.";
public static final String ERROR_WHILE_EXECUTING = "Prišlo je do napake med izvajanjem.";
public static final String ERROR_WHILE_SAVING_RESULTS_TO_CSV = "Prišlo je do napake med shranjevanje rezultatov.";
public static final String ERROR_NOT_ENOUGH_MEMORY= "Na voljo imate premalo pomnilnika (RAM-a) za analizo takšne količine podatkov.";
// missing
public static final String MISSING_NGRAM_LEVEL = "N-gram nivo";
@@ -52,6 +53,7 @@ public class Messages {
public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.";
public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.";
public static final String TOOLTIP_readNotePunctuationsChB = "Ločila med povedmi se upoštevajo v vsakem primeru.";
View File
@@ -62,6 +62,10 @@ public class StringAnalysisTabNew2 {
private ComboBox<String> skipValueCB;
private Integer skipValue;
@FXML
private CheckBox notePunctuationsChB;
private boolean notePunctuations;
@FXML
private Pane paneWords;
@@ -135,6 +139,14 @@ public class StringAnalysisTabNew2 {
ngramValueCB.getSelectionModel().select(0); // selected index
ngramValue = 2; // actual value at that index
notePunctuations = true;
// set
// Mirror the checkbox state into the notePunctuations field and log every change.
notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
    notePunctuations = newValue;
    // The "{}" placeholder is needed for the value to appear in the log line;
    // without it the extra argument is silently ignored by the logger.
    logger.info("note punctuations: {}", notePunctuations);
});
notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB));
// calculateForCB
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
calculateFor = CalculateFor.factory(newValue);
@@ -398,6 +410,7 @@ public class StringAnalysisTabNew2 {
filter.setSkipValue(skipValue);
filter.setIsCvv(calculateCvv);
filter.setSolarFilters(solarFiltersMap);
filter.setNotePunctuations(notePunctuations);
if (ngramValue != null && ngramValue == 0) {
filter.setStringLength(stringLength);
@@ -488,6 +501,9 @@ public class StringAnalysisTabNew2 {
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1);
} catch (OutOfMemoryError e1){
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
logger.error("Out of memory error", e1);
}
ngramProgressBar.progressProperty().unbind();
View File
View File
View File
View File
View File
View File
Regular → Executable
+3 -2
View File
@@ -13,6 +13,7 @@ import data.Filter;
import data.MultipleHMKeys;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.QuoteMode;
import org.apache.commons.lang3.tuple.Pair;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
@@ -167,8 +168,8 @@ public class Export {
OutputStreamWriter fileWriter = null;
CSVPrinter csvFilePrinter = null;
//Create the CSVFormat object with "\n" as a record delimiter
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
// Create the CSVFormat object with "\n" as the record delimiter; QuoteMode.ALL wraps every value in quotes.
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';').withQuoteMode(QuoteMode.ALL);
try {
//initialize FileWriter object
Regular → Executable
View File
View File
Regular → Executable
View File
Regular → Executable
View File
View File
View File
Regular → Executable
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
@@ -62,19 +62,21 @@
</items>
</ComboBox>
</children>
<children>
<Label layoutX="10.0" layoutY="40.0" prefHeight="25.0" text="Upoštevaj ločila"/>
<CheckBox fx:id="notePunctuationsChB" layoutX="176.0" layoutY="45.0" selected="true"/>
</children>
</Pane>
<!-- MSD and Taxonomy separated -->
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov"/>
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov"/>
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="100.0" layoutY="200.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="100.0" layoutY="240.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>
<!-- samoglasniki/soglasniki -->
View File
View File
Regular → Executable
View File
Regular → Executable
View File
Regular → Executable
View File
Regular → Executable
View File
Regular → Executable
View File
View File
Regular → Executable
View File
Regular → Executable
View File