Computer formatted
This commit is contained in:
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
+27
-2
@@ -224,7 +224,8 @@ public class XML_processing {
|
|||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
public static void readXMLSolar(String path, StatisticsNew stats) {
|
public static void readXMLSolar(String path, StatisticsNew stats) {
|
||||||
boolean in_word = false;
|
boolean in_word = false;
|
||||||
String lemma = "";
|
boolean inPunctuation = false;
|
||||||
|
String lemma = "";
|
||||||
String msd = "";
|
String msd = "";
|
||||||
|
|
||||||
List<Word> stavek = new ArrayList<>();
|
List<Word> stavek = new ArrayList<>();
|
||||||
@@ -275,6 +276,9 @@ public class XML_processing {
|
|||||||
corpus.clear();
|
corpus.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if(includeThisBlock){
|
||||||
|
inPunctuation = true;
|
||||||
|
}
|
||||||
} else if (headTags.contains(qName)) {
|
} else if (headTags.contains(qName)) {
|
||||||
String tagContent = eventReader.nextEvent().asCharacters().getData();
|
String tagContent = eventReader.nextEvent().asCharacters().getData();
|
||||||
headBlock.put(qName, tagContent);
|
headBlock.put(qName, tagContent);
|
||||||
@@ -291,7 +295,13 @@ public class XML_processing {
|
|||||||
if (in_word) {
|
if (in_word) {
|
||||||
stavek.add(new Word(characters.getData(), lemma, msd));
|
stavek.add(new Word(characters.getData(), lemma, msd));
|
||||||
in_word = false;
|
in_word = false;
|
||||||
}
|
} else if(inPunctuation){
|
||||||
|
String punctuation = ",";
|
||||||
|
// A punctuation event can arrive before any word has been collected for the
// current sentence; without this guard stavek.get(-1) throws
// IndexOutOfBoundsException. The Gigafida variant of this logic applies the
// same "sentence is not empty" precondition.
if (!stavek.isEmpty()) {
    Word lastWord = stavek.get(stavek.size() - 1);
    // Append the punctuation marker to all three representations of the
    // preceding word so that they stay aligned with each other.
    lastWord.setWord(lastWord.getWord() + punctuation);
    lastWord.setLemma(lastWord.getLemma() + punctuation);
    lastWord.setMsd(lastWord.getMsd() + punctuation);
}
|
||||||
|
inPunctuation = false;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case XMLStreamConstants.END_ELEMENT:
|
case XMLStreamConstants.END_ELEMENT:
|
||||||
@@ -472,6 +482,7 @@ public class XML_processing {
|
|||||||
@SuppressWarnings("Duplicates")
|
@SuppressWarnings("Duplicates")
|
||||||
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
|
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
|
||||||
boolean inWord = false;
|
boolean inWord = false;
|
||||||
|
boolean inPunctuation = false;
|
||||||
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
||||||
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||||
String lemma = "";
|
String lemma = "";
|
||||||
@@ -501,6 +512,11 @@ public class XML_processing {
|
|||||||
msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
||||||
lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (qName.equals("c")){
|
||||||
|
inPunctuation = true;
|
||||||
|
}
|
||||||
|
|
||||||
// taxonomy node
|
// taxonomy node
|
||||||
else if (qName.equalsIgnoreCase("catRef")) {
|
else if (qName.equalsIgnoreCase("catRef")) {
|
||||||
// there are some term nodes at the beginning that are of no interest to us
|
// there are some term nodes at the beginning that are of no interest to us
|
||||||
@@ -526,6 +542,14 @@ public class XML_processing {
|
|||||||
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
|
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
|
||||||
inWord = false;
|
inWord = false;
|
||||||
}
|
}
|
||||||
|
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||||
|
//// String punctuation = characters.getData();
|
||||||
|
// String punctuation = ",";
|
||||||
|
// sentence.get(sentence.size()-1).setWord(sentence.get(sentence.size()-1).getWord() + punctuation);
|
||||||
|
// sentence.get(sentence.size()-1).setLemma(sentence.get(sentence.size()-1).getLemma() + punctuation);
|
||||||
|
// sentence.get(sentence.size()-1).setMsd(sentence.get(sentence.size()-1).getMsd() + punctuation);
|
||||||
|
// inPunctuation = false;
|
||||||
|
// }
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case XMLStreamConstants.END_ELEMENT:
|
case XMLStreamConstants.END_ELEMENT:
|
||||||
@@ -604,6 +628,7 @@ public class XML_processing {
|
|||||||
@SuppressWarnings("Duplicates")
|
@SuppressWarnings("Duplicates")
|
||||||
public static boolean readXMLGos(String path, StatisticsNew stats) {
|
public static boolean readXMLGos(String path, StatisticsNew stats) {
|
||||||
boolean inWord = false;
|
boolean inWord = false;
|
||||||
|
boolean inPunctuation = false;
|
||||||
boolean inOrthDiv = false;
|
boolean inOrthDiv = false;
|
||||||
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
|
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
|
||||||
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
||||||
|
|||||||
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
+3
@@ -44,6 +44,7 @@ public class Ngrams {
|
|||||||
|
|
||||||
// generate proper MultipleHMKeys depending on filter data
|
// generate proper MultipleHMKeys depending on filter data
|
||||||
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
|
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
|
||||||
|
// Drop a single trailing punctuation marker (",") from the key.
// endsWith() is used instead of charAt(length - 1) so that an empty key
// does not raise StringIndexOutOfBoundsException.
key = key.endsWith(",") ? key.substring(0, key.length() - 1) : key;
|
||||||
// String key = "aaaaaaaaaaaaaaaaaaaaaaa";
|
// String key = "aaaaaaaaaaaaaaaaaaaaaaa";
|
||||||
|
|
||||||
String lemma = "";
|
String lemma = "";
|
||||||
@@ -60,6 +61,8 @@ public class Ngrams {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
|
MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
|
||||||
|
|
||||||
// UPDATE TAXONOMY HERE!!!
|
// UPDATE TAXONOMY HERE!!!
|
||||||
|
|||||||
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
+10
-1
@@ -25,7 +25,8 @@ public class Filter {
|
|||||||
MSD,
|
MSD,
|
||||||
HAS_MSD,
|
HAS_MSD,
|
||||||
SOLAR_FILTERS,
|
SOLAR_FILTERS,
|
||||||
MULTIPLE_KEYS
|
MULTIPLE_KEYS,
|
||||||
|
NOTE_PUNCTUATIONS
|
||||||
}
|
}
|
||||||
|
|
||||||
public Filter() {
|
public Filter() {
|
||||||
@@ -161,4 +162,12 @@ public class Filter {
|
|||||||
return new ArrayList<>();
|
return new ArrayList<>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Stores the given flag in {@code filter} under the {@code NOTE_PUNCTUATIONS} key,
 * replacing any value previously stored under that key.
 *
 * @param notePunctuations value to store for {@code NOTE_PUNCTUATIONS}
 */
public void setNotePunctuations(boolean notePunctuations) {
|
||||||
|
filter.put(NOTE_PUNCTUATIONS, notePunctuations);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean getNotePunctuations() {
|
||||||
|
return filter.containsKey(NOTE_PUNCTUATIONS) && (boolean) filter.get(NOTE_PUNCTUATIONS);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
+4
@@ -134,6 +134,10 @@ public class Word implements Serializable {
|
|||||||
return msd;
|
return msd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Replaces the {@code msd} field of this word with the given value.
 *
 * @param msd new value assigned to {@code this.msd}
 */
public void setMsd(String msd) {
|
||||||
|
this.msd = msd;
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
|
|
||||||
|
|||||||
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
+2
@@ -21,6 +21,7 @@ public class Messages {
|
|||||||
public static final String WARNING_NO_SOLAR_FILTERS_FOUND = "Iz korpusnih datotek ni bilo moč razbrati filtrov. Prosim izberite drugo lokacijo ali korpus.";
|
public static final String WARNING_NO_SOLAR_FILTERS_FOUND = "Iz korpusnih datotek ni bilo moč razbrati filtrov. Prosim izberite drugo lokacijo ali korpus.";
|
||||||
public static final String ERROR_WHILE_EXECUTING = "Prišlo je do napake med izvajanjem.";
|
public static final String ERROR_WHILE_EXECUTING = "Prišlo je do napake med izvajanjem.";
|
||||||
public static final String ERROR_WHILE_SAVING_RESULTS_TO_CSV = "Prišlo je do napake med shranjevanje rezultatov.";
|
public static final String ERROR_WHILE_SAVING_RESULTS_TO_CSV = "Prišlo je do napake med shranjevanje rezultatov.";
|
||||||
|
public static final String ERROR_NOT_ENOUGH_MEMORY= "Na voljo imate premalo pomnilnika (RAM-a) za analizo takšne količine podatkov.";
|
||||||
|
|
||||||
// missing
|
// missing
|
||||||
public static final String MISSING_NGRAM_LEVEL = "N-gram nivo";
|
public static final String MISSING_NGRAM_LEVEL = "N-gram nivo";
|
||||||
@@ -52,6 +53,7 @@ public class Messages {
|
|||||||
|
|
||||||
public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.";
|
public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.";
|
||||||
public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.";
|
public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.";
|
||||||
|
public static final String TOOLTIP_readNotePunctuationsChB = "Ločila med povedmi se upoštevajo v vsakem primeru.";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Regular → Executable
@@ -62,6 +62,10 @@ public class StringAnalysisTabNew2 {
|
|||||||
private ComboBox<String> skipValueCB;
|
private ComboBox<String> skipValueCB;
|
||||||
private Integer skipValue;
|
private Integer skipValue;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private CheckBox notePunctuationsChB;
|
||||||
|
private boolean notePunctuations;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private Pane paneWords;
|
private Pane paneWords;
|
||||||
|
|
||||||
@@ -135,6 +139,14 @@ public class StringAnalysisTabNew2 {
|
|||||||
ngramValueCB.getSelectionModel().select(0); // selected index
|
ngramValueCB.getSelectionModel().select(0); // selected index
|
||||||
ngramValue = 2; // actual value at that index
|
ngramValue = 2; // actual value at that index
|
||||||
|
|
||||||
|
notePunctuations = true;
|
||||||
|
// keep the notePunctuations field in sync with the checkbox state
|
||||||
|
// Mirror every change of the checkbox into the notePunctuations field.
notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
    notePunctuations = newValue;
    // The "{}" placeholder is required; without it the argument is silently
    // dropped and only the constant prefix is logged.
    logger.info("note punctuations: {}", notePunctuations);
});
|
||||||
|
notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB));
|
||||||
|
|
||||||
// calculateForCB
|
// calculateForCB
|
||||||
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
calculateFor = CalculateFor.factory(newValue);
|
calculateFor = CalculateFor.factory(newValue);
|
||||||
@@ -398,6 +410,7 @@ public class StringAnalysisTabNew2 {
|
|||||||
filter.setSkipValue(skipValue);
|
filter.setSkipValue(skipValue);
|
||||||
filter.setIsCvv(calculateCvv);
|
filter.setIsCvv(calculateCvv);
|
||||||
filter.setSolarFilters(solarFiltersMap);
|
filter.setSolarFilters(solarFiltersMap);
|
||||||
|
filter.setNotePunctuations(notePunctuations);
|
||||||
|
|
||||||
if (ngramValue != null && ngramValue == 0) {
|
if (ngramValue != null && ngramValue == 0) {
|
||||||
filter.setStringLength(stringLength);
|
filter.setStringLength(stringLength);
|
||||||
@@ -488,6 +501,9 @@ public class StringAnalysisTabNew2 {
|
|||||||
} catch (UnsupportedEncodingException e1) {
|
} catch (UnsupportedEncodingException e1) {
|
||||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||||
logger.error("Error while saving", e1);
|
logger.error("Error while saving", e1);
|
||||||
|
} catch (OutOfMemoryError e1){
|
||||||
|
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
||||||
|
logger.error("Out of memory error", e1);
|
||||||
}
|
}
|
||||||
|
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
|
|||||||
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
+3
-2
@@ -13,6 +13,7 @@ import data.Filter;
|
|||||||
import data.MultipleHMKeys;
|
import data.MultipleHMKeys;
|
||||||
import org.apache.commons.csv.CSVFormat;
|
import org.apache.commons.csv.CSVFormat;
|
||||||
import org.apache.commons.csv.CSVPrinter;
|
import org.apache.commons.csv.CSVPrinter;
|
||||||
|
import org.apache.commons.csv.QuoteMode;
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
import org.json.simple.JSONArray;
|
import org.json.simple.JSONArray;
|
||||||
import org.json.simple.JSONObject;
|
import org.json.simple.JSONObject;
|
||||||
@@ -167,8 +168,8 @@ public class Export {
|
|||||||
OutputStreamWriter fileWriter = null;
|
OutputStreamWriter fileWriter = null;
|
||||||
CSVPrinter csvFilePrinter = null;
|
CSVPrinter csvFilePrinter = null;
|
||||||
|
|
||||||
//Create the CSVFormat object with "\n" as a record delimiter
|
//Create the CSVFormat object with "\n" as a record delimiter; QuoteMode.ALL wraps every value in quotes
|
||||||
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';').withQuoteMode(QuoteMode.ALL);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
//initialize FileWriter object
|
//initialize FileWriter object
|
||||||
|
|||||||
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
@@ -62,19 +62,21 @@
|
|||||||
</items>
|
</items>
|
||||||
</ComboBox>
|
</ComboBox>
|
||||||
</children>
|
</children>
|
||||||
|
<children>
|
||||||
|
<Label layoutX="10.0" layoutY="40.0" prefHeight="25.0" text="Upoštevaj ločila"/>
|
||||||
|
<CheckBox fx:id="notePunctuationsChB" layoutX="176.0" layoutY="45.0" selected="true"/>
|
||||||
|
</children>
|
||||||
</Pane>
|
</Pane>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<!-- MSD and Taxonomy separated -->
|
<!-- MSD and Taxonomy separated -->
|
||||||
|
|
||||||
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov"/>
|
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov"/>
|
||||||
|
|
||||||
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/>
|
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>
|
||||||
<TextField fx:id="msdTF" layoutX="100.0" layoutY="200.0" prefWidth="180.0"/>
|
<TextField fx:id="msdTF" layoutX="100.0" layoutY="240.0" prefWidth="180.0"/>
|
||||||
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/>
|
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>
|
||||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/>
|
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||||
|
|
||||||
|
|
||||||
<!-- samoglasniki/soglasniki -->
|
<!-- samoglasniki/soglasniki -->
|
||||||
|
|||||||
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Regular → Executable
Reference in New Issue
Block a user