Reimplementation of the handling of other punctuation marks (, / * ( ) etc.) in n-grams.

master
Luka 6 years ago
parent a8d147de52
commit 1c00f1a283

@ -260,6 +260,12 @@ public class XML_processing {
} else if (qName.equals("c3")) {
String c3Content = eventReader.nextEvent().asCharacters().getData();
if(stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() &&
stavek.size() > 0){
stavek.add(new Word(c3Content, c3Content, "/"));
}
if (c3Content.equals(".") && includeThisBlock) {
// add sentence to corpus
corpus.add(new Sentence(stavek, null));
@ -277,9 +283,6 @@ public class XML_processing {
corpus.clear();
}
}
else if(includeThisBlock){
inPunctuation = true;
}
} else if (headTags.contains(qName)) {
String tagContent = eventReader.nextEvent().asCharacters().getData();
headBlock.put(qName, tagContent);
@ -296,17 +299,7 @@ public class XML_processing {
if (in_word) {
stavek.add(new Word(characters.getData(), lemma, msd));
in_word = false;
} else if(inPunctuation){
String punctuation = ",";
if (stavek.size() > 0){
stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation);
stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation);
stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation);
}
inPunctuation = false;
}
}
break;
case XMLStreamConstants.END_ELEMENT:
@ -548,13 +541,16 @@ public class XML_processing {
inWord = false;
}
if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
// String punctuation = characters.getData();
String punctuation = ",";
sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
inPunctuation = false;
String punctuation = characters.getData();
sentence.add(new Word(punctuation, punctuation, "/"));
inPunctuation = false;
// String punctuation = ",";
//
// sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
// sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
// sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
// inPunctuation = false;
}
break;

@ -56,8 +56,8 @@ public class Ngrams {
// String test = key;
// }
if (stats.getFilter().getNotePunctuations())
key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
// if (stats.getFilter().getNotePunctuations())
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
MultipleHMKeys multipleKeys;
@ -68,28 +68,28 @@ public class Ngrams {
break;
case 1:
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0));
if (stats.getFilter().getNotePunctuations())
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
// if (stats.getFilter().getNotePunctuations())
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
multipleKeys = new MultipleHMKeys2(key, k1_2);
break;
case 2:
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0));
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1));
if (stats.getFilter().getNotePunctuations()) {
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
}
// if (stats.getFilter().getNotePunctuations()) {
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
// }
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
break;
case 3:
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0));
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1));
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2));
if (stats.getFilter().getNotePunctuations()) {
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
}
// if (stats.getFilter().getNotePunctuations()) {
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
// }
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
break;
case 4:
@ -97,12 +97,12 @@ public class Ngrams {
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1));
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2));
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3));
if (stats.getFilter().getNotePunctuations()) {
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
}
// if (stats.getFilter().getNotePunctuations()) {
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
// }
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
break;
default:
@ -241,22 +241,22 @@ public class Ngrams {
*/
private static Word checkAndModifySkipgramPunctuation(List<Word> sentence, int i, int j, StatisticsNew stats){
// Returns the word at index i, or a new Word with "," appended to its word, lemma and msd
// when punctuation is being noted and some word strictly between i and j ends with ",".
//
// Guard: the punctuation filter is enabled, the words at indexes i and j are not adjacent,
// and the word at index i does not already end with a comma.
// NOTE(review): charAt(length() - 1) throws for an empty word string - confirm words are never empty.
if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
boolean middleWordsHavePunctuation = false;
// Scan only the skipped words (indexes i+1 .. j-1) and stop at the first one ending with ",".
for (int n = i + 1; n < j; n++){
if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
middleWordsHavePunctuation = true;
break;
}
}
if (middleWordsHavePunctuation){
String punctuation = ",";
// Build a fresh Word so the instance stored in the sentence is left unmodified.
return new Word(sentence.get(i).getWord() + punctuation,
sentence.get(i).getLemma() + punctuation,
sentence.get(i).getMsd() + punctuation);
}
}
// Commented-out copy of the check above.
// NOTE(review): only one of the two copies is presumably meant to remain - confirm which.
// if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
// boolean middleWordsHavePunctuation = false;
// for (int n = i + 1; n < j; n++){
// if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
// middleWordsHavePunctuation = true;
// break;
// }
// }
// if (middleWordsHavePunctuation){
//
// String punctuation = ",";
// return new Word(sentence.get(i).getWord() + punctuation,
// sentence.get(i).getLemma() + punctuation,
// sentence.get(i).getMsd() + punctuation);
// }
// }
// Default: hand back the word stored at index i unchanged.
return sentence.get(i);
}
@ -348,8 +348,8 @@ public class Ngrams {
// String test = key;
// }
if (stats.getFilter().getNotePunctuations())
key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
// if (stats.getFilter().getNotePunctuations())
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
MultipleHMKeys multipleKeys;
@ -360,28 +360,28 @@ public class Ngrams {
break;
case 1:
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0));
if (stats.getFilter().getNotePunctuations())
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
// if (stats.getFilter().getNotePunctuations())
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
multipleKeys = new MultipleHMKeys2(key, k1_2);
break;
case 2:
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0));
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1));
if (stats.getFilter().getNotePunctuations()) {
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
}
// if (stats.getFilter().getNotePunctuations()) {
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
// }
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
break;
case 3:
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0));
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1));
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2));
if (stats.getFilter().getNotePunctuations()) {
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
}
// if (stats.getFilter().getNotePunctuations()) {
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
// }
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
break;
case 4:
@ -389,12 +389,12 @@ public class Ngrams {
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1));
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2));
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3));
if (stats.getFilter().getNotePunctuations()) {
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
}
// if (stats.getFilter().getNotePunctuations()) {
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
// }
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
break;
default:

@ -45,6 +45,29 @@ public enum CalculateFor {
return null;
}
/**
 * Returns the Slovene "total sum of all ..." label that belongs to this constant.
 *
 * @return the label text (ending with a colon), or {@code null} when this constant
 *         has no such label
 */
public String toMetadataString() {
    // Word forms and distinct word forms share one label.
    if (this == WORD || this == DIST_WORDS) {
        return "Skupna vsota vseh različnic:";
    }
    if (this == NORMALIZED_WORD) {
        return "Skupna vsota vseh normaliziranih različnic:";
    }
    // Lemmas and distinct lemmas share one label.
    if (this == LEMMA || this == DIST_LEMMAS) {
        return "Skupna vsota vseh lem:";
    }
    if (this == MORPHOSYNTACTIC_SPECS) {
        return "Skupna vsota vseh oblikoskladenjskih oznak:";
    }
    if (this == MORPHOSYNTACTIC_PROPERTY) {
        return "Skupna vsota vseh oblikoskladenjskih lastnosti:";
    }
    if (this == WORD_TYPE) {
        return "Skupna vsota vseh besednih vrst:";
    }
    return null;
}
public String toHeaderString() {
switch(this){
case WORD:

@ -25,6 +25,7 @@ public class Filter {
DISPLAY_TAXONOMY,
MSD,
HAS_MSD,
WRITE_MSD_AT_THE_END,
SOLAR_FILTERS,
MULTIPLE_KEYS,
NOTE_PUNCTUATIONS,
@ -34,6 +35,7 @@ public class Filter {
/**
 * Creates a filter whose settings map contains only the
 * WRITE_MSD_AT_THE_END entry, initialised to {@code false}.
 */
public Filter() {
    this.filter = new HashMap<>();
    this.filter.put(WRITE_MSD_AT_THE_END, Boolean.FALSE);
}
public Filter(AnalysisLevel al, CalculateFor cf) {
@ -41,6 +43,7 @@ public class Filter {
filter.put(ANALYSIS_LEVEL, al);
filter.put(CALCULATE_FOR, cf);
filter.put(WRITE_MSD_AT_THE_END, false);
}
public void setAl(AnalysisLevel al) {
@ -124,6 +127,14 @@ public class Filter {
return (ArrayList<Pattern>) filter.get(MSD);
}
/**
 * Stores the given flag under the WRITE_MSD_AT_THE_END key of the filter map.
 *
 * @param writeMsdAtTheEnd value to store
 */
public void setWriteMsdAtTheEnd(boolean writeMsdAtTheEnd) {
    final Boolean boxedFlag = writeMsdAtTheEnd;
    filter.put(WRITE_MSD_AT_THE_END, boxedFlag);
}
/**
 * Reads the flag stored under the WRITE_MSD_AT_THE_END key of the filter map.
 *
 * @return the stored value, unboxed to a primitive
 */
public boolean getWriteMsdAtTheEnd() {
    final Object storedFlag = filter.get(WRITE_MSD_AT_THE_END);
    return (boolean) storedFlag;
}
/**
 * Stores the given flag under the HAS_MSD key of the filter map.
 *
 * @param hasMsd value to store
 */
public void setHasMsd(boolean hasMsd) {
    final Boolean boxedFlag = hasMsd;
    filter.put(HAS_MSD, boxedFlag);
}

@ -8,7 +8,6 @@ import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
import javafx.scene.layout.Pane;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -17,6 +16,7 @@ import org.controlsfx.control.CheckComboBox;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;
import static alg.XML_processing.readXML;
@ -49,6 +49,10 @@ public class OneWordAnalysisTab {
private CheckBox displayTaxonomyChB;
private boolean displayTaxonomy;
@FXML
private CheckBox writeMsdAtTheEndChB;
private boolean writeMsdAtTheEnd;
@FXML
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
@ -96,6 +100,7 @@ public class OneWordAnalysisTab {
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// TODO: pass observables for taxonomy based on header scan
@ -107,6 +112,8 @@ public class OneWordAnalysisTab {
currentMode = MODE.WORD;
toggleMode(currentMode);
AtomicBoolean writeMsdAtTheEndEnableCalculateFor = new AtomicBoolean(false);
// calculateForCB
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
calculateFor = CalculateFor.factory(newValue);
@ -121,9 +128,22 @@ public class OneWordAnalysisTab {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
} else if(newValue.equals("normalizirana različnica")) {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
} else if(newValue.equals("oblikoskladenjska oznaka")) {
writeMsdAtTheEndEnableCalculateFor.set(true);
writeMsdAtTheEndChB.setDisable(false);
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd);
}else {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
}
if (!newValue.equals("oblikoskladenjska oznaka")){
writeMsdAtTheEnd = false;
writeMsdAtTheEndChB.setSelected(false);
writeMsdAtTheEndChB.setDisable(true);
writeMsdAtTheEndEnableCalculateFor.set(false);
}
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
alsoVisualize = new ArrayList<>();
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
@ -177,6 +197,13 @@ public class OneWordAnalysisTab {
alsoVisualize = new ArrayList<>();
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
alsoVisualize.addAll(checkedItems);
if (checkedItems.contains("oblikoskladenjska oznaka") || writeMsdAtTheEndEnableCalculateFor.get()){
writeMsdAtTheEndChB.setDisable(false);
} else {
writeMsdAtTheEnd = false;
writeMsdAtTheEndChB.setSelected(false);
writeMsdAtTheEndChB.setDisable(true);
}
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
});
alsoVisualizeCCB.getCheckModel().clearChecks();
@ -204,6 +231,15 @@ public class OneWordAnalysisTab {
});
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
writeMsdAtTheEnd = false;
writeMsdAtTheEndChB.setDisable(true);
// set
// Mirror the checkbox state into the writeMsdAtTheEnd field and log the new value.
writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
    writeMsdAtTheEnd = newValue;
    // "{}" is the Log4j parameter placeholder; without it the value argument is never written to the log.
    logger.info("write msd at the end: {}", writeMsdAtTheEnd);
});
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
// set default values
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
@ -390,6 +426,7 @@ public class OneWordAnalysisTab {
filter.setMultipleKeys(alsoVisualize);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
String message = Validation.validateForStringLevel(filter);
if (message == null) {

@ -124,6 +124,7 @@ public class StringAnalysisTabNew2 {
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
@ -169,7 +170,7 @@ public class StringAnalysisTabNew2 {
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
notePunctuations = true;
notePunctuations = false;
// set
notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
notePunctuations = newValue;
@ -199,6 +200,8 @@ public class StringAnalysisTabNew2 {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
} else if(newValue.equals("normalizirana različnica")) {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
}else if(newValue.equals("oblikoskladenjska oznaka")) {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd);
}else {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
}

@ -90,6 +90,7 @@ public class Export {
FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString());
if (filter.getCalculateFor().equals(CalculateFor.LEMMA))
FILE_HEADER_AL.add("Lema male črke");
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
// if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
@ -161,7 +162,7 @@ public class Export {
// } else {
// FILE_HEADER_AL.add("Delež glede na vse leme");
// }
FILE_HEADER_AL.add("Skupna relativna pogostost");
FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
for (String key : taxonomyResults.keySet()) {
if(!key.equals("Total")) {
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
@ -213,8 +214,7 @@ public class Export {
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
List dataEntry = new ArrayList<>();
dataEntry.add(e.getKey().getK1());
if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi")) &&
headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")){
if (filter.getCalculateFor().equals(CalculateFor.LEMMA)){
dataEntry.add(e.getKey().getK1().toLowerCase());
}
@ -255,16 +255,55 @@ public class Export {
// }
dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
for (String key : taxonomyResults.keySet()){
if(!key.equals("Total")) {
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
dataEntry.add(frequency.toString());
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key)));
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
}
}
// Write msd separated per letters at the end of each line in csv
if (filter.getWriteMsdAtTheEnd()) {
String msd = "";
if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
msd = e.getKey().getK1();
} else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
i = 0;
for (CalculateFor otherKey : filter.getMultipleKeys()){
switch(i){
case 0:
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
msd = e.getKey().getK2();
}
break;
case 1:
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
msd = e.getKey().getK3();
}
break;
case 2:
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
msd = e.getKey().getK4();
}
break;
case 3:
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
msd = e.getKey().getK5();
}
break;
}
i++;
}
}
String [] charArray = msd.split("(?!^)");
dataEntry.addAll(Arrays.asList(charArray));
}
csvFilePrinter.printRecord(dataEntry);
}
} catch (Exception e) {

@ -37,19 +37,22 @@
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
<CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Izpiši razbit MSD" />
<CheckBox fx:id="writeMsdAtTheEndChB" layoutX="263.0" layoutY="145.0" selected="false" />
<!-- MSD and Taxonomy separated -->
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" />
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="360.0" prefWidth="180.0" />
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>

@ -70,7 +70,7 @@
<Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Upoštevaj ločila" />
<CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="true" />
<CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="false" />
<!-- MSD and Taxonomy separated -->

Loading…
Cancel
Save