Reimplementation of the handling of other punctuation marks (, / * ( ) etc.) in n-grams.
This commit is contained in:
parent
a8d147de52
commit
1c00f1a283
|
@ -260,6 +260,12 @@ public class XML_processing {
|
|||
} else if (qName.equals("c3")) {
|
||||
String c3Content = eventReader.nextEvent().asCharacters().getData();
|
||||
|
||||
if(stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() &&
|
||||
stavek.size() > 0){
|
||||
stavek.add(new Word(c3Content, c3Content, "/"));
|
||||
|
||||
}
|
||||
|
||||
if (c3Content.equals(".") && includeThisBlock) {
|
||||
// add sentence to corpus
|
||||
corpus.add(new Sentence(stavek, null));
|
||||
|
@ -276,9 +282,6 @@ public class XML_processing {
|
|||
// the data anymore
|
||||
corpus.clear();
|
||||
}
|
||||
}
|
||||
else if(includeThisBlock){
|
||||
inPunctuation = true;
|
||||
}
|
||||
} else if (headTags.contains(qName)) {
|
||||
String tagContent = eventReader.nextEvent().asCharacters().getData();
|
||||
|
@ -296,16 +299,6 @@ public class XML_processing {
|
|||
if (in_word) {
|
||||
stavek.add(new Word(characters.getData(), lemma, msd));
|
||||
in_word = false;
|
||||
} else if(inPunctuation){
|
||||
String punctuation = ",";
|
||||
|
||||
if (stavek.size() > 0){
|
||||
stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation);
|
||||
stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation);
|
||||
stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation);
|
||||
}
|
||||
|
||||
inPunctuation = false;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -548,13 +541,16 @@ public class XML_processing {
|
|||
inWord = false;
|
||||
}
|
||||
if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||
// String punctuation = characters.getData();
|
||||
String punctuation = ",";
|
||||
|
||||
sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
|
||||
sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
|
||||
sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
|
||||
String punctuation = characters.getData();
|
||||
sentence.add(new Word(punctuation, punctuation, "/"));
|
||||
inPunctuation = false;
|
||||
|
||||
// String punctuation = ",";
|
||||
//
|
||||
// sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
|
||||
// sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
|
||||
// sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
|
||||
// inPunctuation = false;
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
|
@ -56,8 +56,8 @@ public class Ngrams {
|
|||
// String test = key;
|
||||
// }
|
||||
|
||||
if (stats.getFilter().getNotePunctuations())
|
||||
key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
// if (stats.getFilter().getNotePunctuations())
|
||||
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
|
||||
MultipleHMKeys multipleKeys;
|
||||
|
||||
|
@ -68,28 +68,28 @@ public class Ngrams {
|
|||
break;
|
||||
case 1:
|
||||
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
if (stats.getFilter().getNotePunctuations())
|
||||
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
||||
// if (stats.getFilter().getNotePunctuations())
|
||||
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
||||
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
||||
break;
|
||||
case 2:
|
||||
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
||||
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
||||
}
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
||||
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
||||
break;
|
||||
case 3:
|
||||
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
||||
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
||||
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
||||
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
||||
}
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
||||
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
||||
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
||||
break;
|
||||
case 4:
|
||||
|
@ -97,12 +97,12 @@ public class Ngrams {
|
|||
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
||||
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2));
|
||||
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
||||
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
||||
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
||||
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
||||
}
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
||||
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
||||
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
||||
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
||||
break;
|
||||
default:
|
||||
|
@ -241,22 +241,22 @@ public class Ngrams {
|
|||
*/
|
||||
private static Word checkAndModifySkipgramPunctuation(List<Word> sentence, int i, int j, StatisticsNew stats){
// Returns the word at index i of the sentence, possibly replaced by a copy carrying a trailing ','.
// The copy is returned only when all of the following hold: punctuation noting is enabled in the
// filter, i and j are not adjacent (j - i > 1), the word at i does not already end with ',', and at
// least one word strictly between i and j ends with ','. In that case ',' is appended to the word,
// lemma and msd of a newly constructed Word; otherwise sentence.get(i) is returned as-is.
// NOTE(review): charAt(length() - 1) throws on an empty word string — presumably words are never
// empty here; confirm against the reader that builds the sentence.
// NOTE(review): the same logic appears a second time below in commented-out form. This view looks
// like a diff rendering that shows removed and added lines together — confirm which version is current.
|
||||
// if the punctuation checkbox is selected, the words at indexes i and j are not next to each other,
// and the word at index i does not already end with ','
|
||||
if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
|
||||
boolean middleWordsHavePunctuation = false;
|
||||
for (int n = i + 1; n < j; n++){
|
||||
if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
|
||||
middleWordsHavePunctuation = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (middleWordsHavePunctuation){
|
||||
|
||||
String punctuation = ",";
|
||||
return new Word(sentence.get(i).getWord() + punctuation,
|
||||
sentence.get(i).getLemma() + punctuation,
|
||||
sentence.get(i).getMsd() + punctuation);
|
||||
}
|
||||
}
|
||||
// if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
|
||||
// boolean middleWordsHavePunctuation = false;
|
||||
// for (int n = i + 1; n < j; n++){
|
||||
// if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
|
||||
// middleWordsHavePunctuation = true;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// if (middleWordsHavePunctuation){
|
||||
//
|
||||
// String punctuation = ",";
|
||||
// return new Word(sentence.get(i).getWord() + punctuation,
|
||||
// sentence.get(i).getLemma() + punctuation,
|
||||
// sentence.get(i).getMsd() + punctuation);
|
||||
// }
|
||||
// }
|
||||
return sentence.get(i);
|
||||
|
||||
}
|
||||
|
@ -348,8 +348,8 @@ public class Ngrams {
|
|||
// String test = key;
|
||||
// }
|
||||
|
||||
if (stats.getFilter().getNotePunctuations())
|
||||
key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
// if (stats.getFilter().getNotePunctuations())
|
||||
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
|
||||
MultipleHMKeys multipleKeys;
|
||||
|
||||
|
@ -360,28 +360,28 @@ public class Ngrams {
|
|||
break;
|
||||
case 1:
|
||||
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
if (stats.getFilter().getNotePunctuations())
|
||||
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
||||
// if (stats.getFilter().getNotePunctuations())
|
||||
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
||||
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
||||
break;
|
||||
case 2:
|
||||
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
||||
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
||||
}
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
||||
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
||||
break;
|
||||
case 3:
|
||||
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
||||
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
||||
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
||||
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
||||
}
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
||||
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
||||
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
||||
break;
|
||||
case 4:
|
||||
|
@ -389,12 +389,12 @@ public class Ngrams {
|
|||
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
||||
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2));
|
||||
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
||||
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
||||
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
||||
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
||||
}
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
||||
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
||||
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
||||
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -45,6 +45,29 @@ public enum CalculateFor {
|
|||
return null;
|
||||
}
|
||||
|
||||
public String toMetadataString() {
|
||||
switch(this){
|
||||
case WORD:
|
||||
return "Skupna vsota vseh različnic:";
|
||||
case NORMALIZED_WORD:
|
||||
return "Skupna vsota vseh normaliziranih različnic:";
|
||||
case LEMMA:
|
||||
return "Skupna vsota vseh lem:";
|
||||
case MORPHOSYNTACTIC_SPECS:
|
||||
return "Skupna vsota vseh oblikoskladenjskih oznak:";
|
||||
case MORPHOSYNTACTIC_PROPERTY:
|
||||
return "Skupna vsota vseh oblikoskladenjskih lastnosti:";
|
||||
case WORD_TYPE:
|
||||
return "Skupna vsota vseh besednih vrst:";
|
||||
case DIST_WORDS:
|
||||
return "Skupna vsota vseh različnic:";
|
||||
case DIST_LEMMAS:
|
||||
return "Skupna vsota vseh lem:";
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public String toHeaderString() {
|
||||
switch(this){
|
||||
case WORD:
|
||||
|
|
|
@ -25,6 +25,7 @@ public class Filter {
|
|||
DISPLAY_TAXONOMY,
|
||||
MSD,
|
||||
HAS_MSD,
|
||||
WRITE_MSD_AT_THE_END,
|
||||
SOLAR_FILTERS,
|
||||
MULTIPLE_KEYS,
|
||||
NOTE_PUNCTUATIONS,
|
||||
|
@ -34,6 +35,7 @@ public class Filter {
|
|||
|
||||
/**
 * Creates a filter backed by a fresh map in which only
 * {@code WRITE_MSD_AT_THE_END} is preset (to {@code false}).
 */
public Filter() {
    this.filter = new HashMap<>();
    this.filter.put(WRITE_MSD_AT_THE_END, Boolean.FALSE);
}
|
||||
|
||||
public Filter(AnalysisLevel al, CalculateFor cf) {
|
||||
|
@ -41,6 +43,7 @@ public class Filter {
|
|||
|
||||
filter.put(ANALYSIS_LEVEL, al);
|
||||
filter.put(CALCULATE_FOR, cf);
|
||||
filter.put(WRITE_MSD_AT_THE_END, false);
|
||||
}
|
||||
|
||||
public void setAl(AnalysisLevel al) {
|
||||
|
@ -124,6 +127,14 @@ public class Filter {
|
|||
return (ArrayList<Pattern>) filter.get(MSD);
|
||||
}
|
||||
|
||||
public void setWriteMsdAtTheEnd(boolean writeMsdAtTheEnd) {
|
||||
filter.put(WRITE_MSD_AT_THE_END, writeMsdAtTheEnd);
|
||||
}
|
||||
|
||||
public boolean getWriteMsdAtTheEnd() {
|
||||
return (boolean) filter.get(WRITE_MSD_AT_THE_END);
|
||||
}
|
||||
|
||||
public void setHasMsd(boolean hasMsd) {
|
||||
filter.put(HAS_MSD, hasMsd);
|
||||
}
|
||||
|
|
|
@ -8,7 +8,6 @@ import javafx.collections.ObservableList;
|
|||
import javafx.concurrent.Task;
|
||||
import javafx.fxml.FXML;
|
||||
import javafx.scene.control.*;
|
||||
import javafx.scene.layout.Pane;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
@ -17,6 +16,7 @@ import org.controlsfx.control.CheckComboBox;
|
|||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static alg.XML_processing.readXML;
|
||||
|
@ -49,6 +49,10 @@ public class OneWordAnalysisTab {
|
|||
private CheckBox displayTaxonomyChB;
|
||||
private boolean displayTaxonomy;
|
||||
|
||||
@FXML
|
||||
private CheckBox writeMsdAtTheEndChB;
|
||||
private boolean writeMsdAtTheEnd;
|
||||
|
||||
@FXML
|
||||
private ComboBox<String> calculateForCB;
|
||||
private CalculateFor calculateFor;
|
||||
|
@ -96,6 +100,7 @@ public class OneWordAnalysisTab {
|
|||
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
||||
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||
|
||||
// TODO: pass observables for taxonomy based on header scan
|
||||
|
@ -107,6 +112,8 @@ public class OneWordAnalysisTab {
|
|||
currentMode = MODE.WORD;
|
||||
toggleMode(currentMode);
|
||||
|
||||
AtomicBoolean writeMsdAtTheEndEnableCalculateFor = new AtomicBoolean(false);
|
||||
|
||||
// calculateForCB
|
||||
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||
calculateFor = CalculateFor.factory(newValue);
|
||||
|
@ -121,9 +128,22 @@ public class OneWordAnalysisTab {
|
|||
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
|
||||
} else if(newValue.equals("normalizirana različnica")) {
|
||||
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
|
||||
} else if(newValue.equals("oblikoskladenjska oznaka")) {
|
||||
writeMsdAtTheEndEnableCalculateFor.set(true);
|
||||
writeMsdAtTheEndChB.setDisable(false);
|
||||
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd);
|
||||
}else {
|
||||
|
||||
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
|
||||
}
|
||||
|
||||
if (!newValue.equals("oblikoskladenjska oznaka")){
|
||||
writeMsdAtTheEnd = false;
|
||||
writeMsdAtTheEndChB.setSelected(false);
|
||||
writeMsdAtTheEndChB.setDisable(true);
|
||||
writeMsdAtTheEndEnableCalculateFor.set(false);
|
||||
}
|
||||
|
||||
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||
alsoVisualize = new ArrayList<>();
|
||||
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
|
||||
|
@ -177,6 +197,13 @@ public class OneWordAnalysisTab {
|
|||
alsoVisualize = new ArrayList<>();
|
||||
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
|
||||
alsoVisualize.addAll(checkedItems);
|
||||
if (checkedItems.contains("oblikoskladenjska oznaka") || writeMsdAtTheEndEnableCalculateFor.get()){
|
||||
writeMsdAtTheEndChB.setDisable(false);
|
||||
} else {
|
||||
writeMsdAtTheEnd = false;
|
||||
writeMsdAtTheEndChB.setSelected(false);
|
||||
writeMsdAtTheEndChB.setDisable(true);
|
||||
}
|
||||
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||
});
|
||||
alsoVisualizeCCB.getCheckModel().clearChecks();
|
||||
|
@ -204,6 +231,15 @@ public class OneWordAnalysisTab {
|
|||
});
|
||||
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
|
||||
|
||||
writeMsdAtTheEnd = false;
|
||||
writeMsdAtTheEndChB.setDisable(true);
|
||||
// set
|
||||
// Keep the writeMsdAtTheEnd field in sync with the checkbox whenever its selection changes.
writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
    writeMsdAtTheEnd = newValue;
    // The "{}" placeholder is required: without it the parameterized logger call
    // never substitutes the argument and the new value is missing from the log line.
    logger.info("write msd at the end: {}", writeMsdAtTheEnd);
});
|
||||
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
|
||||
|
||||
// set default values
|
||||
minimalOccurrencesTF.setText("1");
|
||||
minimalOccurrences = 1;
|
||||
|
@ -390,6 +426,7 @@ public class OneWordAnalysisTab {
|
|||
filter.setMultipleKeys(alsoVisualize);
|
||||
filter.setMinimalOccurrences(minimalOccurrences);
|
||||
filter.setMinimalTaxonomy(minimalTaxonomy);
|
||||
filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
|
||||
|
||||
String message = Validation.validateForStringLevel(filter);
|
||||
if (message == null) {
|
||||
|
|
|
@ -124,6 +124,7 @@ public class StringAnalysisTabNew2 {
|
|||
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
||||
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||
|
||||
|
||||
|
@ -169,7 +170,7 @@ public class StringAnalysisTabNew2 {
|
|||
minimalTaxonomyTF.setText("1");
|
||||
minimalTaxonomy = 1;
|
||||
|
||||
notePunctuations = true;
|
||||
notePunctuations = false;
|
||||
// set
|
||||
notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
notePunctuations = newValue;
|
||||
|
@ -199,6 +200,8 @@ public class StringAnalysisTabNew2 {
|
|||
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
|
||||
} else if(newValue.equals("normalizirana različnica")) {
|
||||
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
|
||||
}else if(newValue.equals("oblikoskladenjska oznaka")) {
|
||||
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd);
|
||||
}else {
|
||||
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
|
||||
}
|
||||
|
|
|
@ -90,6 +90,7 @@ public class Export {
|
|||
FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString());
|
||||
if (filter.getCalculateFor().equals(CalculateFor.LEMMA))
|
||||
FILE_HEADER_AL.add("Lema male črke");
|
||||
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
|
||||
|
||||
// if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
|
||||
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
||||
|
@ -161,7 +162,7 @@ public class Export {
|
|||
// } else {
|
||||
// FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||
// }
|
||||
FILE_HEADER_AL.add("Skupna relativna pogostost");
|
||||
FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
|
||||
for (String key : taxonomyResults.keySet()) {
|
||||
if(!key.equals("Total")) {
|
||||
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
|
||||
|
@ -213,8 +214,7 @@ public class Export {
|
|||
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
||||
List dataEntry = new ArrayList<>();
|
||||
dataEntry.add(e.getKey().getK1());
|
||||
if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi")) &&
|
||||
headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")){
|
||||
if (filter.getCalculateFor().equals(CalculateFor.LEMMA)){
|
||||
dataEntry.add(e.getKey().getK1().toLowerCase());
|
||||
}
|
||||
|
||||
|
@ -255,16 +255,55 @@ public class Export {
|
|||
// }
|
||||
dataEntry.add(e.getValue().toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
|
||||
for (String key : taxonomyResults.keySet()){
|
||||
if(!key.equals("Total")) {
|
||||
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
|
||||
dataEntry.add(frequency.toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
|
||||
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key)));
|
||||
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
|
||||
}
|
||||
}
|
||||
|
||||
// Write msd separated per letters at the end of each line in csv
|
||||
if (filter.getWriteMsdAtTheEnd()) {
|
||||
String msd = "";
|
||||
if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
msd = e.getKey().getK1();
|
||||
} else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
|
||||
i = 0;
|
||||
for (CalculateFor otherKey : filter.getMultipleKeys()){
|
||||
switch(i){
|
||||
case 0:
|
||||
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
msd = e.getKey().getK2();
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
msd = e.getKey().getK3();
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
msd = e.getKey().getK4();
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
msd = e.getKey().getK5();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
String [] charArray = msd.split("(?!^)");
|
||||
dataEntry.addAll(Arrays.asList(charArray));
|
||||
|
||||
}
|
||||
|
||||
csvFilePrinter.printRecord(dataEntry);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
|
|
|
@ -37,19 +37,22 @@
|
|||
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
|
||||
<CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Izpiši razbit MSD" />
|
||||
<CheckBox fx:id="writeMsdAtTheEndChB" layoutX="263.0" layoutY="145.0" selected="false" />
|
||||
|
||||
<!-- MSD and Taxonomy separated -->
|
||||
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />
|
||||
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/>
|
||||
<TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>
|
||||
<TextField fx:id="msdTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
|
||||
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="360.0" prefWidth="180.0" />
|
||||
|
||||
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||
|
|
|
@ -70,7 +70,7 @@
|
|||
|
||||
|
||||
<Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Upoštevaj ločila" />
|
||||
<CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="true" />
|
||||
<CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="false" />
|
||||
|
||||
|
||||
<!-- MSD and Taxonomy separated -->
|
||||
|
|
Loading…
Reference in New Issue
Block a user