Reimplement handling of punctuation and other symbols (, / * ( ) etc.) in n-grams.

This commit is contained in:
Luka 2018-08-28 11:41:19 +02:00
parent a8d147de52
commit 1c00f1a283
9 changed files with 203 additions and 91 deletions

View File

@ -260,6 +260,12 @@ public class XML_processing {
} else if (qName.equals("c3")) { } else if (qName.equals("c3")) {
String c3Content = eventReader.nextEvent().asCharacters().getData(); String c3Content = eventReader.nextEvent().asCharacters().getData();
// When n-grams longer than one token are requested and punctuation tracking is
// enabled, store the text of the <c3> element as its own Word entry. The text is
// passed as the first two constructor arguments and "/" as the third
// (presumably word, lemma and msd - confirm against the Word constructor).
// Nothing is added while the sentence is still empty, so a sentence never
// starts with such an entry.
// NOTE(review): this runs before the sentence-final "." check below, so the
// closing period appears to be added as an entry as well - confirm intended.
if(stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() &&
stavek.size() > 0){
stavek.add(new Word(c3Content, c3Content, "/"));
}
if (c3Content.equals(".") && includeThisBlock) { if (c3Content.equals(".") && includeThisBlock) {
// add sentence to corpus // add sentence to corpus
corpus.add(new Sentence(stavek, null)); corpus.add(new Sentence(stavek, null));
@ -276,9 +282,6 @@ public class XML_processing {
// the data anymore // the data anymore
corpus.clear(); corpus.clear();
} }
}
else if(includeThisBlock){
inPunctuation = true;
} }
} else if (headTags.contains(qName)) { } else if (headTags.contains(qName)) {
String tagContent = eventReader.nextEvent().asCharacters().getData(); String tagContent = eventReader.nextEvent().asCharacters().getData();
@ -296,16 +299,6 @@ public class XML_processing {
if (in_word) { if (in_word) {
stavek.add(new Word(characters.getData(), lemma, msd)); stavek.add(new Word(characters.getData(), lemma, msd));
in_word = false; in_word = false;
} else if(inPunctuation){
String punctuation = ",";
if (stavek.size() > 0){
stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation);
stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation);
stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation);
}
inPunctuation = false;
} }
break; break;
@ -548,13 +541,16 @@ public class XML_processing {
inWord = false; inWord = false;
} }
if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
// String punctuation = characters.getData(); String punctuation = characters.getData();
String punctuation = ","; sentence.add(new Word(punctuation, punctuation, "/"));
sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
inPunctuation = false; inPunctuation = false;
// String punctuation = ",";
//
// sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
// sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
// sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
// inPunctuation = false;
} }
break; break;

View File

@ -56,8 +56,8 @@ public class Ngrams {
// String test = key; // String test = key;
// } // }
if (stats.getFilter().getNotePunctuations()) // if (stats.getFilter().getNotePunctuations())
key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; // key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
MultipleHMKeys multipleKeys; MultipleHMKeys multipleKeys;
@ -68,28 +68,28 @@ public class Ngrams {
break; break;
case 1: case 1:
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0)); String k1_2 = wordToString(ngramCandidate, otherKeys.get(0));
if (stats.getFilter().getNotePunctuations()) // if (stats.getFilter().getNotePunctuations())
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2; // k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
multipleKeys = new MultipleHMKeys2(key, k1_2); multipleKeys = new MultipleHMKeys2(key, k1_2);
break; break;
case 2: case 2:
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0)); String k2_2 = wordToString(ngramCandidate, otherKeys.get(0));
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1)); String k2_3 = wordToString(ngramCandidate, otherKeys.get(1));
if (stats.getFilter().getNotePunctuations()) { // if (stats.getFilter().getNotePunctuations()) {
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2; // k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3; // k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
} // }
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3); multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
break; break;
case 3: case 3:
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0)); String k3_2 = wordToString(ngramCandidate, otherKeys.get(0));
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1)); String k3_3 = wordToString(ngramCandidate, otherKeys.get(1));
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2)); String k3_4 = wordToString(ngramCandidate, otherKeys.get(2));
if (stats.getFilter().getNotePunctuations()) { // if (stats.getFilter().getNotePunctuations()) {
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2; // k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3; // k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4; // k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
} // }
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4); multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
break; break;
case 4: case 4:
@ -97,12 +97,12 @@ public class Ngrams {
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1)); String k4_3 = wordToString(ngramCandidate, otherKeys.get(1));
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2)); String k4_4 = wordToString(ngramCandidate, otherKeys.get(2));
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3)); String k4_5 = wordToString(ngramCandidate, otherKeys.get(3));
if (stats.getFilter().getNotePunctuations()) { // if (stats.getFilter().getNotePunctuations()) {
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2; // k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3; // k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4; // k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5; // k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
} // }
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5); multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
break; break;
default: default:
@ -241,22 +241,22 @@ public class Ngrams {
*/ */
private static Word checkAndModifySkipgramPunctuation(List<Word> sentence, int i, int j, StatisticsNew stats){ private static Word checkAndModifySkipgramPunctuation(List<Word> sentence, int i, int j, StatisticsNew stats){
// if punctuation checkbox selected and there words at indexes i and j are not next to each other // if punctuation checkbox selected and there words at indexes i and j are not next to each other
if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){ // if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
boolean middleWordsHavePunctuation = false; // boolean middleWordsHavePunctuation = false;
for (int n = i + 1; n < j; n++){ // for (int n = i + 1; n < j; n++){
if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){ // if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
middleWordsHavePunctuation = true; // middleWordsHavePunctuation = true;
break; // break;
} // }
} // }
if (middleWordsHavePunctuation){ // if (middleWordsHavePunctuation){
//
String punctuation = ","; // String punctuation = ",";
return new Word(sentence.get(i).getWord() + punctuation, // return new Word(sentence.get(i).getWord() + punctuation,
sentence.get(i).getLemma() + punctuation, // sentence.get(i).getLemma() + punctuation,
sentence.get(i).getMsd() + punctuation); // sentence.get(i).getMsd() + punctuation);
} // }
} // }
return sentence.get(i); return sentence.get(i);
} }
@ -348,8 +348,8 @@ public class Ngrams {
// String test = key; // String test = key;
// } // }
if (stats.getFilter().getNotePunctuations()) // if (stats.getFilter().getNotePunctuations())
key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; // key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
MultipleHMKeys multipleKeys; MultipleHMKeys multipleKeys;
@ -360,28 +360,28 @@ public class Ngrams {
break; break;
case 1: case 1:
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0)); String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0));
if (stats.getFilter().getNotePunctuations()) // if (stats.getFilter().getNotePunctuations())
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2; // k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
multipleKeys = new MultipleHMKeys2(key, k1_2); multipleKeys = new MultipleHMKeys2(key, k1_2);
break; break;
case 2: case 2:
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0)); String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0));
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1)); String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1));
if (stats.getFilter().getNotePunctuations()) { // if (stats.getFilter().getNotePunctuations()) {
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2; // k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3; // k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
} // }
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3); multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
break; break;
case 3: case 3:
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0)); String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0));
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1)); String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1));
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2)); String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2));
if (stats.getFilter().getNotePunctuations()) { // if (stats.getFilter().getNotePunctuations()) {
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2; // k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3; // k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4; // k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
} // }
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4); multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
break; break;
case 4: case 4:
@ -389,12 +389,12 @@ public class Ngrams {
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1)); String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1));
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2)); String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2));
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3)); String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3));
if (stats.getFilter().getNotePunctuations()) { // if (stats.getFilter().getNotePunctuations()) {
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2; // k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3; // k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4; // k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5; // k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
} // }
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5); multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
break; break;
default: default:

View File

@ -45,6 +45,29 @@ public enum CalculateFor {
return null; return null;
} }
/**
 * Returns the Slovene "total sum of all ..." label that belongs to this value.
 *
 * @return the label text including its trailing colon, or {@code null} for
 *         values that have no such label
 */
public String toMetadataString() {
    final String label;
    switch (this) {
        // word forms share one label, whether counted directly or as distinct words
        case WORD:
        case DIST_WORDS:
            label = "Skupna vsota vseh različnic:";
            break;
        case NORMALIZED_WORD:
            label = "Skupna vsota vseh normaliziranih različnic:";
            break;
        // lemmas share one label as well
        case LEMMA:
        case DIST_LEMMAS:
            label = "Skupna vsota vseh lem:";
            break;
        case MORPHOSYNTACTIC_SPECS:
            label = "Skupna vsota vseh oblikoskladenjskih oznak:";
            break;
        case MORPHOSYNTACTIC_PROPERTY:
            label = "Skupna vsota vseh oblikoskladenjskih lastnosti:";
            break;
        case WORD_TYPE:
            label = "Skupna vsota vseh besednih vrst:";
            break;
        default:
            label = null;
            break;
    }
    return label;
}
public String toHeaderString() { public String toHeaderString() {
switch(this){ switch(this){
case WORD: case WORD:

View File

@ -25,6 +25,7 @@ public class Filter {
DISPLAY_TAXONOMY, DISPLAY_TAXONOMY,
MSD, MSD,
HAS_MSD, HAS_MSD,
WRITE_MSD_AT_THE_END,
SOLAR_FILTERS, SOLAR_FILTERS,
MULTIPLE_KEYS, MULTIPLE_KEYS,
NOTE_PUNCTUATIONS, NOTE_PUNCTUATIONS,
@ -34,6 +35,7 @@ public class Filter {
public Filter() { public Filter() {
filter = new HashMap<>(); filter = new HashMap<>();
filter.put(WRITE_MSD_AT_THE_END, false);
} }
public Filter(AnalysisLevel al, CalculateFor cf) { public Filter(AnalysisLevel al, CalculateFor cf) {
@ -41,6 +43,7 @@ public class Filter {
filter.put(ANALYSIS_LEVEL, al); filter.put(ANALYSIS_LEVEL, al);
filter.put(CALCULATE_FOR, cf); filter.put(CALCULATE_FOR, cf);
filter.put(WRITE_MSD_AT_THE_END, false);
} }
public void setAl(AnalysisLevel al) { public void setAl(AnalysisLevel al) {
@ -124,6 +127,14 @@ public class Filter {
return (ArrayList<Pattern>) filter.get(MSD); return (ArrayList<Pattern>) filter.get(MSD);
} }
/**
 * Stores the "write MSD at the end" flag in the filter map under
 * {@code WRITE_MSD_AT_THE_END}.
 *
 * @param writeMsdAtTheEnd the value to store
 */
public void setWriteMsdAtTheEnd(boolean writeMsdAtTheEnd) {
    final Boolean flag = Boolean.valueOf(writeMsdAtTheEnd);
    filter.put(WRITE_MSD_AT_THE_END, flag);
}
/**
 * Reads the "write MSD at the end" flag from the filter map.
 *
 * @return {@code true} only when {@code Boolean.TRUE} is stored under
 *         {@code WRITE_MSD_AT_THE_END}; {@code false} when the entry is
 *         missing or holds anything else
 */
public boolean getWriteMsdAtTheEnd() {
    // A plain (boolean) cast would throw a NullPointerException on unboxing
    // if the key had never been put into the map; treat that case as "off".
    return Boolean.TRUE.equals(filter.get(WRITE_MSD_AT_THE_END));
}
public void setHasMsd(boolean hasMsd) { public void setHasMsd(boolean hasMsd) {
filter.put(HAS_MSD, hasMsd); filter.put(HAS_MSD, hasMsd);
} }

View File

@ -8,7 +8,6 @@ import javafx.collections.ObservableList;
import javafx.concurrent.Task; import javafx.concurrent.Task;
import javafx.fxml.FXML; import javafx.fxml.FXML;
import javafx.scene.control.*; import javafx.scene.control.*;
import javafx.scene.layout.Pane;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
@ -17,6 +16,7 @@ import org.controlsfx.control.CheckComboBox;
import java.io.File; import java.io.File;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.util.*; import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import static alg.XML_processing.readXML; import static alg.XML_processing.readXML;
@ -49,6 +49,10 @@ public class OneWordAnalysisTab {
private CheckBox displayTaxonomyChB; private CheckBox displayTaxonomyChB;
private boolean displayTaxonomy; private boolean displayTaxonomy;
@FXML
private CheckBox writeMsdAtTheEndChB;
private boolean writeMsdAtTheEnd;
@FXML @FXML
private ComboBox<String> calculateForCB; private ComboBox<String> calculateForCB;
private CalculateFor calculateFor; private CalculateFor calculateFor;
@ -96,6 +100,7 @@ public class OneWordAnalysisTab {
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// TODO: pass observables for taxonomy based on header scan // TODO: pass observables for taxonomy based on header scan
@ -107,6 +112,8 @@ public class OneWordAnalysisTab {
currentMode = MODE.WORD; currentMode = MODE.WORD;
toggleMode(currentMode); toggleMode(currentMode);
AtomicBoolean writeMsdAtTheEndEnableCalculateFor = new AtomicBoolean(false);
// calculateForCB // calculateForCB
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> { calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
calculateFor = CalculateFor.factory(newValue); calculateFor = CalculateFor.factory(newValue);
@ -121,9 +128,22 @@ public class OneWordAnalysisTab {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord); alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
} else if(newValue.equals("normalizirana različnica")) { } else if(newValue.equals("normalizirana različnica")) {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord); alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
} else if(newValue.equals("oblikoskladenjska oznaka")) {
writeMsdAtTheEndEnableCalculateFor.set(true);
writeMsdAtTheEndChB.setDisable(false);
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd);
}else { }else {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty); alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
} }
if (!newValue.equals("oblikoskladenjska oznaka")){
writeMsdAtTheEnd = false;
writeMsdAtTheEndChB.setSelected(false);
writeMsdAtTheEndChB.setDisable(true);
writeMsdAtTheEndEnableCalculateFor.set(false);
}
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> { alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
alsoVisualize = new ArrayList<>(); alsoVisualize = new ArrayList<>();
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
@ -177,6 +197,13 @@ public class OneWordAnalysisTab {
alsoVisualize = new ArrayList<>(); alsoVisualize = new ArrayList<>();
ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
alsoVisualize.addAll(checkedItems); alsoVisualize.addAll(checkedItems);
if (checkedItems.contains("oblikoskladenjska oznaka") || writeMsdAtTheEndEnableCalculateFor.get()){
writeMsdAtTheEndChB.setDisable(false);
} else {
writeMsdAtTheEnd = false;
writeMsdAtTheEndChB.setSelected(false);
writeMsdAtTheEndChB.setDisable(true);
}
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
}); });
alsoVisualizeCCB.getCheckModel().clearChecks(); alsoVisualizeCCB.getCheckModel().clearChecks();
@ -204,6 +231,15 @@ public class OneWordAnalysisTab {
}); });
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
writeMsdAtTheEnd = false;
writeMsdAtTheEndChB.setDisable(true);
// set
// Mirror the check box state into the writeMsdAtTheEnd field and log each change.
writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
    writeMsdAtTheEnd = newValue;
    // The "{}" placeholder is required; without it the logger ignores the
    // argument and only the constant text is written.
    logger.info("write msd at the end: {}", writeMsdAtTheEnd);
});
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
// set default values // set default values
minimalOccurrencesTF.setText("1"); minimalOccurrencesTF.setText("1");
minimalOccurrences = 1; minimalOccurrences = 1;
@ -390,6 +426,7 @@ public class OneWordAnalysisTab {
filter.setMultipleKeys(alsoVisualize); filter.setMultipleKeys(alsoVisualize);
filter.setMinimalOccurrences(minimalOccurrences); filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy); filter.setMinimalTaxonomy(minimalTaxonomy);
filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
String message = Validation.validateForStringLevel(filter); String message = Validation.validateForStringLevel(filter);
if (message == null) { if (message == null) {

View File

@ -124,6 +124,7 @@ public class StringAnalysisTabNew2 {
private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
@ -169,7 +170,7 @@ public class StringAnalysisTabNew2 {
minimalTaxonomyTF.setText("1"); minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1; minimalTaxonomy = 1;
notePunctuations = true; notePunctuations = false;
// set // set
notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> { notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
notePunctuations = newValue; notePunctuations = newValue;
@ -199,6 +200,8 @@ public class StringAnalysisTabNew2 {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord); alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
} else if(newValue.equals("normalizirana različnica")) { } else if(newValue.equals("normalizirana različnica")) {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord); alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
}else if(newValue.equals("oblikoskladenjska oznaka")) {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd);
}else { }else {
alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty); alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
} }

View File

@ -90,6 +90,7 @@ public class Export {
FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString()); FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString());
if (filter.getCalculateFor().equals(CalculateFor.LEMMA)) if (filter.getCalculateFor().equals(CalculateFor.LEMMA))
FILE_HEADER_AL.add("Lema male črke"); FILE_HEADER_AL.add("Lema male črke");
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
// if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) { // if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) { // if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
@ -161,7 +162,7 @@ public class Export {
// } else { // } else {
// FILE_HEADER_AL.add("Delež glede na vse leme"); // FILE_HEADER_AL.add("Delež glede na vse leme");
// } // }
FILE_HEADER_AL.add("Skupna relativna pogostost"); FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
for (String key : taxonomyResults.keySet()) { for (String key : taxonomyResults.keySet()) {
if(!key.equals("Total")) { if(!key.equals("Total")) {
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]"); FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
@ -213,8 +214,7 @@ public class Export {
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) { for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
List dataEntry = new ArrayList<>(); List dataEntry = new ArrayList<>();
dataEntry.add(e.getKey().getK1()); dataEntry.add(e.getKey().getK1());
if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi")) && if (filter.getCalculateFor().equals(CalculateFor.LEMMA)){
headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")){
dataEntry.add(e.getKey().getK1().toLowerCase()); dataEntry.add(e.getKey().getK1().toLowerCase());
} }
@ -255,16 +255,55 @@ public class Export {
// } // }
dataEntry.add(e.getValue().toString()); dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies)); dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies)); dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
for (String key : taxonomyResults.keySet()){ for (String key : taxonomyResults.keySet()){
if(!key.equals("Total")) { if(!key.equals("Total")) {
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey()); AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
dataEntry.add(frequency.toString()); dataEntry.add(frequency.toString());
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key))); dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key))); dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
} }
} }
// Optionally append the MSD tag to the CSV record, one column per character
if (filter.getWriteMsdAtTheEnd()) {
    String msd = "";
    if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
        // the MSD is the primary key of this entry
        msd = e.getKey().getK1();
    } else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
        // the MSD is one of the additional keys: list positions 0..3 map to K2..K5,
        // later positions are not inspected
        i = 0;
        for (CalculateFor otherKey : filter.getMultipleKeys()) {
            boolean withinMappedRange = i >= 0 && i <= 3;
            if (withinMappedRange && otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
                if (i == 0) {
                    msd = e.getKey().getK2();
                } else if (i == 1) {
                    msd = e.getKey().getK3();
                } else if (i == 2) {
                    msd = e.getKey().getK4();
                } else {
                    msd = e.getKey().getK5();
                }
            }
            i++;
        }
    }

    // "(?!^)" matches every position except the start, so the tag is cut
    // into single-character strings
    dataEntry.addAll(Arrays.asList(msd.split("(?!^)")));
}
csvFilePrinter.printRecord(dataEntry); csvFilePrinter.printRecord(dataEntry);
} }
} catch (Exception e) { } catch (Exception e) {

View File

@ -37,19 +37,22 @@
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" /> <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
<CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" /> <CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
<Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Izpiši razbit MSD" />
<CheckBox fx:id="writeMsdAtTheEndChB" layoutX="263.0" layoutY="145.0" selected="false" />
<!-- MSD and Taxonomy separated --> <!-- MSD and Taxonomy separated -->
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" /> <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" />
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/> <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0"/> <TextField fx:id="msdTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/> <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" /> <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" /> <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" /> <Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" /> <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="360.0" prefWidth="180.0" />
<Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false" <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/> prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>

View File

@ -70,7 +70,7 @@
<Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Upoštevaj ločila" /> <Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Upoštevaj ločila" />
<CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="true" /> <CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="false" />
<!-- MSD and Taxonomy separated --> <!-- MSD and Taxonomy separated -->