Fixed ssj500k functionality, the prefix/suffix bug, and several other bugs.

master
Luka 6 years ago
parent 9efe3d529b
commit ca83cb023b

@ -313,6 +313,17 @@ public class XML_processing {
} }
if (c3Content.equals(".") && includeThisBlock) { if (c3Content.equals(".") && includeThisBlock) {
if (stats.getFilter().getNgramValue() == 0){
int numSentenceParts = 0;
for(Word w : stavek){
int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1);
numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts;
}
stats.updateUniGramOccurrences(numSentenceParts, new ArrayList<>());
} else if(stats.getFilter().getNgramValue() >= 1) {
stats.updateUniGramOccurrences(stavek.size(), new ArrayList<>());
}
// add sentence to corpus // add sentence to corpus
corpus.add(new Sentence(stavek, null)); corpus.add(new Sentence(stavek, null));
// and start a new one // and start a new one
@ -637,8 +648,16 @@ public class XML_processing {
// parser reached end of the current sentence // parser reached end of the current sentence
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
// count all UniGramOccurrences in sentence for statistics // count all UniGramOccurrences in sentence for statistics
stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy); if (stats.getFilter().getNgramValue() == 0){
int numSentenceParts = 0;
for(Word w : sentence){
int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1);
numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts;
}
stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy);
} else if(stats.getFilter().getNgramValue() >= 1) {
stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy);
}
// add sentence to corpus if it passes filters // add sentence to corpus if it passes filters
sentence = runFilters(sentence, stats.getFilter()); sentence = runFilters(sentence, stats.getFilter());
@ -713,6 +732,7 @@ public class XML_processing {
public static boolean readXMLSSJ500K(String path, StatisticsNew stats) { public static boolean readXMLSSJ500K(String path, StatisticsNew stats) {
boolean inWord = false; boolean inWord = false;
boolean inPunctuation = false; boolean inPunctuation = false;
boolean taxonomyMatch = true;
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>(); ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>(); // ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = ""; String lemma = "";
@ -759,10 +779,14 @@ public class XML_processing {
// keep only taxonomy properties // keep only taxonomy properties
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", "")); Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""));
currentFiletaxonomy.add(currentFiletaxonomyElement); currentFiletaxonomy.add(currentFiletaxonomyElement);
Tax taxonomy = new Tax(); // Tax taxonomy = new Tax();
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); // currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
} }
} } else if (qName.equals("bibl")) {
// before proceeding to read this file, make sure that taxonomy filters are a match
taxonomyMatch = true;
}
break; break;
case XMLStreamConstants.CHARACTERS: case XMLStreamConstants.CHARACTERS:
@ -789,10 +813,21 @@ public class XML_processing {
// parser reached end of the current sentence // parser reached end of the current sentence
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
if (stats.getFilter().getNgramValue() == 0){
int numSentenceParts = 0;
for(Word w : sentence){
int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1);
numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts;
}
stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy);
} else if(stats.getFilter().getNgramValue() >= 1) {
stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy);
}
// add sentence to corpus if it passes filters // add sentence to corpus if it passes filters
sentence = runFilters(sentence, stats.getFilter()); sentence = runFilters(sentence, stats.getFilter());
if (!ValidationUtil.isEmpty(sentence)) { if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) {
corpus.add(new Sentence(sentence, currentFiletaxonomy)); corpus.add(new Sentence(sentence, currentFiletaxonomy));
} }
@ -821,7 +856,20 @@ public class XML_processing {
currentFiletaxonomy = new ArrayList<>(); currentFiletaxonomy = new ArrayList<>();
// currentFiletaxonomyLong = new ArrayList<>(); // currentFiletaxonomyLong = new ArrayList<>();
} } else if (endElement.getName().getLocalPart().equals("bibl")) {
// before proceeding to read this file, make sure that taxonomy filters are a match
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
if (currentFiletaxonomy.isEmpty()) {
// taxonomies don't match so stop
// return false;
taxonomyMatch = false;
// System.out.println("TEST");
}
}
}
break; break;
} }
@ -925,7 +973,7 @@ public class XML_processing {
// keep only taxonomy properties // keep only taxonomy properties
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue())); Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()));
currentFiletaxonomy.add(currentFiletaxonomyElement); currentFiletaxonomy.add(currentFiletaxonomyElement);
Tax taxonomy = new Tax(); // Tax taxonomy = new Tax();
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); // currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
} }
} else if (qName.equalsIgnoreCase("div")) { } else if (qName.equalsIgnoreCase("div")) {
@ -1007,6 +1055,17 @@ public class XML_processing {
sentence = GOSCorpusHM.remove(GOSCorpusHMKey); sentence = GOSCorpusHM.remove(GOSCorpusHMKey);
if (stats.getFilter().getNgramValue() == 0){
int numSentenceParts = 0;
for(Word w : sentence){
int v = w.getW1().length() - (stats.getFilter().getStringLength() - 1);
numSentenceParts = (v >= 0) ? (numSentenceParts + v) : numSentenceParts;
}
stats.updateUniGramOccurrences(numSentenceParts, currentFiletaxonomy);
} else if(stats.getFilter().getNgramValue() >= 1) {
stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy);
}
// add sentence to corpus if it passes filters // add sentence to corpus if it passes filters
if (includeFile && !ValidationUtil.isEmpty(sentence)) { if (includeFile && !ValidationUtil.isEmpty(sentence)) {
sentence = runFilters(sentence, stats.getFilter()); sentence = runFilters(sentence, stats.getFilter());
@ -1040,7 +1099,7 @@ public class XML_processing {
// disregard this entry if taxonomies don't match // disregard this entry if taxonomies don't match
includeFile = !currentFiletaxonomy.isEmpty(); includeFile = !currentFiletaxonomy.isEmpty();
currentFiletaxonomy = new ArrayList<>(); // currentFiletaxonomy = new ArrayList<>();
} }
} }

@ -45,72 +45,218 @@ public enum CalculateFor {
return null; return null;
} }
public String toMetadataString() { public String totalSumString(int ngram) {
if (ngram == 0) {
switch (this) {
case WORD:
return "Skupna vsota vseh črkovnih nizov različnic:";
case NORMALIZED_WORD:
return "Skupna vsota vseh črkovnih nizov normaliziranih različnic:";
case LEMMA:
return "Skupna vsota vseh črkovnih nizov lem:";
case MORPHOSYNTACTIC_SPECS:
return "Skupna vsota vseh črkovnih nizov oblikoskladenjskih oznak:";
case MORPHOSYNTACTIC_PROPERTY:
return "Skupna vsota vseh črkovnih nizov oblikoskladenjskih lastnosti:";
case WORD_TYPE:
return "Skupna vsota vseh črkovnih nizov besednih vrst:";
case DIST_WORDS:
return "Skupna vsota vseh črkovnih nizov različnic:";
case DIST_LEMMAS:
return "Skupna vsota vseh črkovnih nizov lem:";
default:
return null;
}
} else if (ngram >= 1) {
switch (this) {
case WORD:
return "Skupna vsota vseh različnic:";
case NORMALIZED_WORD:
return "Skupna vsota vseh normaliziranih različnic:";
case LEMMA:
return "Skupna vsota vseh lem:";
case MORPHOSYNTACTIC_SPECS:
return "Skupna vsota vseh oblikoskladenjskih oznak:";
case MORPHOSYNTACTIC_PROPERTY:
return "Skupna vsota vseh oblikoskladenjskih lastnosti:";
case WORD_TYPE:
return "Skupna vsota vseh besednih vrst:";
case DIST_WORDS:
return "Skupna vsota vseh različnic:";
case DIST_LEMMAS:
return "Skupna vsota vseh lem:";
default:
return null;
}
}
return null;
}
/**
 * Returns the label text for the "total sum of all found ..." line that
 * corresponds to this calculation target.
 *
 * @param ngram n-gram level; {@code 0} selects the letter-string ("črkovnih nizov")
 *              wording, any value {@code >= 1} selects the plain wording
 * @return the label, or {@code null} when {@code ngram} is negative or this
 *         constant has no label defined here
 */
public String foundSumString(int ngram) {
    // Negative levels never had a label.
    if (ngram < 0) {
        return null;
    }

    // Level 0 uses the letter-string wording; every level >= 1 shares the plain wording.
    final boolean letterStrings = (ngram == 0);

    switch (this) {
        case WORD:
        case DIST_WORDS:
            return letterStrings
                    ? "Skupna vsota vseh najdenih črkovnih nizov različnic:"
                    : "Skupna vsota vseh najdenih različnic:";
        case NORMALIZED_WORD:
            return letterStrings
                    ? "Skupna vsota vseh najdenih črkovnih nizov normaliziranih različnic:"
                    : "Skupna vsota vseh najdenih normaliziranih različnic:";
        case LEMMA:
        case DIST_LEMMAS:
            return letterStrings
                    ? "Skupna vsota vseh najdenih črkovnih nizov lem:"
                    : "Skupna vsota vseh najdenih lem:";
        case MORPHOSYNTACTIC_SPECS:
            return letterStrings
                    ? "Skupna vsota vseh najdenih črkovnih nizov oblikoskladenjskih oznak:"
                    : "Skupna vsota vseh najdenih oblikoskladenjskih oznak:";
        case MORPHOSYNTACTIC_PROPERTY:
            return letterStrings
                    ? "Skupna vsota vseh najdenih črkovnih nizov oblikoskladenjskih lastnosti:"
                    : "Skupna vsota vseh najdenih oblikoskladenjskih lastnosti:";
        case WORD_TYPE:
            return letterStrings
                    ? "Skupna vsota vseh najdenih črkovnih nizov besednih vrst:"
                    : "Skupna vsota vseh najdenih besednih vrst:";
        default:
            return null;
    }
}
public String totalAbsoluteFrequencyString(int ngram) {
if (ngram == 0) {
return "Skupna absolutna pogostost črkovnega niza";
}
switch(this){ switch(this){
case WORD: case WORD:
return "Skupna vsota vseh različnic:"; return "Skupna absolutna pogostost različnice";
case NORMALIZED_WORD: case NORMALIZED_WORD:
return "Skupna vsota vseh normaliziranih različnic:"; return "Skupna absolutna pogostost normalizirane različnice";
case LEMMA: case LEMMA:
return "Skupna vsota vseh lem:"; return "Skupna absolutna pogostost leme";
case MORPHOSYNTACTIC_SPECS: case MORPHOSYNTACTIC_SPECS:
return "Skupna vsota vseh oblikoskladenjskih oznak:"; return "Skupna absolutna pogostost oblikoskladenjske oznake";
case MORPHOSYNTACTIC_PROPERTY: case MORPHOSYNTACTIC_PROPERTY:
return "Skupna vsota vseh oblikoskladenjskih lastnosti:"; return "Skupna absolutna pogostost oblikoskladenjske lastnosti";
case WORD_TYPE: case WORD_TYPE:
return "Skupna vsota vseh besednih vrst:"; return "Skupna absolutna pogostost besedne vrste";
case DIST_WORDS: case DIST_WORDS:
return "Skupna vsota vseh različnic:"; return "Skupna absolutna pogostost različnice";
case DIST_LEMMAS: case DIST_LEMMAS:
return "Skupna vsota vseh lem:"; return "Skupna absolutna pogostost leme";
default: default:
return null; return null;
} }
} }
public String toHeaderString() { public String shareOfTotalString(int ngram) {
if (ngram == 0) {
return "Delež glede na skupno vsoto vseh najdenih črkovnih nizov";
}
switch(this){ switch(this){
case WORD: case WORD:
return "Različnica"; return "Delež glede na vse najdene različnice";
case NORMALIZED_WORD: case NORMALIZED_WORD:
return "Normalizirana različnica"; return "Delež glede na vse najdene normalizirane različnice";
case LEMMA: case LEMMA:
return "Lema"; return "Delež glede na vse najdene leme";
case MORPHOSYNTACTIC_SPECS: case MORPHOSYNTACTIC_SPECS:
return "Oblikoskladenjska oznaka"; return "Delež glede na vse najdene oblikoskladenjske oznake";
case MORPHOSYNTACTIC_PROPERTY: case MORPHOSYNTACTIC_PROPERTY:
return "Oblikoskladenjska lastnost"; return "Delež glede na vse najdene oblikoskladenjske lastnosti";
case WORD_TYPE: case WORD_TYPE:
return "Besedna vrsta"; return "Delež glede na vse najdene besedne vrste";
case DIST_WORDS: case DIST_WORDS:
return "Različnica"; return "Delež glede na vse najdene različnice";
case DIST_LEMMAS: case DIST_LEMMAS:
return "Lema"; return "Delež glede na vse najdene leme";
default:
return null;
}
}
public String toHeaderString(int ngram) {
if (ngram == 0) {
return "Črkovni niz";
} else if (ngram == 1) {
switch (this) {
case WORD:
return "Različnica";
case NORMALIZED_WORD:
return "Normalizirana različnica";
case LEMMA:
return "Lema";
case MORPHOSYNTACTIC_SPECS:
return "Oblikoskladenjska oznaka";
case MORPHOSYNTACTIC_PROPERTY:
return "Oblikoskladenjska lastnost";
case WORD_TYPE:
return "Besedna vrsta";
case DIST_WORDS:
return "Različnica";
case DIST_LEMMAS:
return "Lema";
default:
return null;
}
}
switch (this) {
case WORD:
case DIST_WORDS:
return "Različnica niza";
case NORMALIZED_WORD:
return "Normalizirana različnica niza";
case LEMMA:
case DIST_LEMMAS:
return "Lema niza";
case MORPHOSYNTACTIC_SPECS:
return "Oblikoskladenjska oznaka niza";
case MORPHOSYNTACTIC_PROPERTY:
return "Oblikoskladenjska lastnost niza";
case WORD_TYPE:
return "Besedna vrsta niza";
default: default:
return null; return null;
} }
} }
public String toPercentString() { // public String toPercentString() {
switch(this){ // switch(this){
case WORD: // case WORD:
return "Delež glede na vse različnice"; // return "Delež glede na vse različnice";
case NORMALIZED_WORD: // case NORMALIZED_WORD:
return "Delež glede na vse normalizirane različnice"; // return "Delež glede na vse normalizirane različnice";
case LEMMA: // case LEMMA:
return "Delež glede na vse leme"; // return "Delež glede na vse leme";
case MORPHOSYNTACTIC_SPECS: // case MORPHOSYNTACTIC_SPECS:
return "Delež glede na vse oblikoskladenjske oznake"; // return "Delež glede na vse oblikoskladenjske oznake";
case MORPHOSYNTACTIC_PROPERTY: // case MORPHOSYNTACTIC_PROPERTY:
return "Delež glede na vse oblikoskladenjske lastnosti"; // return "Delež glede na vse oblikoskladenjske lastnosti";
case WORD_TYPE: // case WORD_TYPE:
return "Delež glede na vse besedne vrste"; // return "Delež glede na vse besedne vrste";
case DIST_WORDS: // case DIST_WORDS:
return "Delež glede na vse različnice"; // return "Delež glede na vse različnice";
case DIST_LEMMAS: // case DIST_LEMMAS:
return "Delež glede na vse leme"; // return "Delež glede na vse leme";
default: // default:
return null; // return null;
} // }
} // }
} }

@ -10,6 +10,7 @@ import java.util.*;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.ImmutablePair;
@ -59,7 +60,7 @@ public class StatisticsNew {
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) { if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
if (this.filter.getTaxonomy().isEmpty()) { if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) { for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
this.taxonomyResult.put(Taxonomy.factory(this.corpus.getTaxonomy().get(i)), new ConcurrentHashMap<>()); this.taxonomyResult.put(Taxonomy.factoryLongName(this.corpus.getTaxonomy().get(i)), new ConcurrentHashMap<>());
} }
} else { } else {
for (int i = 0; i < this.filter.getTaxonomy().size(); i++) { for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
@ -236,7 +237,7 @@ public class StatisticsNew {
analysisProducedResults = true; analysisProducedResults = true;
} }
removeMinimalOccurrences(taxonomyResult.get(Taxonomy.TOTAL), filter.getMinimalOccurrences()); removeMinimalOccurrences(filter.getMinimalOccurrences());
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy()); removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get(Taxonomy.TOTAL), Util.getValidInt(limit)))); stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get(Taxonomy.TOTAL), Util.getValidInt(limit))));
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter); Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
@ -265,12 +266,14 @@ public class StatisticsNew {
/** /**
* Removes lines where total number of occurrences is lower than specified number (minimalOccurrences) * Removes lines where total number of occurrences is lower than specified number (minimalOccurrences)
*/ */
private void removeMinimalOccurrences(Map<MultipleHMKeys, AtomicLong> taxonomyResultTotal, Integer minimalOccurrences) { private void removeMinimalOccurrences(Integer minimalOccurrences) {
if (minimalOccurrences == 0) if (minimalOccurrences == 0)
return; return;
for (MultipleHMKeys key : taxonomyResultTotal.keySet()){ for (MultipleHMKeys key : taxonomyResult.get(Taxonomy.TOTAL).keySet()){
if(taxonomyResultTotal.get(key).intValue() < minimalOccurrences){ if(taxonomyResult.get(Taxonomy.TOTAL).get(key).intValue() < minimalOccurrences){
taxonomyResultTotal.remove(key); for (Taxonomy t : taxonomyResult.keySet()){
taxonomyResult.get(t).remove(key);
}
} }
} }
} }
@ -498,15 +501,17 @@ public class StatisticsNew {
info.put("Izračunaj za:", filter.getCalculateFor().toString()); info.put("Izračunaj za:", filter.getCalculateFor().toString());
// also write // also write
if (filter.getMultipleKeys().size() > 0){ if (ngramLevel > 0) {
if (filter.getMultipleKeys().size() > 0) {
StringBuilder mk = new StringBuilder(); StringBuilder mk = new StringBuilder();
for (CalculateFor s : filter.getMultipleKeys()) { for (CalculateFor s : filter.getMultipleKeys()) {
mk.append(s.toString()).append("; "); mk.append(s.toString()).append("; ");
}
info.put("Upoštevaj tudi: ", String.join("; ", mk.substring(0, mk.length() - 2)));
} else {
info.put("Upoštevaj tudi: ", "");
} }
info.put("Izpiši tudi: ", String.join("; ", mk.substring(0, mk.length() - 2)));
} else {
info.put("Izpiši tudi: ", "");
} }
// data limitations // data limitations
@ -535,14 +540,16 @@ public class StatisticsNew {
} }
// also write - n - gram > 1 // also write - n - gram > 1
if (ngramLevel > 1 && filter.getCollocability().size() > 0){ if(ngramLevel > 1) {
StringBuilder mk = new StringBuilder(); if (filter.getCollocability().size() > 0) {
for (Collocability s : filter.getCollocability()) { StringBuilder mk = new StringBuilder();
mk.append(s.toString()).append("; "); for (Collocability s : filter.getCollocability()) {
mk.append(s.toString()).append("; ");
}
info.put("Kolokabilnost: ", String.join("; ", mk.substring(0, mk.length() - 2)));
} else {
info.put("Kolokabilnost: ", "");
} }
info.put("Kolokabilnost: ", String.join("; ", mk.substring(0, mk.length() - 2)));
} else {
info.put("Kolokabilnost: ", "");
} }
// fragmented MSD - n-gram = 1 // fragmented MSD - n-gram = 1
@ -580,13 +587,48 @@ public class StatisticsNew {
// } // }
info.put("Taksonomija: ", ""); info.put("Taksonomija: ", "");
if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) || filter.getDisplayTaxonomy()) {
ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy()); ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
String sep = ""; if (filter.getDisplayTaxonomy() && tax.size() == 0) {
// ArrayList<String> intList = (new ArrayList<>(taxonomyResult.keySet()).stream()
// .forEach(x -> {x.toString();}));
// ArrayList<String> taxonomyString = new ArrayList<>();
// for (Taxonomy t : taxonomyResult.keySet()){
// taxonomyString.add(t.toString());
// }
// ObservableList<String> taxonomyObservableString = Tax.getTaxonomyForComboBox(corpus.getCorpusType(), new HashSet<>(taxonomyString));
// ArrayList<String> sortedTaxonomyString = new ArrayList<>();
// for (String t : taxonomyObservableString){
// sortedTaxonomyString.add(t);
// }
// getTaxonomyForTaxonomyResult
tax = Tax.getTaxonomyForTaxonomyResult(corpus.getCorpusType(), taxonomyResult.keySet());
}
// String sep = "";
for (String s : tax) { for (String s : tax) {
info.put(sep = sep + " ", s);
if (s == null) {
continue;
}
// info.put(sep = sep + " ", s);
if (uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s)) == null) {
info.put(s, "");
continue;
}
int n = uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s)).intValue();
if (n == 0) {
info.put(s, "");
} else {
info.put(s, String.valueOf(n));
}
} }
} }
info.put("Min. št. pojavitev: ", String.valueOf(filter.getMinimalOccurrences())); info.put("Min. št. pojavitev: ", String.valueOf(filter.getMinimalOccurrences()));

@ -135,6 +135,60 @@ public class Tax {
return FXCollections.observableArrayList(taxForCombo); return FXCollections.observableArrayList(taxForCombo);
} }
/**
 * Collects the display names of the taxonomy entries that were actually found,
 * together with the intermediate ancestor entries derived from their codes.
 *
 * <p>Each found entry's {@code toString()} value is split on {@code '.'}; every
 * proper prefix made of at least two segments is resolved through
 * {@link Taxonomy#factory(String)} and treated as found as well. Single-segment
 * prefixes are not generated.
 *
 * <p>The result follows the iteration order of the taxonomy map selected for the
 * corpus type: {@code GIGAFIDA_TAXONOMY} for GIGAFIDA, CCKRES and SSJ500K,
 * {@code GOS_TAXONOMY} for GOS, and an empty map for anything else.
 *
 * @param corpusType corpus type that selects the taxonomy map
 * @param foundTax   taxonomy entries that were found; not modified by this method
 * @return values of the selected map whose keys resolve to a found (or derived) entry
 */
public static ArrayList<String> getTaxonomyForTaxonomyResult(CorpusType corpusType, Set<Taxonomy> foundTax) {
    // Work on a copy so the caller's set is left untouched.
    Set<Taxonomy> wanted = new HashSet<>(foundTax);

    LinkedHashMap<String, String> names;
    if (corpusType == CorpusType.GOS) {
        names = GOS_TAXONOMY;
    } else if (corpusType == CorpusType.GIGAFIDA
            || corpusType == CorpusType.CCKRES
            || corpusType == CorpusType.SSJ500K) {
        names = GIGAFIDA_TAXONOMY;
    } else {
        names = new LinkedHashMap<>();
    }

    // Gather ancestors separately; adding to 'wanted' while iterating it is not allowed.
    Set<Taxonomy> ancestors = new HashSet<>();
    for (Taxonomy found : wanted) {
        String[] segments = found.toString().split("\\.");
        // Longest proper prefix first, down to (and including) two segments.
        for (int prefixLength = segments.length - 1; prefixLength > 1; prefixLength--) {
            String prefixCode = String.join(".", Arrays.asList(segments).subList(0, prefixLength));
            ancestors.add(Taxonomy.factory(prefixCode));
        }
    }
    wanted.addAll(ancestors);

    // Walking the map keeps the entries in the map's own relative order.
    ArrayList<String> result = new ArrayList<>();
    names.forEach((code, label) -> {
        if (wanted.contains(Taxonomy.factory(code))) {
            result.add(label);
        }
    });
    return result;
}
public static HashSet<CorpusType> getCorpusTypesWithTaxonomy() { public static HashSet<CorpusType> getCorpusTypesWithTaxonomy() {
return corpusTypesWithTaxonomy; return corpusTypesWithTaxonomy;
} }
@ -204,7 +258,7 @@ public class Tax {
public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<Taxonomy> taxonomy) { public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<Taxonomy> taxonomy) {
LinkedHashMap<String, String> tax = new LinkedHashMap<>(); LinkedHashMap<String, String> tax = new LinkedHashMap<>();
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) { if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
tax = GIGAFIDA_TAXONOMY; tax = GIGAFIDA_TAXONOMY;
} else if (corpusType == CorpusType.GOS) { } else if (corpusType == CorpusType.GOS) {
tax = GOS_TAXONOMY; tax = GOS_TAXONOMY;

@ -502,6 +502,7 @@ public enum Taxonomy {
r.add(SITUACIJA_TELEVIZIJA); r.add(SITUACIJA_TELEVIZIJA);
} else if(disjointTaxonomy.equals(KANAL)){ } else if(disjointTaxonomy.equals(KANAL)){
r.add(KANAL_OSEBNI_STIK); r.add(KANAL_OSEBNI_STIK);
r.add(KANAL_TELEFON);
r.add(KANAL_RADIO); r.add(KANAL_RADIO);
r.add(KANAL_TELEVIZIJA); r.add(KANAL_TELEVIZIJA);
} else if(disjointTaxonomy.equals(SSJ_TISK)){ } else if(disjointTaxonomy.equals(SSJ_TISK)){
@ -646,6 +647,9 @@ public enum Taxonomy {
connections.put(SSJ_KNJIZNO, SSJ_TISK); connections.put(SSJ_KNJIZNO, SSJ_TISK);
connections.put(SSJ_LEPOSLOVNO, SSJ_KNJIZNO); connections.put(SSJ_LEPOSLOVNO, SSJ_KNJIZNO);
connections.put(SSJ_STROKOVNO, SSJ_KNJIZNO); connections.put(SSJ_STROKOVNO, SSJ_KNJIZNO);
connections.put(SSJ_PERIODICNO, SSJ_TISK);
connections.put(SSJ_CASOPIS, SSJ_PERIODICNO);
connections.put(SSJ_REVIJA, SSJ_PERIODICNO);
connections.put(SSJ_DRUGO, SSJ_TISK); connections.put(SSJ_DRUGO, SSJ_TISK);
connections.put(FT_P_GOVORNI, FT_P_PRENOSNIK); connections.put(FT_P_GOVORNI, FT_P_PRENOSNIK);

@ -181,6 +181,7 @@ public class CharacterAnalysisTab {
// taxonomy // taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.setDisable(false);
taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() { taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
@ -217,21 +218,27 @@ public class CharacterAnalysisTab {
} }
displayTaxonomy = false; displayTaxonomy = false;
displayTaxonomyChB.setSelected(false);
// set // set
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
displayTaxonomy = newValue; displayTaxonomyChB.setDisable(false);
if(displayTaxonomy){ displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
minimalTaxonomyTF.setDisable(false); displayTaxonomy = newValue;
} else { if (displayTaxonomy) {
minimalTaxonomyTF.setDisable(true); minimalTaxonomyTF.setDisable(false);
} else {
minimalTaxonomyTF.setDisable(true);
minimalTaxonomyTF.setText("1"); minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1; minimalTaxonomy = 1;
} }
logger.info("display taxonomy: ", displayTaxonomy); logger.info("display taxonomy: ", displayTaxonomy);
}); });
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
} else {
displayTaxonomyChB.setDisable(true);
}
// cvv // cvv
calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> { calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {

@ -14,6 +14,7 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox; import org.controlsfx.control.CheckComboBox;
import org.controlsfx.control.IndexedCheckModel;
import java.io.File; import java.io.File;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
@ -220,14 +221,16 @@ public class OneWordAnalysisTab {
// taxonomy // taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.setDisable(false);
taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
boolean changing = true; ListChangeListener<String> listener = new ListChangeListener<String>() {
public boolean changing = true;
@Override @Override
public void onChanged(ListChangeListener.Change<? extends String> c){ public void onChanged(Change<? extends String> c) {
if(changing) { if (changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
@ -249,27 +252,41 @@ public class OneWordAnalysisTab {
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
} }
} }
}); };
// taxonomyCCB.getCheckModel().getCheckedItems().removeListener(listener);
// System.out.println("THIS WORKS!!!!");
taxonomyCCB.getCheckModel().clearChecks(); taxonomyCCB.getCheckModel().clearChecks();
// System.out.println("YES???");
taxonomyCCB.getCheckModel().getCheckedItems().addListener(listener);
// taxonomyCCB.setCheckModel(null);
// taxonomyCCB.getCheckModel().clearChecks();
} else { } else {
taxonomyCCB.setDisable(true); taxonomyCCB.setDisable(true);
} }
displayTaxonomy = false; displayTaxonomy = false;
displayTaxonomyChB.setSelected(false);
// set // set
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
displayTaxonomy = newValue; displayTaxonomyChB.setDisable(false);
if(displayTaxonomy){ displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
minimalTaxonomyTF.setDisable(false); displayTaxonomy = newValue;
} else { if (displayTaxonomy) {
minimalTaxonomyTF.setDisable(true); minimalTaxonomyTF.setDisable(false);
} else {
minimalTaxonomyTF.setDisable(true);
minimalTaxonomyTF.setText("1"); minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1; minimalTaxonomy = 1;
} }
logger.info("display taxonomy: ", displayTaxonomy); logger.info("display taxonomy: ", displayTaxonomy);
}); });
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
} else {
displayTaxonomyChB.setDisable(true);
}
writeMsdAtTheEnd = false; writeMsdAtTheEnd = false;
writeMsdAtTheEndChB.setDisable(true); writeMsdAtTheEndChB.setDisable(true);

@ -198,20 +198,27 @@ public class StringAnalysisTabNew2 {
notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB)); notePunctuationsChB.setTooltip(new Tooltip(TOOLTIP_readNotePunctuationsChB));
displayTaxonomy = false; displayTaxonomy = false;
displayTaxonomyChB.setSelected(false);
// set // set
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
displayTaxonomy = newValue;
if(displayTaxonomy){
minimalTaxonomyTF.setDisable(false);
} else {
minimalTaxonomyTF.setDisable(true);
minimalTaxonomyTF.setText("1"); if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
minimalTaxonomy = 1; displayTaxonomyChB.setDisable(false);
} displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
logger.info("display taxonomy: ", displayTaxonomy); displayTaxonomy = newValue;
}); if (displayTaxonomy) {
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); minimalTaxonomyTF.setDisable(false);
} else {
minimalTaxonomyTF.setDisable(true);
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
}
logger.info("display taxonomy: ", displayTaxonomy);
});
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
} else {
displayTaxonomyChB.setDisable(true);
}
// calculateForCB // calculateForCB
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> { calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
@ -306,6 +313,7 @@ public class StringAnalysisTabNew2 {
// taxonomy // taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.setDisable(false);
taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() { taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
@ -667,7 +675,11 @@ public class StringAnalysisTabNew2 {
readXML(f.toString(), statisticsOneGrams); readXML(f.toString(), statisticsOneGrams);
i++; i++;
this.updateProgress(i, corpusFiles.size() * 2); this.updateProgress(i, corpusFiles.size() * 2);
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); if (statistic.getFilter().getCollocability().size() > 0) {
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
} else {
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
}
} }
return null; return null;
@ -776,7 +788,7 @@ public class StringAnalysisTabNew2 {
this.updateProgress(i, corpusFiles.size()); this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName())); this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
} }
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); // this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
} }
return null; return null;

@ -343,6 +343,7 @@ public class WordLevelTab {
// taxonomy // taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.setDisable(false);
taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() { taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
@ -379,20 +380,26 @@ public class WordLevelTab {
} }
displayTaxonomy = false; displayTaxonomy = false;
displayTaxonomyChB.setSelected(false);
// set // set
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
displayTaxonomy = newValue; displayTaxonomyChB.setDisable(false);
if(displayTaxonomy){ displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
minimalTaxonomyTF.setDisable(false); displayTaxonomy = newValue;
} else { if (displayTaxonomy) {
minimalTaxonomyTF.setDisable(true); minimalTaxonomyTF.setDisable(false);
} else {
minimalTaxonomyTF.setText("1"); minimalTaxonomyTF.setDisable(true);
minimalTaxonomy = 1;
} minimalTaxonomyTF.setText("1");
logger.info("display taxonomy: ", displayTaxonomy); minimalTaxonomy = 1;
}); }
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); logger.info("display taxonomy: ", displayTaxonomy);
});
displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
} else {
displayTaxonomyChB.setDisable(true);
}
// writeMsdAtTheEnd = false; // writeMsdAtTheEnd = false;
// writeMsdAtTheEndChB.setDisable(true); // writeMsdAtTheEndChB.setDisable(true);

@ -64,27 +64,27 @@ public class Export {
//Delimiter used in CSV file //Delimiter used in CSV file
String NEW_LINE_SEPARATOR = "\n"; String NEW_LINE_SEPARATOR = "\n";
List<Object> FILE_HEADER_AL = new ArrayList<Object>(); List<Object> FILE_HEADER_AL = new ArrayList<>();
Object[] FILE_HEADER; Object[] FILE_HEADER;
//Count frequencies //Count frequencies
long num_frequencies = 0; // long num_frequencies = 0;
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) { // for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
Map<MultipleHMKeys, Long> map = p.getRight(); // Map<MultipleHMKeys, Long> map = p.getRight();
if (map.isEmpty()) // if (map.isEmpty())
continue; // continue;
num_frequencies = Util.mapSumFrequencies(map); // num_frequencies = Util.mapSumFrequencies(map);
}
// Map<String, Long> num_taxonomy_frequencies = new ConcurrentHashMap<>();
// for (String taxonomyKey : taxonomyResults.keySet()) {
// num_taxonomy_frequencies.put(taxonomyKey, (long) 0);
// for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){
// long val = num_taxonomy_frequencies.get(taxonomyKey);
// val += value.get();
// num_taxonomy_frequencies.put(taxonomyKey, val);
// }
// } // }
Map<Taxonomy, Long> num_selected_taxonomy_frequencies = new ConcurrentHashMap<>();
for (Taxonomy taxonomyKey : taxonomyResults.keySet()) {
num_selected_taxonomy_frequencies.put(taxonomyKey, (long) 0);
for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){
long val = num_selected_taxonomy_frequencies.get(taxonomyKey);
val += value.get();
num_selected_taxonomy_frequencies.put(taxonomyKey, val);
}
}
Map<Taxonomy, AtomicLong> num_taxonomy_frequencies = statistics.getUniGramOccurrences(); Map<Taxonomy, AtomicLong> num_taxonomy_frequencies = statistics.getUniGramOccurrences();
@ -92,32 +92,37 @@ public class Export {
if (!ValidationUtil.isEmpty(filter.getSkipValue()) && filter.getSkipValue() > 0) { if (!ValidationUtil.isEmpty(filter.getSkipValue()) && filter.getSkipValue() > 0) {
FILE_HEADER_AL.add("Izpuščene besede"); FILE_HEADER_AL.add("Izpuščene besede");
} }
FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString()); FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString(filter.getNgramValue()));
if (filter.getCalculateFor().equals(CalculateFor.LEMMA)) if (filter.getCalculateFor().equals(CalculateFor.LEMMA)) {
FILE_HEADER_AL.add("Lema male črke"); if(filter.getNgramValue() == 0) {
FILE_HEADER_AL.add("Črkovni niz (male črke)");
} else if(filter.getNgramValue() >= 1) {
FILE_HEADER_AL.add("Lema (male črke)");
}
}
if (filter.getSuffixLength() != null && filter.getSuffixList() != null && filter.getPrefixLength() != null && filter.getPrefixList() != null) { if (filter.getSuffixLength() != null && filter.getSuffixList() != null && filter.getPrefixLength() != null && filter.getPrefixList() != null) {
if (filter.getPrefixLength() > 0 || filter.getPrefixList().size() > 0) { if (filter.getPrefixLength() > 0 || filter.getPrefixList().size() > 0) {
FILE_HEADER_AL.add("Predpona"); FILE_HEADER_AL.add("Začetni del besede");
} }
FILE_HEADER_AL.add("Preostali del besede"); FILE_HEADER_AL.add("Preostali del besede");
if (filter.getSuffixLength() > 0 || filter.getSuffixList().size() > 0) { if (filter.getSuffixLength() > 0 || filter.getSuffixList().size() > 0) {
FILE_HEADER_AL.add("Pripona"); FILE_HEADER_AL.add("Končni del besede");
} }
} }
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences().get(Taxonomy.TOTAL).longValue())); headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
// headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies)); // headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
for (CalculateFor otherKey : filter.getMultipleKeys()) { for (CalculateFor otherKey : filter.getMultipleKeys()) {
FILE_HEADER_AL.add(otherKey.toHeaderString()); FILE_HEADER_AL.add(otherKey.toHeaderString(filter.getNgramValue()));
if (otherKey.equals(CalculateFor.LEMMA)) if (otherKey.equals(CalculateFor.LEMMA))
FILE_HEADER_AL.add("Lema male črke"); FILE_HEADER_AL.add("Lema (male črke)");
} }
FILE_HEADER_AL.add("Skupna absolutna pogostost"); FILE_HEADER_AL.add(filter.getCalculateFor().totalAbsoluteFrequencyString(filter.getNgramValue()));
FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString()); FILE_HEADER_AL.add(filter.getCalculateFor().shareOfTotalString(filter.getNgramValue()));
FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)"); FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
@ -216,6 +221,9 @@ public class Export {
// real prefix // real prefix
String rpf = ""; String rpf = "";
for(String pf : filter.getPrefixList()){ for(String pf : filter.getPrefixList()){
if (key.length() < pf.length()) {
continue;
}
if (pf.equals(key.substring(0, pf.length()))){ if (pf.equals(key.substring(0, pf.length()))){
rpf = pf; rpf = pf;
break; break;
@ -225,6 +233,9 @@ public class Export {
// real suffix // real suffix
String rsf = ""; String rsf = "";
for(String sf : filter.getSuffixList()){ for(String sf : filter.getSuffixList()){
if (key.length() < sf.length()) {
continue;
}
if (sf.equals(key.substring(key.length() - sf.length()))){ if (sf.equals(key.substring(key.length() - sf.length()))){
rsf = sf; rsf = sf;
break; break;
@ -268,13 +279,13 @@ public class Export {
dataEntry.add(e.getValue().toString()); dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies)); dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(Taxonomy.TOTAL)));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies)); dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
for (Taxonomy key : taxonomyResults.keySet()){ for (Taxonomy key : taxonomyResults.keySet()){
if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey()); AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
dataEntry.add(frequency.toString()); dataEntry.add(frequency.toString());
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key).longValue())); dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key)));
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue())); dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue()));
// dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences())); // dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
// dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences())); // dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));

@ -30,7 +30,7 @@
</items> </items>
</ComboBox> </ComboBox>
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" /> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Upoštevaj tudi" />
<CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>

@ -54,7 +54,7 @@
</items> </items>
</ComboBox> </ComboBox>
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" /> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Upoštevaj tudi" />
<CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" /> <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />

@ -30,7 +30,7 @@
</items> </items>
</ComboBox> </ComboBox>
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izpiši tudi" /> <Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Upoštevaj tudi" />
<CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/> <CheckComboBox fx:id="alsoVisualizeCCB" layoutX="185.0" layoutY="60.0" prefHeight="25.0" prefWidth="180.0"/>

@ -0,0 +1,9 @@
/* Font applied to elements matching the `.root` style class: 16px Arial. */
.root {
-fx-font: 16px Arial;
}
/*
 * `.test` style class: 8px Arial, bold weight, red text fill.
 * NOTE(review): the class name suggests a scratch/debug style - confirm it is
 * actually referenced by a view before keeping it.
 */
.test {
-fx-font: 8px Arial;
-fx-font-weight: bold;
-fx-text-fill: red;
}
Loading…
Cancel
Save