BIG REFACTOR - erasing unused code
This commit is contained in:
@ -1,15 +0,0 @@
package alg;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
public class Common {
public static <K, V> void updateMap(Map<K, AtomicLong> map, K o) {
// if not in map
AtomicLong r = map.putIfAbsent(o, new AtomicLong(1));
// else
if (r != null)
@ -19,7 +19,6 @@ import gui.I18N;
import javafx.beans.InvalidationListener;
import javafx.concurrent.Task;
import org.apache.logging.log4j.LogManager;
@ -38,35 +37,10 @@ public class XML_processing {
public static boolean isCollocability = false;
public static InvalidationListener progressBarListener;
public double getProgress() {
return progressProperty().get();
public ReadOnlyDoubleProperty progressProperty() {
return progress ;
// public static void processCorpus(Statistics stats) {
// // we can preset the list's size, so there won't be a need to resize it
// List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT);
// int i = 0;
// for (File f : Settings.corpus) {
// i++;
// readXML(f.toString(), stats);
// }
// }
// public static void readXML(String path, Statistics stats) {
// if (stats.getCorpusType() == CorpusType.GIGAFIDA) {
// readXMLGigafida(path, stats);
// } else if (stats.getCorpusType() == CorpusType.GOS) {
// readXMLGos(path, stats);
// } else if (stats.getCorpusType() == CorpusType.SOLAR) {
// readXMLSolar(path, stats);
// }
// }
public static boolean readXML(String path, StatisticsNew stats) {
if (stats.getCorpus().getCorpusType() == CorpusType.GIGAFIDA
|| stats.getCorpus().getCorpusType() == CorpusType.CCKRES) {
@ -81,7 +55,6 @@ public class XML_processing {
} else if (stats.getCorpus().getCorpusType() == CorpusType.VERT) {
return readVERT(path, stats);
// task.updateProgress(fileNum, size);
return false;
@ -174,15 +147,10 @@ public class XML_processing {
} else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) {
alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats);
} else {
// TODO:
// alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats);
// pool.invoke(wc);
// if running with minimalRelFre frequency erase all ngrams with occurrences lower than set value per 1M
if(stats.getFilter().getIsMinimalRelFreScraper()) {
// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
long countFor1MWords = stats.getUniGramOccurrences().get(stats.getCorpus().getTotal()).longValue();
if(countFor1MWords > 1000000L){
double absToRelFactor = (stats.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
@ -197,125 +165,9 @@ public class XML_processing {
stats.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
// System.out.println("asd");
// public static void readXMLGos(String path, Statistics stats) {
// boolean in_word = false;
// String taksonomija = "";
// String lemma = "";
// String msd = "";
// String type = stats.isGosOrthMode() ? "orth" : "norm"; // orth & norm
// List<Word> stavek = new ArrayList<>();
// List<Sentence> corpus = new ArrayList<>();
// String sentenceDelimiter = "seg";
// String taxonomyPrefix = "gos.";
// try {
// XMLInputFactory factory = XMLInputFactory.newInstance();
// XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path));
// while (eventReader.hasNext()) {
// XMLEvent event = eventReader.nextEvent();
// switch (event.getEventType()) {
// case XMLStreamConstants.START_ELEMENT:
// StartElement startElement = event.asStartElement();
// String qName = startElement.getName().getLocalPart();
// // "word" node
// if (qName.equals("w")) {
// in_word = true;
// if (type.equals("norm")) {
// // make sure we're looking at <w lemma...> and not <w type...>
// Iterator var = startElement.getAttributes();
// ArrayList<Object> attributes = new ArrayList<>();
// while (var.hasNext()) {
// attributes.add(;
// }
// if (attributes.contains("msd")) {
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
// } else {
// msd = null;
// }
// if (attributes.contains("lemma")) {
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
// }
// }
// }
// // taxonomy node
// else if (qName.equalsIgnoreCase("catRef")) {
// // there are some term nodes at the beginning that are of no interest to us
// // they differ by not having the attribute "ref", so test will equal null
// Attribute test = startElement.getAttributeByName(QName.valueOf("target"));
// if (test != null) {
// // keep only taxonomy properties
// taksonomija = String.valueOf(test.getValue()).replace(taxonomyPrefix, "");
// }
// } else if (qName.equalsIgnoreCase("div")) {
// type = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
// }
// break;
// case XMLStreamConstants.CHARACTERS:
// Characters characters = event.asCharacters();
// // "word" node value
// if (in_word) {
// if (type.equals("norm") && msd != null) {
// stavek.add(new Word(characters.getData(), lemma, msd));
// } else {
// stavek.add(new Word(characters.getData()));
// }
// in_word = false;
// }
// break;
// case XMLStreamConstants.END_ELEMENT:
// EndElement endElement = event.asEndElement();
// // parser reached end of the current sentence
// if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
// // add sentence to corpus
// corpus.add(new Sentence(stavek, taksonomija, type));
// // and start a new one
// stavek = new ArrayList<>();
// /* Invoke Fork-Join when we reach maximum limit of
// * sentences (because we can't read everything to
// * memory) or we reach the end of the file.
// */
// if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
// fj(corpus, stats);
// // empty the current corpus, since we don't need
// // the data anymore
// corpus.clear();
// }
// }
// // backup
// if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
// fj(corpus, stats);
// corpus.clear();
// }
// break;
// }
// }
// } catch (FileNotFoundException | XMLStreamException e) {
// e.printStackTrace();
// }
// }
public static boolean readXMLSolar(String path, StatisticsNew stats) {
boolean in_word = false;
@ -327,7 +179,6 @@ public class XML_processing {
List<Sentence> corpus = new ArrayList<>();
// used for filter
// Set<String> headTags = new HashSet<>(Arrays.asList("sola", "predmet", "razred", "regija", "tip", "leto"));
Set<String> headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO));
Map<String, String> headBlock = null;
boolean includeThisBlock = false;
@ -372,9 +223,7 @@ public class XML_processing {
switch (event.getEventType()) {
case XMLStreamConstants.START_ELEMENT:
StartElement startElement = event.asStartElement();
// System.out.println(String.format("%s", startElement.toString()));
String qName = startElement.getName().getLocalPart();
// "word" node
@ -423,7 +272,7 @@ public class XML_processing {
stavek = new ArrayList<>();
} else if (qName.equals("head")) {
headBlock = new HashMap<>();
} else { // if (headTags.contains(qName)) {
} else {
boolean inHeadTags = false;
String headTag = "";
for (String tag : headTags){
@ -436,8 +285,6 @@ public class XML_processing {
if(inHeadTags) {
String tagContent = eventReader.nextEvent().asCharacters().getData();
headBlock.put(headTag, tagContent);
// String tagContent = xmlEventReader.nextEvent().asCharacters().getData();
// resultFilters.get(headTag).add(tagContent);
@ -562,22 +409,16 @@ public class XML_processing {
if (line.length() > 4 && line.substring(1, 5).equals("text")) {
// split over "\" "
String[] split = line.split("\" ");
// String mediumId = "";
// String typeId = "";
// String proofreadId = "";
boolean idsPresent = false;
for (String el : split) {
String[] attribute = el.split("=\"");
if (attribute[0].equals("medium_id")) {
// mediumId = attribute[1];
idsPresent = true;
} else if (attribute[0].equals("type_id")) {
// typeId = attribute[1];
idsPresent = true;
} else if (attribute[0].equals("proofread_id")) {
// proofreadId = attribute[1];
idsPresent = true;
@ -586,13 +427,10 @@ public class XML_processing {
for (String el : split) {
String[] attribute = el.split("=\"");
if (attribute[0].equals("medium")) {
// mediumId = attribute[1];
} else if (attribute[0].equals("type")) {
// typeId = attribute[1];
} else if (attribute[0].equals("proofread")) {
// proofreadId = attribute[1];
@ -679,7 +517,6 @@ public class XML_processing {
// solar
// } else if (!parseTaxonomy && headTags.contains(elementName)) {
} else if (!parseTaxonomy) {
boolean inHeadTags = false;
String headTag = "";
@ -737,7 +574,6 @@ public class XML_processing {
boolean inPunctuation = false;
boolean taxonomyMatch = true;
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
// ArrayList<Taxonomy> currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
String msd = "";
@ -780,8 +616,6 @@ public class XML_processing {
// keep only taxonomy properties
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus());
Tax taxonomy = new Tax();
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
@ -795,40 +629,13 @@ public class XML_processing {
sentence.add(createWord(word, lemma, msd, word, stats.getFilter()));
inWord = false;
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
if (stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
String punctuation = characters.getData();
sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter()));
inPunctuation = false;
// String punctuation = ",";
// sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
// sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
// sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
// inPunctuation = false;
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
// String actualPunctuation = characters.getData();
// if (actualPunctuation.equals(".") || actualPunctuation.equals("!") || actualPunctuation.equals("?") || actualPunctuation.equals("..."))
// break;
// String punctuation = ",";
// int skip_number = 0;
// if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue())){
// skip_number = stats.getFilter().getSkipValue();
// }
// for(int i = 1; i < skip_number + 2; i ++){
// if (i < sentence.size() && !sentence.get(sentence.size() - i).equals(punctuation)) {
// sentence.get(sentence.size() - i).setWord(sentence.get(sentence.size() - i).getWord() + punctuation);
// sentence.get(sentence.size() - i).setLemma(sentence.get(sentence.size() - i).getLemma() + punctuation);
// sentence.get(sentence.size() - i).setMsd(sentence.get(sentence.size() - i).getMsd() + punctuation);
// }
// }
// inPunctuation = false;
// }
case XMLStreamConstants.END_ELEMENT:
EndElement endElement = event.asEndElement();
@ -869,10 +676,6 @@ public class XML_processing {
fj(corpus, stats);
// empty the current corpus, since we don't need the data anymore
// TODO: if (stats.isUseDB()) {
// stats.storeTmpResultsToDB();
// }
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
// before proceeding to read this file, make sure that taxonomy filters are a match
@ -883,7 +686,6 @@ public class XML_processing {
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
// taxonomies don't match so stop
// union (select words that match any of the selected taxonomies)
// return false;
taxonomyMatch = false;
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
@ -898,10 +700,6 @@ public class XML_processing {
// join corpus and stats
fj(corpus, stats);
// TODO: if (stats.isUseDB()) {
// stats.storeTmpResultsToDB();
// }
@ -909,7 +707,6 @@ public class XML_processing {
} catch (FileNotFoundException | XMLStreamException e) {
throw new java.lang.RuntimeException("XMLStreamException | FileNotFoundException");
// e.printStackTrace();
} finally {
if (eventReader != null) {
try {
@ -929,7 +726,6 @@ public class XML_processing {
boolean inPunctuation = false;
boolean taxonomyMatch = true;
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
String msd = "";
@ -1006,8 +802,6 @@ public class XML_processing {
// keep only taxonomy properties
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus());
// Tax taxonomy = new Tax();
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
} else if (stats.getCorpus().getTaxonomy().size() > 0 && qName.equalsIgnoreCase("catRef")) {
// get value from attribute target
@ -1017,41 +811,7 @@ public class XML_processing {
// keep only taxonomy properties
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).split(":")[1], stats.getCorpus());
// Tax taxonomy = new Tax();
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
// if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) {
// HashMap<String, String> atts = extractAttributes(startElement);
// String debug = "";
// String tax = startElement.getAttributeByName(QName.valueOf("target"))
// .getValue()
// .replace("#", "");
// if (tax.indexOf(':') >= 0) {
// tax = tax.split(":")[1];
// }
// resultTaxonomy.add(tax);
// } else if (parseTaxonomy && elementName.equalsIgnoreCase("term")) {
// String tax = startElement.getAttributeByName(QName.valueOf("ref"))
// .getValue()
// .replace("#", "");
// resultTaxonomy.add(tax);
// } else if (!parseTaxonomy && headTags.contains(elementName)) {
// String tagContent = xmlEventReader.nextEvent().asCharacters().getData();
// resultFilters.get(elementName).add(tagContent);
// }
} else if (qName.equals("bibl")) {
// before proceeding to read this file, make sure that taxonomy filters are a match
taxonomyMatch = true;
@ -1068,14 +828,10 @@ public class XML_processing {
// "word" node value
if (inWord) {
String word = characters.getData();
// if (word.equals("Banovec")){
// System.out.println("Test");
// }
sentence.add(createWord(word, lemma, msd, word, stats.getFilter()));
inWord = false;
if (stats.getFilter().getNotePunctuations() && inPunctuation) {
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
String punctuation = characters.getData();
sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter()));
inPunctuation = false;
@ -1085,9 +841,6 @@ public class XML_processing {
case XMLStreamConstants.END_ELEMENT:
EndElement endElement = event.asEndElement();
String var = endElement.getName().getLocalPart();
String debug = "";
// parser reached end of the current sentence
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
if (stats.getFilter().getNgramValue() == 0){
@ -1119,10 +872,6 @@ public class XML_processing {
fj(corpus, stats);
// empty the current corpus, since we don't need the data anymore
// TODO: if (stats.isUseDB()) {
// stats.storeTmpResultsToDB();
// }
// fallback
@ -1133,7 +882,6 @@ public class XML_processing {
currentFiletaxonomy = new ArrayList<>();
// currentFiletaxonomyLong = new ArrayList<>();
} else if (endElement.getName().getLocalPart().equals("bibl")) {
// before proceeding to read this file, make sure that taxonomy filters are a match
@ -1143,7 +891,6 @@ public class XML_processing {
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
// taxonomies don't match so stop
// union (select words that match any of the selected taxonomies)
// return false;
taxonomyMatch = false;
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
@ -1162,10 +909,6 @@ public class XML_processing {
fj(corpus, stats);
// empty the current corpus, since we don't need the data anymore
// TODO: if (stats.isUseDB()) {
// stats.storeTmpResultsToDB();
// }
} catch (FileNotFoundException | XMLStreamException e) {
@ -1185,12 +928,9 @@ public class XML_processing {
public static boolean readXMLGos(String path, StatisticsNew stats) {
boolean inWord = false;
boolean inPunctuation = false;
boolean inOrthDiv = false;
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
boolean inSeparatedWord = false;
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
String msd = "";
@ -1201,10 +941,6 @@ public class XML_processing {
String sentenceDelimiter = "seg";
int wordIndex = 0;
String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm
int numLines = 0;
int lineNum = 0;
@ -1248,7 +984,6 @@ public class XML_processing {
lineNum ++;
XMLEvent event = eventReader.nextEvent();
// System.out.print(String.format("%s", event.toString().replaceAll("\\[''\\]::", "")));
switch (event.getEventType()) {
case XMLStreamConstants.START_ELEMENT:
@ -1278,11 +1013,6 @@ public class XML_processing {
if (atts.containsKey("lemma")) {
lemma = atts.get("lemma");
// if (!inOrthDiv) {
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
// }
} else if (atts.containsKey("type") && atts.get("type").equals("separated")) {
inSeparatedWord = true;
@ -1299,11 +1029,7 @@ public class XML_processing {
// keep only taxonomy properties
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()), stats.getCorpus());
// Tax taxonomy = new Tax();
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
} else if (qName.equalsIgnoreCase("div")) {
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
} else if (qName.equalsIgnoreCase("seg")) {
HashMap<String, String> atts = extractAttributes(startElement);
@ -1322,20 +1048,14 @@ public class XML_processing {
case XMLStreamConstants.CHARACTERS:
// "word" node value
if (inWord) {
// if (GOSCorpusHMKey.equals("gos.028-0108.norm") && wordIndex > 8){
// System.out.println(wordIndex);
// }
// if the parser is inside the "orth" div, add a new word to the sentence
if (inOrthDiv){
// GOSCorpusHM.put(GOSCorpusHMKey, sentence);
String word = "";
Characters characters = event.asCharacters();
sentence.add(createWord(characters.getData(), "", "", "", stats.getFilter()));
// if the parser is in the normalized part, find the corresponding "orth" word and add the other info to it
} else {
Characters characters = event.asCharacters();
// System.out.println(wordIndex);
// System.out.println(GOSCorpusHMKey + " " + lemma + " " + wordIndex);
if (wordIndex < GOSCorpusHM.get(GOSCorpusHMKey).size()) {
Word currentWord = GOSCorpusHM.get(GOSCorpusHMKey).get(wordIndex);
currentWord.setLemma(lemma, stats.getFilter().getWordParts());
@ -1349,9 +1069,7 @@ public class XML_processing {
GOSCorpusHM.get(GOSCorpusHMKey).add(wordIndex, createWord(currentWord.getWord(stats.getFilter().getWordParts()),
"", "", "", stats.getFilter()));
} //else {
// System.out.println("Error");
// }
@ -1393,17 +1111,7 @@ public class XML_processing {
// add sentence to corpus if it passes filters
if (includeFile && !ValidationUtil.isEmpty(sentence)) {
// for(Word w : sentence) {
// if (w.getW1().equals("")) {
// System.out.println("HERE!!!");
// }
// }
sentence = runFilters(sentence, stats.getFilter());
// for(Word w : sentence) {
// if (w.getW1().equals("")) {
// System.out.println("HERE!!!");
// }
// }
corpus.add(new Sentence(sentence, currentFiletaxonomy));
@ -1430,21 +1138,12 @@ public class XML_processing {
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
// before proceeding to read this file, make sure that taxonomy filters are a match
// if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
// currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
// // disregard this entry if taxonomies don't match
// includeFile = !currentFiletaxonomy.isEmpty();
//// currentFiletaxonomy = new ArrayList<>();
// }
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
// taxonomies don't match so stop
// union (select words that match any of the selected taxonomies)
// return false;
includeFile = false;
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
@ -1462,7 +1161,6 @@ public class XML_processing {
currentFiletaxonomy = new ArrayList<>();
// currentFiletaxonomyLong = new ArrayList<>();
@ -1488,9 +1186,6 @@ public class XML_processing {
public static boolean readVERT(String path, StatisticsNew stats) {
// taxonomy corpora
// HashSet<String> resultTaxonomy = new HashSet<>();
// regi path
String regiPath = path.substring(0, path.length()-4) + "regi";
@ -1503,7 +1198,6 @@ public class XML_processing {
// read regi file
regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8");
try {
boolean insideHeader = false;
int attributeIndex = 0;
while (regiIt.hasNext()) {
String line = regiIt.nextLine();
@ -1534,7 +1228,6 @@ public class XML_processing {
} catch (IOException e) {
throw new java.lang.RuntimeException("IOException");
// e.printStackTrace();
int numLines = 0;
@ -1556,7 +1249,6 @@ public class XML_processing {
LineIterator it;
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
boolean inParagraph = false;
boolean inSentence = false;
boolean taxonomyMatch = true;
int lineNum = 0;
@ -1572,8 +1264,6 @@ public class XML_processing {
try {
it = FileUtils.lineIterator(new File(path), "UTF-8");
try {
boolean insideHeader = false;
while (it.hasNext()) {
int percentage = (int) (lineNum * 100.0 / numLines);
if(progress.get() < percentage) {
@ -1596,7 +1286,6 @@ public class XML_processing {
boolean proofread = false;
for (String el : split) {
String[] attribute = el.split("=\"");
boolean idsPresent = false;
if (attribute[0].equals("medium_id") && !attribute[1].equals("-")) {
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus());
@ -1639,12 +1328,6 @@ public class XML_processing {
// else if((line.length() >= 3 && line.substring(0, 2).equals("<p") && line.substring(line.length() - 1, line.length()).equals(">")) ||
// (line.length() >= 3 && line.substring(0, 3).equals("<ab") && line.substring(line.length() - 1, line.length()).equals(">"))){
// inParagraph = true;
// } else if((line.length() == 4 && line.equals("</p>")) || (line.length() == 5 && line.equals("</ab>"))){
// inParagraph = false;
// }
else if(line.length() >= 3 && line.substring(0, 2).equals("<s") && line.substring(line.length() - 1, line.length()).equals(">")){
inSentence = true;
} else if(line.length() == 4 && line.equals("</s>")){
@ -1677,10 +1360,7 @@ public class XML_processing {
// and start a new one
sentence = new ArrayList<>();
// corpus.add(new Sentence(sentence, currentFiletaxonomy));
} else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence){
// } else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence && inParagraph){
String[] split = line.split("\t");
if(slovene) {
if (split[lemmaIndex].length() > 2 && split[lemmaIndex].charAt(split[lemmaIndex].length() - 2) == '-' && Character.isAlphabetic(split[lemmaIndex].charAt(split[lemmaIndex].length() - 1)) &&
@ -1721,7 +1401,6 @@ public class XML_processing {
} catch (IOException e) {
// resultTaxonomy.remove("-");
return true;
@ -1,67 +0,0 @@
//package alg.inflectedJOS;
//import java.util.List;
//import java.util.concurrent.RecursiveAction;
//import data.Sentence;
//import data.Statistics;
//public class ForkJoin extends RecursiveAction {
// private static final long serialVersionUID = -1260951004477299634L;
// private static final int ACCEPTABLE_SIZE = 1000;
// private List<Sentence> corpus;
// private Statistics stats;
// private int start;
// private int end;
// /**
// * Constructor for subproblems.
// */
// private ForkJoin(List<Sentence> corpus, int start, int end, Statistics stats) {
// this.corpus = corpus;
// this.start = start;
// this.end = end;
// this.stats = stats;
// }
// /**
// * Default constructor for the initial problem
// */
// public ForkJoin(List<Sentence> corpus, Statistics stats) {
// this.corpus = corpus;
// this.start = 0;
// this.end = corpus.size();
// this.stats = stats;
// }
// private void computeDirectly() {
// List<Sentence> subCorpus = corpus.subList(start, end);
// if (stats.isTaxonomySet()) {
// InflectedJOSCount.calculateForAll(subCorpus, stats, stats.getInflectedJosTaxonomy());
// } else {
// InflectedJOSCount.calculateForAll(subCorpus, stats, null);
// }
// }
// @Override
// protected void compute() {
// int subCorpusSize = end - start;
// if (subCorpusSize < ACCEPTABLE_SIZE) {
// computeDirectly();
// } else {
// int mid = start + subCorpusSize / 2;
// ForkJoin left = new ForkJoin(corpus, start, mid, stats);
// ForkJoin right = new ForkJoin(corpus, mid, end, stats);
// // fork (push to queue)-> compute -> join
// left.fork();
// right.fork();
// left.join();
// right.join();
// }
// }
@ -1,170 +0,0 @@
//package alg.inflectedJOS;
//import java.util.ArrayList;
//import java.util.HashMap;
//import java.util.List;
//import org.apache.commons.lang3.StringUtils;
//import alg.Common;
//import data.Sentence;
//import data.Statistics;
//import data.StatisticsNew;
//import data.Word;
//public class InflectedJOSCount {
// public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;
// // static {
// // // calculate all possible combinations of indices we will substitute with a '-' for substring statistics
// // indices = new HashMap<>();
// // for (int i = 5; i <= 8; i++) {
// // indices.put(i, calculateCombinations(i));
// // }
// // }
// //
// // private static List<Integer> calculateCombinations(int i) {
// // int arr[] = {1, 2, 3, 4, 5};
// // int r = 3;
// // int n = arr.length;
// // ArrayList<ArrayList<Integer>> result = new ArrayList<>();
// //
// // return printCombination(arr, n, r);
// // }
// //
// // /* arr[] ---> Input Array
// // data[] ---> Temporary array to store current combination
// // start & end ---> Staring and Ending indexes in arr[]
// // index ---> Current index in data[]
// // r ---> Size of a combination to be printed */
// // static void combinationUtil(int arr[], int data[], int start,
// // int end, int index, int r, ArrayList<ArrayList<Integer>> result) {
// // // Current combination is ready to be printed, print it
// // ArrayList<Integer> tmpResult = new ArrayList<>();
// //
// // if (index == r) {
// // ArrayList<Integer> tmpResult = new ArrayList<>();
// // for (int j = 0; j < r; j++)
// // System.out.print(data[j] + " ");
// // System.out.println("");
// // return;
// // }
// //
// // // replace index with all possible elements. The condition
// // // "end-i+1 >= r-index" makes sure that including one element
// // // at index will make a combination with remaining elements
// // // at remaining positions
// // for (int i = start; i <= end && end - i + 1 >= r - index; i++) {
// // data[index] = arr[i];
// // combinationUtil(arr, data, i + 1, end, index + 1, r);
// // }
// // }
// //
// // // The main function that prints all combinations of size r
// // // in arr[] of size n. This function mainly uses combinationUtil()
// // static void printCombination(int arr[], int n, int r) {
// // // A temporary array to store all combination one by one
// // int data[] = new int[r];
// //
// // // Print all combination using temprary array 'data[]'
// // combinationUtil(arr, data, 0, n - 1, 0, r);
// // }
// // public static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
// // for (Sentence s : corpus) {
// // // disregard if wrong taxonomy
// // if (!(s.getObservableListTaxonomy().startsWith(taxonomy))) {
// // continue;
// // }
// //
// // calculateCommon(s, stats.result);
// //
// // for (Word word : s.getWords()) {
// // // skip if current word is not inflected
// // if (!(word.getMsd().length() > 0)) {
// // continue;
// // }
// //
// // String msd = word.getMsd();
// //
// // StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
// //
// // for (int i = 1; i < msd.length(); i++) {
// // entry.setCharAt(i, msd.charAt(i));
// // Common.updateMap(stats.result, entry.toString());
// // entry.setCharAt(i, '-');
// // }
// // }
// // }
// // }
// // public static void calculateForAll(List<Sentence> corpus, Statistics stats) {
// // for (Sentence s : corpus) {
// // for (Word word : s.getWords()) {
// // if (!(word.getMsd().length() > 0)) {
// // continue;
// // }
// //
// // String msd = word.getMsd();
// //
// // StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
// //
// // for (int i = 1; i < msd.length(); i++) {
// // entry.setCharAt(i, msd.charAt(i));
// // Common.updateMap(stats.result, entry.toString());
// // entry.setCharAt(i, '-');
// // }
// // }
// // }
// // }
// static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
// for (Sentence s : corpus) {
// // disregard if wrong taxonomy
//// if (taxonomy != null && !(s.getObservableListTaxonomy().startsWith(taxonomy))) {
//// continue;
//// }
// for (Word word : s.getWords()) {
// // skip if current word is not inflected
// if (!(word.getMsd().length() > 0)) {
// continue;
// }
// String msd = word.getMsd();
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
// for (int i = 1; i < msd.length(); i++) {
// entry.setCharAt(i, msd.charAt(i));
// Common.updateMap(stats.result, entry.toString());
// entry.setCharAt(i, '-');
// }
// }
// }
// }
// public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
// for (Sentence s : corpus) {
// for (Word word : s.getWords()) {
// // skip if current word is not inflected
// // // TODO: if has defined msd and is of correct type (create a set)
// // if (!(word.getMsd().length() > 0)) {
// // continue;
// // }
// String msd = word.getMsd();
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
// for (int i = 1; i < msd.length(); i++) {
// entry.setCharAt(i, msd.charAt(i));
// stats.updateResults(entry.toString());
// entry.setCharAt(i, '-');
// }
// }
// }
// }
@ -1,132 +0,0 @@
package alg.inflectedJOS;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import data.Enums.InflectedJosTypes;
import data.StatisticsNew;
import data.Taxonomy;
import gui.ValidationUtil;
import util.Combinations;
// adapted from
// Aggregates counts from a StatisticsNew result map into counts of masked key
// patterns, processed separately for each character in
// InflectedJosTypes.inflectedJosTypes.
// NOTE(review): this listing has lost lines (closing braces and some
// statements), so the comments below describe only what is still visible.
public class WordFormation {
// Masked pattern -> accumulated count for the type currently being processed;
// replaced with a fresh map at the start of every iteration in calculateStatistics.
private static HashMap<String, Long> josTypeResult;
// Rows accumulated by resultsMapToArray across calls; each row has 3 cells.
private static Object[][] tmpResults;
// Key length -> index combinations from Combinations.generateIndices, filled
// for lengths 4..8 only.
private static HashMap<Integer, HashSet<HashSet<Integer>>> indices;
static {
indices = new HashMap<>();
for (int i = 4; i <= 8; i++) {
indices.put(i, Combinations.generateIndices(i));
// Filters the result map of `stat` in place and accumulates masked-pattern
// counts into josTypeResult.
public static void calculateStatistics(StatisticsNew stat) {
Map<String, AtomicLong> result = stat.getResult();
// 1. filter - keep only inflected types
// (removeIf on keySet() removes the entries from the map behind `result`)
result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.toString().charAt(0)));
// 2. for each inflected type get all possible subcombinations
for (Character josChar : InflectedJosTypes.inflectedJosTypes) {
josTypeResult = new HashMap<>();
// filter out results for a single word type
Map<String, AtomicLong> singleTypeResults = result.entrySet().stream()
.filter(x -> x.getKey().charAt(0) == josChar)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
// NOTE(review): the body of this check is not visible in this listing;
// presumably it skips types with no results - confirm.
if (ValidationUtil.isEmpty(singleTypeResults)) {
// get all possible indices combos for a msd of this length
// HashSet<HashSet<Integer>> indicesCombos = indices.get()
for (Map.Entry<String, AtomicLong> e : singleTypeResults.entrySet()) {
// NOTE(review): `l` is never read in the visible code.
int l = e.getKey().length();
// NOTE(review): indices only holds lengths 4..8; for any other key length
// indices.get(...) returns null and this for-each would throw a
// NullPointerException - confirm key lengths are always in range.
for (HashSet<Integer> indicesCombo : indices.get(e.getKey().length())) {
updateResults(mask(e.getKey(), indicesCombo), e.getValue().longValue());
// Builds a pattern from `word` starting at index 1 (index 0 is skipped):
// positions contained in indicesCombo keep their character, all others
// become ".".
private static String mask(String word, HashSet<Integer> indicesCombo) {
StringBuilder sb = new StringBuilder();
for (int i = 1; i < word.length(); i++) {
sb.append(indicesCombo.contains(i) ? word.charAt(i) : ".");
return sb.toString();
// Adds nOfOccurences to the count stored for `s` in josTypeResult, inserting
// the entry when it is absent.
private static void updateResults(String s, Long nOfOccurences) {
// if not in map add
Long r = josTypeResult.putIfAbsent(s, nOfOccurences);
// else update
if (r != null) {
josTypeResult.put(s, josTypeResult.get(s) + nOfOccurences);
// Converts josTypeResult into rows of {key, count, count / totalValue} and
// appends them to tmpResults.
private static void resultsMapToArray(Long totalValue) {
// Converted to floating point so the division below is not integer division.
Double total = totalValue * 1.0;
Object[][] josTypeResultArray = new Object[josTypeResult.size()][3];
int i = 0;
// NOTE(review): no increment of `i` is visible in this listing; without one
// every entry would overwrite row 0 - confirm against the full file.
for (Map.Entry<String, Long> e : josTypeResult.entrySet()) {
josTypeResultArray[i][0] = e.getKey();
josTypeResultArray[i][1] = e.getValue();
josTypeResultArray[i][2] = e.getValue() / total;
// `debug` is assigned but never read in the visible code.
if (e.getValue() > total) {
String debug = "";
if (tmpResults == null) {
tmpResults = josTypeResultArray;
} else {
// Concatenate the existing rows and the new rows into one array.
int firstLength = tmpResults.length;
int secondLength = josTypeResultArray.length;
Object[][] tmp = new Object[firstLength + secondLength][3];
System.arraycopy(tmpResults, 0, tmp, 0, firstLength);
System.arraycopy(josTypeResultArray, 0, tmp, firstLength, secondLength);
tmpResults = tmp;
// tmpResults = ArrayUtils.addAll(tmpResults, josTypeResultArray);
// Writes every cell of tmpResults to System.out, each followed by a tab.
private static void printArray() {
for (int i = 0; i < tmpResults.length; i++) {
for (int j = 0; j < tmpResults[i].length; j++) {
System.out.print(tmpResults[i][j] + "\t");
@ -80,36 +80,13 @@ public class Ngrams {
// boolean a = (correctPrefix.equals("") && !correctSuffix.equals(""));
// boolean b = (!correctPrefix.equals("") && correctSuffix.equals(""));
// boolean c = (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length());
// boolean d = !((correctPrefix.equals("") && !correctSuffix.equals("")) ||
// (!correctPrefix.equals("") && correctSuffix.equals("")) ||
// (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()));
if(!((stats.getFilter().getPrefixList().size() == 0 && !correctSuffix.equals("")) ||
(!correctPrefix.equals("") && stats.getFilter().getSuffixList().size() == 0) ||
(!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){
// if(!((correctPrefix.equals("") && !correctSuffix.equals("")) ||
// (!correctPrefix.equals("") && correctSuffix.equals("")) ||
// (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){
// continue;
// }
// if last letter is ',' erase it
// if (key.equals("")){
// String test = key;
// }
// if (stats.getFilter().getNotePunctuations())
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
MultipleHMKeys multipleKeys;
// create MultipleHMKeys for different amount of other keys
@ -119,28 +96,17 @@ public class Ngrams {
case 1:
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
// if (stats.getFilter().getNotePunctuations())
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
multipleKeys = new MultipleHMKeys2(key, k1_2);
case 2:
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
// if (stats.getFilter().getNotePunctuations()) {
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
// }
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
case 3:
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
// if (stats.getFilter().getNotePunctuations()) {
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
// }
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
case 4:
@ -148,41 +114,13 @@ public class Ngrams {
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3), stats.getFilter().getWordParts());
// if (stats.getFilter().getNotePunctuations()) {
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
// }
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
multipleKeys = null;
// String lemma = "";
// String wordType = "";
// String msd = "";
// for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
// if(otherKey.toString().equals("lema")){
// lemma = wordToString(ngramCandidate, otherKey);
// } else if(otherKey.toString().equals("besedna vrsta")){
// wordType = wordToString(ngramCandidate, otherKey).substring(0, 1);
// } else if(otherKey.toString().equals("oblikoskladenjska oznaka")){
// msd = wordToString(ngramCandidate, otherKey);
// }
// }
// MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
stats.updateTaxonomyResults(multipleKeys, s.getTaxonomy());
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
@ -191,18 +129,12 @@ public class Ngrams {
* Checks whether an ngram candidate passes specified regex filter.
private static boolean passesRegex(List<Word> ngramCandidate, ArrayList<Pattern> regex, ArrayList<CalculateFor> wordParts) {
// if (ngramCandidate.size() != regex.size()) {
// logger.error("ngramCandidate.size() & msd.size() mismatch"); // should not occur anyway
// return false;
// }
int j = 0;
for (int i = 0; i < ngramCandidate.size(); i++) {
String msd = ngramCandidate.get(i).getMsd(wordParts);
if (msd.equals("*")){
//if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
if (!msd.matches(regex.get(j).pattern() + ".*")) {
return false;
@ -247,11 +179,6 @@ public class Ngrams {
.map(w -> Character.toString(w.getMsd(wordParts).length() > 0 ? w.getMsd(wordParts).charAt(0) : '/'))
// candidate.addAll(ngramCandidate
// .stream()
// .map(w -> Character.toString(w.getMsd().charAt(0)))
// .collect(Collectors.toList()));
// .substring(0, 1)
return StringUtils.join(candidate, " ");
@ -322,32 +249,6 @@ public class Ngrams {
* Checks skipped words and if necessary adds punctuations.
* @return List of candidates represented as a list<candidates(String)>
private static Word checkAndModifySkipgramPunctuation(List<Word> sentence, int i, int j, StatisticsNew stats){
// if the punctuation checkbox is selected and the words at indexes i and j are not next to each other
// if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
// boolean middleWordsHavePunctuation = false;
// for (int n = i + 1; n < j; n++){
// if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
// middleWordsHavePunctuation = true;
// break;
// }
// }
// if (middleWordsHavePunctuation){
// String punctuation = ",";
// return new Word(sentence.get(i).getWord() + punctuation,
// sentence.get(i).getLemma() + punctuation,
// sentence.get(i).getMsd() + punctuation);
// }
// }
return sentence.get(i);
* Extracts skipgram candidates.
@ -363,8 +264,6 @@ public class Ngrams {
for (Sentence s : corpus) {
List<Word> sentence = s.getWords();
// stats.updateUniGramOccurrences(s.getWords().size());
if (sentence == null){
@ -373,7 +272,6 @@ public class Ngrams {
for (int j = i + 1; j <= i + skip + 1; j++) { // 2gram
if (ngram == 2 && j < sentence.size()) {
currentLoop = new ArrayList<>();
// currentLoop.add(sentence.get(i));
fillSkipgrams(currentLoop, i, j, w);
@ -439,25 +337,10 @@ public class Ngrams {
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<Taxonomy> taxonomy) {
// count if no regex is set or if it is & candidate passes it
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) {
// String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
// key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
// stats.updateTaxonomyResults(new MultipleHMKeys1(key),
// stats.getCorpus().getObservableListTaxonomy());
ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts());
// if last letter is ',' erase it
// if (key.equals("")){
// String test = key;
// }
// if (stats.getFilter().getNotePunctuations())
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
MultipleHMKeys multipleKeys;
// create MultipleHMKeys for different amount of other keys
@ -467,28 +350,17 @@ public class Ngrams {
case 1:
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
// if (stats.getFilter().getNotePunctuations())
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
multipleKeys = new MultipleHMKeys2(key, k1_2);
case 2:
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
// if (stats.getFilter().getNotePunctuations()) {
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
// }
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
case 3:
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
// if (stats.getFilter().getNotePunctuations()) {
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
// }
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
case 4:
@ -496,12 +368,6 @@ public class Ngrams {
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3), stats.getFilter().getWordParts());
// if (stats.getFilter().getNotePunctuations()) {
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
// }
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
@ -1,167 +0,0 @@
package alg.word;
import java.util.ArrayList;
import java.util.List;
import alg.Common;
import data.CalculateFor;
import data.Sentence;
import data.Statistics;
import data.Word;
//class WordCount {
// private static void calculateNoFilter(List<Sentence> corpus, Statistics stats) {
// for (Sentence s : corpus) {
// List<String> sentence = new ArrayList<>(s.getWords().size());
// if (stats.getCf() == CalculateFor.LEMMA) {
// sentence.addAll(s.getWords()
// .stream()
// .map(Word::getLemma)
// .collect(Collectors.toList()));
// } else if (stats.getCf() == CalculateFor.WORD) {
// sentence.addAll(s.getWords()
// .stream()
// .map(Word::getWord)
// .collect(Collectors.toList()));
// }
// for (String word : sentence) {
// Common.updateMap(stats.result, word);
// }
// }
// }
// private static void calculateVCC(List<Sentence> corpus, Statistics stats) {
// for (Sentence s : corpus) {
// List<String> sentence = new ArrayList<>(s.getWords().size());
// if (stats.getCf() == CalculateFor.LEMMA) {
// sentence.addAll(s.getWords()
// .stream()
// .map(Word::getCVVLemma)
// .collect(Collectors.toList()));
// } else if (stats.getCf() == CalculateFor.WORD) {
// sentence.addAll(s.getWords()
// .stream()
// .map(Word::getCVVWord)
// .collect(Collectors.toList()));
// }
// for (String word : sentence) {
// if (word.length() > stats.getSubstringLength()) {
// for (int i = 0; i <= word.length() - stats.getSubstringLength(); i++) {
// String substring = word.substring(i, i + stats.getSubstringLength());
// Common.updateMap(stats.result, substring);
// }
// }
// }
// }
// }
// private static void calculateForJosType(List<Sentence> corpus, Statistics stats) {
// for (Sentence s : corpus) {
// List<String> sentence = new ArrayList<>(s.getWords().size());
// List<Word> filteredWords = new ArrayList<>();
// for (Word word : s.getWords()) {
// if (word.getMsd() != null && word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
// filteredWords.add(word);
// }
// }
// if (stats.getCf() == CalculateFor.LEMMA) {
// sentence.addAll(filteredWords
// .stream()
// .map(Word::getLemma)
// .collect(Collectors.toList()));
// } else if (stats.getCf() == CalculateFor.WORD) {
// sentence.addAll(filteredWords
// .stream()
// .map(Word::getWord)
// .collect(Collectors.toList()));
// }
// for (String word : sentence) {
// Common.updateMap(stats.result, word);
// }
// }
// }
// private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
// for (Sentence s : corpus) {
// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
// List<String> sentence = new ArrayList<>(s.getWords().size());
// List<Word> filteredWords = new ArrayList<>();
// for (Word word : s.getWords()) {
// if (word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
// filteredWords.add(word);
// }
// }
// if (stats.getCf() == CalculateFor.LEMMA) {
// sentence.addAll(filteredWords
// .stream()
// .map(Word::getLemma)
// .collect(Collectors.toList()));
// } else if (stats.getCf() == CalculateFor.WORD) {
// sentence.addAll(filteredWords
// .stream()
// .map(Word::getWord)
// .collect(Collectors.toList()));
// }
// for (String word : sentence) {
// Common.updateMap(stats.result, word);
// }
// }
// }
// }
// private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
// for (Sentence s : corpus) {
// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
// List<String> sentence = new ArrayList<>(s.getWords().size());
// if (stats.getCf() == CalculateFor.LEMMA) {
// sentence.addAll(s.getWords()
// .stream()
// .map(Word::getLemma)
// .collect(Collectors.toList()));
// } else if (stats.getCf() == CalculateFor.WORD) {
// sentence.addAll(s.getWords()
// .stream()
// .map(Word::getWord)
// .collect(Collectors.toList()));
// }
// for (String word : sentence) {
// Common.updateMap(stats.result, word);
// }
// }
// }
// }
// static void calculateForAll(List<Sentence> corpus, Statistics stats) {
// boolean taxonomyIsSet = stats.isTaxonomySet();
// boolean JosTypeIsSet = stats.isJOSTypeSet();
// // branching because even though the only difference is an if or two &&
// // O(if) = 1, the amount of ifs adds up and this saves some time
// if (taxonomyIsSet && JosTypeIsSet) {
// calculateForTaxonomyAndJosType(corpus, stats);
// } else if (taxonomyIsSet && !JosTypeIsSet) {
// calculateForTaxonomy(corpus, stats);
// } else if (!taxonomyIsSet && JosTypeIsSet) {
// calculateForJosType(corpus, stats);
// } else {
// if (stats.isVcc()) {
// calculateVCC(corpus, stats);
// } else {
// calculateNoFilter(corpus, stats);
// }
// }
// }
@ -3,24 +3,6 @@ package data;
import gui.I18N;
public enum CalculateFor {
// calculateFor.WORD=word
// calculateFor.NORMALIZED_WORD=normalized word
// calculateFor.LEMMA=lemma
// calculateFor.MORPHOSYNTACTIC_PROPERTY=oblikoskladenjska lastnost
// calculateFor.WORD_TYPE=besedna vrsta
// calculateFor.DIST_WORDS=različnica
// calculateFor.DIST_LEMMAS=lema
// WORD("različnica"),
// NORMALIZED_WORD("normalizirana različnica"),
// LEMMA("lema"),
// MORPHOSYNTACTIC_SPECS("oblikoskladenjska oznaka"),
// MORPHOSYNTACTIC_PROPERTY("oblikoskladenjska lastnost"),
// WORD_TYPE("besedna vrsta"),
// DIST_WORDS("različnica"),
// DIST_LEMMAS("lema");
@ -44,7 +26,6 @@ public enum CalculateFor {
public static CalculateFor factory(String cf) {
if (cf != null) {
// String name = I18N.findI18NString(cf, "calculateFor");
if (WORD.toString().equals(cf)) {
return WORD;
@ -275,27 +256,4 @@ public enum CalculateFor {
return null;
// public String toPercentString() {
// switch(this){
// case WORD:
// return "Delež glede na vse različnice";
// return "Delež glede na vse normalizirane različnice";
// case LEMMA:
// return "Delež glede na vse leme";
// return "Delež glede na vse oblikoskladenjske oznake";
// return "Delež glede na vse oblikoskladenjske lastnosti";
// case WORD_TYPE:
// return "Delež glede na vse besedne vrste";
// case DIST_WORDS:
// return "Delež glede na vse različnice";
// case DIST_LEMMAS:
// return "Delež glede na vse leme";
// default:
// return null;
// }
// }
@ -74,13 +74,4 @@ public enum Collocability {
return null;
// public String toPercentString() {
// switch(this){
// case DICE:
// return "Delež glede na vse različnice";
// default:
// return null;
// }
// }
@ -17,7 +17,6 @@ import org.apache.logging.log4j.Logger;
import gui.ValidationUtil;
import javafx.collections.ObservableList;
import org.controlsfx.control.CheckComboBox;
public class Corpus {
public final static Logger logger = LogManager.getLogger(Corpus.class);
@ -33,7 +32,6 @@ public class Corpus {
public HashMap<String, ObservableList<String>> solarSelectedFilters; // if solar selected
private HashMap<String, HashSet<String>> solarFiltersForXML; // if solar - used while parsing xml
private boolean gosOrthMode;
boolean hasMsdData;
private ArrayList<String> validationErrors;
private String corpusName = "";
private String punctuation = "punctuation.COMMA";
@ -48,7 +46,6 @@ public class Corpus {
public void setCorpusName(String corpusName) {
// System.out.println(corpusName);
this.corpusName = corpusName;
||||"Corpus.set: ", corpusName);
@ -58,7 +55,6 @@ public class Corpus {
public void setPunctuation(String punctuation) {
// System.out.println(corpusName);
this.punctuation = punctuation;
||||"Punctuation.set: ", punctuation);
@ -99,10 +95,6 @@ public class Corpus {
|||"Corpus.set: ", detectedCorpusFiles);
public boolean isHeaderRead() {
return headerRead;
public void setHeaderRead(boolean headerRead) {
this.headerRead = headerRead;
@ -128,11 +120,6 @@ public class Corpus {
return FXCollections.observableArrayList(al);
// public ObservableList<String> getFormattedTaxonomy() {
// ArrayList<String> al = Tax.getTaxonomyFormatted(new ArrayList<>(taxonomy), corpusType);
// return FXCollections.observableArrayList(al);
// }
public void setTaxonomy(ObservableList<String> taxonomy) {
this.taxonomy = new ArrayList<>();
@ -155,15 +142,6 @@ public class Corpus {
return solarSelectedFilters;
public void setSolarSelectedFilters(HashMap<String, ObservableList<String>> solarFilters) {
this.solarSelectedFilters = solarFilters;
||||"Corpus.set: ", solarFilters);
public HashMap<String, HashSet<String>> getSolarFiltersForXML() {
return solarFiltersForXML;
public void setSolarFiltersForXML(HashMap<String, HashSet<String>> solarFiltersForXML) {
this.solarFiltersForXML = solarFiltersForXML;
||||"Corpus.set: ", solarFiltersForXML);
@ -173,23 +151,10 @@ public class Corpus {
return gosOrthMode;
public void setGosOrthMode(boolean gosOrthMode) {
this.gosOrthMode = gosOrthMode;
||||"Corpus.set: ", gosOrthMode);
public ArrayList<String> getValidationErrors() {
return validationErrors;
public String getValidationErrorsToString() {
return StringUtils.join(validationErrors, "\n - ");
public void setValidationErrors(ArrayList<String> validationErrors) {
this.validationErrors = validationErrors;
public boolean validate() {
if (corpusType == null) {
@ -1,12 +0,0 @@
package data.Enums;
import java.util.Arrays;
import java.util.HashSet;
/**
 * Holder for the one-letter word-type codes that are treated as inflected.
 */
public class InflectedJosTypes {
    /**
     * The inflected word-type codes: {@code 'S'}, {@code 'G'} and {@code 'P'}.
     *
     * <p>The declared type stays a mutable {@link HashSet} so that existing
     * callers relying on that type keep compiling; callers should treat the
     * set as read-only.
     */
    public static final HashSet<Character> inflectedJosTypes =
            new HashSet<>(Arrays.asList('S', 'G', 'P'));
}
@ -1,68 +0,0 @@
package data.Enums;
import java.util.HashMap;
/**
 * Word classes, each with a Slovenian name and one-letter code, an English
 * name and one-letter code, and the number of attributes of that class.
 */
public enum Msd {
    NOUN("samostalnik", 'S', "Noun", 'N', 5),
    VERB("glagol", 'G', "Verb", 'V', 7),
    ADJECTIVE("pridevnik", 'P', "Adjective", 'A', 6),
    ADVERB("prislov", 'R', "Adverb", 'R', 2),
    PRONOUN("zaimek", 'Z', "Pronoun", 'P', 8),
    NUMERAL("števnik", 'K', "Numeral", 'M', 6),
    PREPOSITION("predlog", 'D', "Preposition", 'S', 1),
    CONJUNCTION("veznik", 'V', "Conjunction", 'C', 1),
    PARTICLE("členek", 'L', "Particle", 'Q', 0),
    INTERJECTION("medmet", 'M', "Interjection", 'I', 0),
    ABBREVIATION("okrajšava", 'O', "Abbreviation", 'Y', 0),
    RESIDUAL("neuvrščeno", 'N', "Residual", 'X', 1);

    /** Slovenian one-letter code -> number of attributes, filled once below. */
    private static final HashMap<Character, Integer> SI_CODE_N_OF_ATTRIBUTES = new HashMap<>();

    static {
        // Runs after all constants are constructed, so values() is complete here.
        for (Msd msd : values()) {
            SI_CODE_N_OF_ATTRIBUTES.put(msd.siCode, msd.nOfAttributes);
        }
    }

    private final String siName;
    private final Character siCode;
    private final String enName;
    private final Character enCode;
    private final int nOfAttributes;

    Msd(String siName, Character siCode, String enName, Character enCode, int nOfAttributes) {
        this.siName = siName;
        this.siCode = siCode;
        this.enName = enName;
        this.enCode = enCode;
        this.nOfAttributes = nOfAttributes;
    }

    /** @return the Slovenian name of the word class */
    public String getSiName() {
        return siName;
    }

    /** @return the Slovenian one-letter code of the word class */
    public Character getSiCode() {
        return siCode;
    }

    /** @return the English name of the word class */
    public String getEnName() {
        return enName;
    }

    /** @return the English one-letter code of the word class */
    public Character getEnCode() {
        return enCode;
    }

    /**
     * Returns the number of attributes for the given type, plus one for the
     * leading type character.
     *
     * @param msd a tag whose first character is a Slovenian one-letter code
     * @return the attribute count registered for that code, plus one
     * @throws IllegalArgumentException if {@code msd} is {@code null} or empty,
     *         or its first character is not a known Slovenian code
     */
    public static int getMsdLengthForType(String msd) {
        if (msd == null || msd.isEmpty()) {
            throw new IllegalArgumentException("msd must not be null or empty");
        }
        // Look up as Integer first: unboxing a missing entry directly would
        // fail with an uninformative NullPointerException.
        Integer attributes = SI_CODE_N_OF_ATTRIBUTES.get(msd.charAt(0));
        if (attributes == null) {
            throw new IllegalArgumentException("Unknown word-type code: " + msd.charAt(0));
        }
        return attributes + 1;
    }
}
@ -27,9 +27,6 @@ public class SolarFilters {
SOLAR_FILTERS.put(TIP, FXCollections.observableArrayList("esej/spis", "pisni izdelek (učna ura)", "test (daljše besedilo)", "test (odgovori na vprašanja)"));
public static final ObservableList<String> N_GRAM_COMPUTE_FOR_FULL = FXCollections.observableArrayList("različnica", "lema", "oblikoskladenjska oznaka", "oblikoskladenjska lastnost", "besedna vrsta");
public static final ObservableList<String> N_GRAM_COMPUTE_FOR_LIMITED = FXCollections.observableArrayList("različnica", "lema");
* Returns filters with all possible values
@ -349,7 +349,6 @@ public class Filter implements Cloneable {
public Object clone() throws CloneNotSupportedException{
Filter f = null;
try {
@ -1,71 +0,0 @@
package data;
/**
 * Word types, each pairing a lower-case name with a one-letter word-type code.
 */
public enum GigafidaJosWordType {
    SAMOSTALNIK("samostalnik", 'S'),
    GLAGOL("glagol", 'G'),
    PRIDEVNIK("pridevnik", 'P'),
    PRISLOV("prislov", 'R'),
    ZAIMEK("zaimek", 'Z'),
    STEVNIK("stevnik", 'K'),
    PREDLOG("predlog", 'D'),
    VEZNIK("veznik", 'V'),
    CLENEK("clenek", 'L'),
    MEDMET("medmet", 'M'),
    OKRAJSAVA("okrajsava", 'O');

    private final String name;
    private final char wordType;

    GigafidaJosWordType(String name, char wordType) {
        this.name = name;
        this.wordType = wordType;
    }

    /** @return the name given to this constant (not the enum identifier) */
    @Override
    public String toString() {
        return name;
    }

    /** @return the one-letter word-type code of this constant */
    public char getWordType() {
        return wordType;
    }

    /**
     * Looks up a constant by the name returned from {@link #toString()}.
     *
     * @param wType the name to look up; may be {@code null}
     * @return the matching constant, or {@code null} when {@code wType} is
     *         {@code null} or matches no constant
     */
    public static GigafidaJosWordType factory(String wType) {
        if (wType == null) {
            return null;
        }
        // Names are unique, so the first match is the only match.
        for (GigafidaJosWordType candidate : values()) {
            if (candidate.name.equals(wType)) {
                return candidate;
            }
        }
        return null;
    }
}
@ -1,76 +0,0 @@
package data;
import java.util.ArrayList;
import java.util.Arrays;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
// Taxonomy entries, each pairing a name with a dotted taxonomy code
// (for example "T.K.L").
// NOTE(review): this listing has lost lines (closing braces and several
// return statements) and two lines are garbled; both are flagged below.
public enum GigafidaTaxonomy {
TISK("tisk", "T"),
KNJIZNO("knjižno", "T.K"),
LEPOSLOVNO("leposlovno", "T.K.L"),
STROKOVNO("strokovno", "T.K.S"),
PERIODICNO("periodično", "T.P"),
CASOPIS("časopis", "T.P.C"),
REVIJA("revija", "T.P.R"),
INTERNET("internet", "I");
// First constructor argument.
private final String name;
// Second constructor argument; returned by getTaxonomnyString().
private final String taxonomy;
// Observable list assigned once in the static initializer below.
private static final ObservableList<String> FOR_COMBO_BOX;
static {
// NOTE(review): the right-hand side of this assignment is garbled in this
// listing; which strings end up in the list cannot be determined from here.
ArrayList<String> values = ->;
FOR_COMBO_BOX = FXCollections.observableArrayList(values);
GigafidaTaxonomy(String name, String taxonomy) {
// NOTE(review): the left-hand side is garbled; presumably this assigns the
// `name` field - confirm against the original file.
|||| = name;
this.taxonomy = taxonomy;
// NOTE(review): the body of toString() is not visible in this listing.
public String toString() {
// Returns the taxonomy code of this constant.
public String getTaxonomnyString() {
return this.taxonomy;
// Returns the constant whose toString() equals `tax`, or null when `tax` is
// null or matches no constant.
// NOTE(review): the return statements for LEPOSLOVNO, STROKOVNO and
// PERIODICNO are not visible in this listing.
public static GigafidaTaxonomy factory(String tax) {
if (tax != null) {
if (TISK.toString().equals(tax)) {
return TISK;
if (KNJIZNO.toString().equals(tax)) {
return KNJIZNO;
if (LEPOSLOVNO.toString().equals(tax)) {
if (STROKOVNO.toString().equals(tax)) {
if (PERIODICNO.toString().equals(tax)) {
if (CASOPIS.toString().equals(tax)) {
return CASOPIS;
if (REVIJA.toString().equals(tax)) {
return REVIJA;
if (INTERNET.toString().equals(tax)) {
return INTERNET;
return null;
// NOTE(review): body not visible here; presumably returns FOR_COMBO_BOX - confirm.
public static ObservableList<String> getForComboBox() {
@ -1,85 +0,0 @@
package data;
import java.util.ArrayList;
import java.util.Arrays;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
// Taxonomy entries, each pairing a name with a dotted taxonomy code prefixed
// with "gos." (for example "gos.T.J.I").
// NOTE(review): this listing has lost lines (closing braces and several
// return statements) and two lines are garbled; both are flagged below.
public enum GosTaxonomy {
JAVNI("javni", "gos.T.J"),
INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "gos.T.J.I"),
RAZVEDRILNI("razvedrilni", "gos.T.J.R"),
NEJAVNI("nejavni", "gos.T.N"),
NEZASEBNI("nezasebni", "gos.T.N.N"),
ZASEBNI("zasebni", "gos.T.N.Z"),
OSEBNI_STIK("osebni stik", "gos.K.O"),
TELEFON("telefon", "gos.K.P"),
RADIO("radio", "gos.K.R"),
TELEVIZIJA("televizija", "gos.K.T");
// First constructor argument.
private final String name;
// Second constructor argument; returned by getTaxonomnyString().
private final String taxonomy;
// Observable list assigned once in the static initializer below.
private static final ObservableList<String> FOR_COMBO_BOX;
static {
// NOTE(review): the right-hand side of this assignment is garbled in this
// listing; which strings end up in the list cannot be determined from here.
ArrayList<String> values = ->;
FOR_COMBO_BOX = FXCollections.observableArrayList(values);
GosTaxonomy(String name, String taxonomy) {
// NOTE(review): the left-hand side is garbled; presumably this assigns the
// `name` field - confirm against the original file.
|||| = name;
this.taxonomy = taxonomy;
// NOTE(review): the body of toString() is not visible in this listing.
public String toString() {
// Returns the taxonomy code of this constant.
public String getTaxonomnyString() {
return this.taxonomy;
// Returns the constant whose toString() equals `tax`, or null when `tax` is
// null or matches no constant.
// NOTE(review): the return statements for INFORMATIVNO_IZOBRAZEVALNI,
// RAZVEDRILNI, NEZASEBNI, OSEBNI_STIK and TELEVIZIJA are not visible in this
// listing.
public static GosTaxonomy factory(String tax) {
if (tax != null) {
if (JAVNI.toString().equals(tax)) {
return JAVNI;
if (INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) {
if (RAZVEDRILNI.toString().equals(tax)) {
if (NEJAVNI.toString().equals(tax)) {
return NEJAVNI;
if (NEZASEBNI.toString().equals(tax)) {
if (ZASEBNI.toString().equals(tax)) {
return ZASEBNI;
if (OSEBNI_STIK.toString().equals(tax)) {
if (TELEFON.toString().equals(tax)) {
return TELEFON;
if (RADIO.toString().equals(tax)) {
return RADIO;
if (TELEVIZIJA.toString().equals(tax)) {
return null;
// NOTE(review): body not visible here; presumably returns FOR_COMBO_BOX - confirm.
public static ObservableList<String> getForComboBox() {
@ -36,15 +36,12 @@ public final class MultipleHMKeys2 implements MultipleHMKeys {
public int hashCode() {
return Objects.hash(k1, k2);
// return key.hashCode();
public boolean equals(Object obj) {
return (obj instanceof MultipleHMKeys2) && ((MultipleHMKeys2) obj).k1.equals(k1)
&& ((MultipleHMKeys2) obj).k2.equals(k2);
// return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key);
public MultipleHMKeys[] splitNgramTo1grams(){
@ -18,22 +18,6 @@ public class Sentence {
this.taxonomy = taxonomy;
// public Sentence(List<Word> words) {
// this.words = words;
// }
public Sentence(List<Word> words, List<Taxonomy> taxonomy, Map<String, String> properties) {
this.words = words;
this.taxonomy = taxonomy;
|||| = properties;
public Sentence(List<Word> words, List<Taxonomy> taxonomy, String type) {
this.words = words;
this.taxonomy = taxonomy;
this.type = type;
public List<Word> getWords() {
return words;
@ -8,9 +8,6 @@ public class Settings {
public static final int CORPUS_SENTENCE_LIMIT = 50000;
public static final boolean PRINT_LOG = false;
public static final String FX_ACCENT_OK = "-fx-accent: forestgreen;";
public static final String FX_ACCENT_NOK = "-fx-accent: red;";
public static Collection<File> corpus;
public static File resultsFilePath;
@ -1,299 +0,0 @@
package data;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import util.Util;
import util.db.RDB;
public class Statistics {
private CorpusType corpusType;
private AnalysisLevel analysisLevel;
private boolean useDB;
private RDB db;
private boolean analysisProducedResults;
private String taxonomy;
private boolean taxonomyIsSet;
private char JOSType;
private boolean JOSTypeIsSet;
private String resultTitle;
public Map<String, AtomicLong> result = new ConcurrentHashMap<>();
// nGrams
private int nGramLevel;
private Integer skip;
private CalculateFor cf;
private List<Pattern> morphosyntacticFilter;
// distributions
private String distributionTaxonomy;
private char distributionJosWordType;
private boolean vcc;
private Integer substringLength;
// inflected JOS
private String inflectedJosTaxonomy;
// GOS
boolean gosOrthMode;
// šolar
Map<String, Object> solarHeadBlockFilter;
// for ngrams
public Statistics(AnalysisLevel al, int nGramLevel, Integer skip, CalculateFor cf) {
String dateTime =""));
|||| = cf;
this.analysisLevel = al;
this.nGramLevel = nGramLevel;
this.skip = skip == null || skip == 0 ? null : skip;
this.resultTitle = String.format("%s%d-gram_%s_%s",
this.skip != null ? String.format("%d-%s-", skip, "skip") : "",
// for words distributions
// public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
// String dateTime =""));
// this.resultTitle = String.format("%s_%s_%s",
// distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
// distributionJosWordType != null ? distributionJosWordType.toString() : "",
// dateTime);
// this.analysisLevel = al;
// = cf;
// this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
// this.taxonomyIsSet = distributionTaxonomy != null;
// this.JOSTypeIsSet = distributionJosWordType != null;
// this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
// }
public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) {
String dateTime =""));
this.resultTitle = String.format("%s_%d_%s",
"Distribucija zaporedij samoglasnikov in soglasnikov",
this.analysisLevel = al;
|||| = cf;
this.substringLength = substringLength;
this.vcc = true;
// public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
// String dateTime =""));
// this.resultTitle = String.format("InflectedJOS_%s_%s",
// distributionTaxonomy != null ? distributionTaxonomy : "",
// dateTime);
// this.analysisLevel = al;
// this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
// this.taxonomyIsSet = inflectedJosTaxonomy != null;
// }
public Integer getSkip() {
return skip;
public Integer getSubstringLength() {
return substringLength;
public String getInflectedJosTaxonomy() {
return inflectedJosTaxonomy;
public void setSubstringLength(Integer substringLength) {
this.substringLength = substringLength;
public boolean isVcc() {
return vcc;
public void setVcc(boolean vcc) {
this.vcc = vcc;
public String getDistributionTaxonomy() {
return distributionTaxonomy;
public void setDistributionTaxonomy(String distributionTaxonomy) {
this.distributionTaxonomy = distributionTaxonomy;
public char getDistributionJosWordType() {
return distributionJosWordType;
public void setDistributionJosWordType(char distributionJosWordType) {
this.distributionJosWordType = distributionJosWordType;
public void setMorphosyntacticFilter(List<String> morphosyntacticFilter) {
// change filter strings to regex patterns
this.morphosyntacticFilter = new ArrayList<>();
for (String s : morphosyntacticFilter) {
this.morphosyntacticFilter.add(Pattern.compile(s.replaceAll("\\*", ".")));
public List<Pattern> getMsd() {
return morphosyntacticFilter;
public Map<String, AtomicLong> getResult() {
return result;
public void setTaxonomy(String taxonomy) {
this.taxonomy = taxonomy;
public void setTaxonomyIsSet(boolean taxonomyIsSet) {
this.taxonomyIsSet = taxonomyIsSet;
public char getJOSType() {
return JOSType;
public void setJOSType(char JOSType) {
this.JOSType = JOSType;
public boolean isJOSTypeSet() {
return JOSTypeIsSet;
/**
 * Sets the {@code JOSTypeIsSet} flag, i.e. the value reported by {@code isJOSTypeSet()}.
 * NOTE(review): despite its name this overload does not touch {@code JOSType} itself;
 * the {@code char} overload of {@code setJOSType} does that.
 *
 * @param JOSTypeIsSet new value of the flag
 */
public void setJOSType(boolean JOSTypeIsSet) {
this.JOSTypeIsSet = JOSTypeIsSet;
public void saveResultToDisk(int... limit) throws UnsupportedEncodingException {
// Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
// if (useDB) {
// result = db.getDump();
// db.delete();
// }
// // if no results and nothing to save, return false
// if (!(result.size() > 0)) {
// analysisProducedResults = false;
// return;
// } else {
// analysisProducedResults = true;
// }
// stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
// Export.SetToCSV(stats);
// private Map<String, Integer> getSortedResultInflected(Map map) {
// // first convert to <String, Integer>
// Map<String, Integer> m = Util.sortByValue(Util.atomicInt2StringAndInt(map), 0);
// Map<String, Integer> sortedM = new TreeMap<>();
// sortedM.putAll(m);
// return sortedM;
// }
/**
 * Converts {@code map} with {@code Util.atomicInt2StringAndInt} and hands the converted map,
 * together with {@code limit}, to {@code Util.sortByValue}.
 * NOTE(review): presumably this orders entries by count and keeps at most {@code limit} of
 * them - confirm against {@code Util.sortByValue}.
 *
 * @param map   occurrence counters keyed by {@code MultipleHMKeys}
 * @param limit passed through unchanged to {@code Util.sortByValue}
 * @return whatever {@code Util.sortByValue} produces for the converted map
 */
private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
public String getTaxonomy() {
return taxonomy;
public boolean isTaxonomySet() {
return taxonomyIsSet;
public int getnGramLevel() {
return nGramLevel;
public CalculateFor getCf() {
return cf;
public AnalysisLevel getAnalysisLevel() {
return analysisLevel;
public CorpusType getCorpusType() {
return corpusType;
public void setCorpusType(CorpusType corpusType) {
this.corpusType = corpusType;
public boolean isGosOrthMode() {
return gosOrthMode;
public void setGosOrthMode(boolean gosOrthMode) {
this.gosOrthMode = gosOrthMode;
public Map<String, Object> getSolarHeadBlockFilter() {
return solarHeadBlockFilter;
public void setSolarHeadBlockFilter(Map<String, Object> solarHeadBlockFilter) {
this.solarHeadBlockFilter = solarHeadBlockFilter;
public boolean isUseDB() {
return useDB;
public void setUseDB(boolean useDB) {
if (useDB && db == null) {
db = new RDB();
this.useDB = useDB;
* Stores results from this batch to a database and clears results map
public void storeTmpResultsToDB() {
try {
result = new ConcurrentHashMap<>();
} catch (UnsupportedEncodingException e) {
public boolean isAnalysisProducedResults() {
return analysisProducedResults;
@ -18,7 +18,6 @@ import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import alg.inflectedJOS.WordFormation;
import data.Enums.WordLevelType;
import javafx.collections.ObservableList;
import util.Export;
@ -166,22 +166,6 @@ public class Tax {
// ArrayList<String> taxonomyString = new ArrayList<>();
// for (Taxonomy t : taxonomyResult.keySet()){
// taxonomyString.add(t.toString());
// }
// ObservableList<String> taxonomyObservableString = Tax.getTaxonomyForComboBox(corpus.getCorpusType(), new HashSet<>(taxonomyString));
// ArrayList<String> sortedTaxonomyString = new ArrayList<>();
// for (String t : taxonomyObservableString){
// sortedTaxonomyString.add(t);
// }
// assures same relative order
@ -198,59 +182,6 @@ public class Tax {
return corpusTypesWithTaxonomy;
public static ArrayList<String> getTaxonomyCodes(ArrayList<Taxonomy> taxonomyNames, CorpusType corpusType) {
ArrayList<String> result = new ArrayList<>();
if (ValidationUtil.isEmpty(taxonomyNames)) {
return result;
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
} else if (corpusType == CorpusType.GOS) {
// for easier lookup
Map<String, String> taxInversed = tax.entrySet()
.collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
for (Taxonomy taxonomyName : taxonomyNames) {
return result;
// public static ArrayList<String> getTaxonomyFormatted(ArrayList<String> taxonomyNames, CorpusType corpusType) {
// ArrayList<String> result = new ArrayList<>();
// if (ValidationUtil.isEmpty(taxonomyNames)) {
// return result;
// }
// LinkedHashMap<String, String> tax = new LinkedHashMap<>();
// if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
// } else if (corpusType == CorpusType.GOS) {
// tax = GOS_TAXONOMY;
// }
// // for easier lookup
// Map<String, String> taxInversed = tax.entrySet()
// .stream()
// .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
// for (String taxonomyName : taxonomyNames) {
// result.add(taxInversed.get(taxonomyName) + " - " + taxonomyName);
// }
// return result;
// }
* Returns a list of proper names for codes
@ -283,13 +214,4 @@ public class Tax {
return result;
public static String getLongTaxonomyName(String shortName){
if (GIGAFIDA_TAXONOMY.containsKey(shortName))
return GIGAFIDA_TAXONOMY.get(shortName);
else if(GOS_TAXONOMY.containsKey(shortName))
return GOS_TAXONOMY.get(shortName);
return null;
@ -28,14 +28,6 @@ enum TaxonomyEnum {
// Gigafida
// KNJIZNO("knjižno", "T.K", "gigafida"),
// LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"),
// STROKOVNO("strokovno", "T.K.S", "gigafida"),
// PERIODICNO("periodično", "T.P", "gigafida"),
// CASOPIS("časopis", "T.P.C", "gigafida"),
// REVIJA("revija", "T.P.R", "gigafida"),
// INTERNET("internet", "I", "gigafida"),
SSJ_TISK("SSJ.T", "SSJ.T - tisk"),
SSJ_KNJIZNO("SSJ.T.K", " SSJ.T.K - tisk-knjižno"),
SSJ_LEPOSLOVNO("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno"),
@ -148,9 +140,6 @@ enum TaxonomyEnum {
// Gigafida
// if (TISK.toString().equals(tax)) {
// return TISK;
// }
if (SSJ_TISK.toString().equals(tax)) {
return SSJ_TISK;
@ -339,9 +328,6 @@ enum TaxonomyEnum {
// Gigafida
// if (TISK.toString().equals(tax)) {
// return TISK;
// }
if (SSJ_TISK.toLongNameString().equals(tax)) {
return SSJ_TISK;
@ -483,7 +469,6 @@ enum TaxonomyEnum {
public static ArrayList<TaxonomyEnum> taxonomySelected(TaxonomyEnum disjointTaxonomy) {
ArrayList<TaxonomyEnum> r = new ArrayList<>();
// System.out.println(disjointTaxonomy);
@ -696,12 +681,8 @@ enum TaxonomyEnum {
public static ArrayList<TaxonomyEnum> convertStringListToTaxonomyList(ObservableList<String> stringList, Corpus corpus){
// System.out.println("1.");
// System.out.println(stringList);
ArrayList<TaxonomyEnum> taxonomyList = new ArrayList<>();
// System.out.println("INTERESTING STUFF");
// System.out.println(stringList);
for (String e : stringList) {
for (Taxonomy t : corpus.getTaxonomy()){
if (t.toLongNameString().equals(e)) {
@ -709,18 +690,11 @@ enum TaxonomyEnum {
// System.out.println(taxonomyList);
// System.out.println("-----------------");
return taxonomyList;
public static void modifyingTaxonomy(ArrayList<TaxonomyEnum> taxonomy, ArrayList<TaxonomyEnum> checkedItemsTaxonomy, Corpus corpus){
// get taxonomies that were selected/deselected by user
// System.out.println("Print here:");
// System.out.println(taxonomy);
// System.out.println(checkedItemsTaxonomy);
// System.out.println("-------------");
Set<TaxonomyEnum> disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy);
if (taxonomy != null) {
@ -739,7 +713,6 @@ enum TaxonomyEnum {
if(!TaxonomyEnum.convertStringListToTaxonomyList(corpus.getObservableListTaxonomy(), corpus).contains(s)){
// taxonomy.remove(s);
@ -790,11 +763,6 @@ public class Taxonomy {
// public Taxonomy(String name, String longName) {
// = name;
// this.longName = longName;
// }
public String toString() {
@ -813,7 +781,6 @@ public class Taxonomy {
return t;
return null;
// return new Taxonomy(tax, false);
public static Taxonomy factoryLongName(String tax, Corpus corpus) {
@ -822,87 +789,6 @@ public class Taxonomy {
return t;
return null;
// return new Taxonomy(tax, true);
// public static ArrayList<Taxonomy> taxonomySelected(Taxonomy disjointTaxonomy) {
// ArrayList<TaxonomyEnum> rTaxonomyEnum = TaxonomyEnum.taxonomySelected(disjointTaxonomy.getTaxonomyEnum());
// ArrayList<Taxonomy> r = new ArrayList<>();
// for(TaxonomyEnum t : rTaxonomyEnum){
// r.add(new Taxonomy(t.toString(), false));
// }
// return r;
// }
public static ArrayList<Taxonomy> taxonomyDeselected(Taxonomy disjointTaxonomy){
// ArrayList<TaxonomyEnum> r = new ArrayList<>();
// Map<TaxonomyEnum, TaxonomyEnum> connections = new ConcurrentHashMap<>();
// connections.put(DISKURZ_JAVNI, DISKURZ);
// connections.put(DISKURZ_NEJAVNI, DISKURZ);
// connections.put(SITUACIJA_RADIO, SITUACIJA);
// connections.put(KANAL_OSEBNI_STIK, KANAL);
// connections.put(KANAL_TELEFON, KANAL);
// connections.put(KANAL_RADIO, KANAL);
// connections.put(KANAL_TELEVIZIJA, KANAL);
// connections.put(SSJ_KNJIZNO, SSJ_TISK);
// connections.put(SSJ_LEPOSLOVNO, SSJ_KNJIZNO);
// connections.put(SSJ_STROKOVNO, SSJ_KNJIZNO);
// connections.put(SSJ_PERIODICNO, SSJ_TISK);
// connections.put(SSJ_CASOPIS, SSJ_PERIODICNO);
// connections.put(SSJ_REVIJA, SSJ_PERIODICNO);
// connections.put(SSJ_DRUGO, SSJ_TISK);
// connections.put(FT_P_GOVORNI, FT_P_PRENOSNIK);
// connections.put(FT_P_ELEKTRONSKI, FT_P_PRENOSNIK);
// connections.put(FT_P_PISNI, FT_P_PRENOSNIK);
// connections.put(FT_P_OBJAVLJENO, FT_P_PISNI);
// connections.put(FT_P_KNJIZNO, FT_P_OBJAVLJENO);
// connections.put(FT_P_PERIODICNO, FT_P_OBJAVLJENO);
// connections.put(FT_P_CASOPISNO, FT_P_OBJAVLJENO);
// connections.put(FT_P_DNEVNO, FT_P_CASOPISNO);
// connections.put(FT_P_REVIALNO, FT_P_PERIODICNO);
// connections.put(FT_P_TEDENSKO, FT_P_REVIALNO);
// connections.put(FT_P_MESECNO, FT_P_REVIALNO);
// connections.put(FT_P_OBCASNO, FT_P_REVIALNO);
// connections.put(FT_P_NEOBJAVLJENO, FT_P_PISNI);
// connections.put(FT_P_JAVNO, FT_P_NEOBJAVLJENO);
// connections.put(FT_P_INTERNO, FT_P_NEOBJAVLJENO);
// connections.put(FT_P_ZASEBNO, FT_P_NEOBJAVLJENO);
// connections.put(FT_UMETNOSTNA, FT_ZVRST);
// connections.put(FT_PESNISKA, FT_UMETNOSTNA);
// connections.put(FT_PROZNA, FT_UMETNOSTNA);
// connections.put(FT_DRAMSKA, FT_UMETNOSTNA);
// connections.put(FT_NEUMETNOSTNA, FT_ZVRST);
// connections.put(FT_STROKOVNA, FT_NEUMETNOSTNA);
// connections.put(FT_HID, FT_STROKOVNA);
// connections.put(FT_NIT, FT_STROKOVNA);
// connections.put(FT_PRAVNA, FT_NEUMETNOSTNA);
// connections.put(FT_DA, FT_LEKTORIRANO);
// connections.put(FT_NE, FT_LEKTORIRANO);
// TaxonomyEnum currentTaxonomy = disjointTaxonomy;
// r.add(currentTaxonomy);
// while(connections.containsKey(currentTaxonomy)){
// currentTaxonomy = connections.get(currentTaxonomy);
// r.add(currentTaxonomy);
// }
// Collections.reverse(r);
// return r;
return null;
public static ArrayList<Taxonomy> convertStringListToTaxonomyList(ObservableList<String> stringList, Corpus corpus){
@ -919,7 +805,6 @@ public class Taxonomy {
public static ArrayList<TaxonomyEnum> taxonomyToTaxonomyEnum(ArrayList<Taxonomy> taxonomy){
// System.out.println(taxonomy);
if (taxonomy == null) {
return null;
@ -934,11 +819,6 @@ public class Taxonomy {
public static ArrayList<Taxonomy> taxonomyEnumToTaxonomy(ArrayList<TaxonomyEnum> taxonomy, Corpus corpus){
// ArrayList<Taxonomy> r = new ArrayList<>();
// for (TaxonomyEnum t : taxonomy){
// r.add(new Taxonomy(t));
// }
// return r;
ArrayList<Taxonomy> r = new ArrayList<>();
for (TaxonomyEnum te : taxonomy){
for (Taxonomy t : corpus.getTaxonomy()){
@ -15,7 +15,6 @@ import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
//import javafx.scene.image.Image;
import javafx.scene.image.ImageView;
import javafx.scene.layout.AnchorPane;
import javafx.scene.layout.Pane;
@ -29,7 +28,6 @@ import;
import java.util.*;
import java.util.regex.Pattern;
import static alg.XML_processing.readXML;
import static gui.GUIController.showAlert;
@ -129,20 +127,10 @@ public class CharacterAnalysisTab {
private ComboBox<String> taxonomySetOperationCB;
private String taxonomySetOperation;
// @FXML
// private ToggleGroup calculateForRB;
// private CalculateFor calculateFor;
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
private RadioButton lemmaRB;
private RadioButton varietyRB;
private Pane paneLetters;
@ -171,13 +159,12 @@ public class CharacterAnalysisTab {
private Corpus corpus;
private HashMap<String, HashSet<String>> solarFiltersMap;
private Filter filter;
private boolean useDb;
private HostServices hostService;
private ListChangeListener<String> taxonomyListener;
private ChangeListener<Boolean> msdListener;
private ChangeListener<Boolean> minimalOccurrencesListener;
private ChangeListener<Boolean> minimalTaxonomyListener;
private boolean useDb;
private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));
@ -185,14 +172,8 @@ public class CharacterAnalysisTab {
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
private static final ArrayList<String> TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("različnica", "lema");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
// TODO: pass observables for taxonomy based on header scan
// after header scan
private ObservableList<String> taxonomyCCBValues;
private CorpusType currentCorpusType;
public void init() {
@ -203,24 +184,11 @@ public class CharacterAnalysisTab {
currentMode = MODE.LETTER;
// calculateForRB.selectedToggleProperty().addListener(new ChangeListener<Toggle>() {
// @Override
// public void changed(ObservableValue<? extends Toggle> observable, Toggle oldValue, Toggle newValue) {
// //"calculateForRB:", newValue.toString());
// RadioButton chk = (RadioButton)newValue.getToggleGroup().getSelectedToggle(); // Cast object to radio button
// calculateFor = CalculateFor.factory(chk.getText());
//"calculateForRB:", chk.getText());
// //System.out.println("Selected Radio Button - "+chk.getText());
// }
// });
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
if(newValue == null){
newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
// System.out.println(oldValue);
// System.out.println(newValue);
calculateFor = CalculateFor.factory(newValue);
||||"calculateForCB:", calculateFor.toString());
@ -299,7 +267,6 @@ public class CharacterAnalysisTab {
public void onChanged(Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
@ -309,7 +276,6 @@ public class CharacterAnalysisTab {
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
for (Taxonomy t : checkedItemsTaxonomy) {
@ -468,97 +434,6 @@ public class CharacterAnalysisTab {
* case a: values for combo boxes can change after a corpus change
* <ul>
* <li>different corpus type - reset all fields so no old values remain</li>
* <li>same corpus type, different subset - keep</li>
* </ul>
* <p>
* case b: values for combo boxes can change after a header scan
* <ul>
* <li>at first, fields are populated by corpus type defaults</li>
* <li>after, with gathered data</li>
* </ul>
* <p></p>
* ngrams: 1
* calculateFor: word
* msd:
* taxonomy:
* skip: 0
* iscvv: false
* string length: 1
// public void populateFields() {
// // corpus changed if: current one is null (this is first run of the app)
// // or if currentCorpus != gui's corpus
// boolean corpusChanged = currentCorpusType == null
// || currentCorpusType != corpus.getCorpusType();
// // TODO: check for GOS, GIGAFIDA, SOLAR...
// // refresh and:
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
//// if (calculateFor == null) {
//// calculateForRB.selectToggle(lemmaRB);
//// calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString());
//// }
// if (!filter.hasMsd()) {
// // if current corpus doesn't have msd data, disable this field
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(true);
//"no msd data");
// } else {
// if (ValidationUtil.isEmpty(msd)
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// // msd has not been set previously
// // or msd has been set but the corpus changed -> reset
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(false);
//"msd reset");
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
// msdTF.setText(StringUtils.join(msdStrings, " "));
// msdTF.setDisable(false);
//"msd kept");
// }
// }
// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
// // keep calculateCvv
// calculatecvvCB.setSelected(calculateCvv);
// // keep string length if set
// if (stringLength != null) {
// stringLengthTF.setText(String.valueOf(stringLength));
// } else {
// stringLengthTF.setText("1");
// stringLength = 1;
// }
// // TODO: trigger on rescan
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// currentCorpusType = corpus.getCorpusType();
// // setTaxonomyIsDirty(false);
// } else {
// }
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
// }
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
Tooltip tooltip = new Tooltip();
@ -719,7 +594,6 @@ public class CharacterAnalysisTab {
int i = 0;
// DateFormat df = new SimpleDateFormat("hh:mm:ss");
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
@ -759,23 +633,16 @@ public class CharacterAnalysisTab {
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
// updateProgress((iFinal * 100) + (double) observable, corpusFiles.size() * 100);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
// xml_processing.progressProperty().addListener((obs, oldProgress, newProgress) ->
// updateProgress((iFinal * 100) + newProgress.doubleValue(), corpusFiles.size() * 100));
xml_processing.readXML(f.toString(), statistic);
if (isCancelled()) {
// readXML(f.toString(), statistic, this, corpusFiles.size(), startTime, previousTime, i);
return null;
@ -799,7 +666,6 @@ public class CharacterAnalysisTab {
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
@ -810,7 +676,6 @@ public class CharacterAnalysisTab {
logger.error("Error while executing", e);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
@ -820,7 +685,6 @@ public class CharacterAnalysisTab {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
@ -2,10 +2,8 @@ package gui;
import static data.CorpusType.*;
import static gui.GUIController.*;
import static gui.Messages.*;
import static util.Util.*;
import java.awt.*;
import java.lang.reflect.Constructor;
@ -53,16 +51,11 @@ public class CorpusTab {
private Button chooseCorpusLocationB;
private File chosenCorpusLocation;
private CheckBox readHeaderInfoChB;
private boolean readHeaderInfo;
// @FXML
// private CheckBox gosUseOrthChB;
// private boolean gosUseOrth;
private Button chooseResultsLocationB;
@ -213,11 +206,8 @@ public class CorpusTab {
selectReaderCB.valueProperty().addListener((observable, oldValue, newValue) -> {
if(newValue == null){
// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
// System.out.println(oldValue);
// System.out.println(newValue);
selectReader = newValue;
if(corpus != null && corpus.getCorpusType() != null) {
@ -236,12 +226,9 @@ public class CorpusTab {
// comma / point choice
punctuationCB.valueProperty().addListener((observable, oldValue, newValue) -> {
if(newValue == null){
// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
newValue = I18N.getTranslatedValue(oldValue, PUNCTUATION);
// System.out.println(oldValue);
// System.out.println(newValue);
punctuation = newValue;
if(corpus != null) {
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
@ -252,7 +239,6 @@ public class CorpusTab {
// add listeners
chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
// chooseCorpusLocationB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_chooseCorpusLocationB")));
helpH.setOnAction(e -> openHelpWebsite());
readHeaderInfoChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
@ -262,18 +248,6 @@ public class CorpusTab {
||||"read headers: ", readHeaderInfo);
// readHeaderInfoChB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_readHeaderInfoChB")));
// gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
// gosUseOrth = newValue;
// corpus.setGosOrthMode(gosUseOrth);
//// wordFormationTab.setDisable(gosUseOrth);
// satNew2Controller.toggleMode(null);
// oneWordTabController.toggleMode(null);
// catController.toggleMode(null);
//"gosUseOrth: ", gosUseOrth);
// });
chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null));
@ -284,31 +258,12 @@ public class CorpusTab {
I18N.setLocale(new Locale.Builder().setLanguage("sl").setRegion("SI").build());
// StringBuilder sb = new StringBuilder();
// sb.append(corpusLocation)
// .append("\n")
// .append(String.format(I18N.get("message.NOTIFICATION_FOUND_X_FILES"), corpusFilesSize))
// .append("\n")
// .append(String.format(I18N.get("message.NOTIFICATION_CORPUS"), corpusType.toString()));
// chooseCorpusLabelContent = sb.toString();
// chooseCorpusL.textProperty().unbind();
// chooseCorpusL.setText(chooseCorpusLabelContent);
||||"change language");
// set labels and toggle visibility
// toggleGosChBVisibility();
// chooseCorpusLabelContent = Messages.LABEL_CORPUS_LOCATION_NOT_SET;
// chooseCorpusL.setText(chooseCorpusLabelContent);
// chooseResultsLabelContent = Messages.LABEL_RESULTS_LOCATION_NOT_SET;
// chooseResultsL.setText(chooseResultsLabelContent);
@ -391,11 +346,6 @@ public class CorpusTab {
corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("vert", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
Collection<File> corpusFilesRegi = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("regi", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
// if (!checkRegiFile(corpusFilesRegi)){
// return;
// }
if (corpusFiles.size() == 0){
||||"alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND"));
showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null);
@ -405,7 +355,6 @@ public class CorpusTab {
corpusLocation = selectedDirectory.getAbsolutePath();
corpusFilesSize = String.valueOf(corpusFiles.size());
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
// corpusType = VERT;
@ -446,12 +395,10 @@ public class CorpusTab {
} else {
// System.out.println(corpusLocation);
corpusLocation = selectedDirectory.getAbsolutePath();
corpusFilesSize = String.valueOf(corpusFiles.size());
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
// String chooseCorpusLabelContentTmp = detectCorpusType(corpusFiles);
StringBuilder sb = new StringBuilder();
@ -491,7 +438,6 @@ public class CorpusTab {
// System.out.println(outputName);
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
@ -534,7 +480,6 @@ public class CorpusTab {
private void setResults() {
// if everything is ok
// check and enable checkbox if GOS
// toggleGosChBVisibility();
// set default results location
String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath();
@ -543,28 +488,6 @@ public class CorpusTab {
Messages.setChooseCorpusL(chooseCorpusL, chooseCorpusLabelContent);
private boolean checkRegiFile(Collection<File> corpusFiles) {
// CorpusType corpusType = corpus.getCorpusType();
// Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();
for (File file : corpusFiles) {
// try to open .regi file
String regiPath = file.getAbsolutePath().substring(0, file.getAbsolutePath().length() - 4) + "regi";
LineIterator regiIt;
try {
// read regi file
regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8");
} catch (IOException e) {
GUIController.showAlert(Alert.AlertType.ERROR, String.format(I18N.get("message.ERROR_NO_REGI_FILE_FOUND"), regiPath));
return false;
return true;
private void readHeaderInfo() {
CorpusType corpusType = corpus.getCorpusType();
Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();
@ -592,8 +515,6 @@ public class CorpusTab {
if (corpusIsSplit) {
// System.out.println(i);
// System.out.println(corpusFiles.size());
updateProgress(i, corpusFiles.size());
@ -615,10 +536,7 @@ public class CorpusTab {
// Messages.reload();
// chooseCorpusL.textProperty().bind(I18N.createStringBinding("message.LABEL_CORPUS_LOCATION_NOT_SET"));
// chooseResultsL.textProperty().bind(I18N.createStringBinding("message.LABEL_RESULTS_LOCATION_NOT_SET"));
||||"No taxonomy found in headers.");
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_TAXONOMY_FOUND"));
@ -749,12 +667,6 @@ public class CorpusTab {
task.setOnSucceeded(e -> {
ObservableList<String> readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue());
// if (ValidationUtil.isEmpty(readTaxonomy)) {
// // if no taxonomy found alert the user and keep other tabs disabled
//"No vert filters found in headers.");
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND"));
// } else {
// set taxonomy, update label
@ -790,10 +702,7 @@ public class CorpusTab {
@ -824,13 +733,6 @@ public class CorpusTab {
return directoryChooser.showDialog(stage);
* Hides GOS related checkbox until needed.
// private void toggleGosChBVisibility() {
// gosUseOrthChB.setVisible(corpus != null && corpus.getCorpusType() != null && corpus.getCorpusType() == CorpusType.GOS);
// }
private void selectReader() {
switch (selectReader) {
// "vert", "Solar", "GOS", "SSJ500K", "Gigafida", "Gigafida (old)", "Kres (old)"
@ -940,18 +842,10 @@ public class CorpusTab {
public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; }
/*public void setWfController(WordFormationTab wfController) {
this.wfController = wfController;
public void setWlController(WordLevelTab wlController) {
this.wlController = wlController;
public void setWordFormationTab(Tab wordFormationTab) {
this.wordFormationTab = wordFormationTab;
public void setHostServices(HostServices hostServices){
this.hostService = hostServices;
@ -52,21 +52,17 @@ public class FiltersForSolar {
public Label solarFilters;
public Label selectedFiltersL;
public TextArea selectedFiltersTextArea;
private Button changeLanguageB;
private Hyperlink helpH;
// private HashMap<String, ObservableList<String>> selectedFilters;
private Corpus corpus;
private StringAnalysisTabNew2 satNew2Controller;
private OneWordAnalysisTab oneWordTabController;
private CharacterAnalysisTab catController;
//private WordFormationTab wfController;
private WordLevelTab wlController;
private HostServices hostService;
@ -172,7 +168,6 @@ public class FiltersForSolar {
ArrayList<String> values = new ArrayList<>(entry.getValue());
if (!values.isEmpty()) {
// allFilters.append(entry.getKey())
allFilters.append(I18N.get(entry.getKey() + "L"))
.append(": ");
@ -202,7 +197,6 @@ public class FiltersForSolar {
@ -215,7 +209,6 @@ public class FiltersForSolar {
@ -230,10 +223,6 @@ public class FiltersForSolar {
public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; }
/*public void setWfController(WordFormationTab wfController) {
this.wfController = wfController;
public void setWlController(WordLevelTab wlController) {
this.wlController = wlController;
@ -49,29 +49,11 @@ public class GUIController extends Application {
private CorpusTab ctController;
private Parent ct;
//private WordFormationTab wfController;
private Parent wf;
private WordLevelTab wlController;
private Parent wl;
private FiltersForSolar ffsController;
private Parent ffs;
private SelectedFiltersPane sfpController;
private Parent sfp;
public Tab stringLevelTab;
public Tab wordLevelTab;
public Tab wordFormationTab;*/
@ -83,28 +65,9 @@ public class GUIController extends Application {
public void start(Stage primaryStage) throws IOException {
// File fileDir = new File("");
// BufferedReader in = new BufferedReader(
// new InputStreamReader(
// new FileInputStream(fileDir), "UTF8"));
// String str;
// while ((str = in.readLine()) != null) {
// System.out.println(str);
// }
// in.close();
Parent root = FXMLLoader.load(getClass().getResource("/GUI.fxml"));
// Parent root = FXMLLoader.load(ResourceLookup.resources.url("GUI.fxml"));
// primaryStage.setTitle("Luščilnik");
// StringBinding a = I18N.createStringBinding("window.title");
Scene scene = new Scene(root, 800, 600);
// scene.getStylesheets().add(GUIController.class.getResource("bootstrap3.css").toExternalForm())
stage = primaryStage;
@ -130,13 +93,10 @@ public class GUIController extends Application {
@ -146,14 +106,11 @@ public class GUIController extends Application {
@ -1,17 +1,12 @@
package gui;
import com.sun.javafx.collections.ObservableListWrapper;
import javafx.beans.binding.Bindings;
import javafx.beans.binding.ObjectBinding;
import javafx.beans.binding.StringBinding;
import javafx.beans.value.ObservableValue;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
import javafx.scene.control.Button;
import javafx.scene.control.Label;
import javafx.scene.control.Tooltip;
import java.text.MessageFormat;
@ -111,22 +106,6 @@ public final class I18N {
return Bindings.createStringBinding(() -> get(key, args), locale);
// public static ObservableValue<ObservableList<String>> createListStringBinding(final String key, Object... args) {
// ObservableList<StringBinding> r = (ObservableList<StringBinding>) new ArrayList<StringBinding>();
// r.add(Bindings.createStringBinding(() -> get(key, args), locale));
// return r;
// }
* creates a String Binding to a localized String that is computed by calling the given func
* @param func
* function called on every change
* @return StringBinding
public static StringBinding createStringBinding(Callable<String> func) {
return Bindings.createStringBinding(func, locale);
* creates a String binding to a localized String for the given message bundle key
@ -138,22 +117,6 @@ public final class I18N {
return Bindings.createObjectBinding(() -> getObject(keys, args), locale);
// public static ObservableValue<ObservableList<String>> createListStringBinding(final String key, Object... args) {
// ObservableList<StringBinding> r = (ObservableList<StringBinding>) new ArrayList<StringBinding>();
// r.add(Bindings.createStringBinding(() -> get(key, args), locale));
// return r;
// }
* creates an Object Binding to a localized Object that is computed by calling the given func
* @param func
* function called on every change
* @return ObjectBinding
public static ObjectBinding createObjectBinding(Callable<String> func) {
return Bindings.createObjectBinding(func, locale);
public static String getIndependent(final String key, Locale locale, final Object... args) {
ResourceBundle bundle = ResourceBundle.getBundle("message", locale);
@ -164,7 +127,6 @@ public final class I18N {
return val;
// return MessageFormat.format(bundle.getString(key), args);
public static String getRootValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
@ -230,35 +192,4 @@ public final class I18N {
return FXCollections.observableArrayList(translatedWords);
* DUPLICATE OF toString()
* searches for possible values in translations and returns key of the string
* == .toString()
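* @param w translated value to search for among the bundle's values
* @param prefix key prefix that each bundle key is compared against
* @return key of a translation equal to w, or null if no such key is returned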
public static String findI18NString(String w, String prefix){
ResourceBundle bundle = ResourceBundle.getBundle("message", getLocale());
for (String key : bundle.keySet()){
if(prefix.length() > key.length() || !key.substring(0, prefix.length()).equals(prefix)){
String val = bundle.getString(key);
try {
String newVal = new String(val.getBytes("ISO-8859-1"), "UTF-8");
if (newVal.equals(w)){
return key;
} catch (UnsupportedEncodingException e) {
return null;
@ -10,35 +10,17 @@ import javafx.scene.control.Label;
public class Messages {
// warnings & errors
public static String WARNING_CORPUS_NOT_FOUND = I18N.get("message.WARNING_CORPUS_NOT_FOUND");
public static String WARNING_WORD_OR_LEMMA = I18N.get("message.WARNING_WORD_OR_LEMMA");
public static String WARNING_NUMBER_TOO_BIG = I18N.get("message.WARNING_NUMBER_TOO_BIG");
public static String WARNING_NO_TAXONOMY_FOUND = I18N.get("message.WARNING_NO_TAXONOMY_FOUND");
public static String ERROR_WHILE_EXECUTING = I18N.get("message.ERROR_WHILE_EXECUTING");
public static String ERROR_NOT_ENOUGH_MEMORY= I18N.get("message.ERROR_NOT_ENOUGH_MEMORY");
// missing
public static String MISSING_NGRAM_LEVEL = I18N.get("message.MISSING_NGRAM_LEVEL");
public static String MISSING_CALCULATE_FOR = I18N.get("message.MISSING_CALCULATE_FOR");
public static String MISSING_SKIP = I18N.get("message.MISSING_SKIP");
public static String MISSING_STRING_LENGTH = I18N.get("message.MISSING_STRING_LENGTH");
// general notifications - static content/set only once
public static String RESULTS_PATH_SET_TO_DEFAULT = I18N.get("message.RESULTS_PATH_SET_TO_DEFAULT");
// ongoing notifications - displayed while processing, dynamically changing
@ -47,14 +29,7 @@ public class Messages {
// Labels
public static String LABEL_SCANNING_CORPUS = I18N.get("message.LABEL_SCANNING_CORPUS");
public static String COMPLETED = I18N.get("message.COMPLETED");
// public static String TOOLTIP_chooseCorpusLocationB = I18N.get("message.TOOLTIP_chooseCorpusLocationB");
// public static String TOOLTIP_readHeaderInfoChB = I18N.get("message.TOOLTIP_readHeaderInfoChB");
public static String TOOLTIP_readNotePunctuationsChB = I18N.get("message.TOOLTIP_readNotePunctuationsChB");
public static String TOOLTIP_readDisplayTaxonomyChB = I18N.get("message.TOOLTIP_readDisplayTaxonomyChB");
@ -1,12 +1,8 @@
package gui;
import alg.XML_processing;
import data.*;
import javafx.application.HostServices;
import javafx.beans.InvalidationListener;
import javafx.beans.Observable;
import javafx.beans.binding.StringBinding;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.collections.ListChangeListener;
@ -24,12 +20,10 @@ import javafx.scene.image.ImageView;
import util.Tasks;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;
import static alg.XML_processing.readXML;
import static gui.GUIController.showAlert;
@ -38,7 +32,6 @@ public class OneWordAnalysisTab {
private AnchorPane oneWordAnalysisTabPane;
// private ArrayList<String> alsoVisualize;
public TextArea selectedFiltersTextArea;
@ -197,43 +190,22 @@ public class OneWordAnalysisTab {
private ChangeListener<Boolean> minimalTaxonomyListener;
private ChangeListener<Boolean> minimalRelFreListener;
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
// Option identifier constants. Each group is a String array plus (where declared) an
// ArrayList holding a mutable copy of that array's elements.
// NOTE(review): the "calculateFor.*" strings look like I18N message keys rather than
// display text — confirm against the message bundle.

// "Calculate for" options: word, lowercase word, lemma, morphosyntactic specs.
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
// Reduced "calculate for" options (word and lowercase word only); presumably for the
// orthographic variant implied by the ORTH suffix — TODO confirm.
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
// Same entries as N_GRAM_COMPUTE_FOR_WORDS_ARRAY plus the normalized word; presumably
// used for the GOS corpus type — TODO confirm.
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
// "Also visualize" options, lemma variant: word type and morphosyntactic specs.
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
// "Also visualize" options, word variant: lemma, word type, morphosyntactic specs.
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
// Word variant extended with the normalized word; presumably for the GOS corpus type — TODO confirm.
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
// Normalized-word variant; holds the same entries as ALSO_VISUALIZE_ITEMS_WORDS_ARRAY.
// No list copy is declared next to this array.
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
// "Also visualize" options, MSD variant: word type only.
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// Empty option set: a zero-length array and the empty list built from it.
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
@ -242,9 +214,6 @@ public class OneWordAnalysisTab {
// TODO: pass observables for taxonomy based on header scan
// after header scan
private ObservableList<String> taxonomyCCBValues;
private CorpusType currentCorpusType;
public void init() {
// add CSS style
@ -339,9 +308,6 @@ public class OneWordAnalysisTab {
|||"Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
// alsoVisualizeCCB.getCheckModel().clearChecks();
// alsoVisualizeCCB.getItems().removeAll();
// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
@ -462,18 +428,14 @@ public class OneWordAnalysisTab {
public void onChanged(Change<? extends String> c) {
if (changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
for (Taxonomy t : checkedItemsTaxonomy) {
@ -527,7 +489,6 @@ public class OneWordAnalysisTab {
writeMsdAtTheEnd = newValue;
||||"write msd at the end: ", writeMsdAtTheEnd);
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
notePunctuations = false;
// set
@ -653,84 +614,6 @@ public class OneWordAnalysisTab {
* case a: values for combo boxes can change after a corpus change
* <ul>
* <li>different corpus type - reset all fields so no old values remain</li>
* <li>same corpus type, different subset - keep</li>
* </ul>
* <p>
* case b: values for combo boxes can change after a header scan
* <ul>
* <li>at first, fields are populated by corpus type defaults</li>
* <li>after, with gathered data</li>
* </ul>
* <p></p>
* ngrams: 1
* calculateFor: word
* msd:
* taxonomy:
* skip: 0
* iscvv: false
* string length: 1
// public void populateFields() {
// // corpus changed if: current one is null (this is first run of the app)
// // or if currentCorpus != gui's corpus
// boolean corpusChanged = currentCorpusType == null
// || currentCorpusType != corpus.getCorpusType();
// // TODO: check for GOS, GIGAFIDA, SOLAR...
// // refresh and:
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
// if (calculateFor == null) {
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
// }
// if (!filter.hasMsd()) {
// // if current corpus doesn't have msd data, disable this field
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(true);
//"no msd data");
// } else {
// if (ValidationUtil.isEmpty(msd)
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// // msd has not been set previously
// // or msd has been set but the corpus changed -> reset
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(false);
//"msd reset");
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
// msdTF.setText(StringUtils.join(msdStrings, " "));
// msdTF.setDisable(false);
//"msd kept");
// }
// }
// // TODO: trigger on rescan
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// currentCorpusType = corpus.getCorpusType();
// // setTaxonomyIsDirty(false);
// } else {
// }
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
// }
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
Tooltip tooltip = new Tooltip();
@ -819,7 +702,6 @@ public class OneWordAnalysisTab {
// filter.setNotePunctuations(true);
// setMsd must be behind alsoVisualize
@ -878,136 +760,14 @@ public class OneWordAnalysisTab {
private void execute(StatisticsNew statistic) {
||||"Started execution: ", statistic.getFilter());
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
// if(multipleFiles){
// cancel.setVisible(true);
// }
// int i = 0;
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
// for (File f : corpusFiles) {
// final int iFinal = i;
// XML_processing xml_processing = new XML_processing();
// xml_processing.isCancelled = false;
// i++;
// if(xml_processing.progressBarListener != null) {
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
// }
// if (multipleFiles) {
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
// previousTime = new Date();
// }
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
//// if (isCancelled()) {
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
//// break;
//// }
// } else {
// xml_processing.progressBarListener = new InvalidationListener() {
// int remainingSeconds = -1;
// Date previousTime = new Date();
// @Override
// public void invalidated(Observable observable) {
// cancel.setVisible(true);
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// previousTime = new Date();
// }
// xml_processing.isCancelled = isCancelled();
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
// }
// };
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
// }
// xml_processing.readXML(f.toString(), statistic);
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// }
// return null;
// }
// };
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
// task.setOnSucceeded(e -> {
// try {
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
// } else {
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
// logger.error("Error while saving", e1);
// }
// ngramProgressBar.progressProperty().unbind();
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// // When cancel button is pressed cancel analysis
// cancel.setOnAction(e -> {
// task.cancel();
//"cancel button");
// });
// final Thread thread = new Thread(task, "task");
// thread.setDaemon(true);
// thread.start();
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
if (statistic.getFilter().getMinimalRelFre() > 1){
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
final Thread thread = new Thread(mainTask, "task");
} else {
final Task<Void> mainTask = t.prepareMainTask(statistic);
// final Task<Void> mainTask = prepareMainTask(statistic);
final Thread thread = new Thread(mainTask, "task");
@ -1,18 +0,0 @@
package gui;
import javafx.scene.control.Label;
public class SelectedFiltersPane {
public Label selectedFiltersLabel;
public Label getSelectedFiltersLabel() {
return selectedFiltersLabel;
public void setSelectedFiltersLabel(String filters) {
this.selectedFiltersLabel = new Label(filters);
@ -1,21 +1,12 @@
package gui;
import static alg.XML_processing.*;
import static gui.GUIController.*;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import alg.XML_processing;
import javafx.application.HostServices;
import javafx.beans.InvalidationListener;
import javafx.beans.Observable;
import javafx.beans.binding.StringBinding;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.scene.image.ImageView;
@ -147,15 +138,6 @@ public class StringAnalysisTabNew2 {
private CheckComboBox<String> taxonomyCCB;
private ArrayList<Taxonomy> taxonomy;
// @FXML
// private CheckBox calculatecvvCB;
// private boolean calculateCvv;
// @FXML
// private TextField stringLengthTF;
// private Integer stringLength;
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
@ -225,8 +207,6 @@ public class StringAnalysisTabNew2 {
private Corpus corpus;
private HashMap<String, HashSet<String>> solarFiltersMap;
private Filter filter;
private boolean useDb;
private HostServices hostService;
private ListChangeListener<String> taxonomyListener;
private ListChangeListener<String> alsoVisualizeListener;
@ -236,44 +216,25 @@ public class StringAnalysisTabNew2 {
private ChangeListener<Boolean> minimalOccurrencesListener;
private ChangeListener<Boolean> minimalTaxonomyListener;
private ChangeListener<Boolean> minimalRelFreListener;
private boolean useDb;
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
private static final ObservableList<String> COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice", "t-score", "MI", "MI3", "logDice", "simple LL");
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
// Option identifier constants. Each group is a String array plus (where declared) an
// ArrayList holding a mutable copy of that array's elements.
// NOTE(review): the "calculateFor.*" strings look like I18N message keys rather than
// display text — confirm against the message bundle.

// "Calculate for" options: word, lowercase word, lemma, morphosyntactic specs.
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
// Reduced "calculate for" options (word and lowercase word only); presumably for the
// orthographic variant implied by the ORTH suffix — TODO confirm.
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
// Same entries as N_GRAM_COMPUTE_FOR_WORDS_ARRAY plus the normalized word; presumably
// used for the GOS corpus type — TODO confirm.
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
// "Also visualize" options, lemma variant: word type and morphosyntactic specs.
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
// "Also visualize" options, word variant: lemma, word type, morphosyntactic specs.
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
// Word variant extended with the normalized word; presumably for the GOS corpus type — TODO confirm.
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
// Normalized-word variant; holds the same entries as ALSO_VISUALIZE_ITEMS_WORDS_ARRAY.
// No list copy is declared next to this array.
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
// "Also visualize" options, MSD variant: word type only.
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// Empty option set: a zero-length array and the empty list built from it.
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
@ -282,9 +243,6 @@ public class StringAnalysisTabNew2 {
// TODO: pass observables for taxonomy based on header scan
// after header scan
private ObservableList<String> taxonomyCCBValues;
private CorpusType currentCorpusType;
public void init() {
// add CSS style
@ -420,13 +378,6 @@ public class StringAnalysisTabNew2 {
} else {
// alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
// alsoVisualize = new ArrayList<>();
// ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
// alsoVisualize.addAll(checkedItems);
//"Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
// });
// alsoVisualizeCCB.getCheckModel().clearChecks();
alsoVisualizeListener = new ListChangeListener<String>() {
@ -437,9 +388,6 @@ public class StringAnalysisTabNew2 {
|||"Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
// alsoVisualizeCCB.getCheckModel().clearChecks();
// alsoVisualizeCCB.getItems().removeAll();
// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS));
@ -500,16 +448,6 @@ public class StringAnalysisTabNew2 {
// collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
// collocability = new ArrayList<>();
// ObservableList<Collocability> checkedItems = FXCollections.observableArrayList();
// for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) {
// checkedItems.add(Collocability.factory(el));
// }
// collocability.addAll(checkedItems);
//"Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ",")));
// });
// msd
if (msdListener != null){
@ -595,9 +533,6 @@ public class StringAnalysisTabNew2 {
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
taxonomy = new ArrayList<>();
@ -606,7 +541,6 @@ public class StringAnalysisTabNew2 {
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
for (Taxonomy t : checkedItemsTaxonomy) {
@ -646,32 +580,6 @@ public class StringAnalysisTabNew2 {
skipValue = 0;
// cvv
// calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
// calculateCvv = newValue;
//"calculate cvv: " + calculateCvv);
// });
// calculatecvvCB.setSelected(false);
// string length
// stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
// if (!newValue) {
// // focus lost
// String value = stringLengthTF.getText();
// if (!ValidationUtil.isEmpty(value)) {
// if (!ValidationUtil.isNumber(value)) {
// logAlert("stringlengthTf: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
// }
// stringLength = Integer.parseInt(value);
// } else {
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_MISSING_STRING_LENGTH"));
// stringLengthTF.setText("1");
// logAlert(I18N.get("message.WARNING_MISSING_STRING_LENGTH"));
// }
// }
// });
minimalOccurrences = 1;
@ -781,108 +689,6 @@ public class StringAnalysisTabNew2 {
* case a: values for combo boxes can change after a corpus change
* <ul>
* <li>different corpus type - reset all fields so no old values remain</li>
* <li>same corpus type, different subset - keep</li>
* </ul>
* <p>
* case b: values for combo boxes can change after a header scan
* <ul>
* <li>at first, fields are populated by corpus type defaults</li>
* <li>after, with gathered data</li>
* </ul>
* <p></p>
* ngrams: 1
* calculateFor: word
* msd:
* taxonomy:
* skip: 0
* iscvv: false
* string length: 1
// public void populateFields() {
// // corpus changed if: current one is null (this is first run of the app)
// // or if currentCorpus != gui's corpus
// boolean corpusChanged = currentCorpusType == null
// || currentCorpusType != corpus.getCorpusType();
// // keep ngram value if set
// if (ngramValue == null) {
// ngramValueCB.getSelectionModel().select("1");
// ngramValue = 1;
// }
// // TODO: check for GOS, GIGAFIDA, SOLAR...
// // refresh and:
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
// if (calculateFor == null) {
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
// }
// if (!filter.hasMsd()) {
// // if current corpus doesn't have msd data, disable this field
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(true);
//"no msd data");
// } else {
// if (ValidationUtil.isEmpty(msd)
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// // msd has not been set previously
// // or msd has been set but the corpus changed -> reset
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(false);
//"msd reset");
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
// msdTF.setText(StringUtils.join(msdStrings, " "));
// msdTF.setDisable(false);
//"msd kept");
// }
// }
// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
// // keep skip value
// if (skipValue == null) {
// skipValueCB.getSelectionModel().select("0");
// skipValue = 0;
// }
// // keep calculateCvv
// calculatecvvCB.setSelected(calculateCvv);
// // keep string length if set
// if (stringLength != null) {
// stringLengthTF.setText(String.valueOf(stringLength));
// } else {
// stringLengthTF.setText("1");
// stringLength = 1;
// }
// // TODO: trigger on rescan
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// currentCorpusType = corpus.getCorpusType();
// // setTaxonomyIsDirty(false);
// } else {
// }
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
// }
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
Tooltip tooltip = new Tooltip();
@ -898,7 +704,6 @@ public class StringAnalysisTabNew2 {
// writeMsdAtTheEndL.textProperty().bind(I18N.createStringBinding("label.writeMsdAtTheEnd"));
@ -948,10 +753,6 @@ public class StringAnalysisTabNew2 {
if (mode == MODE.WORD) {
// paneLetters.setVisible(false);
// if (corpus.getCorpusType() == CorpusType.GOS)
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
// else
if (corpus.getCorpusType() == CorpusType.GOS) {
@ -980,7 +781,6 @@ public class StringAnalysisTabNew2 {
// filter.setIsCvv(calculateCvv);
@ -993,16 +793,11 @@ public class StringAnalysisTabNew2 {
// if (ngramValue != null && ngramValue == 0) {
// filter.setStringLength(stringLength);
// }
String message = Validation.validateForStringLevel(filter);
if (message == null) {
// no errors
||||"Executing: ", filter.toString());
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
} else {
@ -1043,547 +838,6 @@ public class StringAnalysisTabNew2 {
// public void calculate_collocabilities(StatisticsNew statistics, StatisticsNew oneWordStatistics) {
// statistics.updateCalculateCollocabilities(oneWordStatistics);
// }
// private final Task<Void> prepareTaskForMinRelFre(StatisticsNew statistic) {
// Filter f = statistic.getFilter();
//"Started execution: ", f);
// Task<Void> task_collocability = null;
// try{
// Filter f2 = (Filter) f.clone();
// f2.setIsMinimalRelFreScraper(true);
// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb);
//// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb);
// Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType());
// if(multipleFiles){
// cancel.setVisible(true);
// }
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
// int corpusSize;
// int i;
// if(statistic.getFilter().getCollocability().size() > 0){
// i = 0;
// corpusSize = corpusFiles.size() * 3;
// } else {
// i = 0;
// corpusSize = corpusFiles.size() * 2;
// }
// for (File f : corpusFiles) {
// final int iFinal = i;
// XML_processing xml_processing = new XML_processing();
// xml_processing.isCancelled = false;
// i++;
// if(xml_processing.progressBarListener != null) {
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
// }
// if (multipleFiles) {
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
// previousTime = new Date();
// }
// this.updateProgress(i, corpusSize);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
//// if (isCancelled()) {
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
//// break;
//// }
// } else {
// xml_processing.progressBarListener = new InvalidationListener() {
// int remainingSeconds = -1;
// Date previousTime = new Date();
// @Override
// public void invalidated(Observable observable) {
// cancel.setVisible(true);
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
//// System.out.println(remainingSeconds);
// previousTime = new Date();
// }
// xml_processing.isCancelled = isCancelled();
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
// }
// };
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
// }
// xml_processing.readXML(f.toString(), statisticsMinRelFre);
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// if(!(multipleFiles)){
// cancel.setVisible(false);
// }
// }
// // add remaining minRelFre results
// if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
// long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
// double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
// statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor);
// // reset all values
// for(Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()){
// statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>());
// }
// for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){
// statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
// }
//// System.out.println("asd");
// }
// return null;
// }
// };
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
// task.setOnSucceeded(e -> {
// statistic.updateMinimalRelFre(statisticsMinRelFre.getMinimalRelFreNgrams(), statisticsMinRelFre.getMinimalRelFre1grams());
// final Task<Void> taskCollocability = prepareMainTask(statistic);
// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
// thread_collocability.setDaemon(true);
// thread_collocability.start();
// });
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// // ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// // When cancel button is pressed cancel analysis
// cancel.setOnAction(e -> {
// task.cancel();
//"cancel button");
// });
// return task;
// }catch(CloneNotSupportedException c){ return null; }
// }
// private final Task<Void> prepareMainTask(StatisticsNew statistic) {
// Filter f = statistic.getFilter();
//"Started execution: ", f);
// Task<Void> task_collocability = null;
// Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
// if(multipleFiles){
// cancel.setVisible(true);
// }
//// int i = corpusFiles.size();
//// Date startTime = new Date();
//// Date previousTime = new Date();
//// int remainingSeconds = -1;
//// int corpusSize;
//// if (statistic.getFilter().getCollocability().size() > 0) {
//// corpusSize = corpusFiles.size() * 2;
//// } else {
//// corpusSize = corpusFiles.size();
//// }
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
// int corpusSize;
// int i;
// int taskIndex = 0;
// if(statistic.getFilter().getCollocability().size() > 0 && statistic.getFilter().getMinimalRelFre() > 1){
// i = corpusFiles.size();
// corpusSize = corpusFiles.size() * 3;
// } else if (statistic.getFilter().getMinimalRelFre() > 1) {
// i = corpusFiles.size();
// corpusSize = corpusFiles.size() * 2;
// } else if (statistic.getFilter().getCollocability().size() > 0) {
// i = 0;
// corpusSize = corpusFiles.size() * 2;
// } else {
// i = 0;
// corpusSize = corpusFiles.size();
// }
// for (File f : corpusFiles) {
// final int iFinal = i;
// XML_processing xml_processing = new XML_processing();
// xml_processing.isCancelled = false;
// i++;
// taskIndex++;
// if(xml_processing.progressBarListener != null) {
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
// }
// if (multipleFiles) {
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000);
// previousTime = new Date();
// }
// this.updateProgress(i, corpusSize);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
//// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
//// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
//// previousTime = new Date();
//// }
//// this.updateProgress(i, corpusSize);
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
// } else {
// xml_processing.progressBarListener = new InvalidationListener() {
// int remainingSeconds = -1;
// Date previousTime = new Date();
// @Override
// public void invalidated(Observable observable) {
// cancel.setVisible(true);
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
//// System.out.println(remainingSeconds);
// previousTime = new Date();
// }
// xml_processing.isCancelled = isCancelled();
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
// }
// };
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
// }
// xml_processing.readXML(f.toString(), statistic);
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// if(!(multipleFiles)){
// cancel.setVisible(false);
// }
//// readXML(f.toString(), statistic);
//// i++;
//// if (isCancelled()) {
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
//// break;
//// }
//// if (statistic.getFilter().getCollocability().size() > 0) {
//// this.updateProgress(i, corpusFiles.size() * 2);
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
//// } else {
//// this.updateProgress(i, corpusFiles.size());
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
//// }
////// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
// }
// // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing
// if (statistic.getFilter().getMinimalRelFre() > 1){
//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
// long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
// double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
// for(Map.Entry<MultipleHMKeys, AtomicLong> entry : statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()){
// if(entry.getValue().longValue() < absToRelFactor){
// statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).remove(entry.getKey());
// }
// }
// statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor);
// }
// return null;
// }
// };
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
// task.setOnSucceeded(e -> {
// if (f.getCollocability().size() > 0) {
// try{
// Filter f2 = (Filter) f.clone();
// f2.setNgramValue(1);
// StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
// final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
// thread_collocability.setDaemon(true);
// thread_collocability.start();
// }catch(CloneNotSupportedException c){}
// } else {
// try {
//// System.out.print(statistics);
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
// } else {
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
// logger.error("Error while saving", e1);
// } catch (OutOfMemoryError e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
// logger.error("Out of memory error", e1);
// }
// ngramProgressBar.progressProperty().unbind();
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// }
// });
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// // When cancel button is pressed cancel analysis
// cancel.setOnAction(e -> {
// task.cancel();
//"cancel button");
// });
// return task;
// }
// private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
// Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
// if(multipleFiles){
// cancel.setVisible(true);
// }
//// int i = corpusFiles.size();
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
//// int corpusSize;
//// if (statistic.getFilter().getCollocability().size() > 0) {
//// corpusSize = corpusFiles.size() * 2;
//// } else {
//// corpusSize = corpusFiles.size();
//// }
// int corpusSize;
// int i;
// int taskIndex = 0;
// if(statistic.getFilter().getMinimalRelFre() > 1){
// i = corpusFiles.size() * 2;
// corpusSize = corpusFiles.size() * 3;
// } else {
// i = corpusFiles.size();
// corpusSize = corpusFiles.size() * 2;
// }
// for (File f : corpusFiles) {
// final int iFinal = i;
// XML_processing xml_processing = new XML_processing();
// i++;
// taskIndex++;
// if(xml_processing.progressBarListener != null) {
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
// }
// if (multipleFiles) {
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000);
// previousTime = new Date();
// }
// this.updateProgress(i, corpusSize);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
//// if (isCancelled()) {
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
//// break;
//// }
// } else {
// xml_processing.progressBarListener = new InvalidationListener() {
// int remainingSeconds = -1;
// Date previousTime = new Date();
// @Override
// public void invalidated(Observable observable) {
// cancel.setVisible(true);
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)));
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
//// System.out.println(remainingSeconds);
// previousTime = new Date();
// }
// xml_processing.isCancelled = isCancelled();
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
// }
// };
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
// }
// xml_processing.isCollocability = true;
// xml_processing.readXML(f.toString(), statisticsOneGrams);
// xml_processing.isCollocability = false;
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
//// readXML(f.toString(), statisticsOneGrams);
//// i++;
//// this.updateProgress(i, corpusFiles.size() * 2);
//// if (statistic.getFilter().getCollocability().size() > 0) {
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
//// } else {
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
//// }
// }
// return null;
// }
// };
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
// task.setOnSucceeded(e -> {
// try {
// System.out.print(statistic);
//// calculate_collocabilities(statistic, statisticsOneGrams);
// statistic.updateCalculateCollocabilities(statisticsOneGrams);
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
// } else {
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
// logger.error("Error while saving", e1);
// } catch (OutOfMemoryError e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
// logger.error("Out of memory error", e1);
// }
//// try {
//// boolean successullySaved = statistic.saveResultToDisk();
//// if (successullySaved) {
//// } else {
//// }
//// } catch (UnsupportedEncodingException e1) {
//// logger.error("Error while saving", e1);
//// } catch (OutOfMemoryError e1){
//// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
//// logger.error("Out of memory error", e1);
//// }
// ngramProgressBar.progressProperty().unbind();
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// // When cancel button is pressed cancel analysis
// cancel.setOnAction(e -> {
// task.cancel();
////"cancel button");
// });
// return task;
// }
private void execute(StatisticsNew statistic) {
Filter f = statistic.getFilter();
||||"Started execution: ", f);
@ -1591,13 +845,11 @@ public class StringAnalysisTabNew2 {
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
if (f.getMinimalRelFre() > 1){
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
final Thread thread = new Thread(mainTask, "task");
} else {
final Task<Void> mainTask = t.prepareMainTask(statistic);
// final Task<Void> mainTask = prepareMainTask(statistic);
final Thread thread = new Thread(mainTask, "task");
@ -9,7 +9,6 @@ import org.apache.commons.lang3.math.NumberUtils;
public class ValidationUtil {
public static boolean isNumber(String value) {
//return NumberUtils.isCreatable(value);
return NumberUtils.isNumber(value);
@ -1,260 +0,0 @@
//package gui;
//import static alg.XML_processing.*;
//import static gui.GUIController.*;
//import java.util.*;
//import javafx.application.HostServices;
//import javafx.scene.control.*;
//import org.apache.commons.lang3.StringUtils;
//import org.apache.logging.log4j.LogManager;
//import org.apache.logging.log4j.Logger;
//import org.controlsfx.control.CheckComboBox;
//import data.*;
//import javafx.collections.ListChangeListener;
//import javafx.collections.ObservableList;
//import javafx.concurrent.Task;
//import javafx.fxml.FXML;
//import javafx.scene.layout.AnchorPane;
//public class WordFormationTab {
// public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
// public AnchorPane wordAnalysisTabPane;
// @FXML
// public Label selectedFiltersLabel;
// @FXML
// public Label solarFilters;
// @FXML
// private CheckComboBox<String> taxonomyCCB;
// private ArrayList<Taxonomy> taxonomy;
// @FXML
// private TextField minimalOccurrencesTF;
// private Integer minimalOccurrences;
// @FXML
// private TextField minimalTaxonomyTF;
// private Integer minimalTaxonomy;
// @FXML
// private Button computeB;
// @FXML
// public ProgressBar ngramProgressBar;
// @FXML
// public Label progressLabel;
// @FXML
// private Hyperlink helpH;
// private Corpus corpus;
// private HashMap<String, HashSet<String>> solarFiltersMap;
// private HostServices hostService;
// // after header scan
// private ObservableList<String> taxonomyCCBValues;
// private CorpusType currentCorpusType;
// private boolean useDb;
// public void init() {
// // taxonomy
// if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
// taxonomyCCB.getItems().removeAll();
// taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
// taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
// taxonomy = new ArrayList<>();
// ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus);
// taxonomy.addAll(checkedItemsTaxonomy);
//"Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
// });
// taxonomyCCB.getCheckModel().clearChecks();
// } else {
// taxonomyCCB.setDisable(true);
// }
// // set default values
// minimalOccurrencesTF.setText("1");
// minimalOccurrences = 1;
// minimalTaxonomyTF.setText("1");
// minimalTaxonomy = 1;
// minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
// if (!newValue) {
// // focus lost
// String value = minimalOccurrencesTF.getText();
// if (!ValidationUtil.isEmpty(value)) {
// if (!ValidationUtil.isNumber(value)) {
// logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
// } else {
// minimalOccurrences = Integer.parseInt(value);
// }
// } else {
// minimalOccurrencesTF.setText("1");
// minimalOccurrences = 1;
// }
// }
// });
// minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
// if (!newValue) {
// // focus lost
// String value = minimalTaxonomyTF.getText();
// if (!ValidationUtil.isEmpty(value)) {
// if (!ValidationUtil.isNumber(value)) {
// logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
// } else {
// minimalTaxonomy = Integer.parseInt(value);
// }
// } else {
// minimalTaxonomyTF.setText("1");
// minimalTaxonomy = 1;
// }
// }
// });
// computeB.setOnAction(e -> {
// compute();
//"compute button");
// });
// helpH.setOnAction(e -> openHelpWebsite());
// }
// private void compute() {
// Filter filter = new Filter();
// filter.setNgramValue(1);
// filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
// filter.setTaxonomy(taxonomy);
// filter.setAl(AnalysisLevel.STRING_LEVEL);
// filter.setSkipValue(0);
// filter.setMsd(new ArrayList<>());
// filter.setIsCvv(false);
// filter.setSolarFilters(solarFiltersMap);
// filter.setMinimalOccurrences(minimalOccurrences);
// filter.setMinimalTaxonomy(minimalTaxonomy);
// String message = Validation.validateForStringLevel(filter);
// if (message == null) {
// // no errors
//"Executing: ", filter.toString());
// StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
// execute(statistic);
// } else {
// logAlert(message);
// showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
// }
// }
// private void openHelpWebsite(){
// hostService.showDocument(Messages.HELP_URL);
// }
// private void execute(StatisticsNew statistic) {
//"Started execution: ", statistic.getFilter());
// Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// int i = 0;
// Date startTime = new Date();
// Date previousTime = new Date();
// for (File f : corpusFiles) {
// readXML(f.toString(), statistic);
// i++;
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
// }
// return null;
// }
// };
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
// task.setOnSucceeded(e -> {
// try {
// // first, we have to recalculate all occurrences to detailed statistics
// boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
// } else {
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
// logger.error("Error while saving", e1);
// }
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// });
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// });
// final Thread thread = new Thread(task, "task");
// thread.setDaemon(true);
// thread.start();
// }
// private void logAlert(String alert) {
//"alert: " + alert);
// }
// public void setCorpus(Corpus corpus) {
// this.corpus = corpus;
// if (corpus.getCorpusType() != CorpusType.SOLAR) {
// setSelectedFiltersLabel(null);
// } else {
// setSelectedFiltersLabel("/");
// }
// }
// public void setSelectedFiltersLabel(String content) {
// if (content != null) {
// solarFilters.setVisible(true);
// selectedFiltersLabel.setVisible(true);
// selectedFiltersLabel.setText(content);
// } else {
// solarFilters.setVisible(false);
// selectedFiltersLabel.setVisible(false);
// }
// }
// public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
// this.solarFiltersMap = solarFiltersMap;
// }
// public void setHostServices(HostServices hostServices){
// this.hostService = hostServices;
// }
@ -1,12 +1,8 @@
package gui;
import alg.XML_processing;
import data.*;
import javafx.application.HostServices;
import javafx.beans.InvalidationListener;
import javafx.beans.Observable;
import javafx.beans.binding.StringBinding;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.collections.ListChangeListener;
@ -23,12 +19,10 @@ import org.controlsfx.control.CheckComboBox;
import util.Tasks;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;
import static alg.XML_processing.readXML;
import static gui.GUIController.showAlert;
@ -165,10 +159,6 @@ public class WordLevelTab {
private TextField suffixListTF;
private ArrayList<String> suffixList;
// @FXML
// private CheckBox writeMsdAtTheEndChB;
// private boolean writeMsdAtTheEnd;
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
@ -215,7 +205,6 @@ public class WordLevelTab {
private Corpus corpus;
private HashMap<String, HashSet<String>> solarFiltersMap;
private Filter filter;
private boolean useDb;
private HostServices hostService;
private ListChangeListener<String> taxonomyListener;
@ -226,44 +215,31 @@ public class WordLevelTab {
private ChangeListener<Boolean> minimalTaxonomyListener;
private ChangeListener<Boolean> minimalRelFreListener;
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
private static final ArrayList<String> TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
// TODO: pass observables for taxonomy based on header scan
// after header scan
private ObservableList<String> taxonomyCCBValues;
private CorpusType currentCorpusType;
public void init() {
// add CSS style
@ -328,21 +304,12 @@ public class WordLevelTab {
} else if (newValue.equals(CalculateFor.NORMALIZED_WORD.toString())) {
} else if (newValue.equals(CalculateFor.MORPHOSYNTACTIC_SPECS.toString())) {
// writeMsdAtTheEndEnableCalculateFor.set(true);
// writeMsdAtTheEndChB.setDisable(false);
} else {
// if (!newValue.equals("oblikoskladenjska oznaka")){
// writeMsdAtTheEnd = false;
// writeMsdAtTheEndChB.setSelected(false);
// writeMsdAtTheEndChB.setDisable(true);
// writeMsdAtTheEndEnableCalculateFor.set(false);
// }
alsoVisualizeListener = new ListChangeListener<String>() {
public void onChanged(Change<? extends String> c) {
@ -353,10 +320,6 @@ public class WordLevelTab {
// alsoVisualizeCCB.getCheckModel().clearChecks();
// alsoVisualizeCCB.getItems().removeAll();
// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
if (languageChanged) {
@ -442,7 +405,6 @@ public class WordLevelTab {
// suffixList = value;
@ -475,7 +437,6 @@ public class WordLevelTab {
// suffixList = value;
if(suffixList.size() > 0){
@ -492,8 +453,6 @@ public class WordLevelTab {
// prefixLengthCB.setDisable(true);
if (msdListener != null){
@ -581,10 +540,8 @@ public class WordLevelTab {
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
@ -592,7 +549,6 @@ public class WordLevelTab {
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
for (Taxonomy t : checkedItemsTaxonomy) {
@ -639,15 +595,6 @@ public class WordLevelTab {
// writeMsdAtTheEnd = false;
// writeMsdAtTheEndChB.setDisable(true);
// // set
// writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
// writeMsdAtTheEnd = newValue;
//"write msd at the end: ", writeMsdAtTheEnd);
// });
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
// set default values
minimalOccurrences = 1;
@ -764,85 +711,6 @@ public class WordLevelTab {
* case a: values for combo boxes can change after a corpus change
* <ul>
* <li>different corpus type - reset all fields so no old values remain</li>
* <li>same corpus type, different subset - keep</li>
* </ul>
* <p>
* case b: values for combo boxes can change after a header scan
* <ul>
* <li>at first, fields are populated by corpus type defaults</li>
* <li>after, with gathered data</li>
* </ul>
* <p></p>
* ngrams: 1
* calculateFor: word
* msd:
* taxonomy:
* skip: 0
* iscvv: false
* string length: 1
// public void populateFields() {
// // corpus changed if: current one is null (this is first run of the app)
// // or if currentCorpus != gui's corpus
// boolean corpusChanged = currentCorpusType == null
// || currentCorpusType != corpus.getCorpusType();
// // TODO: check for GOS, GIGAFIDA, SOLAR...
// // refresh and:
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
// if (calculateFor == null) {
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
// }
// if (!filter.hasMsd()) {
// // if current corpus doesn't have msd data, disable this field
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(true);
//"no msd data");
// } else {
// if (ValidationUtil.isEmpty(msd)
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// // msd has not been set previously
// // or msd has been set but the corpus changed -> reset
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(false);
//"msd reset");
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
// msdTF.setText(StringUtils.join(msdStrings, " "));
// msdTF.setDisable(false);
//"msd kept");
// }
// }
// // TODO: trigger on rescan
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// currentCorpusType = corpus.getCorpusType();
// // setTaxonomyIsDirty(false);
// } else {
// }
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
// }
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
Tooltip tooltip = new Tooltip();
@ -911,11 +779,9 @@ public class WordLevelTab {
if (corpus.getCorpusType() == CorpusType.GOS) {
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
} else {
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
@ -923,7 +789,6 @@ public class WordLevelTab {
if (corpus.isGosOrthMode()) {
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
} else {
@ -954,7 +819,6 @@ public class WordLevelTab {
// filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
String message = Validation.validateForStringLevel(filter);
if (message == null) {
@ -1004,134 +868,14 @@ public class WordLevelTab {
private void execute(StatisticsNew statistic) {
||||"Started execution: ", statistic.getFilter());
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
// final Task<Void> task = new Task<Void>() {
// @SuppressWarnings("Duplicates")
// @Override
// protected Void call() throws Exception {
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
// if(multipleFiles){
// cancel.setVisible(true);
// }
// int i = 0;
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
// for (File f : corpusFiles) {
// final int iFinal = i;
// XML_processing xml_processing = new XML_processing();
// xml_processing.isCancelled = false;
// i++;
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// if(xml_processing.progressBarListener != null) {
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
// }
// if (multipleFiles) {
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
// previousTime = new Date();
// }
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
// } else {
// xml_processing.progressBarListener = new InvalidationListener() {
// int remainingSeconds = -1;
// Date previousTime = new Date();
// @Override
// public void invalidated(Observable observable) {
// cancel.setVisible(true);
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// previousTime = new Date();
// }
// xml_processing.isCancelled = isCancelled();
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
// }
// };
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
// }
// xml_processing.readXML(f.toString(), statistic);
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// }
// return null;
// }
// };
// ngramProgressBar.progressProperty().bind(task.progressProperty());
// progressLabel.textProperty().bind(task.messageProperty());
// task.setOnSucceeded(e -> {
// try {
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
// } else {
// }
// } catch (UnsupportedEncodingException e1) {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
// logger.error("Error while saving", e1);
// }
// ngramProgressBar.progressProperty().unbind();
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// task.setOnFailed(e -> {
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
// logger.error("Error while executing", e);
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// task.setOnCancelled(e -> {
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.progressProperty().unbind();
// ngramProgressBar.setProgress(0.0);
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
// progressLabel.textProperty().unbind();
// progressLabel.setText("");
// cancel.setVisible(false);
// });
// // When cancel button is pressed cancel analysis
// cancel.setOnAction(e -> {
// task.cancel();
//"cancel button");
// });
// final Thread thread = new Thread(task, "task");
// thread.setDaemon(true);
// thread.start();
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
if (statistic.getFilter().getMinimalRelFre() > 1){
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
final Thread thread = new Thread(mainTask, "task");
} else {
final Task<Void> mainTask = t.prepareMainTask(statistic);
// final Task<Void> mainTask = prepareMainTask(statistic);
final Thread thread = new Thread(mainTask, "task");
@ -1144,5 +888,4 @@ public class WordLevelTab {
public void setHostServices(HostServices hostServices){
this.hostService = hostServices;
@ -1,46 +0,0 @@
package util;
import java.util.Arrays;
import java.util.HashSet;
/**
 * Generates sets of integer indices as combinations drawn from a contiguous range.
 * Found combinations are accumulated in a shared static set, which
 * {@link #generateIndices(int)} replaces with a fresh set at the start of every call.
 */
public class Combinations {
// Accumulator for the combinations collected by combinationUtil.
private static HashSet<HashSet<Integer>> result = new HashSet<>();
/* arr[] ---> Input array
data[] ---> Temporary array holding the combination currently being built
start & end ---> Starting and ending indexes in arr[]
index ---> Current index in data[]
combinationLength ---> Size of the combinations to collect */
static void combinationUtil(int arr[], Integer data[], int start, int end, int index, int combinationLength) {
// Current combination is complete: store a copy of it in result as a set
if (index == combinationLength) {
result.add(new HashSet<>(Arrays.asList(data)));
// Fill position index with every possible element. The condition
// "end - i + 1 >= combinationLength - index" makes sure that enough
// elements remain from i onwards to fill the remaining positions
// of the combination
for (int i = start; i <= end && end - i + 1 >= combinationLength - index; i++) {
data[index] = arr[i];
// Recurse to fill the next position, using only elements after i
combinationUtil(arr, data, i + 1, end, index + 1, combinationLength);
/**
 * Builds index combinations over the values 1 .. maxNOfIndices - 1.
 * Combinations of every size from 1 to maxNOfIndices - 2 are collected,
 * and the empty set is added as well.
 *
 * @param maxNOfIndices exclusive upper bound of the index values
 * @return the set of collected index sets (the same object held in the static result field)
 */
public static HashSet<HashSet<Integer>> generateIndices(int maxNOfIndices) {
result = new HashSet<>();
// Candidate index values 1 .. maxNOfIndices - 1 (range end is exclusive)
int[] arr = IntStream.range(1, maxNOfIndices).toArray();
for (int i = 1; i < maxNOfIndices - 1; i++) {
// Fresh temporary array of length i, used to build each combination of size i
combinationUtil(arr, new Integer[i], 0, arr.length - 1, 0, i);
// also add an empty one for X.... (all of this type)
result.add(new HashSet<>());
return result;
@ -6,7 +6,6 @@ import*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong;
import data.*;
@ -16,49 +15,11 @@ import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.QuoteMode;
import org.apache.commons.lang3.tuple.Pair;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import data.Enums.WordLevelType;
public class Export {
// public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
// JSONArray wrapper = new JSONArray();
// for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
// JSONArray data_wrapper = new JSONArray();
// JSONObject metric = new JSONObject();
// String title = p.getLeft();
// Map<MultipleHMKeys, Long> map = p.getRight();
// if (map.isEmpty())
// continue;
// long total = Util.mapSumFrequencies(map);
// for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
// JSONObject data_entry = new JSONObject();
// data_entry.put("word", e.getKey());
// data_entry.put("frequency", e.getValue());
// data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
// data_wrapper.add(data_entry);
// }
// metric.put("Title", title);
// metric.put("data", data_wrapper);
// wrapper.add(metric);
// }
// try (FileWriter file = new FileWriter("statistics.json")) {
// file.write(wrapper.toJSONString());
// } catch (IOException e) {
// e.printStackTrace();
// }
// }
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
StatisticsNew statistics, Filter filter) {
Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
@ -68,15 +29,6 @@ public class Export {
List<Object> FILE_HEADER_AL = new ArrayList<>();
//Count frequencies
// long num_frequencies = 0;
// for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
// Map<MultipleHMKeys, Long> map = p.getRight();
// if (map.isEmpty())
// continue;
// num_frequencies = Util.mapSumFrequencies(map);
// }
Map<Taxonomy, Long> num_selected_taxonomy_frequencies = new ConcurrentHashMap<>();
for (Taxonomy taxonomyKey : taxonomyResults.keySet()) {
num_selected_taxonomy_frequencies.put(taxonomyKey, (long) 0);
@ -113,7 +65,6 @@ public class Export {
headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
// headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
for (CalculateFor otherKey : filter.getMultipleKeys()) {
@ -163,10 +114,7 @@ public class Export {
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
String title = p.getLeft();
// statistics.setTimeEnding();
title = statistics.generateResultTitle();
// statistics.
fileName = title.replace(": ", "-");
fileName = fileName.replace(" ", "_").concat(".csv");
@ -178,8 +126,6 @@ public class Export {
if (map.isEmpty())
// long total = Util.mapSumFrequencies(map);
OutputStreamWriter fileWriter = null;
CSVPrinter csvFilePrinter = null;
@ -289,10 +235,7 @@ public class Export {
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key), statistics.getCorpus().getPunctuation()));
dataEntry.add(formatNumberForExport(((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue(), statistics.getCorpus().getPunctuation()));
// dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
// dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
if (filter.getCollocability().size() > 0){
@ -303,39 +246,6 @@ public class Export {
// Write msd separated per letters at the end of each line in csv
if (filter.getWriteMsdAtTheEnd()) {
// String msd = "";
// if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
// msd = e.getKey().getK1();
// } else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
// i = 0;
// for (CalculateFor otherKey : filter.getMultipleKeys()){
// switch(i){
// case 0:
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
// msd = e.getKey().getK2();
// }
// break;
// case 1:
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
// msd = e.getKey().getK3();
// }
// break;
// case 2:
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
// msd = e.getKey().getK4();
// }
// break;
// case 3:
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
// msd = e.getKey().getK5();
// }
// break;
// }
// i++;
// }
// }
String msd = e.getKey().getMsd(filter);
String [] charArray = msd.split("(?!^)");
@ -372,67 +282,6 @@ public class Export {
return s;
// public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
// //Delimiter used in CSV file
// String NEW_LINE_SEPARATOR = "\n";
// //CSV file header
// Object[] FILE_HEADER = {"word", "frequency", "percent"};
// String fileName = "";
// fileName = title.replace(": ", "-");
// fileName = fileName.replace(" ", "_").concat(".csv");
// fileName = resultsPath.toString().concat(File.separator).concat(fileName);
// OutputStreamWriter fileWriter = null;
// CSVPrinter csvFilePrinter = null;
// //Create the CSVFormat object with "\n" as a record delimiter
// CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
// try {
// //initialize FileWriter object
// fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
// //initialize CSVPrinter object
// csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
// // write info block
// printHeaderInfo(csvFilePrinter, headerInfoBlock);
// //Create CSV file header
// csvFilePrinter.printRecord(FILE_HEADER);
// for (Object[] resultEntry : result) {
// List dataEntry = new ArrayList<>();
// dataEntry.add(resultEntry[0]);
// dataEntry.add(resultEntry[1]);
// dataEntry.add(formatNumberAsPercent(resultEntry[2]), statistics.getCorpus().getPunctuation());
// csvFilePrinter.printRecord(dataEntry);
// }
// } catch (Exception e) {
// System.out.println("Error in CsvFileWriter!");
// e.printStackTrace();
// } finally {
// try {
// if (fileWriter != null) {
// fileWriter.flush();
// fileWriter.close();
// }
// if (csvFilePrinter != null) {
// csvFilePrinter.close();
// }
// } catch (IOException e) {
// System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
// e.printStackTrace();
// }
// }
// return fileName;
// }
public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
//Delimiter used in CSV file
@ -1,31 +0,0 @@
package util;
/**
 * Empty placeholder class: every member below is commented out, so the class
 * currently declares no fields or methods. The disabled code sketches a key
 * wrapping a single String value.
 */
public class Key /*implements Comparable<Key> */ {
// private final String value;
// Key(String value) {
// this.value = value;
// }
// @Override
// public int compareTo(Key o) {
// return, o.value);
// }
// NOTE(review): the disabled equals below starts with this.equals(o), i.e. it
// calls itself; if re-enabled as written it would recurse without terminating.
// An identity check (this == o) was presumably intended - confirm before reviving.
// @Override
// public boolean equals(Object o) {
// if (this.equals(o)) {
// return true;
// }
// if (o == null || getClass() != o.getClass()) {
// return false;
// }
// Key key = (Key) o;
// return Objects.equals(value, key.value);
// }
// NOTE(review): the disabled hashCode returns the constant 0 for every instance.
// @Override
// public int hashCode() {
// return 0;
// }
@ -57,9 +57,6 @@ public class Tasks {
StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb);
// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb);
Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();
final javafx.concurrent.Task<Void> task = new javafx.concurrent.Task<Void>() {
@ -97,10 +94,6 @@ public class Tasks {
this.updateProgress(i, corpusSize);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
} else {
xml_processing.progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
@ -112,10 +105,6 @@ public class Tasks {
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
// System.out.println(remainingSeconds);
previousTime = new Date();
xml_processing.isCancelled = isCancelled();
@ -138,7 +127,6 @@ public class Tasks {
// add remaining minRelFre results
if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
@ -151,8 +139,6 @@ public class Tasks {
for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){
statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
// System.out.println("asd");
return null;
@ -174,7 +160,6 @@ public class Tasks {
logger.error("Error while executing", e);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
@ -184,7 +169,6 @@ public class Tasks {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
@ -215,19 +199,6 @@ public class Tasks {
// int i = corpusFiles.size();
// Date startTime = new Date();
// Date previousTime = new Date();
// int remainingSeconds = -1;
// int corpusSize;
// if (statistic.getFilter().getCollocability().size() > 0) {
// corpusSize = corpusFiles.size() * 2;
// } else {
// corpusSize = corpusFiles.size();
// }
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
@ -264,13 +235,6 @@ public class Tasks {
this.updateProgress(i, corpusSize);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
// previousTime = new Date();
// }
// this.updateProgress(i, corpusSize);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
} else {
xml_processing.progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
@ -282,10 +246,6 @@ public class Tasks {
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
// System.out.println(remainingSeconds);
previousTime = new Date();
xml_processing.isCancelled = isCancelled();
@ -304,24 +264,9 @@ public class Tasks {
// readXML(f.toString(), statistic);
// i++;
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// if (statistic.getFilter().getCollocability().size() > 0) {
// this.updateProgress(i, corpusFiles.size() * 2);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
// } else {
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
// }
//// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
// If getMinimalRelFre() > 1, erase at the end of processing all words whose number of occurrences is lower than that threshold.
if (statistic.getFilter().getMinimalRelFre() > 1){
// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
@ -356,7 +301,6 @@ public class Tasks {
} else {
try {
// System.out.print(statistics);
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
@ -371,7 +315,6 @@ public class Tasks {
logger.error("Out of memory error", e1);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
@ -385,7 +328,6 @@ public class Tasks {
logger.error("Error while executing", e);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
@ -395,7 +337,6 @@ public class Tasks {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
@ -421,17 +362,9 @@ public class Tasks {
// int i = corpusFiles.size();
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
// int corpusSize;
// if (statistic.getFilter().getCollocability().size() > 0) {
// corpusSize = corpusFiles.size() * 2;
// } else {
// corpusSize = corpusFiles.size();
// }
int corpusSize;
int i;
@ -461,10 +394,6 @@ public class Tasks {
this.updateProgress(i, corpusSize);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
} else {
xml_processing.progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
@ -476,10 +405,6 @@ public class Tasks {
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)));
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
// System.out.println(remainingSeconds);
previousTime = new Date();
xml_processing.isCancelled = isCancelled();
@ -497,14 +422,6 @@ public class Tasks {
// readXML(f.toString(), statisticsOneGrams);
// i++;
// this.updateProgress(i, corpusFiles.size() * 2);
// if (statistic.getFilter().getCollocability().size() > 0) {
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
// } else {
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
// }
return null;
@ -517,7 +434,6 @@ public class Tasks {
task.setOnSucceeded(e -> {
try {
// calculate_collocabilities(statistic, statisticsOneGrams);
boolean successullySaved = statistic.saveResultToDisk();
if (successullySaved) {
@ -532,21 +448,6 @@ public class Tasks {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
logger.error("Out of memory error", e1);
// try {
// boolean successullySaved = statistic.saveResultToDisk();
// if (successullySaved) {
// } else {
// }
// } catch (UnsupportedEncodingException e1) {
// logger.error("Error while saving", e1);
// } catch (OutOfMemoryError e1){
// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
// logger.error("Out of memory error", e1);
// }
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
@ -559,7 +460,6 @@ public class Tasks {
logger.error("Error while executing", e);
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
@ -569,7 +469,6 @@ public class Tasks {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
@ -578,7 +477,6 @@ public class Tasks {
// When the cancel button is pressed, cancel the analysis.
cancel.setOnAction(e -> {
//"cancel button");
return task;
@ -49,15 +49,4 @@ public class TimeWatch {
return "Elapsed Time in nano seconds: ";
/**
 * Demonstrates the intended use of TimeWatch: start a watch, do some work, then print
 * the elapsed time to standard output in three forms (custom minute/second format,
 * whole seconds, and the default unit returned by {@code time()}).
 */
private void exampleUsage() {
TimeWatch watch = TimeWatch.start();
// do something...
System.out.println("Elapsed Time custom format: " + watch.toMinuteSeconds());
System.out.println("Elapsed Time in seconds: " + watch.time(TimeUnit.SECONDS));
// The label below says nanoseconds; NOTE(review): confirm that the no-arg time() really reports nanoseconds.
System.out.println("Elapsed Time in nano seconds: " + watch.time());
@ -20,22 +20,6 @@ import gui.ValidationUtil;
public class Util {
public final static Logger logger = LogManager.getLogger(Util.class);
/**
 * Splits a duration into hours, minutes, seconds, milliseconds, microseconds and
 * nanoseconds and renders the parts as one human-readable string.
 *
 * @param time the duration to format; it is passed to {@code time(TimeUnit, long)},
 *             which interprets it as a nanosecond count
 * @return a string of the form {@code "H h, M min, S s, MS ms, US µs, NS ns"}
 */
public static String toReadableTime(long time) {
// Each component is the whole duration expressed in that unit, minus the amount
// already accounted for by every larger unit extracted above it.
long hours = time(TimeUnit.HOURS, time);
long minutes = time(TimeUnit.MINUTES, time) - TimeUnit.HOURS.toMinutes(hours);
long seconds = time(TimeUnit.SECONDS, time) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
long milliseconds = time(TimeUnit.MILLISECONDS, time) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);
long microseconds = time(TimeUnit.MICROSECONDS, time) - TimeUnit.HOURS.toMicros(hours) - TimeUnit.MINUTES.toMicros(minutes) - TimeUnit.SECONDS.toMicros(seconds) - TimeUnit.MILLISECONDS.toMicros(milliseconds);
long nanoseconds = time(TimeUnit.NANOSECONDS, time) - TimeUnit.HOURS.toNanos(hours) - TimeUnit.MINUTES.toNanos(minutes) - TimeUnit.SECONDS.toNanos(seconds) - TimeUnit.MILLISECONDS.toNanos(milliseconds) - TimeUnit.MICROSECONDS.toNanos(microseconds);
return String.format("%d h, %d min, %d s, %d ms, %d µs, %d ns", hours, minutes, seconds, milliseconds, microseconds, nanoseconds);
/**
 * Converts a nanosecond count into the requested unit.
 *
 * @param unit the unit to convert to
 * @param t    the duration, in nanoseconds
 * @return {@code t} expressed in {@code unit}; {@link TimeUnit#convert(long, TimeUnit)}
 *         truncates when converting to a coarser unit
 */
private static long time(TimeUnit unit, long t) {
return unit.convert(t, TimeUnit.NANOSECONDS);
* Converts a number to a more readable format.
* 12345 -> 12.345
@ -97,12 +81,6 @@ public class Util {
return types.contains(o.getClass());
/**
 * Prints a "key: value" header followed by one line per map entry to standard output.
 *
 * @param map the map to print
 */
public static <K, V> void printMap(Map<K, V> map) {
System.out.println("\nkey: value");
// NOTE(review): "%,8d" only accepts integral values (e.g. Integer, Long), but V is
// unconstrained here; a map with any other value type would fail when formatted.
map.forEach((k, v) -> System.out.print(String.format("%s:\t %,8d%n", k, v)));
* Generic map converter, needed because AtomicLong does not implement Comparable.
* Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
@ -117,23 +95,6 @@ public class Util {
return m;
/**
 * Orders map entries by value and, when two values compare as equal, by key.
 * Both comparisons use the natural ordering ({@code compareTo}) of the value and key types.
 *
 * @param <K> key type
 * @param <V> value type
 */
public class ValueThenKeyComparator<K extends Comparable<? super K>,
V extends Comparable<? super V>>
implements Comparator<Map.Entry<K, V>> {
public int compare(Map.Entry<K, V> a, Map.Entry<K, V> b) {
// Primary sort key: the entry value.
int cmp1 = a.getValue().compareTo(b.getValue());
if (cmp1 != 0) {
return cmp1;
} else {
// Values are equal: fall back to the entry key as a tie-breaker.
return a.getKey().compareTo(b.getKey());
* Sorts a map in a descending order by value.
@ -183,25 +144,6 @@ public class Util {
return result;
/**
 * Prints a titled table of the map to standard output: a header with the title,
 * then one line per entry that includes the value's share of {@code number_of_words}
 * as a percentage.
 *
 * @param map             the counts to print
 * @param title           heading printed above the table
 * @param number_of_words the total used as the denominator for the percentage
 */
public static <K, V> void printMap(Map<K, Integer> map, String title, int number_of_words) {
System.out.println(String.format("\n%s\n------------\nkey: value\tpercent", title));
map.forEach((k, v) ->
// NOTE(review): the format string declares three %s placeholders, but only one
// argument is visible in this fragment. Confirm the key and value arguments were
// not lost; as written, String.format would throw MissingFormatArgumentException.
System.out.println(String.format("%s:\t %s\t %s%%",
Util.formatNumberReadable((double) v / number_of_words * 100))));
/**
 * Adds up all values stored in the map.
 *
 * @param map the map whose values are summed
 * @return the sum of all values; 0 for an empty map
 */
static long mapSumFrequencies(Map<MultipleHMKeys, Long> map) {
long sum = 0;
// Each Long value is unboxed to a primitive long by the loop variable.
for (long value : map.values()) {
sum += value;
return sum;
* Used for passing optional integer values for sorting.
@ -84,16 +84,6 @@ public class RDB {
// public byte[] atomicIntToByteArray(final AtomicLong i) {
// BigInteger bigInt = BigInteger.valueOf(i.intValue());
// return bigInt.toByteArray();
// }
/**
 * Returns the {@code db} field held by this object.
 *
 * @return the RocksDB handle stored in {@code db}
 */
public RocksDB getDb() {
return db;
public Map<String, AtomicLong> getDump() throws UnsupportedEncodingException {
Map<String, AtomicLong> dump = new HashMap<>();
@ -34,17 +34,6 @@
<ImageView fx:id="displayTaxonomyI" layoutX="370.0" layoutY="107.5" pickOnBounds="true" preserveRatio="true">
<Image url="questionmark.png" backgroundLoading="true"/>
<!--<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />-->
<!--<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD" />-->
<!--<TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0" />-->
<!--<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija" />-->
<!--<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0" />-->
<!--<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />-->
<!--<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />-->
<!--<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />-->
<!--<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />-->
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
@ -16,7 +16,6 @@
<!--<TextField fx:id="stringLengthTF" layoutX="225.0" layoutY="20.0" prefWidth="140.0" />-->
<Label fx:id="chooseCorpusLocationL" layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Nastavi lokacijo korpusa" />
<Button fx:id="chooseCorpusLocationB" layoutX="225.0" layoutY="20.0" prefWidth="140.0" mnemonicParsing="false"/>
<ImageView fx:id="chooseCorpusLocationI" layoutX="370.0" layoutY="27.5" pickOnBounds="true" preserveRatio="true">
@ -26,7 +25,6 @@
<Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0">
<Label fx:id="chooseCorpusL" prefHeight="70.0" prefWidth="704.0" text="Label"/>
<!--<CheckBox fx:id="gosUseOrthChB" layoutY="65.0" mnemonicParsing="false" text="Uporabi pogovorni zapis"/>-->
<ProgressIndicator fx:id="locationScanPI" layoutX="10.0" layoutY="60.0" prefHeight="50.0" progress="0.0"/>
@ -7,7 +7,6 @@
<?import org.controlsfx.control.CheckComboBox?>
<?import javafx.scene.control.Button?>
<?import javafx.scene.control.TextField?>
<?import javafx.scene.control.TextArea?>
<AnchorPane fx:id="solarFiltersTabPane" prefHeight="600.0" prefWidth="800.0" xmlns=""
xmlns:fx="" fx:controller="gui.FiltersForSolar">
@ -31,7 +30,6 @@
<!-- MSD and Taxonomy separated -->
<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="100.0" prefHeight="340.0" maxHeight="200.0" prefWidth="275.0" text=" " wrapText="true" editable="false"/>
<!--<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />-->
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0"/>
@ -110,11 +110,6 @@
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="380.0" prefHeight="95.0" maxHeight="95.0" prefWidth="360.0" text=" " wrapText="true" editable="false"/>
<!--<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">-->
<!--<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />-->
<!--<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />-->
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
<Button fx:id="changeLanguageB" layoutX="710.0" layoutY="40.0" mnemonicParsing="false" prefWidth="50.0"/>
@ -13,7 +13,6 @@
<?import javafx.scene.layout.Pane?>
<?import org.controlsfx.control.CheckComboBox?>
<?import javafx.scene.control.Separator?>
<?import javafx.scene.control.TextArea?>
<?import javafx.scene.image.ImageView?>
<?import javafx.scene.image.Image?>
@ -1,32 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import org.controlsfx.control.CheckComboBox?>
<?import javafx.scene.control.*?>
<?import javafx.scene.layout.AnchorPane?>
<?import javafx.scene.layout.Pane?>
<AnchorPane fx:id="wordAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns=""
xmlns:fx="" fx:controller="gui.WordFormationTab">
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. pojavitev" />
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Min. št. taksonomij" />
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0" />
<Button fx:id="computeB" layoutX="10.0" layoutY="422.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
text=" " wrapText="true"/>
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
@ -23,9 +23,9 @@ public class CorpusTests {
File f = Settings.corpus.iterator().next();
Statistics stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, 0, CalculateFor.WORD);
// stats.setCorpusType(CorpusType.GOS);
// Statistics stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, 0, CalculateFor.WORD);
// // stats.setCorpusType(CorpusType.GOS);
// stats.setCorpusType(CorpusType.SOLAR);
// XML_processing.readXMLGos(f.toString(), stats);
// XML_processing.readXML(f.toString(), stats);
@ -33,10 +33,10 @@ public class CorpusTests {
public void test() {
ObservableList<String> var = GosTaxonomy.getForComboBox();
String debug = "";
// @Test
// public void test() {
// ObservableList<String> var = GosTaxonomy.getForComboBox();
// String debug = "";
// }
Reference in New Issue
Block a user