BIG REFACTOR - erasing unused code
This commit is contained in:
parent
10666b4453
commit
2c028cd334
|
@ -1,15 +0,0 @@
|
||||||
package alg;
|
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
|
||||||
|
|
||||||
/**
 * Shared helpers for the counting algorithms.
 */
public class Common {

    /**
     * Increments the occurrence counter stored under {@code o}, creating it
     * with an initial value of 1 when the key has no counter yet.
     *
     * <p>The counter returned by {@link Map#putIfAbsent(Object, Object)} is
     * incremented directly instead of being looked up a second time. This
     * avoids a redundant lookup and removes the window in which a second
     * {@code map.get(o)} could return {@code null} if another thread removed
     * the key in between.
     *
     * @param map counters keyed by the counted object; modified in place
     * @param o   key whose counter is created or incremented
     * @param <K> key type of the map
     * @param <V> unused; retained so existing call sites with explicit type
     *            arguments keep compiling
     */
    public static <K, V> void updateMap(Map<K, AtomicLong> map, K o) {
        // A null result means the key was absent and the fresh counter (already 1) was stored.
        AtomicLong existing = map.putIfAbsent(o, new AtomicLong(1));

        // Otherwise a counter was already present: bump that very instance.
        if (existing != null) {
            existing.incrementAndGet();
        }
    }
}
|
|
|
@ -19,7 +19,6 @@ import gui.I18N;
|
||||||
import javafx.beans.InvalidationListener;
|
import javafx.beans.InvalidationListener;
|
||||||
import javafx.beans.property.ReadOnlyDoubleProperty;
|
import javafx.beans.property.ReadOnlyDoubleProperty;
|
||||||
import javafx.beans.property.ReadOnlyDoubleWrapper;
|
import javafx.beans.property.ReadOnlyDoubleWrapper;
|
||||||
import javafx.concurrent.Task;
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.io.LineIterator;
|
import org.apache.commons.io.LineIterator;
|
||||||
import org.apache.logging.log4j.LogManager;
|
import org.apache.logging.log4j.LogManager;
|
||||||
|
@ -38,35 +37,10 @@ public class XML_processing {
|
||||||
public static boolean isCollocability = false;
|
public static boolean isCollocability = false;
|
||||||
public static InvalidationListener progressBarListener;
|
public static InvalidationListener progressBarListener;
|
||||||
|
|
||||||
/**
 * Returns the current progress value.
 *
 * @return the value currently held by the property returned from
 *         {@code progressProperty()}
 */
public double getProgress() {
|
|
||||||
return progressProperty().get();
|
|
||||||
}
|
|
||||||
|
|
||||||
public ReadOnlyDoubleProperty progressProperty() {
|
public ReadOnlyDoubleProperty progressProperty() {
|
||||||
return progress ;
|
return progress ;
|
||||||
}
|
}
|
||||||
|
|
||||||
// public static void processCorpus(Statistics stats) {
|
|
||||||
// // we can preset the list's size, so there won't be a need to resize it
|
|
||||||
// List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT);
|
|
||||||
//
|
|
||||||
// int i = 0;
|
|
||||||
// for (File f : Settings.corpus) {
|
|
||||||
// i++;
|
|
||||||
// readXML(f.toString(), stats);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// public static void readXML(String path, Statistics stats) {
|
|
||||||
// if (stats.getCorpusType() == CorpusType.GIGAFIDA) {
|
|
||||||
// readXMLGigafida(path, stats);
|
|
||||||
// } else if (stats.getCorpusType() == CorpusType.GOS) {
|
|
||||||
// readXMLGos(path, stats);
|
|
||||||
// } else if (stats.getCorpusType() == CorpusType.SOLAR) {
|
|
||||||
// readXMLSolar(path, stats);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
public static boolean readXML(String path, StatisticsNew stats) {
|
public static boolean readXML(String path, StatisticsNew stats) {
|
||||||
if (stats.getCorpus().getCorpusType() == CorpusType.GIGAFIDA
|
if (stats.getCorpus().getCorpusType() == CorpusType.GIGAFIDA
|
||||||
|| stats.getCorpus().getCorpusType() == CorpusType.CCKRES) {
|
|| stats.getCorpus().getCorpusType() == CorpusType.CCKRES) {
|
||||||
|
@ -81,7 +55,6 @@ public class XML_processing {
|
||||||
} else if (stats.getCorpus().getCorpusType() == CorpusType.VERT) {
|
} else if (stats.getCorpus().getCorpusType() == CorpusType.VERT) {
|
||||||
return readVERT(path, stats);
|
return readVERT(path, stats);
|
||||||
}
|
}
|
||||||
// task.updateProgress(fileNum, size);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -174,15 +147,10 @@ public class XML_processing {
|
||||||
} else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) {
|
} else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) {
|
||||||
alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats);
|
alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats);
|
||||||
pool.invoke(wc);
|
pool.invoke(wc);
|
||||||
} else {
|
|
||||||
// TODO:
|
|
||||||
// alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats);
|
|
||||||
// pool.invoke(wc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if running with minimalRelFre frequency erase all ngrams with occurrences lower than set value per 1M
|
// if running with minimalRelFre frequency erase all ngrams with occurrences lower than set value per 1M
|
||||||
if(stats.getFilter().getIsMinimalRelFreScraper()) {
|
if(stats.getFilter().getIsMinimalRelFreScraper()) {
|
||||||
// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
|
|
||||||
long countFor1MWords = stats.getUniGramOccurrences().get(stats.getCorpus().getTotal()).longValue();
|
long countFor1MWords = stats.getUniGramOccurrences().get(stats.getCorpus().getTotal()).longValue();
|
||||||
if(countFor1MWords > 1000000L){
|
if(countFor1MWords > 1000000L){
|
||||||
double absToRelFactor = (stats.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
double absToRelFactor = (stats.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
||||||
|
@ -197,125 +165,9 @@ public class XML_processing {
|
||||||
stats.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
|
stats.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// System.out.println("asd");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// public static void readXMLGos(String path, Statistics stats) {
|
|
||||||
// boolean in_word = false;
|
|
||||||
// String taksonomija = "";
|
|
||||||
// String lemma = "";
|
|
||||||
// String msd = "";
|
|
||||||
// String type = stats.isGosOrthMode() ? "orth" : "norm"; // orth & norm
|
|
||||||
//
|
|
||||||
// List<Word> stavek = new ArrayList<>();
|
|
||||||
// List<Sentence> corpus = new ArrayList<>();
|
|
||||||
// String sentenceDelimiter = "seg";
|
|
||||||
// String taxonomyPrefix = "gos.";
|
|
||||||
//
|
|
||||||
// try {
|
|
||||||
// XMLInputFactory factory = XMLInputFactory.newInstance();
|
|
||||||
// XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path));
|
|
||||||
//
|
|
||||||
// while (eventReader.hasNext()) {
|
|
||||||
// XMLEvent event = eventReader.nextEvent();
|
|
||||||
//
|
|
||||||
// switch (event.getEventType()) {
|
|
||||||
// case XMLStreamConstants.START_ELEMENT:
|
|
||||||
//
|
|
||||||
// StartElement startElement = event.asStartElement();
|
|
||||||
// String qName = startElement.getName().getLocalPart();
|
|
||||||
//
|
|
||||||
// // "word" node
|
|
||||||
// if (qName.equals("w")) {
|
|
||||||
// in_word = true;
|
|
||||||
//
|
|
||||||
// if (type.equals("norm")) {
|
|
||||||
// // make sure we're looking at <w lemma...> and not <w type...>
|
|
||||||
// Iterator var = startElement.getAttributes();
|
|
||||||
// ArrayList<Object> attributes = new ArrayList<>();
|
|
||||||
// while (var.hasNext()) {
|
|
||||||
// attributes.add(var.next());
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if (attributes.contains("msd")) {
|
|
||||||
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
|
||||||
// } else {
|
|
||||||
// msd = null;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if (attributes.contains("lemma")) {
|
|
||||||
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// // taxonomy node
|
|
||||||
// else if (qName.equalsIgnoreCase("catRef")) {
|
|
||||||
// // there are some term nodes at the beginning that are of no interest to us
|
|
||||||
// // they differ by not having the attribute "ref", so test will equal null
|
|
||||||
// Attribute test = startElement.getAttributeByName(QName.valueOf("target"));
|
|
||||||
//
|
|
||||||
// if (test != null) {
|
|
||||||
// // keep only taxonomy properties
|
|
||||||
// taksonomija = String.valueOf(test.getValue()).replace(taxonomyPrefix, "");
|
|
||||||
// }
|
|
||||||
// } else if (qName.equalsIgnoreCase("div")) {
|
|
||||||
// type = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
// break;
|
|
||||||
//
|
|
||||||
// case XMLStreamConstants.CHARACTERS:
|
|
||||||
// Characters characters = event.asCharacters();
|
|
||||||
//
|
|
||||||
// // "word" node value
|
|
||||||
// if (in_word) {
|
|
||||||
// if (type.equals("norm") && msd != null) {
|
|
||||||
// stavek.add(new Word(characters.getData(), lemma, msd));
|
|
||||||
// } else {
|
|
||||||
// stavek.add(new Word(characters.getData()));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// in_word = false;
|
|
||||||
// }
|
|
||||||
// break;
|
|
||||||
//
|
|
||||||
// case XMLStreamConstants.END_ELEMENT:
|
|
||||||
// EndElement endElement = event.asEndElement();
|
|
||||||
//
|
|
||||||
// // parser reached end of the current sentence
|
|
||||||
// if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
|
||||||
// // add sentence to corpus
|
|
||||||
// corpus.add(new Sentence(stavek, taksonomija, type));
|
|
||||||
// // and start a new one
|
|
||||||
// stavek = new ArrayList<>();
|
|
||||||
//
|
|
||||||
// /* Invoke Fork-Join when we reach maximum limit of
|
|
||||||
// * sentences (because we can't read everything to
|
|
||||||
// * memory) or we reach the end of the file.
|
|
||||||
// */
|
|
||||||
// if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
|
|
||||||
// fj(corpus, stats);
|
|
||||||
// // empty the current corpus, since we don't need
|
|
||||||
// // the data anymore
|
|
||||||
// corpus.clear();
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // backup
|
|
||||||
// if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
|
|
||||||
// fj(corpus, stats);
|
|
||||||
// corpus.clear();
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// } catch (FileNotFoundException | XMLStreamException e) {
|
|
||||||
// e.printStackTrace();
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
public static boolean readXMLSolar(String path, StatisticsNew stats) {
|
public static boolean readXMLSolar(String path, StatisticsNew stats) {
|
||||||
boolean in_word = false;
|
boolean in_word = false;
|
||||||
|
@ -327,7 +179,6 @@ public class XML_processing {
|
||||||
List<Sentence> corpus = new ArrayList<>();
|
List<Sentence> corpus = new ArrayList<>();
|
||||||
|
|
||||||
// used for filter
|
// used for filter
|
||||||
// Set<String> headTags = new HashSet<>(Arrays.asList("sola", "predmet", "razred", "regija", "tip", "leto"));
|
|
||||||
Set<String> headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO));
|
Set<String> headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO));
|
||||||
Map<String, String> headBlock = null;
|
Map<String, String> headBlock = null;
|
||||||
boolean includeThisBlock = false;
|
boolean includeThisBlock = false;
|
||||||
|
@ -372,9 +223,7 @@ public class XML_processing {
|
||||||
|
|
||||||
switch (event.getEventType()) {
|
switch (event.getEventType()) {
|
||||||
case XMLStreamConstants.START_ELEMENT:
|
case XMLStreamConstants.START_ELEMENT:
|
||||||
|
|
||||||
StartElement startElement = event.asStartElement();
|
StartElement startElement = event.asStartElement();
|
||||||
// System.out.println(String.format("%s", startElement.toString()));
|
|
||||||
String qName = startElement.getName().getLocalPart();
|
String qName = startElement.getName().getLocalPart();
|
||||||
|
|
||||||
// "word" node
|
// "word" node
|
||||||
|
@ -423,7 +272,7 @@ public class XML_processing {
|
||||||
stavek = new ArrayList<>();
|
stavek = new ArrayList<>();
|
||||||
} else if (qName.equals("head")) {
|
} else if (qName.equals("head")) {
|
||||||
headBlock = new HashMap<>();
|
headBlock = new HashMap<>();
|
||||||
} else { // if (headTags.contains(qName)) {
|
} else {
|
||||||
boolean inHeadTags = false;
|
boolean inHeadTags = false;
|
||||||
String headTag = "";
|
String headTag = "";
|
||||||
for (String tag : headTags){
|
for (String tag : headTags){
|
||||||
|
@ -436,8 +285,6 @@ public class XML_processing {
|
||||||
if(inHeadTags) {
|
if(inHeadTags) {
|
||||||
String tagContent = eventReader.nextEvent().asCharacters().getData();
|
String tagContent = eventReader.nextEvent().asCharacters().getData();
|
||||||
headBlock.put(headTag, tagContent);
|
headBlock.put(headTag, tagContent);
|
||||||
// String tagContent = xmlEventReader.nextEvent().asCharacters().getData();
|
|
||||||
// resultFilters.get(headTag).add(tagContent);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -562,22 +409,16 @@ public class XML_processing {
|
||||||
if (line.length() > 4 && line.substring(1, 5).equals("text")) {
|
if (line.length() > 4 && line.substring(1, 5).equals("text")) {
|
||||||
// split over "\" "
|
// split over "\" "
|
||||||
String[] split = line.split("\" ");
|
String[] split = line.split("\" ");
|
||||||
// String mediumId = "";
|
|
||||||
// String typeId = "";
|
|
||||||
// String proofreadId = "";
|
|
||||||
boolean idsPresent = false;
|
boolean idsPresent = false;
|
||||||
for (String el : split) {
|
for (String el : split) {
|
||||||
String[] attribute = el.split("=\"");
|
String[] attribute = el.split("=\"");
|
||||||
if (attribute[0].equals("medium_id")) {
|
if (attribute[0].equals("medium_id")) {
|
||||||
// mediumId = attribute[1];
|
|
||||||
idsPresent = true;
|
idsPresent = true;
|
||||||
resultTaxonomy.add(attribute[1]);
|
resultTaxonomy.add(attribute[1]);
|
||||||
} else if (attribute[0].equals("type_id")) {
|
} else if (attribute[0].equals("type_id")) {
|
||||||
// typeId = attribute[1];
|
|
||||||
idsPresent = true;
|
idsPresent = true;
|
||||||
resultTaxonomy.add(attribute[1]);
|
resultTaxonomy.add(attribute[1]);
|
||||||
} else if (attribute[0].equals("proofread_id")) {
|
} else if (attribute[0].equals("proofread_id")) {
|
||||||
// proofreadId = attribute[1];
|
|
||||||
idsPresent = true;
|
idsPresent = true;
|
||||||
resultTaxonomy.add(attribute[1]);
|
resultTaxonomy.add(attribute[1]);
|
||||||
}
|
}
|
||||||
|
@ -586,13 +427,10 @@ public class XML_processing {
|
||||||
for (String el : split) {
|
for (String el : split) {
|
||||||
String[] attribute = el.split("=\"");
|
String[] attribute = el.split("=\"");
|
||||||
if (attribute[0].equals("medium")) {
|
if (attribute[0].equals("medium")) {
|
||||||
// mediumId = attribute[1];
|
|
||||||
resultTaxonomy.add(attribute[1]);
|
resultTaxonomy.add(attribute[1]);
|
||||||
} else if (attribute[0].equals("type")) {
|
} else if (attribute[0].equals("type")) {
|
||||||
// typeId = attribute[1];
|
|
||||||
resultTaxonomy.add(attribute[1]);
|
resultTaxonomy.add(attribute[1]);
|
||||||
} else if (attribute[0].equals("proofread")) {
|
} else if (attribute[0].equals("proofread")) {
|
||||||
// proofreadId = attribute[1];
|
|
||||||
resultTaxonomy.add(attribute[1]);
|
resultTaxonomy.add(attribute[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -679,7 +517,6 @@ public class XML_processing {
|
||||||
|
|
||||||
resultTaxonomy.add(tax);
|
resultTaxonomy.add(tax);
|
||||||
// solar
|
// solar
|
||||||
// } else if (!parseTaxonomy && headTags.contains(elementName)) {
|
|
||||||
} else if (!parseTaxonomy) {
|
} else if (!parseTaxonomy) {
|
||||||
boolean inHeadTags = false;
|
boolean inHeadTags = false;
|
||||||
String headTag = "";
|
String headTag = "";
|
||||||
|
@ -737,7 +574,6 @@ public class XML_processing {
|
||||||
boolean inPunctuation = false;
|
boolean inPunctuation = false;
|
||||||
boolean taxonomyMatch = true;
|
boolean taxonomyMatch = true;
|
||||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||||
// ArrayList<Taxonomy> currentFiletaxonomyLong = new ArrayList<>();
|
|
||||||
String lemma = "";
|
String lemma = "";
|
||||||
String msd = "";
|
String msd = "";
|
||||||
|
|
||||||
|
@ -780,8 +616,6 @@ public class XML_processing {
|
||||||
// keep only taxonomy properties
|
// keep only taxonomy properties
|
||||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus());
|
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus());
|
||||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||||
Tax taxonomy = new Tax();
|
|
||||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -795,40 +629,13 @@ public class XML_processing {
|
||||||
sentence.add(createWord(word, lemma, msd, word, stats.getFilter()));
|
sentence.add(createWord(word, lemma, msd, word, stats.getFilter()));
|
||||||
inWord = false;
|
inWord = false;
|
||||||
}
|
}
|
||||||
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
|
||||||
if (stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
if (stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||||
String punctuation = characters.getData();
|
String punctuation = characters.getData();
|
||||||
sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter()));
|
sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter()));
|
||||||
inPunctuation = false;
|
inPunctuation = false;
|
||||||
|
|
||||||
// String punctuation = ",";
|
|
||||||
//
|
|
||||||
// sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
|
|
||||||
// sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
|
|
||||||
// sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
|
|
||||||
// inPunctuation = false;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
|
||||||
// String actualPunctuation = characters.getData();
|
|
||||||
// if (actualPunctuation.equals(".") || actualPunctuation.equals("!") || actualPunctuation.equals("?") || actualPunctuation.equals("..."))
|
|
||||||
// break;
|
|
||||||
// String punctuation = ",";
|
|
||||||
// int skip_number = 0;
|
|
||||||
// if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue())){
|
|
||||||
// skip_number = stats.getFilter().getSkipValue();
|
|
||||||
// }
|
|
||||||
// for(int i = 1; i < skip_number + 2; i ++){
|
|
||||||
// if (i < sentence.size() && !sentence.get(sentence.size() - i).equals(punctuation)) {
|
|
||||||
// sentence.get(sentence.size() - i).setWord(sentence.get(sentence.size() - i).getWord() + punctuation);
|
|
||||||
// sentence.get(sentence.size() - i).setLemma(sentence.get(sentence.size() - i).getLemma() + punctuation);
|
|
||||||
// sentence.get(sentence.size() - i).setMsd(sentence.get(sentence.size() - i).getMsd() + punctuation);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// inPunctuation = false;
|
|
||||||
// }
|
|
||||||
|
|
||||||
case XMLStreamConstants.END_ELEMENT:
|
case XMLStreamConstants.END_ELEMENT:
|
||||||
EndElement endElement = event.asEndElement();
|
EndElement endElement = event.asEndElement();
|
||||||
|
|
||||||
|
@ -869,10 +676,6 @@ public class XML_processing {
|
||||||
fj(corpus, stats);
|
fj(corpus, stats);
|
||||||
// empty the current corpus, since we don't need the data anymore
|
// empty the current corpus, since we don't need the data anymore
|
||||||
corpus.clear();
|
corpus.clear();
|
||||||
|
|
||||||
// TODO: if (stats.isUseDB()) {
|
|
||||||
// stats.storeTmpResultsToDB();
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
|
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
|
||||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||||
|
@ -883,7 +686,6 @@ public class XML_processing {
|
||||||
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
|
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
|
||||||
// taxonomies don't match so stop
|
// taxonomies don't match so stop
|
||||||
// union (select words that match any of selected taxonomy
|
// union (select words that match any of selected taxonomy
|
||||||
// return false;
|
|
||||||
taxonomyMatch = false;
|
taxonomyMatch = false;
|
||||||
//
|
//
|
||||||
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
|
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
|
||||||
|
@ -898,10 +700,6 @@ public class XML_processing {
|
||||||
// join corpus and stats
|
// join corpus and stats
|
||||||
fj(corpus, stats);
|
fj(corpus, stats);
|
||||||
corpus.clear();
|
corpus.clear();
|
||||||
|
|
||||||
// TODO: if (stats.isUseDB()) {
|
|
||||||
// stats.storeTmpResultsToDB();
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -909,7 +707,6 @@ public class XML_processing {
|
||||||
}
|
}
|
||||||
} catch (FileNotFoundException | XMLStreamException e) {
|
} catch (FileNotFoundException | XMLStreamException e) {
|
||||||
throw new java.lang.RuntimeException("XMLStreamException | FileNotFoundException");
|
throw new java.lang.RuntimeException("XMLStreamException | FileNotFoundException");
|
||||||
// e.printStackTrace();
|
|
||||||
} finally {
|
} finally {
|
||||||
if (eventReader != null) {
|
if (eventReader != null) {
|
||||||
try {
|
try {
|
||||||
|
@ -929,7 +726,6 @@ public class XML_processing {
|
||||||
boolean inPunctuation = false;
|
boolean inPunctuation = false;
|
||||||
boolean taxonomyMatch = true;
|
boolean taxonomyMatch = true;
|
||||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||||
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
|
||||||
String lemma = "";
|
String lemma = "";
|
||||||
String msd = "";
|
String msd = "";
|
||||||
|
|
||||||
|
@ -1006,8 +802,6 @@ public class XML_processing {
|
||||||
// keep only taxonomy properties
|
// keep only taxonomy properties
|
||||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus());
|
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus());
|
||||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||||
// Tax taxonomy = new Tax();
|
|
||||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
|
||||||
}
|
}
|
||||||
} else if (stats.getCorpus().getTaxonomy().size() > 0 && qName.equalsIgnoreCase("catRef")) {
|
} else if (stats.getCorpus().getTaxonomy().size() > 0 && qName.equalsIgnoreCase("catRef")) {
|
||||||
// get value from attribute target
|
// get value from attribute target
|
||||||
|
@ -1017,41 +811,7 @@ public class XML_processing {
|
||||||
// keep only taxonomy properties
|
// keep only taxonomy properties
|
||||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).split(":")[1], stats.getCorpus());
|
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).split(":")[1], stats.getCorpus());
|
||||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||||
// Tax taxonomy = new Tax();
|
|
||||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) {
|
|
||||||
// HashMap<String, String> atts = extractAttributes(startElement);
|
|
||||||
// String debug = "";
|
|
||||||
//
|
|
||||||
// String tax = startElement.getAttributeByName(QName.valueOf("target"))
|
|
||||||
// .getValue()
|
|
||||||
// .replace("#", "");
|
|
||||||
//
|
|
||||||
// if (tax.indexOf(':') >= 0) {
|
|
||||||
// tax = tax.split(":")[1];
|
|
||||||
// }
|
|
||||||
// resultTaxonomy.add(tax);
|
|
||||||
// } else if (parseTaxonomy && elementName.equalsIgnoreCase("term")) {
|
|
||||||
// String tax = startElement.getAttributeByName(QName.valueOf("ref"))
|
|
||||||
// .getValue()
|
|
||||||
// .replace("#", "");
|
|
||||||
//
|
|
||||||
// resultTaxonomy.add(tax);
|
|
||||||
// } else if (!parseTaxonomy && headTags.contains(elementName)) {
|
|
||||||
// String tagContent = xmlEventReader.nextEvent().asCharacters().getData();
|
|
||||||
// resultFilters.get(elementName).add(tagContent);
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} else if (qName.equals("bibl")) {
|
} else if (qName.equals("bibl")) {
|
||||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||||
taxonomyMatch = true;
|
taxonomyMatch = true;
|
||||||
|
@ -1068,14 +828,10 @@ public class XML_processing {
|
||||||
// "word" node value
|
// "word" node value
|
||||||
if (inWord) {
|
if (inWord) {
|
||||||
String word = characters.getData();
|
String word = characters.getData();
|
||||||
// if (word.equals("Banovec")){
|
|
||||||
// System.out.println("Test");
|
|
||||||
// }
|
|
||||||
sentence.add(createWord(word, lemma, msd, word, stats.getFilter()));
|
sentence.add(createWord(word, lemma, msd, word, stats.getFilter()));
|
||||||
inWord = false;
|
inWord = false;
|
||||||
}
|
}
|
||||||
if (stats.getFilter().getNotePunctuations() && inPunctuation) {
|
if (stats.getFilter().getNotePunctuations() && inPunctuation) {
|
||||||
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
|
||||||
String punctuation = characters.getData();
|
String punctuation = characters.getData();
|
||||||
sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter()));
|
sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter()));
|
||||||
inPunctuation = false;
|
inPunctuation = false;
|
||||||
|
@ -1085,9 +841,6 @@ public class XML_processing {
|
||||||
case XMLStreamConstants.END_ELEMENT:
|
case XMLStreamConstants.END_ELEMENT:
|
||||||
EndElement endElement = event.asEndElement();
|
EndElement endElement = event.asEndElement();
|
||||||
|
|
||||||
String var = endElement.getName().getLocalPart();
|
|
||||||
String debug = "";
|
|
||||||
|
|
||||||
// parser reached end of the current sentence
|
// parser reached end of the current sentence
|
||||||
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
||||||
if (stats.getFilter().getNgramValue() == 0){
|
if (stats.getFilter().getNgramValue() == 0){
|
||||||
|
@ -1119,10 +872,6 @@ public class XML_processing {
|
||||||
fj(corpus, stats);
|
fj(corpus, stats);
|
||||||
// empty the current corpus, since we don't need the data anymore
|
// empty the current corpus, since we don't need the data anymore
|
||||||
corpus.clear();
|
corpus.clear();
|
||||||
|
|
||||||
// TODO: if (stats.isUseDB()) {
|
|
||||||
// stats.storeTmpResultsToDB();
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// fallback
|
// fallback
|
||||||
|
@ -1133,7 +882,6 @@ public class XML_processing {
|
||||||
corpus.clear();
|
corpus.clear();
|
||||||
|
|
||||||
currentFiletaxonomy = new ArrayList<>();
|
currentFiletaxonomy = new ArrayList<>();
|
||||||
// currentFiletaxonomyLong = new ArrayList<>();
|
|
||||||
} else if (endElement.getName().getLocalPart().equals("bibl")) {
|
} else if (endElement.getName().getLocalPart().equals("bibl")) {
|
||||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||||
|
|
||||||
|
@ -1143,7 +891,6 @@ public class XML_processing {
|
||||||
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
|
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
|
||||||
// taxonomies don't match so stop
|
// taxonomies don't match so stop
|
||||||
// union (select words that match any of selected taxonomy
|
// union (select words that match any of selected taxonomy
|
||||||
// return false;
|
|
||||||
taxonomyMatch = false;
|
taxonomyMatch = false;
|
||||||
//
|
//
|
||||||
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
|
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
|
||||||
|
@ -1162,10 +909,6 @@ public class XML_processing {
|
||||||
fj(corpus, stats);
|
fj(corpus, stats);
|
||||||
// empty the current corpus, since we don't need the data anymore
|
// empty the current corpus, since we don't need the data anymore
|
||||||
corpus.clear();
|
corpus.clear();
|
||||||
|
|
||||||
// TODO: if (stats.isUseDB()) {
|
|
||||||
// stats.storeTmpResultsToDB();
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
} catch (FileNotFoundException | XMLStreamException e) {
|
} catch (FileNotFoundException | XMLStreamException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
|
@ -1185,12 +928,9 @@ public class XML_processing {
|
||||||
@SuppressWarnings("Duplicates")
|
@SuppressWarnings("Duplicates")
|
||||||
public static boolean readXMLGos(String path, StatisticsNew stats) {
|
public static boolean readXMLGos(String path, StatisticsNew stats) {
|
||||||
boolean inWord = false;
|
boolean inWord = false;
|
||||||
boolean inPunctuation = false;
|
|
||||||
boolean inOrthDiv = false;
|
boolean inOrthDiv = false;
|
||||||
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
|
|
||||||
boolean inSeparatedWord = false;
|
boolean inSeparatedWord = false;
|
||||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||||
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
|
||||||
String lemma = "";
|
String lemma = "";
|
||||||
String msd = "";
|
String msd = "";
|
||||||
|
|
||||||
|
@ -1201,10 +941,6 @@ public class XML_processing {
|
||||||
String sentenceDelimiter = "seg";
|
String sentenceDelimiter = "seg";
|
||||||
int wordIndex = 0;
|
int wordIndex = 0;
|
||||||
|
|
||||||
String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int numLines = 0;
|
int numLines = 0;
|
||||||
int lineNum = 0;
|
int lineNum = 0;
|
||||||
progress.set(0.0);
|
progress.set(0.0);
|
||||||
|
@ -1248,7 +984,6 @@ public class XML_processing {
|
||||||
}
|
}
|
||||||
lineNum ++;
|
lineNum ++;
|
||||||
XMLEvent event = eventReader.nextEvent();
|
XMLEvent event = eventReader.nextEvent();
|
||||||
// System.out.print(String.format("%s", event.toString().replaceAll("\\['http://www.tei-c.org/ns/1.0'\\]::", "")));
|
|
||||||
|
|
||||||
switch (event.getEventType()) {
|
switch (event.getEventType()) {
|
||||||
case XMLStreamConstants.START_ELEMENT:
|
case XMLStreamConstants.START_ELEMENT:
|
||||||
|
@ -1278,11 +1013,6 @@ public class XML_processing {
|
||||||
if (atts.containsKey("lemma")) {
|
if (atts.containsKey("lemma")) {
|
||||||
lemma = atts.get("lemma");
|
lemma = atts.get("lemma");
|
||||||
}
|
}
|
||||||
//
|
|
||||||
// if (!inOrthDiv) {
|
|
||||||
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
|
||||||
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
|
||||||
// }
|
|
||||||
} else if (atts.containsKey("type") && atts.get("type").equals("separated")) {
|
} else if (atts.containsKey("type") && atts.get("type").equals("separated")) {
|
||||||
inSeparatedWord = true;
|
inSeparatedWord = true;
|
||||||
}
|
}
|
||||||
|
@ -1299,11 +1029,7 @@ public class XML_processing {
|
||||||
// keep only taxonomy properties
|
// keep only taxonomy properties
|
||||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()), stats.getCorpus());
|
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()), stats.getCorpus());
|
||||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||||
// Tax taxonomy = new Tax();
|
|
||||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
|
||||||
}
|
}
|
||||||
} else if (qName.equalsIgnoreCase("div")) {
|
|
||||||
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
|
|
||||||
} else if (qName.equalsIgnoreCase("seg")) {
|
} else if (qName.equalsIgnoreCase("seg")) {
|
||||||
HashMap<String, String> atts = extractAttributes(startElement);
|
HashMap<String, String> atts = extractAttributes(startElement);
|
||||||
|
|
||||||
|
@ -1322,20 +1048,14 @@ public class XML_processing {
|
||||||
case XMLStreamConstants.CHARACTERS:
|
case XMLStreamConstants.CHARACTERS:
|
||||||
// "word" node value
|
// "word" node value
|
||||||
if (inWord) {
|
if (inWord) {
|
||||||
// if (GOSCorpusHMKey.equals("gos.028-0108.norm") && wordIndex > 8){
|
|
||||||
// System.out.println(wordIndex);
|
|
||||||
// }
|
|
||||||
// if algorithm is in orthodox part add new word to sentence
|
// if algorithm is in orthodox part add new word to sentence
|
||||||
if (inOrthDiv){
|
if (inOrthDiv){
|
||||||
// GOSCorpusHM.put(GOSCorpusHMKey, sentence);
|
|
||||||
String word = "";
|
String word = "";
|
||||||
Characters characters = event.asCharacters();
|
Characters characters = event.asCharacters();
|
||||||
sentence.add(createWord(characters.getData(), "", "", "", stats.getFilter()));
|
sentence.add(createWord(characters.getData(), "", "", "", stats.getFilter()));
|
||||||
// if algorithm is in normalized part find orthodox word and add other info to it
|
// if algorithm is in normalized part find orthodox word and add other info to it
|
||||||
} else {
|
} else {
|
||||||
Characters characters = event.asCharacters();
|
Characters characters = event.asCharacters();
|
||||||
// System.out.println(wordIndex);
|
|
||||||
// System.out.println(GOSCorpusHMKey + " " + lemma + " " + wordIndex);
|
|
||||||
if (wordIndex < GOSCorpusHM.get(GOSCorpusHMKey).size()) {
|
if (wordIndex < GOSCorpusHM.get(GOSCorpusHMKey).size()) {
|
||||||
Word currentWord = GOSCorpusHM.get(GOSCorpusHMKey).get(wordIndex);
|
Word currentWord = GOSCorpusHM.get(GOSCorpusHMKey).get(wordIndex);
|
||||||
currentWord.setLemma(lemma, stats.getFilter().getWordParts());
|
currentWord.setLemma(lemma, stats.getFilter().getWordParts());
|
||||||
|
@ -1349,9 +1069,7 @@ public class XML_processing {
|
||||||
GOSCorpusHM.get(GOSCorpusHMKey).add(wordIndex, createWord(currentWord.getWord(stats.getFilter().getWordParts()),
|
GOSCorpusHM.get(GOSCorpusHMKey).add(wordIndex, createWord(currentWord.getWord(stats.getFilter().getWordParts()),
|
||||||
"", "", "", stats.getFilter()));
|
"", "", "", stats.getFilter()));
|
||||||
}
|
}
|
||||||
} //else {
|
}
|
||||||
// System.out.println("Error");
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1393,17 +1111,7 @@ public class XML_processing {
|
||||||
|
|
||||||
// add sentence to corpus if it passes filters
|
// add sentence to corpus if it passes filters
|
||||||
if (includeFile && !ValidationUtil.isEmpty(sentence)) {
|
if (includeFile && !ValidationUtil.isEmpty(sentence)) {
|
||||||
// for(Word w : sentence) {
|
|
||||||
// if (w.getW1().equals("")) {
|
|
||||||
// System.out.println("HERE!!!");
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
sentence = runFilters(sentence, stats.getFilter());
|
sentence = runFilters(sentence, stats.getFilter());
|
||||||
// for(Word w : sentence) {
|
|
||||||
// if (w.getW1().equals("")) {
|
|
||||||
// System.out.println("HERE!!!");
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
corpus.add(new Sentence(sentence, currentFiletaxonomy));
|
corpus.add(new Sentence(sentence, currentFiletaxonomy));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1430,21 +1138,12 @@ public class XML_processing {
|
||||||
|
|
||||||
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
|
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
|
||||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||||
// if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
|
|
||||||
// currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
|
|
||||||
//
|
|
||||||
// // disregard this entry if taxonomies don't match
|
|
||||||
// includeFile = !currentFiletaxonomy.isEmpty();
|
|
||||||
//
|
|
||||||
//// currentFiletaxonomy = new ArrayList<>();
|
|
||||||
// }
|
|
||||||
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
|
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
|
||||||
currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
|
currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
|
||||||
|
|
||||||
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
|
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
|
||||||
// taxonomies don't match so stop
|
// taxonomies don't match so stop
|
||||||
// union (select words that match any of selected taxonomy
|
// union (select words that match any of selected taxonomy
|
||||||
// return false;
|
|
||||||
includeFile = false;
|
includeFile = false;
|
||||||
//
|
//
|
||||||
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
|
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
|
||||||
|
@ -1462,7 +1161,6 @@ public class XML_processing {
|
||||||
corpus.clear();
|
corpus.clear();
|
||||||
|
|
||||||
currentFiletaxonomy = new ArrayList<>();
|
currentFiletaxonomy = new ArrayList<>();
|
||||||
// currentFiletaxonomyLong = new ArrayList<>();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -1488,9 +1186,6 @@ public class XML_processing {
|
||||||
@SuppressWarnings("Duplicates")
|
@SuppressWarnings("Duplicates")
|
||||||
public static boolean readVERT(String path, StatisticsNew stats) {
|
public static boolean readVERT(String path, StatisticsNew stats) {
|
||||||
// taxonomy corpora
|
// taxonomy corpora
|
||||||
// HashSet<String> resultTaxonomy = new HashSet<>();
|
|
||||||
|
|
||||||
|
|
||||||
// regi path
|
// regi path
|
||||||
String regiPath = path.substring(0, path.length()-4) + "regi";
|
String regiPath = path.substring(0, path.length()-4) + "regi";
|
||||||
|
|
||||||
|
@ -1503,7 +1198,6 @@ public class XML_processing {
|
||||||
// read regi file
|
// read regi file
|
||||||
regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8");
|
regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8");
|
||||||
try {
|
try {
|
||||||
boolean insideHeader = false;
|
|
||||||
int attributeIndex = 0;
|
int attributeIndex = 0;
|
||||||
while (regiIt.hasNext()) {
|
while (regiIt.hasNext()) {
|
||||||
String line = regiIt.nextLine();
|
String line = regiIt.nextLine();
|
||||||
|
@ -1534,7 +1228,6 @@ public class XML_processing {
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new java.lang.RuntimeException("IOException");
|
throw new java.lang.RuntimeException("IOException");
|
||||||
// e.printStackTrace();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int numLines = 0;
|
int numLines = 0;
|
||||||
|
@ -1556,7 +1249,6 @@ public class XML_processing {
|
||||||
LineIterator it;
|
LineIterator it;
|
||||||
|
|
||||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||||
boolean inParagraph = false;
|
|
||||||
boolean inSentence = false;
|
boolean inSentence = false;
|
||||||
boolean taxonomyMatch = true;
|
boolean taxonomyMatch = true;
|
||||||
int lineNum = 0;
|
int lineNum = 0;
|
||||||
|
@ -1572,8 +1264,6 @@ public class XML_processing {
|
||||||
try {
|
try {
|
||||||
it = FileUtils.lineIterator(new File(path), "UTF-8");
|
it = FileUtils.lineIterator(new File(path), "UTF-8");
|
||||||
try {
|
try {
|
||||||
boolean insideHeader = false;
|
|
||||||
|
|
||||||
while (it.hasNext()) {
|
while (it.hasNext()) {
|
||||||
int percentage = (int) (lineNum * 100.0 / numLines);
|
int percentage = (int) (lineNum * 100.0 / numLines);
|
||||||
if(progress.get() < percentage) {
|
if(progress.get() < percentage) {
|
||||||
|
@ -1596,7 +1286,6 @@ public class XML_processing {
|
||||||
boolean proofread = false;
|
boolean proofread = false;
|
||||||
for (String el : split) {
|
for (String el : split) {
|
||||||
String[] attribute = el.split("=\"");
|
String[] attribute = el.split("=\"");
|
||||||
boolean idsPresent = false;
|
|
||||||
if (attribute[0].equals("medium_id") && !attribute[1].equals("-")) {
|
if (attribute[0].equals("medium_id") && !attribute[1].equals("-")) {
|
||||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus());
|
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus());
|
||||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||||
|
@ -1639,12 +1328,6 @@ public class XML_processing {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
// else if((line.length() >= 3 && line.substring(0, 2).equals("<p") && line.substring(line.length() - 1, line.length()).equals(">")) ||
|
|
||||||
// (line.length() >= 3 && line.substring(0, 3).equals("<ab") && line.substring(line.length() - 1, line.length()).equals(">"))){
|
|
||||||
// inParagraph = true;
|
|
||||||
// } else if((line.length() == 4 && line.equals("</p>")) || (line.length() == 5 && line.equals("</ab>"))){
|
|
||||||
// inParagraph = false;
|
|
||||||
// }
|
|
||||||
else if(line.length() >= 3 && line.substring(0, 2).equals("<s") && line.substring(line.length() - 1, line.length()).equals(">")){
|
else if(line.length() >= 3 && line.substring(0, 2).equals("<s") && line.substring(line.length() - 1, line.length()).equals(">")){
|
||||||
inSentence = true;
|
inSentence = true;
|
||||||
} else if(line.length() == 4 && line.equals("</s>")){
|
} else if(line.length() == 4 && line.equals("</s>")){
|
||||||
|
@ -1677,10 +1360,7 @@ public class XML_processing {
|
||||||
|
|
||||||
// and start a new one
|
// and start a new one
|
||||||
sentence = new ArrayList<>();
|
sentence = new ArrayList<>();
|
||||||
|
|
||||||
// corpus.add(new Sentence(sentence, currentFiletaxonomy));
|
|
||||||
} else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence){
|
} else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence){
|
||||||
// } else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence && inParagraph){
|
|
||||||
String[] split = line.split("\t");
|
String[] split = line.split("\t");
|
||||||
if(slovene) {
|
if(slovene) {
|
||||||
if (split[lemmaIndex].length() > 2 && split[lemmaIndex].charAt(split[lemmaIndex].length() - 2) == '-' && Character.isAlphabetic(split[lemmaIndex].charAt(split[lemmaIndex].length() - 1)) &&
|
if (split[lemmaIndex].length() > 2 && split[lemmaIndex].charAt(split[lemmaIndex].length() - 2) == '-' && Character.isAlphabetic(split[lemmaIndex].charAt(split[lemmaIndex].length() - 1)) &&
|
||||||
|
@ -1721,7 +1401,6 @@ public class XML_processing {
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
// resultTaxonomy.remove("-");
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,67 +0,0 @@
|
||||||
//package alg.inflectedJOS;
|
|
||||||
//
|
|
||||||
//import java.util.List;
|
|
||||||
//import java.util.concurrent.RecursiveAction;
|
|
||||||
//
|
|
||||||
//import data.Sentence;
|
|
||||||
//import data.Statistics;
|
|
||||||
//
|
|
||||||
//public class ForkJoin extends RecursiveAction {
|
|
||||||
// private static final long serialVersionUID = -1260951004477299634L;
|
|
||||||
//
|
|
||||||
// private static final int ACCEPTABLE_SIZE = 1000;
|
|
||||||
// private List<Sentence> corpus;
|
|
||||||
// private Statistics stats;
|
|
||||||
// private int start;
|
|
||||||
// private int end;
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// /**
|
|
||||||
// * Constructor for subproblems.
|
|
||||||
// */
|
|
||||||
// private ForkJoin(List<Sentence> corpus, int start, int end, Statistics stats) {
|
|
||||||
// this.corpus = corpus;
|
|
||||||
// this.start = start;
|
|
||||||
// this.end = end;
|
|
||||||
// this.stats = stats;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// /**
|
|
||||||
// * Default constructor for the initial problem
|
|
||||||
// */
|
|
||||||
// public ForkJoin(List<Sentence> corpus, Statistics stats) {
|
|
||||||
// this.corpus = corpus;
|
|
||||||
// this.start = 0;
|
|
||||||
// this.end = corpus.size();
|
|
||||||
// this.stats = stats;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private void computeDirectly() {
|
|
||||||
// List<Sentence> subCorpus = corpus.subList(start, end);
|
|
||||||
//
|
|
||||||
// if (stats.isTaxonomySet()) {
|
|
||||||
// InflectedJOSCount.calculateForAll(subCorpus, stats, stats.getInflectedJosTaxonomy());
|
|
||||||
// } else {
|
|
||||||
// InflectedJOSCount.calculateForAll(subCorpus, stats, null);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// @Override
|
|
||||||
// protected void compute() {
|
|
||||||
// int subCorpusSize = end - start;
|
|
||||||
//
|
|
||||||
// if (subCorpusSize < ACCEPTABLE_SIZE) {
|
|
||||||
// computeDirectly();
|
|
||||||
// } else {
|
|
||||||
// int mid = start + subCorpusSize / 2;
|
|
||||||
// ForkJoin left = new ForkJoin(corpus, start, mid, stats);
|
|
||||||
// ForkJoin right = new ForkJoin(corpus, mid, end, stats);
|
|
||||||
//
|
|
||||||
// // fork (push to queue)-> compute -> join
|
|
||||||
// left.fork();
|
|
||||||
// right.fork();
|
|
||||||
// left.join();
|
|
||||||
// right.join();
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//}
|
|
|
@ -1,170 +0,0 @@
|
||||||
//package alg.inflectedJOS;
|
|
||||||
//
|
|
||||||
//import java.util.ArrayList;
|
|
||||||
//import java.util.HashMap;
|
|
||||||
//import java.util.List;
|
|
||||||
//
|
|
||||||
//import org.apache.commons.lang3.StringUtils;
|
|
||||||
//
|
|
||||||
//import alg.Common;
|
|
||||||
//import data.Sentence;
|
|
||||||
//import data.Statistics;
|
|
||||||
//import data.StatisticsNew;
|
|
||||||
//import data.Word;
|
|
||||||
//
|
|
||||||
//public class InflectedJOSCount {
|
|
||||||
//
|
|
||||||
// public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;
|
|
||||||
//
|
|
||||||
// // static {
|
|
||||||
// // // calculate all possible combinations of indices we will substitute with a '-' for substring statistics
|
|
||||||
// // indices = new HashMap<>();
|
|
||||||
// // for (int i = 5; i <= 8; i++) {
|
|
||||||
// // indices.put(i, calculateCombinations(i));
|
|
||||||
// // }
|
|
||||||
// // }
|
|
||||||
// //
|
|
||||||
// // private static List<Integer> calculateCombinations(int i) {
|
|
||||||
// // int arr[] = {1, 2, 3, 4, 5};
|
|
||||||
// // int r = 3;
|
|
||||||
// // int n = arr.length;
|
|
||||||
// // ArrayList<ArrayList<Integer>> result = new ArrayList<>();
|
|
||||||
// //
|
|
||||||
// // return printCombination(arr, n, r);
|
|
||||||
// // }
|
|
||||||
// //
|
|
||||||
// // /* arr[] ---> Input Array
|
|
||||||
// // data[] ---> Temporary array to store current combination
|
|
||||||
// // start & end ---> Starting and Ending indexes in arr[]
|
|
||||||
// // index ---> Current index in data[]
|
|
||||||
// // r ---> Size of a combination to be printed */
|
|
||||||
// // static void combinationUtil(int arr[], int data[], int start,
|
|
||||||
// // int end, int index, int r, ArrayList<ArrayList<Integer>> result) {
|
|
||||||
// // // Current combination is ready to be printed, print it
|
|
||||||
// // ArrayList<Integer> tmpResult = new ArrayList<>();
|
|
||||||
// //
|
|
||||||
// // if (index == r) {
|
|
||||||
// // ArrayList<Integer> tmpResult = new ArrayList<>();
|
|
||||||
// // for (int j = 0; j < r; j++)
|
|
||||||
// // System.out.print(data[j] + " ");
|
|
||||||
// // System.out.println("");
|
|
||||||
// // return;
|
|
||||||
// // }
|
|
||||||
// //
|
|
||||||
// // // replace index with all possible elements. The condition
|
|
||||||
// // // "end-i+1 >= r-index" makes sure that including one element
|
|
||||||
// // // at index will make a combination with remaining elements
|
|
||||||
// // // at remaining positions
|
|
||||||
// // for (int i = start; i <= end && end - i + 1 >= r - index; i++) {
|
|
||||||
// // data[index] = arr[i];
|
|
||||||
// // combinationUtil(arr, data, i + 1, end, index + 1, r);
|
|
||||||
// // }
|
|
||||||
// // }
|
|
||||||
// //
|
|
||||||
// // // The main function that prints all combinations of size r
|
|
||||||
// // // in arr[] of size n. This function mainly uses combinationUtil()
|
|
||||||
// // static void printCombination(int arr[], int n, int r) {
|
|
||||||
// // // A temporary array to store all combination one by one
|
|
||||||
// // int data[] = new int[r];
|
|
||||||
// //
|
|
||||||
// // // Print all combination using temporary array 'data[]'
|
|
||||||
// // combinationUtil(arr, data, 0, n - 1, 0, r);
|
|
||||||
// // }
|
|
||||||
//
|
|
||||||
// // public static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
|
||||||
// // for (Sentence s : corpus) {
|
|
||||||
// // // disregard if wrong taxonomy
|
|
||||||
// // if (!(s.getObservableListTaxonomy().startsWith(taxonomy))) {
|
|
||||||
// // continue;
|
|
||||||
// // }
|
|
||||||
// //
|
|
||||||
// // calculateCommon(s, stats.result);
|
|
||||||
// //
|
|
||||||
// // for (Word word : s.getWords()) {
|
|
||||||
// // // skip if current word is not inflected
|
|
||||||
// // if (!(word.getMsd().length() > 0)) {
|
|
||||||
// // continue;
|
|
||||||
// // }
|
|
||||||
// //
|
|
||||||
// // String msd = word.getMsd();
|
|
||||||
// //
|
|
||||||
// // StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
|
||||||
// //
|
|
||||||
// // for (int i = 1; i < msd.length(); i++) {
|
|
||||||
// // entry.setCharAt(i, msd.charAt(i));
|
|
||||||
// // Common.updateMap(stats.result, entry.toString());
|
|
||||||
// // entry.setCharAt(i, '-');
|
|
||||||
// // }
|
|
||||||
// // }
|
|
||||||
// // }
|
|
||||||
// // }
|
|
||||||
//
|
|
||||||
// // public static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
|
||||||
// // for (Sentence s : corpus) {
|
|
||||||
// // for (Word word : s.getWords()) {
|
|
||||||
// // if (!(word.getMsd().length() > 0)) {
|
|
||||||
// // continue;
|
|
||||||
// // }
|
|
||||||
// //
|
|
||||||
// // String msd = word.getMsd();
|
|
||||||
// //
|
|
||||||
// // StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
|
||||||
// //
|
|
||||||
// // for (int i = 1; i < msd.length(); i++) {
|
|
||||||
// // entry.setCharAt(i, msd.charAt(i));
|
|
||||||
// // Common.updateMap(stats.result, entry.toString());
|
|
||||||
// // entry.setCharAt(i, '-');
|
|
||||||
// // }
|
|
||||||
// // }
|
|
||||||
// // }
|
|
||||||
// // }
|
|
||||||
//
|
|
||||||
// static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
|
||||||
// for (Sentence s : corpus) {
|
|
||||||
// // disregard if wrong taxonomy
|
|
||||||
//// if (taxonomy != null && !(s.getObservableListTaxonomy().startsWith(taxonomy))) {
|
|
||||||
//// continue;
|
|
||||||
//// }
|
|
||||||
//
|
|
||||||
// for (Word word : s.getWords()) {
|
|
||||||
// // skip if current word is not inflected
|
|
||||||
// if (!(word.getMsd().length() > 0)) {
|
|
||||||
// continue;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// String msd = word.getMsd();
|
|
||||||
//
|
|
||||||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
|
||||||
//
|
|
||||||
// for (int i = 1; i < msd.length(); i++) {
|
|
||||||
// entry.setCharAt(i, msd.charAt(i));
|
|
||||||
// Common.updateMap(stats.result, entry.toString());
|
|
||||||
// entry.setCharAt(i, '-');
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
|
|
||||||
// for (Sentence s : corpus) {
|
|
||||||
//
|
|
||||||
// for (Word word : s.getWords()) {
|
|
||||||
// // skip if current word is not inflected
|
|
||||||
// // // TODO: if has defined msd and is of correct type (create a set)
|
|
||||||
// // if (!(word.getMsd().length() > 0)) {
|
|
||||||
// // continue;
|
|
||||||
// // }
|
|
||||||
//
|
|
||||||
// String msd = word.getMsd();
|
|
||||||
//
|
|
||||||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
|
||||||
//
|
|
||||||
// for (int i = 1; i < msd.length(); i++) {
|
|
||||||
// entry.setCharAt(i, msd.charAt(i));
|
|
||||||
// stats.updateResults(entry.toString());
|
|
||||||
// entry.setCharAt(i, '-');
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//}
|
|
|
@ -1,132 +0,0 @@
|
||||||
package alg.inflectedJOS;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import data.Enums.InflectedJosTypes;
|
|
||||||
import data.StatisticsNew;
|
|
||||||
import data.Taxonomy;
|
|
||||||
import gui.ValidationUtil;
|
|
||||||
import util.Combinations;
|
|
||||||
|
|
||||||
// adapted from http://www.geeksforgeeks.org/print-all-possible-combinations-of-r-elements-in-a-given-array-of-size-n/
|
|
||||||
public class WordFormation {
|
|
||||||
private static HashMap<String, Long> josTypeResult;
|
|
||||||
private static Object[][] tmpResults;
|
|
||||||
|
|
||||||
private static HashMap<Integer, HashSet<HashSet<Integer>>> indices;
|
|
||||||
|
|
||||||
static {
|
|
||||||
indices = new HashMap<>();
|
|
||||||
|
|
||||||
for (int i = 4; i <= 8; i++) {
|
|
||||||
indices.put(i, Combinations.generateIndices(i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void calculateStatistics(StatisticsNew stat) {
|
|
||||||
Map<String, AtomicLong> result = stat.getResult();
|
|
||||||
|
|
||||||
// 1. filter - keep only inflected types
|
|
||||||
result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.toString().charAt(0)));
|
|
||||||
|
|
||||||
// 2. for each inflected type get all possible subcombinations
|
|
||||||
for (Character josChar : InflectedJosTypes.inflectedJosTypes) {
|
|
||||||
josTypeResult = new HashMap<>();
|
|
||||||
|
|
||||||
// filter out results for a single word type
|
|
||||||
Map<String, AtomicLong> singleTypeResults = result.entrySet().stream()
|
|
||||||
.filter(x -> x.getKey().charAt(0) == josChar)
|
|
||||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
|
|
||||||
|
|
||||||
if (ValidationUtil.isEmpty(singleTypeResults)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// get all possible indices combos for a msd of this length
|
|
||||||
// HashSet<HashSet<Integer>> indicesCombos = indices.get()
|
|
||||||
//Combinations.generateIndices(singleTypeResults.keySet().stream().findFirst().get().length());
|
|
||||||
|
|
||||||
for (Map.Entry<String, AtomicLong> e : singleTypeResults.entrySet()) {
|
|
||||||
int l = e.getKey().length();
|
|
||||||
|
|
||||||
for (HashSet<Integer> indicesCombo : indices.get(e.getKey().length())) {
|
|
||||||
updateResults(mask(e.getKey(), indicesCombo), e.getValue().longValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resultsMapToArray(singleTypeResults.values().stream().mapToLong(Number::longValue).sum());
|
|
||||||
}
|
|
||||||
|
|
||||||
stat.setResultCustom(tmpResults);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String mask(String word, HashSet<Integer> indicesCombo) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
|
|
||||||
sb.append(word.charAt(0));
|
|
||||||
for (int i = 1; i < word.length(); i++) {
|
|
||||||
sb.append(indicesCombo.contains(i) ? word.charAt(i) : ".");
|
|
||||||
}
|
|
||||||
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private static void updateResults(String s, Long nOfOccurences) {
|
|
||||||
// if not in map add
|
|
||||||
Long r = josTypeResult.putIfAbsent(s, nOfOccurences);
|
|
||||||
|
|
||||||
// else update
|
|
||||||
if (r != null) {
|
|
||||||
josTypeResult.put(s, josTypeResult.get(s) + nOfOccurences);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void resultsMapToArray(Long totalValue) {
|
|
||||||
Double total = totalValue * 1.0;
|
|
||||||
Object[][] josTypeResultArray = new Object[josTypeResult.size()][3];
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
for (Map.Entry<String, Long> e : josTypeResult.entrySet()) {
|
|
||||||
josTypeResultArray[i][0] = e.getKey();
|
|
||||||
josTypeResultArray[i][1] = e.getValue();
|
|
||||||
josTypeResultArray[i][2] = e.getValue() / total;
|
|
||||||
|
|
||||||
if (e.getValue() > total) {
|
|
||||||
|
|
||||||
String debug = "";
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tmpResults == null) {
|
|
||||||
tmpResults = josTypeResultArray;
|
|
||||||
} else {
|
|
||||||
int firstLength = tmpResults.length;
|
|
||||||
int secondLength = josTypeResultArray.length;
|
|
||||||
Object[][] tmp = new Object[firstLength + secondLength][3];
|
|
||||||
|
|
||||||
System.arraycopy(tmpResults, 0, tmp, 0, firstLength);
|
|
||||||
System.arraycopy(josTypeResultArray, 0, tmp, firstLength, secondLength);
|
|
||||||
|
|
||||||
tmpResults = tmp;
|
|
||||||
|
|
||||||
// tmpResults = ArrayUtils.addAll(tmpResults, josTypeResultArray);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void printArray() {
|
|
||||||
for (int i = 0; i < tmpResults.length; i++) {
|
|
||||||
for (int j = 0; j < tmpResults[i].length; j++) {
|
|
||||||
System.out.print(tmpResults[i][j] + "\t");
|
|
||||||
}
|
|
||||||
System.out.println();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -80,36 +80,13 @@ public class Ngrams {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// boolean a = (correctPrefix.equals("") && !correctSuffix.equals(""));
|
|
||||||
// boolean b = (!correctPrefix.equals("") && correctSuffix.equals(""));
|
|
||||||
// boolean c = (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length());
|
|
||||||
// boolean d = !((correctPrefix.equals("") && !correctSuffix.equals("")) ||
|
|
||||||
// (!correctPrefix.equals("") && correctSuffix.equals("")) ||
|
|
||||||
// (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()));
|
|
||||||
|
|
||||||
if(!((stats.getFilter().getPrefixList().size() == 0 && !correctSuffix.equals("")) ||
|
if(!((stats.getFilter().getPrefixList().size() == 0 && !correctSuffix.equals("")) ||
|
||||||
(!correctPrefix.equals("") && stats.getFilter().getSuffixList().size() == 0) ||
|
(!correctPrefix.equals("") && stats.getFilter().getSuffixList().size() == 0) ||
|
||||||
(!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){
|
(!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if(!((correctPrefix.equals("") && !correctSuffix.equals("")) ||
|
|
||||||
// (!correctPrefix.equals("") && correctSuffix.equals("")) ||
|
|
||||||
// (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){
|
|
||||||
// continue;
|
|
||||||
// }
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if last letter is ',' erase it
|
|
||||||
|
|
||||||
// if (key.equals("")){
|
|
||||||
// String test = key;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if (stats.getFilter().getNotePunctuations())
|
|
||||||
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
|
||||||
|
|
||||||
MultipleHMKeys multipleKeys;
|
MultipleHMKeys multipleKeys;
|
||||||
|
|
||||||
// create MultipleHMKeys for different amount of other keys
|
// create MultipleHMKeys for different amount of other keys
|
||||||
|
@ -119,28 +96,17 @@ public class Ngrams {
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||||
// if (stats.getFilter().getNotePunctuations())
|
|
||||||
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
|
||||||
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||||
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||||
// if (stats.getFilter().getNotePunctuations()) {
|
|
||||||
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
|
||||||
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
|
||||||
// }
|
|
||||||
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||||
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||||
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||||
// if (stats.getFilter().getNotePunctuations()) {
|
|
||||||
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
|
||||||
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
|
||||||
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
|
||||||
// }
|
|
||||||
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
|
@ -148,41 +114,13 @@ public class Ngrams {
|
||||||
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||||
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||||
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3), stats.getFilter().getWordParts());
|
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3), stats.getFilter().getWordParts());
|
||||||
// if (stats.getFilter().getNotePunctuations()) {
|
|
||||||
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
|
||||||
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
|
||||||
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
|
||||||
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
|
||||||
// }
|
|
||||||
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
multipleKeys = null;
|
multipleKeys = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// String lemma = "";
|
|
||||||
// String wordType = "";
|
|
||||||
// String msd = "";
|
|
||||||
// for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
|
|
||||||
// if(otherKey.toString().equals("lema")){
|
|
||||||
// lemma = wordToString(ngramCandidate, otherKey);
|
|
||||||
// } else if(otherKey.toString().equals("besedna vrsta")){
|
|
||||||
// wordType = wordToString(ngramCandidate, otherKey).substring(0, 1);
|
|
||||||
// } else if(otherKey.toString().equals("oblikoskladenjska oznaka")){
|
|
||||||
// msd = wordToString(ngramCandidate, otherKey);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// UPDATE TAXONOMY HERE!!!
|
|
||||||
stats.updateTaxonomyResults(multipleKeys, s.getTaxonomy());
|
stats.updateTaxonomyResults(multipleKeys, s.getTaxonomy());
|
||||||
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -191,18 +129,12 @@ public class Ngrams {
|
||||||
* Checks whether an ngram candidate passes specified regex filter.
|
* Checks whether an ngram candidate passes specified regex filter.
|
||||||
*/
|
*/
|
||||||
private static boolean passesRegex(List<Word> ngramCandidate, ArrayList<Pattern> regex, ArrayList<CalculateFor> wordParts) {
|
private static boolean passesRegex(List<Word> ngramCandidate, ArrayList<Pattern> regex, ArrayList<CalculateFor> wordParts) {
|
||||||
// if (ngramCandidate.size() != regex.size()) {
|
|
||||||
// logger.error("ngramCandidate.size() & msd.size() mismatch"); // should not occur anyway
|
|
||||||
// return false;
|
|
||||||
// }
|
|
||||||
|
|
||||||
int j = 0;
|
int j = 0;
|
||||||
for (int i = 0; i < ngramCandidate.size(); i++) {
|
for (int i = 0; i < ngramCandidate.size(); i++) {
|
||||||
String msd = ngramCandidate.get(i).getMsd(wordParts);
|
String msd = ngramCandidate.get(i).getMsd(wordParts);
|
||||||
if (msd.equals("*")){
|
if (msd.equals("*")){
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
//if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
|
|
||||||
if (!msd.matches(regex.get(j).pattern() + ".*")) {
|
if (!msd.matches(regex.get(j).pattern() + ".*")) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -247,11 +179,6 @@ public class Ngrams {
|
||||||
.stream()
|
.stream()
|
||||||
.map(w -> Character.toString(w.getMsd(wordParts).length() > 0 ? w.getMsd(wordParts).charAt(0) : '/'))
|
.map(w -> Character.toString(w.getMsd(wordParts).length() > 0 ? w.getMsd(wordParts).charAt(0) : '/'))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
// candidate.addAll(ngramCandidate
|
|
||||||
// .stream()
|
|
||||||
// .map(w -> Character.toString(w.getMsd().charAt(0)))
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// .substring(0, 1)
|
|
||||||
return StringUtils.join(candidate, " ");
|
return StringUtils.join(candidate, " ");
|
||||||
case NORMALIZED_WORD:
|
case NORMALIZED_WORD:
|
||||||
candidate.addAll(ngramCandidate
|
candidate.addAll(ngramCandidate
|
||||||
|
@ -322,32 +249,6 @@ public class Ngrams {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks skipped words and if necessary adds punctuations.
|
|
||||||
*
|
|
||||||
* @return List of candidates represented as a list<candidates(String)>
|
|
||||||
*/
|
|
||||||
private static Word checkAndModifySkipgramPunctuation(List<Word> sentence, int i, int j, StatisticsNew stats){
|
|
||||||
// if punctuation checkbox selected and there words at indexes i and j are not next to each other
|
|
||||||
// if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
|
|
||||||
// boolean middleWordsHavePunctuation = false;
|
|
||||||
// for (int n = i + 1; n < j; n++){
|
|
||||||
// if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
|
|
||||||
// middleWordsHavePunctuation = true;
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// if (middleWordsHavePunctuation){
|
|
||||||
//
|
|
||||||
// String punctuation = ",";
|
|
||||||
// return new Word(sentence.get(i).getWord() + punctuation,
|
|
||||||
// sentence.get(i).getLemma() + punctuation,
|
|
||||||
// sentence.get(i).getMsd() + punctuation);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
return sentence.get(i);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts skipgram candidates.
|
* Extracts skipgram candidates.
|
||||||
|
@ -363,8 +264,6 @@ public class Ngrams {
|
||||||
for (Sentence s : corpus) {
|
for (Sentence s : corpus) {
|
||||||
List<Word> sentence = s.getWords();
|
List<Word> sentence = s.getWords();
|
||||||
|
|
||||||
// stats.updateUniGramOccurrences(s.getWords().size());
|
|
||||||
|
|
||||||
if (sentence == null){
|
if (sentence == null){
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -373,7 +272,6 @@ public class Ngrams {
|
||||||
for (int j = i + 1; j <= i + skip + 1; j++) { // 2gram
|
for (int j = i + 1; j <= i + skip + 1; j++) { // 2gram
|
||||||
if (ngram == 2 && j < sentence.size()) {
|
if (ngram == 2 && j < sentence.size()) {
|
||||||
currentLoop = new ArrayList<>();
|
currentLoop = new ArrayList<>();
|
||||||
// currentLoop.add(sentence.get(i));
|
|
||||||
currentLoop.add(sentence.get(i));
|
currentLoop.add(sentence.get(i));
|
||||||
fillSkipgrams(currentLoop, i, j, w);
|
fillSkipgrams(currentLoop, i, j, w);
|
||||||
currentLoop.add(sentence.get(j));
|
currentLoop.add(sentence.get(j));
|
||||||
|
@ -439,25 +337,10 @@ public class Ngrams {
|
||||||
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<Taxonomy> taxonomy) {
|
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<Taxonomy> taxonomy) {
|
||||||
// count if no regex is set or if it is & candidate passes it
|
// count if no regex is set or if it is & candidate passes it
|
||||||
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) {
|
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) {
|
||||||
// String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
|
|
||||||
// key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
|
||||||
// stats.updateTaxonomyResults(new MultipleHMKeys1(key),
|
|
||||||
// stats.getCorpus().getObservableListTaxonomy());
|
|
||||||
|
|
||||||
|
|
||||||
ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
|
ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
|
||||||
|
|
||||||
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts());
|
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts());
|
||||||
|
|
||||||
// if last letter is ',' erase it
|
|
||||||
|
|
||||||
// if (key.equals("")){
|
|
||||||
// String test = key;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if (stats.getFilter().getNotePunctuations())
|
|
||||||
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
|
||||||
|
|
||||||
MultipleHMKeys multipleKeys;
|
MultipleHMKeys multipleKeys;
|
||||||
|
|
||||||
// create MultipleHMKeys for different amount of other keys
|
// create MultipleHMKeys for different amount of other keys
|
||||||
|
@ -467,28 +350,17 @@ public class Ngrams {
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||||
// if (stats.getFilter().getNotePunctuations())
|
|
||||||
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
|
||||||
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||||
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||||
// if (stats.getFilter().getNotePunctuations()) {
|
|
||||||
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
|
||||||
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
|
||||||
// }
|
|
||||||
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||||
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||||
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||||
// if (stats.getFilter().getNotePunctuations()) {
|
|
||||||
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
|
||||||
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
|
||||||
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
|
||||||
// }
|
|
||||||
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
|
@ -496,12 +368,6 @@ public class Ngrams {
|
||||||
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||||
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||||
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3), stats.getFilter().getWordParts());
|
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3), stats.getFilter().getWordParts());
|
||||||
// if (stats.getFilter().getNotePunctuations()) {
|
|
||||||
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
|
||||||
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
|
||||||
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
|
||||||
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
|
||||||
// }
|
|
||||||
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -1,167 +0,0 @@
|
||||||
package alg.word;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import alg.Common;
|
|
||||||
import data.CalculateFor;
|
|
||||||
import data.Sentence;
|
|
||||||
import data.Statistics;
|
|
||||||
import data.Word;
|
|
||||||
|
|
||||||
//class WordCount {
|
|
||||||
// private static void calculateNoFilter(List<Sentence> corpus, Statistics stats) {
|
|
||||||
// for (Sentence s : corpus) {
|
|
||||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
|
||||||
//
|
|
||||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
|
||||||
// sentence.addAll(s.getWords()
|
|
||||||
// .stream()
|
|
||||||
// .map(Word::getLemma)
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
|
||||||
// sentence.addAll(s.getWords()
|
|
||||||
// .stream()
|
|
||||||
// .map(Word::getWord)
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// for (String word : sentence) {
|
|
||||||
// Common.updateMap(stats.result, word);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private static void calculateVCC(List<Sentence> corpus, Statistics stats) {
|
|
||||||
// for (Sentence s : corpus) {
|
|
||||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
|
||||||
//
|
|
||||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
|
||||||
// sentence.addAll(s.getWords()
|
|
||||||
// .stream()
|
|
||||||
// .map(Word::getCVVLemma)
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
|
||||||
// sentence.addAll(s.getWords()
|
|
||||||
// .stream()
|
|
||||||
// .map(Word::getCVVWord)
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// for (String word : sentence) {
|
|
||||||
// if (word.length() > stats.getSubstringLength()) {
|
|
||||||
// for (int i = 0; i <= word.length() - stats.getSubstringLength(); i++) {
|
|
||||||
// String substring = word.substring(i, i + stats.getSubstringLength());
|
|
||||||
// Common.updateMap(stats.result, substring);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private static void calculateForJosType(List<Sentence> corpus, Statistics stats) {
|
|
||||||
// for (Sentence s : corpus) {
|
|
||||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
|
||||||
// List<Word> filteredWords = new ArrayList<>();
|
|
||||||
//
|
|
||||||
// for (Word word : s.getWords()) {
|
|
||||||
// if (word.getMsd() != null && word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
|
|
||||||
// filteredWords.add(word);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
|
||||||
// sentence.addAll(filteredWords
|
|
||||||
// .stream()
|
|
||||||
// .map(Word::getLemma)
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
|
||||||
// sentence.addAll(filteredWords
|
|
||||||
// .stream()
|
|
||||||
// .map(Word::getWord)
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// for (String word : sentence) {
|
|
||||||
// Common.updateMap(stats.result, word);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
|
|
||||||
// for (Sentence s : corpus) {
|
|
||||||
// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
|
|
||||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
|
||||||
// List<Word> filteredWords = new ArrayList<>();
|
|
||||||
//
|
|
||||||
// for (Word word : s.getWords()) {
|
|
||||||
// if (word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
|
|
||||||
// filteredWords.add(word);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
|
||||||
// sentence.addAll(filteredWords
|
|
||||||
// .stream()
|
|
||||||
// .map(Word::getLemma)
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
|
||||||
// sentence.addAll(filteredWords
|
|
||||||
// .stream()
|
|
||||||
// .map(Word::getWord)
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// for (String word : sentence) {
|
|
||||||
// Common.updateMap(stats.result, word);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
|
|
||||||
// for (Sentence s : corpus) {
|
|
||||||
// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
|
|
||||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
|
||||||
//
|
|
||||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
|
||||||
// sentence.addAll(s.getWords()
|
|
||||||
// .stream()
|
|
||||||
// .map(Word::getLemma)
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
|
||||||
// sentence.addAll(s.getWords()
|
|
||||||
// .stream()
|
|
||||||
// .map(Word::getWord)
|
|
||||||
// .collect(Collectors.toList()));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// for (String word : sentence) {
|
|
||||||
// Common.updateMap(stats.result, word);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
|
||||||
// boolean taxonomyIsSet = stats.isTaxonomySet();
|
|
||||||
// boolean JosTypeIsSet = stats.isJOSTypeSet();
|
|
||||||
//
|
|
||||||
// // branching because even though the only difference is an if or two &&
|
|
||||||
// // O(if) = 1, the amount of ifs adds up and this saves some time
|
|
||||||
// if (taxonomyIsSet && JosTypeIsSet) {
|
|
||||||
// calculateForTaxonomyAndJosType(corpus, stats);
|
|
||||||
// } else if (taxonomyIsSet && !JosTypeIsSet) {
|
|
||||||
// calculateForTaxonomy(corpus, stats);
|
|
||||||
// } else if (!taxonomyIsSet && JosTypeIsSet) {
|
|
||||||
// calculateForJosType(corpus, stats);
|
|
||||||
// } else {
|
|
||||||
// if (stats.isVcc()) {
|
|
||||||
// calculateVCC(corpus, stats);
|
|
||||||
// } else {
|
|
||||||
// calculateNoFilter(corpus, stats);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//}
|
|
|
@ -3,24 +3,6 @@ package data;
|
||||||
import gui.I18N;
|
import gui.I18N;
|
||||||
|
|
||||||
public enum CalculateFor {
|
public enum CalculateFor {
|
||||||
// calculateFor.WORD=word
|
|
||||||
// calculateFor.NORMALIZED_WORD=normalized word
|
|
||||||
// calculateFor.LEMMA=lemma
|
|
||||||
// calculateFor.MORPHOSYNTACTIC_SPECS=msd
|
|
||||||
// calculateFor.MORPHOSYNTACTIC_PROPERTY=oblikoskladenjska lastnost
|
|
||||||
// calculateFor.WORD_TYPE=besedna vrsta
|
|
||||||
// calculateFor.DIST_WORDS=različnica
|
|
||||||
// calculateFor.DIST_LEMMAS=lema
|
|
||||||
|
|
||||||
// WORD("različnica"),
|
|
||||||
// NORMALIZED_WORD("normalizirana različnica"),
|
|
||||||
// LEMMA("lema"),
|
|
||||||
// MORPHOSYNTACTIC_SPECS("oblikoskladenjska oznaka"),
|
|
||||||
// MORPHOSYNTACTIC_PROPERTY("oblikoskladenjska lastnost"),
|
|
||||||
// WORD_TYPE("besedna vrsta"),
|
|
||||||
// DIST_WORDS("različnica"),
|
|
||||||
// DIST_LEMMAS("lema");
|
|
||||||
|
|
||||||
WORD("calculateFor.WORD"),
|
WORD("calculateFor.WORD"),
|
||||||
LOWERCASE_WORD("calculateFor.LOWERCASE_WORD"),
|
LOWERCASE_WORD("calculateFor.LOWERCASE_WORD"),
|
||||||
NORMALIZED_WORD("calculateFor.NORMALIZED_WORD"),
|
NORMALIZED_WORD("calculateFor.NORMALIZED_WORD"),
|
||||||
|
@ -44,7 +26,6 @@ public enum CalculateFor {
|
||||||
|
|
||||||
public static CalculateFor factory(String cf) {
|
public static CalculateFor factory(String cf) {
|
||||||
if (cf != null) {
|
if (cf != null) {
|
||||||
// String name = I18N.findI18NString(cf, "calculateFor");
|
|
||||||
if (WORD.toString().equals(cf)) {
|
if (WORD.toString().equals(cf)) {
|
||||||
return WORD;
|
return WORD;
|
||||||
}
|
}
|
||||||
|
@ -275,27 +256,4 @@ public enum CalculateFor {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// public String toPercentString() {
|
|
||||||
// switch(this){
|
|
||||||
// case WORD:
|
|
||||||
// return "Delež glede na vse različnice";
|
|
||||||
// case NORMALIZED_WORD:
|
|
||||||
// return "Delež glede na vse normalizirane različnice";
|
|
||||||
// case LEMMA:
|
|
||||||
// return "Delež glede na vse leme";
|
|
||||||
// case MORPHOSYNTACTIC_SPECS:
|
|
||||||
// return "Delež glede na vse oblikoskladenjske oznake";
|
|
||||||
// case MORPHOSYNTACTIC_PROPERTY:
|
|
||||||
// return "Delež glede na vse oblikoskladenjske lastnosti";
|
|
||||||
// case WORD_TYPE:
|
|
||||||
// return "Delež glede na vse besedne vrste";
|
|
||||||
// case DIST_WORDS:
|
|
||||||
// return "Delež glede na vse različnice";
|
|
||||||
// case DIST_LEMMAS:
|
|
||||||
// return "Delež glede na vse leme";
|
|
||||||
// default:
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -74,13 +74,4 @@ public enum Collocability {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// public String toPercentString() {
|
|
||||||
// switch(this){
|
|
||||||
// case DICE:
|
|
||||||
// return "Delež glede na vse različnice";
|
|
||||||
// default:
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,6 @@ import org.apache.logging.log4j.Logger;
|
||||||
import data.Enums.solar.SolarFilters;
|
import data.Enums.solar.SolarFilters;
|
||||||
import gui.ValidationUtil;
|
import gui.ValidationUtil;
|
||||||
import javafx.collections.ObservableList;
|
import javafx.collections.ObservableList;
|
||||||
import org.controlsfx.control.CheckComboBox;
|
|
||||||
|
|
||||||
public class Corpus {
|
public class Corpus {
|
||||||
public final static Logger logger = LogManager.getLogger(Corpus.class);
|
public final static Logger logger = LogManager.getLogger(Corpus.class);
|
||||||
|
@ -33,7 +32,6 @@ public class Corpus {
|
||||||
public HashMap<String, ObservableList<String>> solarSelectedFilters; // if solar selected
|
public HashMap<String, ObservableList<String>> solarSelectedFilters; // if solar selected
|
||||||
private HashMap<String, HashSet<String>> solarFiltersForXML; // if solar - used while parsing xml
|
private HashMap<String, HashSet<String>> solarFiltersForXML; // if solar - used while parsing xml
|
||||||
private boolean gosOrthMode;
|
private boolean gosOrthMode;
|
||||||
boolean hasMsdData;
|
|
||||||
private ArrayList<String> validationErrors;
|
private ArrayList<String> validationErrors;
|
||||||
private String corpusName = "";
|
private String corpusName = "";
|
||||||
private String punctuation = "punctuation.COMMA";
|
private String punctuation = "punctuation.COMMA";
|
||||||
|
@ -48,7 +46,6 @@ public class Corpus {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setCorpusName(String corpusName) {
|
public void setCorpusName(String corpusName) {
|
||||||
// System.out.println(corpusName);
|
|
||||||
this.corpusName = corpusName;
|
this.corpusName = corpusName;
|
||||||
logger.info("Corpus.set: ", corpusName);
|
logger.info("Corpus.set: ", corpusName);
|
||||||
}
|
}
|
||||||
|
@ -58,7 +55,6 @@ public class Corpus {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setPunctuation(String punctuation) {
|
public void setPunctuation(String punctuation) {
|
||||||
// System.out.println(corpusName);
|
|
||||||
this.punctuation = punctuation;
|
this.punctuation = punctuation;
|
||||||
logger.info("Punctuation.set: ", punctuation);
|
logger.info("Punctuation.set: ", punctuation);
|
||||||
}
|
}
|
||||||
|
@ -99,10 +95,6 @@ public class Corpus {
|
||||||
logger.info("Corpus.set: ", detectedCorpusFiles);
|
logger.info("Corpus.set: ", detectedCorpusFiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isHeaderRead() {
|
|
||||||
return headerRead;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setHeaderRead(boolean headerRead) {
|
public void setHeaderRead(boolean headerRead) {
|
||||||
this.headerRead = headerRead;
|
this.headerRead = headerRead;
|
||||||
}
|
}
|
||||||
|
@ -128,11 +120,6 @@ public class Corpus {
|
||||||
}
|
}
|
||||||
return FXCollections.observableArrayList(al);
|
return FXCollections.observableArrayList(al);
|
||||||
}
|
}
|
||||||
//
|
|
||||||
// public ObservableList<String> getFormattedTaxonomy() {
|
|
||||||
// ArrayList<String> al = Tax.getTaxonomyFormatted(new ArrayList<>(taxonomy), corpusType);
|
|
||||||
// return FXCollections.observableArrayList(al);
|
|
||||||
// }
|
|
||||||
|
|
||||||
public void setTaxonomy(ObservableList<String> taxonomy) {
|
public void setTaxonomy(ObservableList<String> taxonomy) {
|
||||||
this.taxonomy = new ArrayList<>();
|
this.taxonomy = new ArrayList<>();
|
||||||
|
@ -155,15 +142,6 @@ public class Corpus {
|
||||||
return solarSelectedFilters;
|
return solarSelectedFilters;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSolarSelectedFilters(HashMap<String, ObservableList<String>> solarFilters) {
|
|
||||||
this.solarSelectedFilters = solarFilters;
|
|
||||||
logger.info("Corpus.set: ", solarFilters);
|
|
||||||
}
|
|
||||||
|
|
||||||
public HashMap<String, HashSet<String>> getSolarFiltersForXML() {
|
|
||||||
return solarFiltersForXML;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setSolarFiltersForXML(HashMap<String, HashSet<String>> solarFiltersForXML) {
|
public void setSolarFiltersForXML(HashMap<String, HashSet<String>> solarFiltersForXML) {
|
||||||
this.solarFiltersForXML = solarFiltersForXML;
|
this.solarFiltersForXML = solarFiltersForXML;
|
||||||
logger.info("Corpus.set: ", solarFiltersForXML);
|
logger.info("Corpus.set: ", solarFiltersForXML);
|
||||||
|
@ -173,23 +151,10 @@ public class Corpus {
|
||||||
return gosOrthMode;
|
return gosOrthMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setGosOrthMode(boolean gosOrthMode) {
|
|
||||||
this.gosOrthMode = gosOrthMode;
|
|
||||||
logger.info("Corpus.set: ", gosOrthMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
public ArrayList<String> getValidationErrors() {
|
|
||||||
return validationErrors;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getValidationErrorsToString() {
|
public String getValidationErrorsToString() {
|
||||||
return StringUtils.join(validationErrors, "\n - ");
|
return StringUtils.join(validationErrors, "\n - ");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setValidationErrors(ArrayList<String> validationErrors) {
|
|
||||||
this.validationErrors = validationErrors;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean validate() {
|
public boolean validate() {
|
||||||
if (corpusType == null) {
|
if (corpusType == null) {
|
||||||
validationErrors.add(I18N.get("message.LABEL_RESULTS_CORPUS_TYPE_NOT_SET"));
|
validationErrors.add(I18N.get("message.LABEL_RESULTS_CORPUS_TYPE_NOT_SET"));
|
||||||
|
|
|
@ -1,12 +0,0 @@
|
||||||
package data.Enums;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashSet;
|
|
||||||
|
|
||||||
/**
 * Holder for the set of one-letter type codes {@code 'S'}, {@code 'G'} and {@code 'P'}.
 */
public class InflectedJosTypes {
    /** The type codes {@code 'S'}, {@code 'G'} and {@code 'P'}. */
    public static final HashSet<Character> inflectedJosTypes =
            new HashSet<>(Arrays.asList('S', 'G', 'P'));

    /** Constants holder; not meant to be instantiated. */
    private InflectedJosTypes() {
    }
}
|
|
|
@ -1,68 +0,0 @@
|
||||||
package data.Enums;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
/**
 * Word categories, each with a Slovenian name and one-letter code, an English name and
 * one-letter code, and an attribute count.
 */
public enum Msd {
    NOUN("samostalnik", 'S', "Noun", 'N', 5),
    VERB("glagol", 'G', "Verb", 'V', 7),
    ADJECTIVE("pridevnik", 'P', "Adjective", 'A', 6),
    ADVERB("prislov", 'R', "Adverb", 'R', 2),
    PRONOUN("zaimek", 'Z', "Pronoun", 'P', 8),
    NUMERAL("števnik", 'K', "Numeral", 'M', 6),
    PREPOSITION("predlog", 'D', "Preposition", 'S', 1),
    CONJUNCTION("veznik", 'V', "Conjunction", 'C', 1),
    PARTICLE("členek", 'L', "Particle", 'Q', 0),
    INTERJECTION("medmet", 'M', "Interjection", 'I', 0),
    ABBREVIATION("okrajšava", 'O', "Abbreviation", 'Y', 0),
    RESIDUAL("neuvrščeno", 'N', "Residual", 'X', 1);

    private final String siName;
    private final Character siCode;
    private final String enName;
    private final Character enCode;
    private final Integer nOfAttributes;

    /** Lookup from Slovenian one-letter code to attribute count. */
    private static final HashMap<Character, Integer> siCodeNOfAttributes = new HashMap<>();

    static {
        // Runs after all constants are constructed, so values() is complete here.
        for (Msd msd : Msd.values()) {
            siCodeNOfAttributes.put(msd.getSiCode(), msd.nOfAttributes);
        }
    }

    Msd(String siName, Character siCode, String enName, Character enCode, int nOfAttributes) {
        this.siName = siName;
        this.siCode = siCode;
        this.enName = enName;
        this.enCode = enCode;
        this.nOfAttributes = nOfAttributes;
    }

    /** @return the Slovenian category name */
    public String getSiName() {
        return siName;
    }

    /** @return the Slovenian one-letter category code */
    public Character getSiCode() {
        return siCode;
    }

    /** @return the English category name */
    public String getEnName() {
        return enName;
    }

    /** @return the English one-letter category code */
    public Character getEnCode() {
        return enCode;
    }

    /**
     * Returns the attribute count registered for the category of the given tag, plus one.
     *
     * <p>The category is looked up by the first character of {@code msd}, which must be
     * one of the Slovenian codes declared by this enum.
     *
     * @param msd tag whose first character is a Slovenian category code
     * @return attribute count of that category plus one
     * @throws IllegalArgumentException if {@code msd} is {@code null} or empty, or its
     *                                  first character is not a known Slovenian code
     */
    public static int getMsdLengthForType(String msd) {
        if (msd == null || msd.isEmpty()) {
            throw new IllegalArgumentException("msd must be a non-empty string");
        }
        char code = msd.charAt(0);
        Integer attributes = siCodeNOfAttributes.get(code);
        if (attributes == null) {
            // Fail with a clear message rather than an NPE from unboxing null.
            throw new IllegalArgumentException("Unknown MSD category code: " + code);
        }
        return attributes + 1;
    }
}
|
|
|
@ -27,9 +27,6 @@ public class SolarFilters {
|
||||||
SOLAR_FILTERS.put(TIP, FXCollections.observableArrayList("esej/spis", "pisni izdelek (učna ura)", "test (daljše besedilo)", "test (odgovori na vprašanja)"));
|
SOLAR_FILTERS.put(TIP, FXCollections.observableArrayList("esej/spis", "pisni izdelek (učna ura)", "test (daljše besedilo)", "test (odgovori na vprašanja)"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final ObservableList<String> N_GRAM_COMPUTE_FOR_FULL = FXCollections.observableArrayList("različnica", "lema", "oblikoskladenjska oznaka", "oblikoskladenjska lastnost", "besedna vrsta");
|
|
||||||
public static final ObservableList<String> N_GRAM_COMPUTE_FOR_LIMITED = FXCollections.observableArrayList("različnica", "lema");
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns filters with all possible values
|
* Returns filters with all possible values
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -349,7 +349,6 @@ public class Filter implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public Object clone() throws CloneNotSupportedException{
|
public Object clone() throws CloneNotSupportedException{
|
||||||
Filter f = null;
|
Filter f = null;
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -1,71 +0,0 @@
|
||||||
package data;
|
|
||||||
|
|
||||||
/**
 * Word types, each pairing a lower-case display name with a one-letter code.
 */
public enum GigafidaJosWordType {
    SAMOSTALNIK("samostalnik", 'S'),
    GLAGOL("glagol", 'G'),
    PRIDEVNIK("pridevnik", 'P'),
    PRISLOV("prislov", 'R'),
    ZAIMEK("zaimek", 'Z'),
    STEVNIK("stevnik", 'K'),
    PREDLOG("predlog", 'D'),
    VEZNIK("veznik", 'V'),
    CLENEK("clenek", 'L'),
    MEDMET("medmet", 'M'),
    OKRAJSAVA("okrajsava", 'O');

    private final String name;
    private final char wordType;

    GigafidaJosWordType(String name, char wordType) {
        this.name = name;
        this.wordType = wordType;
    }

    /** @return the display name of this word type (not the constant identifier) */
    @Override
    public String toString() {
        return this.name;
    }

    /** @return the one-letter code of this word type */
    public char getWordType() {
        return wordType;
    }

    /**
     * Looks up a word type by its display name.
     *
     * @param wType display name as returned by {@link #toString()}; may be {@code null}
     * @return the matching constant, or {@code null} if {@code wType} is {@code null} or matches none
     */
    public static GigafidaJosWordType factory(String wType) {
        if (wType == null) {
            return null;
        }
        // Iterating values() keeps the lookup in sync with the constant list automatically.
        for (GigafidaJosWordType candidate : values()) {
            if (candidate.name.equals(wType)) {
                return candidate;
            }
        }
        return null;
    }
}
|
|
|
@ -1,76 +0,0 @@
|
||||||
package data;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import javafx.collections.FXCollections;
|
|
||||||
import javafx.collections.ObservableList;
|
|
||||||
|
|
||||||
public enum GigafidaTaxonomy {
|
|
||||||
TISK("tisk", "T"),
|
|
||||||
KNJIZNO("knjižno", "T.K"),
|
|
||||||
LEPOSLOVNO("leposlovno", "T.K.L"),
|
|
||||||
STROKOVNO("strokovno", "T.K.S"),
|
|
||||||
PERIODICNO("periodično", "T.P"),
|
|
||||||
CASOPIS("časopis", "T.P.C"),
|
|
||||||
REVIJA("revija", "T.P.R"),
|
|
||||||
INTERNET("internet", "I");
|
|
||||||
|
|
||||||
private final String name;
|
|
||||||
private final String taxonomy;
|
|
||||||
|
|
||||||
private static final ObservableList<String> FOR_COMBO_BOX;
|
|
||||||
|
|
||||||
static {
|
|
||||||
ArrayList<String> values = Arrays.stream(GigafidaTaxonomy.values()).map(x -> x.name).collect(Collectors.toCollection(ArrayList::new));
|
|
||||||
FOR_COMBO_BOX = FXCollections.observableArrayList(values);
|
|
||||||
}
|
|
||||||
|
|
||||||
GigafidaTaxonomy(String name, String taxonomy) {
|
|
||||||
this.name = name;
|
|
||||||
this.taxonomy = taxonomy;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return this.name;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTaxonomnyString() {
|
|
||||||
return this.taxonomy;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static GigafidaTaxonomy factory(String tax) {
|
|
||||||
if (tax != null) {
|
|
||||||
if (TISK.toString().equals(tax)) {
|
|
||||||
return TISK;
|
|
||||||
}
|
|
||||||
if (KNJIZNO.toString().equals(tax)) {
|
|
||||||
return KNJIZNO;
|
|
||||||
}
|
|
||||||
if (LEPOSLOVNO.toString().equals(tax)) {
|
|
||||||
return LEPOSLOVNO;
|
|
||||||
}
|
|
||||||
if (STROKOVNO.toString().equals(tax)) {
|
|
||||||
return STROKOVNO;
|
|
||||||
}
|
|
||||||
if (PERIODICNO.toString().equals(tax)) {
|
|
||||||
return PERIODICNO;
|
|
||||||
}
|
|
||||||
if (CASOPIS.toString().equals(tax)) {
|
|
||||||
return CASOPIS;
|
|
||||||
}
|
|
||||||
if (REVIJA.toString().equals(tax)) {
|
|
||||||
return REVIJA;
|
|
||||||
}
|
|
||||||
if (INTERNET.toString().equals(tax)) {
|
|
||||||
return INTERNET;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static ObservableList<String> getForComboBox() {
|
|
||||||
return FOR_COMBO_BOX;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,85 +0,0 @@
|
||||||
package data;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import javafx.collections.FXCollections;
|
|
||||||
import javafx.collections.ObservableList;
|
|
||||||
|
|
||||||
public enum GosTaxonomy {
|
|
||||||
JAVNI("javni", "gos.T.J"),
|
|
||||||
INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "gos.T.J.I"),
|
|
||||||
RAZVEDRILNI("razvedrilni", "gos.T.J.R"),
|
|
||||||
NEJAVNI("nejavni", "gos.T.N"),
|
|
||||||
NEZASEBNI("nezasebni", "gos.T.N.N"),
|
|
||||||
ZASEBNI("zasebni", "gos.T.N.Z"),
|
|
||||||
OSEBNI_STIK("osebni stik", "gos.K.O"),
|
|
||||||
TELEFON("telefon", "gos.K.P"),
|
|
||||||
RADIO("radio", "gos.K.R"),
|
|
||||||
TELEVIZIJA("televizija", "gos.K.T");
|
|
||||||
|
|
||||||
|
|
||||||
private final String name;
|
|
||||||
private final String taxonomy;
|
|
||||||
|
|
||||||
private static final ObservableList<String> FOR_COMBO_BOX;
|
|
||||||
|
|
||||||
static {
|
|
||||||
ArrayList<String> values = Arrays.stream(GosTaxonomy.values()).map(x -> x.name).collect(Collectors.toCollection(ArrayList::new));
|
|
||||||
FOR_COMBO_BOX = FXCollections.observableArrayList(values);
|
|
||||||
}
|
|
||||||
|
|
||||||
GosTaxonomy(String name, String taxonomy) {
|
|
||||||
this.name = name;
|
|
||||||
this.taxonomy = taxonomy;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return this.name;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTaxonomnyString() {
|
|
||||||
return this.taxonomy;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static GosTaxonomy factory(String tax) {
|
|
||||||
if (tax != null) {
|
|
||||||
if (JAVNI.toString().equals(tax)) {
|
|
||||||
return JAVNI;
|
|
||||||
}
|
|
||||||
if (INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) {
|
|
||||||
return INFORMATIVNO_IZOBRAZEVALNI;
|
|
||||||
}
|
|
||||||
if (RAZVEDRILNI.toString().equals(tax)) {
|
|
||||||
return RAZVEDRILNI;
|
|
||||||
}
|
|
||||||
if (NEJAVNI.toString().equals(tax)) {
|
|
||||||
return NEJAVNI;
|
|
||||||
}
|
|
||||||
if (NEZASEBNI.toString().equals(tax)) {
|
|
||||||
return NEZASEBNI;
|
|
||||||
}
|
|
||||||
if (ZASEBNI.toString().equals(tax)) {
|
|
||||||
return ZASEBNI;
|
|
||||||
}
|
|
||||||
if (OSEBNI_STIK.toString().equals(tax)) {
|
|
||||||
return OSEBNI_STIK;
|
|
||||||
}
|
|
||||||
if (TELEFON.toString().equals(tax)) {
|
|
||||||
return TELEFON;
|
|
||||||
}
|
|
||||||
if (RADIO.toString().equals(tax)) {
|
|
||||||
return RADIO;
|
|
||||||
}
|
|
||||||
if (TELEVIZIJA.toString().equals(tax)) {
|
|
||||||
return TELEVIZIJA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static ObservableList<String> getForComboBox() {
|
|
||||||
return FOR_COMBO_BOX;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -36,15 +36,12 @@ public final class MultipleHMKeys2 implements MultipleHMKeys {
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(k1, k2);
|
return Objects.hash(k1, k2);
|
||||||
// return key.hashCode();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
return (obj instanceof MultipleHMKeys2) && ((MultipleHMKeys2) obj).k1.equals(k1)
|
return (obj instanceof MultipleHMKeys2) && ((MultipleHMKeys2) obj).k1.equals(k1)
|
||||||
&& ((MultipleHMKeys2) obj).k2.equals(k2);
|
&& ((MultipleHMKeys2) obj).k2.equals(k2);
|
||||||
|
|
||||||
// return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public MultipleHMKeys[] splitNgramTo1grams(){
|
public MultipleHMKeys[] splitNgramTo1grams(){
|
||||||
|
|
|
@ -18,22 +18,6 @@ public class Sentence {
|
||||||
this.taxonomy = taxonomy;
|
this.taxonomy = taxonomy;
|
||||||
}
|
}
|
||||||
|
|
||||||
// public Sentence(List<Word> words) {
|
|
||||||
// this.words = words;
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
 * Creates a sentence from its words, taxonomy entries and properties.
 * The arguments are stored as passed; no copies are made.
 *
 * @param words      words of the sentence
 * @param taxonomy   taxonomy entries attached to the sentence
 * @param properties properties attached to the sentence
 */
public Sentence(List<Word> words, List<Taxonomy> taxonomy, Map<String, String> properties) {
    this.properties = properties;
    this.taxonomy = taxonomy;
    this.words = words;
}
|
|
||||||
|
|
||||||
/**
 * Creates a sentence from its words, taxonomy entries and a type label.
 * The arguments are stored as passed; no copies are made.
 *
 * @param words    words of the sentence
 * @param taxonomy taxonomy entries attached to the sentence
 * @param type     type label of the sentence
 */
public Sentence(List<Word> words, List<Taxonomy> taxonomy, String type) {
    this.type = type;
    this.taxonomy = taxonomy;
    this.words = words;
}
|
|
||||||
|
|
||||||
public List<Word> getWords() {
|
public List<Word> getWords() {
|
||||||
return words;
|
return words;
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,9 +8,6 @@ public class Settings {
|
||||||
public static final int CORPUS_SENTENCE_LIMIT = 50000;
|
public static final int CORPUS_SENTENCE_LIMIT = 50000;
|
||||||
public static final boolean PRINT_LOG = false;
|
public static final boolean PRINT_LOG = false;
|
||||||
|
|
||||||
public static final String FX_ACCENT_OK = "-fx-accent: forestgreen;";
|
|
||||||
public static final String FX_ACCENT_NOK = "-fx-accent: red;";
|
|
||||||
|
|
||||||
public static Collection<File> corpus;
|
public static Collection<File> corpus;
|
||||||
public static File resultsFilePath;
|
public static File resultsFilePath;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,299 +0,0 @@
|
||||||
package data;
|
|
||||||
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.time.LocalDateTime;
|
|
||||||
import java.time.format.DateTimeFormatter;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import util.Util;
|
|
||||||
import util.db.RDB;
|
|
||||||
|
|
||||||
public class Statistics {
|
|
||||||
private CorpusType corpusType;
|
|
||||||
private AnalysisLevel analysisLevel;
|
|
||||||
private boolean useDB;
|
|
||||||
private RDB db;
|
|
||||||
|
|
||||||
private boolean analysisProducedResults;
|
|
||||||
|
|
||||||
private String taxonomy;
|
|
||||||
private boolean taxonomyIsSet;
|
|
||||||
|
|
||||||
private char JOSType;
|
|
||||||
private boolean JOSTypeIsSet;
|
|
||||||
|
|
||||||
private String resultTitle;
|
|
||||||
public Map<String, AtomicLong> result = new ConcurrentHashMap<>();
|
|
||||||
|
|
||||||
// nGrams
|
|
||||||
private int nGramLevel;
|
|
||||||
private Integer skip;
|
|
||||||
private CalculateFor cf;
|
|
||||||
private List<Pattern> morphosyntacticFilter;
|
|
||||||
|
|
||||||
// distributions
|
|
||||||
private String distributionTaxonomy;
|
|
||||||
private char distributionJosWordType;
|
|
||||||
private boolean vcc;
|
|
||||||
private Integer substringLength;
|
|
||||||
|
|
||||||
// inflected JOS
|
|
||||||
private String inflectedJosTaxonomy;
|
|
||||||
|
|
||||||
// GOS
|
|
||||||
boolean gosOrthMode;
|
|
||||||
|
|
||||||
// šolar
|
|
||||||
Map<String, Object> solarHeadBlockFilter;
|
|
||||||
|
|
||||||
|
|
||||||
// for ngrams
|
|
||||||
public Statistics(AnalysisLevel al, int nGramLevel, Integer skip, CalculateFor cf) {
|
|
||||||
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
|
||||||
this.cf = cf;
|
|
||||||
this.analysisLevel = al;
|
|
||||||
this.nGramLevel = nGramLevel;
|
|
||||||
this.skip = skip == null || skip == 0 ? null : skip;
|
|
||||||
|
|
||||||
this.resultTitle = String.format("%s%d-gram_%s_%s",
|
|
||||||
this.skip != null ? String.format("%d-%s-", skip, "skip") : "",
|
|
||||||
nGramLevel,
|
|
||||||
cf.toString(),
|
|
||||||
dateTime);
|
|
||||||
}
|
|
||||||
|
|
||||||
// for words distributions
|
|
||||||
// public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
|
|
||||||
// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
|
||||||
//
|
|
||||||
// this.resultTitle = String.format("%s_%s_%s",
|
|
||||||
// distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
|
|
||||||
// distributionJosWordType != null ? distributionJosWordType.toString() : "",
|
|
||||||
// dateTime);
|
|
||||||
//
|
|
||||||
// this.analysisLevel = al;
|
|
||||||
// this.cf = cf;
|
|
||||||
// this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
|
|
||||||
// this.taxonomyIsSet = distributionTaxonomy != null;
|
|
||||||
//
|
|
||||||
// this.JOSTypeIsSet = distributionJosWordType != null;
|
|
||||||
// this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
|
|
||||||
// }
|
|
||||||
|
|
||||||
public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) {
|
|
||||||
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
|
||||||
|
|
||||||
this.resultTitle = String.format("%s_%d_%s",
|
|
||||||
"Distribucija zaporedij samoglasnikov in soglasnikov",
|
|
||||||
substringLength,
|
|
||||||
dateTime);
|
|
||||||
|
|
||||||
this.analysisLevel = al;
|
|
||||||
this.cf = cf;
|
|
||||||
this.substringLength = substringLength;
|
|
||||||
this.vcc = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
|
|
||||||
// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
|
||||||
//
|
|
||||||
// this.resultTitle = String.format("InflectedJOS_%s_%s",
|
|
||||||
// distributionTaxonomy != null ? distributionTaxonomy : "",
|
|
||||||
// dateTime);
|
|
||||||
//
|
|
||||||
// this.analysisLevel = al;
|
|
||||||
// this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
|
|
||||||
// this.taxonomyIsSet = inflectedJosTaxonomy != null;
|
|
||||||
// }
|
|
||||||
|
|
||||||
public Integer getSkip() {
|
|
||||||
return skip;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer getSubstringLength() {
|
|
||||||
return substringLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getInflectedJosTaxonomy() {
|
|
||||||
return inflectedJosTaxonomy;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setSubstringLength(Integer substringLength) {
|
|
||||||
this.substringLength = substringLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isVcc() {
|
|
||||||
return vcc;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setVcc(boolean vcc) {
|
|
||||||
this.vcc = vcc;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getDistributionTaxonomy() {
|
|
||||||
return distributionTaxonomy;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDistributionTaxonomy(String distributionTaxonomy) {
|
|
||||||
this.distributionTaxonomy = distributionTaxonomy;
|
|
||||||
}
|
|
||||||
|
|
||||||
public char getDistributionJosWordType() {
|
|
||||||
return distributionJosWordType;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDistributionJosWordType(char distributionJosWordType) {
|
|
||||||
this.distributionJosWordType = distributionJosWordType;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setMorphosyntacticFilter(List<String> morphosyntacticFilter) {
|
|
||||||
// change filter strings to regex patterns
|
|
||||||
this.morphosyntacticFilter = new ArrayList<>();
|
|
||||||
for (String s : morphosyntacticFilter) {
|
|
||||||
this.morphosyntacticFilter.add(Pattern.compile(s.replaceAll("\\*", ".")));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<Pattern> getMsd() {
|
|
||||||
return morphosyntacticFilter;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<String, AtomicLong> getResult() {
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setTaxonomy(String taxonomy) {
|
|
||||||
this.taxonomy = taxonomy;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setTaxonomyIsSet(boolean taxonomyIsSet) {
|
|
||||||
this.taxonomyIsSet = taxonomyIsSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
public char getJOSType() {
|
|
||||||
return JOSType;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setJOSType(char JOSType) {
|
|
||||||
this.JOSType = JOSType;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isJOSTypeSet() {
|
|
||||||
return JOSTypeIsSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setJOSType(boolean JOSTypeIsSet) {
|
|
||||||
this.JOSTypeIsSet = JOSTypeIsSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void saveResultToDisk(int... limit) throws UnsupportedEncodingException {
|
|
||||||
// Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
|
|
||||||
//
|
|
||||||
// if (useDB) {
|
|
||||||
// result = db.getDump();
|
|
||||||
// db.delete();
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // if no results and nothing to save, return false
|
|
||||||
// if (!(result.size() > 0)) {
|
|
||||||
// analysisProducedResults = false;
|
|
||||||
// return;
|
|
||||||
// } else {
|
|
||||||
// analysisProducedResults = true;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
|
|
||||||
// Export.SetToCSV(stats);
|
|
||||||
}
|
|
||||||
|
|
||||||
// private Map<String, Integer> getSortedResultInflected(Map map) {
|
|
||||||
// // first convert to <String, Integer>
|
|
||||||
// Map<String, Integer> m = Util.sortByValue(Util.atomicInt2StringAndInt(map), 0);
|
|
||||||
//
|
|
||||||
// Map<String, Integer> sortedM = new TreeMap<>();
|
|
||||||
//
|
|
||||||
// sortedM.putAll(m);
|
|
||||||
//
|
|
||||||
// return sortedM;
|
|
||||||
// }
|
|
||||||
|
|
||||||
private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
|
|
||||||
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTaxonomy() {
|
|
||||||
return taxonomy;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isTaxonomySet() {
|
|
||||||
return taxonomyIsSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getnGramLevel() {
|
|
||||||
return nGramLevel;
|
|
||||||
}
|
|
||||||
|
|
||||||
public CalculateFor getCf() {
|
|
||||||
return cf;
|
|
||||||
}
|
|
||||||
|
|
||||||
public AnalysisLevel getAnalysisLevel() {
|
|
||||||
return analysisLevel;
|
|
||||||
}
|
|
||||||
|
|
||||||
public CorpusType getCorpusType() {
|
|
||||||
return corpusType;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCorpusType(CorpusType corpusType) {
|
|
||||||
this.corpusType = corpusType;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isGosOrthMode() {
|
|
||||||
return gosOrthMode;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setGosOrthMode(boolean gosOrthMode) {
|
|
||||||
this.gosOrthMode = gosOrthMode;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<String, Object> getSolarHeadBlockFilter() {
|
|
||||||
return solarHeadBlockFilter;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setSolarHeadBlockFilter(Map<String, Object> solarHeadBlockFilter) {
|
|
||||||
this.solarHeadBlockFilter = solarHeadBlockFilter;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isUseDB() {
|
|
||||||
return useDB;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setUseDB(boolean useDB) {
|
|
||||||
if (useDB && db == null) {
|
|
||||||
db = new RDB();
|
|
||||||
}
|
|
||||||
this.useDB = useDB;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Stores results from this batch to a database and clears results map
|
|
||||||
*/
|
|
||||||
public void storeTmpResultsToDB() {
|
|
||||||
try {
|
|
||||||
db.writeBatch(result);
|
|
||||||
result = new ConcurrentHashMap<>();
|
|
||||||
} catch (UnsupportedEncodingException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isAnalysisProducedResults() {
|
|
||||||
return analysisProducedResults;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -18,7 +18,6 @@ import org.apache.commons.lang3.tuple.Pair;
|
||||||
import org.apache.logging.log4j.LogManager;
|
import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
|
|
||||||
import alg.inflectedJOS.WordFormation;
|
|
||||||
import data.Enums.WordLevelType;
|
import data.Enums.WordLevelType;
|
||||||
import javafx.collections.ObservableList;
|
import javafx.collections.ObservableList;
|
||||||
import util.Export;
|
import util.Export;
|
||||||
|
|
|
@ -166,22 +166,6 @@ public class Tax {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// ArrayList<String> taxonomyString = new ArrayList<>();
|
|
||||||
// for (Taxonomy t : taxonomyResult.keySet()){
|
|
||||||
// taxonomyString.add(t.toString());
|
|
||||||
// }
|
|
||||||
// ObservableList<String> taxonomyObservableString = Tax.getTaxonomyForComboBox(corpus.getCorpusType(), new HashSet<>(taxonomyString));
|
|
||||||
// ArrayList<String> sortedTaxonomyString = new ArrayList<>();
|
|
||||||
// for (String t : taxonomyObservableString){
|
|
||||||
// sortedTaxonomyString.add(t);
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
foundTaxHS.addAll(genFoundTax);
|
foundTaxHS.addAll(genFoundTax);
|
||||||
|
|
||||||
// assures same relative order
|
// assures same relative order
|
||||||
|
@ -198,59 +182,6 @@ public class Tax {
|
||||||
return corpusTypesWithTaxonomy;
|
return corpusTypesWithTaxonomy;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<String> getTaxonomyCodes(ArrayList<Taxonomy> taxonomyNames, CorpusType corpusType) {
|
|
||||||
ArrayList<String> result = new ArrayList<>();
|
|
||||||
|
|
||||||
if (ValidationUtil.isEmpty(taxonomyNames)) {
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
|
||||||
|
|
||||||
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
|
||||||
tax = GIGAFIDA_TAXONOMY;
|
|
||||||
} else if (corpusType == CorpusType.GOS) {
|
|
||||||
tax = GOS_TAXONOMY;
|
|
||||||
}
|
|
||||||
|
|
||||||
// for easier lookup
|
|
||||||
Map<String, String> taxInversed = tax.entrySet()
|
|
||||||
.stream()
|
|
||||||
.collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
|
|
||||||
|
|
||||||
for (Taxonomy taxonomyName : taxonomyNames) {
|
|
||||||
result.add(taxInversed.get(taxonomyName.toString()));
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// public static ArrayList<String> getTaxonomyFormatted(ArrayList<String> taxonomyNames, CorpusType corpusType) {
|
|
||||||
// ArrayList<String> result = new ArrayList<>();
|
|
||||||
//
|
|
||||||
// if (ValidationUtil.isEmpty(taxonomyNames)) {
|
|
||||||
// return result;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
|
||||||
//
|
|
||||||
// if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
|
||||||
// tax = GIGAFIDA_TAXONOMY;
|
|
||||||
// } else if (corpusType == CorpusType.GOS) {
|
|
||||||
// tax = GOS_TAXONOMY;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // for easier lookup
|
|
||||||
// Map<String, String> taxInversed = tax.entrySet()
|
|
||||||
// .stream()
|
|
||||||
// .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
|
|
||||||
//
|
|
||||||
// for (String taxonomyName : taxonomyNames) {
|
|
||||||
// result.add(taxInversed.get(taxonomyName) + " - " + taxonomyName);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return result;
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a list of proper names for codes
|
* Returns a list of proper names for codes
|
||||||
|
@ -283,13 +214,4 @@ public class Tax {
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getLongTaxonomyName(String shortName){
|
|
||||||
if (GIGAFIDA_TAXONOMY.containsKey(shortName))
|
|
||||||
return GIGAFIDA_TAXONOMY.get(shortName);
|
|
||||||
else if(GOS_TAXONOMY.containsKey(shortName))
|
|
||||||
return GOS_TAXONOMY.get(shortName);
|
|
||||||
else
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,14 +28,6 @@ enum TaxonomyEnum {
|
||||||
|
|
||||||
|
|
||||||
// Gigafida
|
// Gigafida
|
||||||
// KNJIZNO("knjižno", "T.K", "gigafida"),
|
|
||||||
// LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"),
|
|
||||||
// STROKOVNO("strokovno", "T.K.S", "gigafida"),
|
|
||||||
// PERIODICNO("periodično", "T.P", "gigafida"),
|
|
||||||
// CASOPIS("časopis", "T.P.C", "gigafida"),
|
|
||||||
// REVIJA("revija", "T.P.R", "gigafida"),
|
|
||||||
// INTERNET("internet", "I", "gigafida"),
|
|
||||||
|
|
||||||
SSJ_TISK("SSJ.T", "SSJ.T - tisk"),
|
SSJ_TISK("SSJ.T", "SSJ.T - tisk"),
|
||||||
SSJ_KNJIZNO("SSJ.T.K", " SSJ.T.K - tisk-knjižno"),
|
SSJ_KNJIZNO("SSJ.T.K", " SSJ.T.K - tisk-knjižno"),
|
||||||
SSJ_LEPOSLOVNO("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno"),
|
SSJ_LEPOSLOVNO("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno"),
|
||||||
|
@ -148,9 +140,6 @@ enum TaxonomyEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gigafida
|
// Gigafida
|
||||||
// if (TISK.toString().equals(tax)) {
|
|
||||||
// return TISK;
|
|
||||||
// }
|
|
||||||
if (SSJ_TISK.toString().equals(tax)) {
|
if (SSJ_TISK.toString().equals(tax)) {
|
||||||
return SSJ_TISK;
|
return SSJ_TISK;
|
||||||
}
|
}
|
||||||
|
@ -339,9 +328,6 @@ enum TaxonomyEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gigafida
|
// Gigafida
|
||||||
// if (TISK.toString().equals(tax)) {
|
|
||||||
// return TISK;
|
|
||||||
// }
|
|
||||||
if (SSJ_TISK.toLongNameString().equals(tax)) {
|
if (SSJ_TISK.toLongNameString().equals(tax)) {
|
||||||
return SSJ_TISK;
|
return SSJ_TISK;
|
||||||
}
|
}
|
||||||
|
@ -483,7 +469,6 @@ enum TaxonomyEnum {
|
||||||
public static ArrayList<TaxonomyEnum> taxonomySelected(TaxonomyEnum disjointTaxonomy) {
|
public static ArrayList<TaxonomyEnum> taxonomySelected(TaxonomyEnum disjointTaxonomy) {
|
||||||
ArrayList<TaxonomyEnum> r = new ArrayList<>();
|
ArrayList<TaxonomyEnum> r = new ArrayList<>();
|
||||||
|
|
||||||
// System.out.println(disjointTaxonomy);
|
|
||||||
if(disjointTaxonomy.equals(DISKURZ)){
|
if(disjointTaxonomy.equals(DISKURZ)){
|
||||||
r.add(DISKURZ_JAVNI);
|
r.add(DISKURZ_JAVNI);
|
||||||
r.add(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI);
|
r.add(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI);
|
||||||
|
@ -696,12 +681,8 @@ enum TaxonomyEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<TaxonomyEnum> convertStringListToTaxonomyList(ObservableList<String> stringList, Corpus corpus){
|
public static ArrayList<TaxonomyEnum> convertStringListToTaxonomyList(ObservableList<String> stringList, Corpus corpus){
|
||||||
// System.out.println("1.");
|
|
||||||
// System.out.println(stringList);
|
|
||||||
ArrayList<TaxonomyEnum> taxonomyList = new ArrayList<>();
|
ArrayList<TaxonomyEnum> taxonomyList = new ArrayList<>();
|
||||||
|
|
||||||
// System.out.println("INTERESTING STUFF");
|
|
||||||
// System.out.println(stringList);
|
|
||||||
for (String e : stringList) {
|
for (String e : stringList) {
|
||||||
for (Taxonomy t : corpus.getTaxonomy()){
|
for (Taxonomy t : corpus.getTaxonomy()){
|
||||||
if (t.toLongNameString().equals(e)) {
|
if (t.toLongNameString().equals(e)) {
|
||||||
|
@ -709,18 +690,11 @@ enum TaxonomyEnum {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// System.out.println(taxonomyList);
|
|
||||||
// System.out.println("-----------------");
|
|
||||||
return taxonomyList;
|
return taxonomyList;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void modifyingTaxonomy(ArrayList<TaxonomyEnum> taxonomy, ArrayList<TaxonomyEnum> checkedItemsTaxonomy, Corpus corpus){
|
public static void modifyingTaxonomy(ArrayList<TaxonomyEnum> taxonomy, ArrayList<TaxonomyEnum> checkedItemsTaxonomy, Corpus corpus){
|
||||||
// get taxonomies that were selected/deselected by user
|
// get taxonomies that were selected/deselected by user
|
||||||
// System.out.println("Print here:");
|
|
||||||
// System.out.println(taxonomy);
|
|
||||||
// System.out.println(checkedItemsTaxonomy);
|
|
||||||
// System.out.println("-------------");
|
|
||||||
|
|
||||||
Set<TaxonomyEnum> disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy);
|
Set<TaxonomyEnum> disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy);
|
||||||
if (taxonomy != null) {
|
if (taxonomy != null) {
|
||||||
disjointTaxonomies.addAll(taxonomy);
|
disjointTaxonomies.addAll(taxonomy);
|
||||||
|
@ -739,7 +713,6 @@ enum TaxonomyEnum {
|
||||||
if(!TaxonomyEnum.convertStringListToTaxonomyList(corpus.getObservableListTaxonomy(), corpus).contains(s)){
|
if(!TaxonomyEnum.convertStringListToTaxonomyList(corpus.getObservableListTaxonomy(), corpus).contains(s)){
|
||||||
disjointTaxonomies.remove(s);
|
disjointTaxonomies.remove(s);
|
||||||
disArr.remove(s);
|
disArr.remove(s);
|
||||||
// taxonomy.remove(s);
|
|
||||||
i--;
|
i--;
|
||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
|
@ -790,11 +763,6 @@ public class Taxonomy {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// public Taxonomy(String name, String longName) {
|
|
||||||
// this.name = name;
|
|
||||||
// this.longName = longName;
|
|
||||||
// }
|
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return this.name;
|
return this.name;
|
||||||
}
|
}
|
||||||
|
@ -813,7 +781,6 @@ public class Taxonomy {
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
// return new Taxonomy(tax, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Taxonomy factoryLongName(String tax, Corpus corpus) {
|
public static Taxonomy factoryLongName(String tax, Corpus corpus) {
|
||||||
|
@ -822,87 +789,6 @@ public class Taxonomy {
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
// return new Taxonomy(tax, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// public static ArrayList<Taxonomy> taxonomySelected(Taxonomy disjointTaxonomy) {
|
|
||||||
// ArrayList<TaxonomyEnum> rTaxonomyEnum = TaxonomyEnum.taxonomySelected(disjointTaxonomy.getTaxonomyEnum());
|
|
||||||
//
|
|
||||||
// ArrayList<Taxonomy> r = new ArrayList<>();
|
|
||||||
//
|
|
||||||
// for(TaxonomyEnum t : rTaxonomyEnum){
|
|
||||||
// r.add(new Taxonomy(t.toString(), false));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return r;
|
|
||||||
// }
|
|
||||||
|
|
||||||
public static ArrayList<Taxonomy> taxonomyDeselected(Taxonomy disjointTaxonomy){
|
|
||||||
// ArrayList<TaxonomyEnum> r = new ArrayList<>();
|
|
||||||
// Map<TaxonomyEnum, TaxonomyEnum> connections = new ConcurrentHashMap<>();
|
|
||||||
// connections.put(DISKURZ_JAVNI, DISKURZ);
|
|
||||||
// connections.put(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_JAVNI);
|
|
||||||
// connections.put(DISKURZ_RAZVEDRILNI, DISKURZ_JAVNI);
|
|
||||||
// connections.put(DISKURZ_NEJAVNI, DISKURZ);
|
|
||||||
// connections.put(DISKURZ_NEZASEBNI, DISKURZ_NEJAVNI);
|
|
||||||
// connections.put(DISKURZ_ZASEBNI, DISKURZ_NEJAVNI);
|
|
||||||
// connections.put(SITUACIJA_RADIO, SITUACIJA);
|
|
||||||
// connections.put(SITUACIJA_TELEVIZIJA, SITUACIJA);
|
|
||||||
// connections.put(KANAL_OSEBNI_STIK, KANAL);
|
|
||||||
// connections.put(KANAL_TELEFON, KANAL);
|
|
||||||
// connections.put(KANAL_RADIO, KANAL);
|
|
||||||
// connections.put(KANAL_TELEVIZIJA, KANAL);
|
|
||||||
//
|
|
||||||
// connections.put(SSJ_KNJIZNO, SSJ_TISK);
|
|
||||||
// connections.put(SSJ_LEPOSLOVNO, SSJ_KNJIZNO);
|
|
||||||
// connections.put(SSJ_STROKOVNO, SSJ_KNJIZNO);
|
|
||||||
// connections.put(SSJ_PERIODICNO, SSJ_TISK);
|
|
||||||
// connections.put(SSJ_CASOPIS, SSJ_PERIODICNO);
|
|
||||||
// connections.put(SSJ_REVIJA, SSJ_PERIODICNO);
|
|
||||||
// connections.put(SSJ_DRUGO, SSJ_TISK);
|
|
||||||
//
|
|
||||||
// connections.put(FT_P_GOVORNI, FT_P_PRENOSNIK);
|
|
||||||
// connections.put(FT_P_ELEKTRONSKI, FT_P_PRENOSNIK);
|
|
||||||
// connections.put(FT_P_PISNI, FT_P_PRENOSNIK);
|
|
||||||
// connections.put(FT_P_OBJAVLJENO, FT_P_PISNI);
|
|
||||||
// connections.put(FT_P_KNJIZNO, FT_P_OBJAVLJENO);
|
|
||||||
// connections.put(FT_P_PERIODICNO, FT_P_OBJAVLJENO);
|
|
||||||
// connections.put(FT_P_CASOPISNO, FT_P_OBJAVLJENO);
|
|
||||||
// connections.put(FT_P_DNEVNO, FT_P_CASOPISNO);
|
|
||||||
// connections.put(FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO);
|
|
||||||
// connections.put(FT_P_CASOPISNO_TEDENSKO, FT_P_CASOPISNO);
|
|
||||||
// connections.put(FT_P_REVIALNO, FT_P_PERIODICNO);
|
|
||||||
// connections.put(FT_P_TEDENSKO, FT_P_REVIALNO);
|
|
||||||
// connections.put(FT_P_STIRINAJSTDNEVNO, FT_P_REVIALNO);
|
|
||||||
// connections.put(FT_P_MESECNO, FT_P_REVIALNO);
|
|
||||||
// connections.put(FT_P_REDKEJE_KOT_MESECNO, FT_P_REVIALNO);
|
|
||||||
// connections.put(FT_P_OBCASNO, FT_P_REVIALNO);
|
|
||||||
// connections.put(FT_P_NEOBJAVLJENO, FT_P_PISNI);
|
|
||||||
// connections.put(FT_P_JAVNO, FT_P_NEOBJAVLJENO);
|
|
||||||
// connections.put(FT_P_INTERNO, FT_P_NEOBJAVLJENO);
|
|
||||||
// connections.put(FT_P_ZASEBNO, FT_P_NEOBJAVLJENO);
|
|
||||||
// connections.put(FT_UMETNOSTNA, FT_ZVRST);
|
|
||||||
// connections.put(FT_PESNISKA, FT_UMETNOSTNA);
|
|
||||||
// connections.put(FT_PROZNA, FT_UMETNOSTNA);
|
|
||||||
// connections.put(FT_DRAMSKA, FT_UMETNOSTNA);
|
|
||||||
// connections.put(FT_NEUMETNOSTNA, FT_ZVRST);
|
|
||||||
// connections.put(FT_STROKOVNA, FT_NEUMETNOSTNA);
|
|
||||||
// connections.put(FT_HID, FT_STROKOVNA);
|
|
||||||
// connections.put(FT_NIT, FT_STROKOVNA);
|
|
||||||
// connections.put(FT_NESTROKOVNA, FT_NEUMETNOSTNA);
|
|
||||||
// connections.put(FT_PRAVNA, FT_NEUMETNOSTNA);
|
|
||||||
// connections.put(FT_DA, FT_LEKTORIRANO);
|
|
||||||
// connections.put(FT_NE, FT_LEKTORIRANO);
|
|
||||||
//
|
|
||||||
// TaxonomyEnum currentTaxonomy = disjointTaxonomy;
|
|
||||||
// r.add(currentTaxonomy);
|
|
||||||
// while(connections.containsKey(currentTaxonomy)){
|
|
||||||
// currentTaxonomy = connections.get(currentTaxonomy);
|
|
||||||
// r.add(currentTaxonomy);
|
|
||||||
// }
|
|
||||||
// Collections.reverse(r);
|
|
||||||
// return r;
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<Taxonomy> convertStringListToTaxonomyList(ObservableList<String> stringList, Corpus corpus){
|
public static ArrayList<Taxonomy> convertStringListToTaxonomyList(ObservableList<String> stringList, Corpus corpus){
|
||||||
|
@ -919,7 +805,6 @@ public class Taxonomy {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<TaxonomyEnum> taxonomyToTaxonomyEnum(ArrayList<Taxonomy> taxonomy){
|
public static ArrayList<TaxonomyEnum> taxonomyToTaxonomyEnum(ArrayList<Taxonomy> taxonomy){
|
||||||
// System.out.println(taxonomy);
|
|
||||||
if (taxonomy == null) {
|
if (taxonomy == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -934,11 +819,6 @@ public class Taxonomy {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<Taxonomy> taxonomyEnumToTaxonomy(ArrayList<TaxonomyEnum> taxonomy, Corpus corpus){
|
public static ArrayList<Taxonomy> taxonomyEnumToTaxonomy(ArrayList<TaxonomyEnum> taxonomy, Corpus corpus){
|
||||||
// ArrayList<Taxonomy> r = new ArrayList<>();
|
|
||||||
// for (TaxonomyEnum t : taxonomy){
|
|
||||||
// r.add(new Taxonomy(t));
|
|
||||||
// }
|
|
||||||
// return r;
|
|
||||||
ArrayList<Taxonomy> r = new ArrayList<>();
|
ArrayList<Taxonomy> r = new ArrayList<>();
|
||||||
for (TaxonomyEnum te : taxonomy){
|
for (TaxonomyEnum te : taxonomy){
|
||||||
for (Taxonomy t : corpus.getTaxonomy()){
|
for (Taxonomy t : corpus.getTaxonomy()){
|
||||||
|
|
|
@ -15,7 +15,6 @@ import javafx.collections.ObservableList;
|
||||||
import javafx.concurrent.Task;
|
import javafx.concurrent.Task;
|
||||||
import javafx.fxml.FXML;
|
import javafx.fxml.FXML;
|
||||||
import javafx.scene.control.*;
|
import javafx.scene.control.*;
|
||||||
//import javafx.scene.image.Image;
|
|
||||||
import javafx.scene.image.ImageView;
|
import javafx.scene.image.ImageView;
|
||||||
import javafx.scene.layout.AnchorPane;
|
import javafx.scene.layout.AnchorPane;
|
||||||
import javafx.scene.layout.Pane;
|
import javafx.scene.layout.Pane;
|
||||||
|
@ -29,7 +28,6 @@ import java.io.UnsupportedEncodingException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import static alg.XML_processing.readXML;
|
|
||||||
import static gui.GUIController.showAlert;
|
import static gui.GUIController.showAlert;
|
||||||
|
|
||||||
@SuppressWarnings("Duplicates")
|
@SuppressWarnings("Duplicates")
|
||||||
|
@ -129,20 +127,10 @@ public class CharacterAnalysisTab {
|
||||||
private ComboBox<String> taxonomySetOperationCB;
|
private ComboBox<String> taxonomySetOperationCB;
|
||||||
private String taxonomySetOperation;
|
private String taxonomySetOperation;
|
||||||
|
|
||||||
// @FXML
|
|
||||||
// private ToggleGroup calculateForRB;
|
|
||||||
// private CalculateFor calculateFor;
|
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private ComboBox<String> calculateForCB;
|
private ComboBox<String> calculateForCB;
|
||||||
private CalculateFor calculateFor;
|
private CalculateFor calculateFor;
|
||||||
|
|
||||||
@FXML
|
|
||||||
private RadioButton lemmaRB;
|
|
||||||
|
|
||||||
@FXML
|
|
||||||
private RadioButton varietyRB;
|
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private Pane paneLetters;
|
private Pane paneLetters;
|
||||||
|
|
||||||
|
@ -171,13 +159,12 @@ public class CharacterAnalysisTab {
|
||||||
|
|
||||||
private Corpus corpus;
|
private Corpus corpus;
|
||||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||||
private Filter filter;
|
|
||||||
private boolean useDb;
|
|
||||||
private HostServices hostService;
|
private HostServices hostService;
|
||||||
private ListChangeListener<String> taxonomyListener;
|
private ListChangeListener<String> taxonomyListener;
|
||||||
private ChangeListener<Boolean> msdListener;
|
private ChangeListener<Boolean> msdListener;
|
||||||
private ChangeListener<Boolean> minimalOccurrencesListener;
|
private ChangeListener<Boolean> minimalOccurrencesListener;
|
||||||
private ChangeListener<Boolean> minimalTaxonomyListener;
|
private ChangeListener<Boolean> minimalTaxonomyListener;
|
||||||
|
private boolean useDb;
|
||||||
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
|
private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));
|
||||||
|
@ -185,14 +172,8 @@ public class CharacterAnalysisTab {
|
||||||
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
|
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
|
||||||
private static final ArrayList<String> TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY));
|
private static final ArrayList<String> TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY));
|
||||||
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("različnica", "lema");
|
|
||||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
|
||||||
|
|
||||||
|
|
||||||
// TODO: pass observables for taxonomy based on header scan
|
// TODO: pass observables for taxonomy based on header scan
|
||||||
// after header scan
|
// after header scan
|
||||||
private ObservableList<String> taxonomyCCBValues;
|
|
||||||
private CorpusType currentCorpusType;
|
|
||||||
|
|
||||||
public void init() {
|
public void init() {
|
||||||
characterAnalysisTab.getStylesheets().add("style.css");
|
characterAnalysisTab.getStylesheets().add("style.css");
|
||||||
|
@ -203,24 +184,11 @@ public class CharacterAnalysisTab {
|
||||||
currentMode = MODE.LETTER;
|
currentMode = MODE.LETTER;
|
||||||
toggleMode(currentMode);
|
toggleMode(currentMode);
|
||||||
|
|
||||||
// calculateForRB.selectedToggleProperty().addListener(new ChangeListener<Toggle>() {
|
|
||||||
// @Override
|
|
||||||
// public void changed(ObservableValue<? extends Toggle> observable, Toggle oldValue, Toggle newValue) {
|
|
||||||
// //logger.info("calculateForRB:", newValue.toString());
|
|
||||||
// RadioButton chk = (RadioButton)newValue.getToggleGroup().getSelectedToggle(); // Cast object to radio button
|
|
||||||
// calculateFor = CalculateFor.factory(chk.getText());
|
|
||||||
// logger.info("calculateForRB:", chk.getText());
|
|
||||||
// //System.out.println("Selected Radio Button - "+chk.getText());
|
|
||||||
// }
|
|
||||||
// });
|
|
||||||
|
|
||||||
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
if(newValue == null){
|
if(newValue == null){
|
||||||
newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
|
newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
|
||||||
calculateForCB.getSelectionModel().select(newValue);
|
calculateForCB.getSelectionModel().select(newValue);
|
||||||
}
|
}
|
||||||
// System.out.println(oldValue);
|
|
||||||
// System.out.println(newValue);
|
|
||||||
calculateFor = CalculateFor.factory(newValue);
|
calculateFor = CalculateFor.factory(newValue);
|
||||||
logger.info("calculateForCB:", calculateFor.toString());
|
logger.info("calculateForCB:", calculateFor.toString());
|
||||||
});
|
});
|
||||||
|
@ -299,7 +267,6 @@ public class CharacterAnalysisTab {
|
||||||
public void onChanged(Change<? extends String> c){
|
public void onChanged(Change<? extends String> c){
|
||||||
if(changing) {
|
if(changing) {
|
||||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||||
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
|
|
||||||
|
|
||||||
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
||||||
|
|
||||||
|
@ -309,7 +276,6 @@ public class CharacterAnalysisTab {
|
||||||
taxonomyCCB.getItems().removeAll();
|
taxonomyCCB.getItems().removeAll();
|
||||||
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
||||||
|
|
||||||
// taxonomyCCB.getCheckModel().clearChecks();
|
|
||||||
changing = false;
|
changing = false;
|
||||||
taxonomyCCB.getCheckModel().clearChecks();
|
taxonomyCCB.getCheckModel().clearChecks();
|
||||||
for (Taxonomy t : checkedItemsTaxonomy) {
|
for (Taxonomy t : checkedItemsTaxonomy) {
|
||||||
|
@ -468,97 +434,6 @@ public class CharacterAnalysisTab {
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* case a: values for combo boxes can change after a corpus change
|
|
||||||
* <ul>
|
|
||||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
|
||||||
* <li>same corpus type, different subset - keep</li>
|
|
||||||
* </ul>
|
|
||||||
* <p>
|
|
||||||
* case b: values for combo boxes can change after a header scan
|
|
||||||
* <ul>
|
|
||||||
* <li>at first, fields are populated by corpus type defaults</li>
|
|
||||||
* <li>after, with gathered data</li>
|
|
||||||
* </ul>
|
|
||||||
* <p></p>
|
|
||||||
* ngrams: 1
|
|
||||||
* calculateFor: word
|
|
||||||
* msd:
|
|
||||||
* taxonomy:
|
|
||||||
* skip: 0
|
|
||||||
* iscvv: false
|
|
||||||
* string length: 1
|
|
||||||
*/
|
|
||||||
// public void populateFields() {
|
|
||||||
// // corpus changed if: current one is null (this is first run of the app)
|
|
||||||
// // or if currentCorpus != gui's corpus
|
|
||||||
// boolean corpusChanged = currentCorpusType == null
|
|
||||||
// || currentCorpusType != corpus.getCorpusType();
|
|
||||||
//
|
|
||||||
// // TODO: check for GOS, GIGAFIDA, SOLAR...
|
|
||||||
// // refresh and:
|
|
||||||
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
|
||||||
//// if (calculateFor == null) {
|
|
||||||
//// calculateForRB.selectToggle(lemmaRB);
|
|
||||||
//// calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString());
|
|
||||||
//// }
|
|
||||||
//
|
|
||||||
// if (!filter.hasMsd()) {
|
|
||||||
// // if current corpus doesn't have msd data, disable this field
|
|
||||||
// msd = new ArrayList<>();
|
|
||||||
// msdTF.setText("");
|
|
||||||
// msdTF.setDisable(true);
|
|
||||||
// logger.info("no msd data");
|
|
||||||
// } else {
|
|
||||||
// if (ValidationUtil.isEmpty(msd)
|
|
||||||
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
|
||||||
// // msd has not been set previously
|
|
||||||
// // or msd has been set but the corpus changed -> reset
|
|
||||||
// msd = new ArrayList<>();
|
|
||||||
// msdTF.setText("");
|
|
||||||
// msdTF.setDisable(false);
|
|
||||||
// logger.info("msd reset");
|
|
||||||
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
|
||||||
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
|
|
||||||
// msdTF.setText(StringUtils.join(msdStrings, " "));
|
|
||||||
// msdTF.setDisable(false);
|
|
||||||
// logger.info("msd kept");
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
|
|
||||||
//
|
|
||||||
// // keep calculateCvv
|
|
||||||
// calculatecvvCB.setSelected(calculateCvv);
|
|
||||||
//
|
|
||||||
// // keep string length if set
|
|
||||||
// if (stringLength != null) {
|
|
||||||
// stringLengthTF.setText(String.valueOf(stringLength));
|
|
||||||
// } else {
|
|
||||||
// stringLengthTF.setText("1");
|
|
||||||
// stringLength = 1;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // TODO: trigger on rescan
|
|
||||||
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
|
||||||
// // user changed corpus (by type) or by selection & triggered a rescan of headers
|
|
||||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
|
||||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
|
||||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
|
||||||
//
|
|
||||||
// currentCorpusType = corpus.getCorpusType();
|
|
||||||
// // setTaxonomyIsDirty(false);
|
|
||||||
// } else {
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
|
||||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
|
||||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
|
||||||
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
|
|
||||||
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
||||||
Tooltip tooltip = new Tooltip();
|
Tooltip tooltip = new Tooltip();
|
||||||
tooltip.textProperty().bind(stringBinding);
|
tooltip.textProperty().bind(stringBinding);
|
||||||
|
@ -719,7 +594,6 @@ public class CharacterAnalysisTab {
|
||||||
cancel.setVisible(true);
|
cancel.setVisible(true);
|
||||||
}
|
}
|
||||||
int i = 0;
|
int i = 0;
|
||||||
// DateFormat df = new SimpleDateFormat("hh:mm:ss");
|
|
||||||
Date startTime = new Date();
|
Date startTime = new Date();
|
||||||
Date previousTime = new Date();
|
Date previousTime = new Date();
|
||||||
int remainingSeconds = -1;
|
int remainingSeconds = -1;
|
||||||
|
@ -759,23 +633,16 @@ public class CharacterAnalysisTab {
|
||||||
xml_processing.isCancelled = isCancelled();
|
xml_processing.isCancelled = isCancelled();
|
||||||
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
|
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
|
||||||
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
|
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
|
||||||
// updateProgress((iFinal * 100) + (double) observable, corpusFiles.size() * 100);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
|
|
||||||
|
|
||||||
|
|
||||||
xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
||||||
|
|
||||||
// xml_processing.progressProperty().addListener((obs, oldProgress, newProgress) ->
|
|
||||||
// updateProgress((iFinal * 100) + newProgress.doubleValue(), corpusFiles.size() * 100));
|
|
||||||
}
|
}
|
||||||
xml_processing.readXML(f.toString(), statistic);
|
xml_processing.readXML(f.toString(), statistic);
|
||||||
if (isCancelled()) {
|
if (isCancelled()) {
|
||||||
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// readXML(f.toString(), statistic, this, corpusFiles.size(), startTime, previousTime, i);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
@ -799,7 +666,6 @@ public class CharacterAnalysisTab {
|
||||||
}
|
}
|
||||||
|
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
@ -810,7 +676,6 @@ public class CharacterAnalysisTab {
|
||||||
logger.error("Error while executing", e);
|
logger.error("Error while executing", e);
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setProgress(0.0);
|
ngramProgressBar.setProgress(0.0);
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
@ -820,7 +685,6 @@ public class CharacterAnalysisTab {
|
||||||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setProgress(0.0);
|
ngramProgressBar.setProgress(0.0);
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
|
|
@ -2,10 +2,8 @@ package gui;
|
||||||
|
|
||||||
import static data.CorpusType.*;
|
import static data.CorpusType.*;
|
||||||
import static gui.GUIController.*;
|
import static gui.GUIController.*;
|
||||||
import static gui.Messages.*;
|
|
||||||
import static util.Util.*;
|
import static util.Util.*;
|
||||||
|
|
||||||
import java.awt.*;
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
|
@ -53,16 +51,11 @@ public class CorpusTab {
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private Button chooseCorpusLocationB;
|
private Button chooseCorpusLocationB;
|
||||||
private File chosenCorpusLocation;
|
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private CheckBox readHeaderInfoChB;
|
private CheckBox readHeaderInfoChB;
|
||||||
private boolean readHeaderInfo;
|
private boolean readHeaderInfo;
|
||||||
|
|
||||||
// @FXML
|
|
||||||
// private CheckBox gosUseOrthChB;
|
|
||||||
// private boolean gosUseOrth;
|
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private Button chooseResultsLocationB;
|
private Button chooseResultsLocationB;
|
||||||
|
|
||||||
|
@ -213,11 +206,8 @@ public class CorpusTab {
|
||||||
|
|
||||||
selectReaderCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
selectReaderCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
if(newValue == null){
|
if(newValue == null){
|
||||||
// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
|
|
||||||
selectReaderCB.getSelectionModel().select(newValue);
|
selectReaderCB.getSelectionModel().select(newValue);
|
||||||
}
|
}
|
||||||
// System.out.println(oldValue);
|
|
||||||
// System.out.println(newValue);
|
|
||||||
selectReader = newValue;
|
selectReader = newValue;
|
||||||
selectReader();
|
selectReader();
|
||||||
if(corpus != null && corpus.getCorpusType() != null) {
|
if(corpus != null && corpus.getCorpusType() != null) {
|
||||||
|
@ -236,12 +226,9 @@ public class CorpusTab {
|
||||||
// comma / point choice
|
// comma / point choice
|
||||||
punctuationCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
punctuationCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
if(newValue == null){
|
if(newValue == null){
|
||||||
// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
|
|
||||||
newValue = I18N.getTranslatedValue(oldValue, PUNCTUATION);
|
newValue = I18N.getTranslatedValue(oldValue, PUNCTUATION);
|
||||||
punctuationCB.getSelectionModel().select(newValue);
|
punctuationCB.getSelectionModel().select(newValue);
|
||||||
}
|
}
|
||||||
// System.out.println(oldValue);
|
|
||||||
// System.out.println(newValue);
|
|
||||||
punctuation = newValue;
|
punctuation = newValue;
|
||||||
if(corpus != null) {
|
if(corpus != null) {
|
||||||
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
|
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
|
||||||
|
@ -252,7 +239,6 @@ public class CorpusTab {
|
||||||
|
|
||||||
// add listeners
|
// add listeners
|
||||||
chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
|
chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
|
||||||
// chooseCorpusLocationB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_chooseCorpusLocationB")));
|
|
||||||
helpH.setOnAction(e -> openHelpWebsite());
|
helpH.setOnAction(e -> openHelpWebsite());
|
||||||
|
|
||||||
readHeaderInfoChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
readHeaderInfoChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
@ -262,18 +248,6 @@ public class CorpusTab {
|
||||||
}
|
}
|
||||||
logger.info("read headers: ", readHeaderInfo);
|
logger.info("read headers: ", readHeaderInfo);
|
||||||
});
|
});
|
||||||
// readHeaderInfoChB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_readHeaderInfoChB")));
|
|
||||||
|
|
||||||
// gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
|
||||||
// gosUseOrth = newValue;
|
|
||||||
// corpus.setGosOrthMode(gosUseOrth);
|
|
||||||
//// wordFormationTab.setDisable(gosUseOrth);
|
|
||||||
// satNew2Controller.toggleMode(null);
|
|
||||||
// oneWordTabController.toggleMode(null);
|
|
||||||
// catController.toggleMode(null);
|
|
||||||
//
|
|
||||||
// logger.info("gosUseOrth: ", gosUseOrth);
|
|
||||||
// });
|
|
||||||
|
|
||||||
chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null));
|
chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null));
|
||||||
|
|
||||||
|
@ -284,31 +258,12 @@ public class CorpusTab {
|
||||||
I18N.setLocale(new Locale.Builder().setLanguage("sl").setRegion("SI").build());
|
I18N.setLocale(new Locale.Builder().setLanguage("sl").setRegion("SI").build());
|
||||||
}
|
}
|
||||||
Messages.reload();
|
Messages.reload();
|
||||||
|
|
||||||
// StringBuilder sb = new StringBuilder();
|
|
||||||
// sb.append(corpusLocation)
|
|
||||||
// .append("\n")
|
|
||||||
// .append(String.format(I18N.get("message.NOTIFICATION_FOUND_X_FILES"), corpusFilesSize))
|
|
||||||
// .append("\n")
|
|
||||||
// .append(String.format(I18N.get("message.NOTIFICATION_CORPUS"), corpusType.toString()));
|
|
||||||
//
|
|
||||||
// chooseCorpusLabelContent = sb.toString();
|
|
||||||
// chooseCorpusL.textProperty().unbind();
|
|
||||||
// chooseCorpusL.setText(chooseCorpusLabelContent);
|
|
||||||
Messages.updateChooseCorpusL();
|
Messages.updateChooseCorpusL();
|
||||||
|
|
||||||
logger.info("change language");
|
logger.info("change language");
|
||||||
});
|
});
|
||||||
|
|
||||||
// set labels and toggle visibility
|
// set labels and toggle visibility
|
||||||
// toggleGosChBVisibility();
|
|
||||||
|
|
||||||
// chooseCorpusLabelContent = Messages.LABEL_CORPUS_LOCATION_NOT_SET;
|
|
||||||
// chooseCorpusL.setText(chooseCorpusLabelContent);
|
|
||||||
//
|
|
||||||
// chooseResultsLabelContent = Messages.LABEL_RESULTS_LOCATION_NOT_SET;
|
|
||||||
// chooseResultsL.setText(chooseResultsLabelContent);
|
|
||||||
|
|
||||||
togglePiAndSetCorpusWrapper(false);
|
togglePiAndSetCorpusWrapper(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -391,11 +346,6 @@ public class CorpusTab {
|
||||||
corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("vert", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
|
corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("vert", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
|
||||||
Collection<File> corpusFilesRegi = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("regi", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
|
Collection<File> corpusFilesRegi = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("regi", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
|
||||||
|
|
||||||
|
|
||||||
// if (!checkRegiFile(corpusFilesRegi)){
|
|
||||||
// return;
|
|
||||||
// }
|
|
||||||
|
|
||||||
if (corpusFiles.size() == 0){
|
if (corpusFiles.size() == 0){
|
||||||
logger.info("alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND"));
|
logger.info("alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND"));
|
||||||
showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null);
|
showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null);
|
||||||
|
@ -405,7 +355,6 @@ public class CorpusTab {
|
||||||
corpusLocation = selectedDirectory.getAbsolutePath();
|
corpusLocation = selectedDirectory.getAbsolutePath();
|
||||||
corpusFilesSize = String.valueOf(corpusFiles.size());
|
corpusFilesSize = String.valueOf(corpusFiles.size());
|
||||||
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
|
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
|
||||||
// corpusType = VERT;
|
|
||||||
|
|
||||||
corpus.setCorpusType(corpusType);
|
corpus.setCorpusType(corpusType);
|
||||||
|
|
||||||
|
@ -446,12 +395,10 @@ public class CorpusTab {
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// System.out.println(corpusLocation);
|
|
||||||
corpusLocation = selectedDirectory.getAbsolutePath();
|
corpusLocation = selectedDirectory.getAbsolutePath();
|
||||||
corpusFilesSize = String.valueOf(corpusFiles.size());
|
corpusFilesSize = String.valueOf(corpusFiles.size());
|
||||||
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
|
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
|
||||||
|
|
||||||
// String chooseCorpusLabelContentTmp = detectCorpusType(corpusFiles);
|
|
||||||
selectReader();
|
selectReader();
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
sb.append(corpusLocation)
|
sb.append(corpusLocation)
|
||||||
|
@ -491,7 +438,6 @@ public class CorpusTab {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// System.out.println(outputName);
|
|
||||||
corpus.setCorpusName(outputName);
|
corpus.setCorpusName(outputName);
|
||||||
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
|
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
|
||||||
}
|
}
|
||||||
|
@ -534,7 +480,6 @@ public class CorpusTab {
|
||||||
private void setResults() {
|
private void setResults() {
|
||||||
// if everything is ok
|
// if everything is ok
|
||||||
// check and enable checkbox if GOS
|
// check and enable checkbox if GOS
|
||||||
// toggleGosChBVisibility();
|
|
||||||
|
|
||||||
// set default results location
|
// set default results location
|
||||||
String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath();
|
String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath();
|
||||||
|
@ -543,28 +488,6 @@ public class CorpusTab {
|
||||||
Messages.setChooseCorpusL(chooseCorpusL, chooseCorpusLabelContent);
|
Messages.setChooseCorpusL(chooseCorpusL, chooseCorpusLabelContent);
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean checkRegiFile(Collection<File> corpusFiles) {
|
|
||||||
// CorpusType corpusType = corpus.getCorpusType();
|
|
||||||
// Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();
|
|
||||||
|
|
||||||
|
|
||||||
for (File file : corpusFiles) {
|
|
||||||
// try to open .regi file
|
|
||||||
String regiPath = file.getAbsolutePath().substring(0, file.getAbsolutePath().length() - 4) + "regi";
|
|
||||||
LineIterator regiIt;
|
|
||||||
try {
|
|
||||||
// read regi file
|
|
||||||
regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8");
|
|
||||||
LineIterator.closeQuietly(regiIt);
|
|
||||||
} catch (IOException e) {
|
|
||||||
GUIController.showAlert(Alert.AlertType.ERROR, String.format(I18N.get("message.ERROR_NO_REGI_FILE_FOUND"), regiPath));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private void readHeaderInfo() {
|
private void readHeaderInfo() {
|
||||||
CorpusType corpusType = corpus.getCorpusType();
|
CorpusType corpusType = corpus.getCorpusType();
|
||||||
Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();
|
Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();
|
||||||
|
@ -592,8 +515,6 @@ public class CorpusTab {
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
if (corpusIsSplit) {
|
if (corpusIsSplit) {
|
||||||
// System.out.println(i);
|
|
||||||
// System.out.println(corpusFiles.size());
|
|
||||||
updateProgress(i, corpusFiles.size());
|
updateProgress(i, corpusFiles.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -615,10 +536,7 @@ public class CorpusTab {
|
||||||
characterLevelTab.setDisable(true);
|
characterLevelTab.setDisable(true);
|
||||||
wordLevelTab.setDisable(true);
|
wordLevelTab.setDisable(true);
|
||||||
filterTab.setDisable(true);
|
filterTab.setDisable(true);
|
||||||
// Messages.reload();
|
|
||||||
Messages.updateChooseCorpusL();
|
Messages.updateChooseCorpusL();
|
||||||
// chooseCorpusL.textProperty().bind(I18N.createStringBinding("message.LABEL_CORPUS_LOCATION_NOT_SET"));
|
|
||||||
// chooseResultsL.textProperty().bind(I18N.createStringBinding("message.LABEL_RESULTS_LOCATION_NOT_SET"));
|
|
||||||
|
|
||||||
logger.info("No taxonomy found in headers.");
|
logger.info("No taxonomy found in headers.");
|
||||||
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_TAXONOMY_FOUND"));
|
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_TAXONOMY_FOUND"));
|
||||||
|
@ -749,12 +667,6 @@ public class CorpusTab {
|
||||||
|
|
||||||
task.setOnSucceeded(e -> {
|
task.setOnSucceeded(e -> {
|
||||||
ObservableList<String> readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue());
|
ObservableList<String> readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue());
|
||||||
|
|
||||||
// if (ValidationUtil.isEmpty(readTaxonomy)) {
|
|
||||||
// // if no taxonomy found alert the user and keep other tabs disabled
|
|
||||||
// logger.info("No vert filters found in headers.");
|
|
||||||
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND"));
|
|
||||||
// } else {
|
|
||||||
// set taxonomy, update label
|
// set taxonomy, update label
|
||||||
corpus.setTaxonomy(readTaxonomy);
|
corpus.setTaxonomy(readTaxonomy);
|
||||||
corpus.setHeaderRead(true);
|
corpus.setHeaderRead(true);
|
||||||
|
@ -790,10 +702,7 @@ public class CorpusTab {
|
||||||
characterLevelTab.setDisable(false);
|
characterLevelTab.setDisable(false);
|
||||||
catController.setCorpus(corpus);
|
catController.setCorpus(corpus);
|
||||||
catController.init();
|
catController.init();
|
||||||
//wordFormationTab.setDisable(false);
|
|
||||||
wordLevelTab.setDisable(false);
|
wordLevelTab.setDisable(false);
|
||||||
//wfController.setCorpus(corpus);
|
|
||||||
//wfController.init();
|
|
||||||
wlController.setCorpus(corpus);
|
wlController.setCorpus(corpus);
|
||||||
wlController.init();
|
wlController.init();
|
||||||
|
|
||||||
|
@ -824,13 +733,6 @@ public class CorpusTab {
|
||||||
return directoryChooser.showDialog(stage);
|
return directoryChooser.showDialog(stage);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Hides GOS related checkbox until needed.
|
|
||||||
*/
|
|
||||||
// private void toggleGosChBVisibility() {
|
|
||||||
// gosUseOrthChB.setVisible(corpus != null && corpus.getCorpusType() != null && corpus.getCorpusType() == CorpusType.GOS);
|
|
||||||
// }
|
|
||||||
|
|
||||||
private void selectReader() {
|
private void selectReader() {
|
||||||
switch (selectReader) {
|
switch (selectReader) {
|
||||||
// "vert", "Solar", "GOS", "SSJ500K", "Gigafida", "Gigafida (old)", "Kres (old)"
|
// "vert", "Solar", "GOS", "SSJ500K", "Gigafida", "Gigafida (old)", "Kres (old)"
|
||||||
|
@ -940,18 +842,10 @@ public class CorpusTab {
|
||||||
|
|
||||||
public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; }
|
public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; }
|
||||||
|
|
||||||
/*public void setWfController(WordFormationTab wfController) {
|
|
||||||
this.wfController = wfController;
|
|
||||||
}*/
|
|
||||||
|
|
||||||
public void setWlController(WordLevelTab wlController) {
|
public void setWlController(WordLevelTab wlController) {
|
||||||
this.wlController = wlController;
|
this.wlController = wlController;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setWordFormationTab(Tab wordFormationTab) {
|
|
||||||
this.wordFormationTab = wordFormationTab;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setHostServices(HostServices hostServices){
|
public void setHostServices(HostServices hostServices){
|
||||||
this.hostService = hostServices;
|
this.hostService = hostServices;
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,21 +52,17 @@ public class FiltersForSolar {
|
||||||
@FXML
|
@FXML
|
||||||
public Label solarFilters;
|
public Label solarFilters;
|
||||||
@FXML
|
@FXML
|
||||||
public Label selectedFiltersL;
|
|
||||||
@FXML
|
|
||||||
public TextArea selectedFiltersTextArea;
|
public TextArea selectedFiltersTextArea;
|
||||||
@FXML
|
@FXML
|
||||||
private Button changeLanguageB;
|
private Button changeLanguageB;
|
||||||
@FXML
|
@FXML
|
||||||
private Hyperlink helpH;
|
private Hyperlink helpH;
|
||||||
|
|
||||||
// private HashMap<String, ObservableList<String>> selectedFilters;
|
|
||||||
private Corpus corpus;
|
private Corpus corpus;
|
||||||
|
|
||||||
private StringAnalysisTabNew2 satNew2Controller;
|
private StringAnalysisTabNew2 satNew2Controller;
|
||||||
private OneWordAnalysisTab oneWordTabController;
|
private OneWordAnalysisTab oneWordTabController;
|
||||||
private CharacterAnalysisTab catController;
|
private CharacterAnalysisTab catController;
|
||||||
//private WordFormationTab wfController;
|
|
||||||
private WordLevelTab wlController;
|
private WordLevelTab wlController;
|
||||||
private HostServices hostService;
|
private HostServices hostService;
|
||||||
|
|
||||||
|
@ -172,7 +168,6 @@ public class FiltersForSolar {
|
||||||
ArrayList<String> values = new ArrayList<>(entry.getValue());
|
ArrayList<String> values = new ArrayList<>(entry.getValue());
|
||||||
|
|
||||||
if (!values.isEmpty()) {
|
if (!values.isEmpty()) {
|
||||||
// allFilters.append(entry.getKey())
|
|
||||||
allFilters.append(I18N.get(entry.getKey() + "L"))
|
allFilters.append(I18N.get(entry.getKey() + "L"))
|
||||||
.append(": ");
|
.append(": ");
|
||||||
|
|
||||||
|
@ -202,7 +197,6 @@ public class FiltersForSolar {
|
||||||
satNew2Controller.setSolarFiltersMap(solarFiltersMap);
|
satNew2Controller.setSolarFiltersMap(solarFiltersMap);
|
||||||
oneWordTabController.setSolarFiltersMap(solarFiltersMap);
|
oneWordTabController.setSolarFiltersMap(solarFiltersMap);
|
||||||
catController.setSolarFiltersMap(solarFiltersMap);
|
catController.setSolarFiltersMap(solarFiltersMap);
|
||||||
//wfController.setSolarFiltersMap(solarFiltersMap);
|
|
||||||
wlController.setSolarFiltersMap(solarFiltersMap);
|
wlController.setSolarFiltersMap(solarFiltersMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -215,7 +209,6 @@ public class FiltersForSolar {
|
||||||
satNew2Controller.setSelectedFiltersLabel(content);
|
satNew2Controller.setSelectedFiltersLabel(content);
|
||||||
oneWordTabController.setSelectedFiltersLabel(content);
|
oneWordTabController.setSelectedFiltersLabel(content);
|
||||||
catController.setSelectedFiltersLabel(content);
|
catController.setSelectedFiltersLabel(content);
|
||||||
//wfController.setSelectedFiltersLabel(content);
|
|
||||||
wlController.setSelectedFiltersLabel(content);
|
wlController.setSelectedFiltersLabel(content);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -230,10 +223,6 @@ public class FiltersForSolar {
|
||||||
|
|
||||||
public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; }
|
public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; }
|
||||||
|
|
||||||
/*public void setWfController(WordFormationTab wfController) {
|
|
||||||
this.wfController = wfController;
|
|
||||||
}*/
|
|
||||||
|
|
||||||
public void setWlController(WordLevelTab wlController) {
|
public void setWlController(WordLevelTab wlController) {
|
||||||
this.wlController = wlController;
|
this.wlController = wlController;
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,29 +49,11 @@ public class GUIController extends Application {
|
||||||
@FXML
|
@FXML
|
||||||
private CorpusTab ctController;
|
private CorpusTab ctController;
|
||||||
@FXML
|
@FXML
|
||||||
private Parent ct;
|
|
||||||
//@FXML
|
|
||||||
//private WordFormationTab wfController;
|
|
||||||
@FXML
|
|
||||||
private Parent wf;
|
|
||||||
@FXML
|
|
||||||
private WordLevelTab wlController;
|
private WordLevelTab wlController;
|
||||||
@FXML
|
@FXML
|
||||||
private Parent wl;
|
|
||||||
@FXML
|
|
||||||
private FiltersForSolar ffsController;
|
private FiltersForSolar ffsController;
|
||||||
@FXML
|
@FXML
|
||||||
private Parent ffs;
|
|
||||||
@FXML
|
|
||||||
private SelectedFiltersPane sfpController;
|
|
||||||
@FXML
|
|
||||||
private Parent sfp;
|
|
||||||
@FXML
|
|
||||||
public Tab stringLevelTab;
|
|
||||||
@FXML
|
|
||||||
public Tab wordLevelTab;
|
public Tab wordLevelTab;
|
||||||
/*@FXML
|
|
||||||
public Tab wordFormationTab;*/
|
|
||||||
|
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
|
@ -83,28 +65,9 @@ public class GUIController extends Application {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void start(Stage primaryStage) throws IOException {
|
public void start(Stage primaryStage) throws IOException {
|
||||||
// File fileDir = new File("message_sl_unicode.properties");
|
|
||||||
//
|
|
||||||
// BufferedReader in = new BufferedReader(
|
|
||||||
// new InputStreamReader(
|
|
||||||
// new FileInputStream(fileDir), "UTF8"));
|
|
||||||
//
|
|
||||||
// String str;
|
|
||||||
//
|
|
||||||
// while ((str = in.readLine()) != null) {
|
|
||||||
// System.out.println(str);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// in.close();
|
|
||||||
|
|
||||||
Parent root = FXMLLoader.load(getClass().getResource("/GUI.fxml"));
|
Parent root = FXMLLoader.load(getClass().getResource("/GUI.fxml"));
|
||||||
// Parent root = FXMLLoader.load(ResourceLookup.resources.url("GUI.fxml"));
|
|
||||||
// primaryStage.setTitle("Luščilnik");
|
|
||||||
// StringBinding a = I18N.createStringBinding("window.title");
|
|
||||||
primaryStage.titleProperty().bind(I18N.createStringBinding("window.title"));
|
primaryStage.titleProperty().bind(I18N.createStringBinding("window.title"));
|
||||||
Scene scene = new Scene(root, 800, 600);
|
Scene scene = new Scene(root, 800, 600);
|
||||||
// https://github.com/dicolar/jbootx
|
|
||||||
// scene.getStylesheets().add(GUIController.class.getResource("bootstrap3.css").toExternalForm())
|
|
||||||
primaryStage.setScene(scene);
|
primaryStage.setScene(scene);
|
||||||
stage = primaryStage;
|
stage = primaryStage;
|
||||||
primaryStage.show();
|
primaryStage.show();
|
||||||
|
@ -130,13 +93,10 @@ public class GUIController extends Application {
|
||||||
ctController.setSatNew2Controller(satNew2Controller);
|
ctController.setSatNew2Controller(satNew2Controller);
|
||||||
ctController.setOneWordTabController(oneWordTabController);
|
ctController.setOneWordTabController(oneWordTabController);
|
||||||
ctController.setCatController(catController);
|
ctController.setCatController(catController);
|
||||||
//ctController.setWfController(wfController);
|
|
||||||
ctController.setWlController(wlController);
|
ctController.setWlController(wlController);
|
||||||
ctController.setTabPane(tabPane);
|
ctController.setTabPane(tabPane);
|
||||||
ctController.setFfsController(ffsController);
|
ctController.setFfsController(ffsController);
|
||||||
//ctController.setWordFormationTab(wordFormationTab);
|
|
||||||
ctController.setWordLevelTab(wordLevelTab);
|
ctController.setWordLevelTab(wordLevelTab);
|
||||||
//System.out.println(com.sun.javafx.runtime.VersionInfo.getRuntimeVersion());
|
|
||||||
|
|
||||||
ctController.setHostServices(getHostServices());
|
ctController.setHostServices(getHostServices());
|
||||||
|
|
||||||
|
@ -146,14 +106,11 @@ public class GUIController extends Application {
|
||||||
oneWordTabController.setHostServices(getHostServices());
|
oneWordTabController.setHostServices(getHostServices());
|
||||||
catController.setCorpus(corpus);
|
catController.setCorpus(corpus);
|
||||||
catController.setHostServices(getHostServices());
|
catController.setHostServices(getHostServices());
|
||||||
//wfController.setCorpus(corpus);
|
|
||||||
//wfController.setHostServices(getHostServices());
|
|
||||||
wlController.setCorpus(corpus);
|
wlController.setCorpus(corpus);
|
||||||
wlController.setHostServices(getHostServices());
|
wlController.setHostServices(getHostServices());
|
||||||
ffsController.setSatNew2Controller(satNew2Controller);
|
ffsController.setSatNew2Controller(satNew2Controller);
|
||||||
ffsController.setOneWordTabController(oneWordTabController);
|
ffsController.setOneWordTabController(oneWordTabController);
|
||||||
ffsController.setCatController(catController);
|
ffsController.setCatController(catController);
|
||||||
//ffsController.setWfController(wfController);
|
|
||||||
ffsController.setWlController(wlController);
|
ffsController.setWlController(wlController);
|
||||||
ffsController.setHostServices(getHostServices());
|
ffsController.setHostServices(getHostServices());
|
||||||
|
|
||||||
|
|
|
@ -1,17 +1,12 @@
|
||||||
package gui;
|
package gui;
|
||||||
|
|
||||||
import com.sun.javafx.collections.ObservableListWrapper;
|
|
||||||
import javafx.beans.binding.Bindings;
|
import javafx.beans.binding.Bindings;
|
||||||
import javafx.beans.binding.ObjectBinding;
|
import javafx.beans.binding.ObjectBinding;
|
||||||
import javafx.beans.binding.StringBinding;
|
import javafx.beans.binding.StringBinding;
|
||||||
import javafx.beans.property.ObjectProperty;
|
import javafx.beans.property.ObjectProperty;
|
||||||
import javafx.beans.property.SimpleObjectProperty;
|
import javafx.beans.property.SimpleObjectProperty;
|
||||||
import javafx.beans.value.ObservableValue;
|
|
||||||
import javafx.collections.FXCollections;
|
import javafx.collections.FXCollections;
|
||||||
import javafx.collections.ObservableList;
|
import javafx.collections.ObservableList;
|
||||||
import javafx.scene.control.Button;
|
|
||||||
import javafx.scene.control.Label;
|
|
||||||
import javafx.scene.control.Tooltip;
|
|
||||||
|
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.text.MessageFormat;
|
import java.text.MessageFormat;
|
||||||
|
@ -111,22 +106,6 @@ public final class I18N {
|
||||||
return Bindings.createStringBinding(() -> get(key, args), locale);
|
return Bindings.createStringBinding(() -> get(key, args), locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
// public static ObservableValue<ObservableList<String>> createListStringBinding(final String key, Object... args) {
|
|
||||||
// ObservableList<StringBinding> r = (ObservableList<StringBinding>) new ArrayList<StringBinding>();
|
|
||||||
// r.add(Bindings.createStringBinding(() -> get(key, args), locale));
|
|
||||||
// return r;
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* creates a Object Binding to a localized Object that is computed by calling the given func
|
|
||||||
*
|
|
||||||
* @param func
|
|
||||||
* function called on every change
|
|
||||||
* @return StringBinding
|
|
||||||
*/
|
|
||||||
public static StringBinding createStringBinding(Callable<String> func) {
|
|
||||||
return Bindings.createStringBinding(func, locale);
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* creates a String binding to a localized String for the given message bundle key
|
* creates a String binding to a localized String for the given message bundle key
|
||||||
*
|
*
|
||||||
|
@ -138,22 +117,6 @@ public final class I18N {
|
||||||
return Bindings.createObjectBinding(() -> getObject(keys, args), locale);
|
return Bindings.createObjectBinding(() -> getObject(keys, args), locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
// public static ObservableValue<ObservableList<String>> createListStringBinding(final String key, Object... args) {
|
|
||||||
// ObservableList<StringBinding> r = (ObservableList<StringBinding>) new ArrayList<StringBinding>();
|
|
||||||
// r.add(Bindings.createStringBinding(() -> get(key, args), locale));
|
|
||||||
// return r;
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* creates a String Binding to a localized String that is computed by calling the given func
|
|
||||||
*
|
|
||||||
* @param func
|
|
||||||
* function called on every change
|
|
||||||
* @return ObjectBinding
|
|
||||||
*/
|
|
||||||
public static ObjectBinding createObjectBinding(Callable<String> func) {
|
|
||||||
return Bindings.createObjectBinding(func, locale);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String getIndependent(final String key, Locale locale, final Object... args) {
|
public static String getIndependent(final String key, Locale locale, final Object... args) {
|
||||||
ResourceBundle bundle = ResourceBundle.getBundle("message", locale);
|
ResourceBundle bundle = ResourceBundle.getBundle("message", locale);
|
||||||
|
@ -164,7 +127,6 @@ public final class I18N {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
return val;
|
return val;
|
||||||
// return MessageFormat.format(bundle.getString(key), args);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getRootValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
|
public static String getRootValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
|
||||||
|
@ -230,35 +192,4 @@ public final class I18N {
|
||||||
|
|
||||||
return FXCollections.observableArrayList(translatedWords);
|
return FXCollections.observableArrayList(translatedWords);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* DUPLICATE OF toString()
|
|
||||||
* searches for possible values in translations and returns key of the string
|
|
||||||
* == .toString()
|
|
||||||
*
|
|
||||||
* @param w, prefix
|
|
||||||
* function called on every change
|
|
||||||
* @return ObjectBinding
|
|
||||||
*/
|
|
||||||
public static String findI18NString(String w, String prefix){
|
|
||||||
ResourceBundle bundle = ResourceBundle.getBundle("message", getLocale());
|
|
||||||
for (String key : bundle.keySet()){
|
|
||||||
if(prefix.length() > key.length() || !key.substring(0, prefix.length()).equals(prefix)){
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
String val = bundle.getString(key);
|
|
||||||
try {
|
|
||||||
String newVal = new String(val.getBytes("ISO-8859-1"), "UTF-8");
|
|
||||||
|
|
||||||
if (newVal.equals(w)){
|
|
||||||
return key;
|
|
||||||
}
|
|
||||||
} catch (UnsupportedEncodingException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,35 +10,17 @@ import javafx.scene.control.Label;
|
||||||
public class Messages {
|
public class Messages {
|
||||||
|
|
||||||
// warnings & errors
|
// warnings & errors
|
||||||
public static String WARNING_CORPUS_NOT_FOUND = I18N.get("message.WARNING_CORPUS_NOT_FOUND");
|
|
||||||
public static String WARNING_RESULTS_DIR_NOT_VALID = I18N.get("message.WARNING_RESULTS_DIR_NOT_VALID");
|
|
||||||
public static String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS = I18N.get("message.WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS");
|
|
||||||
public static String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS_INFO = I18N.get("message.WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS_INFO");
|
|
||||||
public static String WARNING_WORD_OR_LEMMA = I18N.get("message.WARNING_WORD_OR_LEMMA");
|
|
||||||
public static String WARNING_ONLY_NUMBERS_ALLOWED = I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED");
|
public static String WARNING_ONLY_NUMBERS_ALLOWED = I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED");
|
||||||
public static String WARNING_NUMBER_TOO_BIG = I18N.get("message.WARNING_NUMBER_TOO_BIG");
|
|
||||||
public static String WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES = I18N.get("message.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES");
|
public static String WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES = I18N.get("message.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES");
|
||||||
public static String WARNING_MISSING_STRING_LENGTH = I18N.get("message.WARNING_MISSING_STRING_LENGTH");
|
public static String WARNING_MISSING_STRING_LENGTH = I18N.get("message.WARNING_MISSING_STRING_LENGTH");
|
||||||
public static String WARNING_NO_TAXONOMY_FOUND = I18N.get("message.WARNING_NO_TAXONOMY_FOUND");
|
|
||||||
public static String WARNING_NO_SOLAR_FILTERS_FOUND = I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND");
|
|
||||||
public static String ERROR_WHILE_EXECUTING = I18N.get("message.ERROR_WHILE_EXECUTING");
|
public static String ERROR_WHILE_EXECUTING = I18N.get("message.ERROR_WHILE_EXECUTING");
|
||||||
public static String ERROR_WHILE_SAVING_RESULTS_TO_CSV = I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV");
|
public static String ERROR_WHILE_SAVING_RESULTS_TO_CSV = I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV");
|
||||||
public static String ERROR_NOT_ENOUGH_MEMORY= I18N.get("message.ERROR_NOT_ENOUGH_MEMORY");
|
public static String ERROR_NOT_ENOUGH_MEMORY= I18N.get("message.ERROR_NOT_ENOUGH_MEMORY");
|
||||||
|
|
||||||
// missing
|
|
||||||
public static String MISSING_NGRAM_LEVEL = I18N.get("message.MISSING_NGRAM_LEVEL");
|
|
||||||
public static String MISSING_CALCULATE_FOR = I18N.get("message.MISSING_CALCULATE_FOR");
|
|
||||||
public static String MISSING_SKIP = I18N.get("message.MISSING_SKIP");
|
|
||||||
public static String MISSING_STRING_LENGTH = I18N.get("message.MISSING_STRING_LENGTH");
|
|
||||||
public static String MISMATCHED_STRING_LENGTH_AND_MSD_REGEX = I18N.get("message.MISMATCHED_STRING_LENGTH_AND_MSD_REGEX");
|
|
||||||
|
|
||||||
|
|
||||||
// general notifications - static content/set only once
|
// general notifications - static content/set only once
|
||||||
public static String NOTIFICATION_FOUND_X_FILES = I18N.get("message.NOTIFICATION_FOUND_X_FILES");
|
|
||||||
public static String NOTIFICATION_ANALYSIS_COMPLETED = I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED");
|
public static String NOTIFICATION_ANALYSIS_COMPLETED = I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED");
|
||||||
public static String NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS = I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS");
|
public static String NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS = I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS");
|
||||||
public static String RESULTS_PATH_SET_TO_DEFAULT = I18N.get("message.RESULTS_PATH_SET_TO_DEFAULT");
|
|
||||||
public static String NOTIFICATION_ANALYSIS_CANCLED = I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED");
|
|
||||||
|
|
||||||
// ongoing notifications - displayed while processing, dynamically changing
|
// ongoing notifications - displayed while processing, dynamically changing
|
||||||
public static String ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y = I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y");
|
public static String ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y = I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y");
|
||||||
|
@ -47,14 +29,7 @@ public class Messages {
|
||||||
// Labels
|
// Labels
|
||||||
public static String LABEL_CORPUS_LOCATION_NOT_SET = I18N.get("message.LABEL_CORPUS_LOCATION_NOT_SET");
|
public static String LABEL_CORPUS_LOCATION_NOT_SET = I18N.get("message.LABEL_CORPUS_LOCATION_NOT_SET");
|
||||||
public static String LABEL_RESULTS_LOCATION_NOT_SET = I18N.get("message.LABEL_RESULTS_LOCATION_NOT_SET");
|
public static String LABEL_RESULTS_LOCATION_NOT_SET = I18N.get("message.LABEL_RESULTS_LOCATION_NOT_SET");
|
||||||
public static String LABEL_RESULTS_CORPUS_TYPE_NOT_SET = I18N.get("message.LABEL_RESULTS_CORPUS_TYPE_NOT_SET");
|
|
||||||
|
|
||||||
public static String LABEL_SCANNING_CORPUS = I18N.get("message.LABEL_SCANNING_CORPUS");
|
|
||||||
public static String LABEL_SCANNING_SINGLE_FILE_CORPUS = I18N.get("message.LABEL_SCANNING_SINGLE_FILE_CORPUS");
|
|
||||||
public static String COMPLETED = I18N.get("message.COMPLETED");
|
|
||||||
|
|
||||||
// public static String TOOLTIP_chooseCorpusLocationB = I18N.get("message.TOOLTIP_chooseCorpusLocationB");
|
|
||||||
// public static String TOOLTIP_readHeaderInfoChB = I18N.get("message.TOOLTIP_readHeaderInfoChB");
|
|
||||||
public static String TOOLTIP_readNotePunctuationsChB = I18N.get("message.TOOLTIP_readNotePunctuationsChB");
|
public static String TOOLTIP_readNotePunctuationsChB = I18N.get("message.TOOLTIP_readNotePunctuationsChB");
|
||||||
public static String TOOLTIP_readDisplayTaxonomyChB = I18N.get("message.TOOLTIP_readDisplayTaxonomyChB");
|
public static String TOOLTIP_readDisplayTaxonomyChB = I18N.get("message.TOOLTIP_readDisplayTaxonomyChB");
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,8 @@
|
||||||
package gui;
|
package gui;
|
||||||
|
|
||||||
import alg.XML_processing;
|
|
||||||
import data.*;
|
import data.*;
|
||||||
import javafx.application.HostServices;
|
import javafx.application.HostServices;
|
||||||
import javafx.beans.InvalidationListener;
|
|
||||||
import javafx.beans.Observable;
|
|
||||||
import javafx.beans.binding.StringBinding;
|
import javafx.beans.binding.StringBinding;
|
||||||
import javafx.beans.property.ReadOnlyDoubleWrapper;
|
|
||||||
import javafx.beans.value.ChangeListener;
|
import javafx.beans.value.ChangeListener;
|
||||||
import javafx.beans.value.ObservableValue;
|
import javafx.beans.value.ObservableValue;
|
||||||
import javafx.collections.ListChangeListener;
|
import javafx.collections.ListChangeListener;
|
||||||
|
@ -24,12 +20,10 @@ import javafx.scene.image.ImageView;
|
||||||
import util.Tasks;
|
import util.Tasks;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import static alg.XML_processing.readXML;
|
|
||||||
import static gui.GUIController.showAlert;
|
import static gui.GUIController.showAlert;
|
||||||
|
|
||||||
@SuppressWarnings("Duplicates")
|
@SuppressWarnings("Duplicates")
|
||||||
|
@ -38,7 +32,6 @@ public class OneWordAnalysisTab {
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private AnchorPane oneWordAnalysisTabPane;
|
private AnchorPane oneWordAnalysisTabPane;
|
||||||
// private ArrayList<String> alsoVisualize;
|
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
public TextArea selectedFiltersTextArea;
|
public TextArea selectedFiltersTextArea;
|
||||||
|
@ -197,43 +190,22 @@ public class OneWordAnalysisTab {
|
||||||
private ChangeListener<Boolean> minimalTaxonomyListener;
|
private ChangeListener<Boolean> minimalTaxonomyListener;
|
||||||
private ChangeListener<Boolean> minimalRelFreListener;
|
private ChangeListener<Boolean> minimalRelFreListener;
|
||||||
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
|
||||||
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
|
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
|
||||||
|
|
||||||
|
@ -242,9 +214,6 @@ public class OneWordAnalysisTab {
|
||||||
|
|
||||||
// TODO: pass observables for taxonomy based on header scan
|
// TODO: pass observables for taxonomy based on header scan
|
||||||
// after header scan
|
// after header scan
|
||||||
private ObservableList<String> taxonomyCCBValues;
|
|
||||||
private CorpusType currentCorpusType;
|
|
||||||
|
|
||||||
public void init() {
|
public void init() {
|
||||||
// add CSS style
|
// add CSS style
|
||||||
oneWordAnalysisTabPane.getStylesheets().add("style.css");
|
oneWordAnalysisTabPane.getStylesheets().add("style.css");
|
||||||
|
@ -339,9 +308,6 @@ public class OneWordAnalysisTab {
|
||||||
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// alsoVisualizeCCB.getCheckModel().clearChecks();
|
|
||||||
// alsoVisualizeCCB.getItems().removeAll();
|
|
||||||
// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
|
|
||||||
|
|
||||||
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
|
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
|
||||||
|
|
||||||
|
@ -462,18 +428,14 @@ public class OneWordAnalysisTab {
|
||||||
public void onChanged(Change<? extends String> c) {
|
public void onChanged(Change<? extends String> c) {
|
||||||
if (changing) {
|
if (changing) {
|
||||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||||
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
|
|
||||||
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
||||||
|
|
||||||
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
|
|
||||||
|
|
||||||
taxonomy = new ArrayList<>();
|
taxonomy = new ArrayList<>();
|
||||||
taxonomy.addAll(checkedItemsTaxonomy);
|
taxonomy.addAll(checkedItemsTaxonomy);
|
||||||
|
|
||||||
taxonomyCCB.getItems().removeAll();
|
taxonomyCCB.getItems().removeAll();
|
||||||
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
||||||
|
|
||||||
// taxonomyCCB.getCheckModel().clearChecks();
|
|
||||||
changing = false;
|
changing = false;
|
||||||
taxonomyCCB.getCheckModel().clearChecks();
|
taxonomyCCB.getCheckModel().clearChecks();
|
||||||
for (Taxonomy t : checkedItemsTaxonomy) {
|
for (Taxonomy t : checkedItemsTaxonomy) {
|
||||||
|
@ -527,7 +489,6 @@ public class OneWordAnalysisTab {
|
||||||
writeMsdAtTheEnd = newValue;
|
writeMsdAtTheEnd = newValue;
|
||||||
logger.info("write msd at the end: ", writeMsdAtTheEnd);
|
logger.info("write msd at the end: ", writeMsdAtTheEnd);
|
||||||
});
|
});
|
||||||
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
|
|
||||||
|
|
||||||
notePunctuations = false;
|
notePunctuations = false;
|
||||||
// set
|
// set
|
||||||
|
@ -653,84 +614,6 @@ public class OneWordAnalysisTab {
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* case a: values for combo boxes can change after a corpus change
|
|
||||||
* <ul>
|
|
||||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
|
||||||
* <li>same corpus type, different subset - keep</li>
|
|
||||||
* </ul>
|
|
||||||
* <p>
|
|
||||||
* case b: values for combo boxes can change after a header scan
|
|
||||||
* <ul>
|
|
||||||
* <li>at first, fields are populated by corpus type defaults</li>
|
|
||||||
* <li>after, with gathered data</li>
|
|
||||||
* </ul>
|
|
||||||
* <p></p>
|
|
||||||
* ngrams: 1
|
|
||||||
* calculateFor: word
|
|
||||||
* msd:
|
|
||||||
* taxonomy:
|
|
||||||
* skip: 0
|
|
||||||
* iscvv: false
|
|
||||||
* string length: 1
|
|
||||||
*/
|
|
||||||
// public void populateFields() {
|
|
||||||
// // corpus changed if: current one is null (this is first run of the app)
|
|
||||||
// // or if currentCorpus != gui's corpus
|
|
||||||
// boolean corpusChanged = currentCorpusType == null
|
|
||||||
// || currentCorpusType != corpus.getCorpusType();
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// // TODO: check for GOS, GIGAFIDA, SOLAR...
|
|
||||||
// // refresh and:
|
|
||||||
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
|
||||||
// if (calculateFor == null) {
|
|
||||||
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
|
|
||||||
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if (!filter.hasMsd()) {
|
|
||||||
// // if current corpus doesn't have msd data, disable this field
|
|
||||||
// msd = new ArrayList<>();
|
|
||||||
// msdTF.setText("");
|
|
||||||
// msdTF.setDisable(true);
|
|
||||||
// logger.info("no msd data");
|
|
||||||
// } else {
|
|
||||||
// if (ValidationUtil.isEmpty(msd)
|
|
||||||
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
|
||||||
// // msd has not been set previously
|
|
||||||
// // or msd has been set but the corpus changed -> reset
|
|
||||||
// msd = new ArrayList<>();
|
|
||||||
// msdTF.setText("");
|
|
||||||
// msdTF.setDisable(false);
|
|
||||||
// logger.info("msd reset");
|
|
||||||
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
|
||||||
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
|
|
||||||
// msdTF.setText(StringUtils.join(msdStrings, " "));
|
|
||||||
// msdTF.setDisable(false);
|
|
||||||
// logger.info("msd kept");
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // TODO: trigger on rescan
|
|
||||||
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
|
||||||
// // user changed corpus (by type) or by selection & triggered a rescan of headers
|
|
||||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
|
||||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
|
||||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
|
||||||
//
|
|
||||||
// currentCorpusType = corpus.getCorpusType();
|
|
||||||
// // setTaxonomyIsDirty(false);
|
|
||||||
// } else {
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
|
||||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
|
||||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
|
||||||
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
||||||
Tooltip tooltip = new Tooltip();
|
Tooltip tooltip = new Tooltip();
|
||||||
tooltip.textProperty().bind(stringBinding);
|
tooltip.textProperty().bind(stringBinding);
|
||||||
|
@ -819,7 +702,6 @@ public class OneWordAnalysisTab {
|
||||||
filter.setSolarFilters(solarFiltersMap);
|
filter.setSolarFilters(solarFiltersMap);
|
||||||
filter.setStringLength(1);
|
filter.setStringLength(1);
|
||||||
filter.setMultipleKeys(alsoVisualize);
|
filter.setMultipleKeys(alsoVisualize);
|
||||||
// filter.setNotePunctuations(true);
|
|
||||||
filter.setNotePunctuations(notePunctuations);
|
filter.setNotePunctuations(notePunctuations);
|
||||||
|
|
||||||
// setMsd must be behind alsoVisualize
|
// setMsd must be behind alsoVisualize
|
||||||
|
@ -878,136 +760,14 @@ public class OneWordAnalysisTab {
|
||||||
private void execute(StatisticsNew statistic) {
|
private void execute(StatisticsNew statistic) {
|
||||||
logger.info("Started execution: ", statistic.getFilter());
|
logger.info("Started execution: ", statistic.getFilter());
|
||||||
|
|
||||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
|
||||||
|
|
||||||
// final Task<Void> task = new Task<Void>() {
|
|
||||||
// @SuppressWarnings("Duplicates")
|
|
||||||
// @Override
|
|
||||||
// protected Void call() throws Exception {
|
|
||||||
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
|
|
||||||
// if(multipleFiles){
|
|
||||||
// cancel.setVisible(true);
|
|
||||||
// }
|
|
||||||
// int i = 0;
|
|
||||||
// Date startTime = new Date();
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
// for (File f : corpusFiles) {
|
|
||||||
// final int iFinal = i;
|
|
||||||
// XML_processing xml_processing = new XML_processing();
|
|
||||||
// xml_processing.isCancelled = false;
|
|
||||||
// i++;
|
|
||||||
// if(xml_processing.progressBarListener != null) {
|
|
||||||
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
|
|
||||||
// }
|
|
||||||
// if (multipleFiles) {
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// this.updateProgress(i, corpusFiles.size());
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
|
|
||||||
//// if (isCancelled()) {
|
|
||||||
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
//// break;
|
|
||||||
//// }
|
|
||||||
// } else {
|
|
||||||
//
|
|
||||||
// xml_processing.progressBarListener = new InvalidationListener() {
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// @Override
|
|
||||||
// public void invalidated(Observable observable) {
|
|
||||||
// cancel.setVisible(true);
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
|
||||||
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
|
||||||
// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// xml_processing.isCancelled = isCancelled();
|
|
||||||
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
|
|
||||||
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
|
||||||
// }
|
|
||||||
// xml_processing.readXML(f.toString(), statistic);
|
|
||||||
// if (isCancelled()) {
|
|
||||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
|
||||||
// progressLabel.textProperty().bind(task.messageProperty());
|
|
||||||
//
|
|
||||||
// task.setOnSucceeded(e -> {
|
|
||||||
// try {
|
|
||||||
// boolean successullySaved = statistic.saveResultToDisk();
|
|
||||||
// if (successullySaved) {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
|
||||||
// } else {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
|
||||||
// }
|
|
||||||
// } catch (UnsupportedEncodingException e1) {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
|
||||||
// logger.error("Error while saving", e1);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnFailed(e -> {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
|
||||||
// logger.error("Error while executing", e);
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnCancelled(e -> {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// // When cancel button is pressed cancel analysis
|
|
||||||
// cancel.setOnAction(e -> {
|
|
||||||
// task.cancel();
|
|
||||||
// logger.info("cancel button");
|
|
||||||
// });
|
|
||||||
|
|
||||||
// final Thread thread = new Thread(task, "task");
|
|
||||||
// thread.setDaemon(true);
|
|
||||||
// thread.start();
|
|
||||||
|
|
||||||
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
|
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
|
||||||
if (statistic.getFilter().getMinimalRelFre() > 1){
|
if (statistic.getFilter().getMinimalRelFre() > 1){
|
||||||
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
|
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
|
||||||
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
|
|
||||||
final Thread thread = new Thread(mainTask, "task");
|
final Thread thread = new Thread(mainTask, "task");
|
||||||
thread.setDaemon(true);
|
thread.setDaemon(true);
|
||||||
thread.start();
|
thread.start();
|
||||||
} else {
|
} else {
|
||||||
final Task<Void> mainTask = t.prepareMainTask(statistic);
|
final Task<Void> mainTask = t.prepareMainTask(statistic);
|
||||||
// final Task<Void> mainTask = prepareMainTask(statistic);
|
|
||||||
final Thread thread = new Thread(mainTask, "task");
|
final Thread thread = new Thread(mainTask, "task");
|
||||||
thread.setDaemon(true);
|
thread.setDaemon(true);
|
||||||
thread.start();
|
thread.start();
|
||||||
|
|
|
@ -1,18 +0,0 @@
|
||||||
package gui;
|
|
||||||
|
|
||||||
import javafx.scene.control.Label;
|
|
||||||
|
|
||||||
public class SelectedFiltersPane {
|
|
||||||
|
|
||||||
|
|
||||||
public Label selectedFiltersLabel;
|
|
||||||
|
|
||||||
public Label getSelectedFiltersLabel() {
|
|
||||||
return selectedFiltersLabel;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setSelectedFiltersLabel(String filters) {
|
|
||||||
this.selectedFiltersLabel = new Label(filters);
|
|
||||||
this.selectedFiltersLabel.setText("test?");
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,21 +1,12 @@
|
||||||
package gui;
|
package gui;
|
||||||
|
|
||||||
import static alg.XML_processing.*;
|
|
||||||
import static gui.GUIController.*;
|
import static gui.GUIController.*;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import alg.XML_processing;
|
|
||||||
import javafx.application.HostServices;
|
import javafx.application.HostServices;
|
||||||
import javafx.beans.InvalidationListener;
|
|
||||||
import javafx.beans.Observable;
|
|
||||||
import javafx.beans.binding.StringBinding;
|
import javafx.beans.binding.StringBinding;
|
||||||
import javafx.beans.property.ReadOnlyDoubleWrapper;
|
|
||||||
import javafx.beans.value.ChangeListener;
|
import javafx.beans.value.ChangeListener;
|
||||||
import javafx.beans.value.ObservableValue;
|
import javafx.beans.value.ObservableValue;
|
||||||
import javafx.scene.image.ImageView;
|
import javafx.scene.image.ImageView;
|
||||||
|
@ -147,15 +138,6 @@ public class StringAnalysisTabNew2 {
|
||||||
@FXML
|
@FXML
|
||||||
private CheckComboBox<String> taxonomyCCB;
|
private CheckComboBox<String> taxonomyCCB;
|
||||||
private ArrayList<Taxonomy> taxonomy;
|
private ArrayList<Taxonomy> taxonomy;
|
||||||
//
|
|
||||||
// @FXML
|
|
||||||
// private CheckBox calculatecvvCB;
|
|
||||||
// private boolean calculateCvv;
|
|
||||||
|
|
||||||
// @FXML
|
|
||||||
// private TextField stringLengthTF;
|
|
||||||
// private Integer stringLength;
|
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private ComboBox<String> calculateForCB;
|
private ComboBox<String> calculateForCB;
|
||||||
private CalculateFor calculateFor;
|
private CalculateFor calculateFor;
|
||||||
|
@ -225,8 +207,6 @@ public class StringAnalysisTabNew2 {
|
||||||
|
|
||||||
private Corpus corpus;
|
private Corpus corpus;
|
||||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||||
private Filter filter;
|
|
||||||
private boolean useDb;
|
|
||||||
private HostServices hostService;
|
private HostServices hostService;
|
||||||
private ListChangeListener<String> taxonomyListener;
|
private ListChangeListener<String> taxonomyListener;
|
||||||
private ListChangeListener<String> alsoVisualizeListener;
|
private ListChangeListener<String> alsoVisualizeListener;
|
||||||
|
@ -236,44 +216,25 @@ public class StringAnalysisTabNew2 {
|
||||||
private ChangeListener<Boolean> minimalOccurrencesListener;
|
private ChangeListener<Boolean> minimalOccurrencesListener;
|
||||||
private ChangeListener<Boolean> minimalTaxonomyListener;
|
private ChangeListener<Boolean> minimalTaxonomyListener;
|
||||||
private ChangeListener<Boolean> minimalRelFreListener;
|
private ChangeListener<Boolean> minimalRelFreListener;
|
||||||
|
private boolean useDb;
|
||||||
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
|
||||||
private static final ObservableList<String> COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice", "t-score", "MI", "MI3", "logDice", "simple LL");
|
private static final ObservableList<String> COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice", "t-score", "MI", "MI3", "logDice", "simple LL");
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
|
||||||
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
|
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
|
||||||
|
|
||||||
|
@ -282,9 +243,6 @@ public class StringAnalysisTabNew2 {
|
||||||
|
|
||||||
// TODO: pass observables for taxonomy based on header scan
|
// TODO: pass observables for taxonomy based on header scan
|
||||||
// after header scan
|
// after header scan
|
||||||
private ObservableList<String> taxonomyCCBValues;
|
|
||||||
private CorpusType currentCorpusType;
|
|
||||||
|
|
||||||
public void init() {
|
public void init() {
|
||||||
// add CSS style
|
// add CSS style
|
||||||
stringAnalysisTabPaneNew2.getStylesheets().add("style.css");
|
stringAnalysisTabPaneNew2.getStylesheets().add("style.css");
|
||||||
|
@ -420,13 +378,6 @@ public class StringAnalysisTabNew2 {
|
||||||
} else {
|
} else {
|
||||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_EMPTY));
|
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_EMPTY));
|
||||||
}
|
}
|
||||||
// alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
|
||||||
// alsoVisualize = new ArrayList<>();
|
|
||||||
// ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
|
|
||||||
// alsoVisualize.addAll(checkedItems);
|
|
||||||
// logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
|
||||||
// });
|
|
||||||
// alsoVisualizeCCB.getCheckModel().clearChecks();
|
|
||||||
|
|
||||||
alsoVisualizeListener = new ListChangeListener<String>() {
|
alsoVisualizeListener = new ListChangeListener<String>() {
|
||||||
@Override
|
@Override
|
||||||
|
@ -437,9 +388,6 @@ public class StringAnalysisTabNew2 {
|
||||||
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// alsoVisualizeCCB.getCheckModel().clearChecks();
|
|
||||||
// alsoVisualizeCCB.getItems().removeAll();
|
|
||||||
// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS));
|
|
||||||
|
|
||||||
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
|
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
|
||||||
|
|
||||||
|
@ -500,16 +448,6 @@ public class StringAnalysisTabNew2 {
|
||||||
collocabilityCCB.setDisable(false);
|
collocabilityCCB.setDisable(false);
|
||||||
|
|
||||||
collocabilityCCB.getCheckModel().getCheckedItems().addListener(collocabilityListener);
|
collocabilityCCB.getCheckModel().getCheckedItems().addListener(collocabilityListener);
|
||||||
// collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
|
||||||
// collocability = new ArrayList<>();
|
|
||||||
// ObservableList<Collocability> checkedItems = FXCollections.observableArrayList();
|
|
||||||
// for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) {
|
|
||||||
// checkedItems.add(Collocability.factory(el));
|
|
||||||
// }
|
|
||||||
// collocability.addAll(checkedItems);
|
|
||||||
// logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ",")));
|
|
||||||
// });
|
|
||||||
|
|
||||||
|
|
||||||
// msd
|
// msd
|
||||||
if (msdListener != null){
|
if (msdListener != null){
|
||||||
|
@ -595,9 +533,6 @@ public class StringAnalysisTabNew2 {
|
||||||
public void onChanged(ListChangeListener.Change<? extends String> c){
|
public void onChanged(ListChangeListener.Change<? extends String> c){
|
||||||
if(changing) {
|
if(changing) {
|
||||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||||
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
|
|
||||||
//
|
|
||||||
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
|
|
||||||
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
||||||
|
|
||||||
taxonomy = new ArrayList<>();
|
taxonomy = new ArrayList<>();
|
||||||
|
@ -606,7 +541,6 @@ public class StringAnalysisTabNew2 {
|
||||||
taxonomyCCB.getItems().removeAll();
|
taxonomyCCB.getItems().removeAll();
|
||||||
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
||||||
|
|
||||||
// taxonomyCCB.getCheckModel().clearChecks();
|
|
||||||
changing = false;
|
changing = false;
|
||||||
taxonomyCCB.getCheckModel().clearChecks();
|
taxonomyCCB.getCheckModel().clearChecks();
|
||||||
for (Taxonomy t : checkedItemsTaxonomy) {
|
for (Taxonomy t : checkedItemsTaxonomy) {
|
||||||
|
@ -646,32 +580,6 @@ public class StringAnalysisTabNew2 {
|
||||||
skipValue = 0;
|
skipValue = 0;
|
||||||
|
|
||||||
// cvv
|
// cvv
|
||||||
// calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
|
||||||
// calculateCvv = newValue;
|
|
||||||
// logger.info("calculate cvv: " + calculateCvv);
|
|
||||||
// });
|
|
||||||
|
|
||||||
// calculatecvvCB.setSelected(false);
|
|
||||||
|
|
||||||
// string length
|
|
||||||
// stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
|
||||||
// if (!newValue) {
|
|
||||||
// // focus lost
|
|
||||||
// String value = stringLengthTF.getText();
|
|
||||||
// if (!ValidationUtil.isEmpty(value)) {
|
|
||||||
// if (!ValidationUtil.isNumber(value)) {
|
|
||||||
// logAlert("stringlengthTf: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
|
||||||
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
|
||||||
// }
|
|
||||||
// stringLength = Integer.parseInt(value);
|
|
||||||
// } else {
|
|
||||||
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_MISSING_STRING_LENGTH"));
|
|
||||||
// stringLengthTF.setText("1");
|
|
||||||
// logAlert(I18N.get("message.WARNING_MISSING_STRING_LENGTH"));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// });
|
|
||||||
|
|
||||||
minimalOccurrencesTF.setText("1");
|
minimalOccurrencesTF.setText("1");
|
||||||
minimalOccurrences = 1;
|
minimalOccurrences = 1;
|
||||||
|
|
||||||
|
@ -781,108 +689,6 @@ public class StringAnalysisTabNew2 {
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* case a: values for combo boxes can change after a corpus change
|
|
||||||
* <ul>
|
|
||||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
|
||||||
* <li>same corpus type, different subset - keep</li>
|
|
||||||
* </ul>
|
|
||||||
* <p>
|
|
||||||
* case b: values for combo boxes can change after a header scan
|
|
||||||
* <ul>
|
|
||||||
* <li>at first, fields are populated by corpus type defaults</li>
|
|
||||||
* <li>after, with gathered data</li>
|
|
||||||
* </ul>
|
|
||||||
* <p></p>
|
|
||||||
* ngrams: 1
|
|
||||||
* calculateFor: word
|
|
||||||
* msd:
|
|
||||||
* taxonomy:
|
|
||||||
* skip: 0
|
|
||||||
* iscvv: false
|
|
||||||
* string length: 1
|
|
||||||
*/
|
|
||||||
// public void populateFields() {
|
|
||||||
// // corpus changed if: current one is null (this is first run of the app)
|
|
||||||
// // or if currentCorpus != gui's corpus
|
|
||||||
// boolean corpusChanged = currentCorpusType == null
|
|
||||||
// || currentCorpusType != corpus.getCorpusType();
|
|
||||||
//
|
|
||||||
// // keep ngram value if set
|
|
||||||
// if (ngramValue == null) {
|
|
||||||
// ngramValueCB.getSelectionModel().select("1");
|
|
||||||
// ngramValue = 1;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // TODO: check for GOS, GIGAFIDA, SOLAR...
|
|
||||||
// // refresh and:
|
|
||||||
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
|
||||||
// if (calculateFor == null) {
|
|
||||||
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
|
|
||||||
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if (!filter.hasMsd()) {
|
|
||||||
// // if current corpus doesn't have msd data, disable this field
|
|
||||||
// msd = new ArrayList<>();
|
|
||||||
// msdTF.setText("");
|
|
||||||
// msdTF.setDisable(true);
|
|
||||||
// logger.info("no msd data");
|
|
||||||
// } else {
|
|
||||||
// if (ValidationUtil.isEmpty(msd)
|
|
||||||
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
|
||||||
// // msd has not been set previously
|
|
||||||
// // or msd has been set but the corpus changed -> reset
|
|
||||||
// msd = new ArrayList<>();
|
|
||||||
// msdTF.setText("");
|
|
||||||
// msdTF.setDisable(false);
|
|
||||||
// logger.info("msd reset");
|
|
||||||
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
|
||||||
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
|
|
||||||
// msdTF.setText(StringUtils.join(msdStrings, " "));
|
|
||||||
// msdTF.setDisable(false);
|
|
||||||
// logger.info("msd kept");
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
|
|
||||||
//
|
|
||||||
// // keep skip value
|
|
||||||
// if (skipValue == null) {
|
|
||||||
// skipValueCB.getSelectionModel().select("0");
|
|
||||||
// skipValue = 0;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // keep calculateCvv
|
|
||||||
// calculatecvvCB.setSelected(calculateCvv);
|
|
||||||
//
|
|
||||||
// // keep string length if set
|
|
||||||
// if (stringLength != null) {
|
|
||||||
// stringLengthTF.setText(String.valueOf(stringLength));
|
|
||||||
// } else {
|
|
||||||
// stringLengthTF.setText("1");
|
|
||||||
// stringLength = 1;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // TODO: trigger on rescan
|
|
||||||
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
|
||||||
// // user changed corpus (by type) or by selection & triggered a rescan of headers
|
|
||||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
|
||||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
|
||||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
|
||||||
//
|
|
||||||
// currentCorpusType = corpus.getCorpusType();
|
|
||||||
// // setTaxonomyIsDirty(false);
|
|
||||||
// } else {
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
|
||||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
|
||||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
|
||||||
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
||||||
Tooltip tooltip = new Tooltip();
|
Tooltip tooltip = new Tooltip();
|
||||||
tooltip.textProperty().bind(stringBinding);
|
tooltip.textProperty().bind(stringBinding);
|
||||||
|
@ -898,7 +704,6 @@ public class StringAnalysisTabNew2 {
|
||||||
calculateForL.textProperty().bind(I18N.createStringBinding("label.calculateFor"));
|
calculateForL.textProperty().bind(I18N.createStringBinding("label.calculateFor"));
|
||||||
alsoVisualizeL.textProperty().bind(I18N.createStringBinding("label.alsoVisualize"));
|
alsoVisualizeL.textProperty().bind(I18N.createStringBinding("label.alsoVisualize"));
|
||||||
displayTaxonomyL.textProperty().bind(I18N.createStringBinding("label.displayTaxonomy"));
|
displayTaxonomyL.textProperty().bind(I18N.createStringBinding("label.displayTaxonomy"));
|
||||||
// writeMsdAtTheEndL.textProperty().bind(I18N.createStringBinding("label.writeMsdAtTheEnd"));
|
|
||||||
skipValueL.textProperty().bind(I18N.createStringBinding("label.skipValue"));
|
skipValueL.textProperty().bind(I18N.createStringBinding("label.skipValue"));
|
||||||
slowSpeedWarning1L.textProperty().bind(I18N.createStringBinding("label.slowSpeedWarning"));
|
slowSpeedWarning1L.textProperty().bind(I18N.createStringBinding("label.slowSpeedWarning"));
|
||||||
slowSpeedWarning2L.textProperty().bind(I18N.createStringBinding("label.slowSpeedWarning"));
|
slowSpeedWarning2L.textProperty().bind(I18N.createStringBinding("label.slowSpeedWarning"));
|
||||||
|
@ -948,10 +753,6 @@ public class StringAnalysisTabNew2 {
|
||||||
|
|
||||||
if (mode == MODE.WORD) {
|
if (mode == MODE.WORD) {
|
||||||
paneWords.setVisible(true);
|
paneWords.setVisible(true);
|
||||||
// paneLetters.setVisible(false);
|
|
||||||
// if (corpus.getCorpusType() == CorpusType.GOS)
|
|
||||||
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
|
|
||||||
// else
|
|
||||||
if (corpus.getCorpusType() == CorpusType.GOS) {
|
if (corpus.getCorpusType() == CorpusType.GOS) {
|
||||||
calculateForCB.itemsProperty().unbind();
|
calculateForCB.itemsProperty().unbind();
|
||||||
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_GOS));
|
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_GOS));
|
||||||
|
@ -980,7 +781,6 @@ public class StringAnalysisTabNew2 {
|
||||||
filter.setDisplayTaxonomy(displayTaxonomy);
|
filter.setDisplayTaxonomy(displayTaxonomy);
|
||||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||||
filter.setSkipValue(skipValue);
|
filter.setSkipValue(skipValue);
|
||||||
// filter.setIsCvv(calculateCvv);
|
|
||||||
filter.setSolarFilters(solarFiltersMap);
|
filter.setSolarFilters(solarFiltersMap);
|
||||||
filter.setNotePunctuations(notePunctuations);
|
filter.setNotePunctuations(notePunctuations);
|
||||||
filter.setMultipleKeys(alsoVisualize);
|
filter.setMultipleKeys(alsoVisualize);
|
||||||
|
@ -993,16 +793,11 @@ public class StringAnalysisTabNew2 {
|
||||||
filter.setCollocability(collocability);
|
filter.setCollocability(collocability);
|
||||||
filter.setTaxonomySetOperation(taxonomySetOperation);
|
filter.setTaxonomySetOperation(taxonomySetOperation);
|
||||||
|
|
||||||
// if (ngramValue != null && ngramValue == 0) {
|
|
||||||
// filter.setStringLength(stringLength);
|
|
||||||
// }
|
|
||||||
|
|
||||||
String message = Validation.validateForStringLevel(filter);
|
String message = Validation.validateForStringLevel(filter);
|
||||||
if (message == null) {
|
if (message == null) {
|
||||||
// no errors
|
// no errors
|
||||||
logger.info("Executing: ", filter.toString());
|
logger.info("Executing: ", filter.toString());
|
||||||
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||||
// ADD THINGS HERE!!!
|
|
||||||
execute(statistic);
|
execute(statistic);
|
||||||
} else {
|
} else {
|
||||||
logAlert(message);
|
logAlert(message);
|
||||||
|
@ -1043,547 +838,6 @@ public class StringAnalysisTabNew2 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// public void calculate_collocabilities(StatisticsNew statistics, StatisticsNew oneWordStatistics) {
|
|
||||||
// statistics.updateCalculateCollocabilities(oneWordStatistics);
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
|
|
||||||
// private final Task<Void> prepareTaskForMinRelFre(StatisticsNew statistic) {
|
|
||||||
// Filter f = statistic.getFilter();
|
|
||||||
// logger.info("Started execution: ", f);
|
|
||||||
// Task<Void> task_collocability = null;
|
|
||||||
//
|
|
||||||
// try{
|
|
||||||
// Filter f2 = (Filter) f.clone();
|
|
||||||
// f2.setIsMinimalRelFreScraper(true);
|
|
||||||
// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb);
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb);
|
|
||||||
//
|
|
||||||
// Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();
|
|
||||||
//
|
|
||||||
// final Task<Void> task = new Task<Void>() {
|
|
||||||
// @SuppressWarnings("Duplicates")
|
|
||||||
// @Override
|
|
||||||
// protected Void call() throws Exception {
|
|
||||||
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType());
|
|
||||||
// if(multipleFiles){
|
|
||||||
// cancel.setVisible(true);
|
|
||||||
// }
|
|
||||||
// Date startTime = new Date();
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
// int corpusSize;
|
|
||||||
// int i;
|
|
||||||
// if(statistic.getFilter().getCollocability().size() > 0){
|
|
||||||
// i = 0;
|
|
||||||
// corpusSize = corpusFiles.size() * 3;
|
|
||||||
// } else {
|
|
||||||
// i = 0;
|
|
||||||
// corpusSize = corpusFiles.size() * 2;
|
|
||||||
// }
|
|
||||||
// for (File f : corpusFiles) {
|
|
||||||
// final int iFinal = i;
|
|
||||||
// XML_processing xml_processing = new XML_processing();
|
|
||||||
// xml_processing.isCancelled = false;
|
|
||||||
// i++;
|
|
||||||
// if(xml_processing.progressBarListener != null) {
|
|
||||||
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
|
|
||||||
// }
|
|
||||||
// if (multipleFiles) {
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// this.updateProgress(i, corpusSize);
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
|
||||||
//// if (isCancelled()) {
|
|
||||||
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
//// break;
|
|
||||||
//// }
|
|
||||||
// } else {
|
|
||||||
// xml_processing.progressBarListener = new InvalidationListener() {
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// @Override
|
|
||||||
// public void invalidated(Observable observable) {
|
|
||||||
// cancel.setVisible(true);
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
|
||||||
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
|
||||||
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
|
||||||
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
|
||||||
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
|
|
||||||
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
|
||||||
//// System.out.println(remainingSeconds);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// xml_processing.isCancelled = isCancelled();
|
|
||||||
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
|
|
||||||
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
|
||||||
// }
|
|
||||||
// xml_processing.readXML(f.toString(), statisticsMinRelFre);
|
|
||||||
// if (isCancelled()) {
|
|
||||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// if(!(multipleFiles)){
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // add remaining minRelFre results
|
|
||||||
// if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
|
|
||||||
//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
|
|
||||||
// long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
|
|
||||||
// double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
|
||||||
//
|
|
||||||
// statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor);
|
|
||||||
//
|
|
||||||
// // reset all values
|
|
||||||
// for(Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()){
|
|
||||||
// statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>());
|
|
||||||
// }
|
|
||||||
// for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){
|
|
||||||
// statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//// System.out.println("asd");
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
|
||||||
// progressLabel.textProperty().bind(task.messageProperty());
|
|
||||||
// task.setOnSucceeded(e -> {
|
|
||||||
// statistic.updateMinimalRelFre(statisticsMinRelFre.getMinimalRelFreNgrams(), statisticsMinRelFre.getMinimalRelFre1grams());
|
|
||||||
// final Task<Void> taskCollocability = prepareMainTask(statistic);
|
|
||||||
// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
|
|
||||||
// thread_collocability.setDaemon(true);
|
|
||||||
// thread_collocability.start();
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnFailed(e -> {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
|
||||||
// logger.error("Error while executing", e);
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
// // ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnCancelled(e -> {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
// // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// // When cancel button is pressed cancel analysis
|
|
||||||
// cancel.setOnAction(e -> {
|
|
||||||
// task.cancel();
|
|
||||||
// logger.info("cancel button");
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// return task;
|
|
||||||
// }catch(CloneNotSupportedException c){ return null; }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private final Task<Void> prepareMainTask(StatisticsNew statistic) {
|
|
||||||
// Filter f = statistic.getFilter();
|
|
||||||
// logger.info("Started execution: ", f);
|
|
||||||
// Task<Void> task_collocability = null;
|
|
||||||
//
|
|
||||||
// Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
|
||||||
//
|
|
||||||
// final Task<Void> task = new Task<Void>() {
|
|
||||||
// @SuppressWarnings("Duplicates")
|
|
||||||
// @Override
|
|
||||||
// protected Void call() throws Exception {
|
|
||||||
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
|
|
||||||
// if(multipleFiles){
|
|
||||||
// cancel.setVisible(true);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//// int i = corpusFiles.size();
|
|
||||||
//// Date startTime = new Date();
|
|
||||||
//// Date previousTime = new Date();
|
|
||||||
//// int remainingSeconds = -1;
|
|
||||||
//// int corpusSize;
|
|
||||||
//// if (statistic.getFilter().getCollocability().size() > 0) {
|
|
||||||
//// corpusSize = corpusFiles.size() * 2;
|
|
||||||
//// } else {
|
|
||||||
//// corpusSize = corpusFiles.size();
|
|
||||||
//// }
|
|
||||||
//
|
|
||||||
// Date startTime = new Date();
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
// int corpusSize;
|
|
||||||
// int i;
|
|
||||||
// int taskIndex = 0;
|
|
||||||
// if(statistic.getFilter().getCollocability().size() > 0 && statistic.getFilter().getMinimalRelFre() > 1){
|
|
||||||
// i = corpusFiles.size();
|
|
||||||
// corpusSize = corpusFiles.size() * 3;
|
|
||||||
// } else if (statistic.getFilter().getMinimalRelFre() > 1) {
|
|
||||||
// i = corpusFiles.size();
|
|
||||||
// corpusSize = corpusFiles.size() * 2;
|
|
||||||
// } else if (statistic.getFilter().getCollocability().size() > 0) {
|
|
||||||
// i = 0;
|
|
||||||
// corpusSize = corpusFiles.size() * 2;
|
|
||||||
// } else {
|
|
||||||
// i = 0;
|
|
||||||
// corpusSize = corpusFiles.size();
|
|
||||||
// }
|
|
||||||
// for (File f : corpusFiles) {
|
|
||||||
// final int iFinal = i;
|
|
||||||
// XML_processing xml_processing = new XML_processing();
|
|
||||||
// xml_processing.isCancelled = false;
|
|
||||||
// i++;
|
|
||||||
// taskIndex++;
|
|
||||||
// if(xml_processing.progressBarListener != null) {
|
|
||||||
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
|
|
||||||
// }
|
|
||||||
// if (multipleFiles) {
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// this.updateProgress(i, corpusSize);
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
|
||||||
//
|
|
||||||
//// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
//// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
|
|
||||||
//// previousTime = new Date();
|
|
||||||
//// }
|
|
||||||
//// this.updateProgress(i, corpusSize);
|
|
||||||
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
|
||||||
//
|
|
||||||
// } else {
|
|
||||||
// xml_processing.progressBarListener = new InvalidationListener() {
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// @Override
|
|
||||||
// public void invalidated(Observable observable) {
|
|
||||||
// cancel.setVisible(true);
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
|
||||||
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
|
||||||
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
|
||||||
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
|
||||||
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
|
|
||||||
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
|
||||||
//// System.out.println(remainingSeconds);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// xml_processing.isCancelled = isCancelled();
|
|
||||||
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
|
|
||||||
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
|
||||||
// }
|
|
||||||
// xml_processing.readXML(f.toString(), statistic);
|
|
||||||
// if (isCancelled()) {
|
|
||||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// if(!(multipleFiles)){
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// }
|
|
||||||
//// readXML(f.toString(), statistic);
|
|
||||||
//// i++;
|
|
||||||
//// if (isCancelled()) {
|
|
||||||
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
//// break;
|
|
||||||
//// }
|
|
||||||
//// if (statistic.getFilter().getCollocability().size() > 0) {
|
|
||||||
//// this.updateProgress(i, corpusFiles.size() * 2);
|
|
||||||
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
|
|
||||||
//// } else {
|
|
||||||
//// this.updateProgress(i, corpusFiles.size());
|
|
||||||
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
|
||||||
//// }
|
|
||||||
////// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
|
|
||||||
// }
|
|
||||||
// // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing
|
|
||||||
// if (statistic.getFilter().getMinimalRelFre() > 1){
|
|
||||||
//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
|
|
||||||
// long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
|
|
||||||
// double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// for(Map.Entry<MultipleHMKeys, AtomicLong> entry : statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()){
|
|
||||||
// if(entry.getValue().longValue() < absToRelFactor){
|
|
||||||
// statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).remove(entry.getKey());
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
|
||||||
// progressLabel.textProperty().bind(task.messageProperty());
|
|
||||||
// task.setOnSucceeded(e -> {
|
|
||||||
// if (f.getCollocability().size() > 0) {
|
|
||||||
// try{
|
|
||||||
// Filter f2 = (Filter) f.clone();
|
|
||||||
// f2.setNgramValue(1);
|
|
||||||
// StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
|
|
||||||
// final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
|
|
||||||
// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
|
|
||||||
// thread_collocability.setDaemon(true);
|
|
||||||
// thread_collocability.start();
|
|
||||||
// }catch(CloneNotSupportedException c){}
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// } else {
|
|
||||||
// try {
|
|
||||||
//// System.out.print(statistics);
|
|
||||||
// boolean successullySaved = statistic.saveResultToDisk();
|
|
||||||
// if (successullySaved) {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
|
||||||
// } else {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
|
||||||
// }
|
|
||||||
// } catch (UnsupportedEncodingException e1) {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
|
||||||
// logger.error("Error while saving", e1);
|
|
||||||
// } catch (OutOfMemoryError e1) {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
|
|
||||||
// logger.error("Out of memory error", e1);
|
|
||||||
// }
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnFailed(e -> {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
|
||||||
// logger.error("Error while executing", e);
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnCancelled(e -> {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// // When cancel button is pressed cancel analysis
|
|
||||||
// cancel.setOnAction(e -> {
|
|
||||||
// task.cancel();
|
|
||||||
// logger.info("cancel button");
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// return task;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
|
|
||||||
// Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
|
|
||||||
//
|
|
||||||
// final Task<Void> task = new Task<Void>() {
|
|
||||||
// @SuppressWarnings("Duplicates")
|
|
||||||
// @Override
|
|
||||||
// protected Void call() throws Exception {
|
|
||||||
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
|
|
||||||
// if(multipleFiles){
|
|
||||||
// cancel.setVisible(true);
|
|
||||||
// }
|
|
||||||
//// int i = corpusFiles.size();
|
|
||||||
// Date startTime = new Date();
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
//// int corpusSize;
|
|
||||||
//// if (statistic.getFilter().getCollocability().size() > 0) {
|
|
||||||
//// corpusSize = corpusFiles.size() * 2;
|
|
||||||
//// } else {
|
|
||||||
//// corpusSize = corpusFiles.size();
|
|
||||||
//// }
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// int corpusSize;
|
|
||||||
// int i;
|
|
||||||
// int taskIndex = 0;
|
|
||||||
// if(statistic.getFilter().getMinimalRelFre() > 1){
|
|
||||||
// i = corpusFiles.size() * 2;
|
|
||||||
// corpusSize = corpusFiles.size() * 3;
|
|
||||||
// } else {
|
|
||||||
// i = corpusFiles.size();
|
|
||||||
// corpusSize = corpusFiles.size() * 2;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// for (File f : corpusFiles) {
|
|
||||||
// final int iFinal = i;
|
|
||||||
// XML_processing xml_processing = new XML_processing();
|
|
||||||
// i++;
|
|
||||||
// taskIndex++;
|
|
||||||
// if(xml_processing.progressBarListener != null) {
|
|
||||||
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
|
|
||||||
// }
|
|
||||||
// if (multipleFiles) {
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// this.updateProgress(i, corpusSize);
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
|
||||||
//// if (isCancelled()) {
|
|
||||||
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
//// break;
|
|
||||||
//// }
|
|
||||||
// } else {
|
|
||||||
// xml_processing.progressBarListener = new InvalidationListener() {
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// @Override
|
|
||||||
// public void invalidated(Observable observable) {
|
|
||||||
// cancel.setVisible(true);
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
|
||||||
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
|
||||||
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
|
||||||
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
|
||||||
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)));
|
|
||||||
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
|
||||||
//// System.out.println(remainingSeconds);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// xml_processing.isCancelled = isCancelled();
|
|
||||||
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
|
|
||||||
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
|
||||||
// }
|
|
||||||
// xml_processing.isCollocability = true;
|
|
||||||
// xml_processing.readXML(f.toString(), statisticsOneGrams);
|
|
||||||
// xml_processing.isCollocability = false;
|
|
||||||
// if (isCancelled()) {
|
|
||||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
//// readXML(f.toString(), statisticsOneGrams);
|
|
||||||
//// i++;
|
|
||||||
//// this.updateProgress(i, corpusFiles.size() * 2);
|
|
||||||
//// if (statistic.getFilter().getCollocability().size() > 0) {
|
|
||||||
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
|
|
||||||
//// } else {
|
|
||||||
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
|
||||||
//// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
|
||||||
// progressLabel.textProperty().bind(task.messageProperty());
|
|
||||||
//
|
|
||||||
// task.setOnSucceeded(e -> {
|
|
||||||
// try {
|
|
||||||
// System.out.print(statistic);
|
|
||||||
//// calculate_collocabilities(statistic, statisticsOneGrams);
|
|
||||||
// statistic.updateCalculateCollocabilities(statisticsOneGrams);
|
|
||||||
// boolean successullySaved = statistic.saveResultToDisk();
|
|
||||||
// if (successullySaved) {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
|
||||||
// } else {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
|
||||||
// }
|
|
||||||
// } catch (UnsupportedEncodingException e1) {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
|
||||||
// logger.error("Error while saving", e1);
|
|
||||||
// } catch (OutOfMemoryError e1) {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
|
|
||||||
// logger.error("Out of memory error", e1);
|
|
||||||
// }
|
|
||||||
//// try {
|
|
||||||
//// boolean successullySaved = statistic.saveResultToDisk();
|
|
||||||
//// if (successullySaved) {
|
|
||||||
//// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
|
||||||
//// } else {
|
|
||||||
//// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
|
||||||
//// }
|
|
||||||
//// } catch (UnsupportedEncodingException e1) {
|
|
||||||
//// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
|
||||||
//// logger.error("Error while saving", e1);
|
|
||||||
//// } catch (OutOfMemoryError e1){
|
|
||||||
//// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
|
||||||
//// logger.error("Out of memory error", e1);
|
|
||||||
//// }
|
|
||||||
////
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnFailed(e -> {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
|
||||||
// logger.error("Error while executing", e);
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnCancelled(e -> {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// // When cancel button is pressed cancel analysis
|
|
||||||
// cancel.setOnAction(e -> {
|
|
||||||
// task.cancel();
|
|
||||||
//// logger.info("cancel button");
|
|
||||||
// });
|
|
||||||
// return task;
|
|
||||||
// }
|
|
||||||
|
|
||||||
private void execute(StatisticsNew statistic) {
|
private void execute(StatisticsNew statistic) {
|
||||||
Filter f = statistic.getFilter();
|
Filter f = statistic.getFilter();
|
||||||
logger.info("Started execution: ", f);
|
logger.info("Started execution: ", f);
|
||||||
|
@ -1591,13 +845,11 @@ public class StringAnalysisTabNew2 {
|
||||||
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
|
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
|
||||||
if (f.getMinimalRelFre() > 1){
|
if (f.getMinimalRelFre() > 1){
|
||||||
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
|
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
|
||||||
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
|
|
||||||
final Thread thread = new Thread(mainTask, "task");
|
final Thread thread = new Thread(mainTask, "task");
|
||||||
thread.setDaemon(true);
|
thread.setDaemon(true);
|
||||||
thread.start();
|
thread.start();
|
||||||
} else {
|
} else {
|
||||||
final Task<Void> mainTask = t.prepareMainTask(statistic);
|
final Task<Void> mainTask = t.prepareMainTask(statistic);
|
||||||
// final Task<Void> mainTask = prepareMainTask(statistic);
|
|
||||||
final Thread thread = new Thread(mainTask, "task");
|
final Thread thread = new Thread(mainTask, "task");
|
||||||
thread.setDaemon(true);
|
thread.setDaemon(true);
|
||||||
thread.start();
|
thread.start();
|
||||||
|
|
|
@ -9,7 +9,6 @@ import org.apache.commons.lang3.math.NumberUtils;
|
||||||
public class ValidationUtil {
|
public class ValidationUtil {
|
||||||
|
|
||||||
public static boolean isNumber(String value) {
|
public static boolean isNumber(String value) {
|
||||||
//return NumberUtils.isCreatable(value);
|
|
||||||
return NumberUtils.isNumber(value);
|
return NumberUtils.isNumber(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,260 +0,0 @@
|
||||||
//package gui;
|
|
||||||
//
|
|
||||||
//import static alg.XML_processing.*;
|
|
||||||
//import static gui.GUIController.*;
|
|
||||||
//
|
|
||||||
//import java.io.File;
|
|
||||||
//import java.io.UnsupportedEncodingException;
|
|
||||||
//import java.util.*;
|
|
||||||
//
|
|
||||||
//import javafx.application.HostServices;
|
|
||||||
//import javafx.scene.control.*;
|
|
||||||
//import org.apache.commons.lang3.StringUtils;
|
|
||||||
//import org.apache.logging.log4j.LogManager;
|
|
||||||
//import org.apache.logging.log4j.Logger;
|
|
||||||
//import org.controlsfx.control.CheckComboBox;
|
|
||||||
//
|
|
||||||
//import data.*;
|
|
||||||
//import javafx.collections.ListChangeListener;
|
|
||||||
//import javafx.collections.ObservableList;
|
|
||||||
//import javafx.concurrent.Task;
|
|
||||||
//import javafx.fxml.FXML;
|
|
||||||
//import javafx.scene.layout.AnchorPane;
|
|
||||||
//
|
|
||||||
//@SuppressWarnings("Duplicates")
|
|
||||||
//public class WordFormationTab {
|
|
||||||
// public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
|
|
||||||
//
|
|
||||||
// public AnchorPane wordAnalysisTabPane;
|
|
||||||
//
|
|
||||||
// @FXML
|
|
||||||
// public Label selectedFiltersLabel;
|
|
||||||
// @FXML
|
|
||||||
// public Label solarFilters;
|
|
||||||
//
|
|
||||||
// @FXML
|
|
||||||
// private CheckComboBox<String> taxonomyCCB;
|
|
||||||
// private ArrayList<Taxonomy> taxonomy;
|
|
||||||
//
|
|
||||||
// @FXML
|
|
||||||
// private TextField minimalOccurrencesTF;
|
|
||||||
// private Integer minimalOccurrences;
|
|
||||||
//
|
|
||||||
// @FXML
|
|
||||||
// private TextField minimalTaxonomyTF;
|
|
||||||
// private Integer minimalTaxonomy;
|
|
||||||
//
|
|
||||||
// @FXML
|
|
||||||
// private Button computeB;
|
|
||||||
//
|
|
||||||
// @FXML
|
|
||||||
// public ProgressBar ngramProgressBar;
|
|
||||||
// @FXML
|
|
||||||
// public Label progressLabel;
|
|
||||||
//
|
|
||||||
// @FXML
|
|
||||||
// private Hyperlink helpH;
|
|
||||||
//
|
|
||||||
// private Corpus corpus;
|
|
||||||
// private HashMap<String, HashSet<String>> solarFiltersMap;
|
|
||||||
// private HostServices hostService;
|
|
||||||
//
|
|
||||||
// // after header scan
|
|
||||||
// private ObservableList<String> taxonomyCCBValues;
|
|
||||||
// private CorpusType currentCorpusType;
|
|
||||||
// private boolean useDb;
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// public void init() {
|
|
||||||
// // taxonomy
|
|
||||||
// if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
|
||||||
// taxonomyCCB.getItems().removeAll();
|
|
||||||
// taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
|
||||||
// taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
|
||||||
// taxonomy = new ArrayList<>();
|
|
||||||
// ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
|
||||||
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus);
|
|
||||||
// taxonomy.addAll(checkedItemsTaxonomy);
|
|
||||||
// logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
|
|
||||||
// });
|
|
||||||
// taxonomyCCB.getCheckModel().clearChecks();
|
|
||||||
// } else {
|
|
||||||
// taxonomyCCB.setDisable(true);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // set default values
|
|
||||||
// minimalOccurrencesTF.setText("1");
|
|
||||||
// minimalOccurrences = 1;
|
|
||||||
//
|
|
||||||
// minimalTaxonomyTF.setText("1");
|
|
||||||
// minimalTaxonomy = 1;
|
|
||||||
//
|
|
||||||
// minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
|
||||||
// if (!newValue) {
|
|
||||||
// // focus lost
|
|
||||||
// String value = minimalOccurrencesTF.getText();
|
|
||||||
// if (!ValidationUtil.isEmpty(value)) {
|
|
||||||
// if (!ValidationUtil.isNumber(value)) {
|
|
||||||
// logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
|
||||||
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
|
||||||
// } else {
|
|
||||||
// minimalOccurrences = Integer.parseInt(value);
|
|
||||||
// }
|
|
||||||
// } else {
|
|
||||||
// minimalOccurrencesTF.setText("1");
|
|
||||||
// minimalOccurrences = 1;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
|
||||||
// if (!newValue) {
|
|
||||||
// // focus lost
|
|
||||||
// String value = minimalTaxonomyTF.getText();
|
|
||||||
// if (!ValidationUtil.isEmpty(value)) {
|
|
||||||
// if (!ValidationUtil.isNumber(value)) {
|
|
||||||
// logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
|
||||||
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
|
||||||
// } else {
|
|
||||||
// minimalTaxonomy = Integer.parseInt(value);
|
|
||||||
// }
|
|
||||||
// } else {
|
|
||||||
// minimalTaxonomyTF.setText("1");
|
|
||||||
// minimalTaxonomy = 1;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// computeB.setOnAction(e -> {
|
|
||||||
// compute();
|
|
||||||
// logger.info("compute button");
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// helpH.setOnAction(e -> openHelpWebsite());
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private void compute() {
|
|
||||||
// Filter filter = new Filter();
|
|
||||||
// filter.setNgramValue(1);
|
|
||||||
// filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
|
||||||
// filter.setTaxonomy(taxonomy);
|
|
||||||
// filter.setAl(AnalysisLevel.STRING_LEVEL);
|
|
||||||
// filter.setSkipValue(0);
|
|
||||||
// filter.setMsd(new ArrayList<>());
|
|
||||||
// filter.setIsCvv(false);
|
|
||||||
// filter.setSolarFilters(solarFiltersMap);
|
|
||||||
// filter.setMinimalOccurrences(minimalOccurrences);
|
|
||||||
// filter.setMinimalTaxonomy(minimalTaxonomy);
|
|
||||||
//
|
|
||||||
// String message = Validation.validateForStringLevel(filter);
|
|
||||||
// if (message == null) {
|
|
||||||
// // no errors
|
|
||||||
// logger.info("Executing: ", filter.toString());
|
|
||||||
// StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
|
||||||
// execute(statistic);
|
|
||||||
// } else {
|
|
||||||
// logAlert(message);
|
|
||||||
// showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private void openHelpWebsite(){
|
|
||||||
// hostService.showDocument(Messages.HELP_URL);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private void execute(StatisticsNew statistic) {
|
|
||||||
// logger.info("Started execution: ", statistic.getFilter());
|
|
||||||
//
|
|
||||||
// Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
|
||||||
//
|
|
||||||
// final Task<Void> task = new Task<Void>() {
|
|
||||||
// @SuppressWarnings("Duplicates")
|
|
||||||
// @Override
|
|
||||||
// protected Void call() throws Exception {
|
|
||||||
// int i = 0;
|
|
||||||
// Date startTime = new Date();
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// for (File f : corpusFiles) {
|
|
||||||
// readXML(f.toString(), statistic);
|
|
||||||
// i++;
|
|
||||||
// this.updateProgress(i, corpusFiles.size());
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
|
||||||
// progressLabel.textProperty().bind(task.messageProperty());
|
|
||||||
//
|
|
||||||
// task.setOnSucceeded(e -> {
|
|
||||||
// try {
|
|
||||||
// // first, we have to recalculate all occurrences to detailed statistics
|
|
||||||
// boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
|
|
||||||
//
|
|
||||||
// if (successullySaved) {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
|
||||||
// } else {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
|
||||||
// }
|
|
||||||
// } catch (UnsupportedEncodingException e1) {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
|
||||||
// logger.error("Error while saving", e1);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnFailed(e -> {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
|
||||||
// logger.error("Error while executing", e);
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// final Thread thread = new Thread(task, "task");
|
|
||||||
// thread.setDaemon(true);
|
|
||||||
// thread.start();
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private void logAlert(String alert) {
|
|
||||||
// logger.info("alert: " + alert);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// public void setCorpus(Corpus corpus) {
|
|
||||||
// this.corpus = corpus;
|
|
||||||
//
|
|
||||||
// if (corpus.getCorpusType() != CorpusType.SOLAR) {
|
|
||||||
// setSelectedFiltersLabel(null);
|
|
||||||
// } else {
|
|
||||||
// setSelectedFiltersLabel("/");
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public void setSelectedFiltersLabel(String content) {
|
|
||||||
// if (content != null) {
|
|
||||||
// solarFilters.setVisible(true);
|
|
||||||
// selectedFiltersLabel.setVisible(true);
|
|
||||||
// selectedFiltersLabel.setText(content);
|
|
||||||
// } else {
|
|
||||||
// solarFilters.setVisible(false);
|
|
||||||
// selectedFiltersLabel.setVisible(false);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
|
||||||
// this.solarFiltersMap = solarFiltersMap;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public void setHostServices(HostServices hostServices){
|
|
||||||
// this.hostService = hostServices;
|
|
||||||
// }
|
|
||||||
//}
|
|
|
@ -1,12 +1,8 @@
|
||||||
package gui;
|
package gui;
|
||||||
|
|
||||||
import alg.XML_processing;
|
|
||||||
import data.*;
|
import data.*;
|
||||||
import javafx.application.HostServices;
|
import javafx.application.HostServices;
|
||||||
import javafx.beans.InvalidationListener;
|
|
||||||
import javafx.beans.Observable;
|
|
||||||
import javafx.beans.binding.StringBinding;
|
import javafx.beans.binding.StringBinding;
|
||||||
import javafx.beans.property.ReadOnlyDoubleWrapper;
|
|
||||||
import javafx.beans.value.ChangeListener;
|
import javafx.beans.value.ChangeListener;
|
||||||
import javafx.beans.value.ObservableValue;
|
import javafx.beans.value.ObservableValue;
|
||||||
import javafx.collections.ListChangeListener;
|
import javafx.collections.ListChangeListener;
|
||||||
|
@ -23,12 +19,10 @@ import org.controlsfx.control.CheckComboBox;
|
||||||
import util.Tasks;
|
import util.Tasks;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import static alg.XML_processing.readXML;
|
|
||||||
import static gui.GUIController.showAlert;
|
import static gui.GUIController.showAlert;
|
||||||
|
|
||||||
@SuppressWarnings("Duplicates")
|
@SuppressWarnings("Duplicates")
|
||||||
|
@ -165,10 +159,6 @@ public class WordLevelTab {
|
||||||
private TextField suffixListTF;
|
private TextField suffixListTF;
|
||||||
private ArrayList<String> suffixList;
|
private ArrayList<String> suffixList;
|
||||||
|
|
||||||
// @FXML
|
|
||||||
// private CheckBox writeMsdAtTheEndChB;
|
|
||||||
// private boolean writeMsdAtTheEnd;
|
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private ComboBox<String> calculateForCB;
|
private ComboBox<String> calculateForCB;
|
||||||
private CalculateFor calculateFor;
|
private CalculateFor calculateFor;
|
||||||
|
@ -215,7 +205,6 @@ public class WordLevelTab {
|
||||||
|
|
||||||
private Corpus corpus;
|
private Corpus corpus;
|
||||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||||
private Filter filter;
|
|
||||||
private boolean useDb;
|
private boolean useDb;
|
||||||
private HostServices hostService;
|
private HostServices hostService;
|
||||||
private ListChangeListener<String> taxonomyListener;
|
private ListChangeListener<String> taxonomyListener;
|
||||||
|
@ -226,44 +215,31 @@ public class WordLevelTab {
|
||||||
private ChangeListener<Boolean> minimalTaxonomyListener;
|
private ChangeListener<Boolean> minimalTaxonomyListener;
|
||||||
private ChangeListener<Boolean> minimalRelFreListener;
|
private ChangeListener<Boolean> minimalRelFreListener;
|
||||||
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
|
||||||
|
|
||||||
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
|
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
|
||||||
private static final ArrayList<String> TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY));
|
private static final ArrayList<String> TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY));
|
||||||
|
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
|
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
|
||||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
|
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
|
||||||
|
|
||||||
// TODO: pass observables for taxonomy based on header scan
|
// TODO: pass observables for taxonomy based on header scan
|
||||||
// after header scan
|
// after header scan
|
||||||
private ObservableList<String> taxonomyCCBValues;
|
|
||||||
private CorpusType currentCorpusType;
|
|
||||||
|
|
||||||
public void init() {
|
public void init() {
|
||||||
// add CSS style
|
// add CSS style
|
||||||
wordLevelAnalysisTabPane.getStylesheets().add("style.css");
|
wordLevelAnalysisTabPane.getStylesheets().add("style.css");
|
||||||
|
@ -328,21 +304,12 @@ public class WordLevelTab {
|
||||||
} else if (newValue.equals(CalculateFor.NORMALIZED_WORD.toString())) {
|
} else if (newValue.equals(CalculateFor.NORMALIZED_WORD.toString())) {
|
||||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS));
|
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS));
|
||||||
} else if (newValue.equals(CalculateFor.MORPHOSYNTACTIC_SPECS.toString())) {
|
} else if (newValue.equals(CalculateFor.MORPHOSYNTACTIC_SPECS.toString())) {
|
||||||
// writeMsdAtTheEndEnableCalculateFor.set(true);
|
|
||||||
// writeMsdAtTheEndChB.setDisable(false);
|
|
||||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_MSD));
|
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_MSD));
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_EMPTY));
|
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_EMPTY));
|
||||||
}
|
}
|
||||||
|
|
||||||
// if (!newValue.equals("oblikoskladenjska oznaka")){
|
|
||||||
// writeMsdAtTheEnd = false;
|
|
||||||
// writeMsdAtTheEndChB.setSelected(false);
|
|
||||||
// writeMsdAtTheEndChB.setDisable(true);
|
|
||||||
// writeMsdAtTheEndEnableCalculateFor.set(false);
|
|
||||||
// }
|
|
||||||
|
|
||||||
alsoVisualizeListener = new ListChangeListener<String>() {
|
alsoVisualizeListener = new ListChangeListener<String>() {
|
||||||
@Override
|
@Override
|
||||||
public void onChanged(Change<? extends String> c) {
|
public void onChanged(Change<? extends String> c) {
|
||||||
|
@ -353,10 +320,6 @@ public class WordLevelTab {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// alsoVisualizeCCB.getCheckModel().clearChecks();
|
|
||||||
// alsoVisualizeCCB.getItems().removeAll();
|
|
||||||
// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
|
|
||||||
|
|
||||||
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
|
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
|
||||||
|
|
||||||
if (languageChanged) {
|
if (languageChanged) {
|
||||||
|
@ -442,7 +405,6 @@ public class WordLevelTab {
|
||||||
prefixList.add(w);
|
prefixList.add(w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// suffixList = value;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
System.out.println(prefixList);
|
System.out.println(prefixList);
|
||||||
|
@ -475,7 +437,6 @@ public class WordLevelTab {
|
||||||
suffixList.add(w);
|
suffixList.add(w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// suffixList = value;
|
|
||||||
}
|
}
|
||||||
System.out.println(suffixList);
|
System.out.println(suffixList);
|
||||||
if(suffixList.size() > 0){
|
if(suffixList.size() > 0){
|
||||||
|
@ -492,8 +453,6 @@ public class WordLevelTab {
|
||||||
computeNgramsB.setDisable(true);
|
computeNgramsB.setDisable(true);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
// prefixLengthCB.setDisable(true);
|
|
||||||
|
|
||||||
|
|
||||||
if (msdListener != null){
|
if (msdListener != null){
|
||||||
msdTF.focusedProperty().removeListener(msdListener);
|
msdTF.focusedProperty().removeListener(msdListener);
|
||||||
|
@ -581,10 +540,8 @@ public class WordLevelTab {
|
||||||
public void onChanged(ListChangeListener.Change<? extends String> c){
|
public void onChanged(ListChangeListener.Change<? extends String> c){
|
||||||
if(changing) {
|
if(changing) {
|
||||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||||
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
|
|
||||||
|
|
||||||
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
||||||
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
|
|
||||||
|
|
||||||
taxonomy = new ArrayList<>();
|
taxonomy = new ArrayList<>();
|
||||||
taxonomy.addAll(checkedItemsTaxonomy);
|
taxonomy.addAll(checkedItemsTaxonomy);
|
||||||
|
@ -592,7 +549,6 @@ public class WordLevelTab {
|
||||||
taxonomyCCB.getItems().removeAll();
|
taxonomyCCB.getItems().removeAll();
|
||||||
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
||||||
|
|
||||||
// taxonomyCCB.getCheckModel().clearChecks();
|
|
||||||
changing = false;
|
changing = false;
|
||||||
taxonomyCCB.getCheckModel().clearChecks();
|
taxonomyCCB.getCheckModel().clearChecks();
|
||||||
for (Taxonomy t : checkedItemsTaxonomy) {
|
for (Taxonomy t : checkedItemsTaxonomy) {
|
||||||
|
@ -639,15 +595,6 @@ public class WordLevelTab {
|
||||||
displayTaxonomyChB.setDisable(true);
|
displayTaxonomyChB.setDisable(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// writeMsdAtTheEnd = false;
|
|
||||||
// writeMsdAtTheEndChB.setDisable(true);
|
|
||||||
// // set
|
|
||||||
// writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
|
||||||
// writeMsdAtTheEnd = newValue;
|
|
||||||
// logger.info("write msd at the end: ", writeMsdAtTheEnd);
|
|
||||||
// });
|
|
||||||
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
|
|
||||||
|
|
||||||
// set default values
|
// set default values
|
||||||
minimalOccurrencesTF.setText("1");
|
minimalOccurrencesTF.setText("1");
|
||||||
minimalOccurrences = 1;
|
minimalOccurrences = 1;
|
||||||
|
@ -764,85 +711,6 @@ public class WordLevelTab {
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* case a: values for combo boxes can change after a corpus change
|
|
||||||
* <ul>
|
|
||||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
|
||||||
* <li>same corpus type, different subset - keep</li>
|
|
||||||
* </ul>
|
|
||||||
* <p>
|
|
||||||
* case b: values for combo boxes can change after a header scan
|
|
||||||
* <ul>
|
|
||||||
* <li>at first, fields are populated by corpus type defaults</li>
|
|
||||||
* <li>after, with gathered data</li>
|
|
||||||
* </ul>
|
|
||||||
* <p></p>
|
|
||||||
* ngrams: 1
|
|
||||||
* calculateFor: word
|
|
||||||
* msd:
|
|
||||||
* taxonomy:
|
|
||||||
* skip: 0
|
|
||||||
* iscvv: false
|
|
||||||
* string length: 1
|
|
||||||
*/
|
|
||||||
// public void populateFields() {
|
|
||||||
// // corpus changed if: current one is null (this is first run of the app)
|
|
||||||
// // or if currentCorpus != gui's corpus
|
|
||||||
// boolean corpusChanged = currentCorpusType == null
|
|
||||||
// || currentCorpusType != corpus.getCorpusType();
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// // TODO: check for GOS, GIGAFIDA, SOLAR...
|
|
||||||
// // refresh and:
|
|
||||||
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
|
||||||
// if (calculateFor == null) {
|
|
||||||
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
|
|
||||||
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if (!filter.hasMsd()) {
|
|
||||||
// // if current corpus doesn't have msd data, disable this field
|
|
||||||
// msd = new ArrayList<>();
|
|
||||||
// msdTF.setText("");
|
|
||||||
// msdTF.setDisable(true);
|
|
||||||
// logger.info("no msd data");
|
|
||||||
// } else {
|
|
||||||
// if (ValidationUtil.isEmpty(msd)
|
|
||||||
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
|
||||||
// // msd has not been set previously
|
|
||||||
// // or msd has been set but the corpus changed -> reset
|
|
||||||
// msd = new ArrayList<>();
|
|
||||||
// msdTF.setText("");
|
|
||||||
// msdTF.setDisable(false);
|
|
||||||
// logger.info("msd reset");
|
|
||||||
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
|
||||||
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
|
|
||||||
// msdTF.setText(StringUtils.join(msdStrings, " "));
|
|
||||||
// msdTF.setDisable(false);
|
|
||||||
// logger.info("msd kept");
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // TODO: trigger on rescan
|
|
||||||
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
|
||||||
// // user changed corpus (by type) or by selection & triggered a rescan of headers
|
|
||||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
|
||||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
|
||||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
|
||||||
//
|
|
||||||
// currentCorpusType = corpus.getCorpusType();
|
|
||||||
// // setTaxonomyIsDirty(false);
|
|
||||||
// } else {
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
|
||||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
|
||||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
|
||||||
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
|
|
||||||
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
||||||
Tooltip tooltip = new Tooltip();
|
Tooltip tooltip = new Tooltip();
|
||||||
tooltip.textProperty().bind(stringBinding);
|
tooltip.textProperty().bind(stringBinding);
|
||||||
|
@ -911,11 +779,9 @@ public class WordLevelTab {
|
||||||
if (corpus.getCorpusType() == CorpusType.GOS) {
|
if (corpus.getCorpusType() == CorpusType.GOS) {
|
||||||
calculateForCB.itemsProperty().unbind();
|
calculateForCB.itemsProperty().unbind();
|
||||||
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_GOS));
|
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_GOS));
|
||||||
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
|
|
||||||
} else {
|
} else {
|
||||||
calculateForCB.itemsProperty().unbind();
|
calculateForCB.itemsProperty().unbind();
|
||||||
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS));
|
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS));
|
||||||
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -923,7 +789,6 @@ public class WordLevelTab {
|
||||||
if (corpus.isGosOrthMode()) {
|
if (corpus.isGosOrthMode()) {
|
||||||
calculateForCB.itemsProperty().unbind();
|
calculateForCB.itemsProperty().unbind();
|
||||||
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_ORTH));
|
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_ORTH));
|
||||||
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
|
|
||||||
msdTF.setDisable(true);
|
msdTF.setDisable(true);
|
||||||
} else {
|
} else {
|
||||||
msdTF.setDisable(false);
|
msdTF.setDisable(false);
|
||||||
|
@ -954,7 +819,6 @@ public class WordLevelTab {
|
||||||
filter.setPrefixList(prefixList);
|
filter.setPrefixList(prefixList);
|
||||||
filter.setSuffixList(suffixList);
|
filter.setSuffixList(suffixList);
|
||||||
filter.setTaxonomySetOperation(taxonomySetOperation);
|
filter.setTaxonomySetOperation(taxonomySetOperation);
|
||||||
// filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
|
|
||||||
|
|
||||||
String message = Validation.validateForStringLevel(filter);
|
String message = Validation.validateForStringLevel(filter);
|
||||||
if (message == null) {
|
if (message == null) {
|
||||||
|
@ -1004,134 +868,14 @@ public class WordLevelTab {
|
||||||
private void execute(StatisticsNew statistic) {
|
private void execute(StatisticsNew statistic) {
|
||||||
logger.info("Started execution: ", statistic.getFilter());
|
logger.info("Started execution: ", statistic.getFilter());
|
||||||
|
|
||||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
|
||||||
|
|
||||||
// final Task<Void> task = new Task<Void>() {
|
|
||||||
// @SuppressWarnings("Duplicates")
|
|
||||||
// @Override
|
|
||||||
// protected Void call() throws Exception {
|
|
||||||
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
|
|
||||||
// if(multipleFiles){
|
|
||||||
// cancel.setVisible(true);
|
|
||||||
// }
|
|
||||||
// int i = 0;
|
|
||||||
// Date startTime = new Date();
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
// for (File f : corpusFiles) {
|
|
||||||
// final int iFinal = i;
|
|
||||||
// XML_processing xml_processing = new XML_processing();
|
|
||||||
// xml_processing.isCancelled = false;
|
|
||||||
// i++;
|
|
||||||
// if (isCancelled()) {
|
|
||||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// if(xml_processing.progressBarListener != null) {
|
|
||||||
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
|
|
||||||
// }
|
|
||||||
// if (multipleFiles) {
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// this.updateProgress(i, corpusFiles.size());
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
|
|
||||||
// } else {
|
|
||||||
// xml_processing.progressBarListener = new InvalidationListener() {
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// @Override
|
|
||||||
// public void invalidated(Observable observable) {
|
|
||||||
// cancel.setVisible(true);
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
|
||||||
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
|
||||||
// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// xml_processing.isCancelled = isCancelled();
|
|
||||||
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
|
|
||||||
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
|
||||||
// }
|
|
||||||
// xml_processing.readXML(f.toString(), statistic);
|
|
||||||
// if (isCancelled()) {
|
|
||||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
|
||||||
// progressLabel.textProperty().bind(task.messageProperty());
|
|
||||||
//
|
|
||||||
// task.setOnSucceeded(e -> {
|
|
||||||
// try {
|
|
||||||
// boolean successullySaved = statistic.saveResultToDisk();
|
|
||||||
// if (successullySaved) {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
|
||||||
// } else {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
|
||||||
// }
|
|
||||||
// } catch (UnsupportedEncodingException e1) {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
|
||||||
// logger.error("Error while saving", e1);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnFailed(e -> {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
|
||||||
// logger.error("Error while executing", e);
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// task.setOnCancelled(e -> {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
|
||||||
// ngramProgressBar.progressProperty().unbind();
|
|
||||||
// ngramProgressBar.setProgress(0.0);
|
|
||||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
// progressLabel.textProperty().unbind();
|
|
||||||
// progressLabel.setText("");
|
|
||||||
// cancel.setVisible(false);
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// // When cancel button is pressed cancel analysis
|
|
||||||
// cancel.setOnAction(e -> {
|
|
||||||
// task.cancel();
|
|
||||||
// logger.info("cancel button");
|
|
||||||
// });
|
|
||||||
//
|
|
||||||
// final Thread thread = new Thread(task, "task");
|
|
||||||
// thread.setDaemon(true);
|
|
||||||
// thread.start();
|
|
||||||
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
|
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
|
||||||
if (statistic.getFilter().getMinimalRelFre() > 1){
|
if (statistic.getFilter().getMinimalRelFre() > 1){
|
||||||
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
|
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
|
||||||
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
|
|
||||||
final Thread thread = new Thread(mainTask, "task");
|
final Thread thread = new Thread(mainTask, "task");
|
||||||
thread.setDaemon(true);
|
thread.setDaemon(true);
|
||||||
thread.start();
|
thread.start();
|
||||||
} else {
|
} else {
|
||||||
final Task<Void> mainTask = t.prepareMainTask(statistic);
|
final Task<Void> mainTask = t.prepareMainTask(statistic);
|
||||||
// final Task<Void> mainTask = prepareMainTask(statistic);
|
|
||||||
final Thread thread = new Thread(mainTask, "task");
|
final Thread thread = new Thread(mainTask, "task");
|
||||||
thread.setDaemon(true);
|
thread.setDaemon(true);
|
||||||
thread.start();
|
thread.start();
|
||||||
|
@ -1144,5 +888,4 @@ public class WordLevelTab {
|
||||||
public void setHostServices(HostServices hostServices){
|
public void setHostServices(HostServices hostServices){
|
||||||
this.hostService = hostServices;
|
this.hostService = hostServices;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,46 +0,0 @@
|
||||||
package util;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.stream.IntStream;
|
|
||||||
|
|
||||||
public class Combinations {
|
|
||||||
private static HashSet<HashSet<Integer>> result = new HashSet<>();
|
|
||||||
|
|
||||||
|
|
||||||
/* arr[] ---> Input Array
|
|
||||||
data[] ---> Temporary array to store current combination
|
|
||||||
start & end ---> Staring and Ending indexes in arr[]
|
|
||||||
index ---> Current index in data[]
|
|
||||||
r ---> Size of a combination to be printed */
|
|
||||||
static void combinationUtil(int arr[], Integer data[], int start, int end, int index, int combinationLength) {
|
|
||||||
// Current combination is ready to be printed, print it
|
|
||||||
if (index == combinationLength) {
|
|
||||||
result.add(new HashSet<>(Arrays.asList(data)));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// replace index with all possible elements. The condition
|
|
||||||
// "end-i+1 >= r-index" makes sure that including one element
|
|
||||||
// at index will make a combination with remaining elements
|
|
||||||
// at remaining positions
|
|
||||||
for (int i = start; i <= end && end - i + 1 >= combinationLength - index; i++) {
|
|
||||||
data[index] = arr[i];
|
|
||||||
combinationUtil(arr, data, i + 1, end, index + 1, combinationLength);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static HashSet<HashSet<Integer>> generateIndices(int maxNOfIndices) {
|
|
||||||
result = new HashSet<>();
|
|
||||||
int[] arr = IntStream.range(1, maxNOfIndices).toArray();
|
|
||||||
for (int i = 1; i < maxNOfIndices - 1; i++) {
|
|
||||||
// A temporary array to store all combination one by one
|
|
||||||
combinationUtil(arr, new Integer[i], 0, arr.length - 1, 0, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
// also add an empty one for X.... (all of this type)
|
|
||||||
result.add(new HashSet<>());
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -6,7 +6,6 @@ import java.io.*;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ConcurrentMap;
|
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
import data.*;
|
import data.*;
|
||||||
|
@ -16,49 +15,11 @@ import org.apache.commons.csv.CSVFormat;
|
||||||
import org.apache.commons.csv.CSVPrinter;
|
import org.apache.commons.csv.CSVPrinter;
|
||||||
import org.apache.commons.csv.QuoteMode;
|
import org.apache.commons.csv.QuoteMode;
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
import org.json.simple.JSONArray;
|
|
||||||
import org.json.simple.JSONObject;
|
|
||||||
|
|
||||||
import data.Enums.WordLevelType;
|
import data.Enums.WordLevelType;
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public class Export {
|
public class Export {
|
||||||
// public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
|
|
||||||
// JSONArray wrapper = new JSONArray();
|
|
||||||
//
|
|
||||||
// for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
|
||||||
// JSONArray data_wrapper = new JSONArray();
|
|
||||||
// JSONObject metric = new JSONObject();
|
|
||||||
//
|
|
||||||
// String title = p.getLeft();
|
|
||||||
// Map<MultipleHMKeys, Long> map = p.getRight();
|
|
||||||
//
|
|
||||||
// if (map.isEmpty())
|
|
||||||
// continue;
|
|
||||||
//
|
|
||||||
// long total = Util.mapSumFrequencies(map);
|
|
||||||
//
|
|
||||||
// for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
|
||||||
// JSONObject data_entry = new JSONObject();
|
|
||||||
// data_entry.put("word", e.getKey());
|
|
||||||
// data_entry.put("frequency", e.getValue());
|
|
||||||
// data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
|
|
||||||
//
|
|
||||||
// data_wrapper.add(data_entry);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// metric.put("Title", title);
|
|
||||||
// metric.put("data", data_wrapper);
|
|
||||||
// wrapper.add(metric);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// try (FileWriter file = new FileWriter("statistics.json")) {
|
|
||||||
// file.write(wrapper.toJSONString());
|
|
||||||
// } catch (IOException e) {
|
|
||||||
// e.printStackTrace();
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||||
StatisticsNew statistics, Filter filter) {
|
StatisticsNew statistics, Filter filter) {
|
||||||
Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
|
Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
|
||||||
|
@ -68,15 +29,6 @@ public class Export {
|
||||||
List<Object> FILE_HEADER_AL = new ArrayList<>();
|
List<Object> FILE_HEADER_AL = new ArrayList<>();
|
||||||
Object[] FILE_HEADER;
|
Object[] FILE_HEADER;
|
||||||
|
|
||||||
//Count frequencies
|
|
||||||
// long num_frequencies = 0;
|
|
||||||
// for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
|
||||||
// Map<MultipleHMKeys, Long> map = p.getRight();
|
|
||||||
// if (map.isEmpty())
|
|
||||||
// continue;
|
|
||||||
// num_frequencies = Util.mapSumFrequencies(map);
|
|
||||||
// }
|
|
||||||
|
|
||||||
Map<Taxonomy, Long> num_selected_taxonomy_frequencies = new ConcurrentHashMap<>();
|
Map<Taxonomy, Long> num_selected_taxonomy_frequencies = new ConcurrentHashMap<>();
|
||||||
for (Taxonomy taxonomyKey : taxonomyResults.keySet()) {
|
for (Taxonomy taxonomyKey : taxonomyResults.keySet()) {
|
||||||
num_selected_taxonomy_frequencies.put(taxonomyKey, (long) 0);
|
num_selected_taxonomy_frequencies.put(taxonomyKey, (long) 0);
|
||||||
|
@ -113,7 +65,6 @@ public class Export {
|
||||||
|
|
||||||
headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
|
headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
|
||||||
headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
|
headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
|
||||||
// headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
|
|
||||||
|
|
||||||
for (CalculateFor otherKey : filter.getMultipleKeys()) {
|
for (CalculateFor otherKey : filter.getMultipleKeys()) {
|
||||||
FILE_HEADER_AL.add(otherKey.toHeaderString(filter.getNgramValue()));
|
FILE_HEADER_AL.add(otherKey.toHeaderString(filter.getNgramValue()));
|
||||||
|
@ -163,10 +114,7 @@ public class Export {
|
||||||
|
|
||||||
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||||
String title = p.getLeft();
|
String title = p.getLeft();
|
||||||
|
|
||||||
// statistics.setTimeEnding();
|
|
||||||
title = statistics.generateResultTitle();
|
title = statistics.generateResultTitle();
|
||||||
// statistics.
|
|
||||||
|
|
||||||
fileName = title.replace(": ", "-");
|
fileName = title.replace(": ", "-");
|
||||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
fileName = fileName.replace(" ", "_").concat(".csv");
|
||||||
|
@ -178,8 +126,6 @@ public class Export {
|
||||||
if (map.isEmpty())
|
if (map.isEmpty())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// long total = Util.mapSumFrequencies(map);
|
|
||||||
|
|
||||||
OutputStreamWriter fileWriter = null;
|
OutputStreamWriter fileWriter = null;
|
||||||
CSVPrinter csvFilePrinter = null;
|
CSVPrinter csvFilePrinter = null;
|
||||||
|
|
||||||
|
@ -289,10 +235,7 @@ public class Export {
|
||||||
dataEntry.add(frequency.toString());
|
dataEntry.add(frequency.toString());
|
||||||
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key), statistics.getCorpus().getPunctuation()));
|
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key), statistics.getCorpus().getPunctuation()));
|
||||||
dataEntry.add(formatNumberForExport(((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue(), statistics.getCorpus().getPunctuation()));
|
dataEntry.add(formatNumberForExport(((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue(), statistics.getCorpus().getPunctuation()));
|
||||||
// dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
|
|
||||||
// dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (filter.getCollocability().size() > 0){
|
if (filter.getCollocability().size() > 0){
|
||||||
|
@ -303,39 +246,6 @@ public class Export {
|
||||||
|
|
||||||
// Write msd separated per letters at the end of each line in csv
|
// Write msd separated per letters at the end of each line in csv
|
||||||
if (filter.getWriteMsdAtTheEnd()) {
|
if (filter.getWriteMsdAtTheEnd()) {
|
||||||
// String msd = "";
|
|
||||||
//
|
|
||||||
// if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
||||||
// msd = e.getKey().getK1();
|
|
||||||
// } else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
|
|
||||||
// i = 0;
|
|
||||||
// for (CalculateFor otherKey : filter.getMultipleKeys()){
|
|
||||||
// switch(i){
|
|
||||||
// case 0:
|
|
||||||
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
||||||
// msd = e.getKey().getK2();
|
|
||||||
// }
|
|
||||||
// break;
|
|
||||||
// case 1:
|
|
||||||
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
||||||
// msd = e.getKey().getK3();
|
|
||||||
// }
|
|
||||||
// break;
|
|
||||||
// case 2:
|
|
||||||
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
||||||
// msd = e.getKey().getK4();
|
|
||||||
// }
|
|
||||||
// break;
|
|
||||||
// case 3:
|
|
||||||
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
||||||
// msd = e.getKey().getK5();
|
|
||||||
// }
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// i++;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
String msd = e.getKey().getMsd(filter);
|
String msd = e.getKey().getMsd(filter);
|
||||||
String [] charArray = msd.split("(?!^)");
|
String [] charArray = msd.split("(?!^)");
|
||||||
dataEntry.addAll(Arrays.asList(charArray));
|
dataEntry.addAll(Arrays.asList(charArray));
|
||||||
|
@ -372,67 +282,6 @@ public class Export {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
// public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
|
||||||
// //Delimiter used in CSV file
|
|
||||||
// String NEW_LINE_SEPARATOR = "\n";
|
|
||||||
//
|
|
||||||
// //CSV file header
|
|
||||||
// Object[] FILE_HEADER = {"word", "frequency", "percent"};
|
|
||||||
//
|
|
||||||
// String fileName = "";
|
|
||||||
//
|
|
||||||
// fileName = title.replace(": ", "-");
|
|
||||||
// fileName = fileName.replace(" ", "_").concat(".csv");
|
|
||||||
//
|
|
||||||
// fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
|
||||||
//
|
|
||||||
// OutputStreamWriter fileWriter = null;
|
|
||||||
// CSVPrinter csvFilePrinter = null;
|
|
||||||
//
|
|
||||||
// //Create the CSVFormat object with "\n" as a record delimiter
|
|
||||||
// CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
|
||||||
//
|
|
||||||
// try {
|
|
||||||
// //initialize FileWriter object
|
|
||||||
// fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
|
||||||
//
|
|
||||||
// //initialize CSVPrinter object
|
|
||||||
// csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
|
||||||
//
|
|
||||||
// // write info block
|
|
||||||
// printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
|
||||||
//
|
|
||||||
// //Create CSV file header
|
|
||||||
// csvFilePrinter.printRecord(FILE_HEADER);
|
|
||||||
//
|
|
||||||
// for (Object[] resultEntry : result) {
|
|
||||||
// List dataEntry = new ArrayList<>();
|
|
||||||
// dataEntry.add(resultEntry[0]);
|
|
||||||
// dataEntry.add(resultEntry[1]);
|
|
||||||
// dataEntry.add(formatNumberAsPercent(resultEntry[2]), statistics.getCorpus().getPunctuation());
|
|
||||||
// csvFilePrinter.printRecord(dataEntry);
|
|
||||||
// }
|
|
||||||
// } catch (Exception e) {
|
|
||||||
// System.out.println("Error in CsvFileWriter!");
|
|
||||||
// e.printStackTrace();
|
|
||||||
// } finally {
|
|
||||||
// try {
|
|
||||||
// if (fileWriter != null) {
|
|
||||||
// fileWriter.flush();
|
|
||||||
// fileWriter.close();
|
|
||||||
// }
|
|
||||||
// if (csvFilePrinter != null) {
|
|
||||||
// csvFilePrinter.close();
|
|
||||||
// }
|
|
||||||
// } catch (IOException e) {
|
|
||||||
// System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
|
||||||
// e.printStackTrace();
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return fileName;
|
|
||||||
// }
|
|
||||||
|
|
||||||
public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||||
//Delimiter used in CSV file
|
//Delimiter used in CSV file
|
||||||
String NEW_LINE_SEPARATOR = "\n";
|
String NEW_LINE_SEPARATOR = "\n";
|
||||||
|
|
|
@ -1,31 +0,0 @@
|
||||||
package util;
|
|
||||||
|
|
||||||
public class Key /*implements Comparable<Key> */ {
|
|
||||||
// private final String value;
|
|
||||||
//
|
|
||||||
// Key(String value) {
|
|
||||||
// this.value = value;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// @Override
|
|
||||||
// public int compareTo(Key o) {
|
|
||||||
// return Objects.compare(this.value, o.value);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// @Override
|
|
||||||
// public boolean equals(Object o) {
|
|
||||||
// if (this.equals(o)) {
|
|
||||||
// return true;
|
|
||||||
// }
|
|
||||||
// if (o == null || getClass() != o.getClass()) {
|
|
||||||
// return false;
|
|
||||||
// }
|
|
||||||
// Key key = (Key) o;
|
|
||||||
// return Objects.equals(value, key.value);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// @Override
|
|
||||||
// public int hashCode() {
|
|
||||||
// return 0;
|
|
||||||
// }
|
|
||||||
}
|
|
|
@ -57,9 +57,6 @@ public class Tasks {
|
||||||
f2.setIsMinimalRelFreScraper(true);
|
f2.setIsMinimalRelFreScraper(true);
|
||||||
StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb);
|
StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb);
|
||||||
|
|
||||||
|
|
||||||
// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb);
|
|
||||||
|
|
||||||
Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();
|
Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();
|
||||||
|
|
||||||
final javafx.concurrent.Task<Void> task = new javafx.concurrent.Task<Void>() {
|
final javafx.concurrent.Task<Void> task = new javafx.concurrent.Task<Void>() {
|
||||||
|
@ -97,10 +94,6 @@ public class Tasks {
|
||||||
}
|
}
|
||||||
this.updateProgress(i, corpusSize);
|
this.updateProgress(i, corpusSize);
|
||||||
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||||
// if (isCancelled()) {
|
|
||||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
} else {
|
} else {
|
||||||
xml_processing.progressBarListener = new InvalidationListener() {
|
xml_processing.progressBarListener = new InvalidationListener() {
|
||||||
int remainingSeconds = -1;
|
int remainingSeconds = -1;
|
||||||
|
@ -112,10 +105,6 @@ public class Tasks {
|
||||||
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||||
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||||
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||||
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
|
||||||
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
|
|
||||||
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
|
||||||
// System.out.println(remainingSeconds);
|
|
||||||
previousTime = new Date();
|
previousTime = new Date();
|
||||||
}
|
}
|
||||||
xml_processing.isCancelled = isCancelled();
|
xml_processing.isCancelled = isCancelled();
|
||||||
|
@ -138,7 +127,6 @@ public class Tasks {
|
||||||
|
|
||||||
// add remaining minRelFre results
|
// add remaining minRelFre results
|
||||||
if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
|
if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
|
||||||
// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
|
|
||||||
long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
|
long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
|
||||||
double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
||||||
|
|
||||||
|
@ -151,8 +139,6 @@ public class Tasks {
|
||||||
for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){
|
for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){
|
||||||
statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
|
statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
// System.out.println("asd");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
@ -174,7 +160,6 @@ public class Tasks {
|
||||||
logger.error("Error while executing", e);
|
logger.error("Error while executing", e);
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setProgress(0.0);
|
ngramProgressBar.setProgress(0.0);
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
@ -184,7 +169,6 @@ public class Tasks {
|
||||||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setProgress(0.0);
|
ngramProgressBar.setProgress(0.0);
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
@ -215,19 +199,6 @@ public class Tasks {
|
||||||
if(multipleFiles){
|
if(multipleFiles){
|
||||||
cancel.setVisible(true);
|
cancel.setVisible(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// int i = corpusFiles.size();
|
|
||||||
// Date startTime = new Date();
|
|
||||||
// Date previousTime = new Date();
|
|
||||||
// int remainingSeconds = -1;
|
|
||||||
// int corpusSize;
|
|
||||||
// if (statistic.getFilter().getCollocability().size() > 0) {
|
|
||||||
// corpusSize = corpusFiles.size() * 2;
|
|
||||||
// } else {
|
|
||||||
// corpusSize = corpusFiles.size();
|
|
||||||
// }
|
|
||||||
|
|
||||||
Date startTime = new Date();
|
Date startTime = new Date();
|
||||||
Date previousTime = new Date();
|
Date previousTime = new Date();
|
||||||
int remainingSeconds = -1;
|
int remainingSeconds = -1;
|
||||||
|
@ -264,13 +235,6 @@ public class Tasks {
|
||||||
this.updateProgress(i, corpusSize);
|
this.updateProgress(i, corpusSize);
|
||||||
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||||
|
|
||||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
|
||||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
|
|
||||||
// previousTime = new Date();
|
|
||||||
// }
|
|
||||||
// this.updateProgress(i, corpusSize);
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
xml_processing.progressBarListener = new InvalidationListener() {
|
xml_processing.progressBarListener = new InvalidationListener() {
|
||||||
int remainingSeconds = -1;
|
int remainingSeconds = -1;
|
||||||
|
@ -282,10 +246,6 @@ public class Tasks {
|
||||||
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||||
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||||
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||||
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
|
||||||
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
|
|
||||||
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
|
||||||
// System.out.println(remainingSeconds);
|
|
||||||
previousTime = new Date();
|
previousTime = new Date();
|
||||||
}
|
}
|
||||||
xml_processing.isCancelled = isCancelled();
|
xml_processing.isCancelled = isCancelled();
|
||||||
|
@ -304,24 +264,9 @@ public class Tasks {
|
||||||
if(!(multipleFiles)){
|
if(!(multipleFiles)){
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
}
|
}
|
||||||
// readXML(f.toString(), statistic);
|
|
||||||
// i++;
|
|
||||||
// if (isCancelled()) {
|
|
||||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// if (statistic.getFilter().getCollocability().size() > 0) {
|
|
||||||
// this.updateProgress(i, corpusFiles.size() * 2);
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
|
|
||||||
// } else {
|
|
||||||
// this.updateProgress(i, corpusFiles.size());
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
|
||||||
// }
|
|
||||||
//// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
|
|
||||||
}
|
}
|
||||||
// if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing
|
// if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing
|
||||||
if (statistic.getFilter().getMinimalRelFre() > 1){
|
if (statistic.getFilter().getMinimalRelFre() > 1){
|
||||||
// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
|
|
||||||
long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
|
long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
|
||||||
double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
||||||
|
|
||||||
|
@ -356,7 +301,6 @@ public class Tasks {
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
try {
|
try {
|
||||||
// System.out.print(statistics);
|
|
||||||
boolean successullySaved = statistic.saveResultToDisk();
|
boolean successullySaved = statistic.saveResultToDisk();
|
||||||
if (successullySaved) {
|
if (successullySaved) {
|
||||||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
||||||
|
@ -371,7 +315,6 @@ public class Tasks {
|
||||||
logger.error("Out of memory error", e1);
|
logger.error("Out of memory error", e1);
|
||||||
}
|
}
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
@ -385,7 +328,6 @@ public class Tasks {
|
||||||
logger.error("Error while executing", e);
|
logger.error("Error while executing", e);
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setProgress(0.0);
|
ngramProgressBar.setProgress(0.0);
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
@ -395,7 +337,6 @@ public class Tasks {
|
||||||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setProgress(0.0);
|
ngramProgressBar.setProgress(0.0);
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
@ -421,17 +362,9 @@ public class Tasks {
|
||||||
if(multipleFiles){
|
if(multipleFiles){
|
||||||
cancel.setVisible(true);
|
cancel.setVisible(true);
|
||||||
}
|
}
|
||||||
// int i = corpusFiles.size();
|
|
||||||
Date startTime = new Date();
|
Date startTime = new Date();
|
||||||
Date previousTime = new Date();
|
Date previousTime = new Date();
|
||||||
int remainingSeconds = -1;
|
int remainingSeconds = -1;
|
||||||
// int corpusSize;
|
|
||||||
// if (statistic.getFilter().getCollocability().size() > 0) {
|
|
||||||
// corpusSize = corpusFiles.size() * 2;
|
|
||||||
// } else {
|
|
||||||
// corpusSize = corpusFiles.size();
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
int corpusSize;
|
int corpusSize;
|
||||||
int i;
|
int i;
|
||||||
|
@ -461,10 +394,6 @@ public class Tasks {
|
||||||
}
|
}
|
||||||
this.updateProgress(i, corpusSize);
|
this.updateProgress(i, corpusSize);
|
||||||
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||||
// if (isCancelled()) {
|
|
||||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
} else {
|
} else {
|
||||||
xml_processing.progressBarListener = new InvalidationListener() {
|
xml_processing.progressBarListener = new InvalidationListener() {
|
||||||
int remainingSeconds = -1;
|
int remainingSeconds = -1;
|
||||||
|
@ -476,10 +405,6 @@ public class Tasks {
|
||||||
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||||
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||||
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||||
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
|
||||||
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)));
|
|
||||||
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
|
||||||
// System.out.println(remainingSeconds);
|
|
||||||
previousTime = new Date();
|
previousTime = new Date();
|
||||||
}
|
}
|
||||||
xml_processing.isCancelled = isCancelled();
|
xml_processing.isCancelled = isCancelled();
|
||||||
|
@ -497,14 +422,6 @@ public class Tasks {
|
||||||
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// readXML(f.toString(), statisticsOneGrams);
|
|
||||||
// i++;
|
|
||||||
// this.updateProgress(i, corpusFiles.size() * 2);
|
|
||||||
// if (statistic.getFilter().getCollocability().size() > 0) {
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
|
|
||||||
// } else {
|
|
||||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
@ -517,7 +434,6 @@ public class Tasks {
|
||||||
task.setOnSucceeded(e -> {
|
task.setOnSucceeded(e -> {
|
||||||
try {
|
try {
|
||||||
System.out.print(statistic);
|
System.out.print(statistic);
|
||||||
// calculate_collocabilities(statistic, statisticsOneGrams);
|
|
||||||
statistic.updateCalculateCollocabilities(statisticsOneGrams);
|
statistic.updateCalculateCollocabilities(statisticsOneGrams);
|
||||||
boolean successullySaved = statistic.saveResultToDisk();
|
boolean successullySaved = statistic.saveResultToDisk();
|
||||||
if (successullySaved) {
|
if (successullySaved) {
|
||||||
|
@ -532,21 +448,6 @@ public class Tasks {
|
||||||
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
|
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
|
||||||
logger.error("Out of memory error", e1);
|
logger.error("Out of memory error", e1);
|
||||||
}
|
}
|
||||||
// try {
|
|
||||||
// boolean successullySaved = statistic.saveResultToDisk();
|
|
||||||
// if (successullySaved) {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
|
||||||
// } else {
|
|
||||||
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
|
||||||
// }
|
|
||||||
// } catch (UnsupportedEncodingException e1) {
|
|
||||||
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
|
||||||
// logger.error("Error while saving", e1);
|
|
||||||
// } catch (OutOfMemoryError e1){
|
|
||||||
// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
|
||||||
// logger.error("Out of memory error", e1);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
|
@ -559,7 +460,6 @@ public class Tasks {
|
||||||
logger.error("Error while executing", e);
|
logger.error("Error while executing", e);
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setProgress(0.0);
|
ngramProgressBar.setProgress(0.0);
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
@ -569,7 +469,6 @@ public class Tasks {
|
||||||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||||
ngramProgressBar.progressProperty().unbind();
|
ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setProgress(0.0);
|
ngramProgressBar.setProgress(0.0);
|
||||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
cancel.setVisible(false);
|
cancel.setVisible(false);
|
||||||
|
@ -578,7 +477,6 @@ public class Tasks {
|
||||||
// When cancel button is pressed cancel analysis
|
// When cancel button is pressed cancel analysis
|
||||||
cancel.setOnAction(e -> {
|
cancel.setOnAction(e -> {
|
||||||
task.cancel();
|
task.cancel();
|
||||||
// logger.info("cancel button");
|
|
||||||
});
|
});
|
||||||
return task;
|
return task;
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,15 +49,4 @@ public class TimeWatch {
|
||||||
|
|
||||||
return "Elapsed Time in nano seconds: ";
|
return "Elapsed Time in nano seconds: ";
|
||||||
}
|
}
|
||||||
|
|
||||||
private void exampleUsage() {
|
|
||||||
TimeWatch watch = TimeWatch.start();
|
|
||||||
|
|
||||||
// do something...
|
|
||||||
|
|
||||||
System.out.println("Elapsed Time custom format: " + watch.toMinuteSeconds());
|
|
||||||
System.out.println("Elapsed Time in seconds: " + watch.time(TimeUnit.SECONDS));
|
|
||||||
System.out.println("Elapsed Time in nano seconds: " + watch.time());
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
|
@ -20,22 +20,6 @@ import gui.ValidationUtil;
|
||||||
public class Util {
|
public class Util {
|
||||||
public final static Logger logger = LogManager.getLogger(Util.class);
|
public final static Logger logger = LogManager.getLogger(Util.class);
|
||||||
|
|
||||||
|
|
||||||
public static String toReadableTime(long time) {
|
|
||||||
long hours = time(TimeUnit.HOURS, time);
|
|
||||||
long minutes = time(TimeUnit.MINUTES, time) - TimeUnit.HOURS.toMinutes(hours);
|
|
||||||
long seconds = time(TimeUnit.SECONDS, time) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
|
|
||||||
long milliseconds = time(TimeUnit.MILLISECONDS, time) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);
|
|
||||||
long microseconds = time(TimeUnit.MICROSECONDS, time) - TimeUnit.HOURS.toMicros(hours) - TimeUnit.MINUTES.toMicros(minutes) - TimeUnit.SECONDS.toMicros(seconds) - TimeUnit.MILLISECONDS.toMicros(milliseconds);
|
|
||||||
long nanoseconds = time(TimeUnit.NANOSECONDS, time) - TimeUnit.HOURS.toNanos(hours) - TimeUnit.MINUTES.toNanos(minutes) - TimeUnit.SECONDS.toNanos(seconds) - TimeUnit.MILLISECONDS.toNanos(milliseconds) - TimeUnit.MICROSECONDS.toNanos(microseconds);
|
|
||||||
|
|
||||||
return String.format("%d h, %d min, %d s, %d ms, %d µs, %d ns", hours, minutes, seconds, milliseconds, microseconds, nanoseconds);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static long time(TimeUnit unit, long t) {
|
|
||||||
return unit.convert(t, TimeUnit.NANOSECONDS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a number to a more readable format.
|
* Converts a number to a more readable format.
|
||||||
* 12345 -> 12.345
|
* 12345 -> 12.345
|
||||||
|
@ -97,12 +81,6 @@ public class Util {
|
||||||
return types.contains(o.getClass());
|
return types.contains(o.getClass());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <K, V> void printMap(Map<K, V> map) {
|
|
||||||
System.out.println("\nkey: value");
|
|
||||||
map.forEach((k, v) -> System.out.print(String.format("%s:\t %,8d%n", k, v)));
|
|
||||||
System.out.println();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generic map converter -> since AtomicLongs aren't as comparable.
|
* Generic map converter -> since AtomicLongs aren't as comparable.
|
||||||
* Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
|
* Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
|
||||||
|
@ -117,23 +95,6 @@ public class Util {
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
public class ValueThenKeyComparator<K extends Comparable<? super K>,
|
|
||||||
V extends Comparable<? super V>>
|
|
||||||
implements Comparator<Map.Entry<K, V>> {
|
|
||||||
|
|
||||||
public int compare(Map.Entry<K, V> a, Map.Entry<K, V> b) {
|
|
||||||
int cmp1 = a.getValue().compareTo(b.getValue());
|
|
||||||
if (cmp1 != 0) {
|
|
||||||
return cmp1;
|
|
||||||
} else {
|
|
||||||
return a.getKey().compareTo(b.getKey());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sorts a map in a descending order by value.
|
* Sorts a map in a descending order by value.
|
||||||
|
@ -183,25 +144,6 @@ public class Util {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <K, V> void printMap(Map<K, Integer> map, String title, int number_of_words) {
|
|
||||||
System.out.println(String.format("\n%s\n------------\nkey: value\tpercent", title));
|
|
||||||
map.forEach((k, v) ->
|
|
||||||
System.out.println(String.format("%s:\t %s\t %s%%",
|
|
||||||
k,
|
|
||||||
Util.formatNumberReadable(v),
|
|
||||||
Util.formatNumberReadable((double) v / number_of_words * 100))));
|
|
||||||
System.out.println();
|
|
||||||
}
|
|
||||||
|
|
||||||
static long mapSumFrequencies(Map<MultipleHMKeys, Long> map) {
|
|
||||||
long sum = 0;
|
|
||||||
|
|
||||||
for (long value : map.values()) {
|
|
||||||
sum += value;
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used for passing optional integer values for sorting.
|
* Used for passing optional integer values for sorting.
|
||||||
|
|
|
@ -84,16 +84,6 @@ public class RDB {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// public byte[] atomicIntToByteArray(final AtomicLong i) {
|
|
||||||
// BigInteger bigInt = BigInteger.valueOf(i.intValue());
|
|
||||||
//
|
|
||||||
// return bigInt.toByteArray();
|
|
||||||
// }
|
|
||||||
|
|
||||||
public RocksDB getDb() {
|
|
||||||
return db;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<String, AtomicLong> getDump() throws UnsupportedEncodingException {
|
public Map<String, AtomicLong> getDump() throws UnsupportedEncodingException {
|
||||||
Map<String, AtomicLong> dump = new HashMap<>();
|
Map<String, AtomicLong> dump = new HashMap<>();
|
||||||
RocksDB.loadLibrary();
|
RocksDB.loadLibrary();
|
||||||
|
|
|
@ -34,17 +34,6 @@
|
||||||
<ImageView fx:id="displayTaxonomyI" layoutX="370.0" layoutY="107.5" pickOnBounds="true" preserveRatio="true">
|
<ImageView fx:id="displayTaxonomyI" layoutX="370.0" layoutY="107.5" pickOnBounds="true" preserveRatio="true">
|
||||||
<Image url="questionmark.png" backgroundLoading="true"/>
|
<Image url="questionmark.png" backgroundLoading="true"/>
|
||||||
</ImageView>
|
</ImageView>
|
||||||
<!--<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />-->
|
|
||||||
<!--<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD" />-->
|
|
||||||
<!--<TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0" />-->
|
|
||||||
<!--<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija" />-->
|
|
||||||
<!--<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0" />-->
|
|
||||||
|
|
||||||
<!--<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />-->
|
|
||||||
<!--<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />-->
|
|
||||||
|
|
||||||
<!--<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />-->
|
|
||||||
<!--<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />-->
|
|
||||||
|
|
||||||
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
|
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
|
||||||
<children>
|
<children>
|
||||||
|
|
|
@ -16,7 +16,6 @@
|
||||||
fx:controller="gui.CorpusTab">
|
fx:controller="gui.CorpusTab">
|
||||||
<children>
|
<children>
|
||||||
<Pane/>
|
<Pane/>
|
||||||
<!--<TextField fx:id="stringLengthTF" layoutX="225.0" layoutY="20.0" prefWidth="140.0" />-->
|
|
||||||
<Label fx:id="chooseCorpusLocationL" layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Nastavi lokacijo korpusa" />
|
<Label fx:id="chooseCorpusLocationL" layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Nastavi lokacijo korpusa" />
|
||||||
<Button fx:id="chooseCorpusLocationB" layoutX="225.0" layoutY="20.0" prefWidth="140.0" mnemonicParsing="false"/>
|
<Button fx:id="chooseCorpusLocationB" layoutX="225.0" layoutY="20.0" prefWidth="140.0" mnemonicParsing="false"/>
|
||||||
<ImageView fx:id="chooseCorpusLocationI" layoutX="370.0" layoutY="27.5" pickOnBounds="true" preserveRatio="true">
|
<ImageView fx:id="chooseCorpusLocationI" layoutX="370.0" layoutY="27.5" pickOnBounds="true" preserveRatio="true">
|
||||||
|
@ -26,7 +25,6 @@
|
||||||
<Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0">
|
<Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0">
|
||||||
<children>
|
<children>
|
||||||
<Label fx:id="chooseCorpusL" prefHeight="70.0" prefWidth="704.0" text="Label"/>
|
<Label fx:id="chooseCorpusL" prefHeight="70.0" prefWidth="704.0" text="Label"/>
|
||||||
<!--<CheckBox fx:id="gosUseOrthChB" layoutY="65.0" mnemonicParsing="false" text="Uporabi pogovorni zapis"/>-->
|
|
||||||
</children>
|
</children>
|
||||||
</Pane>
|
</Pane>
|
||||||
<ProgressIndicator fx:id="locationScanPI" layoutX="10.0" layoutY="60.0" prefHeight="50.0" progress="0.0"/>
|
<ProgressIndicator fx:id="locationScanPI" layoutX="10.0" layoutY="60.0" prefHeight="50.0" progress="0.0"/>
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
<?import org.controlsfx.control.CheckComboBox?>
|
<?import org.controlsfx.control.CheckComboBox?>
|
||||||
|
|
||||||
<?import javafx.scene.control.Button?>
|
<?import javafx.scene.control.Button?>
|
||||||
<?import javafx.scene.control.TextField?>
|
|
||||||
<?import javafx.scene.control.TextArea?>
|
<?import javafx.scene.control.TextArea?>
|
||||||
<AnchorPane fx:id="solarFiltersTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
<AnchorPane fx:id="solarFiltersTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
||||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.FiltersForSolar">
|
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.FiltersForSolar">
|
||||||
|
@ -31,7 +30,6 @@
|
||||||
<!-- MSD and Taxonomy separated -->
|
<!-- MSD and Taxonomy separated -->
|
||||||
<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />
|
<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />
|
||||||
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="100.0" prefHeight="340.0" maxHeight="200.0" prefWidth="275.0" text=" " wrapText="true" editable="false"/>
|
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="100.0" prefHeight="340.0" maxHeight="200.0" prefWidth="275.0" text=" " wrapText="true" editable="false"/>
|
||||||
<!--<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />-->
|
|
||||||
</Pane>
|
</Pane>
|
||||||
|
|
||||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0"/>
|
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0"/>
|
||||||
|
|
|
@ -110,11 +110,6 @@
|
||||||
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="380.0" prefHeight="95.0" maxHeight="95.0" prefWidth="360.0" text=" " wrapText="true" editable="false"/>
|
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="380.0" prefHeight="95.0" maxHeight="95.0" prefWidth="360.0" text=" " wrapText="true" editable="false"/>
|
||||||
</Pane>
|
</Pane>
|
||||||
|
|
||||||
<!--<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">-->
|
|
||||||
<!--<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />-->
|
|
||||||
<!--<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />-->
|
|
||||||
<!--</Pane>-->
|
|
||||||
|
|
||||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
|
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
|
||||||
<Button fx:id="changeLanguageB" layoutX="710.0" layoutY="40.0" mnemonicParsing="false" prefWidth="50.0"/>
|
<Button fx:id="changeLanguageB" layoutX="710.0" layoutY="40.0" mnemonicParsing="false" prefWidth="50.0"/>
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,6 @@
|
||||||
<?import javafx.scene.layout.Pane?>
|
<?import javafx.scene.layout.Pane?>
|
||||||
<?import org.controlsfx.control.CheckComboBox?>
|
<?import org.controlsfx.control.CheckComboBox?>
|
||||||
|
|
||||||
<?import javafx.scene.control.Separator?>
|
|
||||||
<?import javafx.scene.control.TextArea?>
|
<?import javafx.scene.control.TextArea?>
|
||||||
<?import javafx.scene.image.ImageView?>
|
<?import javafx.scene.image.ImageView?>
|
||||||
<?import javafx.scene.image.Image?>
|
<?import javafx.scene.image.Image?>
|
||||||
|
|
|
@ -1,32 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
|
|
||||||
<?import org.controlsfx.control.CheckComboBox?>
|
|
||||||
<?import javafx.scene.control.*?>
|
|
||||||
<?import javafx.scene.layout.AnchorPane?>
|
|
||||||
<?import javafx.scene.layout.Pane?>
|
|
||||||
<AnchorPane fx:id="wordAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
|
||||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordFormationTab">
|
|
||||||
<Pane>
|
|
||||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
|
|
||||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
|
|
||||||
|
|
||||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
|
||||||
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
|
|
||||||
|
|
||||||
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
|
||||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0" />
|
|
||||||
|
|
||||||
<Button fx:id="computeB" layoutX="10.0" layoutY="422.0" mnemonicParsing="false"
|
|
||||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
|
||||||
</Pane>
|
|
||||||
|
|
||||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
|
|
||||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
|
|
||||||
text=" " wrapText="true"/>
|
|
||||||
|
|
||||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
|
|
||||||
|
|
||||||
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
|
|
||||||
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
|
|
||||||
|
|
||||||
</AnchorPane>
|
|
|
@ -23,9 +23,9 @@ public class CorpusTests {
|
||||||
|
|
||||||
File f = Settings.corpus.iterator().next();
|
File f = Settings.corpus.iterator().next();
|
||||||
|
|
||||||
Statistics stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, 0, CalculateFor.WORD);
|
// Statistics stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, 0, CalculateFor.WORD);
|
||||||
// stats.setCorpusType(CorpusType.GOS);
|
// // stats.setCorpusType(CorpusType.GOS);
|
||||||
stats.setCorpusType(CorpusType.SOLAR);
|
// stats.setCorpusType(CorpusType.SOLAR);
|
||||||
|
|
||||||
// XML_processing.readXMLGos(f.toString(), stats);
|
// XML_processing.readXMLGos(f.toString(), stats);
|
||||||
// XML_processing.readXML(f.toString(), stats);
|
// XML_processing.readXML(f.toString(), stats);
|
||||||
|
@ -33,10 +33,10 @@ public class CorpusTests {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
// @Test
|
||||||
public void test() {
|
// public void test() {
|
||||||
ObservableList<String> var = GosTaxonomy.getForComboBox();
|
// ObservableList<String> var = GosTaxonomy.getForComboBox();
|
||||||
String debug = "";
|
// String debug = "";
|
||||||
|
//
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user