BIG REFACTOR - erasing unused code
This commit is contained in:
parent
10666b4453
commit
2c028cd334
|
@ -1,15 +0,0 @@
|
|||
package alg;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
/**
 * Shared helpers for the counting algorithms.
 */
public class Common {

    /**
     * Increments the occurrence counter stored under {@code o}.
     *
     * <p>If {@code map} has no counter for {@code o} yet, a new counter with the value 1 is
     * inserted. Otherwise the counter that is already present is incremented by one.
     *
     * <p>The counter returned by {@link Map#putIfAbsent} is incremented directly. Looking the
     * key up a second time is unnecessary, and it would throw a {@link NullPointerException}
     * if the entry were removed between the two calls.
     *
     * @param map counters keyed by the counted object; must not be {@code null}
     * @param o   the key whose counter is created or incremented
     * @param <K> key type of the map
     * @param <V> unused; kept so existing callers with explicit type arguments still compile
     */
    public static <K, V> void updateMap(Map<K, AtomicLong> map, K o) {
        // putIfAbsent returns null when our fresh counter (already at 1) was stored.
        AtomicLong existing = map.putIfAbsent(o, new AtomicLong(1));

        // Otherwise it returns the counter that was already mapped: bump that one.
        if (existing != null) {
            existing.incrementAndGet();
        }
    }
}
|
|
@ -19,7 +19,6 @@ import gui.I18N;
|
|||
import javafx.beans.InvalidationListener;
|
||||
import javafx.beans.property.ReadOnlyDoubleProperty;
|
||||
import javafx.beans.property.ReadOnlyDoubleWrapper;
|
||||
import javafx.concurrent.Task;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.LineIterator;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
|
@ -38,35 +37,10 @@ public class XML_processing {
|
|||
public static boolean isCollocability = false;
|
||||
public static InvalidationListener progressBarListener;
|
||||
|
||||
/**
 * Returns the current numeric value of the progress property.
 *
 * @return the value read from the property returned by {@code progressProperty()}
 */
public double getProgress() {
|
||||
return progressProperty().get();
|
||||
}
|
||||
|
||||
/**
 * Exposes the {@code progress} field typed as a read-only double property.
 *
 * NOTE(review): the declaration of {@code progress} is not visible here; presumably it is
 * the reading progress that the reader methods in this class update. Confirm against the
 * field declaration.
 *
 * @return the {@code progress} field itself
 */
public ReadOnlyDoubleProperty progressProperty() {
|
||||
return progress ;
|
||||
}
|
||||
|
||||
// public static void processCorpus(Statistics stats) {
|
||||
// // we can preset the list's size, so there won't be a need to resize it
|
||||
// List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT);
|
||||
//
|
||||
// int i = 0;
|
||||
// for (File f : Settings.corpus) {
|
||||
// i++;
|
||||
// readXML(f.toString(), stats);
|
||||
// }
|
||||
// }
|
||||
|
||||
// public static void readXML(String path, Statistics stats) {
|
||||
// if (stats.getCorpusType() == CorpusType.GIGAFIDA) {
|
||||
// readXMLGigafida(path, stats);
|
||||
// } else if (stats.getCorpusType() == CorpusType.GOS) {
|
||||
// readXMLGos(path, stats);
|
||||
// } else if (stats.getCorpusType() == CorpusType.SOLAR) {
|
||||
// readXMLSolar(path, stats);
|
||||
// }
|
||||
// }
|
||||
|
||||
public static boolean readXML(String path, StatisticsNew stats) {
|
||||
if (stats.getCorpus().getCorpusType() == CorpusType.GIGAFIDA
|
||||
|| stats.getCorpus().getCorpusType() == CorpusType.CCKRES) {
|
||||
|
@ -81,7 +55,6 @@ public class XML_processing {
|
|||
} else if (stats.getCorpus().getCorpusType() == CorpusType.VERT) {
|
||||
return readVERT(path, stats);
|
||||
}
|
||||
// task.updateProgress(fileNum, size);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -174,15 +147,10 @@ public class XML_processing {
|
|||
} else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) {
|
||||
alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats);
|
||||
pool.invoke(wc);
|
||||
} else {
|
||||
// TODO:
|
||||
// alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats);
|
||||
// pool.invoke(wc);
|
||||
}
|
||||
|
||||
// if running with minimalRelFre frequency erase all ngrams with occurrences lower than set value per 1M
|
||||
if(stats.getFilter().getIsMinimalRelFreScraper()) {
|
||||
// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
|
||||
long countFor1MWords = stats.getUniGramOccurrences().get(stats.getCorpus().getTotal()).longValue();
|
||||
if(countFor1MWords > 1000000L){
|
||||
double absToRelFactor = (stats.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
||||
|
@ -197,125 +165,9 @@ public class XML_processing {
|
|||
stats.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
|
||||
}
|
||||
}
|
||||
// System.out.println("asd");
|
||||
}
|
||||
}
|
||||
|
||||
// public static void readXMLGos(String path, Statistics stats) {
|
||||
// boolean in_word = false;
|
||||
// String taksonomija = "";
|
||||
// String lemma = "";
|
||||
// String msd = "";
|
||||
// String type = stats.isGosOrthMode() ? "orth" : "norm"; // orth & norm
|
||||
//
|
||||
// List<Word> stavek = new ArrayList<>();
|
||||
// List<Sentence> corpus = new ArrayList<>();
|
||||
// String sentenceDelimiter = "seg";
|
||||
// String taxonomyPrefix = "gos.";
|
||||
//
|
||||
// try {
|
||||
// XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
// XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path));
|
||||
//
|
||||
// while (eventReader.hasNext()) {
|
||||
// XMLEvent event = eventReader.nextEvent();
|
||||
//
|
||||
// switch (event.getEventType()) {
|
||||
// case XMLStreamConstants.START_ELEMENT:
|
||||
//
|
||||
// StartElement startElement = event.asStartElement();
|
||||
// String qName = startElement.getName().getLocalPart();
|
||||
//
|
||||
// // "word" node
|
||||
// if (qName.equals("w")) {
|
||||
// in_word = true;
|
||||
//
|
||||
// if (type.equals("norm")) {
|
||||
// // make sure we're looking at <w lemma...> and not <w type...>
|
||||
// Iterator var = startElement.getAttributes();
|
||||
// ArrayList<Object> attributes = new ArrayList<>();
|
||||
// while (var.hasNext()) {
|
||||
// attributes.add(var.next());
|
||||
// }
|
||||
//
|
||||
// if (attributes.contains("msd")) {
|
||||
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
||||
// } else {
|
||||
// msd = null;
|
||||
// }
|
||||
//
|
||||
// if (attributes.contains("lemma")) {
|
||||
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// // taxonomy node
|
||||
// else if (qName.equalsIgnoreCase("catRef")) {
|
||||
// // there are some term nodes at the beginning that are of no interest to us
|
||||
// // they differ by not having the attribute "ref", so test will equal null
|
||||
// Attribute test = startElement.getAttributeByName(QName.valueOf("target"));
|
||||
//
|
||||
// if (test != null) {
|
||||
// // keep only taxonomy properties
|
||||
// taksonomija = String.valueOf(test.getValue()).replace(taxonomyPrefix, "");
|
||||
// }
|
||||
// } else if (qName.equalsIgnoreCase("div")) {
|
||||
// type = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
|
||||
//
|
||||
// }
|
||||
// break;
|
||||
//
|
||||
// case XMLStreamConstants.CHARACTERS:
|
||||
// Characters characters = event.asCharacters();
|
||||
//
|
||||
// // "word" node value
|
||||
// if (in_word) {
|
||||
// if (type.equals("norm") && msd != null) {
|
||||
// stavek.add(new Word(characters.getData(), lemma, msd));
|
||||
// } else {
|
||||
// stavek.add(new Word(characters.getData()));
|
||||
// }
|
||||
//
|
||||
// in_word = false;
|
||||
// }
|
||||
// break;
|
||||
//
|
||||
// case XMLStreamConstants.END_ELEMENT:
|
||||
// EndElement endElement = event.asEndElement();
|
||||
//
|
||||
// // parser reached end of the current sentence
|
||||
// if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
||||
// // add sentence to corpus
|
||||
// corpus.add(new Sentence(stavek, taksonomija, type));
|
||||
// // and start a new one
|
||||
// stavek = new ArrayList<>();
|
||||
//
|
||||
// /* Invoke Fork-Join when we reach maximum limit of
|
||||
// * sentences (because we can't read everything to
|
||||
// * memory) or we reach the end of the file.
|
||||
// */
|
||||
// if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
|
||||
// fj(corpus, stats);
|
||||
// // empty the current corpus, since we don't need
|
||||
// // the data anymore
|
||||
// corpus.clear();
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // backup
|
||||
// if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
|
||||
// fj(corpus, stats);
|
||||
// corpus.clear();
|
||||
// }
|
||||
//
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// } catch (FileNotFoundException | XMLStreamException e) {
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
// }
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public static boolean readXMLSolar(String path, StatisticsNew stats) {
|
||||
boolean in_word = false;
|
||||
|
@ -327,7 +179,6 @@ public class XML_processing {
|
|||
List<Sentence> corpus = new ArrayList<>();
|
||||
|
||||
// used for filter
|
||||
// Set<String> headTags = new HashSet<>(Arrays.asList("sola", "predmet", "razred", "regija", "tip", "leto"));
|
||||
Set<String> headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO));
|
||||
Map<String, String> headBlock = null;
|
||||
boolean includeThisBlock = false;
|
||||
|
@ -372,9 +223,7 @@ public class XML_processing {
|
|||
|
||||
switch (event.getEventType()) {
|
||||
case XMLStreamConstants.START_ELEMENT:
|
||||
|
||||
StartElement startElement = event.asStartElement();
|
||||
// System.out.println(String.format("%s", startElement.toString()));
|
||||
String qName = startElement.getName().getLocalPart();
|
||||
|
||||
// "word" node
|
||||
|
@ -423,7 +272,7 @@ public class XML_processing {
|
|||
stavek = new ArrayList<>();
|
||||
} else if (qName.equals("head")) {
|
||||
headBlock = new HashMap<>();
|
||||
} else { // if (headTags.contains(qName)) {
|
||||
} else {
|
||||
boolean inHeadTags = false;
|
||||
String headTag = "";
|
||||
for (String tag : headTags){
|
||||
|
@ -436,8 +285,6 @@ public class XML_processing {
|
|||
if(inHeadTags) {
|
||||
String tagContent = eventReader.nextEvent().asCharacters().getData();
|
||||
headBlock.put(headTag, tagContent);
|
||||
// String tagContent = xmlEventReader.nextEvent().asCharacters().getData();
|
||||
// resultFilters.get(headTag).add(tagContent);
|
||||
}
|
||||
|
||||
|
||||
|
@ -562,22 +409,16 @@ public class XML_processing {
|
|||
if (line.length() > 4 && line.substring(1, 5).equals("text")) {
|
||||
// split over "\" "
|
||||
String[] split = line.split("\" ");
|
||||
// String mediumId = "";
|
||||
// String typeId = "";
|
||||
// String proofreadId = "";
|
||||
boolean idsPresent = false;
|
||||
for (String el : split) {
|
||||
String[] attribute = el.split("=\"");
|
||||
if (attribute[0].equals("medium_id")) {
|
||||
// mediumId = attribute[1];
|
||||
idsPresent = true;
|
||||
resultTaxonomy.add(attribute[1]);
|
||||
} else if (attribute[0].equals("type_id")) {
|
||||
// typeId = attribute[1];
|
||||
idsPresent = true;
|
||||
resultTaxonomy.add(attribute[1]);
|
||||
} else if (attribute[0].equals("proofread_id")) {
|
||||
// proofreadId = attribute[1];
|
||||
idsPresent = true;
|
||||
resultTaxonomy.add(attribute[1]);
|
||||
}
|
||||
|
@ -586,13 +427,10 @@ public class XML_processing {
|
|||
for (String el : split) {
|
||||
String[] attribute = el.split("=\"");
|
||||
if (attribute[0].equals("medium")) {
|
||||
// mediumId = attribute[1];
|
||||
resultTaxonomy.add(attribute[1]);
|
||||
} else if (attribute[0].equals("type")) {
|
||||
// typeId = attribute[1];
|
||||
resultTaxonomy.add(attribute[1]);
|
||||
} else if (attribute[0].equals("proofread")) {
|
||||
// proofreadId = attribute[1];
|
||||
resultTaxonomy.add(attribute[1]);
|
||||
}
|
||||
}
|
||||
|
@ -679,7 +517,6 @@ public class XML_processing {
|
|||
|
||||
resultTaxonomy.add(tax);
|
||||
// solar
|
||||
// } else if (!parseTaxonomy && headTags.contains(elementName)) {
|
||||
} else if (!parseTaxonomy) {
|
||||
boolean inHeadTags = false;
|
||||
String headTag = "";
|
||||
|
@ -737,7 +574,6 @@ public class XML_processing {
|
|||
boolean inPunctuation = false;
|
||||
boolean taxonomyMatch = true;
|
||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||
// ArrayList<Taxonomy> currentFiletaxonomyLong = new ArrayList<>();
|
||||
String lemma = "";
|
||||
String msd = "";
|
||||
|
||||
|
@ -780,8 +616,6 @@ public class XML_processing {
|
|||
// keep only taxonomy properties
|
||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus());
|
||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||
Tax taxonomy = new Tax();
|
||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -795,40 +629,13 @@ public class XML_processing {
|
|||
sentence.add(createWord(word, lemma, msd, word, stats.getFilter()));
|
||||
inWord = false;
|
||||
}
|
||||
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||
if (stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||
String punctuation = characters.getData();
|
||||
sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter()));
|
||||
inPunctuation = false;
|
||||
|
||||
// String punctuation = ",";
|
||||
//
|
||||
// sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
|
||||
// sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
|
||||
// sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
|
||||
// inPunctuation = false;
|
||||
}
|
||||
break;
|
||||
|
||||
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||
// String actualPunctuation = characters.getData();
|
||||
// if (actualPunctuation.equals(".") || actualPunctuation.equals("!") || actualPunctuation.equals("?") || actualPunctuation.equals("..."))
|
||||
// break;
|
||||
// String punctuation = ",";
|
||||
// int skip_number = 0;
|
||||
// if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue())){
|
||||
// skip_number = stats.getFilter().getSkipValue();
|
||||
// }
|
||||
// for(int i = 1; i < skip_number + 2; i ++){
|
||||
// if (i < sentence.size() && !sentence.get(sentence.size() - i).equals(punctuation)) {
|
||||
// sentence.get(sentence.size() - i).setWord(sentence.get(sentence.size() - i).getWord() + punctuation);
|
||||
// sentence.get(sentence.size() - i).setLemma(sentence.get(sentence.size() - i).getLemma() + punctuation);
|
||||
// sentence.get(sentence.size() - i).setMsd(sentence.get(sentence.size() - i).getMsd() + punctuation);
|
||||
// }
|
||||
// }
|
||||
// inPunctuation = false;
|
||||
// }
|
||||
|
||||
case XMLStreamConstants.END_ELEMENT:
|
||||
EndElement endElement = event.asEndElement();
|
||||
|
||||
|
@ -869,10 +676,6 @@ public class XML_processing {
|
|||
fj(corpus, stats);
|
||||
// empty the current corpus, since we don't need the data anymore
|
||||
corpus.clear();
|
||||
|
||||
// TODO: if (stats.isUseDB()) {
|
||||
// stats.storeTmpResultsToDB();
|
||||
// }
|
||||
}
|
||||
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
|
||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||
|
@ -883,7 +686,6 @@ public class XML_processing {
|
|||
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
|
||||
// taxonomies don't match so stop
|
||||
// union (select words that match any of selected taxonomy
|
||||
// return false;
|
||||
taxonomyMatch = false;
|
||||
//
|
||||
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
|
||||
|
@ -898,10 +700,6 @@ public class XML_processing {
|
|||
// join corpus and stats
|
||||
fj(corpus, stats);
|
||||
corpus.clear();
|
||||
|
||||
// TODO: if (stats.isUseDB()) {
|
||||
// stats.storeTmpResultsToDB();
|
||||
// }
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -909,7 +707,6 @@ public class XML_processing {
|
|||
}
|
||||
} catch (FileNotFoundException | XMLStreamException e) {
|
||||
throw new java.lang.RuntimeException("XMLStreamException | FileNotFoundException");
|
||||
// e.printStackTrace();
|
||||
} finally {
|
||||
if (eventReader != null) {
|
||||
try {
|
||||
|
@ -929,7 +726,6 @@ public class XML_processing {
|
|||
boolean inPunctuation = false;
|
||||
boolean taxonomyMatch = true;
|
||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||
String lemma = "";
|
||||
String msd = "";
|
||||
|
||||
|
@ -1006,8 +802,6 @@ public class XML_processing {
|
|||
// keep only taxonomy properties
|
||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus());
|
||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||
// Tax taxonomy = new Tax();
|
||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
}
|
||||
} else if (stats.getCorpus().getTaxonomy().size() > 0 && qName.equalsIgnoreCase("catRef")) {
|
||||
// get value from attribute target
|
||||
|
@ -1017,41 +811,7 @@ public class XML_processing {
|
|||
// keep only taxonomy properties
|
||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).split(":")[1], stats.getCorpus());
|
||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||
// Tax taxonomy = new Tax();
|
||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) {
|
||||
// HashMap<String, String> atts = extractAttributes(startElement);
|
||||
// String debug = "";
|
||||
//
|
||||
// String tax = startElement.getAttributeByName(QName.valueOf("target"))
|
||||
// .getValue()
|
||||
// .replace("#", "");
|
||||
//
|
||||
// if (tax.indexOf(':') >= 0) {
|
||||
// tax = tax.split(":")[1];
|
||||
// }
|
||||
// resultTaxonomy.add(tax);
|
||||
// } else if (parseTaxonomy && elementName.equalsIgnoreCase("term")) {
|
||||
// String tax = startElement.getAttributeByName(QName.valueOf("ref"))
|
||||
// .getValue()
|
||||
// .replace("#", "");
|
||||
//
|
||||
// resultTaxonomy.add(tax);
|
||||
// } else if (!parseTaxonomy && headTags.contains(elementName)) {
|
||||
// String tagContent = xmlEventReader.nextEvent().asCharacters().getData();
|
||||
// resultFilters.get(elementName).add(tagContent);
|
||||
// }
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} else if (qName.equals("bibl")) {
|
||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||
taxonomyMatch = true;
|
||||
|
@ -1068,14 +828,10 @@ public class XML_processing {
|
|||
// "word" node value
|
||||
if (inWord) {
|
||||
String word = characters.getData();
|
||||
// if (word.equals("Banovec")){
|
||||
// System.out.println("Test");
|
||||
// }
|
||||
sentence.add(createWord(word, lemma, msd, word, stats.getFilter()));
|
||||
inWord = false;
|
||||
}
|
||||
if (stats.getFilter().getNotePunctuations() && inPunctuation) {
|
||||
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||
String punctuation = characters.getData();
|
||||
sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter()));
|
||||
inPunctuation = false;
|
||||
|
@ -1085,9 +841,6 @@ public class XML_processing {
|
|||
case XMLStreamConstants.END_ELEMENT:
|
||||
EndElement endElement = event.asEndElement();
|
||||
|
||||
String var = endElement.getName().getLocalPart();
|
||||
String debug = "";
|
||||
|
||||
// parser reached end of the current sentence
|
||||
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
||||
if (stats.getFilter().getNgramValue() == 0){
|
||||
|
@ -1119,10 +872,6 @@ public class XML_processing {
|
|||
fj(corpus, stats);
|
||||
// empty the current corpus, since we don't need the data anymore
|
||||
corpus.clear();
|
||||
|
||||
// TODO: if (stats.isUseDB()) {
|
||||
// stats.storeTmpResultsToDB();
|
||||
// }
|
||||
}
|
||||
}
|
||||
// fallback
|
||||
|
@ -1133,7 +882,6 @@ public class XML_processing {
|
|||
corpus.clear();
|
||||
|
||||
currentFiletaxonomy = new ArrayList<>();
|
||||
// currentFiletaxonomyLong = new ArrayList<>();
|
||||
} else if (endElement.getName().getLocalPart().equals("bibl")) {
|
||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||
|
||||
|
@ -1143,7 +891,6 @@ public class XML_processing {
|
|||
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
|
||||
// taxonomies don't match so stop
|
||||
// union (select words that match any of selected taxonomy
|
||||
// return false;
|
||||
taxonomyMatch = false;
|
||||
//
|
||||
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
|
||||
|
@ -1162,10 +909,6 @@ public class XML_processing {
|
|||
fj(corpus, stats);
|
||||
// empty the current corpus, since we don't need the data anymore
|
||||
corpus.clear();
|
||||
|
||||
// TODO: if (stats.isUseDB()) {
|
||||
// stats.storeTmpResultsToDB();
|
||||
// }
|
||||
}
|
||||
} catch (FileNotFoundException | XMLStreamException e) {
|
||||
e.printStackTrace();
|
||||
|
@ -1185,12 +928,9 @@ public class XML_processing {
|
|||
@SuppressWarnings("Duplicates")
|
||||
public static boolean readXMLGos(String path, StatisticsNew stats) {
|
||||
boolean inWord = false;
|
||||
boolean inPunctuation = false;
|
||||
boolean inOrthDiv = false;
|
||||
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
|
||||
boolean inSeparatedWord = false;
|
||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||
String lemma = "";
|
||||
String msd = "";
|
||||
|
||||
|
@ -1201,10 +941,6 @@ public class XML_processing {
|
|||
String sentenceDelimiter = "seg";
|
||||
int wordIndex = 0;
|
||||
|
||||
String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm
|
||||
|
||||
|
||||
|
||||
int numLines = 0;
|
||||
int lineNum = 0;
|
||||
progress.set(0.0);
|
||||
|
@ -1248,7 +984,6 @@ public class XML_processing {
|
|||
}
|
||||
lineNum ++;
|
||||
XMLEvent event = eventReader.nextEvent();
|
||||
// System.out.print(String.format("%s", event.toString().replaceAll("\\['http://www.tei-c.org/ns/1.0'\\]::", "")));
|
||||
|
||||
switch (event.getEventType()) {
|
||||
case XMLStreamConstants.START_ELEMENT:
|
||||
|
@ -1278,11 +1013,6 @@ public class XML_processing {
|
|||
if (atts.containsKey("lemma")) {
|
||||
lemma = atts.get("lemma");
|
||||
}
|
||||
//
|
||||
// if (!inOrthDiv) {
|
||||
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
||||
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
||||
// }
|
||||
} else if (atts.containsKey("type") && atts.get("type").equals("separated")) {
|
||||
inSeparatedWord = true;
|
||||
}
|
||||
|
@ -1299,11 +1029,7 @@ public class XML_processing {
|
|||
// keep only taxonomy properties
|
||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()), stats.getCorpus());
|
||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||
// Tax taxonomy = new Tax();
|
||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
}
|
||||
} else if (qName.equalsIgnoreCase("div")) {
|
||||
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
|
||||
} else if (qName.equalsIgnoreCase("seg")) {
|
||||
HashMap<String, String> atts = extractAttributes(startElement);
|
||||
|
||||
|
@ -1322,20 +1048,14 @@ public class XML_processing {
|
|||
case XMLStreamConstants.CHARACTERS:
|
||||
// "word" node value
|
||||
if (inWord) {
|
||||
// if (GOSCorpusHMKey.equals("gos.028-0108.norm") && wordIndex > 8){
|
||||
// System.out.println(wordIndex);
|
||||
// }
|
||||
// if the algorithm is in the orthographic part, add a new word to the sentence
|
||||
if (inOrthDiv){
|
||||
// GOSCorpusHM.put(GOSCorpusHMKey, sentence);
|
||||
String word = "";
|
||||
Characters characters = event.asCharacters();
|
||||
sentence.add(createWord(characters.getData(), "", "", "", stats.getFilter()));
|
||||
// if the algorithm is in the normalized part, find the orthographic word and add the other info to it
|
||||
} else {
|
||||
Characters characters = event.asCharacters();
|
||||
// System.out.println(wordIndex);
|
||||
// System.out.println(GOSCorpusHMKey + " " + lemma + " " + wordIndex);
|
||||
if (wordIndex < GOSCorpusHM.get(GOSCorpusHMKey).size()) {
|
||||
Word currentWord = GOSCorpusHM.get(GOSCorpusHMKey).get(wordIndex);
|
||||
currentWord.setLemma(lemma, stats.getFilter().getWordParts());
|
||||
|
@ -1349,9 +1069,7 @@ public class XML_processing {
|
|||
GOSCorpusHM.get(GOSCorpusHMKey).add(wordIndex, createWord(currentWord.getWord(stats.getFilter().getWordParts()),
|
||||
"", "", "", stats.getFilter()));
|
||||
}
|
||||
} //else {
|
||||
// System.out.println("Error");
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1393,17 +1111,7 @@ public class XML_processing {
|
|||
|
||||
// add sentence to corpus if it passes filters
|
||||
if (includeFile && !ValidationUtil.isEmpty(sentence)) {
|
||||
// for(Word w : sentence) {
|
||||
// if (w.getW1().equals("")) {
|
||||
// System.out.println("HERE!!!");
|
||||
// }
|
||||
// }
|
||||
sentence = runFilters(sentence, stats.getFilter());
|
||||
// for(Word w : sentence) {
|
||||
// if (w.getW1().equals("")) {
|
||||
// System.out.println("HERE!!!");
|
||||
// }
|
||||
// }
|
||||
corpus.add(new Sentence(sentence, currentFiletaxonomy));
|
||||
}
|
||||
|
||||
|
@ -1430,21 +1138,12 @@ public class XML_processing {
|
|||
|
||||
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
|
||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||
// if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
|
||||
// currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
|
||||
//
|
||||
// // disregard this entry if taxonomies don't match
|
||||
// includeFile = !currentFiletaxonomy.isEmpty();
|
||||
//
|
||||
//// currentFiletaxonomy = new ArrayList<>();
|
||||
// }
|
||||
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
|
||||
currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
|
||||
|
||||
if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) {
|
||||
// taxonomies don't match so stop
|
||||
// union (select words that match any of selected taxonomy
|
||||
// return false;
|
||||
includeFile = false;
|
||||
//
|
||||
} else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){
|
||||
|
@ -1462,7 +1161,6 @@ public class XML_processing {
|
|||
corpus.clear();
|
||||
|
||||
currentFiletaxonomy = new ArrayList<>();
|
||||
// currentFiletaxonomyLong = new ArrayList<>();
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -1488,9 +1186,6 @@ public class XML_processing {
|
|||
@SuppressWarnings("Duplicates")
|
||||
public static boolean readVERT(String path, StatisticsNew stats) {
|
||||
// taxonomy corpora
|
||||
// HashSet<String> resultTaxonomy = new HashSet<>();
|
||||
|
||||
|
||||
// regi path
|
||||
String regiPath = path.substring(0, path.length()-4) + "regi";
|
||||
|
||||
|
@ -1503,7 +1198,6 @@ public class XML_processing {
|
|||
// read regi file
|
||||
regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8");
|
||||
try {
|
||||
boolean insideHeader = false;
|
||||
int attributeIndex = 0;
|
||||
while (regiIt.hasNext()) {
|
||||
String line = regiIt.nextLine();
|
||||
|
@ -1534,7 +1228,6 @@ public class XML_processing {
|
|||
}
|
||||
} catch (IOException e) {
|
||||
throw new java.lang.RuntimeException("IOException");
|
||||
// e.printStackTrace();
|
||||
}
|
||||
|
||||
int numLines = 0;
|
||||
|
@ -1556,7 +1249,6 @@ public class XML_processing {
|
|||
LineIterator it;
|
||||
|
||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||
boolean inParagraph = false;
|
||||
boolean inSentence = false;
|
||||
boolean taxonomyMatch = true;
|
||||
int lineNum = 0;
|
||||
|
@ -1572,8 +1264,6 @@ public class XML_processing {
|
|||
try {
|
||||
it = FileUtils.lineIterator(new File(path), "UTF-8");
|
||||
try {
|
||||
boolean insideHeader = false;
|
||||
|
||||
while (it.hasNext()) {
|
||||
int percentage = (int) (lineNum * 100.0 / numLines);
|
||||
if(progress.get() < percentage) {
|
||||
|
@ -1596,7 +1286,6 @@ public class XML_processing {
|
|||
boolean proofread = false;
|
||||
for (String el : split) {
|
||||
String[] attribute = el.split("=\"");
|
||||
boolean idsPresent = false;
|
||||
if (attribute[0].equals("medium_id") && !attribute[1].equals("-")) {
|
||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus());
|
||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||
|
@ -1639,12 +1328,6 @@ public class XML_processing {
|
|||
}
|
||||
|
||||
}
|
||||
// else if((line.length() >= 3 && line.substring(0, 2).equals("<p") && line.substring(line.length() - 1, line.length()).equals(">")) ||
|
||||
// (line.length() >= 3 && line.substring(0, 3).equals("<ab") && line.substring(line.length() - 1, line.length()).equals(">"))){
|
||||
// inParagraph = true;
|
||||
// } else if((line.length() == 4 && line.equals("</p>")) || (line.length() == 5 && line.equals("</ab>"))){
|
||||
// inParagraph = false;
|
||||
// }
|
||||
else if(line.length() >= 3 && line.substring(0, 2).equals("<s") && line.substring(line.length() - 1, line.length()).equals(">")){
|
||||
inSentence = true;
|
||||
} else if(line.length() == 4 && line.equals("</s>")){
|
||||
|
@ -1677,10 +1360,7 @@ public class XML_processing {
|
|||
|
||||
// and start a new one
|
||||
sentence = new ArrayList<>();
|
||||
|
||||
// corpus.add(new Sentence(sentence, currentFiletaxonomy));
|
||||
} else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence){
|
||||
// } else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence && inParagraph){
|
||||
String[] split = line.split("\t");
|
||||
if(slovene) {
|
||||
if (split[lemmaIndex].length() > 2 && split[lemmaIndex].charAt(split[lemmaIndex].length() - 2) == '-' && Character.isAlphabetic(split[lemmaIndex].charAt(split[lemmaIndex].length() - 1)) &&
|
||||
|
@ -1721,7 +1401,6 @@ public class XML_processing {
|
|||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
// resultTaxonomy.remove("-");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,67 +0,0 @@
|
|||
//package alg.inflectedJOS;
|
||||
//
|
||||
//import java.util.List;
|
||||
//import java.util.concurrent.RecursiveAction;
|
||||
//
|
||||
//import data.Sentence;
|
||||
//import data.Statistics;
|
||||
//
|
||||
//public class ForkJoin extends RecursiveAction {
|
||||
// private static final long serialVersionUID = -1260951004477299634L;
|
||||
//
|
||||
// private static final int ACCEPTABLE_SIZE = 1000;
|
||||
// private List<Sentence> corpus;
|
||||
// private Statistics stats;
|
||||
// private int start;
|
||||
// private int end;
|
||||
//
|
||||
//
|
||||
// /**
|
||||
// * Constructor for subproblems.
|
||||
// */
|
||||
// private ForkJoin(List<Sentence> corpus, int start, int end, Statistics stats) {
|
||||
// this.corpus = corpus;
|
||||
// this.start = start;
|
||||
// this.end = end;
|
||||
// this.stats = stats;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Default constructor for the initial problem
|
||||
// */
|
||||
// public ForkJoin(List<Sentence> corpus, Statistics stats) {
|
||||
// this.corpus = corpus;
|
||||
// this.start = 0;
|
||||
// this.end = corpus.size();
|
||||
// this.stats = stats;
|
||||
// }
|
||||
//
|
||||
// private void computeDirectly() {
|
||||
// List<Sentence> subCorpus = corpus.subList(start, end);
|
||||
//
|
||||
// if (stats.isTaxonomySet()) {
|
||||
// InflectedJOSCount.calculateForAll(subCorpus, stats, stats.getInflectedJosTaxonomy());
|
||||
// } else {
|
||||
// InflectedJOSCount.calculateForAll(subCorpus, stats, null);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// protected void compute() {
|
||||
// int subCorpusSize = end - start;
|
||||
//
|
||||
// if (subCorpusSize < ACCEPTABLE_SIZE) {
|
||||
// computeDirectly();
|
||||
// } else {
|
||||
// int mid = start + subCorpusSize / 2;
|
||||
// ForkJoin left = new ForkJoin(corpus, start, mid, stats);
|
||||
// ForkJoin right = new ForkJoin(corpus, mid, end, stats);
|
||||
//
|
||||
// // fork (push to queue)-> compute -> join
|
||||
// left.fork();
|
||||
// right.fork();
|
||||
// left.join();
|
||||
// right.join();
|
||||
// }
|
||||
// }
|
||||
//}
|
|
@ -1,170 +0,0 @@
|
|||
//package alg.inflectedJOS;
|
||||
//
|
||||
//import java.util.ArrayList;
|
||||
//import java.util.HashMap;
|
||||
//import java.util.List;
|
||||
//
|
||||
//import org.apache.commons.lang3.StringUtils;
|
||||
//
|
||||
//import alg.Common;
|
||||
//import data.Sentence;
|
||||
//import data.Statistics;
|
||||
//import data.StatisticsNew;
|
||||
//import data.Word;
|
||||
//
|
||||
//public class InflectedJOSCount {
|
||||
//
|
||||
// public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;
|
||||
//
|
||||
// // static {
|
||||
// // // calculate all possible combinations of indices we will substitute with a '-' for substring statistics
|
||||
// // indices = new HashMap<>();
|
||||
// // for (int i = 5; i <= 8; i++) {
|
||||
// // indices.put(i, calculateCombinations(i));
|
||||
// // }
|
||||
// // }
|
||||
// //
|
||||
// // private static List<Integer> calculateCombinations(int i) {
|
||||
// // int arr[] = {1, 2, 3, 4, 5};
|
||||
// // int r = 3;
|
||||
// // int n = arr.length;
|
||||
// // ArrayList<ArrayList<Integer>> result = new ArrayList<>();
|
||||
// //
|
||||
// // return printCombination(arr, n, r);
|
||||
// // }
|
||||
// //
|
||||
// // /* arr[] ---> Input Array
|
||||
// // data[] ---> Temporary array to store current combination
|
||||
// // start & end ---> Starting and Ending indexes in arr[]
|
||||
// // index ---> Current index in data[]
|
||||
// // r ---> Size of a combination to be printed */
|
||||
// // static void combinationUtil(int arr[], int data[], int start,
|
||||
// // int end, int index, int r, ArrayList<ArrayList<Integer>> result) {
|
||||
// // // Current combination is ready to be printed, print it
|
||||
// // ArrayList<Integer> tmpResult = new ArrayList<>();
|
||||
// //
|
||||
// // if (index == r) {
|
||||
// // ArrayList<Integer> tmpResult = new ArrayList<>();
|
||||
// // for (int j = 0; j < r; j++)
|
||||
// // System.out.print(data[j] + " ");
|
||||
// // System.out.println("");
|
||||
// // return;
|
||||
// // }
|
||||
// //
|
||||
// // // replace index with all possible elements. The condition
|
||||
// // // "end-i+1 >= r-index" makes sure that including one element
|
||||
// // // at index will make a combination with remaining elements
|
||||
// // // at remaining positions
|
||||
// // for (int i = start; i <= end && end - i + 1 >= r - index; i++) {
|
||||
// // data[index] = arr[i];
|
||||
// // combinationUtil(arr, data, i + 1, end, index + 1, r);
|
||||
// // }
|
||||
// // }
|
||||
// //
|
||||
// // // The main function that prints all combinations of size r
|
||||
// // // in arr[] of size n. This function mainly uses combinationUtil()
|
||||
// // static void printCombination(int arr[], int n, int r) {
|
||||
// // // A temporary array to store all combination one by one
|
||||
// // int data[] = new int[r];
|
||||
// //
|
||||
// // // Print all combinations using temporary array 'data[]'
|
||||
// // combinationUtil(arr, data, 0, n - 1, 0, r);
|
||||
// // }
|
||||
//
|
||||
// // public static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||||
// // for (Sentence s : corpus) {
|
||||
// // // disregard if wrong taxonomy
|
||||
// // if (!(s.getObservableListTaxonomy().startsWith(taxonomy))) {
|
||||
// // continue;
|
||||
// // }
|
||||
// //
|
||||
// // calculateCommon(s, stats.result);
|
||||
// //
|
||||
// // for (Word word : s.getWords()) {
|
||||
// // // skip if current word is not inflected
|
||||
// // if (!(word.getMsd().length() > 0)) {
|
||||
// // continue;
|
||||
// // }
|
||||
// //
|
||||
// // String msd = word.getMsd();
|
||||
// //
|
||||
// // StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
// //
|
||||
// // for (int i = 1; i < msd.length(); i++) {
|
||||
// // entry.setCharAt(i, msd.charAt(i));
|
||||
// // Common.updateMap(stats.result, entry.toString());
|
||||
// // entry.setCharAt(i, '-');
|
||||
// // }
|
||||
// // }
|
||||
// // }
|
||||
// // }
|
||||
//
|
||||
// // public static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
||||
// // for (Sentence s : corpus) {
|
||||
// // for (Word word : s.getWords()) {
|
||||
// // if (!(word.getMsd().length() > 0)) {
|
||||
// // continue;
|
||||
// // }
|
||||
// //
|
||||
// // String msd = word.getMsd();
|
||||
// //
|
||||
// // StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
// //
|
||||
// // for (int i = 1; i < msd.length(); i++) {
|
||||
// // entry.setCharAt(i, msd.charAt(i));
|
||||
// // Common.updateMap(stats.result, entry.toString());
|
||||
// // entry.setCharAt(i, '-');
|
||||
// // }
|
||||
// // }
|
||||
// // }
|
||||
// // }
|
||||
//
|
||||
// static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||||
// for (Sentence s : corpus) {
|
||||
// // disregard if wrong taxonomy
|
||||
//// if (taxonomy != null && !(s.getObservableListTaxonomy().startsWith(taxonomy))) {
|
||||
//// continue;
|
||||
//// }
|
||||
//
|
||||
// for (Word word : s.getWords()) {
|
||||
// // skip if current word is not inflected
|
||||
// if (!(word.getMsd().length() > 0)) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// String msd = word.getMsd();
|
||||
//
|
||||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
//
|
||||
// for (int i = 1; i < msd.length(); i++) {
|
||||
// entry.setCharAt(i, msd.charAt(i));
|
||||
// Common.updateMap(stats.result, entry.toString());
|
||||
// entry.setCharAt(i, '-');
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
|
||||
// for (Sentence s : corpus) {
|
||||
//
|
||||
// for (Word word : s.getWords()) {
|
||||
// // skip if current word is not inflected
|
||||
// // // TODO: if has defined msd and is of correct type (create a set)
|
||||
// // if (!(word.getMsd().length() > 0)) {
|
||||
// // continue;
|
||||
// // }
|
||||
//
|
||||
// String msd = word.getMsd();
|
||||
//
|
||||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
//
|
||||
// for (int i = 1; i < msd.length(); i++) {
|
||||
// entry.setCharAt(i, msd.charAt(i));
|
||||
// stats.updateResults(entry.toString());
|
||||
// entry.setCharAt(i, '-');
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//}
|
|
@ -1,132 +0,0 @@
|
|||
package alg.inflectedJOS;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import data.Enums.InflectedJosTypes;
|
||||
import data.StatisticsNew;
|
||||
import data.Taxonomy;
|
||||
import gui.ValidationUtil;
|
||||
import util.Combinations;
|
||||
|
||||
// adapted from http://www.geeksforgeeks.org/print-all-possible-combinations-of-r-elements-in-a-given-array-of-size-n/
|
||||
public class WordFormation {
|
||||
private static HashMap<String, Long> josTypeResult;
|
||||
private static Object[][] tmpResults;
|
||||
|
||||
private static HashMap<Integer, HashSet<HashSet<Integer>>> indices;
|
||||
|
||||
static {
|
||||
indices = new HashMap<>();
|
||||
|
||||
for (int i = 4; i <= 8; i++) {
|
||||
indices.put(i, Combinations.generateIndices(i));
|
||||
}
|
||||
}
|
||||
|
||||
public static void calculateStatistics(StatisticsNew stat) {
|
||||
Map<String, AtomicLong> result = stat.getResult();
|
||||
|
||||
// 1. filter - keep only inflected types
|
||||
result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.toString().charAt(0)));
|
||||
|
||||
// 2. for each inflected type get all possible subcombinations
|
||||
for (Character josChar : InflectedJosTypes.inflectedJosTypes) {
|
||||
josTypeResult = new HashMap<>();
|
||||
|
||||
// filter out results for a single word type
|
||||
Map<String, AtomicLong> singleTypeResults = result.entrySet().stream()
|
||||
.filter(x -> x.getKey().charAt(0) == josChar)
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
|
||||
|
||||
if (ValidationUtil.isEmpty(singleTypeResults)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// get all possible indices combos for a msd of this length
|
||||
// HashSet<HashSet<Integer>> indicesCombos = indices.get()
|
||||
//Combinations.generateIndices(singleTypeResults.keySet().stream().findFirst().get().length());
|
||||
|
||||
for (Map.Entry<String, AtomicLong> e : singleTypeResults.entrySet()) {
|
||||
int l = e.getKey().length();
|
||||
|
||||
for (HashSet<Integer> indicesCombo : indices.get(e.getKey().length())) {
|
||||
updateResults(mask(e.getKey(), indicesCombo), e.getValue().longValue());
|
||||
}
|
||||
}
|
||||
|
||||
resultsMapToArray(singleTypeResults.values().stream().mapToLong(Number::longValue).sum());
|
||||
}
|
||||
|
||||
stat.setResultCustom(tmpResults);
|
||||
}
|
||||
|
||||
private static String mask(String word, HashSet<Integer> indicesCombo) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
sb.append(word.charAt(0));
|
||||
for (int i = 1; i < word.length(); i++) {
|
||||
sb.append(indicesCombo.contains(i) ? word.charAt(i) : ".");
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
private static void updateResults(String s, Long nOfOccurences) {
|
||||
// if not in map add
|
||||
Long r = josTypeResult.putIfAbsent(s, nOfOccurences);
|
||||
|
||||
// else update
|
||||
if (r != null) {
|
||||
josTypeResult.put(s, josTypeResult.get(s) + nOfOccurences);
|
||||
}
|
||||
}
|
||||
|
||||
private static void resultsMapToArray(Long totalValue) {
|
||||
Double total = totalValue * 1.0;
|
||||
Object[][] josTypeResultArray = new Object[josTypeResult.size()][3];
|
||||
|
||||
int i = 0;
|
||||
for (Map.Entry<String, Long> e : josTypeResult.entrySet()) {
|
||||
josTypeResultArray[i][0] = e.getKey();
|
||||
josTypeResultArray[i][1] = e.getValue();
|
||||
josTypeResultArray[i][2] = e.getValue() / total;
|
||||
|
||||
if (e.getValue() > total) {
|
||||
|
||||
String debug = "";
|
||||
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
if (tmpResults == null) {
|
||||
tmpResults = josTypeResultArray;
|
||||
} else {
|
||||
int firstLength = tmpResults.length;
|
||||
int secondLength = josTypeResultArray.length;
|
||||
Object[][] tmp = new Object[firstLength + secondLength][3];
|
||||
|
||||
System.arraycopy(tmpResults, 0, tmp, 0, firstLength);
|
||||
System.arraycopy(josTypeResultArray, 0, tmp, firstLength, secondLength);
|
||||
|
||||
tmpResults = tmp;
|
||||
|
||||
// tmpResults = ArrayUtils.addAll(tmpResults, josTypeResultArray);
|
||||
}
|
||||
}
|
||||
|
||||
private static void printArray() {
|
||||
for (int i = 0; i < tmpResults.length; i++) {
|
||||
for (int j = 0; j < tmpResults[i].length; j++) {
|
||||
System.out.print(tmpResults[i][j] + "\t");
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -80,36 +80,13 @@ public class Ngrams {
|
|||
}
|
||||
}
|
||||
|
||||
// boolean a = (correctPrefix.equals("") && !correctSuffix.equals(""));
|
||||
// boolean b = (!correctPrefix.equals("") && correctSuffix.equals(""));
|
||||
// boolean c = (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length());
|
||||
// boolean d = !((correctPrefix.equals("") && !correctSuffix.equals("")) ||
|
||||
// (!correctPrefix.equals("") && correctSuffix.equals("")) ||
|
||||
// (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()));
|
||||
|
||||
if(!((stats.getFilter().getPrefixList().size() == 0 && !correctSuffix.equals("")) ||
|
||||
(!correctPrefix.equals("") && stats.getFilter().getSuffixList().size() == 0) ||
|
||||
(!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){
|
||||
continue;
|
||||
}
|
||||
|
||||
// if(!((correctPrefix.equals("") && !correctSuffix.equals("")) ||
|
||||
// (!correctPrefix.equals("") && correctSuffix.equals("")) ||
|
||||
// (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){
|
||||
// continue;
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
// if last letter is ',' erase it
|
||||
|
||||
// if (key.equals("")){
|
||||
// String test = key;
|
||||
// }
|
||||
|
||||
// if (stats.getFilter().getNotePunctuations())
|
||||
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
|
||||
MultipleHMKeys multipleKeys;
|
||||
|
||||
// create MultipleHMKeys for different amount of other keys
|
||||
|
@ -119,28 +96,17 @@ public class Ngrams {
|
|||
break;
|
||||
case 1:
|
||||
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations())
|
||||
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
||||
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
||||
break;
|
||||
case 2:
|
||||
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
||||
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
||||
break;
|
||||
case 3:
|
||||
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
||||
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
||||
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
||||
break;
|
||||
case 4:
|
||||
|
@ -148,41 +114,13 @@ public class Ngrams {
|
|||
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
||||
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
||||
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
||||
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
||||
break;
|
||||
default:
|
||||
multipleKeys = null;
|
||||
}
|
||||
|
||||
|
||||
// String lemma = "";
|
||||
// String wordType = "";
|
||||
// String msd = "";
|
||||
// for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){
|
||||
// if(otherKey.toString().equals("lema")){
|
||||
// lemma = wordToString(ngramCandidate, otherKey);
|
||||
// } else if(otherKey.toString().equals("besedna vrsta")){
|
||||
// wordType = wordToString(ngramCandidate, otherKey).substring(0, 1);
|
||||
// } else if(otherKey.toString().equals("oblikoskladenjska oznaka")){
|
||||
// msd = wordToString(ngramCandidate, otherKey);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// UPDATE TAXONOMY HERE!!!
|
||||
stats.updateTaxonomyResults(multipleKeys, s.getTaxonomy());
|
||||
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -191,18 +129,12 @@ public class Ngrams {
|
|||
* Checks whether an ngram candidate passes specified regex filter.
|
||||
*/
|
||||
private static boolean passesRegex(List<Word> ngramCandidate, ArrayList<Pattern> regex, ArrayList<CalculateFor> wordParts) {
|
||||
// if (ngramCandidate.size() != regex.size()) {
|
||||
// logger.error("ngramCandidate.size() & msd.size() mismatch"); // should not occur anyway
|
||||
// return false;
|
||||
// }
|
||||
|
||||
int j = 0;
|
||||
for (int i = 0; i < ngramCandidate.size(); i++) {
|
||||
String msd = ngramCandidate.get(i).getMsd(wordParts);
|
||||
if (msd.equals("*")){
|
||||
continue;
|
||||
}
|
||||
//if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
|
||||
if (!msd.matches(regex.get(j).pattern() + ".*")) {
|
||||
return false;
|
||||
}
|
||||
|
@ -247,11 +179,6 @@ public class Ngrams {
|
|||
.stream()
|
||||
.map(w -> Character.toString(w.getMsd(wordParts).length() > 0 ? w.getMsd(wordParts).charAt(0) : '/'))
|
||||
.collect(Collectors.toList()));
|
||||
// candidate.addAll(ngramCandidate
|
||||
// .stream()
|
||||
// .map(w -> Character.toString(w.getMsd().charAt(0)))
|
||||
// .collect(Collectors.toList()));
|
||||
// .substring(0, 1)
|
||||
return StringUtils.join(candidate, " ");
|
||||
case NORMALIZED_WORD:
|
||||
candidate.addAll(ngramCandidate
|
||||
|
@ -322,32 +249,6 @@ public class Ngrams {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks skipped words and if necessary adds punctuations.
|
||||
*
|
||||
* @return List of candidates represented as a list<candidates(String)>
|
||||
*/
|
||||
private static Word checkAndModifySkipgramPunctuation(List<Word> sentence, int i, int j, StatisticsNew stats){
|
||||
// if punctuation checkbox selected and there words at indexes i and j are not next to each other
|
||||
// if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
|
||||
// boolean middleWordsHavePunctuation = false;
|
||||
// for (int n = i + 1; n < j; n++){
|
||||
// if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
|
||||
// middleWordsHavePunctuation = true;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// if (middleWordsHavePunctuation){
|
||||
//
|
||||
// String punctuation = ",";
|
||||
// return new Word(sentence.get(i).getWord() + punctuation,
|
||||
// sentence.get(i).getLemma() + punctuation,
|
||||
// sentence.get(i).getMsd() + punctuation);
|
||||
// }
|
||||
// }
|
||||
return sentence.get(i);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts skipgram candidates.
|
||||
|
@ -363,8 +264,6 @@ public class Ngrams {
|
|||
for (Sentence s : corpus) {
|
||||
List<Word> sentence = s.getWords();
|
||||
|
||||
// stats.updateUniGramOccurrences(s.getWords().size());
|
||||
|
||||
if (sentence == null){
|
||||
continue;
|
||||
}
|
||||
|
@ -373,7 +272,6 @@ public class Ngrams {
|
|||
for (int j = i + 1; j <= i + skip + 1; j++) { // 2gram
|
||||
if (ngram == 2 && j < sentence.size()) {
|
||||
currentLoop = new ArrayList<>();
|
||||
// currentLoop.add(sentence.get(i));
|
||||
currentLoop.add(sentence.get(i));
|
||||
fillSkipgrams(currentLoop, i, j, w);
|
||||
currentLoop.add(sentence.get(j));
|
||||
|
@ -439,25 +337,10 @@ public class Ngrams {
|
|||
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<Taxonomy> taxonomy) {
|
||||
// count if no regex is set or if it is & candidate passes it
|
||||
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) {
|
||||
// String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
|
||||
// key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
// stats.updateTaxonomyResults(new MultipleHMKeys1(key),
|
||||
// stats.getCorpus().getObservableListTaxonomy());
|
||||
|
||||
|
||||
ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
|
||||
|
||||
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts());
|
||||
|
||||
// if last letter is ',' erase it
|
||||
|
||||
// if (key.equals("")){
|
||||
// String test = key;
|
||||
// }
|
||||
|
||||
// if (stats.getFilter().getNotePunctuations())
|
||||
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
|
||||
MultipleHMKeys multipleKeys;
|
||||
|
||||
// create MultipleHMKeys for different amount of other keys
|
||||
|
@ -467,28 +350,17 @@ public class Ngrams {
|
|||
break;
|
||||
case 1:
|
||||
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations())
|
||||
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
||||
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
||||
break;
|
||||
case 2:
|
||||
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
||||
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
||||
break;
|
||||
case 3:
|
||||
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
||||
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
||||
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
||||
break;
|
||||
case 4:
|
||||
|
@ -496,12 +368,6 @@ public class Ngrams {
|
|||
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
||||
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
||||
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
||||
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
||||
// }
|
||||
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -1,167 +0,0 @@
|
|||
package alg.word;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import alg.Common;
|
||||
import data.CalculateFor;
|
||||
import data.Sentence;
|
||||
import data.Statistics;
|
||||
import data.Word;
|
||||
|
||||
//class WordCount {
|
||||
// private static void calculateNoFilter(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
//
|
||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getLemma)
|
||||
// .collect(Collectors.toList()));
|
||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getWord)
|
||||
// .collect(Collectors.toList()));
|
||||
// }
|
||||
//
|
||||
// for (String word : sentence) {
|
||||
// Common.updateMap(stats.result, word);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private static void calculateVCC(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
//
|
||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getCVVLemma)
|
||||
// .collect(Collectors.toList()));
|
||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getCVVWord)
|
||||
// .collect(Collectors.toList()));
|
||||
// }
|
||||
//
|
||||
// for (String word : sentence) {
|
||||
// if (word.length() > stats.getSubstringLength()) {
|
||||
// for (int i = 0; i <= word.length() - stats.getSubstringLength(); i++) {
|
||||
// String substring = word.substring(i, i + stats.getSubstringLength());
|
||||
// Common.updateMap(stats.result, substring);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private static void calculateForJosType(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
// List<Word> filteredWords = new ArrayList<>();
|
||||
//
|
||||
// for (Word word : s.getWords()) {
|
||||
// if (word.getMsd() != null && word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
|
||||
// filteredWords.add(word);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
// sentence.addAll(filteredWords
|
||||
// .stream()
|
||||
// .map(Word::getLemma)
|
||||
// .collect(Collectors.toList()));
|
||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
||||
// sentence.addAll(filteredWords
|
||||
// .stream()
|
||||
// .map(Word::getWord)
|
||||
// .collect(Collectors.toList()));
|
||||
// }
|
||||
//
|
||||
// for (String word : sentence) {
|
||||
// Common.updateMap(stats.result, word);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
|
||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
// List<Word> filteredWords = new ArrayList<>();
|
||||
//
|
||||
// for (Word word : s.getWords()) {
|
||||
// if (word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
|
||||
// filteredWords.add(word);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
// sentence.addAll(filteredWords
|
||||
// .stream()
|
||||
// .map(Word::getLemma)
|
||||
// .collect(Collectors.toList()));
|
||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
||||
// sentence.addAll(filteredWords
|
||||
// .stream()
|
||||
// .map(Word::getWord)
|
||||
// .collect(Collectors.toList()));
|
||||
// }
|
||||
//
|
||||
// for (String word : sentence) {
|
||||
// Common.updateMap(stats.result, word);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
|
||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
//
|
||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getLemma)
|
||||
// .collect(Collectors.toList()));
|
||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getWord)
|
||||
// .collect(Collectors.toList()));
|
||||
// }
|
||||
//
|
||||
// for (String word : sentence) {
|
||||
// Common.updateMap(stats.result, word);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
||||
// boolean taxonomyIsSet = stats.isTaxonomySet();
|
||||
// boolean JosTypeIsSet = stats.isJOSTypeSet();
|
||||
//
|
||||
// // branching because even though the only difference is an if or two &&
|
||||
// // O(if) = 1, the amount of ifs adds up and this saves some time
|
||||
// if (taxonomyIsSet && JosTypeIsSet) {
|
||||
// calculateForTaxonomyAndJosType(corpus, stats);
|
||||
// } else if (taxonomyIsSet && !JosTypeIsSet) {
|
||||
// calculateForTaxonomy(corpus, stats);
|
||||
// } else if (!taxonomyIsSet && JosTypeIsSet) {
|
||||
// calculateForJosType(corpus, stats);
|
||||
// } else {
|
||||
// if (stats.isVcc()) {
|
||||
// calculateVCC(corpus, stats);
|
||||
// } else {
|
||||
// calculateNoFilter(corpus, stats);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//}
|
|
@ -3,24 +3,6 @@ package data;
|
|||
import gui.I18N;
|
||||
|
||||
public enum CalculateFor {
|
||||
// calculateFor.WORD=word
|
||||
// calculateFor.NORMALIZED_WORD=normalized word
|
||||
// calculateFor.LEMMA=lemma
|
||||
// calculateFor.MORPHOSYNTACTIC_SPECS=msd
|
||||
// calculateFor.MORPHOSYNTACTIC_PROPERTY=oblikoskladenjska lastnost
|
||||
// calculateFor.WORD_TYPE=besedna vrsta
|
||||
// calculateFor.DIST_WORDS=različnica
|
||||
// calculateFor.DIST_LEMMAS=lema
|
||||
|
||||
// WORD("različnica"),
|
||||
// NORMALIZED_WORD("normalizirana različnica"),
|
||||
// LEMMA("lema"),
|
||||
// MORPHOSYNTACTIC_SPECS("oblikoskladenjska oznaka"),
|
||||
// MORPHOSYNTACTIC_PROPERTY("oblikoskladenjska lastnost"),
|
||||
// WORD_TYPE("besedna vrsta"),
|
||||
// DIST_WORDS("različnica"),
|
||||
// DIST_LEMMAS("lema");
|
||||
|
||||
WORD("calculateFor.WORD"),
|
||||
LOWERCASE_WORD("calculateFor.LOWERCASE_WORD"),
|
||||
NORMALIZED_WORD("calculateFor.NORMALIZED_WORD"),
|
||||
|
@ -44,7 +26,6 @@ public enum CalculateFor {
|
|||
|
||||
public static CalculateFor factory(String cf) {
|
||||
if (cf != null) {
|
||||
// String name = I18N.findI18NString(cf, "calculateFor");
|
||||
if (WORD.toString().equals(cf)) {
|
||||
return WORD;
|
||||
}
|
||||
|
@ -275,27 +256,4 @@ public enum CalculateFor {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// public String toPercentString() {
|
||||
// switch(this){
|
||||
// case WORD:
|
||||
// return "Delež glede na vse različnice";
|
||||
// case NORMALIZED_WORD:
|
||||
// return "Delež glede na vse normalizirane različnice";
|
||||
// case LEMMA:
|
||||
// return "Delež glede na vse leme";
|
||||
// case MORPHOSYNTACTIC_SPECS:
|
||||
// return "Delež glede na vse oblikoskladenjske oznake";
|
||||
// case MORPHOSYNTACTIC_PROPERTY:
|
||||
// return "Delež glede na vse oblikoskladenjske lastnosti";
|
||||
// case WORD_TYPE:
|
||||
// return "Delež glede na vse besedne vrste";
|
||||
// case DIST_WORDS:
|
||||
// return "Delež glede na vse različnice";
|
||||
// case DIST_LEMMAS:
|
||||
// return "Delež glede na vse leme";
|
||||
// default:
|
||||
// return null;
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
|
|
@ -74,13 +74,4 @@ public enum Collocability {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// public String toPercentString() {
|
||||
// switch(this){
|
||||
// case DICE:
|
||||
// return "Delež glede na vse različnice";
|
||||
// default:
|
||||
// return null;
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
|
|
@ -17,7 +17,6 @@ import org.apache.logging.log4j.Logger;
|
|||
import data.Enums.solar.SolarFilters;
|
||||
import gui.ValidationUtil;
|
||||
import javafx.collections.ObservableList;
|
||||
import org.controlsfx.control.CheckComboBox;
|
||||
|
||||
public class Corpus {
|
||||
public final static Logger logger = LogManager.getLogger(Corpus.class);
|
||||
|
@ -33,7 +32,6 @@ public class Corpus {
|
|||
public HashMap<String, ObservableList<String>> solarSelectedFilters; // if solar selected
|
||||
private HashMap<String, HashSet<String>> solarFiltersForXML; // if solar - used while parsing xml
|
||||
private boolean gosOrthMode;
|
||||
boolean hasMsdData;
|
||||
private ArrayList<String> validationErrors;
|
||||
private String corpusName = "";
|
||||
private String punctuation = "punctuation.COMMA";
|
||||
|
@ -48,7 +46,6 @@ public class Corpus {
|
|||
}
|
||||
|
||||
public void setCorpusName(String corpusName) {
|
||||
// System.out.println(corpusName);
|
||||
this.corpusName = corpusName;
|
||||
logger.info("Corpus.set: ", corpusName);
|
||||
}
|
||||
|
@ -58,7 +55,6 @@ public class Corpus {
|
|||
}
|
||||
|
||||
public void setPunctuation(String punctuation) {
|
||||
// System.out.println(corpusName);
|
||||
this.punctuation = punctuation;
|
||||
logger.info("Punctuation.set: ", punctuation);
|
||||
}
|
||||
|
@ -99,10 +95,6 @@ public class Corpus {
|
|||
logger.info("Corpus.set: ", detectedCorpusFiles);
|
||||
}
|
||||
|
||||
public boolean isHeaderRead() {
|
||||
return headerRead;
|
||||
}
|
||||
|
||||
public void setHeaderRead(boolean headerRead) {
|
||||
this.headerRead = headerRead;
|
||||
}
|
||||
|
@ -128,11 +120,6 @@ public class Corpus {
|
|||
}
|
||||
return FXCollections.observableArrayList(al);
|
||||
}
|
||||
//
|
||||
// public ObservableList<String> getFormattedTaxonomy() {
|
||||
// ArrayList<String> al = Tax.getTaxonomyFormatted(new ArrayList<>(taxonomy), corpusType);
|
||||
// return FXCollections.observableArrayList(al);
|
||||
// }
|
||||
|
||||
public void setTaxonomy(ObservableList<String> taxonomy) {
|
||||
this.taxonomy = new ArrayList<>();
|
||||
|
@ -155,15 +142,6 @@ public class Corpus {
|
|||
return solarSelectedFilters;
|
||||
}
|
||||
|
||||
public void setSolarSelectedFilters(HashMap<String, ObservableList<String>> solarFilters) {
|
||||
this.solarSelectedFilters = solarFilters;
|
||||
logger.info("Corpus.set: ", solarFilters);
|
||||
}
|
||||
|
||||
public HashMap<String, HashSet<String>> getSolarFiltersForXML() {
|
||||
return solarFiltersForXML;
|
||||
}
|
||||
|
||||
public void setSolarFiltersForXML(HashMap<String, HashSet<String>> solarFiltersForXML) {
|
||||
this.solarFiltersForXML = solarFiltersForXML;
|
||||
logger.info("Corpus.set: ", solarFiltersForXML);
|
||||
|
@ -173,23 +151,10 @@ public class Corpus {
|
|||
return gosOrthMode;
|
||||
}
|
||||
|
||||
public void setGosOrthMode(boolean gosOrthMode) {
|
||||
this.gosOrthMode = gosOrthMode;
|
||||
logger.info("Corpus.set: ", gosOrthMode);
|
||||
}
|
||||
|
||||
public ArrayList<String> getValidationErrors() {
|
||||
return validationErrors;
|
||||
}
|
||||
|
||||
public String getValidationErrorsToString() {
|
||||
return StringUtils.join(validationErrors, "\n - ");
|
||||
}
|
||||
|
||||
public void setValidationErrors(ArrayList<String> validationErrors) {
|
||||
this.validationErrors = validationErrors;
|
||||
}
|
||||
|
||||
public boolean validate() {
|
||||
if (corpusType == null) {
|
||||
validationErrors.add(I18N.get("message.LABEL_RESULTS_CORPUS_TYPE_NOT_SET"));
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
package data.Enums;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
|
||||
/**
 * Holder for the set of JOS word-type codes listed as inflected:
 * {@code 'S'}, {@code 'G'} and {@code 'P'}.
 */
public class InflectedJosTypes {
    /** Word-type codes contained in the set; populated once at class load. */
    public static final HashSet<Character> inflectedJosTypes =
            new HashSet<>(Arrays.asList('S', 'G', 'P'));
}
|
|
@ -1,68 +0,0 @@
|
|||
package data.Enums;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
 * Morphosyntactic categories. Each constant carries a Slovenian name and
 * one-letter code, an English name and one-letter code, and the number of
 * attributes recorded for that category.
 */
public enum Msd {
    NOUN("samostalnik", 'S', "Noun", 'N', 5),
    VERB("glagol", 'G', "Verb", 'V', 7),
    ADJECTIVE("pridevnik", 'P', "Adjective", 'A', 6),
    ADVERB("prislov", 'R', "Adverb", 'R', 2),
    PRONOUN("zaimek", 'Z', "Pronoun", 'P', 8),
    NUMERAL("števnik", 'K', "Numeral", 'M', 6),
    PREPOSITION("predlog", 'D', "Preposition", 'S', 1),
    CONJUNCTION("veznik", 'V', "Conjunction", 'C', 1),
    PARTICLE("členek", 'L', "Particle", 'Q', 0),
    INTERJECTION("medmet", 'M', "Interjection", 'I', 0),
    ABBREVIATION("okrajšava", 'O', "Abbreviation", 'Y', 0),
    RESIDUAL("neuvrščeno", 'N', "Residual", 'X', 1);

    private final String siName;
    private final Character siCode;
    private final String enName;
    private final Character enCode;
    private final Integer nOfAttributes;

    /** Lookup from Slovenian category code to its attribute count. */
    private static final Map<Character, Integer> siCodeNOfAttributes = new HashMap<>();

    static {
        // Runs after all constants are constructed, so values() is complete here.
        for (Msd msd : Msd.values()) {
            siCodeNOfAttributes.put(msd.siCode, msd.nOfAttributes);
        }
    }

    Msd(String siName, Character siCode, String enName, Character enCode, int nOfAttributes) {
        this.siName = siName;
        this.siCode = siCode;
        this.enName = enName;
        this.enCode = enCode;
        this.nOfAttributes = nOfAttributes;
    }

    /** @return the Slovenian category name */
    public String getSiName() {
        return siName;
    }

    /** @return the Slovenian one-letter category code */
    public Character getSiCode() {
        return siCode;
    }

    /** @return the English category name */
    public String getEnName() {
        return enName;
    }

    /** @return the English one-letter category code */
    public Character getEnCode() {
        return enCode;
    }

    /**
     * Returns the tag length for the category of the given tag: the
     * attribute count of the category plus one for the category letter.
     *
     * @param msd tag whose first character is a Slovenian category code
     * @return attribute count of the category plus one
     * @throws IllegalArgumentException if {@code msd} is null, empty, or
     *         starts with a character that is not a known Slovenian
     *         category code
     */
    public static int getMsdLengthForType(String msd) {
        if (msd == null || msd.isEmpty()) {
            throw new IllegalArgumentException("MSD tag must not be null or empty");
        }

        // Explicit null check: unboxing a missing entry would otherwise fail
        // with an uninformative NullPointerException.
        Integer attributes = siCodeNOfAttributes.get(msd.charAt(0));
        if (attributes == null) {
            throw new IllegalArgumentException(
                    "Unknown MSD category code '" + msd.charAt(0) + "' in tag: " + msd);
        }
        return attributes + 1;
    }
}
|
|
@ -27,9 +27,6 @@ public class SolarFilters {
|
|||
SOLAR_FILTERS.put(TIP, FXCollections.observableArrayList("esej/spis", "pisni izdelek (učna ura)", "test (daljše besedilo)", "test (odgovori na vprašanja)"));
|
||||
}
|
||||
|
||||
public static final ObservableList<String> N_GRAM_COMPUTE_FOR_FULL = FXCollections.observableArrayList("različnica", "lema", "oblikoskladenjska oznaka", "oblikoskladenjska lastnost", "besedna vrsta");
|
||||
public static final ObservableList<String> N_GRAM_COMPUTE_FOR_LIMITED = FXCollections.observableArrayList("različnica", "lema");
|
||||
|
||||
/**
|
||||
* Returns filters with all possible values
|
||||
*/
|
||||
|
|
|
@ -349,7 +349,6 @@ public class Filter implements Cloneable {
|
|||
}
|
||||
|
||||
|
||||
|
||||
public Object clone() throws CloneNotSupportedException{
|
||||
Filter f = null;
|
||||
try {
|
||||
|
|
|
@ -1,71 +0,0 @@
|
|||
package data;
|
||||
|
||||
/**
 * Word types, each with a display name and a one-character code.
 */
public enum GigafidaJosWordType {
    SAMOSTALNIK("samostalnik", 'S'),
    GLAGOL("glagol", 'G'),
    PRIDEVNIK("pridevnik", 'P'),
    PRISLOV("prislov", 'R'),
    ZAIMEK("zaimek", 'Z'),
    STEVNIK("stevnik", 'K'),
    PREDLOG("predlog", 'D'),
    VEZNIK("veznik", 'V'),
    CLENEK("clenek", 'L'),
    MEDMET("medmet", 'M'),
    OKRAJSAVA("okrajsava", 'O');

    private final String name;
    private final char wordType;

    GigafidaJosWordType(String name, char wordType) {
        this.name = name;
        this.wordType = wordType;
    }

    /** @return the display name of this word type */
    @Override
    public String toString() {
        return name;
    }

    /** @return the one-character code of this word type */
    public char getWordType() {
        return wordType;
    }

    /**
     * Finds the constant whose display name equals the given string.
     *
     * @param wType display name to look up; may be {@code null}
     * @return the matching constant, or {@code null} when {@code wType} is
     *         {@code null} or matches no constant
     */
    public static GigafidaJosWordType factory(String wType) {
        if (wType == null) {
            return null;
        }
        // Constants are checked in declaration order.
        for (GigafidaJosWordType candidate : values()) {
            if (candidate.toString().equals(wType)) {
                return candidate;
            }
        }
        return null;
    }
}
|
|
@ -1,76 +0,0 @@
|
|||
package data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ObservableList;
|
||||
|
||||
public enum GigafidaTaxonomy {
|
||||
TISK("tisk", "T"),
|
||||
KNJIZNO("knjižno", "T.K"),
|
||||
LEPOSLOVNO("leposlovno", "T.K.L"),
|
||||
STROKOVNO("strokovno", "T.K.S"),
|
||||
PERIODICNO("periodično", "T.P"),
|
||||
CASOPIS("časopis", "T.P.C"),
|
||||
REVIJA("revija", "T.P.R"),
|
||||
INTERNET("internet", "I");
|
||||
|
||||
private final String name;
|
||||
private final String taxonomy;
|
||||
|
||||
private static final ObservableList<String> FOR_COMBO_BOX;
|
||||
|
||||
static {
|
||||
ArrayList<String> values = Arrays.stream(GigafidaTaxonomy.values()).map(x -> x.name).collect(Collectors.toCollection(ArrayList::new));
|
||||
FOR_COMBO_BOX = FXCollections.observableArrayList(values);
|
||||
}
|
||||
|
||||
GigafidaTaxonomy(String name, String taxonomy) {
|
||||
this.name = name;
|
||||
this.taxonomy = taxonomy;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return this.name;
|
||||
}
|
||||
|
||||
public String getTaxonomnyString() {
|
||||
return this.taxonomy;
|
||||
}
|
||||
|
||||
public static GigafidaTaxonomy factory(String tax) {
|
||||
if (tax != null) {
|
||||
if (TISK.toString().equals(tax)) {
|
||||
return TISK;
|
||||
}
|
||||
if (KNJIZNO.toString().equals(tax)) {
|
||||
return KNJIZNO;
|
||||
}
|
||||
if (LEPOSLOVNO.toString().equals(tax)) {
|
||||
return LEPOSLOVNO;
|
||||
}
|
||||
if (STROKOVNO.toString().equals(tax)) {
|
||||
return STROKOVNO;
|
||||
}
|
||||
if (PERIODICNO.toString().equals(tax)) {
|
||||
return PERIODICNO;
|
||||
}
|
||||
if (CASOPIS.toString().equals(tax)) {
|
||||
return CASOPIS;
|
||||
}
|
||||
if (REVIJA.toString().equals(tax)) {
|
||||
return REVIJA;
|
||||
}
|
||||
if (INTERNET.toString().equals(tax)) {
|
||||
return INTERNET;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static ObservableList<String> getForComboBox() {
|
||||
return FOR_COMBO_BOX;
|
||||
}
|
||||
}
|
|
@ -1,85 +0,0 @@
|
|||
package data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ObservableList;
|
||||
|
||||
public enum GosTaxonomy {
|
||||
JAVNI("javni", "gos.T.J"),
|
||||
INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "gos.T.J.I"),
|
||||
RAZVEDRILNI("razvedrilni", "gos.T.J.R"),
|
||||
NEJAVNI("nejavni", "gos.T.N"),
|
||||
NEZASEBNI("nezasebni", "gos.T.N.N"),
|
||||
ZASEBNI("zasebni", "gos.T.N.Z"),
|
||||
OSEBNI_STIK("osebni stik", "gos.K.O"),
|
||||
TELEFON("telefon", "gos.K.P"),
|
||||
RADIO("radio", "gos.K.R"),
|
||||
TELEVIZIJA("televizija", "gos.K.T");
|
||||
|
||||
|
||||
private final String name;
|
||||
private final String taxonomy;
|
||||
|
||||
private static final ObservableList<String> FOR_COMBO_BOX;
|
||||
|
||||
static {
|
||||
ArrayList<String> values = Arrays.stream(GosTaxonomy.values()).map(x -> x.name).collect(Collectors.toCollection(ArrayList::new));
|
||||
FOR_COMBO_BOX = FXCollections.observableArrayList(values);
|
||||
}
|
||||
|
||||
GosTaxonomy(String name, String taxonomy) {
|
||||
this.name = name;
|
||||
this.taxonomy = taxonomy;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return this.name;
|
||||
}
|
||||
|
||||
public String getTaxonomnyString() {
|
||||
return this.taxonomy;
|
||||
}
|
||||
|
||||
public static GosTaxonomy factory(String tax) {
|
||||
if (tax != null) {
|
||||
if (JAVNI.toString().equals(tax)) {
|
||||
return JAVNI;
|
||||
}
|
||||
if (INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) {
|
||||
return INFORMATIVNO_IZOBRAZEVALNI;
|
||||
}
|
||||
if (RAZVEDRILNI.toString().equals(tax)) {
|
||||
return RAZVEDRILNI;
|
||||
}
|
||||
if (NEJAVNI.toString().equals(tax)) {
|
||||
return NEJAVNI;
|
||||
}
|
||||
if (NEZASEBNI.toString().equals(tax)) {
|
||||
return NEZASEBNI;
|
||||
}
|
||||
if (ZASEBNI.toString().equals(tax)) {
|
||||
return ZASEBNI;
|
||||
}
|
||||
if (OSEBNI_STIK.toString().equals(tax)) {
|
||||
return OSEBNI_STIK;
|
||||
}
|
||||
if (TELEFON.toString().equals(tax)) {
|
||||
return TELEFON;
|
||||
}
|
||||
if (RADIO.toString().equals(tax)) {
|
||||
return RADIO;
|
||||
}
|
||||
if (TELEVIZIJA.toString().equals(tax)) {
|
||||
return TELEVIZIJA;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static ObservableList<String> getForComboBox() {
|
||||
return FOR_COMBO_BOX;
|
||||
}
|
||||
}
|
|
@ -36,15 +36,12 @@ public final class MultipleHMKeys2 implements MultipleHMKeys {
|
|||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(k1, k2);
|
||||
// return key.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof MultipleHMKeys2) && ((MultipleHMKeys2) obj).k1.equals(k1)
|
||||
&& ((MultipleHMKeys2) obj).k2.equals(k2);
|
||||
|
||||
// return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key);
|
||||
}
|
||||
|
||||
public MultipleHMKeys[] splitNgramTo1grams(){
|
||||
|
|
|
@ -18,22 +18,6 @@ public class Sentence {
|
|||
this.taxonomy = taxonomy;
|
||||
}
|
||||
|
||||
// public Sentence(List<Word> words) {
|
||||
// this.words = words;
|
||||
// }
|
||||
|
||||
/**
 * Creates a sentence with its words, taxonomy entries and a property map.
 *
 * @param words      words of the sentence
 * @param taxonomy   taxonomy entries attached to the sentence
 * @param properties property map stored with the sentence
 */
public Sentence(List<Word> words, List<Taxonomy> taxonomy, Map<String, String> properties) {
    this.properties = properties;
    this.taxonomy = taxonomy;
    this.words = words;
}
|
||||
|
||||
/**
 * Creates a sentence with its words, taxonomy entries and a type label.
 *
 * @param words    words of the sentence
 * @param taxonomy taxonomy entries attached to the sentence
 * @param type     type string stored with the sentence
 */
public Sentence(List<Word> words, List<Taxonomy> taxonomy, String type) {
    this.type = type;
    this.taxonomy = taxonomy;
    this.words = words;
}
|
||||
|
||||
public List<Word> getWords() {
|
||||
return words;
|
||||
}
|
||||
|
|
|
@ -8,9 +8,6 @@ public class Settings {
|
|||
public static final int CORPUS_SENTENCE_LIMIT = 50000;
|
||||
public static final boolean PRINT_LOG = false;
|
||||
|
||||
public static final String FX_ACCENT_OK = "-fx-accent: forestgreen;";
|
||||
public static final String FX_ACCENT_NOK = "-fx-accent: red;";
|
||||
|
||||
public static Collection<File> corpus;
|
||||
public static File resultsFilePath;
|
||||
}
|
||||
|
|
|
@ -1,299 +0,0 @@
|
|||
package data;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import util.Util;
|
||||
import util.db.RDB;
|
||||
|
||||
public class Statistics {
|
||||
private CorpusType corpusType;
|
||||
private AnalysisLevel analysisLevel;
|
||||
private boolean useDB;
|
||||
private RDB db;
|
||||
|
||||
private boolean analysisProducedResults;
|
||||
|
||||
private String taxonomy;
|
||||
private boolean taxonomyIsSet;
|
||||
|
||||
private char JOSType;
|
||||
private boolean JOSTypeIsSet;
|
||||
|
||||
private String resultTitle;
|
||||
public Map<String, AtomicLong> result = new ConcurrentHashMap<>();
|
||||
|
||||
// nGrams
|
||||
private int nGramLevel;
|
||||
private Integer skip;
|
||||
private CalculateFor cf;
|
||||
private List<Pattern> morphosyntacticFilter;
|
||||
|
||||
// distributions
|
||||
private String distributionTaxonomy;
|
||||
private char distributionJosWordType;
|
||||
private boolean vcc;
|
||||
private Integer substringLength;
|
||||
|
||||
// inflected JOS
|
||||
private String inflectedJosTaxonomy;
|
||||
|
||||
// GOS
|
||||
boolean gosOrthMode;
|
||||
|
||||
// šolar
|
||||
Map<String, Object> solarHeadBlockFilter;
|
||||
|
||||
|
||||
// for ngrams
|
||||
public Statistics(AnalysisLevel al, int nGramLevel, Integer skip, CalculateFor cf) {
|
||||
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
||||
this.cf = cf;
|
||||
this.analysisLevel = al;
|
||||
this.nGramLevel = nGramLevel;
|
||||
this.skip = skip == null || skip == 0 ? null : skip;
|
||||
|
||||
this.resultTitle = String.format("%s%d-gram_%s_%s",
|
||||
this.skip != null ? String.format("%d-%s-", skip, "skip") : "",
|
||||
nGramLevel,
|
||||
cf.toString(),
|
||||
dateTime);
|
||||
}
|
||||
|
||||
// for words distributions
|
||||
// public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
|
||||
// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
||||
//
|
||||
// this.resultTitle = String.format("%s_%s_%s",
|
||||
// distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
|
||||
// distributionJosWordType != null ? distributionJosWordType.toString() : "",
|
||||
// dateTime);
|
||||
//
|
||||
// this.analysisLevel = al;
|
||||
// this.cf = cf;
|
||||
// this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
|
||||
// this.taxonomyIsSet = distributionTaxonomy != null;
|
||||
//
|
||||
// this.JOSTypeIsSet = distributionJosWordType != null;
|
||||
// this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
|
||||
// }
|
||||
|
||||
public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) {
|
||||
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
||||
|
||||
this.resultTitle = String.format("%s_%d_%s",
|
||||
"Distribucija zaporedij samoglasnikov in soglasnikov",
|
||||
substringLength,
|
||||
dateTime);
|
||||
|
||||
this.analysisLevel = al;
|
||||
this.cf = cf;
|
||||
this.substringLength = substringLength;
|
||||
this.vcc = true;
|
||||
}
|
||||
|
||||
// public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
|
||||
// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
||||
//
|
||||
// this.resultTitle = String.format("InflectedJOS_%s_%s",
|
||||
// distributionTaxonomy != null ? distributionTaxonomy : "",
|
||||
// dateTime);
|
||||
//
|
||||
// this.analysisLevel = al;
|
||||
// this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
|
||||
// this.taxonomyIsSet = inflectedJosTaxonomy != null;
|
||||
// }
|
||||
|
||||
public Integer getSkip() {
|
||||
return skip;
|
||||
}
|
||||
|
||||
public Integer getSubstringLength() {
|
||||
return substringLength;
|
||||
}
|
||||
|
||||
public String getInflectedJosTaxonomy() {
|
||||
return inflectedJosTaxonomy;
|
||||
}
|
||||
|
||||
public void setSubstringLength(Integer substringLength) {
|
||||
this.substringLength = substringLength;
|
||||
}
|
||||
|
||||
public boolean isVcc() {
|
||||
return vcc;
|
||||
}
|
||||
|
||||
public void setVcc(boolean vcc) {
|
||||
this.vcc = vcc;
|
||||
}
|
||||
|
||||
public String getDistributionTaxonomy() {
|
||||
return distributionTaxonomy;
|
||||
}
|
||||
|
||||
public void setDistributionTaxonomy(String distributionTaxonomy) {
|
||||
this.distributionTaxonomy = distributionTaxonomy;
|
||||
}
|
||||
|
||||
public char getDistributionJosWordType() {
|
||||
return distributionJosWordType;
|
||||
}
|
||||
|
||||
public void setDistributionJosWordType(char distributionJosWordType) {
|
||||
this.distributionJosWordType = distributionJosWordType;
|
||||
}
|
||||
|
||||
public void setMorphosyntacticFilter(List<String> morphosyntacticFilter) {
|
||||
// change filter strings to regex patterns
|
||||
this.morphosyntacticFilter = new ArrayList<>();
|
||||
for (String s : morphosyntacticFilter) {
|
||||
this.morphosyntacticFilter.add(Pattern.compile(s.replaceAll("\\*", ".")));
|
||||
}
|
||||
}
|
||||
|
||||
public List<Pattern> getMsd() {
|
||||
return morphosyntacticFilter;
|
||||
}
|
||||
|
||||
public Map<String, AtomicLong> getResult() {
|
||||
return result;
|
||||
}
|
||||
|
||||
public void setTaxonomy(String taxonomy) {
|
||||
this.taxonomy = taxonomy;
|
||||
}
|
||||
|
||||
public void setTaxonomyIsSet(boolean taxonomyIsSet) {
|
||||
this.taxonomyIsSet = taxonomyIsSet;
|
||||
}
|
||||
|
||||
public char getJOSType() {
|
||||
return JOSType;
|
||||
}
|
||||
|
||||
public void setJOSType(char JOSType) {
|
||||
this.JOSType = JOSType;
|
||||
}
|
||||
|
||||
public boolean isJOSTypeSet() {
|
||||
return JOSTypeIsSet;
|
||||
}
|
||||
|
||||
public void setJOSType(boolean JOSTypeIsSet) {
|
||||
this.JOSTypeIsSet = JOSTypeIsSet;
|
||||
}
|
||||
|
||||
public void saveResultToDisk(int... limit) throws UnsupportedEncodingException {
|
||||
// Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
|
||||
//
|
||||
// if (useDB) {
|
||||
// result = db.getDump();
|
||||
// db.delete();
|
||||
// }
|
||||
//
|
||||
// // if no results and nothing to save, return false
|
||||
// if (!(result.size() > 0)) {
|
||||
// analysisProducedResults = false;
|
||||
// return;
|
||||
// } else {
|
||||
// analysisProducedResults = true;
|
||||
// }
|
||||
//
|
||||
// stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
|
||||
// Export.SetToCSV(stats);
|
||||
}
|
||||
|
||||
// private Map<String, Integer> getSortedResultInflected(Map map) {
|
||||
// // first convert to <String, Integer>
|
||||
// Map<String, Integer> m = Util.sortByValue(Util.atomicInt2StringAndInt(map), 0);
|
||||
//
|
||||
// Map<String, Integer> sortedM = new TreeMap<>();
|
||||
//
|
||||
// sortedM.putAll(m);
|
||||
//
|
||||
// return sortedM;
|
||||
// }
|
||||
|
||||
private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
|
||||
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
||||
}
|
||||
|
||||
public String getTaxonomy() {
|
||||
return taxonomy;
|
||||
}
|
||||
|
||||
public boolean isTaxonomySet() {
|
||||
return taxonomyIsSet;
|
||||
}
|
||||
|
||||
public int getnGramLevel() {
|
||||
return nGramLevel;
|
||||
}
|
||||
|
||||
public CalculateFor getCf() {
|
||||
return cf;
|
||||
}
|
||||
|
||||
public AnalysisLevel getAnalysisLevel() {
|
||||
return analysisLevel;
|
||||
}
|
||||
|
||||
public CorpusType getCorpusType() {
|
||||
return corpusType;
|
||||
}
|
||||
|
||||
public void setCorpusType(CorpusType corpusType) {
|
||||
this.corpusType = corpusType;
|
||||
}
|
||||
|
||||
public boolean isGosOrthMode() {
|
||||
return gosOrthMode;
|
||||
}
|
||||
|
||||
public void setGosOrthMode(boolean gosOrthMode) {
|
||||
this.gosOrthMode = gosOrthMode;
|
||||
}
|
||||
|
||||
public Map<String, Object> getSolarHeadBlockFilter() {
|
||||
return solarHeadBlockFilter;
|
||||
}
|
||||
|
||||
public void setSolarHeadBlockFilter(Map<String, Object> solarHeadBlockFilter) {
|
||||
this.solarHeadBlockFilter = solarHeadBlockFilter;
|
||||
}
|
||||
|
||||
public boolean isUseDB() {
|
||||
return useDB;
|
||||
}
|
||||
|
||||
public void setUseDB(boolean useDB) {
|
||||
if (useDB && db == null) {
|
||||
db = new RDB();
|
||||
}
|
||||
this.useDB = useDB;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores results from this batch to a database and clears results map
|
||||
*/
|
||||
public void storeTmpResultsToDB() {
|
||||
try {
|
||||
db.writeBatch(result);
|
||||
result = new ConcurrentHashMap<>();
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isAnalysisProducedResults() {
|
||||
return analysisProducedResults;
|
||||
}
|
||||
}
|
|
@ -18,7 +18,6 @@ import org.apache.commons.lang3.tuple.Pair;
|
|||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import alg.inflectedJOS.WordFormation;
|
||||
import data.Enums.WordLevelType;
|
||||
import javafx.collections.ObservableList;
|
||||
import util.Export;
|
||||
|
|
|
@ -166,22 +166,6 @@ public class Tax {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// ArrayList<String> taxonomyString = new ArrayList<>();
|
||||
// for (Taxonomy t : taxonomyResult.keySet()){
|
||||
// taxonomyString.add(t.toString());
|
||||
// }
|
||||
// ObservableList<String> taxonomyObservableString = Tax.getTaxonomyForComboBox(corpus.getCorpusType(), new HashSet<>(taxonomyString));
|
||||
// ArrayList<String> sortedTaxonomyString = new ArrayList<>();
|
||||
// for (String t : taxonomyObservableString){
|
||||
// sortedTaxonomyString.add(t);
|
||||
// }
|
||||
|
||||
|
||||
|
||||
|
||||
foundTaxHS.addAll(genFoundTax);
|
||||
|
||||
// assures same relative order
|
||||
|
@ -198,59 +182,6 @@ public class Tax {
|
|||
return corpusTypesWithTaxonomy;
|
||||
}
|
||||
|
||||
public static ArrayList<String> getTaxonomyCodes(ArrayList<Taxonomy> taxonomyNames, CorpusType corpusType) {
|
||||
ArrayList<String> result = new ArrayList<>();
|
||||
|
||||
if (ValidationUtil.isEmpty(taxonomyNames)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
||||
|
||||
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
||||
tax = GIGAFIDA_TAXONOMY;
|
||||
} else if (corpusType == CorpusType.GOS) {
|
||||
tax = GOS_TAXONOMY;
|
||||
}
|
||||
|
||||
// for easier lookup
|
||||
Map<String, String> taxInversed = tax.entrySet()
|
||||
.stream()
|
||||
.collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
|
||||
|
||||
for (Taxonomy taxonomyName : taxonomyNames) {
|
||||
result.add(taxInversed.get(taxonomyName.toString()));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// public static ArrayList<String> getTaxonomyFormatted(ArrayList<String> taxonomyNames, CorpusType corpusType) {
|
||||
// ArrayList<String> result = new ArrayList<>();
|
||||
//
|
||||
// if (ValidationUtil.isEmpty(taxonomyNames)) {
|
||||
// return result;
|
||||
// }
|
||||
//
|
||||
// LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
||||
//
|
||||
// if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
||||
// tax = GIGAFIDA_TAXONOMY;
|
||||
// } else if (corpusType == CorpusType.GOS) {
|
||||
// tax = GOS_TAXONOMY;
|
||||
// }
|
||||
//
|
||||
// // for easier lookup
|
||||
// Map<String, String> taxInversed = tax.entrySet()
|
||||
// .stream()
|
||||
// .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
|
||||
//
|
||||
// for (String taxonomyName : taxonomyNames) {
|
||||
// result.add(taxInversed.get(taxonomyName) + " - " + taxonomyName);
|
||||
// }
|
||||
//
|
||||
// return result;
|
||||
// }
|
||||
|
||||
/**
|
||||
* Returns a list of proper names for codes
|
||||
|
@ -283,13 +214,4 @@ public class Tax {
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static String getLongTaxonomyName(String shortName){
|
||||
if (GIGAFIDA_TAXONOMY.containsKey(shortName))
|
||||
return GIGAFIDA_TAXONOMY.get(shortName);
|
||||
else if(GOS_TAXONOMY.containsKey(shortName))
|
||||
return GOS_TAXONOMY.get(shortName);
|
||||
else
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,14 +28,6 @@ enum TaxonomyEnum {
|
|||
|
||||
|
||||
// Gigafida
|
||||
// KNJIZNO("knjižno", "T.K", "gigafida"),
|
||||
// LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"),
|
||||
// STROKOVNO("strokovno", "T.K.S", "gigafida"),
|
||||
// PERIODICNO("periodično", "T.P", "gigafida"),
|
||||
// CASOPIS("časopis", "T.P.C", "gigafida"),
|
||||
// REVIJA("revija", "T.P.R", "gigafida"),
|
||||
// INTERNET("internet", "I", "gigafida"),
|
||||
|
||||
SSJ_TISK("SSJ.T", "SSJ.T - tisk"),
|
||||
SSJ_KNJIZNO("SSJ.T.K", " SSJ.T.K - tisk-knjižno"),
|
||||
SSJ_LEPOSLOVNO("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno"),
|
||||
|
@ -148,9 +140,6 @@ enum TaxonomyEnum {
|
|||
}
|
||||
|
||||
// Gigafida
|
||||
// if (TISK.toString().equals(tax)) {
|
||||
// return TISK;
|
||||
// }
|
||||
if (SSJ_TISK.toString().equals(tax)) {
|
||||
return SSJ_TISK;
|
||||
}
|
||||
|
@ -339,9 +328,6 @@ enum TaxonomyEnum {
|
|||
}
|
||||
|
||||
// Gigafida
|
||||
// if (TISK.toString().equals(tax)) {
|
||||
// return TISK;
|
||||
// }
|
||||
if (SSJ_TISK.toLongNameString().equals(tax)) {
|
||||
return SSJ_TISK;
|
||||
}
|
||||
|
@ -483,7 +469,6 @@ enum TaxonomyEnum {
|
|||
public static ArrayList<TaxonomyEnum> taxonomySelected(TaxonomyEnum disjointTaxonomy) {
|
||||
ArrayList<TaxonomyEnum> r = new ArrayList<>();
|
||||
|
||||
// System.out.println(disjointTaxonomy);
|
||||
if(disjointTaxonomy.equals(DISKURZ)){
|
||||
r.add(DISKURZ_JAVNI);
|
||||
r.add(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI);
|
||||
|
@ -696,12 +681,8 @@ enum TaxonomyEnum {
|
|||
}
|
||||
|
||||
public static ArrayList<TaxonomyEnum> convertStringListToTaxonomyList(ObservableList<String> stringList, Corpus corpus){
|
||||
// System.out.println("1.");
|
||||
// System.out.println(stringList);
|
||||
ArrayList<TaxonomyEnum> taxonomyList = new ArrayList<>();
|
||||
|
||||
// System.out.println("INTERESTING STUFF");
|
||||
// System.out.println(stringList);
|
||||
for (String e : stringList) {
|
||||
for (Taxonomy t : corpus.getTaxonomy()){
|
||||
if (t.toLongNameString().equals(e)) {
|
||||
|
@ -709,18 +690,11 @@ enum TaxonomyEnum {
|
|||
}
|
||||
}
|
||||
}
|
||||
// System.out.println(taxonomyList);
|
||||
// System.out.println("-----------------");
|
||||
return taxonomyList;
|
||||
}
|
||||
|
||||
public static void modifyingTaxonomy(ArrayList<TaxonomyEnum> taxonomy, ArrayList<TaxonomyEnum> checkedItemsTaxonomy, Corpus corpus){
|
||||
// get taxonomies that were selected/deselected by user
|
||||
// System.out.println("Print here:");
|
||||
// System.out.println(taxonomy);
|
||||
// System.out.println(checkedItemsTaxonomy);
|
||||
// System.out.println("-------------");
|
||||
|
||||
Set<TaxonomyEnum> disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy);
|
||||
if (taxonomy != null) {
|
||||
disjointTaxonomies.addAll(taxonomy);
|
||||
|
@ -739,7 +713,6 @@ enum TaxonomyEnum {
|
|||
if(!TaxonomyEnum.convertStringListToTaxonomyList(corpus.getObservableListTaxonomy(), corpus).contains(s)){
|
||||
disjointTaxonomies.remove(s);
|
||||
disArr.remove(s);
|
||||
// taxonomy.remove(s);
|
||||
i--;
|
||||
}
|
||||
i++;
|
||||
|
@ -790,11 +763,6 @@ public class Taxonomy {
|
|||
|
||||
}
|
||||
|
||||
// public Taxonomy(String name, String longName) {
|
||||
// this.name = name;
|
||||
// this.longName = longName;
|
||||
// }
|
||||
|
||||
public String toString() {
|
||||
return this.name;
|
||||
}
|
||||
|
@ -813,7 +781,6 @@ public class Taxonomy {
|
|||
return t;
|
||||
}
|
||||
return null;
|
||||
// return new Taxonomy(tax, false);
|
||||
}
|
||||
|
||||
public static Taxonomy factoryLongName(String tax, Corpus corpus) {
|
||||
|
@ -822,87 +789,6 @@ public class Taxonomy {
|
|||
return t;
|
||||
}
|
||||
return null;
|
||||
// return new Taxonomy(tax, true);
|
||||
}
|
||||
|
||||
// public static ArrayList<Taxonomy> taxonomySelected(Taxonomy disjointTaxonomy) {
|
||||
// ArrayList<TaxonomyEnum> rTaxonomyEnum = TaxonomyEnum.taxonomySelected(disjointTaxonomy.getTaxonomyEnum());
|
||||
//
|
||||
// ArrayList<Taxonomy> r = new ArrayList<>();
|
||||
//
|
||||
// for(TaxonomyEnum t : rTaxonomyEnum){
|
||||
// r.add(new Taxonomy(t.toString(), false));
|
||||
// }
|
||||
//
|
||||
// return r;
|
||||
// }
|
||||
|
||||
public static ArrayList<Taxonomy> taxonomyDeselected(Taxonomy disjointTaxonomy){
|
||||
// ArrayList<TaxonomyEnum> r = new ArrayList<>();
|
||||
// Map<TaxonomyEnum, TaxonomyEnum> connections = new ConcurrentHashMap<>();
|
||||
// connections.put(DISKURZ_JAVNI, DISKURZ);
|
||||
// connections.put(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_JAVNI);
|
||||
// connections.put(DISKURZ_RAZVEDRILNI, DISKURZ_JAVNI);
|
||||
// connections.put(DISKURZ_NEJAVNI, DISKURZ);
|
||||
// connections.put(DISKURZ_NEZASEBNI, DISKURZ_NEJAVNI);
|
||||
// connections.put(DISKURZ_ZASEBNI, DISKURZ_NEJAVNI);
|
||||
// connections.put(SITUACIJA_RADIO, SITUACIJA);
|
||||
// connections.put(SITUACIJA_TELEVIZIJA, SITUACIJA);
|
||||
// connections.put(KANAL_OSEBNI_STIK, KANAL);
|
||||
// connections.put(KANAL_TELEFON, KANAL);
|
||||
// connections.put(KANAL_RADIO, KANAL);
|
||||
// connections.put(KANAL_TELEVIZIJA, KANAL);
|
||||
//
|
||||
// connections.put(SSJ_KNJIZNO, SSJ_TISK);
|
||||
// connections.put(SSJ_LEPOSLOVNO, SSJ_KNJIZNO);
|
||||
// connections.put(SSJ_STROKOVNO, SSJ_KNJIZNO);
|
||||
// connections.put(SSJ_PERIODICNO, SSJ_TISK);
|
||||
// connections.put(SSJ_CASOPIS, SSJ_PERIODICNO);
|
||||
// connections.put(SSJ_REVIJA, SSJ_PERIODICNO);
|
||||
// connections.put(SSJ_DRUGO, SSJ_TISK);
|
||||
//
|
||||
// connections.put(FT_P_GOVORNI, FT_P_PRENOSNIK);
|
||||
// connections.put(FT_P_ELEKTRONSKI, FT_P_PRENOSNIK);
|
||||
// connections.put(FT_P_PISNI, FT_P_PRENOSNIK);
|
||||
// connections.put(FT_P_OBJAVLJENO, FT_P_PISNI);
|
||||
// connections.put(FT_P_KNJIZNO, FT_P_OBJAVLJENO);
|
||||
// connections.put(FT_P_PERIODICNO, FT_P_OBJAVLJENO);
|
||||
// connections.put(FT_P_CASOPISNO, FT_P_OBJAVLJENO);
|
||||
// connections.put(FT_P_DNEVNO, FT_P_CASOPISNO);
|
||||
// connections.put(FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO);
|
||||
// connections.put(FT_P_CASOPISNO_TEDENSKO, FT_P_CASOPISNO);
|
||||
// connections.put(FT_P_REVIALNO, FT_P_PERIODICNO);
|
||||
// connections.put(FT_P_TEDENSKO, FT_P_REVIALNO);
|
||||
// connections.put(FT_P_STIRINAJSTDNEVNO, FT_P_REVIALNO);
|
||||
// connections.put(FT_P_MESECNO, FT_P_REVIALNO);
|
||||
// connections.put(FT_P_REDKEJE_KOT_MESECNO, FT_P_REVIALNO);
|
||||
// connections.put(FT_P_OBCASNO, FT_P_REVIALNO);
|
||||
// connections.put(FT_P_NEOBJAVLJENO, FT_P_PISNI);
|
||||
// connections.put(FT_P_JAVNO, FT_P_NEOBJAVLJENO);
|
||||
// connections.put(FT_P_INTERNO, FT_P_NEOBJAVLJENO);
|
||||
// connections.put(FT_P_ZASEBNO, FT_P_NEOBJAVLJENO);
|
||||
// connections.put(FT_UMETNOSTNA, FT_ZVRST);
|
||||
// connections.put(FT_PESNISKA, FT_UMETNOSTNA);
|
||||
// connections.put(FT_PROZNA, FT_UMETNOSTNA);
|
||||
// connections.put(FT_DRAMSKA, FT_UMETNOSTNA);
|
||||
// connections.put(FT_NEUMETNOSTNA, FT_ZVRST);
|
||||
// connections.put(FT_STROKOVNA, FT_NEUMETNOSTNA);
|
||||
// connections.put(FT_HID, FT_STROKOVNA);
|
||||
// connections.put(FT_NIT, FT_STROKOVNA);
|
||||
// connections.put(FT_NESTROKOVNA, FT_NEUMETNOSTNA);
|
||||
// connections.put(FT_PRAVNA, FT_NEUMETNOSTNA);
|
||||
// connections.put(FT_DA, FT_LEKTORIRANO);
|
||||
// connections.put(FT_NE, FT_LEKTORIRANO);
|
||||
//
|
||||
// TaxonomyEnum currentTaxonomy = disjointTaxonomy;
|
||||
// r.add(currentTaxonomy);
|
||||
// while(connections.containsKey(currentTaxonomy)){
|
||||
// currentTaxonomy = connections.get(currentTaxonomy);
|
||||
// r.add(currentTaxonomy);
|
||||
// }
|
||||
// Collections.reverse(r);
|
||||
// return r;
|
||||
return null;
|
||||
}
|
||||
|
||||
public static ArrayList<Taxonomy> convertStringListToTaxonomyList(ObservableList<String> stringList, Corpus corpus){
|
||||
|
@ -919,7 +805,6 @@ public class Taxonomy {
|
|||
}
|
||||
|
||||
public static ArrayList<TaxonomyEnum> taxonomyToTaxonomyEnum(ArrayList<Taxonomy> taxonomy){
|
||||
// System.out.println(taxonomy);
|
||||
if (taxonomy == null) {
|
||||
return null;
|
||||
}
|
||||
|
@ -934,11 +819,6 @@ public class Taxonomy {
|
|||
}
|
||||
|
||||
public static ArrayList<Taxonomy> taxonomyEnumToTaxonomy(ArrayList<TaxonomyEnum> taxonomy, Corpus corpus){
|
||||
// ArrayList<Taxonomy> r = new ArrayList<>();
|
||||
// for (TaxonomyEnum t : taxonomy){
|
||||
// r.add(new Taxonomy(t));
|
||||
// }
|
||||
// return r;
|
||||
ArrayList<Taxonomy> r = new ArrayList<>();
|
||||
for (TaxonomyEnum te : taxonomy){
|
||||
for (Taxonomy t : corpus.getTaxonomy()){
|
||||
|
|
|
@ -15,7 +15,6 @@ import javafx.collections.ObservableList;
|
|||
import javafx.concurrent.Task;
|
||||
import javafx.fxml.FXML;
|
||||
import javafx.scene.control.*;
|
||||
//import javafx.scene.image.Image;
|
||||
import javafx.scene.image.ImageView;
|
||||
import javafx.scene.layout.AnchorPane;
|
||||
import javafx.scene.layout.Pane;
|
||||
|
@ -29,7 +28,6 @@ import java.io.UnsupportedEncodingException;
|
|||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static alg.XML_processing.readXML;
|
||||
import static gui.GUIController.showAlert;
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
|
@ -129,20 +127,10 @@ public class CharacterAnalysisTab {
|
|||
private ComboBox<String> taxonomySetOperationCB;
|
||||
private String taxonomySetOperation;
|
||||
|
||||
// @FXML
|
||||
// private ToggleGroup calculateForRB;
|
||||
// private CalculateFor calculateFor;
|
||||
|
||||
@FXML
|
||||
private ComboBox<String> calculateForCB;
|
||||
private CalculateFor calculateFor;
|
||||
|
||||
@FXML
|
||||
private RadioButton lemmaRB;
|
||||
|
||||
@FXML
|
||||
private RadioButton varietyRB;
|
||||
|
||||
@FXML
|
||||
private Pane paneLetters;
|
||||
|
||||
|
@ -171,13 +159,12 @@ public class CharacterAnalysisTab {
|
|||
|
||||
private Corpus corpus;
|
||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||
private Filter filter;
|
||||
private boolean useDb;
|
||||
private HostServices hostService;
|
||||
private ListChangeListener<String> taxonomyListener;
|
||||
private ChangeListener<Boolean> msdListener;
|
||||
private ChangeListener<Boolean> minimalOccurrencesListener;
|
||||
private ChangeListener<Boolean> minimalTaxonomyListener;
|
||||
private boolean useDb;
|
||||
|
||||
private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
|
||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));
|
||||
|
@ -185,14 +172,8 @@ public class CharacterAnalysisTab {
|
|||
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
|
||||
private static final ArrayList<String> TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY));
|
||||
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("različnica", "lema");
|
||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||
|
||||
|
||||
// TODO: pass observables for taxonomy based on header scan
|
||||
// after header scan
|
||||
private ObservableList<String> taxonomyCCBValues;
|
||||
private CorpusType currentCorpusType;
|
||||
|
||||
public void init() {
|
||||
characterAnalysisTab.getStylesheets().add("style.css");
|
||||
|
@ -203,24 +184,11 @@ public class CharacterAnalysisTab {
|
|||
currentMode = MODE.LETTER;
|
||||
toggleMode(currentMode);
|
||||
|
||||
// calculateForRB.selectedToggleProperty().addListener(new ChangeListener<Toggle>() {
|
||||
// @Override
|
||||
// public void changed(ObservableValue<? extends Toggle> observable, Toggle oldValue, Toggle newValue) {
|
||||
// //logger.info("calculateForRB:", newValue.toString());
|
||||
// RadioButton chk = (RadioButton)newValue.getToggleGroup().getSelectedToggle(); // Cast object to radio button
|
||||
// calculateFor = CalculateFor.factory(chk.getText());
|
||||
// logger.info("calculateForRB:", chk.getText());
|
||||
// //System.out.println("Selected Radio Button - "+chk.getText());
|
||||
// }
|
||||
// });
|
||||
|
||||
// Keep the calculateFor field in sync with the value selected in calculateForCB.
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
    if (newValue == null) {
        // The selection was cleared: re-select the translated form of the previous value.
        // NOTE(review): presumably triggered when the items are re-populated after a
        // language switch - confirm against the code that refills the combo box.
        newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
        calculateForCB.getSelectionModel().select(newValue);
    }
    calculateFor = CalculateFor.factory(newValue);
    // The "{}" placeholder is required; without it the argument is never rendered in the
    // log line. Passing the object (not toString()) also tolerates a null value.
    logger.info("calculateForCB: {}", calculateFor);
});
|
||||
|
@ -299,7 +267,6 @@ public class CharacterAnalysisTab {
|
|||
public void onChanged(Change<? extends String> c){
|
||||
if(changing) {
|
||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
|
||||
|
||||
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
||||
|
||||
|
@ -309,7 +276,6 @@ public class CharacterAnalysisTab {
|
|||
taxonomyCCB.getItems().removeAll();
|
||||
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
||||
|
||||
// taxonomyCCB.getCheckModel().clearChecks();
|
||||
changing = false;
|
||||
taxonomyCCB.getCheckModel().clearChecks();
|
||||
for (Taxonomy t : checkedItemsTaxonomy) {
|
||||
|
@ -468,97 +434,6 @@ public class CharacterAnalysisTab {
|
|||
cancel.setVisible(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* case a: values for combo boxes can change after a corpus change
|
||||
* <ul>
|
||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
||||
* <li>same corpus type, different subset - keep</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* case b: values for combo boxes can change after a header scan
|
||||
* <ul>
|
||||
* <li>at first, fields are populated by corpus type defaults</li>
|
||||
* <li>after, with gathered data</li>
|
||||
* </ul>
|
||||
* <p></p>
|
||||
* ngrams: 1
|
||||
* calculateFor: word
|
||||
* msd:
|
||||
* taxonomy:
|
||||
* skip: 0
|
||||
* iscvv: false
|
||||
* string length: 1
|
||||
*/
|
||||
// public void populateFields() {
|
||||
// // corpus changed if: current one is null (this is first run of the app)
|
||||
// // or if currentCorpus != gui's corpus
|
||||
// boolean corpusChanged = currentCorpusType == null
|
||||
// || currentCorpusType != corpus.getCorpusType();
|
||||
//
|
||||
// // TODO: check for GOS, GIGAFIDA, SOLAR...
|
||||
// // refresh and:
|
||||
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
||||
//// if (calculateFor == null) {
|
||||
//// calculateForRB.selectToggle(lemmaRB);
|
||||
//// calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString());
|
||||
//// }
|
||||
//
|
||||
// if (!filter.hasMsd()) {
|
||||
// // if current corpus doesn't have msd data, disable this field
|
||||
// msd = new ArrayList<>();
|
||||
// msdTF.setText("");
|
||||
// msdTF.setDisable(true);
|
||||
// logger.info("no msd data");
|
||||
// } else {
|
||||
// if (ValidationUtil.isEmpty(msd)
|
||||
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
||||
// // msd has not been set previously
|
||||
// // or msd has been set but the corpus changed -> reset
|
||||
// msd = new ArrayList<>();
|
||||
// msdTF.setText("");
|
||||
// msdTF.setDisable(false);
|
||||
// logger.info("msd reset");
|
||||
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
||||
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
|
||||
// msdTF.setText(StringUtils.join(msdStrings, " "));
|
||||
// msdTF.setDisable(false);
|
||||
// logger.info("msd kept");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
|
||||
//
|
||||
// // keep calculateCvv
|
||||
// calculatecvvCB.setSelected(calculateCvv);
|
||||
//
|
||||
// // keep string length if set
|
||||
// if (stringLength != null) {
|
||||
// stringLengthTF.setText(String.valueOf(stringLength));
|
||||
// } else {
|
||||
// stringLengthTF.setText("1");
|
||||
// stringLength = 1;
|
||||
// }
|
||||
//
|
||||
// // TODO: trigger on rescan
|
||||
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
||||
// // user changed corpus (by type) or by selection & triggered a rescan of headers
|
||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
//
|
||||
// currentCorpusType = corpus.getCorpusType();
|
||||
// // setTaxonomyIsDirty(false);
|
||||
// } else {
|
||||
//
|
||||
// }
|
||||
//
|
||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
||||
//
|
||||
// }
|
||||
|
||||
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
||||
Tooltip tooltip = new Tooltip();
|
||||
tooltip.textProperty().bind(stringBinding);
|
||||
|
@ -719,7 +594,6 @@ public class CharacterAnalysisTab {
|
|||
cancel.setVisible(true);
|
||||
}
|
||||
int i = 0;
|
||||
// DateFormat df = new SimpleDateFormat("hh:mm:ss");
|
||||
Date startTime = new Date();
|
||||
Date previousTime = new Date();
|
||||
int remainingSeconds = -1;
|
||||
|
@ -759,23 +633,16 @@ public class CharacterAnalysisTab {
|
|||
xml_processing.isCancelled = isCancelled();
|
||||
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
|
||||
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
|
||||
// updateProgress((iFinal * 100) + (double) observable, corpusFiles.size() * 100);
|
||||
}
|
||||
};
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
|
||||
|
||||
|
||||
xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
||||
|
||||
// xml_processing.progressProperty().addListener((obs, oldProgress, newProgress) ->
|
||||
// updateProgress((iFinal * 100) + newProgress.doubleValue(), corpusFiles.size() * 100));
|
||||
}
|
||||
xml_processing.readXML(f.toString(), statistic);
|
||||
if (isCancelled()) {
|
||||
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
break;
|
||||
}
|
||||
// readXML(f.toString(), statistic, this, corpusFiles.size(), startTime, previousTime, i);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
@ -799,7 +666,6 @@ public class CharacterAnalysisTab {
|
|||
}
|
||||
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
cancel.setVisible(false);
|
||||
|
@ -810,7 +676,6 @@ public class CharacterAnalysisTab {
|
|||
logger.error("Error while executing", e);
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
cancel.setVisible(false);
|
||||
|
@ -820,7 +685,6 @@ public class CharacterAnalysisTab {
|
|||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
cancel.setVisible(false);
|
||||
|
|
|
@ -2,10 +2,8 @@ package gui;
|
|||
|
||||
import static data.CorpusType.*;
|
||||
import static gui.GUIController.*;
|
||||
import static gui.Messages.*;
|
||||
import static util.Util.*;
|
||||
|
||||
import java.awt.*;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Constructor;
|
||||
|
@ -53,16 +51,11 @@ public class CorpusTab {
|
|||
|
||||
@FXML
|
||||
private Button chooseCorpusLocationB;
|
||||
private File chosenCorpusLocation;
|
||||
|
||||
@FXML
|
||||
private CheckBox readHeaderInfoChB;
|
||||
private boolean readHeaderInfo;
|
||||
|
||||
// @FXML
|
||||
// private CheckBox gosUseOrthChB;
|
||||
// private boolean gosUseOrth;
|
||||
|
||||
@FXML
|
||||
private Button chooseResultsLocationB;
|
||||
|
||||
|
@ -213,11 +206,8 @@ public class CorpusTab {
|
|||
|
||||
selectReaderCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||
if(newValue == null){
|
||||
// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
|
||||
selectReaderCB.getSelectionModel().select(newValue);
|
||||
}
|
||||
// System.out.println(oldValue);
|
||||
// System.out.println(newValue);
|
||||
selectReader = newValue;
|
||||
selectReader();
|
||||
if(corpus != null && corpus.getCorpusType() != null) {
|
||||
|
@ -236,12 +226,9 @@ public class CorpusTab {
|
|||
// comma / point choice
|
||||
punctuationCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||
if(newValue == null){
|
||||
// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
|
||||
newValue = I18N.getTranslatedValue(oldValue, PUNCTUATION);
|
||||
punctuationCB.getSelectionModel().select(newValue);
|
||||
}
|
||||
// System.out.println(oldValue);
|
||||
// System.out.println(newValue);
|
||||
punctuation = newValue;
|
||||
if(corpus != null) {
|
||||
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
|
||||
|
@ -252,7 +239,6 @@ public class CorpusTab {
|
|||
|
||||
// add listeners
|
||||
chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
|
||||
// chooseCorpusLocationB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_chooseCorpusLocationB")));
|
||||
helpH.setOnAction(e -> openHelpWebsite());
|
||||
|
||||
readHeaderInfoChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
|
@ -262,18 +248,6 @@ public class CorpusTab {
|
|||
}
|
||||
logger.info("read headers: ", readHeaderInfo);
|
||||
});
|
||||
// readHeaderInfoChB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_readHeaderInfoChB")));
|
||||
|
||||
// gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
// gosUseOrth = newValue;
|
||||
// corpus.setGosOrthMode(gosUseOrth);
|
||||
//// wordFormationTab.setDisable(gosUseOrth);
|
||||
// satNew2Controller.toggleMode(null);
|
||||
// oneWordTabController.toggleMode(null);
|
||||
// catController.toggleMode(null);
|
||||
//
|
||||
// logger.info("gosUseOrth: ", gosUseOrth);
|
||||
// });
|
||||
|
||||
chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null));
|
||||
|
||||
|
@ -284,31 +258,12 @@ public class CorpusTab {
|
|||
I18N.setLocale(new Locale.Builder().setLanguage("sl").setRegion("SI").build());
|
||||
}
|
||||
Messages.reload();
|
||||
|
||||
// StringBuilder sb = new StringBuilder();
|
||||
// sb.append(corpusLocation)
|
||||
// .append("\n")
|
||||
// .append(String.format(I18N.get("message.NOTIFICATION_FOUND_X_FILES"), corpusFilesSize))
|
||||
// .append("\n")
|
||||
// .append(String.format(I18N.get("message.NOTIFICATION_CORPUS"), corpusType.toString()));
|
||||
//
|
||||
// chooseCorpusLabelContent = sb.toString();
|
||||
// chooseCorpusL.textProperty().unbind();
|
||||
// chooseCorpusL.setText(chooseCorpusLabelContent);
|
||||
Messages.updateChooseCorpusL();
|
||||
|
||||
logger.info("change language");
|
||||
});
|
||||
|
||||
// set labels and toggle visibility
|
||||
// toggleGosChBVisibility();
|
||||
|
||||
// chooseCorpusLabelContent = Messages.LABEL_CORPUS_LOCATION_NOT_SET;
|
||||
// chooseCorpusL.setText(chooseCorpusLabelContent);
|
||||
//
|
||||
// chooseResultsLabelContent = Messages.LABEL_RESULTS_LOCATION_NOT_SET;
|
||||
// chooseResultsL.setText(chooseResultsLabelContent);
|
||||
|
||||
togglePiAndSetCorpusWrapper(false);
|
||||
}
|
||||
|
||||
|
@ -391,11 +346,6 @@ public class CorpusTab {
|
|||
corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("vert", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
|
||||
Collection<File> corpusFilesRegi = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("regi", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
|
||||
|
||||
|
||||
// if (!checkRegiFile(corpusFilesRegi)){
|
||||
// return;
|
||||
// }
|
||||
|
||||
if (corpusFiles.size() == 0){
|
||||
logger.info("alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND"));
|
||||
showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null);
|
||||
|
@ -405,7 +355,6 @@ public class CorpusTab {
|
|||
corpusLocation = selectedDirectory.getAbsolutePath();
|
||||
corpusFilesSize = String.valueOf(corpusFiles.size());
|
||||
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
|
||||
// corpusType = VERT;
|
||||
|
||||
corpus.setCorpusType(corpusType);
|
||||
|
||||
|
@ -446,12 +395,10 @@ public class CorpusTab {
|
|||
}
|
||||
|
||||
} else {
|
||||
// System.out.println(corpusLocation);
|
||||
corpusLocation = selectedDirectory.getAbsolutePath();
|
||||
corpusFilesSize = String.valueOf(corpusFiles.size());
|
||||
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
|
||||
|
||||
// String chooseCorpusLabelContentTmp = detectCorpusType(corpusFiles);
|
||||
selectReader();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(corpusLocation)
|
||||
|
@ -491,7 +438,6 @@ public class CorpusTab {
|
|||
}
|
||||
}
|
||||
}
|
||||
// System.out.println(outputName);
|
||||
corpus.setCorpusName(outputName);
|
||||
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
|
||||
}
|
||||
|
@ -534,7 +480,6 @@ public class CorpusTab {
|
|||
private void setResults() {
|
||||
// if everything is ok
|
||||
// check and enable checkbox if GOS
|
||||
// toggleGosChBVisibility();
|
||||
|
||||
// set default results location
|
||||
String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath();
|
||||
|
@ -543,28 +488,6 @@ public class CorpusTab {
|
|||
Messages.setChooseCorpusL(chooseCorpusL, chooseCorpusLabelContent);
|
||||
}
|
||||
|
||||
private boolean checkRegiFile(Collection<File> corpusFiles) {
|
||||
// CorpusType corpusType = corpus.getCorpusType();
|
||||
// Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();
|
||||
|
||||
|
||||
for (File file : corpusFiles) {
|
||||
// try to open .regi file
|
||||
String regiPath = file.getAbsolutePath().substring(0, file.getAbsolutePath().length() - 4) + "regi";
|
||||
LineIterator regiIt;
|
||||
try {
|
||||
// read regi file
|
||||
regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8");
|
||||
LineIterator.closeQuietly(regiIt);
|
||||
} catch (IOException e) {
|
||||
GUIController.showAlert(Alert.AlertType.ERROR, String.format(I18N.get("message.ERROR_NO_REGI_FILE_FOUND"), regiPath));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
private void readHeaderInfo() {
|
||||
CorpusType corpusType = corpus.getCorpusType();
|
||||
Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();
|
||||
|
@ -592,8 +515,6 @@ public class CorpusTab {
|
|||
i++;
|
||||
|
||||
if (corpusIsSplit) {
|
||||
// System.out.println(i);
|
||||
// System.out.println(corpusFiles.size());
|
||||
updateProgress(i, corpusFiles.size());
|
||||
}
|
||||
}
|
||||
|
@ -615,10 +536,7 @@ public class CorpusTab {
|
|||
characterLevelTab.setDisable(true);
|
||||
wordLevelTab.setDisable(true);
|
||||
filterTab.setDisable(true);
|
||||
// Messages.reload();
|
||||
Messages.updateChooseCorpusL();
|
||||
// chooseCorpusL.textProperty().bind(I18N.createStringBinding("message.LABEL_CORPUS_LOCATION_NOT_SET"));
|
||||
// chooseResultsL.textProperty().bind(I18N.createStringBinding("message.LABEL_RESULTS_LOCATION_NOT_SET"));
|
||||
|
||||
logger.info("No taxonomy found in headers.");
|
||||
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_TAXONOMY_FOUND"));
|
||||
|
@ -749,12 +667,6 @@ public class CorpusTab {
|
|||
|
||||
task.setOnSucceeded(e -> {
|
||||
ObservableList<String> readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue());
|
||||
|
||||
// if (ValidationUtil.isEmpty(readTaxonomy)) {
|
||||
// // if no taxonomy found alert the user and keep other tabs disabled
|
||||
// logger.info("No vert filters found in headers.");
|
||||
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND"));
|
||||
// } else {
|
||||
// set taxonomy, update label
|
||||
corpus.setTaxonomy(readTaxonomy);
|
||||
corpus.setHeaderRead(true);
|
||||
|
@ -790,10 +702,7 @@ public class CorpusTab {
|
|||
characterLevelTab.setDisable(false);
|
||||
catController.setCorpus(corpus);
|
||||
catController.init();
|
||||
//wordFormationTab.setDisable(false);
|
||||
wordLevelTab.setDisable(false);
|
||||
//wfController.setCorpus(corpus);
|
||||
//wfController.init();
|
||||
wlController.setCorpus(corpus);
|
||||
wlController.init();
|
||||
|
||||
|
@ -824,13 +733,6 @@ public class CorpusTab {
|
|||
return directoryChooser.showDialog(stage);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hides GOS related checkbox until needed.
|
||||
*/
|
||||
// private void toggleGosChBVisibility() {
|
||||
// gosUseOrthChB.setVisible(corpus != null && corpus.getCorpusType() != null && corpus.getCorpusType() == CorpusType.GOS);
|
||||
// }
|
||||
|
||||
private void selectReader() {
|
||||
switch (selectReader) {
|
||||
// "vert", "Solar", "GOS", "SSJ500K", "Gigafida", "Gigafida (old)", "Kres (old)"
|
||||
|
@ -940,18 +842,10 @@ public class CorpusTab {
|
|||
|
||||
/**
 * Stores the given character analysis tab controller in the {@code catController} field.
 *
 * @param catController controller to keep a reference to
 */
public void setCatController(CharacterAnalysisTab catController) {
    this.catController = catController;
}
|
||||
|
||||
/*public void setWfController(WordFormationTab wfController) {
|
||||
this.wfController = wfController;
|
||||
}*/
|
||||
|
||||
public void setWlController(WordLevelTab wlController) {
|
||||
this.wlController = wlController;
|
||||
}
|
||||
|
||||
public void setWordFormationTab(Tab wordFormationTab) {
|
||||
this.wordFormationTab = wordFormationTab;
|
||||
}
|
||||
|
||||
public void setHostServices(HostServices hostServices){
|
||||
this.hostService = hostServices;
|
||||
}
|
||||
|
|
|
@ -52,21 +52,17 @@ public class FiltersForSolar {
|
|||
@FXML
|
||||
public Label solarFilters;
|
||||
@FXML
|
||||
public Label selectedFiltersL;
|
||||
@FXML
|
||||
public TextArea selectedFiltersTextArea;
|
||||
@FXML
|
||||
private Button changeLanguageB;
|
||||
@FXML
|
||||
private Hyperlink helpH;
|
||||
|
||||
// private HashMap<String, ObservableList<String>> selectedFilters;
|
||||
private Corpus corpus;
|
||||
|
||||
private StringAnalysisTabNew2 satNew2Controller;
|
||||
private OneWordAnalysisTab oneWordTabController;
|
||||
private CharacterAnalysisTab catController;
|
||||
//private WordFormationTab wfController;
|
||||
private WordLevelTab wlController;
|
||||
private HostServices hostService;
|
||||
|
||||
|
@ -172,7 +168,6 @@ public class FiltersForSolar {
|
|||
ArrayList<String> values = new ArrayList<>(entry.getValue());
|
||||
|
||||
if (!values.isEmpty()) {
|
||||
// allFilters.append(entry.getKey())
|
||||
allFilters.append(I18N.get(entry.getKey() + "L"))
|
||||
.append(": ");
|
||||
|
||||
|
@ -202,7 +197,6 @@ public class FiltersForSolar {
|
|||
satNew2Controller.setSolarFiltersMap(solarFiltersMap);
|
||||
oneWordTabController.setSolarFiltersMap(solarFiltersMap);
|
||||
catController.setSolarFiltersMap(solarFiltersMap);
|
||||
//wfController.setSolarFiltersMap(solarFiltersMap);
|
||||
wlController.setSolarFiltersMap(solarFiltersMap);
|
||||
}
|
||||
|
||||
|
@ -215,7 +209,6 @@ public class FiltersForSolar {
|
|||
satNew2Controller.setSelectedFiltersLabel(content);
|
||||
oneWordTabController.setSelectedFiltersLabel(content);
|
||||
catController.setSelectedFiltersLabel(content);
|
||||
//wfController.setSelectedFiltersLabel(content);
|
||||
wlController.setSelectedFiltersLabel(content);
|
||||
}
|
||||
|
||||
|
@ -230,10 +223,6 @@ public class FiltersForSolar {
|
|||
|
||||
/**
 * Stores the given character analysis tab controller in the {@code catController} field.
 *
 * @param catController controller to keep a reference to
 */
public void setCatController(CharacterAnalysisTab catController) {
    this.catController = catController;
}
|
||||
|
||||
/*public void setWfController(WordFormationTab wfController) {
|
||||
this.wfController = wfController;
|
||||
}*/
|
||||
|
||||
public void setWlController(WordLevelTab wlController) {
|
||||
this.wlController = wlController;
|
||||
}
|
||||
|
|
|
@ -49,29 +49,11 @@ public class GUIController extends Application {
|
|||
@FXML
|
||||
private CorpusTab ctController;
|
||||
@FXML
|
||||
private Parent ct;
|
||||
//@FXML
|
||||
//private WordFormationTab wfController;
|
||||
@FXML
|
||||
private Parent wf;
|
||||
@FXML
|
||||
private WordLevelTab wlController;
|
||||
@FXML
|
||||
private Parent wl;
|
||||
@FXML
|
||||
private FiltersForSolar ffsController;
|
||||
@FXML
|
||||
private Parent ffs;
|
||||
@FXML
|
||||
private SelectedFiltersPane sfpController;
|
||||
@FXML
|
||||
private Parent sfp;
|
||||
@FXML
|
||||
public Tab stringLevelTab;
|
||||
@FXML
|
||||
public Tab wordLevelTab;
|
||||
/*@FXML
|
||||
public Tab wordFormationTab;*/
|
||||
|
||||
|
||||
@FXML
|
||||
|
@ -83,28 +65,9 @@ public class GUIController extends Application {
|
|||
|
||||
@Override
|
||||
public void start(Stage primaryStage) throws IOException {
|
||||
// File fileDir = new File("message_sl_unicode.properties");
|
||||
//
|
||||
// BufferedReader in = new BufferedReader(
|
||||
// new InputStreamReader(
|
||||
// new FileInputStream(fileDir), "UTF8"));
|
||||
//
|
||||
// String str;
|
||||
//
|
||||
// while ((str = in.readLine()) != null) {
|
||||
// System.out.println(str);
|
||||
// }
|
||||
//
|
||||
// in.close();
|
||||
|
||||
Parent root = FXMLLoader.load(getClass().getResource("/GUI.fxml"));
|
||||
// Parent root = FXMLLoader.load(ResourceLookup.resources.url("GUI.fxml"));
|
||||
// primaryStage.setTitle("Luščilnik");
|
||||
// StringBinding a = I18N.createStringBinding("window.title");
|
||||
primaryStage.titleProperty().bind(I18N.createStringBinding("window.title"));
|
||||
Scene scene = new Scene(root, 800, 600);
|
||||
// https://github.com/dicolar/jbootx
|
||||
// scene.getStylesheets().add(GUIController.class.getResource("bootstrap3.css").toExternalForm())
|
||||
primaryStage.setScene(scene);
|
||||
stage = primaryStage;
|
||||
primaryStage.show();
|
||||
|
@ -130,13 +93,10 @@ public class GUIController extends Application {
|
|||
ctController.setSatNew2Controller(satNew2Controller);
|
||||
ctController.setOneWordTabController(oneWordTabController);
|
||||
ctController.setCatController(catController);
|
||||
//ctController.setWfController(wfController);
|
||||
ctController.setWlController(wlController);
|
||||
ctController.setTabPane(tabPane);
|
||||
ctController.setFfsController(ffsController);
|
||||
//ctController.setWordFormationTab(wordFormationTab);
|
||||
ctController.setWordLevelTab(wordLevelTab);
|
||||
//System.out.println(com.sun.javafx.runtime.VersionInfo.getRuntimeVersion());
|
||||
|
||||
ctController.setHostServices(getHostServices());
|
||||
|
||||
|
@ -146,14 +106,11 @@ public class GUIController extends Application {
|
|||
oneWordTabController.setHostServices(getHostServices());
|
||||
catController.setCorpus(corpus);
|
||||
catController.setHostServices(getHostServices());
|
||||
//wfController.setCorpus(corpus);
|
||||
//wfController.setHostServices(getHostServices());
|
||||
wlController.setCorpus(corpus);
|
||||
wlController.setHostServices(getHostServices());
|
||||
ffsController.setSatNew2Controller(satNew2Controller);
|
||||
ffsController.setOneWordTabController(oneWordTabController);
|
||||
ffsController.setCatController(catController);
|
||||
//ffsController.setWfController(wfController);
|
||||
ffsController.setWlController(wlController);
|
||||
ffsController.setHostServices(getHostServices());
|
||||
|
||||
|
|
|
@ -1,17 +1,12 @@
|
|||
package gui;
|
||||
|
||||
import com.sun.javafx.collections.ObservableListWrapper;
|
||||
import javafx.beans.binding.Bindings;
|
||||
import javafx.beans.binding.ObjectBinding;
|
||||
import javafx.beans.binding.StringBinding;
|
||||
import javafx.beans.property.ObjectProperty;
|
||||
import javafx.beans.property.SimpleObjectProperty;
|
||||
import javafx.beans.value.ObservableValue;
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ObservableList;
|
||||
import javafx.scene.control.Button;
|
||||
import javafx.scene.control.Label;
|
||||
import javafx.scene.control.Tooltip;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.text.MessageFormat;
|
||||
|
@ -111,22 +106,6 @@ public final class I18N {
|
|||
return Bindings.createStringBinding(() -> get(key, args), locale);
|
||||
}
|
||||
|
||||
// public static ObservableValue<ObservableList<String>> createListStringBinding(final String key, Object... args) {
|
||||
// ObservableList<StringBinding> r = (ObservableList<StringBinding>) new ArrayList<StringBinding>();
|
||||
// r.add(Bindings.createStringBinding(() -> get(key, args), locale));
|
||||
// return r;
|
||||
// }
|
||||
|
||||
/**
|
||||
* creates a String Binding to a localized String that is computed by calling the given func
|
||||
*
|
||||
* @param func
|
||||
* function called on every change
|
||||
* @return StringBinding
|
||||
*/
|
||||
public static StringBinding createStringBinding(Callable<String> func) {
|
||||
return Bindings.createStringBinding(func, locale);
|
||||
}
|
||||
/**
|
||||
* creates a String binding to a localized String for the given message bundle key
|
||||
*
|
||||
|
@ -138,22 +117,6 @@ public final class I18N {
|
|||
return Bindings.createObjectBinding(() -> getObject(keys, args), locale);
|
||||
}
|
||||
|
||||
// public static ObservableValue<ObservableList<String>> createListStringBinding(final String key, Object... args) {
|
||||
// ObservableList<StringBinding> r = (ObservableList<StringBinding>) new ArrayList<StringBinding>();
|
||||
// r.add(Bindings.createStringBinding(() -> get(key, args), locale));
|
||||
// return r;
|
||||
// }
|
||||
|
||||
/**
|
||||
* creates an Object Binding to a localized Object that is computed by calling the given func
|
||||
*
|
||||
* @param func
|
||||
* function called on every change
|
||||
* @return ObjectBinding
|
||||
*/
|
||||
public static ObjectBinding createObjectBinding(Callable<String> func) {
|
||||
return Bindings.createObjectBinding(func, locale);
|
||||
}
|
||||
|
||||
public static String getIndependent(final String key, Locale locale, final Object... args) {
|
||||
ResourceBundle bundle = ResourceBundle.getBundle("message", locale);
|
||||
|
@ -164,7 +127,6 @@ public final class I18N {
|
|||
e.printStackTrace();
|
||||
}
|
||||
return val;
|
||||
// return MessageFormat.format(bundle.getString(key), args);
|
||||
}
|
||||
|
||||
public static String getRootValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
|
||||
|
@ -230,35 +192,4 @@ public final class I18N {
|
|||
|
||||
return FXCollections.observableArrayList(translatedWords);
|
||||
}
|
||||
|
||||
/**
|
||||
* DUPLICATE OF toString()
|
||||
* searches for possible values in translations and returns key of the string
|
||||
* == .toString()
|
||||
*
|
||||
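* @param w translated (UTF-8) text to look up
|
||||
* @param prefix only bundle keys starting with this prefix are considered
|
||||
* @return the first matching bundle key, or null if no translation equals w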
|
||||
*/
|
||||
public static String findI18NString(String w, String prefix){
|
||||
ResourceBundle bundle = ResourceBundle.getBundle("message", getLocale());
|
||||
for (String key : bundle.keySet()){
|
||||
if(prefix.length() > key.length() || !key.substring(0, prefix.length()).equals(prefix)){
|
||||
continue;
|
||||
}
|
||||
String val = bundle.getString(key);
|
||||
try {
|
||||
String newVal = new String(val.getBytes("ISO-8859-1"), "UTF-8");
|
||||
|
||||
if (newVal.equals(w)){
|
||||
return key;
|
||||
}
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,35 +10,17 @@ import javafx.scene.control.Label;
|
|||
public class Messages {
|
||||
|
||||
// warnings & errors
|
||||
public static String WARNING_CORPUS_NOT_FOUND = I18N.get("message.WARNING_CORPUS_NOT_FOUND");
|
||||
public static String WARNING_RESULTS_DIR_NOT_VALID = I18N.get("message.WARNING_RESULTS_DIR_NOT_VALID");
|
||||
public static String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS = I18N.get("message.WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS");
|
||||
public static String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS_INFO = I18N.get("message.WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS_INFO");
|
||||
public static String WARNING_WORD_OR_LEMMA = I18N.get("message.WARNING_WORD_OR_LEMMA");
|
||||
public static String WARNING_ONLY_NUMBERS_ALLOWED = I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED");
|
||||
public static String WARNING_NUMBER_TOO_BIG = I18N.get("message.WARNING_NUMBER_TOO_BIG");
|
||||
public static String WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES = I18N.get("message.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES");
|
||||
public static String WARNING_MISSING_STRING_LENGTH = I18N.get("message.WARNING_MISSING_STRING_LENGTH");
|
||||
public static String WARNING_NO_TAXONOMY_FOUND = I18N.get("message.WARNING_NO_TAXONOMY_FOUND");
|
||||
public static String WARNING_NO_SOLAR_FILTERS_FOUND = I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND");
|
||||
public static String ERROR_WHILE_EXECUTING = I18N.get("message.ERROR_WHILE_EXECUTING");
|
||||
public static String ERROR_WHILE_SAVING_RESULTS_TO_CSV = I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV");
|
||||
public static String ERROR_NOT_ENOUGH_MEMORY= I18N.get("message.ERROR_NOT_ENOUGH_MEMORY");
|
||||
|
||||
// missing
|
||||
public static String MISSING_NGRAM_LEVEL = I18N.get("message.MISSING_NGRAM_LEVEL");
|
||||
public static String MISSING_CALCULATE_FOR = I18N.get("message.MISSING_CALCULATE_FOR");
|
||||
public static String MISSING_SKIP = I18N.get("message.MISSING_SKIP");
|
||||
public static String MISSING_STRING_LENGTH = I18N.get("message.MISSING_STRING_LENGTH");
|
||||
public static String MISMATCHED_STRING_LENGTH_AND_MSD_REGEX = I18N.get("message.MISMATCHED_STRING_LENGTH_AND_MSD_REGEX");
|
||||
|
||||
|
||||
// general notifications - static content/set only once
|
||||
public static String NOTIFICATION_FOUND_X_FILES = I18N.get("message.NOTIFICATION_FOUND_X_FILES");
|
||||
public static String NOTIFICATION_ANALYSIS_COMPLETED = I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED");
|
||||
public static String NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS = I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS");
|
||||
public static String RESULTS_PATH_SET_TO_DEFAULT = I18N.get("message.RESULTS_PATH_SET_TO_DEFAULT");
|
||||
public static String NOTIFICATION_ANALYSIS_CANCLED = I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED");
|
||||
|
||||
// ongoing notifications - displayed while processing, dynamically changing
|
||||
public static String ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y = I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y");
|
||||
|
@ -47,14 +29,7 @@ public class Messages {
|
|||
// Labels
|
||||
public static String LABEL_CORPUS_LOCATION_NOT_SET = I18N.get("message.LABEL_CORPUS_LOCATION_NOT_SET");
|
||||
public static String LABEL_RESULTS_LOCATION_NOT_SET = I18N.get("message.LABEL_RESULTS_LOCATION_NOT_SET");
|
||||
public static String LABEL_RESULTS_CORPUS_TYPE_NOT_SET = I18N.get("message.LABEL_RESULTS_CORPUS_TYPE_NOT_SET");
|
||||
|
||||
public static String LABEL_SCANNING_CORPUS = I18N.get("message.LABEL_SCANNING_CORPUS");
|
||||
public static String LABEL_SCANNING_SINGLE_FILE_CORPUS = I18N.get("message.LABEL_SCANNING_SINGLE_FILE_CORPUS");
|
||||
public static String COMPLETED = I18N.get("message.COMPLETED");
|
||||
|
||||
// public static String TOOLTIP_chooseCorpusLocationB = I18N.get("message.TOOLTIP_chooseCorpusLocationB");
|
||||
// public static String TOOLTIP_readHeaderInfoChB = I18N.get("message.TOOLTIP_readHeaderInfoChB");
|
||||
public static String TOOLTIP_readNotePunctuationsChB = I18N.get("message.TOOLTIP_readNotePunctuationsChB");
|
||||
public static String TOOLTIP_readDisplayTaxonomyChB = I18N.get("message.TOOLTIP_readDisplayTaxonomyChB");
|
||||
|
||||
|
|
|
@ -1,12 +1,8 @@
|
|||
package gui;
|
||||
|
||||
import alg.XML_processing;
|
||||
import data.*;
|
||||
import javafx.application.HostServices;
|
||||
import javafx.beans.InvalidationListener;
|
||||
import javafx.beans.Observable;
|
||||
import javafx.beans.binding.StringBinding;
|
||||
import javafx.beans.property.ReadOnlyDoubleWrapper;
|
||||
import javafx.beans.value.ChangeListener;
|
||||
import javafx.beans.value.ObservableValue;
|
||||
import javafx.collections.ListChangeListener;
|
||||
|
@ -24,12 +20,10 @@ import javafx.scene.image.ImageView;
|
|||
import util.Tasks;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static alg.XML_processing.readXML;
|
||||
import static gui.GUIController.showAlert;
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
|
@ -38,7 +32,6 @@ public class OneWordAnalysisTab {
|
|||
|
||||
@FXML
|
||||
private AnchorPane oneWordAnalysisTabPane;
|
||||
// private ArrayList<String> alsoVisualize;
|
||||
|
||||
@FXML
|
||||
public TextArea selectedFiltersTextArea;
|
||||
|
@ -197,43 +190,22 @@ public class OneWordAnalysisTab {
|
|||
private ChangeListener<Boolean> minimalTaxonomyListener;
|
||||
private ChangeListener<Boolean> minimalRelFreListener;
|
||||
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
|
||||
|
||||
|
@ -242,9 +214,6 @@ public class OneWordAnalysisTab {
|
|||
|
||||
// TODO: pass observables for taxonomy based on header scan
|
||||
// after header scan
|
||||
private ObservableList<String> taxonomyCCBValues;
|
||||
private CorpusType currentCorpusType;
|
||||
|
||||
public void init() {
|
||||
// add CSS style
|
||||
oneWordAnalysisTabPane.getStylesheets().add("style.css");
|
||||
|
@ -339,9 +308,6 @@ public class OneWordAnalysisTab {
|
|||
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||
}
|
||||
};
|
||||
// alsoVisualizeCCB.getCheckModel().clearChecks();
|
||||
// alsoVisualizeCCB.getItems().removeAll();
|
||||
// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
|
||||
|
||||
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
|
||||
|
||||
|
@ -462,18 +428,14 @@ public class OneWordAnalysisTab {
|
|||
public void onChanged(Change<? extends String> c) {
|
||||
if (changing) {
|
||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
|
||||
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
||||
|
||||
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
|
||||
|
||||
taxonomy = new ArrayList<>();
|
||||
taxonomy.addAll(checkedItemsTaxonomy);
|
||||
|
||||
taxonomyCCB.getItems().removeAll();
|
||||
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
||||
|
||||
// taxonomyCCB.getCheckModel().clearChecks();
|
||||
changing = false;
|
||||
taxonomyCCB.getCheckModel().clearChecks();
|
||||
for (Taxonomy t : checkedItemsTaxonomy) {
|
||||
|
@ -527,7 +489,6 @@ public class OneWordAnalysisTab {
|
|||
writeMsdAtTheEnd = newValue;
|
||||
logger.info("write msd at the end: ", writeMsdAtTheEnd);
|
||||
});
|
||||
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
|
||||
|
||||
notePunctuations = false;
|
||||
// set
|
||||
|
@ -653,84 +614,6 @@ public class OneWordAnalysisTab {
|
|||
cancel.setVisible(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* case a: values for combo boxes can change after a corpus change
|
||||
* <ul>
|
||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
||||
* <li>same corpus type, different subset - keep</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* case b: values for combo boxes can change after a header scan
|
||||
* <ul>
|
||||
* <li>at first, fields are populated by corpus type defaults</li>
|
||||
* <li>after, with gathered data</li>
|
||||
* </ul>
|
||||
* <p></p>
|
||||
* ngrams: 1
|
||||
* calculateFor: word
|
||||
* msd:
|
||||
* taxonomy:
|
||||
* skip: 0
|
||||
* iscvv: false
|
||||
* string length: 1
|
||||
*/
|
||||
// public void populateFields() {
|
||||
// // corpus changed if: current one is null (this is first run of the app)
|
||||
// // or if currentCorpus != gui's corpus
|
||||
// boolean corpusChanged = currentCorpusType == null
|
||||
// || currentCorpusType != corpus.getCorpusType();
|
||||
//
|
||||
//
|
||||
// // TODO: check for GOS, GIGAFIDA, SOLAR...
|
||||
// // refresh and:
|
||||
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
||||
// if (calculateFor == null) {
|
||||
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
|
||||
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
||||
// }
|
||||
//
|
||||
// if (!filter.hasMsd()) {
|
||||
// // if current corpus doesn't have msd data, disable this field
|
||||
// msd = new ArrayList<>();
|
||||
// msdTF.setText("");
|
||||
// msdTF.setDisable(true);
|
||||
// logger.info("no msd data");
|
||||
// } else {
|
||||
// if (ValidationUtil.isEmpty(msd)
|
||||
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
||||
// // msd has not been set previously
|
||||
// // or msd has been set but the corpus changed -> reset
|
||||
// msd = new ArrayList<>();
|
||||
// msdTF.setText("");
|
||||
// msdTF.setDisable(false);
|
||||
// logger.info("msd reset");
|
||||
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
||||
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
|
||||
// msdTF.setText(StringUtils.join(msdStrings, " "));
|
||||
// msdTF.setDisable(false);
|
||||
// logger.info("msd kept");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // TODO: trigger on rescan
|
||||
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
||||
// // user changed corpus (by type) or by selection & triggered a rescan of headers
|
||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
//
|
||||
// currentCorpusType = corpus.getCorpusType();
|
||||
// // setTaxonomyIsDirty(false);
|
||||
// } else {
|
||||
//
|
||||
// }
|
||||
//
|
||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
||||
//
|
||||
// }
|
||||
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
||||
Tooltip tooltip = new Tooltip();
|
||||
tooltip.textProperty().bind(stringBinding);
|
||||
|
@ -819,7 +702,6 @@ public class OneWordAnalysisTab {
|
|||
filter.setSolarFilters(solarFiltersMap);
|
||||
filter.setStringLength(1);
|
||||
filter.setMultipleKeys(alsoVisualize);
|
||||
// filter.setNotePunctuations(true);
|
||||
filter.setNotePunctuations(notePunctuations);
|
||||
|
||||
// setMsd must be behind alsoVisualize
|
||||
|
@ -878,136 +760,14 @@ public class OneWordAnalysisTab {
|
|||
private void execute(StatisticsNew statistic) {
|
||||
logger.info("Started execution: ", statistic.getFilter());
|
||||
|
||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||
|
||||
// final Task<Void> task = new Task<Void>() {
|
||||
// @SuppressWarnings("Duplicates")
|
||||
// @Override
|
||||
// protected Void call() throws Exception {
|
||||
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
|
||||
// if(multipleFiles){
|
||||
// cancel.setVisible(true);
|
||||
// }
|
||||
// int i = 0;
|
||||
// Date startTime = new Date();
|
||||
// Date previousTime = new Date();
|
||||
// int remainingSeconds = -1;
|
||||
// for (File f : corpusFiles) {
|
||||
// final int iFinal = i;
|
||||
// XML_processing xml_processing = new XML_processing();
|
||||
// xml_processing.isCancelled = false;
|
||||
// i++;
|
||||
// if(xml_processing.progressBarListener != null) {
|
||||
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
|
||||
// }
|
||||
// if (multipleFiles) {
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// this.updateProgress(i, corpusFiles.size());
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
|
||||
//// if (isCancelled()) {
|
||||
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
//// break;
|
||||
//// }
|
||||
// } else {
|
||||
//
|
||||
// xml_processing.progressBarListener = new InvalidationListener() {
|
||||
// int remainingSeconds = -1;
|
||||
// Date previousTime = new Date();
|
||||
// @Override
|
||||
// public void invalidated(Observable observable) {
|
||||
// cancel.setVisible(true);
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||
// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// xml_processing.isCancelled = isCancelled();
|
||||
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
|
||||
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
||||
// }
|
||||
// xml_processing.readXML(f.toString(), statistic);
|
||||
// if (isCancelled()) {
|
||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return null;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
// progressLabel.textProperty().bind(task.messageProperty());
|
||||
//
|
||||
// task.setOnSucceeded(e -> {
|
||||
// try {
|
||||
// boolean successullySaved = statistic.saveResultToDisk();
|
||||
// if (successullySaved) {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
||||
// } else {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
||||
// }
|
||||
// } catch (UnsupportedEncodingException e1) {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
||||
// logger.error("Error while saving", e1);
|
||||
// }
|
||||
//
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// task.setOnFailed(e -> {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
||||
// logger.error("Error while executing", e);
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// task.setOnCancelled(e -> {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// // When cancel button is pressed cancel analysis
|
||||
// cancel.setOnAction(e -> {
|
||||
// task.cancel();
|
||||
// logger.info("cancel button");
|
||||
// });
|
||||
|
||||
// final Thread thread = new Thread(task, "task");
|
||||
// thread.setDaemon(true);
|
||||
// thread.start();
|
||||
|
||||
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
|
||||
if (statistic.getFilter().getMinimalRelFre() > 1){
|
||||
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
|
||||
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
|
||||
final Thread thread = new Thread(mainTask, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
} else {
|
||||
final Task<Void> mainTask = t.prepareMainTask(statistic);
|
||||
// final Task<Void> mainTask = prepareMainTask(statistic);
|
||||
final Thread thread = new Thread(mainTask, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
package gui;
|
||||
|
||||
import javafx.scene.control.Label;
|
||||
|
||||
public class SelectedFiltersPane {
|
||||
|
||||
|
||||
public Label selectedFiltersLabel;
|
||||
|
||||
public Label getSelectedFiltersLabel() {
|
||||
return selectedFiltersLabel;
|
||||
}
|
||||
|
||||
public void setSelectedFiltersLabel(String filters) {
|
||||
this.selectedFiltersLabel = new Label(filters);
|
||||
this.selectedFiltersLabel.setText("test?");
|
||||
}
|
||||
}
|
|
@ -1,21 +1,12 @@
|
|||
package gui;
|
||||
|
||||
import static alg.XML_processing.*;
|
||||
import static gui.GUIController.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import alg.XML_processing;
|
||||
import javafx.application.HostServices;
|
||||
import javafx.beans.InvalidationListener;
|
||||
import javafx.beans.Observable;
|
||||
import javafx.beans.binding.StringBinding;
|
||||
import javafx.beans.property.ReadOnlyDoubleWrapper;
|
||||
import javafx.beans.value.ChangeListener;
|
||||
import javafx.beans.value.ObservableValue;
|
||||
import javafx.scene.image.ImageView;
|
||||
|
@ -147,15 +138,6 @@ public class StringAnalysisTabNew2 {
|
|||
@FXML
|
||||
private CheckComboBox<String> taxonomyCCB;
|
||||
private ArrayList<Taxonomy> taxonomy;
|
||||
//
|
||||
// @FXML
|
||||
// private CheckBox calculatecvvCB;
|
||||
// private boolean calculateCvv;
|
||||
|
||||
// @FXML
|
||||
// private TextField stringLengthTF;
|
||||
// private Integer stringLength;
|
||||
|
||||
@FXML
|
||||
private ComboBox<String> calculateForCB;
|
||||
private CalculateFor calculateFor;
|
||||
|
@ -225,8 +207,6 @@ public class StringAnalysisTabNew2 {
|
|||
|
||||
private Corpus corpus;
|
||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||
private Filter filter;
|
||||
private boolean useDb;
|
||||
private HostServices hostService;
|
||||
private ListChangeListener<String> taxonomyListener;
|
||||
private ListChangeListener<String> alsoVisualizeListener;
|
||||
|
@ -236,44 +216,25 @@ public class StringAnalysisTabNew2 {
|
|||
private ChangeListener<Boolean> minimalOccurrencesListener;
|
||||
private ChangeListener<Boolean> minimalTaxonomyListener;
|
||||
private ChangeListener<Boolean> minimalRelFreListener;
|
||||
private boolean useDb;
|
||||
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
||||
private static final ObservableList<String> COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice", "t-score", "MI", "MI3", "logDice", "simple LL");
|
||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
|
||||
|
||||
|
@ -282,9 +243,6 @@ public class StringAnalysisTabNew2 {
|
|||
|
||||
// TODO: pass observables for taxonomy based on header scan
|
||||
// after header scan
|
||||
private ObservableList<String> taxonomyCCBValues;
|
||||
private CorpusType currentCorpusType;
|
||||
|
||||
public void init() {
|
||||
// add CSS style
|
||||
stringAnalysisTabPaneNew2.getStylesheets().add("style.css");
|
||||
|
@ -420,13 +378,6 @@ public class StringAnalysisTabNew2 {
|
|||
} else {
|
||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_EMPTY));
|
||||
}
|
||||
// alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||
// alsoVisualize = new ArrayList<>();
|
||||
// ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
|
||||
// alsoVisualize.addAll(checkedItems);
|
||||
// logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||
// });
|
||||
// alsoVisualizeCCB.getCheckModel().clearChecks();
|
||||
|
||||
alsoVisualizeListener = new ListChangeListener<String>() {
|
||||
@Override
|
||||
|
@ -437,9 +388,6 @@ public class StringAnalysisTabNew2 {
|
|||
logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
|
||||
}
|
||||
};
|
||||
// alsoVisualizeCCB.getCheckModel().clearChecks();
|
||||
// alsoVisualizeCCB.getItems().removeAll();
|
||||
// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS));
|
||||
|
||||
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
|
||||
|
||||
|
@ -500,16 +448,6 @@ public class StringAnalysisTabNew2 {
|
|||
collocabilityCCB.setDisable(false);
|
||||
|
||||
collocabilityCCB.getCheckModel().getCheckedItems().addListener(collocabilityListener);
|
||||
// collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||
// collocability = new ArrayList<>();
|
||||
// ObservableList<Collocability> checkedItems = FXCollections.observableArrayList();
|
||||
// for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) {
|
||||
// checkedItems.add(Collocability.factory(el));
|
||||
// }
|
||||
// collocability.addAll(checkedItems);
|
||||
// logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ",")));
|
||||
// });
|
||||
|
||||
|
||||
// msd
|
||||
if (msdListener != null){
|
||||
|
@ -595,9 +533,6 @@ public class StringAnalysisTabNew2 {
|
|||
public void onChanged(ListChangeListener.Change<? extends String> c){
|
||||
if(changing) {
|
||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
|
||||
//
|
||||
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
|
||||
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
||||
|
||||
taxonomy = new ArrayList<>();
|
||||
|
@ -606,7 +541,6 @@ public class StringAnalysisTabNew2 {
|
|||
taxonomyCCB.getItems().removeAll();
|
||||
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
||||
|
||||
// taxonomyCCB.getCheckModel().clearChecks();
|
||||
changing = false;
|
||||
taxonomyCCB.getCheckModel().clearChecks();
|
||||
for (Taxonomy t : checkedItemsTaxonomy) {
|
||||
|
@ -646,32 +580,6 @@ public class StringAnalysisTabNew2 {
|
|||
skipValue = 0;
|
||||
|
||||
// cvv
|
||||
// calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
// calculateCvv = newValue;
|
||||
// logger.info("calculate cvv: " + calculateCvv);
|
||||
// });
|
||||
|
||||
// calculatecvvCB.setSelected(false);
|
||||
|
||||
// string length
|
||||
// stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
// if (!newValue) {
|
||||
// // focus lost
|
||||
// String value = stringLengthTF.getText();
|
||||
// if (!ValidationUtil.isEmpty(value)) {
|
||||
// if (!ValidationUtil.isNumber(value)) {
|
||||
// logAlert("stringlengthTf: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
||||
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
||||
// }
|
||||
// stringLength = Integer.parseInt(value);
|
||||
// } else {
|
||||
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_MISSING_STRING_LENGTH"));
|
||||
// stringLengthTF.setText("1");
|
||||
// logAlert(I18N.get("message.WARNING_MISSING_STRING_LENGTH"));
|
||||
// }
|
||||
// }
|
||||
// });
|
||||
|
||||
minimalOccurrencesTF.setText("1");
|
||||
minimalOccurrences = 1;
|
||||
|
||||
|
@ -781,108 +689,6 @@ public class StringAnalysisTabNew2 {
|
|||
cancel.setVisible(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* case a: values for combo boxes can change after a corpus change
|
||||
* <ul>
|
||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
||||
* <li>same corpus type, different subset - keep</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* case b: values for combo boxes can change after a header scan
|
||||
* <ul>
|
||||
* <li>at first, fields are populated by corpus type defaults</li>
|
||||
* <li>after, with gathered data</li>
|
||||
* </ul>
|
||||
* <p></p>
|
||||
* ngrams: 1
|
||||
* calculateFor: word
|
||||
* msd:
|
||||
* taxonomy:
|
||||
* skip: 0
|
||||
* iscvv: false
|
||||
* string length: 1
|
||||
*/
|
||||
// public void populateFields() {
|
||||
// // corpus changed if: current one is null (this is first run of the app)
|
||||
// // or if currentCorpus != gui's corpus
|
||||
// boolean corpusChanged = currentCorpusType == null
|
||||
// || currentCorpusType != corpus.getCorpusType();
|
||||
//
|
||||
// // keep ngram value if set
|
||||
// if (ngramValue == null) {
|
||||
// ngramValueCB.getSelectionModel().select("1");
|
||||
// ngramValue = 1;
|
||||
// }
|
||||
//
|
||||
// // TODO: check for GOS, GIGAFIDA, SOLAR...
|
||||
// // refresh and:
|
||||
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
||||
// if (calculateFor == null) {
|
||||
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
|
||||
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
||||
// }
|
||||
//
|
||||
// if (!filter.hasMsd()) {
|
||||
// // if current corpus doesn't have msd data, disable this field
|
||||
// msd = new ArrayList<>();
|
||||
// msdTF.setText("");
|
||||
// msdTF.setDisable(true);
|
||||
// logger.info("no msd data");
|
||||
// } else {
|
||||
// if (ValidationUtil.isEmpty(msd)
|
||||
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
||||
// // msd has not been set previously
|
||||
// // or msd has been set but the corpus changed -> reset
|
||||
// msd = new ArrayList<>();
|
||||
// msdTF.setText("");
|
||||
// msdTF.setDisable(false);
|
||||
// logger.info("msd reset");
|
||||
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
||||
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
|
||||
// msdTF.setText(StringUtils.join(msdStrings, " "));
|
||||
// msdTF.setDisable(false);
|
||||
// logger.info("msd kept");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
|
||||
//
|
||||
// // keep skip value
|
||||
// if (skipValue == null) {
|
||||
// skipValueCB.getSelectionModel().select("0");
|
||||
// skipValue = 0;
|
||||
// }
|
||||
//
|
||||
// // keep calculateCvv
|
||||
// calculatecvvCB.setSelected(calculateCvv);
|
||||
//
|
||||
// // keep string length if set
|
||||
// if (stringLength != null) {
|
||||
// stringLengthTF.setText(String.valueOf(stringLength));
|
||||
// } else {
|
||||
// stringLengthTF.setText("1");
|
||||
// stringLength = 1;
|
||||
// }
|
||||
//
|
||||
// // TODO: trigger on rescan
|
||||
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
||||
// // user changed corpus (by type) or by selection & triggered a rescan of headers
|
||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
//
|
||||
// currentCorpusType = corpus.getCorpusType();
|
||||
// // setTaxonomyIsDirty(false);
|
||||
// } else {
|
||||
//
|
||||
// }
|
||||
//
|
||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
||||
//
|
||||
// }
|
||||
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
||||
Tooltip tooltip = new Tooltip();
|
||||
tooltip.textProperty().bind(stringBinding);
|
||||
|
@ -898,7 +704,6 @@ public class StringAnalysisTabNew2 {
|
|||
calculateForL.textProperty().bind(I18N.createStringBinding("label.calculateFor"));
|
||||
alsoVisualizeL.textProperty().bind(I18N.createStringBinding("label.alsoVisualize"));
|
||||
displayTaxonomyL.textProperty().bind(I18N.createStringBinding("label.displayTaxonomy"));
|
||||
// writeMsdAtTheEndL.textProperty().bind(I18N.createStringBinding("label.writeMsdAtTheEnd"));
|
||||
skipValueL.textProperty().bind(I18N.createStringBinding("label.skipValue"));
|
||||
slowSpeedWarning1L.textProperty().bind(I18N.createStringBinding("label.slowSpeedWarning"));
|
||||
slowSpeedWarning2L.textProperty().bind(I18N.createStringBinding("label.slowSpeedWarning"));
|
||||
|
@ -948,10 +753,6 @@ public class StringAnalysisTabNew2 {
|
|||
|
||||
if (mode == MODE.WORD) {
|
||||
paneWords.setVisible(true);
|
||||
// paneLetters.setVisible(false);
|
||||
// if (corpus.getCorpusType() == CorpusType.GOS)
|
||||
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
|
||||
// else
|
||||
if (corpus.getCorpusType() == CorpusType.GOS) {
|
||||
calculateForCB.itemsProperty().unbind();
|
||||
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_GOS));
|
||||
|
@ -980,7 +781,6 @@ public class StringAnalysisTabNew2 {
|
|||
filter.setDisplayTaxonomy(displayTaxonomy);
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
filter.setSkipValue(skipValue);
|
||||
// filter.setIsCvv(calculateCvv);
|
||||
filter.setSolarFilters(solarFiltersMap);
|
||||
filter.setNotePunctuations(notePunctuations);
|
||||
filter.setMultipleKeys(alsoVisualize);
|
||||
|
@ -993,16 +793,11 @@ public class StringAnalysisTabNew2 {
|
|||
filter.setCollocability(collocability);
|
||||
filter.setTaxonomySetOperation(taxonomySetOperation);
|
||||
|
||||
// if (ngramValue != null && ngramValue == 0) {
|
||||
// filter.setStringLength(stringLength);
|
||||
// }
|
||||
|
||||
String message = Validation.validateForStringLevel(filter);
|
||||
if (message == null) {
|
||||
// no errors
|
||||
logger.info("Executing: ", filter.toString());
|
||||
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||
// ADD THINGS HERE!!!
|
||||
execute(statistic);
|
||||
} else {
|
||||
logAlert(message);
|
||||
|
@ -1043,547 +838,6 @@ public class StringAnalysisTabNew2 {
|
|||
}
|
||||
}
|
||||
|
||||
// public void calculate_collocabilities(StatisticsNew statistics, StatisticsNew oneWordStatistics) {
|
||||
// statistics.updateCalculateCollocabilities(oneWordStatistics);
|
||||
//
|
||||
// }
|
||||
|
||||
// private final Task<Void> prepareTaskForMinRelFre(StatisticsNew statistic) {
|
||||
// Filter f = statistic.getFilter();
|
||||
// logger.info("Started execution: ", f);
|
||||
// Task<Void> task_collocability = null;
|
||||
//
|
||||
// try{
|
||||
// Filter f2 = (Filter) f.clone();
|
||||
// f2.setIsMinimalRelFreScraper(true);
|
||||
// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb);
|
||||
//
|
||||
//
|
||||
//// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb);
|
||||
//
|
||||
// Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();
|
||||
//
|
||||
// final Task<Void> task = new Task<Void>() {
|
||||
// @SuppressWarnings("Duplicates")
|
||||
// @Override
|
||||
// protected Void call() throws Exception {
|
||||
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType());
|
||||
// if(multipleFiles){
|
||||
// cancel.setVisible(true);
|
||||
// }
|
||||
// Date startTime = new Date();
|
||||
// Date previousTime = new Date();
|
||||
// int remainingSeconds = -1;
|
||||
// int corpusSize;
|
||||
// int i;
|
||||
// if(statistic.getFilter().getCollocability().size() > 0){
|
||||
// i = 0;
|
||||
// corpusSize = corpusFiles.size() * 3;
|
||||
// } else {
|
||||
// i = 0;
|
||||
// corpusSize = corpusFiles.size() * 2;
|
||||
// }
|
||||
// for (File f : corpusFiles) {
|
||||
// final int iFinal = i;
|
||||
// XML_processing xml_processing = new XML_processing();
|
||||
// xml_processing.isCancelled = false;
|
||||
// i++;
|
||||
// if(xml_processing.progressBarListener != null) {
|
||||
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
|
||||
// }
|
||||
// if (multipleFiles) {
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// this.updateProgress(i, corpusSize);
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||
//// if (isCancelled()) {
|
||||
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
//// break;
|
||||
//// }
|
||||
// } else {
|
||||
// xml_processing.progressBarListener = new InvalidationListener() {
|
||||
// int remainingSeconds = -1;
|
||||
// Date previousTime = new Date();
|
||||
// @Override
|
||||
// public void invalidated(Observable observable) {
|
||||
// cancel.setVisible(true);
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
||||
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
|
||||
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
||||
//// System.out.println(remainingSeconds);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// xml_processing.isCancelled = isCancelled();
|
||||
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
|
||||
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
||||
// }
|
||||
// xml_processing.readXML(f.toString(), statisticsMinRelFre);
|
||||
// if (isCancelled()) {
|
||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
// break;
|
||||
// }
|
||||
// if(!(multipleFiles)){
|
||||
// cancel.setVisible(false);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // add remaining minRelFre results
|
||||
// if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
|
||||
//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
|
||||
// long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
|
||||
// double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
||||
//
|
||||
// statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor);
|
||||
//
|
||||
// // reset all values
|
||||
// for(Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()){
|
||||
// statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>());
|
||||
// }
|
||||
// for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){
|
||||
// statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
|
||||
// }
|
||||
//
|
||||
//// System.out.println("asd");
|
||||
// }
|
||||
//
|
||||
// return null;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
// progressLabel.textProperty().bind(task.messageProperty());
|
||||
// task.setOnSucceeded(e -> {
|
||||
// statistic.updateMinimalRelFre(statisticsMinRelFre.getMinimalRelFreNgrams(), statisticsMinRelFre.getMinimalRelFre1grams());
|
||||
// final Task<Void> taskCollocability = prepareMainTask(statistic);
|
||||
// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
|
||||
// thread_collocability.setDaemon(true);
|
||||
// thread_collocability.start();
|
||||
// });
|
||||
//
|
||||
// task.setOnFailed(e -> {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
||||
// logger.error("Error while executing", e);
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
// // ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// task.setOnCancelled(e -> {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
// // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// // When cancel button is pressed cancel analysis
|
||||
// cancel.setOnAction(e -> {
|
||||
// task.cancel();
|
||||
// logger.info("cancel button");
|
||||
// });
|
||||
//
|
||||
// return task;
|
||||
// }catch(CloneNotSupportedException c){ return null; }
|
||||
// }
|
||||
//
|
||||
// private final Task<Void> prepareMainTask(StatisticsNew statistic) {
|
||||
// Filter f = statistic.getFilter();
|
||||
// logger.info("Started execution: ", f);
|
||||
// Task<Void> task_collocability = null;
|
||||
//
|
||||
// Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||
//
|
||||
// final Task<Void> task = new Task<Void>() {
|
||||
// @SuppressWarnings("Duplicates")
|
||||
// @Override
|
||||
// protected Void call() throws Exception {
|
||||
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
|
||||
// if(multipleFiles){
|
||||
// cancel.setVisible(true);
|
||||
// }
|
||||
//
|
||||
//
|
||||
//// int i = corpusFiles.size();
|
||||
//// Date startTime = new Date();
|
||||
//// Date previousTime = new Date();
|
||||
//// int remainingSeconds = -1;
|
||||
//// int corpusSize;
|
||||
//// if (statistic.getFilter().getCollocability().size() > 0) {
|
||||
//// corpusSize = corpusFiles.size() * 2;
|
||||
//// } else {
|
||||
//// corpusSize = corpusFiles.size();
|
||||
//// }
|
||||
//
|
||||
// Date startTime = new Date();
|
||||
// Date previousTime = new Date();
|
||||
// int remainingSeconds = -1;
|
||||
// int corpusSize;
|
||||
// int i;
|
||||
// int taskIndex = 0;
|
||||
// if(statistic.getFilter().getCollocability().size() > 0 && statistic.getFilter().getMinimalRelFre() > 1){
|
||||
// i = corpusFiles.size();
|
||||
// corpusSize = corpusFiles.size() * 3;
|
||||
// } else if (statistic.getFilter().getMinimalRelFre() > 1) {
|
||||
// i = corpusFiles.size();
|
||||
// corpusSize = corpusFiles.size() * 2;
|
||||
// } else if (statistic.getFilter().getCollocability().size() > 0) {
|
||||
// i = 0;
|
||||
// corpusSize = corpusFiles.size() * 2;
|
||||
// } else {
|
||||
// i = 0;
|
||||
// corpusSize = corpusFiles.size();
|
||||
// }
|
||||
// for (File f : corpusFiles) {
|
||||
// final int iFinal = i;
|
||||
// XML_processing xml_processing = new XML_processing();
|
||||
// xml_processing.isCancelled = false;
|
||||
// i++;
|
||||
// taskIndex++;
|
||||
// if(xml_processing.progressBarListener != null) {
|
||||
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
|
||||
// }
|
||||
// if (multipleFiles) {
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// this.updateProgress(i, corpusSize);
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||
//
|
||||
//// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
//// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
|
||||
//// previousTime = new Date();
|
||||
//// }
|
||||
//// this.updateProgress(i, corpusSize);
|
||||
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||
//
|
||||
// } else {
|
||||
// xml_processing.progressBarListener = new InvalidationListener() {
|
||||
// int remainingSeconds = -1;
|
||||
// Date previousTime = new Date();
|
||||
// @Override
|
||||
// public void invalidated(Observable observable) {
|
||||
// cancel.setVisible(true);
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
||||
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
|
||||
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
||||
//// System.out.println(remainingSeconds);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// xml_processing.isCancelled = isCancelled();
|
||||
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
|
||||
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
||||
// }
|
||||
// xml_processing.readXML(f.toString(), statistic);
|
||||
// if (isCancelled()) {
|
||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
// break;
|
||||
// }
|
||||
// if(!(multipleFiles)){
|
||||
// cancel.setVisible(false);
|
||||
// }
|
||||
//// readXML(f.toString(), statistic);
|
||||
//// i++;
|
||||
//// if (isCancelled()) {
|
||||
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
//// break;
|
||||
//// }
|
||||
//// if (statistic.getFilter().getCollocability().size() > 0) {
|
||||
//// this.updateProgress(i, corpusFiles.size() * 2);
|
||||
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
|
||||
//// } else {
|
||||
//// this.updateProgress(i, corpusFiles.size());
|
||||
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
||||
//// }
|
||||
////// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
|
||||
// }
|
||||
// // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing
|
||||
// if (statistic.getFilter().getMinimalRelFre() > 1){
|
||||
//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
|
||||
// long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
|
||||
// double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
||||
//
|
||||
//
|
||||
// for(Map.Entry<MultipleHMKeys, AtomicLong> entry : statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()){
|
||||
// if(entry.getValue().longValue() < absToRelFactor){
|
||||
// statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).remove(entry.getKey());
|
||||
// }
|
||||
// }
|
||||
// statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor);
|
||||
// }
|
||||
//
|
||||
// return null;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
// progressLabel.textProperty().bind(task.messageProperty());
|
||||
// task.setOnSucceeded(e -> {
|
||||
// if (f.getCollocability().size() > 0) {
|
||||
// try{
|
||||
// Filter f2 = (Filter) f.clone();
|
||||
// f2.setNgramValue(1);
|
||||
// StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
|
||||
// final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
|
||||
// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
|
||||
// thread_collocability.setDaemon(true);
|
||||
// thread_collocability.start();
|
||||
// }catch(CloneNotSupportedException c){}
|
||||
//
|
||||
//
|
||||
//
|
||||
// } else {
|
||||
// try {
|
||||
//// System.out.print(statistics);
|
||||
// boolean successullySaved = statistic.saveResultToDisk();
|
||||
// if (successullySaved) {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
||||
// } else {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
||||
// }
|
||||
// } catch (UnsupportedEncodingException e1) {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
||||
// logger.error("Error while saving", e1);
|
||||
// } catch (OutOfMemoryError e1) {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
|
||||
// logger.error("Out of memory error", e1);
|
||||
// }
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// }
|
||||
//
|
||||
//
|
||||
// });
|
||||
//
|
||||
// task.setOnFailed(e -> {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
||||
// logger.error("Error while executing", e);
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// task.setOnCancelled(e -> {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// // When cancel button is pressed cancel analysis
|
||||
// cancel.setOnAction(e -> {
|
||||
// task.cancel();
|
||||
// logger.info("cancel button");
|
||||
// });
|
||||
//
|
||||
// return task;
|
||||
// }
|
||||
//
|
||||
// private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
|
||||
// Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
|
||||
//
|
||||
// final Task<Void> task = new Task<Void>() {
|
||||
// @SuppressWarnings("Duplicates")
|
||||
// @Override
|
||||
// protected Void call() throws Exception {
|
||||
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
|
||||
// if(multipleFiles){
|
||||
// cancel.setVisible(true);
|
||||
// }
|
||||
//// int i = corpusFiles.size();
|
||||
// Date startTime = new Date();
|
||||
// Date previousTime = new Date();
|
||||
// int remainingSeconds = -1;
|
||||
//// int corpusSize;
|
||||
//// if (statistic.getFilter().getCollocability().size() > 0) {
|
||||
//// corpusSize = corpusFiles.size() * 2;
|
||||
//// } else {
|
||||
//// corpusSize = corpusFiles.size();
|
||||
//// }
|
||||
//
|
||||
//
|
||||
// int corpusSize;
|
||||
// int i;
|
||||
// int taskIndex = 0;
|
||||
// if(statistic.getFilter().getMinimalRelFre() > 1){
|
||||
// i = corpusFiles.size() * 2;
|
||||
// corpusSize = corpusFiles.size() * 3;
|
||||
// } else {
|
||||
// i = corpusFiles.size();
|
||||
// corpusSize = corpusFiles.size() * 2;
|
||||
// }
|
||||
//
|
||||
//
|
||||
//
|
||||
// for (File f : corpusFiles) {
|
||||
// final int iFinal = i;
|
||||
// XML_processing xml_processing = new XML_processing();
|
||||
// i++;
|
||||
// taskIndex++;
|
||||
// if(xml_processing.progressBarListener != null) {
|
||||
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
|
||||
// }
|
||||
// if (multipleFiles) {
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// this.updateProgress(i, corpusSize);
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||
//// if (isCancelled()) {
|
||||
//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
//// break;
|
||||
//// }
|
||||
// } else {
|
||||
// xml_processing.progressBarListener = new InvalidationListener() {
|
||||
// int remainingSeconds = -1;
|
||||
// Date previousTime = new Date();
|
||||
// @Override
|
||||
// public void invalidated(Observable observable) {
|
||||
// cancel.setVisible(true);
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||
// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||
//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
||||
//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)));
|
||||
//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
||||
//// System.out.println(remainingSeconds);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// xml_processing.isCancelled = isCancelled();
|
||||
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
|
||||
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds));
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
||||
// }
|
||||
// xml_processing.isCollocability = true;
|
||||
// xml_processing.readXML(f.toString(), statisticsOneGrams);
|
||||
// xml_processing.isCollocability = false;
|
||||
// if (isCancelled()) {
|
||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
// break;
|
||||
// }
|
||||
//// readXML(f.toString(), statisticsOneGrams);
|
||||
//// i++;
|
||||
//// this.updateProgress(i, corpusFiles.size() * 2);
|
||||
//// if (statistic.getFilter().getCollocability().size() > 0) {
|
||||
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
|
||||
//// } else {
|
||||
//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
||||
//// }
|
||||
// }
|
||||
//
|
||||
// return null;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
// progressLabel.textProperty().bind(task.messageProperty());
|
||||
//
|
||||
// task.setOnSucceeded(e -> {
|
||||
// try {
|
||||
// System.out.print(statistic);
|
||||
//// calculate_collocabilities(statistic, statisticsOneGrams);
|
||||
// statistic.updateCalculateCollocabilities(statisticsOneGrams);
|
||||
// boolean successullySaved = statistic.saveResultToDisk();
|
||||
// if (successullySaved) {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
||||
// } else {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
||||
// }
|
||||
// } catch (UnsupportedEncodingException e1) {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
||||
// logger.error("Error while saving", e1);
|
||||
// } catch (OutOfMemoryError e1) {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
|
||||
// logger.error("Out of memory error", e1);
|
||||
// }
|
||||
//// try {
|
||||
//// boolean successullySaved = statistic.saveResultToDisk();
|
||||
//// if (successullySaved) {
|
||||
//// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||
//// } else {
|
||||
//// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||
//// }
|
||||
//// } catch (UnsupportedEncodingException e1) {
|
||||
//// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||
//// logger.error("Error while saving", e1);
|
||||
//// } catch (OutOfMemoryError e1){
|
||||
//// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
||||
//// logger.error("Out of memory error", e1);
|
||||
//// }
|
||||
////
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// task.setOnFailed(e -> {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
||||
// logger.error("Error while executing", e);
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// task.setOnCancelled(e -> {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// // When cancel button is pressed cancel analysis
|
||||
// cancel.setOnAction(e -> {
|
||||
// task.cancel();
|
||||
//// logger.info("cancel button");
|
||||
// });
|
||||
// return task;
|
||||
// }
|
||||
|
||||
private void execute(StatisticsNew statistic) {
|
||||
Filter f = statistic.getFilter();
|
||||
logger.info("Started execution: ", f);
|
||||
|
@ -1591,13 +845,11 @@ public class StringAnalysisTabNew2 {
|
|||
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
|
||||
if (f.getMinimalRelFre() > 1){
|
||||
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
|
||||
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
|
||||
final Thread thread = new Thread(mainTask, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
} else {
|
||||
final Task<Void> mainTask = t.prepareMainTask(statistic);
|
||||
// final Task<Void> mainTask = prepareMainTask(statistic);
|
||||
final Thread thread = new Thread(mainTask, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
|
|
|
@ -9,7 +9,6 @@ import org.apache.commons.lang3.math.NumberUtils;
|
|||
public class ValidationUtil {
|
||||
|
||||
/**
 * Reports whether {@code value} is accepted as a number by Commons Lang's
 * {@code NumberUtils.isNumber}; the decision is delegated entirely to that call.
 *
 * NOTE(review): per the Commons Lang documentation, {@code isNumber} presumably
 * also accepts hexadecimal, scientific-notation and type-suffixed forms. Callers
 * that pass the same text on to {@code Integer.parseInt} should confirm that this
 * is intended. The {@code isCreatable} variant below is left disabled as found.
 *
 * @param value text to test
 * @return the result of {@code NumberUtils.isNumber(value)}
 */
public static boolean isNumber(String value) {
|
||||
//return NumberUtils.isCreatable(value);
|
||||
return NumberUtils.isNumber(value);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,260 +0,0 @@
|
|||
//package gui;
|
||||
//
|
||||
//import static alg.XML_processing.*;
|
||||
//import static gui.GUIController.*;
|
||||
//
|
||||
//import java.io.File;
|
||||
//import java.io.UnsupportedEncodingException;
|
||||
//import java.util.*;
|
||||
//
|
||||
//import javafx.application.HostServices;
|
||||
//import javafx.scene.control.*;
|
||||
//import org.apache.commons.lang3.StringUtils;
|
||||
//import org.apache.logging.log4j.LogManager;
|
||||
//import org.apache.logging.log4j.Logger;
|
||||
//import org.controlsfx.control.CheckComboBox;
|
||||
//
|
||||
//import data.*;
|
||||
//import javafx.collections.ListChangeListener;
|
||||
//import javafx.collections.ObservableList;
|
||||
//import javafx.concurrent.Task;
|
||||
//import javafx.fxml.FXML;
|
||||
//import javafx.scene.layout.AnchorPane;
|
||||
//
|
||||
//@SuppressWarnings("Duplicates")
|
||||
//public class WordFormationTab {
|
||||
// public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
|
||||
//
|
||||
// public AnchorPane wordAnalysisTabPane;
|
||||
//
|
||||
// @FXML
|
||||
// public Label selectedFiltersLabel;
|
||||
// @FXML
|
||||
// public Label solarFilters;
|
||||
//
|
||||
// @FXML
|
||||
// private CheckComboBox<String> taxonomyCCB;
|
||||
// private ArrayList<Taxonomy> taxonomy;
|
||||
//
|
||||
// @FXML
|
||||
// private TextField minimalOccurrencesTF;
|
||||
// private Integer minimalOccurrences;
|
||||
//
|
||||
// @FXML
|
||||
// private TextField minimalTaxonomyTF;
|
||||
// private Integer minimalTaxonomy;
|
||||
//
|
||||
// @FXML
|
||||
// private Button computeB;
|
||||
//
|
||||
// @FXML
|
||||
// public ProgressBar ngramProgressBar;
|
||||
// @FXML
|
||||
// public Label progressLabel;
|
||||
//
|
||||
// @FXML
|
||||
// private Hyperlink helpH;
|
||||
//
|
||||
// private Corpus corpus;
|
||||
// private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||
// private HostServices hostService;
|
||||
//
|
||||
// // after header scan
|
||||
// private ObservableList<String> taxonomyCCBValues;
|
||||
// private CorpusType currentCorpusType;
|
||||
// private boolean useDb;
|
||||
//
|
||||
//
|
||||
// public void init() {
|
||||
// // taxonomy
|
||||
// if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
||||
// taxonomyCCB.getItems().removeAll();
|
||||
// taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
||||
// taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||
// taxonomy = new ArrayList<>();
|
||||
// ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus);
|
||||
// taxonomy.addAll(checkedItemsTaxonomy);
|
||||
// logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
|
||||
// });
|
||||
// taxonomyCCB.getCheckModel().clearChecks();
|
||||
// } else {
|
||||
// taxonomyCCB.setDisable(true);
|
||||
// }
|
||||
//
|
||||
// // set default values
|
||||
// minimalOccurrencesTF.setText("1");
|
||||
// minimalOccurrences = 1;
|
||||
//
|
||||
// minimalTaxonomyTF.setText("1");
|
||||
// minimalTaxonomy = 1;
|
||||
//
|
||||
// minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
// if (!newValue) {
|
||||
// // focus lost
|
||||
// String value = minimalOccurrencesTF.getText();
|
||||
// if (!ValidationUtil.isEmpty(value)) {
|
||||
// if (!ValidationUtil.isNumber(value)) {
|
||||
// logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
||||
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
||||
// } else {
|
||||
// minimalOccurrences = Integer.parseInt(value);
|
||||
// }
|
||||
// } else {
|
||||
// minimalOccurrencesTF.setText("1");
|
||||
// minimalOccurrences = 1;
|
||||
// }
|
||||
// }
|
||||
// });
|
||||
//
|
||||
// minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
// if (!newValue) {
|
||||
// // focus lost
|
||||
// String value = minimalTaxonomyTF.getText();
|
||||
// if (!ValidationUtil.isEmpty(value)) {
|
||||
// if (!ValidationUtil.isNumber(value)) {
|
||||
// logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
||||
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
||||
// } else {
|
||||
// minimalTaxonomy = Integer.parseInt(value);
|
||||
// }
|
||||
// } else {
|
||||
// minimalTaxonomyTF.setText("1");
|
||||
// minimalTaxonomy = 1;
|
||||
// }
|
||||
// }
|
||||
// });
|
||||
//
|
||||
// computeB.setOnAction(e -> {
|
||||
// compute();
|
||||
// logger.info("compute button");
|
||||
// });
|
||||
//
|
||||
// helpH.setOnAction(e -> openHelpWebsite());
|
||||
// }
|
||||
//
|
||||
// private void compute() {
|
||||
// Filter filter = new Filter();
|
||||
// filter.setNgramValue(1);
|
||||
// filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
||||
// filter.setTaxonomy(taxonomy);
|
||||
// filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
// filter.setSkipValue(0);
|
||||
// filter.setMsd(new ArrayList<>());
|
||||
// filter.setIsCvv(false);
|
||||
// filter.setSolarFilters(solarFiltersMap);
|
||||
// filter.setMinimalOccurrences(minimalOccurrences);
|
||||
// filter.setMinimalTaxonomy(minimalTaxonomy);
|
||||
//
|
||||
// String message = Validation.validateForStringLevel(filter);
|
||||
// if (message == null) {
|
||||
// // no errors
|
||||
// logger.info("Executing: ", filter.toString());
|
||||
// StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||
// execute(statistic);
|
||||
// } else {
|
||||
// logAlert(message);
|
||||
// showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private void openHelpWebsite(){
|
||||
// hostService.showDocument(Messages.HELP_URL);
|
||||
// }
|
||||
//
|
||||
// private void execute(StatisticsNew statistic) {
|
||||
// logger.info("Started execution: ", statistic.getFilter());
|
||||
//
|
||||
// Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||
//
|
||||
// final Task<Void> task = new Task<Void>() {
|
||||
// @SuppressWarnings("Duplicates")
|
||||
// @Override
|
||||
// protected Void call() throws Exception {
|
||||
// int i = 0;
|
||||
// Date startTime = new Date();
|
||||
// Date previousTime = new Date();
|
||||
// for (File f : corpusFiles) {
|
||||
// readXML(f.toString(), statistic);
|
||||
// i++;
|
||||
// this.updateProgress(i, corpusFiles.size());
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
||||
// }
|
||||
//
|
||||
// return null;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
// progressLabel.textProperty().bind(task.messageProperty());
|
||||
//
|
||||
// task.setOnSucceeded(e -> {
|
||||
// try {
|
||||
// // first, we have to recalculate all occurrences to detailed statistics
|
||||
// boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
|
||||
//
|
||||
// if (successullySaved) {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
||||
// } else {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
||||
// }
|
||||
// } catch (UnsupportedEncodingException e1) {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
||||
// logger.error("Error while saving", e1);
|
||||
// }
|
||||
//
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// });
|
||||
//
|
||||
// task.setOnFailed(e -> {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
||||
// logger.error("Error while executing", e);
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// });
|
||||
//
|
||||
// final Thread thread = new Thread(task, "task");
|
||||
// thread.setDaemon(true);
|
||||
// thread.start();
|
||||
// }
|
||||
//
|
||||
// private void logAlert(String alert) {
|
||||
// logger.info("alert: " + alert);
|
||||
// }
|
||||
//
|
||||
//
|
||||
// public void setCorpus(Corpus corpus) {
|
||||
// this.corpus = corpus;
|
||||
//
|
||||
// if (corpus.getCorpusType() != CorpusType.SOLAR) {
|
||||
// setSelectedFiltersLabel(null);
|
||||
// } else {
|
||||
// setSelectedFiltersLabel("/");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public void setSelectedFiltersLabel(String content) {
|
||||
// if (content != null) {
|
||||
// solarFilters.setVisible(true);
|
||||
// selectedFiltersLabel.setVisible(true);
|
||||
// selectedFiltersLabel.setText(content);
|
||||
// } else {
|
||||
// solarFilters.setVisible(false);
|
||||
// selectedFiltersLabel.setVisible(false);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
||||
// this.solarFiltersMap = solarFiltersMap;
|
||||
// }
|
||||
//
|
||||
// public void setHostServices(HostServices hostServices){
|
||||
// this.hostService = hostServices;
|
||||
// }
|
||||
//}
|
|
@ -1,12 +1,8 @@
|
|||
package gui;
|
||||
|
||||
import alg.XML_processing;
|
||||
import data.*;
|
||||
import javafx.application.HostServices;
|
||||
import javafx.beans.InvalidationListener;
|
||||
import javafx.beans.Observable;
|
||||
import javafx.beans.binding.StringBinding;
|
||||
import javafx.beans.property.ReadOnlyDoubleWrapper;
|
||||
import javafx.beans.value.ChangeListener;
|
||||
import javafx.beans.value.ObservableValue;
|
||||
import javafx.collections.ListChangeListener;
|
||||
|
@ -23,12 +19,10 @@ import org.controlsfx.control.CheckComboBox;
|
|||
import util.Tasks;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static alg.XML_processing.readXML;
|
||||
import static gui.GUIController.showAlert;
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
|
@ -165,10 +159,6 @@ public class WordLevelTab {
|
|||
private TextField suffixListTF;
|
||||
private ArrayList<String> suffixList;
|
||||
|
||||
// @FXML
|
||||
// private CheckBox writeMsdAtTheEndChB;
|
||||
// private boolean writeMsdAtTheEnd;
|
||||
|
||||
@FXML
|
||||
private ComboBox<String> calculateForCB;
|
||||
private CalculateFor calculateFor;
|
||||
|
@ -215,7 +205,6 @@ public class WordLevelTab {
|
|||
|
||||
private Corpus corpus;
|
||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||
private Filter filter;
|
||||
private boolean useDb;
|
||||
private HostServices hostService;
|
||||
private ListChangeListener<String> taxonomyListener;
|
||||
|
@ -226,44 +215,31 @@ public class WordLevelTab {
|
|||
private ChangeListener<Boolean> minimalTaxonomyListener;
|
||||
private ChangeListener<Boolean> minimalRelFreListener;
|
||||
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
|
||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"};
|
||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY));
|
||||
// private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY));
|
||||
|
||||
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
|
||||
private static final ArrayList<String> TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY));
|
||||
|
||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||
private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {};
|
||||
private static final ArrayList<String> ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY));
|
||||
|
||||
// TODO: pass observables for taxonomy based on header scan
|
||||
// after header scan
|
||||
private ObservableList<String> taxonomyCCBValues;
|
||||
private CorpusType currentCorpusType;
|
||||
|
||||
public void init() {
|
||||
// add CSS style
|
||||
wordLevelAnalysisTabPane.getStylesheets().add("style.css");
|
||||
|
@ -328,21 +304,12 @@ public class WordLevelTab {
|
|||
} else if (newValue.equals(CalculateFor.NORMALIZED_WORD.toString())) {
|
||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS));
|
||||
} else if (newValue.equals(CalculateFor.MORPHOSYNTACTIC_SPECS.toString())) {
|
||||
// writeMsdAtTheEndEnableCalculateFor.set(true);
|
||||
// writeMsdAtTheEndChB.setDisable(false);
|
||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_MSD));
|
||||
} else {
|
||||
|
||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_EMPTY));
|
||||
}
|
||||
|
||||
// if (!newValue.equals("oblikoskladenjska oznaka")){
|
||||
// writeMsdAtTheEnd = false;
|
||||
// writeMsdAtTheEndChB.setSelected(false);
|
||||
// writeMsdAtTheEndChB.setDisable(true);
|
||||
// writeMsdAtTheEndEnableCalculateFor.set(false);
|
||||
// }
|
||||
|
||||
alsoVisualizeListener = new ListChangeListener<String>() {
|
||||
@Override
|
||||
public void onChanged(Change<? extends String> c) {
|
||||
|
@ -353,10 +320,6 @@ public class WordLevelTab {
|
|||
}
|
||||
};
|
||||
|
||||
// alsoVisualizeCCB.getCheckModel().clearChecks();
|
||||
// alsoVisualizeCCB.getItems().removeAll();
|
||||
// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
|
||||
|
||||
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
|
||||
|
||||
if (languageChanged) {
|
||||
|
@ -442,7 +405,6 @@ public class WordLevelTab {
|
|||
prefixList.add(w);
|
||||
}
|
||||
}
|
||||
// suffixList = value;
|
||||
}
|
||||
|
||||
System.out.println(prefixList);
|
||||
|
@ -475,7 +437,6 @@ public class WordLevelTab {
|
|||
suffixList.add(w);
|
||||
}
|
||||
}
|
||||
// suffixList = value;
|
||||
}
|
||||
System.out.println(suffixList);
|
||||
if(suffixList.size() > 0){
|
||||
|
@ -492,8 +453,6 @@ public class WordLevelTab {
|
|||
computeNgramsB.setDisable(true);
|
||||
}
|
||||
});
|
||||
// prefixLengthCB.setDisable(true);
|
||||
|
||||
|
||||
if (msdListener != null){
|
||||
msdTF.focusedProperty().removeListener(msdListener);
|
||||
|
@ -581,10 +540,8 @@ public class WordLevelTab {
|
|||
public void onChanged(ListChangeListener.Change<? extends String> c){
|
||||
if(changing) {
|
||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
|
||||
|
||||
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
|
||||
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
|
||||
|
||||
taxonomy = new ArrayList<>();
|
||||
taxonomy.addAll(checkedItemsTaxonomy);
|
||||
|
@ -592,7 +549,6 @@ public class WordLevelTab {
|
|||
taxonomyCCB.getItems().removeAll();
|
||||
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
||||
|
||||
// taxonomyCCB.getCheckModel().clearChecks();
|
||||
changing = false;
|
||||
taxonomyCCB.getCheckModel().clearChecks();
|
||||
for (Taxonomy t : checkedItemsTaxonomy) {
|
||||
|
@ -639,15 +595,6 @@ public class WordLevelTab {
|
|||
displayTaxonomyChB.setDisable(true);
|
||||
}
|
||||
|
||||
// writeMsdAtTheEnd = false;
|
||||
// writeMsdAtTheEndChB.setDisable(true);
|
||||
// // set
|
||||
// writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
// writeMsdAtTheEnd = newValue;
|
||||
// logger.info("write msd at the end: ", writeMsdAtTheEnd);
|
||||
// });
|
||||
// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
|
||||
|
||||
// set default values
|
||||
minimalOccurrencesTF.setText("1");
|
||||
minimalOccurrences = 1;
|
||||
|
@ -764,85 +711,6 @@ public class WordLevelTab {
|
|||
cancel.setVisible(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* case a: values for combo boxes can change after a corpus change
|
||||
* <ul>
|
||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
||||
* <li>same corpus type, different subset - keep</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* case b: values for combo boxes can change after a header scan
|
||||
* <ul>
|
||||
* <li>at first, fields are populated by corpus type defaults</li>
|
||||
* <li>after, with gathered data</li>
|
||||
* </ul>
|
||||
* <p></p>
|
||||
* ngrams: 1
|
||||
* calculateFor: word
|
||||
* msd:
|
||||
* taxonomy:
|
||||
* skip: 0
|
||||
* iscvv: false
|
||||
* string length: 1
|
||||
*/
|
||||
// public void populateFields() {
|
||||
// // corpus changed if: current one is null (this is first run of the app)
|
||||
// // or if currentCorpus != gui's corpus
|
||||
// boolean corpusChanged = currentCorpusType == null
|
||||
// || currentCorpusType != corpus.getCorpusType();
|
||||
//
|
||||
//
|
||||
// // TODO: check for GOS, GIGAFIDA, SOLAR...
|
||||
// // refresh and:
|
||||
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
||||
// if (calculateFor == null) {
|
||||
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
|
||||
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
||||
// }
|
||||
//
|
||||
// if (!filter.hasMsd()) {
|
||||
// // if current corpus doesn't have msd data, disable this field
|
||||
// msd = new ArrayList<>();
|
||||
// msdTF.setText("");
|
||||
// msdTF.setDisable(true);
|
||||
// logger.info("no msd data");
|
||||
// } else {
|
||||
// if (ValidationUtil.isEmpty(msd)
|
||||
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
||||
// // msd has not been set previously
|
||||
// // or msd has been set but the corpus changed -> reset
|
||||
// msd = new ArrayList<>();
|
||||
// msdTF.setText("");
|
||||
// msdTF.setDisable(false);
|
||||
// logger.info("msd reset");
|
||||
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
||||
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
|
||||
// msdTF.setText(StringUtils.join(msdStrings, " "));
|
||||
// msdTF.setDisable(false);
|
||||
// logger.info("msd kept");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // TODO: trigger on rescan
|
||||
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
||||
// // user changed corpus (by type) or by selection & triggered a rescan of headers
|
||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
//
|
||||
// currentCorpusType = corpus.getCorpusType();
|
||||
// // setTaxonomyIsDirty(false);
|
||||
// } else {
|
||||
//
|
||||
// }
|
||||
//
|
||||
// // see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
|
||||
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
||||
//
|
||||
// }
|
||||
|
||||
private void addTooltipToImage(ImageView image, StringBinding stringBinding){
|
||||
Tooltip tooltip = new Tooltip();
|
||||
tooltip.textProperty().bind(stringBinding);
|
||||
|
@ -911,11 +779,9 @@ public class WordLevelTab {
|
|||
if (corpus.getCorpusType() == CorpusType.GOS) {
|
||||
calculateForCB.itemsProperty().unbind();
|
||||
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_GOS));
|
||||
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS);
|
||||
} else {
|
||||
calculateForCB.itemsProperty().unbind();
|
||||
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS));
|
||||
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -923,7 +789,6 @@ public class WordLevelTab {
|
|||
if (corpus.isGosOrthMode()) {
|
||||
calculateForCB.itemsProperty().unbind();
|
||||
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_ORTH));
|
||||
// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
|
||||
msdTF.setDisable(true);
|
||||
} else {
|
||||
msdTF.setDisable(false);
|
||||
|
@ -954,7 +819,6 @@ public class WordLevelTab {
|
|||
filter.setPrefixList(prefixList);
|
||||
filter.setSuffixList(suffixList);
|
||||
filter.setTaxonomySetOperation(taxonomySetOperation);
|
||||
// filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
|
||||
|
||||
String message = Validation.validateForStringLevel(filter);
|
||||
if (message == null) {
|
||||
|
@ -1004,134 +868,14 @@ public class WordLevelTab {
|
|||
private void execute(StatisticsNew statistic) {
|
||||
logger.info("Started execution: ", statistic.getFilter());
|
||||
|
||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||
|
||||
// final Task<Void> task = new Task<Void>() {
|
||||
// @SuppressWarnings("Duplicates")
|
||||
// @Override
|
||||
// protected Void call() throws Exception {
|
||||
// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
|
||||
// if(multipleFiles){
|
||||
// cancel.setVisible(true);
|
||||
// }
|
||||
// int i = 0;
|
||||
// Date startTime = new Date();
|
||||
// Date previousTime = new Date();
|
||||
// int remainingSeconds = -1;
|
||||
// for (File f : corpusFiles) {
|
||||
// final int iFinal = i;
|
||||
// XML_processing xml_processing = new XML_processing();
|
||||
// xml_processing.isCancelled = false;
|
||||
// i++;
|
||||
// if (isCancelled()) {
|
||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
// break;
|
||||
// }
|
||||
// if(xml_processing.progressBarListener != null) {
|
||||
// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
|
||||
// }
|
||||
// if (multipleFiles) {
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// this.updateProgress(i, corpusFiles.size());
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
|
||||
// } else {
|
||||
// xml_processing.progressBarListener = new InvalidationListener() {
|
||||
// int remainingSeconds = -1;
|
||||
// Date previousTime = new Date();
|
||||
// @Override
|
||||
// public void invalidated(Observable observable) {
|
||||
// cancel.setVisible(true);
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||
// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||
// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// xml_processing.isCancelled = isCancelled();
|
||||
// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
|
||||
// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// xml_processing.progressProperty().addListener(xml_processing.progressBarListener);
|
||||
// }
|
||||
// xml_processing.readXML(f.toString(), statistic);
|
||||
// if (isCancelled()) {
|
||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return null;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
// progressLabel.textProperty().bind(task.messageProperty());
|
||||
//
|
||||
// task.setOnSucceeded(e -> {
|
||||
// try {
|
||||
// boolean successullySaved = statistic.saveResultToDisk();
|
||||
// if (successullySaved) {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
||||
// } else {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
||||
// }
|
||||
// } catch (UnsupportedEncodingException e1) {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
||||
// logger.error("Error while saving", e1);
|
||||
// }
|
||||
//
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// task.setOnFailed(e -> {
|
||||
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
||||
// logger.error("Error while executing", e);
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// task.setOnCancelled(e -> {
|
||||
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
// });
|
||||
//
|
||||
// // When cancel button is pressed cancel analysis
|
||||
// cancel.setOnAction(e -> {
|
||||
// task.cancel();
|
||||
// logger.info("cancel button");
|
||||
// });
|
||||
//
|
||||
// final Thread thread = new Thread(task, "task");
|
||||
// thread.setDaemon(true);
|
||||
// thread.start();
|
||||
Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel);
|
||||
if (statistic.getFilter().getMinimalRelFre() > 1){
|
||||
final Task<Void> mainTask = t.prepareTaskForMinRelFre(statistic);
|
||||
// final Task<Void> mainTask = prepareTaskForMinRelFre(statistic);
|
||||
final Thread thread = new Thread(mainTask, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
} else {
|
||||
final Task<Void> mainTask = t.prepareMainTask(statistic);
|
||||
// final Task<Void> mainTask = prepareMainTask(statistic);
|
||||
final Thread thread = new Thread(mainTask, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
|
@ -1144,5 +888,4 @@ public class WordLevelTab {
|
|||
/**
 * Stores the supplied host services object in this object's {@code hostService} field.
 * The assignment is unconditional: no null check or other validation is performed here.
 *
 * @param hostServices the value to assign to {@code hostService}
 */
public void setHostServices(HostServices hostServices){
|
||||
this.hostService = hostServices;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
package util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
 * Generates sets of 1-based indices (combinations without repetition).
 *
 * <p>{@link #generateIndices(int)} keeps all of its working state in local variables, so
 * separate calls never share or overwrite each other's results.
 */
public class Combinations {
    /**
     * Accumulator used only by the legacy six-argument
     * {@link #combinationUtil(int[], Integer[], int, int, int, int)} entry point.
     * {@link #generateIndices(int)} does not read or write it.
     */
    private static final HashSet<HashSet<Integer>> result = new HashSet<>();

    /**
     * Legacy entry point, kept so existing package-level callers still compile. It records
     * every combination it completes in this class's private static accumulator.
     *
     * @param arr               input array the elements are drawn from
     * @param data              scratch array holding the combination under construction;
     *                          its whole content is copied into a set when a combination is complete
     * @param start             first index of {@code arr} to consider
     * @param end               last index of {@code arr} to consider (inclusive)
     * @param index             next position of {@code data} to fill
     * @param combinationLength number of elements in each combination
     */
    static void combinationUtil(int arr[], Integer data[], int start, int end, int index, int combinationLength) {
        collectCombinations(arr, data, start, end, index, combinationLength, result);
    }

    /**
     * Recursively enumerates combinations of {@code combinationLength} elements taken from
     * {@code arr[start..end]} and adds each completed one to {@code sink}.
     */
    private static void collectCombinations(int[] arr, Integer[] data, int start, int end, int index,
                                            int combinationLength, HashSet<HashSet<Integer>> sink) {
        // All positions are filled: snapshot the scratch array as one finished combination.
        if (index == combinationLength) {
            sink.add(new HashSet<>(Arrays.asList(data)));
            return;
        }

        // Try every candidate for position `index`. The bound
        // "end - i + 1 >= combinationLength - index" stops as soon as too few elements
        // remain to fill the positions that are still open.
        for (int i = start; i <= end && end - i + 1 >= combinationLength - index; i++) {
            data[index] = arr[i];
            collectCombinations(arr, data, i + 1, end, index + 1, combinationLength, sink);
        }
    }

    /**
     * Builds every combination of the values {@code 1 .. maxNOfIndices - 1} whose size is
     * between {@code 1} and {@code maxNOfIndices - 2} (inclusive), plus the empty set.
     *
     * <p>For {@code maxNOfIndices <= 2} only the empty set is produced.
     *
     * @param maxNOfIndices exclusive upper bound of the index values
     * @return a freshly allocated set of index sets; it is not shared with any other call
     */
    public static HashSet<HashSet<Integer>> generateIndices(int maxNOfIndices) {
        // Local accumulator: overlapping or repeated calls cannot interfere with each other.
        HashSet<HashSet<Integer>> combinations = new HashSet<>();
        int[] arr = IntStream.range(1, maxNOfIndices).toArray();

        for (int size = 1; size < maxNOfIndices - 1; size++) {
            // A fresh scratch array per size, since its length defines the combination size.
            collectCombinations(arr, new Integer[size], 0, arr.length - 1, 0, size, combinations);
        }

        // also add an empty one for X.... (all of this type)
        combinations.add(new HashSet<>());

        return combinations;
    }
}
|
|
@ -6,7 +6,6 @@ import java.io.*;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import data.*;
|
||||
|
@ -16,49 +15,11 @@ import org.apache.commons.csv.CSVFormat;
|
|||
import org.apache.commons.csv.CSVPrinter;
|
||||
import org.apache.commons.csv.QuoteMode;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
|
||||
import data.Enums.WordLevelType;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public class Export {
|
||||
// public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
|
||||
// JSONArray wrapper = new JSONArray();
|
||||
//
|
||||
// for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||
// JSONArray data_wrapper = new JSONArray();
|
||||
// JSONObject metric = new JSONObject();
|
||||
//
|
||||
// String title = p.getLeft();
|
||||
// Map<MultipleHMKeys, Long> map = p.getRight();
|
||||
//
|
||||
// if (map.isEmpty())
|
||||
// continue;
|
||||
//
|
||||
// long total = Util.mapSumFrequencies(map);
|
||||
//
|
||||
// for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
||||
// JSONObject data_entry = new JSONObject();
|
||||
// data_entry.put("word", e.getKey());
|
||||
// data_entry.put("frequency", e.getValue());
|
||||
// data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
|
||||
//
|
||||
// data_wrapper.add(data_entry);
|
||||
// }
|
||||
//
|
||||
// metric.put("Title", title);
|
||||
// metric.put("data", data_wrapper);
|
||||
// wrapper.add(metric);
|
||||
// }
|
||||
//
|
||||
// try (FileWriter file = new FileWriter("statistics.json")) {
|
||||
// file.write(wrapper.toJSONString());
|
||||
// } catch (IOException e) {
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
// }
|
||||
|
||||
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||
StatisticsNew statistics, Filter filter) {
|
||||
Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
|
||||
|
@ -68,15 +29,6 @@ public class Export {
|
|||
List<Object> FILE_HEADER_AL = new ArrayList<>();
|
||||
Object[] FILE_HEADER;
|
||||
|
||||
//Count frequencies
|
||||
// long num_frequencies = 0;
|
||||
// for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||
// Map<MultipleHMKeys, Long> map = p.getRight();
|
||||
// if (map.isEmpty())
|
||||
// continue;
|
||||
// num_frequencies = Util.mapSumFrequencies(map);
|
||||
// }
|
||||
|
||||
Map<Taxonomy, Long> num_selected_taxonomy_frequencies = new ConcurrentHashMap<>();
|
||||
for (Taxonomy taxonomyKey : taxonomyResults.keySet()) {
|
||||
num_selected_taxonomy_frequencies.put(taxonomyKey, (long) 0);
|
||||
|
@ -113,7 +65,6 @@ public class Export {
|
|||
|
||||
headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
|
||||
headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
|
||||
// headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
|
||||
|
||||
for (CalculateFor otherKey : filter.getMultipleKeys()) {
|
||||
FILE_HEADER_AL.add(otherKey.toHeaderString(filter.getNgramValue()));
|
||||
|
@ -163,10 +114,7 @@ public class Export {
|
|||
|
||||
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||
String title = p.getLeft();
|
||||
|
||||
// statistics.setTimeEnding();
|
||||
title = statistics.generateResultTitle();
|
||||
// statistics.
|
||||
|
||||
fileName = title.replace(": ", "-");
|
||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
||||
|
@ -178,8 +126,6 @@ public class Export {
|
|||
if (map.isEmpty())
|
||||
continue;
|
||||
|
||||
// long total = Util.mapSumFrequencies(map);
|
||||
|
||||
OutputStreamWriter fileWriter = null;
|
||||
CSVPrinter csvFilePrinter = null;
|
||||
|
||||
|
@ -289,10 +235,7 @@ public class Export {
|
|||
dataEntry.add(frequency.toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key), statistics.getCorpus().getPunctuation()));
|
||||
dataEntry.add(formatNumberForExport(((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue(), statistics.getCorpus().getPunctuation()));
|
||||
// dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
|
||||
// dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (filter.getCollocability().size() > 0){
|
||||
|
@ -303,39 +246,6 @@ public class Export {
|
|||
|
||||
// Write msd separated per letters at the end of each line in csv
|
||||
if (filter.getWriteMsdAtTheEnd()) {
|
||||
// String msd = "";
|
||||
//
|
||||
// if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
// msd = e.getKey().getK1();
|
||||
// } else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
|
||||
// i = 0;
|
||||
// for (CalculateFor otherKey : filter.getMultipleKeys()){
|
||||
// switch(i){
|
||||
// case 0:
|
||||
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
// msd = e.getKey().getK2();
|
||||
// }
|
||||
// break;
|
||||
// case 1:
|
||||
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
// msd = e.getKey().getK3();
|
||||
// }
|
||||
// break;
|
||||
// case 2:
|
||||
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
// msd = e.getKey().getK4();
|
||||
// }
|
||||
// break;
|
||||
// case 3:
|
||||
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
// msd = e.getKey().getK5();
|
||||
// }
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// i++;
|
||||
// }
|
||||
// }
|
||||
String msd = e.getKey().getMsd(filter);
|
||||
String [] charArray = msd.split("(?!^)");
|
||||
dataEntry.addAll(Arrays.asList(charArray));
|
||||
|
@ -372,67 +282,6 @@ public class Export {
|
|||
return s;
|
||||
}
|
||||
|
||||
// public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||
// //Delimiter used in CSV file
|
||||
// String NEW_LINE_SEPARATOR = "\n";
|
||||
//
|
||||
// //CSV file header
|
||||
// Object[] FILE_HEADER = {"word", "frequency", "percent"};
|
||||
//
|
||||
// String fileName = "";
|
||||
//
|
||||
// fileName = title.replace(": ", "-");
|
||||
// fileName = fileName.replace(" ", "_").concat(".csv");
|
||||
//
|
||||
// fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
||||
//
|
||||
// OutputStreamWriter fileWriter = null;
|
||||
// CSVPrinter csvFilePrinter = null;
|
||||
//
|
||||
// //Create the CSVFormat object with "\n" as a record delimiter
|
||||
// CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
||||
//
|
||||
// try {
|
||||
// //initialize FileWriter object
|
||||
// fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
||||
//
|
||||
// //initialize CSVPrinter object
|
||||
// csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
||||
//
|
||||
// // write info block
|
||||
// printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
||||
//
|
||||
// //Create CSV file header
|
||||
// csvFilePrinter.printRecord(FILE_HEADER);
|
||||
//
|
||||
// for (Object[] resultEntry : result) {
|
||||
// List dataEntry = new ArrayList<>();
|
||||
// dataEntry.add(resultEntry[0]);
|
||||
// dataEntry.add(resultEntry[1]);
|
||||
// dataEntry.add(formatNumberAsPercent(resultEntry[2]), statistics.getCorpus().getPunctuation());
|
||||
// csvFilePrinter.printRecord(dataEntry);
|
||||
// }
|
||||
// } catch (Exception e) {
|
||||
// System.out.println("Error in CsvFileWriter!");
|
||||
// e.printStackTrace();
|
||||
// } finally {
|
||||
// try {
|
||||
// if (fileWriter != null) {
|
||||
// fileWriter.flush();
|
||||
// fileWriter.close();
|
||||
// }
|
||||
// if (csvFilePrinter != null) {
|
||||
// csvFilePrinter.close();
|
||||
// }
|
||||
// } catch (IOException e) {
|
||||
// System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return fileName;
|
||||
// }
|
||||
|
||||
public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
package util;
|
||||
|
||||
/**
 * Empty placeholder class: it declares no fields, constructors or methods of its own.
 * A former String-backed implementation survives below only as commented-out code and
 * has no effect at runtime.
 */
public class Key /*implements Comparable<Key> */ {
|
||||
// private final String value;
|
||||
//
|
||||
// Key(String value) {
|
||||
// this.value = value;
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public int compareTo(Key o) {
|
||||
// NOTE(review): java.util.Objects.compare takes a Comparator as its third argument;
|
||||
// this two-argument call would not compile if the code were re-enabled.
// return Objects.compare(this.value, o.value);
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public boolean equals(Object o) {
|
||||
// NOTE(review): calling this.equals(o) from inside equals recurses without end;
// an identity check (this == o) is presumably what was intended.
// if (this.equals(o)) {
|
||||
// return true;
|
||||
// }
|
||||
// if (o == null || getClass() != o.getClass()) {
|
||||
// return false;
|
||||
// }
|
||||
// Key key = (Key) o;
|
||||
// return Objects.equals(value, key.value);
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public int hashCode() {
|
||||
// NOTE(review): a constant hash code satisfies the contract but makes every key collide.
// return 0;
|
||||
// }
|
||||
}
|
|
@ -57,9 +57,6 @@ public class Tasks {
|
|||
f2.setIsMinimalRelFreScraper(true);
|
||||
StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb);
|
||||
|
||||
|
||||
// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb);
|
||||
|
||||
Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();
|
||||
|
||||
final javafx.concurrent.Task<Void> task = new javafx.concurrent.Task<Void>() {
|
||||
|
@ -97,10 +94,6 @@ public class Tasks {
|
|||
}
|
||||
this.updateProgress(i, corpusSize);
|
||||
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||
// if (isCancelled()) {
|
||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
// break;
|
||||
// }
|
||||
} else {
|
||||
xml_processing.progressBarListener = new InvalidationListener() {
|
||||
int remainingSeconds = -1;
|
||||
|
@ -112,10 +105,6 @@ public class Tasks {
|
|||
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
||||
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
|
||||
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
||||
// System.out.println(remainingSeconds);
|
||||
previousTime = new Date();
|
||||
}
|
||||
xml_processing.isCancelled = isCancelled();
|
||||
|
@ -138,7 +127,6 @@ public class Tasks {
|
|||
|
||||
// add remaining minRelFre results
|
||||
if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
|
||||
// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
|
||||
long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
|
||||
double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
||||
|
||||
|
@ -151,8 +139,6 @@ public class Tasks {
|
|||
for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){
|
||||
statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
|
||||
}
|
||||
|
||||
// System.out.println("asd");
|
||||
}
|
||||
|
||||
return null;
|
||||
|
@ -174,7 +160,6 @@ public class Tasks {
|
|||
logger.error("Error while executing", e);
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
cancel.setVisible(false);
|
||||
|
@ -184,7 +169,6 @@ public class Tasks {
|
|||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
cancel.setVisible(false);
|
||||
|
@ -215,19 +199,6 @@ public class Tasks {
|
|||
if(multipleFiles){
|
||||
cancel.setVisible(true);
|
||||
}
|
||||
|
||||
|
||||
// int i = corpusFiles.size();
|
||||
// Date startTime = new Date();
|
||||
// Date previousTime = new Date();
|
||||
// int remainingSeconds = -1;
|
||||
// int corpusSize;
|
||||
// if (statistic.getFilter().getCollocability().size() > 0) {
|
||||
// corpusSize = corpusFiles.size() * 2;
|
||||
// } else {
|
||||
// corpusSize = corpusFiles.size();
|
||||
// }
|
||||
|
||||
Date startTime = new Date();
|
||||
Date previousTime = new Date();
|
||||
int remainingSeconds = -1;
|
||||
|
@ -264,13 +235,6 @@ public class Tasks {
|
|||
this.updateProgress(i, corpusSize);
|
||||
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||
|
||||
// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
|
||||
// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
|
||||
// previousTime = new Date();
|
||||
// }
|
||||
// this.updateProgress(i, corpusSize);
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||
|
||||
} else {
|
||||
xml_processing.progressBarListener = new InvalidationListener() {
|
||||
int remainingSeconds = -1;
|
||||
|
@ -282,10 +246,6 @@ public class Tasks {
|
|||
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
||||
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
|
||||
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
||||
// System.out.println(remainingSeconds);
|
||||
previousTime = new Date();
|
||||
}
|
||||
xml_processing.isCancelled = isCancelled();
|
||||
|
@ -304,24 +264,9 @@ public class Tasks {
|
|||
if(!(multipleFiles)){
|
||||
cancel.setVisible(false);
|
||||
}
|
||||
// readXML(f.toString(), statistic);
|
||||
// i++;
|
||||
// if (isCancelled()) {
|
||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
// break;
|
||||
// }
|
||||
// if (statistic.getFilter().getCollocability().size() > 0) {
|
||||
// this.updateProgress(i, corpusFiles.size() * 2);
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
|
||||
// } else {
|
||||
// this.updateProgress(i, corpusFiles.size());
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
||||
// }
|
||||
//// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
|
||||
}
|
||||
// if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing
|
||||
if (statistic.getFilter().getMinimalRelFre() > 1){
|
||||
// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() +
|
||||
long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
|
||||
double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;
|
||||
|
||||
|
@ -356,7 +301,6 @@ public class Tasks {
|
|||
|
||||
} else {
|
||||
try {
|
||||
// System.out.print(statistics);
|
||||
boolean successullySaved = statistic.saveResultToDisk();
|
||||
if (successullySaved) {
|
||||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
||||
|
@ -371,7 +315,6 @@ public class Tasks {
|
|||
logger.error("Out of memory error", e1);
|
||||
}
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
cancel.setVisible(false);
|
||||
|
@ -385,7 +328,6 @@ public class Tasks {
|
|||
logger.error("Error while executing", e);
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
cancel.setVisible(false);
|
||||
|
@ -395,7 +337,6 @@ public class Tasks {
|
|||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
cancel.setVisible(false);
|
||||
|
@ -421,17 +362,9 @@ public class Tasks {
|
|||
if(multipleFiles){
|
||||
cancel.setVisible(true);
|
||||
}
|
||||
// int i = corpusFiles.size();
|
||||
Date startTime = new Date();
|
||||
Date previousTime = new Date();
|
||||
int remainingSeconds = -1;
|
||||
// int corpusSize;
|
||||
// if (statistic.getFilter().getCollocability().size() > 0) {
|
||||
// corpusSize = corpusFiles.size() * 2;
|
||||
// } else {
|
||||
// corpusSize = corpusFiles.size();
|
||||
// }
|
||||
|
||||
|
||||
int corpusSize;
|
||||
int i;
|
||||
|
@ -461,10 +394,6 @@ public class Tasks {
|
|||
}
|
||||
this.updateProgress(i, corpusSize);
|
||||
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
|
||||
// if (isCancelled()) {
|
||||
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
// break;
|
||||
// }
|
||||
} else {
|
||||
xml_processing.progressBarListener = new InvalidationListener() {
|
||||
int remainingSeconds = -1;
|
||||
|
@ -476,10 +405,6 @@ public class Tasks {
|
|||
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
|
||||
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
|
||||
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
|
||||
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
|
||||
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)));
|
||||
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
|
||||
// System.out.println(remainingSeconds);
|
||||
previousTime = new Date();
|
||||
}
|
||||
xml_processing.isCancelled = isCancelled();
|
||||
|
@ -497,14 +422,6 @@ public class Tasks {
|
|||
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
|
||||
break;
|
||||
}
|
||||
// readXML(f.toString(), statisticsOneGrams);
|
||||
// i++;
|
||||
// this.updateProgress(i, corpusFiles.size() * 2);
|
||||
// if (statistic.getFilter().getCollocability().size() > 0) {
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
|
||||
// } else {
|
||||
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
||||
// }
|
||||
}
|
||||
|
||||
return null;
|
||||
|
@ -517,7 +434,6 @@ public class Tasks {
|
|||
task.setOnSucceeded(e -> {
|
||||
try {
|
||||
System.out.print(statistic);
|
||||
// calculate_collocabilities(statistic, statisticsOneGrams);
|
||||
statistic.updateCalculateCollocabilities(statisticsOneGrams);
|
||||
boolean successullySaved = statistic.saveResultToDisk();
|
||||
if (successullySaved) {
|
||||
|
@ -532,21 +448,6 @@ public class Tasks {
|
|||
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
|
||||
logger.error("Out of memory error", e1);
|
||||
}
|
||||
// try {
|
||||
// boolean successullySaved = statistic.saveResultToDisk();
|
||||
// if (successullySaved) {
|
||||
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||
// } else {
|
||||
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||
// }
|
||||
// } catch (UnsupportedEncodingException e1) {
|
||||
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||
// logger.error("Error while saving", e1);
|
||||
// } catch (OutOfMemoryError e1){
|
||||
// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
||||
// logger.error("Out of memory error", e1);
|
||||
// }
|
||||
//
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
|
@ -559,7 +460,6 @@ public class Tasks {
|
|||
logger.error("Error while executing", e);
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
cancel.setVisible(false);
|
||||
|
@ -569,7 +469,6 @@ public class Tasks {
|
|||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"));
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
cancel.setVisible(false);
|
||||
|
@ -578,7 +477,6 @@ public class Tasks {
|
|||
// When cancel button is pressed cancel analysis
|
||||
cancel.setOnAction(e -> {
|
||||
task.cancel();
|
||||
// logger.info("cancel button");
|
||||
});
|
||||
return task;
|
||||
}
|
||||
|
|
|
@ -49,15 +49,4 @@ public class TimeWatch {
|
|||
|
||||
return "Elapsed Time in nano seconds: ";
|
||||
}
|
||||
|
||||
private void exampleUsage() {
|
||||
TimeWatch watch = TimeWatch.start();
|
||||
|
||||
// do something...
|
||||
|
||||
System.out.println("Elapsed Time custom format: " + watch.toMinuteSeconds());
|
||||
System.out.println("Elapsed Time in seconds: " + watch.time(TimeUnit.SECONDS));
|
||||
System.out.println("Elapsed Time in nano seconds: " + watch.time());
|
||||
|
||||
}
|
||||
}
|
|
@ -20,22 +20,6 @@ import gui.ValidationUtil;
|
|||
public class Util {
|
||||
public final static Logger logger = LogManager.getLogger(Util.class);
|
||||
|
||||
|
||||
public static String toReadableTime(long time) {
|
||||
long hours = time(TimeUnit.HOURS, time);
|
||||
long minutes = time(TimeUnit.MINUTES, time) - TimeUnit.HOURS.toMinutes(hours);
|
||||
long seconds = time(TimeUnit.SECONDS, time) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
|
||||
long milliseconds = time(TimeUnit.MILLISECONDS, time) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);
|
||||
long microseconds = time(TimeUnit.MICROSECONDS, time) - TimeUnit.HOURS.toMicros(hours) - TimeUnit.MINUTES.toMicros(minutes) - TimeUnit.SECONDS.toMicros(seconds) - TimeUnit.MILLISECONDS.toMicros(milliseconds);
|
||||
long nanoseconds = time(TimeUnit.NANOSECONDS, time) - TimeUnit.HOURS.toNanos(hours) - TimeUnit.MINUTES.toNanos(minutes) - TimeUnit.SECONDS.toNanos(seconds) - TimeUnit.MILLISECONDS.toNanos(milliseconds) - TimeUnit.MICROSECONDS.toNanos(microseconds);
|
||||
|
||||
return String.format("%d h, %d min, %d s, %d ms, %d µs, %d ns", hours, minutes, seconds, milliseconds, microseconds, nanoseconds);
|
||||
}
|
||||
|
||||
private static long time(TimeUnit unit, long t) {
|
||||
return unit.convert(t, TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a number to a more readable format.
|
||||
* 12345 -> 12.345
|
||||
|
@ -97,12 +81,6 @@ public class Util {
|
|||
return types.contains(o.getClass());
|
||||
}
|
||||
|
||||
public static <K, V> void printMap(Map<K, V> map) {
|
||||
System.out.println("\nkey: value");
|
||||
map.forEach((k, v) -> System.out.print(String.format("%s:\t %,8d%n", k, v)));
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic map converter -> since AtomicLongs aren't as comparable.
|
||||
* Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
|
||||
|
@ -117,23 +95,6 @@ public class Util {
|
|||
return m;
|
||||
}
|
||||
|
||||
/**
 * Orders map entries by value (ascending, natural order) and breaks ties
 * between equal values by comparing the keys.
 *
 * @param <K> key type, compared only when two values are equal
 * @param <V> value type, compared first
 */
public class ValueThenKeyComparator<K extends Comparable<? super K>,
        V extends Comparable<? super V>>
        implements Comparator<Map.Entry<K, V>> {

    @Override
    public int compare(Map.Entry<K, V> a, Map.Entry<K, V> b) {
        final int byValue = a.getValue().compareTo(b.getValue());
        // Keys only matter when the values are equal.
        return byValue != 0 ? byValue : a.getKey().compareTo(b.getKey());
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Sorts a map in a descending order by value.
|
||||
|
@ -183,25 +144,6 @@ public class Util {
|
|||
return result;
|
||||
}
|
||||
|
||||
public static <K, V> void printMap(Map<K, Integer> map, String title, int number_of_words) {
|
||||
System.out.println(String.format("\n%s\n------------\nkey: value\tpercent", title));
|
||||
map.forEach((k, v) ->
|
||||
System.out.println(String.format("%s:\t %s\t %s%%",
|
||||
k,
|
||||
Util.formatNumberReadable(v),
|
||||
Util.formatNumberReadable((double) v / number_of_words * 100))));
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
static long mapSumFrequencies(Map<MultipleHMKeys, Long> map) {
|
||||
long sum = 0;
|
||||
|
||||
for (long value : map.values()) {
|
||||
sum += value;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Used for passing optional integer values for sorting.
|
||||
|
|
|
@ -84,16 +84,6 @@ public class RDB {
|
|||
}
|
||||
}
|
||||
|
||||
// public byte[] atomicIntToByteArray(final AtomicLong i) {
|
||||
// BigInteger bigInt = BigInteger.valueOf(i.intValue());
|
||||
//
|
||||
// return bigInt.toByteArray();
|
||||
// }
|
||||
|
||||
public RocksDB getDb() {
|
||||
return db;
|
||||
}
|
||||
|
||||
public Map<String, AtomicLong> getDump() throws UnsupportedEncodingException {
|
||||
Map<String, AtomicLong> dump = new HashMap<>();
|
||||
RocksDB.loadLibrary();
|
||||
|
|
|
@ -34,17 +34,6 @@
|
|||
<ImageView fx:id="displayTaxonomyI" layoutX="370.0" layoutY="107.5" pickOnBounds="true" preserveRatio="true">
|
||||
<Image url="questionmark.png" backgroundLoading="true"/>
|
||||
</ImageView>
|
||||
<!--<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />-->
|
||||
<!--<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD" />-->
|
||||
<!--<TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0" />-->
|
||||
<!--<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija" />-->
|
||||
<!--<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0" />-->
|
||||
|
||||
<!--<Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />-->
|
||||
<!--<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />-->
|
||||
|
||||
<!--<Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />-->
|
||||
<!--<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />-->
|
||||
|
||||
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
|
||||
<children>
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
fx:controller="gui.CorpusTab">
|
||||
<children>
|
||||
<Pane/>
|
||||
<!--<TextField fx:id="stringLengthTF" layoutX="225.0" layoutY="20.0" prefWidth="140.0" />-->
|
||||
<Label fx:id="chooseCorpusLocationL" layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Nastavi lokacijo korpusa" />
|
||||
<Button fx:id="chooseCorpusLocationB" layoutX="225.0" layoutY="20.0" prefWidth="140.0" mnemonicParsing="false"/>
|
||||
<ImageView fx:id="chooseCorpusLocationI" layoutX="370.0" layoutY="27.5" pickOnBounds="true" preserveRatio="true">
|
||||
|
@ -26,7 +25,6 @@
|
|||
<Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0">
|
||||
<children>
|
||||
<Label fx:id="chooseCorpusL" prefHeight="70.0" prefWidth="704.0" text="Label"/>
|
||||
<!--<CheckBox fx:id="gosUseOrthChB" layoutY="65.0" mnemonicParsing="false" text="Uporabi pogovorni zapis"/>-->
|
||||
</children>
|
||||
</Pane>
|
||||
<ProgressIndicator fx:id="locationScanPI" layoutX="10.0" layoutY="60.0" prefHeight="50.0" progress="0.0"/>
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
<?import org.controlsfx.control.CheckComboBox?>
|
||||
|
||||
<?import javafx.scene.control.Button?>
|
||||
<?import javafx.scene.control.TextField?>
|
||||
<?import javafx.scene.control.TextArea?>
|
||||
<AnchorPane fx:id="solarFiltersTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.FiltersForSolar">
|
||||
|
@ -31,7 +30,6 @@
|
|||
<!-- MSD and Taxonomy separated -->
|
||||
<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />
|
||||
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="100.0" prefHeight="340.0" maxHeight="200.0" prefWidth="275.0" text=" " wrapText="true" editable="false"/>
|
||||
<!--<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />-->
|
||||
</Pane>
|
||||
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0"/>
|
||||
|
|
|
@ -110,11 +110,6 @@
|
|||
<TextArea fx:id="selectedFiltersTextArea" layoutX="10.0" layoutY="380.0" prefHeight="95.0" maxHeight="95.0" prefWidth="360.0" text=" " wrapText="true" editable="false"/>
|
||||
</Pane>
|
||||
|
||||
<!--<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">-->
|
||||
<!--<Label fx:id="solarFilters" layoutX="10.0" layoutY="60.0" text="Izbrani filtri:" />-->
|
||||
<!--<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="100.0" prefHeight="340.0" prefWidth="275.0" text=" " wrapText="true" />-->
|
||||
<!--</Pane>-->
|
||||
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
|
||||
<Button fx:id="changeLanguageB" layoutX="710.0" layoutY="40.0" mnemonicParsing="false" prefWidth="50.0"/>
|
||||
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
<?import javafx.scene.layout.Pane?>
|
||||
<?import org.controlsfx.control.CheckComboBox?>
|
||||
|
||||
<?import javafx.scene.control.Separator?>
|
||||
<?import javafx.scene.control.TextArea?>
|
||||
<?import javafx.scene.image.ImageView?>
|
||||
<?import javafx.scene.image.Image?>
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<?import org.controlsfx.control.CheckComboBox?>
|
||||
<?import javafx.scene.control.*?>
|
||||
<?import javafx.scene.layout.AnchorPane?>
|
||||
<?import javafx.scene.layout.Pane?>
|
||||
<AnchorPane fx:id="wordAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordFormationTab">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
|
||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. pojavitev" />
|
||||
<TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="100.0" prefWidth="180.0" />
|
||||
|
||||
<Button fx:id="computeB" layoutX="10.0" layoutY="422.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||
</Pane>
|
||||
|
||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
|
||||
text=" " wrapText="true"/>
|
||||
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
|
||||
|
||||
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
|
||||
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
|
||||
|
||||
</AnchorPane>
|
|
@ -23,9 +23,9 @@ public class CorpusTests {
|
|||
|
||||
File f = Settings.corpus.iterator().next();
|
||||
|
||||
Statistics stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, 0, CalculateFor.WORD);
|
||||
// stats.setCorpusType(CorpusType.GOS);
|
||||
stats.setCorpusType(CorpusType.SOLAR);
|
||||
// Statistics stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, 0, CalculateFor.WORD);
|
||||
// // stats.setCorpusType(CorpusType.GOS);
|
||||
// stats.setCorpusType(CorpusType.SOLAR);
|
||||
|
||||
// XML_processing.readXMLGos(f.toString(), stats);
|
||||
// XML_processing.readXML(f.toString(), stats);
|
||||
|
@ -33,10 +33,10 @@ public class CorpusTests {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
ObservableList<String> var = GosTaxonomy.getForComboBox();
|
||||
String debug = "";
|
||||
|
||||
}
|
||||
// @Test
|
||||
// public void test() {
|
||||
// ObservableList<String> var = GosTaxonomy.getForComboBox();
|
||||
// String debug = "";
|
||||
//
|
||||
// }
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user