diff --git a/src/main/java/alg/Common.java b/src/main/java/alg/Common.java deleted file mode 100755 index 5bbd0a8..0000000 --- a/src/main/java/alg/Common.java +++ /dev/null @@ -1,15 +0,0 @@ -package alg; - -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - -public class Common { - public static void updateMap(Map map, K o) { - // if not in map - AtomicLong r = map.putIfAbsent(o, new AtomicLong(1)); - - // else - if (r != null) - map.get(o).incrementAndGet(); - } -} diff --git a/src/main/java/alg/XML_processing.java b/src/main/java/alg/XML_processing.java index dad180c..8634427 100755 --- a/src/main/java/alg/XML_processing.java +++ b/src/main/java/alg/XML_processing.java @@ -19,7 +19,6 @@ import gui.I18N; import javafx.beans.InvalidationListener; import javafx.beans.property.ReadOnlyDoubleProperty; import javafx.beans.property.ReadOnlyDoubleWrapper; -import javafx.concurrent.Task; import org.apache.commons.io.FileUtils; import org.apache.commons.io.LineIterator; import org.apache.logging.log4j.LogManager; @@ -38,35 +37,10 @@ public class XML_processing { public static boolean isCollocability = false; public static InvalidationListener progressBarListener; - public double getProgress() { - return progressProperty().get(); - } - public ReadOnlyDoubleProperty progressProperty() { return progress ; } - // public static void processCorpus(Statistics stats) { - // // we can preset the list's size, so there won't be a need to resize it - // List corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); - // - // int i = 0; - // for (File f : Settings.corpus) { - // i++; - // readXML(f.toString(), stats); - // } - // } - - // public static void readXML(String path, Statistics stats) { - // if (stats.getCorpusType() == CorpusType.GIGAFIDA) { - // readXMLGigafida(path, stats); - // } else if (stats.getCorpusType() == CorpusType.GOS) { - // readXMLGos(path, stats); - // } else if (stats.getCorpusType() == CorpusType.SOLAR) { - // readXMLSolar(path, stats); - // } - // } - public static boolean readXML(String path, StatisticsNew stats) { if (stats.getCorpus().getCorpusType() == CorpusType.GIGAFIDA || stats.getCorpus().getCorpusType() == CorpusType.CCKRES) { @@ -81,7 +55,6 @@ public class XML_processing { } else if (stats.getCorpus().getCorpusType() == CorpusType.VERT) { return readVERT(path, stats); } -// task.updateProgress(fileNum, size); return false; } @@ -174,15 +147,10 @@ public class XML_processing { } else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) { alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats); pool.invoke(wc); - } else { - // TODO: - // alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats); - // pool.invoke(wc); } // if running with minimalRelFre frequency erase all ngrams with occurrences lower than set value per 1M if(stats.getFilter().getIsMinimalRelFreScraper()) { -// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() + long countFor1MWords = stats.getUniGramOccurrences().get(stats.getCorpus().getTotal()).longValue(); if(countFor1MWords > 1000000L){ double absToRelFactor = (stats.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; @@ -197,125 +165,9 @@ public class XML_processing { stats.getUniGramOccurrences().put(taxonomy, new AtomicLong(0)); } } -// System.out.println("asd"); } } - // public static void readXMLGos(String path, Statistics stats) { - // boolean in_word = false; - // String taksonomija = ""; - // String lemma = ""; - // String msd = ""; - // String type = stats.isGosOrthMode() ? "orth" : "norm"; // orth & norm - // - // List stavek = new ArrayList<>(); - // List corpus = new ArrayList<>(); - // String sentenceDelimiter = "seg"; - // String taxonomyPrefix = "gos."; - // - // try { - // XMLInputFactory factory = XMLInputFactory.newInstance(); - // XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path)); - // - // while (eventReader.hasNext()) { - // XMLEvent event = eventReader.nextEvent(); - // - // switch (event.getEventType()) { - // case XMLStreamConstants.START_ELEMENT: - // - // StartElement startElement = event.asStartElement(); - // String qName = startElement.getName().getLocalPart(); - // - // // "word" node - // if (qName.equals("w")) { - // in_word = true; - // - // if (type.equals("norm")) { - // // make sure we're looking at and not - // Iterator var = startElement.getAttributes(); - // ArrayList attributes = new ArrayList<>(); - // while (var.hasNext()) { - // attributes.add(var.next()); - // } - // - // if (attributes.contains("msd")) { - // msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); - // } else { - // msd = null; - // } - // - // if (attributes.contains("lemma")) { - // lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); - // } - // } - // } - // // taxonomy node - // else if (qName.equalsIgnoreCase("catRef")) { - // // there are some term nodes at the beginning that are of no interest to us - // // they differ by not having the attribute "ref", so test will equal null - // Attribute test = startElement.getAttributeByName(QName.valueOf("target")); - // - // if (test != null) { - // // keep only taxonomy properties - // taksonomija = String.valueOf(test.getValue()).replace(taxonomyPrefix, ""); - // } - // } else if (qName.equalsIgnoreCase("div")) { - // type = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); - // - // } - // break; - // - // case XMLStreamConstants.CHARACTERS: - // Characters characters = event.asCharacters(); - // - // // "word" node value - // if (in_word) { - // if (type.equals("norm") && msd != null) { - // stavek.add(new Word(characters.getData(), lemma, msd)); - // } else { - // stavek.add(new Word(characters.getData())); - // } - // - // in_word = false; - // } - // break; - // - // case XMLStreamConstants.END_ELEMENT: - // EndElement endElement = event.asEndElement(); - // - // // parser reached end of the current sentence - // if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { - // // add sentence to corpus - // corpus.add(new Sentence(stavek, taksonomija, type)); - // // and start a new one - // stavek = new ArrayList<>(); - // - // /* Invoke Fork-Join when we reach maximum limit of - // * sentences (because we can't read everything to - // * memory) or we reach the end of the file. - // */ - // if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { - // fj(corpus, stats); - // // empty the current corpus, since we don't need - // // the data anymore - // corpus.clear(); - // } - // } - // - // // backup - // if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) { - // fj(corpus, stats); - // corpus.clear(); - // } - // - // break; - // } - // } - // } catch (FileNotFoundException | XMLStreamException e) { - // e.printStackTrace(); - // } - // } - @SuppressWarnings("unused") public static boolean readXMLSolar(String path, StatisticsNew stats) { boolean in_word = false; @@ -327,7 +179,6 @@ public class XML_processing { List corpus = new ArrayList<>(); // used for filter -// Set headTags = new HashSet<>(Arrays.asList("sola", "predmet", "razred", "regija", "tip", "leto")); Set headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO)); Map headBlock = null; boolean includeThisBlock = false; @@ -372,9 +223,7 @@ public class XML_processing { switch (event.getEventType()) { case XMLStreamConstants.START_ELEMENT: - StartElement startElement = event.asStartElement(); - // System.out.println(String.format("%s", startElement.toString())); String qName = startElement.getName().getLocalPart(); // "word" node @@ -423,7 +272,7 @@ public class XML_processing { stavek = new ArrayList<>(); } else if (qName.equals("head")) { headBlock = new HashMap<>(); - } else { // if (headTags.contains(qName)) { + } else { boolean inHeadTags = false; String headTag = ""; for (String tag : headTags){ @@ -436,8 +285,6 @@ public class XML_processing { if(inHeadTags) { String tagContent = eventReader.nextEvent().asCharacters().getData(); headBlock.put(headTag, tagContent); -// String tagContent = xmlEventReader.nextEvent().asCharacters().getData(); -// resultFilters.get(headTag).add(tagContent); } @@ -562,22 +409,16 @@ public class XML_processing { if (line.length() > 4 && line.substring(1, 5).equals("text")) { // split over "\" " String[] split = line.split("\" "); -// String mediumId = ""; -// String typeId = ""; -// String proofreadId = ""; boolean idsPresent = false; for (String el : split) { String[] attribute = el.split("=\""); if (attribute[0].equals("medium_id")) { -// mediumId = attribute[1]; idsPresent = true; resultTaxonomy.add(attribute[1]); } else if (attribute[0].equals("type_id")) { -// typeId = attribute[1]; idsPresent = true; resultTaxonomy.add(attribute[1]); } else if (attribute[0].equals("proofread_id")) { -// proofreadId = attribute[1]; idsPresent = true; resultTaxonomy.add(attribute[1]); } @@ -586,13 +427,10 @@ public class XML_processing { for (String el : split) { String[] attribute = el.split("=\""); if (attribute[0].equals("medium")) { -// mediumId = attribute[1]; resultTaxonomy.add(attribute[1]); } else if (attribute[0].equals("type")) { -// typeId = attribute[1]; resultTaxonomy.add(attribute[1]); } else if (attribute[0].equals("proofread")) { -// proofreadId = attribute[1]; resultTaxonomy.add(attribute[1]); } } @@ -679,7 +517,6 @@ public class XML_processing { resultTaxonomy.add(tax); // solar -// } else if (!parseTaxonomy && headTags.contains(elementName)) { } else if (!parseTaxonomy) { boolean inHeadTags = false; String headTag = ""; @@ -737,7 +574,6 @@ public class XML_processing { boolean inPunctuation = false; boolean taxonomyMatch = true; ArrayList currentFiletaxonomy = new ArrayList<>(); -// ArrayList currentFiletaxonomyLong = new ArrayList<>(); String lemma = ""; String msd = ""; @@ -780,8 +616,6 @@ public class XML_processing { // keep only taxonomy properties Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus()); currentFiletaxonomy.add(currentFiletaxonomyElement); - Tax taxonomy = new Tax(); -// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); } } break; @@ -795,40 +629,13 @@ public class XML_processing { sentence.add(createWord(word, lemma, msd, word, stats.getFilter())); inWord = false; } -// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { if (stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { String punctuation = characters.getData(); sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter())); inPunctuation = false; - -// String punctuation = ","; -// -// sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation); -// sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation); -// sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation); -// inPunctuation = false; } break; -// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { -// String actualPunctuation = characters.getData(); -// if (actualPunctuation.equals(".") || actualPunctuation.equals("!") || actualPunctuation.equals("?") || actualPunctuation.equals("...")) -// break; -// String punctuation = ","; -// int skip_number = 0; -// if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue())){ -// skip_number = stats.getFilter().getSkipValue(); -// } -// for(int i = 1; i < skip_number + 2; i ++){ -// if (i < sentence.size() && !sentence.get(sentence.size() - i).equals(punctuation)) { -// sentence.get(sentence.size() - i).setWord(sentence.get(sentence.size() - i).getWord() + punctuation); -// sentence.get(sentence.size() - i).setLemma(sentence.get(sentence.size() - i).getLemma() + punctuation); -// sentence.get(sentence.size() - i).setMsd(sentence.get(sentence.size() - i).getMsd() + punctuation); -// } -// } -// inPunctuation = false; -// } - case XMLStreamConstants.END_ELEMENT: EndElement endElement = event.asEndElement(); @@ -869,10 +676,6 @@ public class XML_processing { fj(corpus, stats); // empty the current corpus, since we don't need the data anymore corpus.clear(); - - // TODO: if (stats.isUseDB()) { - // stats.storeTmpResultsToDB(); - // } } } else if (endElement.getName().getLocalPart().equals("teiHeader")) { // before proceeding to read this file, make sure that taxonomy filters are a match @@ -883,7 +686,6 @@ public class XML_processing { if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) { // taxonomies don't match so stop // union (select words that match any of selected taxonomy -// return false; taxonomyMatch = false; // } else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){ @@ -898,10 +700,6 @@ public class XML_processing { // join corpus and stats fj(corpus, stats); corpus.clear(); - - // TODO: if (stats.isUseDB()) { - // stats.storeTmpResultsToDB(); - // } } break; @@ -909,7 +707,6 @@ public class XML_processing { } } catch (FileNotFoundException | XMLStreamException e) { throw new java.lang.RuntimeException("XMLStreamException | FileNotFoundException"); -// e.printStackTrace(); } finally { if (eventReader != null) { try { @@ -929,7 +726,6 @@ public class XML_processing { boolean inPunctuation = false; boolean taxonomyMatch = true; ArrayList currentFiletaxonomy = new ArrayList<>(); -// ArrayList currentFiletaxonomyLong = new ArrayList<>(); String lemma = ""; String msd = ""; @@ -1006,8 +802,6 @@ public class XML_processing { // keep only taxonomy properties Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""), stats.getCorpus()); currentFiletaxonomy.add(currentFiletaxonomyElement); -// Tax taxonomy = new Tax(); -// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); } } else if (stats.getCorpus().getTaxonomy().size() > 0 && qName.equalsIgnoreCase("catRef")) { // get value from attribute target @@ -1017,41 +811,7 @@ public class XML_processing { // keep only taxonomy properties Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).split(":")[1], stats.getCorpus()); currentFiletaxonomy.add(currentFiletaxonomyElement); -// Tax taxonomy = new Tax(); -// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); } - - - - - -// if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) { -// HashMap atts = extractAttributes(startElement); -// String debug = ""; -// -// String tax = startElement.getAttributeByName(QName.valueOf("target")) -// .getValue() -// .replace("#", ""); -// -// if (tax.indexOf(':') >= 0) { -// tax = tax.split(":")[1]; -// } -// resultTaxonomy.add(tax); -// } else if (parseTaxonomy && elementName.equalsIgnoreCase("term")) { -// String tax = startElement.getAttributeByName(QName.valueOf("ref")) -// .getValue() -// .replace("#", ""); -// -// resultTaxonomy.add(tax); -// } else if (!parseTaxonomy && headTags.contains(elementName)) { -// String tagContent = xmlEventReader.nextEvent().asCharacters().getData(); -// resultFilters.get(elementName).add(tagContent); -// } - - - - - } else if (qName.equals("bibl")) { // before proceeding to read this file, make sure that taxonomy filters are a match taxonomyMatch = true; @@ -1068,14 +828,10 @@ public class XML_processing { // "word" node value if (inWord) { String word = characters.getData(); -// if (word.equals("Banovec")){ -// System.out.println("Test"); -// } sentence.add(createWord(word, lemma, msd, word, stats.getFilter())); inWord = false; } if (stats.getFilter().getNotePunctuations() && inPunctuation) { -// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) { String punctuation = characters.getData(); sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter())); inPunctuation = false; @@ -1085,9 +841,6 @@ public class XML_processing { case XMLStreamConstants.END_ELEMENT: EndElement endElement = event.asEndElement(); - String var = endElement.getName().getLocalPart(); - String debug = ""; - // parser reached end of the current sentence if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { if (stats.getFilter().getNgramValue() == 0){ @@ -1119,10 +872,6 @@ public class XML_processing { fj(corpus, stats); // empty the current corpus, since we don't need the data anymore corpus.clear(); - - // TODO: if (stats.isUseDB()) { - // stats.storeTmpResultsToDB(); - // } } } // fallback @@ -1133,7 +882,6 @@ public class XML_processing { corpus.clear(); currentFiletaxonomy = new ArrayList<>(); -// currentFiletaxonomyLong = new ArrayList<>(); } else if (endElement.getName().getLocalPart().equals("bibl")) { // before proceeding to read this file, make sure that taxonomy filters are a match @@ -1143,7 +891,6 @@ public class XML_processing { if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) { // taxonomies don't match so stop // union (select words that match any of selected taxonomy -// return false; taxonomyMatch = false; // } else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){ @@ -1162,10 +909,6 @@ public class XML_processing { fj(corpus, stats); // empty the current corpus, since we don't need the data anymore corpus.clear(); - - // TODO: if (stats.isUseDB()) { - // stats.storeTmpResultsToDB(); - // } } } catch (FileNotFoundException | XMLStreamException e) { e.printStackTrace(); @@ -1185,12 +928,9 @@ public class XML_processing { @SuppressWarnings("Duplicates") public static boolean readXMLGos(String path, StatisticsNew stats) { boolean inWord = false; - boolean inPunctuation = false; boolean inOrthDiv = false; - boolean computeForOrth = stats.getCorpus().isGosOrthMode(); boolean inSeparatedWord = false; ArrayList currentFiletaxonomy = new ArrayList<>(); -// ArrayList currentFiletaxonomyLong = new ArrayList<>(); String lemma = ""; String msd = ""; @@ -1201,10 +941,6 @@ public class XML_processing { String sentenceDelimiter = "seg"; int wordIndex = 0; - String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm - - - int numLines = 0; int lineNum = 0; progress.set(0.0); @@ -1248,7 +984,6 @@ public class XML_processing { } lineNum ++; XMLEvent event = eventReader.nextEvent(); - // System.out.print(String.format("%s", event.toString().replaceAll("\\['http://www.tei-c.org/ns/1.0'\\]::", ""))); switch (event.getEventType()) { case XMLStreamConstants.START_ELEMENT: @@ -1278,11 +1013,6 @@ public class XML_processing { if (atts.containsKey("lemma")) { lemma = atts.get("lemma"); } - // - // if (!inOrthDiv) { - // msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); - // lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); - // } } else if (atts.containsKey("type") && atts.get("type").equals("separated")) { inSeparatedWord = true; } @@ -1299,11 +1029,7 @@ public class XML_processing { // keep only taxonomy properties Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()), stats.getCorpus()); currentFiletaxonomy.add(currentFiletaxonomyElement); -// Tax taxonomy = new Tax(); -// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); } - } else if (qName.equalsIgnoreCase("div")) { - gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); } else if (qName.equalsIgnoreCase("seg")) { HashMap atts = extractAttributes(startElement); @@ -1322,20 +1048,14 @@ public class XML_processing { case XMLStreamConstants.CHARACTERS: // "word" node value if (inWord) { -// if (GOSCorpusHMKey.equals("gos.028-0108.norm") && wordIndex > 8){ -// System.out.println(wordIndex); -// } // if algorithm is in orthodox part add new word to sentence if (inOrthDiv){ -// GOSCorpusHM.put(GOSCorpusHMKey, sentence); String word = ""; Characters characters = event.asCharacters(); sentence.add(createWord(characters.getData(), "", "", "", stats.getFilter())); // if algorithm is in normalized part find orthodox word and add other info to it } else { Characters characters = event.asCharacters(); -// System.out.println(wordIndex); -// System.out.println(GOSCorpusHMKey + " " + lemma + " " + wordIndex); if (wordIndex < GOSCorpusHM.get(GOSCorpusHMKey).size()) { Word currentWord = GOSCorpusHM.get(GOSCorpusHMKey).get(wordIndex); currentWord.setLemma(lemma, stats.getFilter().getWordParts()); @@ -1349,9 +1069,7 @@ public class XML_processing { GOSCorpusHM.get(GOSCorpusHMKey).add(wordIndex, createWord(currentWord.getWord(stats.getFilter().getWordParts()), "", "", "", stats.getFilter())); } - } //else { -// System.out.println("Error"); -// } + } } } @@ -1393,17 +1111,7 @@ public class XML_processing { // add sentence to corpus if it passes filters if (includeFile && !ValidationUtil.isEmpty(sentence)) { -// for(Word w : sentence) { -// if (w.getW1().equals("")) { -// System.out.println("HERE!!!"); -// } -// } sentence = runFilters(sentence, stats.getFilter()); -// for(Word w : sentence) { -// if (w.getW1().equals("")) { -// System.out.println("HERE!!!"); -// } -// } corpus.add(new Sentence(sentence, currentFiletaxonomy)); } @@ -1430,21 +1138,12 @@ public class XML_processing { } else if (endElement.getName().getLocalPart().equals("teiHeader")) { // before proceeding to read this file, make sure that taxonomy filters are a match -// if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { -// currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection -// -// // disregard this entry if taxonomies don't match -// includeFile = !currentFiletaxonomy.isEmpty(); -// -//// currentFiletaxonomy = new ArrayList<>(); -// } if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection if (stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.UNION")) && currentFiletaxonomy.isEmpty()) { // taxonomies don't match so stop // union (select words that match any of selected taxonomy -// return false; includeFile = false; // } else if(stats.getFilter().getTaxonomySetOperation().equals(I18N.get("taxonomySetOperation.INTERSECTION")) && currentFiletaxonomy.size() != stats.getFilter().getTaxonomy().size()){ @@ -1462,7 +1161,6 @@ public class XML_processing { corpus.clear(); currentFiletaxonomy = new ArrayList<>(); -// currentFiletaxonomyLong = new ArrayList<>(); } break; @@ -1488,9 +1186,6 @@ public class XML_processing { @SuppressWarnings("Duplicates") public static boolean readVERT(String path, StatisticsNew stats) { // taxonomy corpora -// HashSet resultTaxonomy = new HashSet<>(); - - // regi path String regiPath = path.substring(0, path.length()-4) + "regi"; @@ -1503,7 +1198,6 @@ public class XML_processing { // read regi file regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8"); try { - boolean insideHeader = false; int attributeIndex = 0; while (regiIt.hasNext()) { String line = regiIt.nextLine(); @@ -1534,7 +1228,6 @@ public class XML_processing { } } catch (IOException e) { throw new java.lang.RuntimeException("IOException"); -// e.printStackTrace(); } int numLines = 0; @@ -1556,7 +1249,6 @@ public class XML_processing { LineIterator it; ArrayList currentFiletaxonomy = new ArrayList<>(); - boolean inParagraph = false; boolean inSentence = false; boolean taxonomyMatch = true; int lineNum = 0; @@ -1572,8 +1264,6 @@ public class XML_processing { try { it = FileUtils.lineIterator(new File(path), "UTF-8"); try { - boolean insideHeader = false; - while (it.hasNext()) { int percentage = (int) (lineNum * 100.0 / numLines); if(progress.get() < percentage) { @@ -1596,7 +1286,6 @@ public class XML_processing { boolean proofread = false; for (String el : split) { String[] attribute = el.split("=\""); - boolean idsPresent = false; if (attribute[0].equals("medium_id") && !attribute[1].equals("-")) { Taxonomy currentFiletaxonomyElement = Taxonomy.factory(attribute[1], stats.getCorpus()); currentFiletaxonomy.add(currentFiletaxonomyElement); @@ -1639,12 +1328,6 @@ public class XML_processing { } } -// else if((line.length() >= 3 && line.substring(0, 2).equals("")) || -// (line.length() >= 3 && line.substring(0, 3).equals(""))){ -// inParagraph = true; -// } else if((line.length() == 4 && line.equals("

")) || (line.length() == 5 && line.equals(""))){ -// inParagraph = false; -// } else if(line.length() >= 3 && line.substring(0, 2).equals("")){ inSentence = true; } else if(line.length() == 4 && line.equals("")){ @@ -1677,10 +1360,7 @@ public class XML_processing { // and start a new one sentence = new ArrayList<>(); - -// corpus.add(new Sentence(sentence, currentFiletaxonomy)); } else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence){ -// } else if(!(line.charAt(0) == '<' && line.charAt(line.length() - 1) == '>') && inSentence && inParagraph){ String[] split = line.split("\t"); if(slovene) { if (split[lemmaIndex].length() > 2 && split[lemmaIndex].charAt(split[lemmaIndex].length() - 2) == '-' && Character.isAlphabetic(split[lemmaIndex].charAt(split[lemmaIndex].length() - 1)) && @@ -1721,7 +1401,6 @@ public class XML_processing { } catch (IOException e) { e.printStackTrace(); } -// resultTaxonomy.remove("-"); return true; } diff --git a/src/main/java/alg/inflectedJOS/ForkJoin.java b/src/main/java/alg/inflectedJOS/ForkJoin.java deleted file mode 100755 index 3da4eee..0000000 --- a/src/main/java/alg/inflectedJOS/ForkJoin.java +++ /dev/null @@ -1,67 +0,0 @@ -//package alg.inflectedJOS; -// -//import java.util.List; -//import java.util.concurrent.RecursiveAction; -// -//import data.Sentence; -//import data.Statistics; -// -//public class ForkJoin extends RecursiveAction { -// private static final long serialVersionUID = -1260951004477299634L; -// -// private static final int ACCEPTABLE_SIZE = 1000; -// private List corpus; -// private Statistics stats; -// private int start; -// private int end; -// -// -// /** -// * Constructor for subproblems. -// */ -// private ForkJoin(List corpus, int start, int end, Statistics stats) { -// this.corpus = corpus; -// this.start = start; -// this.end = end; -// this.stats = stats; -// } -// -// /** -// * Default constructor for the initial problem -// */ -// public ForkJoin(List corpus, Statistics stats) { -// this.corpus = corpus; -// this.start = 0; -// this.end = corpus.size(); -// this.stats = stats; -// } -// -// private void computeDirectly() { -// List subCorpus = corpus.subList(start, end); -// -// if (stats.isTaxonomySet()) { -// InflectedJOSCount.calculateForAll(subCorpus, stats, stats.getInflectedJosTaxonomy()); -// } else { -// InflectedJOSCount.calculateForAll(subCorpus, stats, null); -// } -// } -// -// @Override -// protected void compute() { -// int subCorpusSize = end - start; -// -// if (subCorpusSize < ACCEPTABLE_SIZE) { -// computeDirectly(); -// } else { -// int mid = start + subCorpusSize / 2; -// ForkJoin left = new ForkJoin(corpus, start, mid, stats); -// ForkJoin right = new ForkJoin(corpus, mid, end, stats); -// -// // fork (push to queue)-> compute -> join -// left.fork(); -// right.fork(); -// left.join(); -// right.join(); -// } -// } -//} diff --git a/src/main/java/alg/inflectedJOS/InflectedJOSCount.java b/src/main/java/alg/inflectedJOS/InflectedJOSCount.java deleted file mode 100755 index 3f8c480..0000000 --- a/src/main/java/alg/inflectedJOS/InflectedJOSCount.java +++ /dev/null @@ -1,170 +0,0 @@ -//package alg.inflectedJOS; -// -//import java.util.ArrayList; -//import java.util.HashMap; -//import java.util.List; -// -//import org.apache.commons.lang3.StringUtils; -// -//import alg.Common; -//import data.Sentence; -//import data.Statistics; -//import data.StatisticsNew; -//import data.Word; -// -//public class InflectedJOSCount { -// -// public static HashMap>> indices; -// -// // static { -// // // calculate all possible combinations of indices we will substitute with a '-' for substring statistics -// // indices = new HashMap<>(); -// // for (int i = 5; i <= 8; i++) { -// // indices.put(i, calculateCombinations(i)); -// // } -// // } -// // -// // private static List calculateCombinations(int i) { -// // int arr[] = {1, 2, 3, 4, 5}; -// // int r = 3; -// // int n = arr.length; -// // ArrayList> result = new ArrayList<>(); -// // -// // return printCombination(arr, n, r); -// // } -// // -// // /* arr[] ---> Input Array -// // data[] ---> Temporary array to store current combination -// // start & end ---> Staring and Ending indexes in arr[] -// // index ---> Current index in data[] -// // r ---> Size of a combination to be printed */ -// // static void combinationUtil(int arr[], int data[], int start, -// // int end, int index, int r, ArrayList> result) { -// // // Current combination is ready to be printed, print it -// // ArrayList tmpResult = new ArrayList<>(); -// // -// // if (index == r) { -// // ArrayList tmpResult = new ArrayList<>(); -// // for (int j = 0; j < r; j++) -// // System.out.print(data[j] + " "); -// // System.out.println(""); -// // return; -// // } -// // -// // // replace index with all possible elements. The condition -// // // "end-i+1 >= r-index" makes sure that including one element -// // // at index will make a combination with remaining elements -// // // at remaining positions -// // for (int i = start; i <= end && end - i + 1 >= r - index; i++) { -// // data[index] = arr[i]; -// // combinationUtil(arr, data, i + 1, end, index + 1, r); -// // } -// // } -// // -// // // The main function that prints all combinations of size r -// // // in arr[] of size n. This function mainly uses combinationUtil() -// // static void printCombination(int arr[], int n, int r) { -// // // A temporary array to store all combination one by one -// // int data[] = new int[r]; -// // -// // // Print all combination using temprary array 'data[]' -// // combinationUtil(arr, data, 0, n - 1, 0, r); -// // } -// -// // public static void calculateForAll(List corpus, Statistics stats, String taxonomy) { -// // for (Sentence s : corpus) { -// // // disregard if wrong taxonomy -// // if (!(s.getObservableListTaxonomy().startsWith(taxonomy))) { -// // continue; -// // } -// // -// // calculateCommon(s, stats.result); -// // -// // for (Word word : s.getWords()) { -// // // skip if current word is not inflected -// // if (!(word.getMsd().length() > 0)) { -// // continue; -// // } -// // -// // String msd = word.getMsd(); -// // -// // StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1))); -// // -// // for (int i = 1; i < msd.length(); i++) { -// // entry.setCharAt(i, msd.charAt(i)); -// // Common.updateMap(stats.result, entry.toString()); -// // entry.setCharAt(i, '-'); -// // } -// // } -// // } -// // } -// -// // public static void calculateForAll(List corpus, Statistics stats) { -// // for (Sentence s : corpus) { -// // for (Word word : s.getWords()) { -// // if (!(word.getMsd().length() > 0)) { -// // continue; -// // } -// // -// // String msd = word.getMsd(); -// // -// // StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1))); -// // -// // for (int i = 1; i < msd.length(); i++) { -// // entry.setCharAt(i, msd.charAt(i)); -// // Common.updateMap(stats.result, entry.toString()); -// // entry.setCharAt(i, '-'); -// // } -// // } -// // } -// // } -// -// static void calculateForAll(List corpus, Statistics stats, String taxonomy) { -// for (Sentence s : corpus) { -// // disregard if wrong taxonomy -//// if (taxonomy != null && !(s.getObservableListTaxonomy().startsWith(taxonomy))) { -//// continue; -//// } -// -// for (Word word : s.getWords()) { -// // skip if current word is not inflected -// if (!(word.getMsd().length() > 0)) { -// continue; -// } -// -// String msd = word.getMsd(); -// -// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1))); -// -// for (int i = 1; i < msd.length(); i++) { -// entry.setCharAt(i, msd.charAt(i)); -// Common.updateMap(stats.result, entry.toString()); -// entry.setCharAt(i, '-'); -// } -// } -// } -// } -// -// public static void calculateForAll(List corpus, StatisticsNew stats, String taxonomy) { -// for (Sentence s : corpus) { -// -// for (Word word : s.getWords()) { -// // skip if current word is not inflected -// // // TODO: if has defined msd and is of correct type (create a set) -// // if (!(word.getMsd().length() > 0)) { -// // continue; -// // } -// -// String msd = word.getMsd(); -// -// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1))); -// -// for (int i = 1; i < msd.length(); i++) { -// entry.setCharAt(i, msd.charAt(i)); -// stats.updateResults(entry.toString()); -// entry.setCharAt(i, '-'); -// } -// } -// } -// } -//} diff --git a/src/main/java/alg/inflectedJOS/WordFormation.java b/src/main/java/alg/inflectedJOS/WordFormation.java deleted file mode 100755 index 86fe16f..0000000 --- a/src/main/java/alg/inflectedJOS/WordFormation.java +++ /dev/null @@ -1,132 +0,0 @@ -package alg.inflectedJOS; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; - -import data.Enums.InflectedJosTypes; -import data.StatisticsNew; -import data.Taxonomy; -import gui.ValidationUtil; -import util.Combinations; - -// adapted from http://www.geeksforgeeks.org/print-all-possible-combinations-of-r-elements-in-a-given-array-of-size-n/ -public class WordFormation { - private static HashMap josTypeResult; - private static Object[][] tmpResults; - - private static HashMap>> indices; - - static { - indices = new HashMap<>(); - - for (int i = 4; i <= 8; i++) { - indices.put(i, Combinations.generateIndices(i)); - } - } - - public static void calculateStatistics(StatisticsNew stat) { - Map result = stat.getResult(); - - // 1. filter - keep only inflected types - result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.toString().charAt(0))); - - // 2. for each inflected type get all possible subcombinations - for (Character josChar : InflectedJosTypes.inflectedJosTypes) { - josTypeResult = new HashMap<>(); - - // filter out results for a single word type - Map singleTypeResults = result.entrySet().stream() - .filter(x -> x.getKey().charAt(0) == josChar) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - - if (ValidationUtil.isEmpty(singleTypeResults)) { - continue; - } - - // get all possible indices combos for a msd of this length - // HashSet> indicesCombos = indices.get() - //Combinations.generateIndices(singleTypeResults.keySet().stream().findFirst().get().length()); - - for (Map.Entry e : singleTypeResults.entrySet()) { - int l = e.getKey().length(); - - for (HashSet indicesCombo : indices.get(e.getKey().length())) { - updateResults(mask(e.getKey(), indicesCombo), e.getValue().longValue()); - } - } - - resultsMapToArray(singleTypeResults.values().stream().mapToLong(Number::longValue).sum()); - } - - stat.setResultCustom(tmpResults); - } - - private static String mask(String word, HashSet indicesCombo) { - StringBuilder sb = new StringBuilder(); - - sb.append(word.charAt(0)); - for (int i = 1; i < word.length(); i++) { - sb.append(indicesCombo.contains(i) ? word.charAt(i) : "."); - } - - return sb.toString(); - } - - - private static void updateResults(String s, Long nOfOccurences) { - // if not in map add - Long r = josTypeResult.putIfAbsent(s, nOfOccurences); - - // else update - if (r != null) { - josTypeResult.put(s, josTypeResult.get(s) + nOfOccurences); - } - } - - private static void resultsMapToArray(Long totalValue) { - Double total = totalValue * 1.0; - Object[][] josTypeResultArray = new Object[josTypeResult.size()][3]; - - int i = 0; - for (Map.Entry e : josTypeResult.entrySet()) { - josTypeResultArray[i][0] = e.getKey(); - josTypeResultArray[i][1] = e.getValue(); - josTypeResultArray[i][2] = e.getValue() / total; - - if (e.getValue() > total) { - - String debug = ""; - - } - - i++; - } - - if (tmpResults == null) { - tmpResults = josTypeResultArray; - } else { - int firstLength = tmpResults.length; - int secondLength = josTypeResultArray.length; - Object[][] tmp = new Object[firstLength + secondLength][3]; - - System.arraycopy(tmpResults, 0, tmp, 0, firstLength); - System.arraycopy(josTypeResultArray, 0, tmp, firstLength, secondLength); - - tmpResults = tmp; - - // tmpResults = ArrayUtils.addAll(tmpResults, josTypeResultArray); - } - } - - private static void printArray() { - for (int i = 0; i < tmpResults.length; i++) { - for (int j = 0; j < tmpResults[i].length; j++) { - System.out.print(tmpResults[i][j] + "\t"); - } - System.out.println(); - } - } -} diff --git a/src/main/java/alg/ngram/Ngrams.java b/src/main/java/alg/ngram/Ngrams.java index 44658dd..1065721 100755 --- a/src/main/java/alg/ngram/Ngrams.java +++ b/src/main/java/alg/ngram/Ngrams.java @@ -80,36 +80,13 @@ public class Ngrams { } } -// boolean a = (correctPrefix.equals("") && !correctSuffix.equals("")); -// boolean b = (!correctPrefix.equals("") && correctSuffix.equals("")); -// boolean c = (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()); -// boolean d = !((correctPrefix.equals("") && !correctSuffix.equals("")) || -// (!correctPrefix.equals("") && correctSuffix.equals("")) || -// (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length())); - if(!((stats.getFilter().getPrefixList().size() == 0 && !correctSuffix.equals("")) || (!correctPrefix.equals("") && stats.getFilter().getSuffixList().size() == 0) || (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){ continue; } - -// if(!((correctPrefix.equals("") && !correctSuffix.equals("")) || -// (!correctPrefix.equals("") && correctSuffix.equals("")) || -// (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){ -// continue; -// } - } - // if last letter is ',' erase it - -// if (key.equals("")){ -// String test = key; -// } - -// if (stats.getFilter().getNotePunctuations()) -// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; - MultipleHMKeys multipleKeys; // create MultipleHMKeys for different amount of other keys @@ -119,28 +96,17 @@ public class Ngrams { break; case 1: String k1_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts()); -// if (stats.getFilter().getNotePunctuations()) -// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2; multipleKeys = new MultipleHMKeys2(key, k1_2); break; case 2: String k2_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts()); String k2_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts()); -// if (stats.getFilter().getNotePunctuations()) { -// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2; -// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3; -// } multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3); break; case 3: String k3_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts()); String k3_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts()); String k3_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts()); -// if (stats.getFilter().getNotePunctuations()) { -// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2; -// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3; -// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4; -// } multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4); break; case 4: @@ -148,41 +114,13 @@ public class Ngrams { String k4_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts()); String k4_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts()); String k4_5 = wordToString(ngramCandidate, otherKeys.get(3), stats.getFilter().getWordParts()); -// if (stats.getFilter().getNotePunctuations()) { -// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2; -// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3; -// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4; -// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5; -// } multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5); break; default: multipleKeys = null; } - -// String lemma = ""; -// String wordType = ""; -// String msd = ""; -// for (CalculateFor otherKey : stats.getFilter().getMultipleKeys()){ -// if(otherKey.toString().equals("lema")){ -// lemma = wordToString(ngramCandidate, otherKey); -// } else if(otherKey.toString().equals("besedna vrsta")){ -// wordType = wordToString(ngramCandidate, otherKey).substring(0, 1); -// } else if(otherKey.toString().equals("oblikoskladenjska oznaka")){ -// msd = wordToString(ngramCandidate, otherKey); -// } -// } -// -// MultipleHMKeys multipleKeys = new MultipleHMKeys(key, lemma, wordType, msd); - - - - - - // UPDATE TAXONOMY HERE!!! stats.updateTaxonomyResults(multipleKeys, s.getTaxonomy()); -// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor())); } } } @@ -191,18 +129,12 @@ public class Ngrams { * Checks whether an ngram candidate passes specified regex filter. */ private static boolean passesRegex(List ngramCandidate, ArrayList regex, ArrayList wordParts) { -// if (ngramCandidate.size() != regex.size()) { -// logger.error("ngramCandidate.size() & msd.size() mismatch"); // should not occur anyway -// return false; -// } - int j = 0; for (int i = 0; i < ngramCandidate.size(); i++) { String msd = ngramCandidate.get(i).getMsd(wordParts); if (msd.equals("*")){ continue; } - //if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) { if (!msd.matches(regex.get(j).pattern() + ".*")) { return false; } @@ -247,11 +179,6 @@ public class Ngrams { .stream() .map(w -> Character.toString(w.getMsd(wordParts).length() > 0 ? w.getMsd(wordParts).charAt(0) : '/')) .collect(Collectors.toList())); -// candidate.addAll(ngramCandidate -// .stream() -// .map(w -> Character.toString(w.getMsd().charAt(0))) -// .collect(Collectors.toList())); -// .substring(0, 1) return StringUtils.join(candidate, " "); case NORMALIZED_WORD: candidate.addAll(ngramCandidate @@ -322,32 +249,6 @@ public class Ngrams { } } - /** - * Checks skipped words and if necessary adds punctuations. - * - * @return List of candidates represented as a list - */ - private static Word checkAndModifySkipgramPunctuation(List sentence, int i, int j, StatisticsNew stats){ - // if punctuation checkbox selected and there words at indexes i and j are not next to each other -// if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){ -// boolean middleWordsHavePunctuation = false; -// for (int n = i + 1; n < j; n++){ -// if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){ -// middleWordsHavePunctuation = true; -// break; -// } -// } -// if (middleWordsHavePunctuation){ -// -// String punctuation = ","; -// return new Word(sentence.get(i).getWord() + punctuation, -// sentence.get(i).getLemma() + punctuation, -// sentence.get(i).getMsd() + punctuation); -// } -// } - return sentence.get(i); - - } /** * Extracts skipgram candidates. @@ -363,8 +264,6 @@ public class Ngrams { for (Sentence s : corpus) { List sentence = s.getWords(); -// stats.updateUniGramOccurrences(s.getWords().size()); - if (sentence == null){ continue; } @@ -373,7 +272,6 @@ public class Ngrams { for (int j = i + 1; j <= i + skip + 1; j++) { // 2gram if (ngram == 2 && j < sentence.size()) { currentLoop = new ArrayList<>(); -// currentLoop.add(sentence.get(i)); currentLoop.add(sentence.get(i)); fillSkipgrams(currentLoop, i, j, w); currentLoop.add(sentence.get(j)); @@ -439,25 +337,10 @@ public class Ngrams { private static void validateAndCountSkipgramCandidate(ArrayList skipgramCandidate, StatisticsNew stats, List taxonomy) { // count if no regex is set or if it is & candidate passes it if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) { -// String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor()); -// key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; -// stats.updateTaxonomyResults(new MultipleHMKeys1(key), -// stats.getCorpus().getObservableListTaxonomy()); - - ArrayList otherKeys = stats.getFilter().getMultipleKeys(); String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts()); - // if last letter is ',' erase it - -// if (key.equals("")){ -// String test = key; -// } - -// if (stats.getFilter().getNotePunctuations()) -// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key; - MultipleHMKeys multipleKeys; // create MultipleHMKeys for different amount of other keys @@ -467,28 +350,17 @@ public class Ngrams { break; case 1: String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts()); -// if (stats.getFilter().getNotePunctuations()) -// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2; multipleKeys = new MultipleHMKeys2(key, k1_2); break; case 2: String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts()); String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts()); -// if (stats.getFilter().getNotePunctuations()) { -// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2; -// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3; -// } multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3); break; case 3: String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts()); String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts()); String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts()); -// if (stats.getFilter().getNotePunctuations()) { -// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2; -// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3; -// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4; -// } multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4); break; case 4: @@ -496,12 +368,6 @@ public class Ngrams { String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts()); String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts()); String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3), stats.getFilter().getWordParts()); -// if (stats.getFilter().getNotePunctuations()) { -// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2; -// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3; -// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4; -// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5; -// } multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5); break; default: diff --git a/src/main/java/alg/word/WordCount.java b/src/main/java/alg/word/WordCount.java deleted file mode 100755 index b6f4cbc..0000000 --- a/src/main/java/alg/word/WordCount.java +++ /dev/null @@ -1,167 +0,0 @@ -package alg.word; - -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - -import alg.Common; -import data.CalculateFor; -import data.Sentence; -import data.Statistics; -import data.Word; - -//class WordCount { -// private static void calculateNoFilter(List corpus, Statistics stats) { -// for (Sentence s : corpus) { -// List sentence = new ArrayList<>(s.getWords().size()); -// -// if (stats.getCf() == CalculateFor.LEMMA) { -// sentence.addAll(s.getWords() -// .stream() -// .map(Word::getLemma) -// .collect(Collectors.toList())); -// } else if (stats.getCf() == CalculateFor.WORD) { -// sentence.addAll(s.getWords() -// .stream() -// .map(Word::getWord) -// .collect(Collectors.toList())); -// } -// -// for (String word : sentence) { -// Common.updateMap(stats.result, word); -// } -// } -// } -// -// private static void calculateVCC(List corpus, Statistics stats) { -// for (Sentence s : corpus) { -// List sentence = new ArrayList<>(s.getWords().size()); -// -// if (stats.getCf() == CalculateFor.LEMMA) { -// sentence.addAll(s.getWords() -// .stream() -// .map(Word::getCVVLemma) -// .collect(Collectors.toList())); -// } else if (stats.getCf() == CalculateFor.WORD) { -// sentence.addAll(s.getWords() -// .stream() -// .map(Word::getCVVWord) -// .collect(Collectors.toList())); -// } -// -// for (String word : sentence) { -// if (word.length() > stats.getSubstringLength()) { -// for (int i = 0; i <= word.length() - stats.getSubstringLength(); i++) { -// String substring = word.substring(i, i + stats.getSubstringLength()); -// Common.updateMap(stats.result, substring); -// } -// } -// } -// } -// } -// -// private static void calculateForJosType(List corpus, Statistics stats) { -// for (Sentence s : corpus) { -// List sentence = new ArrayList<>(s.getWords().size()); -// List filteredWords = new ArrayList<>(); -// -// for (Word word : s.getWords()) { -// if (word.getMsd() != null && word.getMsd().charAt(0) == stats.getDistributionJosWordType()) { -// filteredWords.add(word); -// } -// } -// -// if (stats.getCf() == CalculateFor.LEMMA) { -// sentence.addAll(filteredWords -// .stream() -// .map(Word::getLemma) -// .collect(Collectors.toList())); -// } else if (stats.getCf() == CalculateFor.WORD) { -// sentence.addAll(filteredWords -// .stream() -// .map(Word::getWord) -// .collect(Collectors.toList())); -// } -// -// for (String word : sentence) { -// Common.updateMap(stats.result, word); -// } -// } -// } - -// private static void calculateForTaxonomyAndJosType(List corpus, Statistics stats) { -// for (Sentence s : corpus) { -// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) { -// List sentence = new ArrayList<>(s.getWords().size()); -// List filteredWords = new ArrayList<>(); -// -// for (Word word : s.getWords()) { -// if (word.getMsd().charAt(0) == stats.getDistributionJosWordType()) { -// filteredWords.add(word); -// } -// } -// -// if (stats.getCf() == CalculateFor.LEMMA) { -// sentence.addAll(filteredWords -// .stream() -// .map(Word::getLemma) -// .collect(Collectors.toList())); -// } else if (stats.getCf() == CalculateFor.WORD) { -// sentence.addAll(filteredWords -// .stream() -// .map(Word::getWord) -// .collect(Collectors.toList())); -// } -// -// for (String word : sentence) { -// Common.updateMap(stats.result, word); -// } -// } -// } -// } - -// private static void calculateForTaxonomy(List corpus, Statistics stats) { -// for (Sentence s : corpus) { -// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) { -// List sentence = new ArrayList<>(s.getWords().size()); -// -// if (stats.getCf() == CalculateFor.LEMMA) { -// sentence.addAll(s.getWords() -// .stream() -// .map(Word::getLemma) -// .collect(Collectors.toList())); -// } else if (stats.getCf() == CalculateFor.WORD) { -// sentence.addAll(s.getWords() -// .stream() -// .map(Word::getWord) -// .collect(Collectors.toList())); -// } -// -// for (String word : sentence) { -// Common.updateMap(stats.result, word); -// } -// } -// } -// } - -// static void calculateForAll(List corpus, Statistics stats) { -// boolean taxonomyIsSet = stats.isTaxonomySet(); -// boolean JosTypeIsSet = stats.isJOSTypeSet(); -// -// // branching because even though the only difference is an if or two && -// // O(if) = 1, the amount of ifs adds up and this saves some time -// if (taxonomyIsSet && JosTypeIsSet) { -// calculateForTaxonomyAndJosType(corpus, stats); -// } else if (taxonomyIsSet && !JosTypeIsSet) { -// calculateForTaxonomy(corpus, stats); -// } else if (!taxonomyIsSet && JosTypeIsSet) { -// calculateForJosType(corpus, stats); -// } else { -// if (stats.isVcc()) { -// calculateVCC(corpus, stats); -// } else { -// calculateNoFilter(corpus, stats); -// } -// } -// } -//} \ No newline at end of file diff --git a/src/main/java/data/CalculateFor.java b/src/main/java/data/CalculateFor.java index baa1327..85557fb 100755 --- a/src/main/java/data/CalculateFor.java +++ b/src/main/java/data/CalculateFor.java @@ -3,24 +3,6 @@ package data; import gui.I18N; public enum CalculateFor { -// calculateFor.WORD=word -// calculateFor.NORMALIZED_WORD=normalized word -// calculateFor.LEMMA=lemma -// calculateFor.MORPHOSYNTACTIC_SPECS=msd -// calculateFor.MORPHOSYNTACTIC_PROPERTY=oblikoskladenjska lastnost -// calculateFor.WORD_TYPE=besedna vrsta -// calculateFor.DIST_WORDS=različnica -// calculateFor.DIST_LEMMAS=lema - -// WORD("različnica"), -// NORMALIZED_WORD("normalizirana različnica"), -// LEMMA("lema"), -// MORPHOSYNTACTIC_SPECS("oblikoskladenjska oznaka"), -// MORPHOSYNTACTIC_PROPERTY("oblikoskladenjska lastnost"), -// WORD_TYPE("besedna vrsta"), -// DIST_WORDS("različnica"), -// DIST_LEMMAS("lema"); - WORD("calculateFor.WORD"), LOWERCASE_WORD("calculateFor.LOWERCASE_WORD"), NORMALIZED_WORD("calculateFor.NORMALIZED_WORD"), @@ -44,7 +26,6 @@ public enum CalculateFor { public static CalculateFor factory(String cf) { if (cf != null) { -// String name = I18N.findI18NString(cf, "calculateFor"); if (WORD.toString().equals(cf)) { return WORD; } @@ -275,27 +256,4 @@ public enum CalculateFor { return null; } } - -// public String toPercentString() { -// switch(this){ -// case WORD: -// return "Delež glede na vse različnice"; -// case NORMALIZED_WORD: -// return "Delež glede na vse normalizirane različnice"; -// case LEMMA: -// return "Delež glede na vse leme"; -// case MORPHOSYNTACTIC_SPECS: -// return "Delež glede na vse oblikoskladenjske oznake"; -// case MORPHOSYNTACTIC_PROPERTY: -// return "Delež glede na vse oblikoskladenjske lastnosti"; -// case WORD_TYPE: -// return "Delež glede na vse besedne vrste"; -// case DIST_WORDS: -// return "Delež glede na vse različnice"; -// case DIST_LEMMAS: -// return "Delež glede na vse leme"; -// default: -// return null; -// } -// } } diff --git a/src/main/java/data/Collocability.java b/src/main/java/data/Collocability.java index 159f15c..f406c7a 100755 --- a/src/main/java/data/Collocability.java +++ b/src/main/java/data/Collocability.java @@ -74,13 +74,4 @@ public enum Collocability { return null; } } - -// public String toPercentString() { -// switch(this){ -// case DICE: -// return "Delež glede na vse različnice"; -// default: -// return null; -// } -// } } diff --git a/src/main/java/data/Corpus.java b/src/main/java/data/Corpus.java index 6b1e9b4..d63e120 100755 --- a/src/main/java/data/Corpus.java +++ b/src/main/java/data/Corpus.java @@ -17,7 +17,6 @@ import org.apache.logging.log4j.Logger; import data.Enums.solar.SolarFilters; import gui.ValidationUtil; import javafx.collections.ObservableList; -import org.controlsfx.control.CheckComboBox; public class Corpus { public final static Logger logger = LogManager.getLogger(Corpus.class); @@ -33,7 +32,6 @@ public class Corpus { public HashMap> solarSelectedFilters; // if solar selected private HashMap> solarFiltersForXML; // if solar - used while parsing xml private boolean gosOrthMode; - boolean hasMsdData; private ArrayList validationErrors; private String corpusName = ""; private String punctuation = "punctuation.COMMA"; @@ -48,7 +46,6 @@ public class Corpus { } public void setCorpusName(String corpusName) { -// System.out.println(corpusName); this.corpusName = corpusName; logger.info("Corpus.set: ", corpusName); } @@ -58,7 +55,6 @@ public class Corpus { } public void setPunctuation(String punctuation) { -// System.out.println(corpusName); this.punctuation = punctuation; logger.info("Punctuation.set: ", punctuation); } @@ -99,10 +95,6 @@ public class Corpus { logger.info("Corpus.set: ", detectedCorpusFiles); } - public boolean isHeaderRead() { - return headerRead; - } - public void setHeaderRead(boolean headerRead) { this.headerRead = headerRead; } @@ -128,11 +120,6 @@ public class Corpus { } return FXCollections.observableArrayList(al); } -// -// public ObservableList getFormattedTaxonomy() { -// ArrayList al = Tax.getTaxonomyFormatted(new ArrayList<>(taxonomy), corpusType); -// return FXCollections.observableArrayList(al); -// } public void setTaxonomy(ObservableList taxonomy) { this.taxonomy = new ArrayList<>(); @@ -155,15 +142,6 @@ public class Corpus { return solarSelectedFilters; } - public void setSolarSelectedFilters(HashMap> solarFilters) { - this.solarSelectedFilters = solarFilters; - logger.info("Corpus.set: ", solarFilters); - } - - public HashMap> getSolarFiltersForXML() { - return solarFiltersForXML; - } - public void setSolarFiltersForXML(HashMap> solarFiltersForXML) { this.solarFiltersForXML = solarFiltersForXML; logger.info("Corpus.set: ", solarFiltersForXML); @@ -173,23 +151,10 @@ public class Corpus { return gosOrthMode; } - public void setGosOrthMode(boolean gosOrthMode) { - this.gosOrthMode = gosOrthMode; - logger.info("Corpus.set: ", gosOrthMode); - } - - public ArrayList getValidationErrors() { - return validationErrors; - } - public String getValidationErrorsToString() { return StringUtils.join(validationErrors, "\n - "); } - public void setValidationErrors(ArrayList validationErrors) { - this.validationErrors = validationErrors; - } - public boolean validate() { if (corpusType == null) { validationErrors.add(I18N.get("message.LABEL_RESULTS_CORPUS_TYPE_NOT_SET")); diff --git a/src/main/java/data/Enums/InflectedJosTypes.java b/src/main/java/data/Enums/InflectedJosTypes.java deleted file mode 100755 index a5acbcf..0000000 --- a/src/main/java/data/Enums/InflectedJosTypes.java +++ /dev/null @@ -1,12 +0,0 @@ -package data.Enums; - -import java.util.Arrays; -import java.util.HashSet; - -public class InflectedJosTypes { - public static final HashSet inflectedJosTypes = new HashSet<>(); - - static { - inflectedJosTypes.addAll(Arrays.asList('S', 'G', 'P')); - } -} diff --git a/src/main/java/data/Enums/Msd.java b/src/main/java/data/Enums/Msd.java deleted file mode 100755 index 950749f..0000000 --- a/src/main/java/data/Enums/Msd.java +++ /dev/null @@ -1,68 +0,0 @@ -package data.Enums; - -import java.util.HashMap; - -public enum Msd { - NOUN("samostalnik", 'S', "Noun", 'N', 5), - VERB("glagol", 'G', "Verb", 'V', 7), - ADJECTIVE("pridevnik", 'P', "Adjective", 'A', 6), - ADVERB("prislov", 'R', "Adverb", 'R', 2), - PRONOUN("zaimek", 'Z', "Pronoun", 'P', 8), - NUMERAL("števnik", 'K', "Numeral", 'M', 6), - PREPOSITION("predlog", 'D', "Preposition", 'S', 1), - CONJUNCTION("veznik", 'V', "Conjunction", 'C', 1), - PARTICLE("členek", 'L', "Particle", 'Q', 0), - INTERJECTION("medmet", 'M', "Interjection", 'I', 0), - ABBREVIATION("okrajšava", 'O', "Abbreviation", 'Y', 0), - RESIDUAL("neuvrščeno", 'N', "Residual", 'X', 1); - - private final String siName; - private final Character siCode; - private final String enName; - private final Character enCode; - private final Integer nOfAttributes; - - private static HashMap siCodeNOfAttributes; - - static { - siCodeNOfAttributes = new HashMap<>(); - for (Msd msd : Msd.values()) { - siCodeNOfAttributes.put(msd.getSiCode(), msd.nOfAttributes); - } - } - - Msd(String siName, Character siCode, String enName, Character enCode, int nOfAttributes) { - this.siName = siName; - this.siCode = siCode; - this.enName = enName; - this.enCode = enCode; - this.nOfAttributes = nOfAttributes; - } - - public String getSiName() { - return siName; - } - - public Character getSiCode() { - return siCode; - } - - public String getEnName() { - return enName; - } - - public Character getEnCode() { - return enCode; - } - - /** - * Returns the number of attributes for the given type. - * - * @param msd - * - * @return - */ - public static int getMsdLengthForType(String msd) { - return siCodeNOfAttributes.get(msd.charAt(0)) + 1; - } -} diff --git a/src/main/java/data/Enums/solar/SolarFilters.java b/src/main/java/data/Enums/solar/SolarFilters.java index 15c6cec..bdf431f 100755 --- a/src/main/java/data/Enums/solar/SolarFilters.java +++ b/src/main/java/data/Enums/solar/SolarFilters.java @@ -27,9 +27,6 @@ public class SolarFilters { SOLAR_FILTERS.put(TIP, FXCollections.observableArrayList("esej/spis", "pisni izdelek (učna ura)", "test (daljše besedilo)", "test (odgovori na vprašanja)")); } - public static final ObservableList N_GRAM_COMPUTE_FOR_FULL = FXCollections.observableArrayList("različnica", "lema", "oblikoskladenjska oznaka", "oblikoskladenjska lastnost", "besedna vrsta"); - public static final ObservableList N_GRAM_COMPUTE_FOR_LIMITED = FXCollections.observableArrayList("različnica", "lema"); - /** * Returns filters with all possible values */ diff --git a/src/main/java/data/Filter.java b/src/main/java/data/Filter.java index 90eb0f0..5d49bd4 100755 --- a/src/main/java/data/Filter.java +++ b/src/main/java/data/Filter.java @@ -349,7 +349,6 @@ public class Filter implements Cloneable { } - public Object clone() throws CloneNotSupportedException{ Filter f = null; try { diff --git a/src/main/java/data/GigafidaJosWordType.java b/src/main/java/data/GigafidaJosWordType.java deleted file mode 100755 index 9e32953..0000000 --- a/src/main/java/data/GigafidaJosWordType.java +++ /dev/null @@ -1,71 +0,0 @@ -package data; - -public enum GigafidaJosWordType { - SAMOSTALNIK("samostalnik", 'S'), - GLAGOL("glagol", 'G'), - PRIDEVNIK("pridevnik", 'P'), - PRISLOV("prislov", 'R'), - ZAIMEK("zaimek", 'Z'), - STEVNIK("stevnik", 'K'), - PREDLOG("predlog", 'D'), - VEZNIK("veznik", 'V'), - CLENEK("clenek", 'L'), - MEDMET("medmet", 'M'), - OKRAJSAVA("okrajsava", 'O'); - - - private final String name; - private final char wordType; - - GigafidaJosWordType(String name, char wordType) { - this.name = name; - this.wordType = wordType; - } - - public String toString() { - return this.name; - } - - public char getWordType() { - return wordType; - } - - public static GigafidaJosWordType factory(String wType) { - if (wType != null) { - if (SAMOSTALNIK.toString().equals(wType)) { - return SAMOSTALNIK; - } - if (GLAGOL.toString().equals(wType)) { - return GLAGOL; - } - if (PRIDEVNIK.toString().equals(wType)) { - return PRIDEVNIK; - } - if (PRISLOV.toString().equals(wType)) { - return PRISLOV; - } - if (ZAIMEK.toString().equals(wType)) { - return ZAIMEK; - } - if (STEVNIK.toString().equals(wType)) { - return STEVNIK; - } - if (PREDLOG.toString().equals(wType)) { - return PREDLOG; - } - if (VEZNIK.toString().equals(wType)) { - return VEZNIK; - } - if (CLENEK.toString().equals(wType)) { - return CLENEK; - } - if (MEDMET.toString().equals(wType)) { - return MEDMET; - } - if (OKRAJSAVA.toString().equals(wType)) { - return OKRAJSAVA; - } - } - return null; - } -} diff --git a/src/main/java/data/GigafidaTaxonomy.java b/src/main/java/data/GigafidaTaxonomy.java deleted file mode 100755 index cafde32..0000000 --- a/src/main/java/data/GigafidaTaxonomy.java +++ /dev/null @@ -1,76 +0,0 @@ -package data; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.stream.Collectors; - -import javafx.collections.FXCollections; -import javafx.collections.ObservableList; - -public enum GigafidaTaxonomy { - TISK("tisk", "T"), - KNJIZNO("knjižno", "T.K"), - LEPOSLOVNO("leposlovno", "T.K.L"), - STROKOVNO("strokovno", "T.K.S"), - PERIODICNO("periodično", "T.P"), - CASOPIS("časopis", "T.P.C"), - REVIJA("revija", "T.P.R"), - INTERNET("internet", "I"); - - private final String name; - private final String taxonomy; - - private static final ObservableList FOR_COMBO_BOX; - - static { - ArrayList values = Arrays.stream(GigafidaTaxonomy.values()).map(x -> x.name).collect(Collectors.toCollection(ArrayList::new)); - FOR_COMBO_BOX = FXCollections.observableArrayList(values); - } - - GigafidaTaxonomy(String name, String taxonomy) { - this.name = name; - this.taxonomy = taxonomy; - } - - public String toString() { - return this.name; - } - - public String getTaxonomnyString() { - return this.taxonomy; - } - - public static GigafidaTaxonomy factory(String tax) { - if (tax != null) { - if (TISK.toString().equals(tax)) { - return TISK; - } - if (KNJIZNO.toString().equals(tax)) { - return KNJIZNO; - } - if (LEPOSLOVNO.toString().equals(tax)) { - return LEPOSLOVNO; - } - if (STROKOVNO.toString().equals(tax)) { - return STROKOVNO; - } - if (PERIODICNO.toString().equals(tax)) { - return PERIODICNO; - } - if (CASOPIS.toString().equals(tax)) { - return CASOPIS; - } - if (REVIJA.toString().equals(tax)) { - return REVIJA; - } - if (INTERNET.toString().equals(tax)) { - return INTERNET; - } - } - return null; - } - - public static ObservableList getForComboBox() { - return FOR_COMBO_BOX; - } -} diff --git a/src/main/java/data/GosTaxonomy.java b/src/main/java/data/GosTaxonomy.java deleted file mode 100755 index 454abad..0000000 --- a/src/main/java/data/GosTaxonomy.java +++ /dev/null @@ -1,85 +0,0 @@ -package data; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.stream.Collectors; - -import javafx.collections.FXCollections; -import javafx.collections.ObservableList; - -public enum GosTaxonomy { - JAVNI("javni", "gos.T.J"), - INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "gos.T.J.I"), - RAZVEDRILNI("razvedrilni", "gos.T.J.R"), - NEJAVNI("nejavni", "gos.T.N"), - NEZASEBNI("nezasebni", "gos.T.N.N"), - ZASEBNI("zasebni", "gos.T.N.Z"), - OSEBNI_STIK("osebni stik", "gos.K.O"), - TELEFON("telefon", "gos.K.P"), - RADIO("radio", "gos.K.R"), - TELEVIZIJA("televizija", "gos.K.T"); - - - private final String name; - private final String taxonomy; - - private static final ObservableList FOR_COMBO_BOX; - - static { - ArrayList values = Arrays.stream(GosTaxonomy.values()).map(x -> x.name).collect(Collectors.toCollection(ArrayList::new)); - FOR_COMBO_BOX = FXCollections.observableArrayList(values); - } - - GosTaxonomy(String name, String taxonomy) { - this.name = name; - this.taxonomy = taxonomy; - } - - public String toString() { - return this.name; - } - - public String getTaxonomnyString() { - return this.taxonomy; - } - - public static GosTaxonomy factory(String tax) { - if (tax != null) { - if (JAVNI.toString().equals(tax)) { - return JAVNI; - } - if (INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) { - return INFORMATIVNO_IZOBRAZEVALNI; - } - if (RAZVEDRILNI.toString().equals(tax)) { - return RAZVEDRILNI; - } - if (NEJAVNI.toString().equals(tax)) { - return NEJAVNI; - } - if (NEZASEBNI.toString().equals(tax)) { - return NEZASEBNI; - } - if (ZASEBNI.toString().equals(tax)) { - return ZASEBNI; - } - if (OSEBNI_STIK.toString().equals(tax)) { - return OSEBNI_STIK; - } - if (TELEFON.toString().equals(tax)) { - return TELEFON; - } - if (RADIO.toString().equals(tax)) { - return RADIO; - } - if (TELEVIZIJA.toString().equals(tax)) { - return TELEVIZIJA; - } - } - return null; - } - - public static ObservableList getForComboBox() { - return FOR_COMBO_BOX; - } -} diff --git a/src/main/java/data/MultipleHMKeys2.java b/src/main/java/data/MultipleHMKeys2.java index 1ba1d60..fded2b8 100755 --- a/src/main/java/data/MultipleHMKeys2.java +++ b/src/main/java/data/MultipleHMKeys2.java @@ -36,15 +36,12 @@ public final class MultipleHMKeys2 implements MultipleHMKeys { @Override public int hashCode() { return Objects.hash(k1, k2); -// return key.hashCode(); } @Override public boolean equals(Object obj) { return (obj instanceof MultipleHMKeys2) && ((MultipleHMKeys2) obj).k1.equals(k1) && ((MultipleHMKeys2) obj).k2.equals(k2); - -// return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key.equals(key); } public MultipleHMKeys[] splitNgramTo1grams(){ diff --git a/src/main/java/data/Sentence.java b/src/main/java/data/Sentence.java index ebf4416..57f6357 100755 --- a/src/main/java/data/Sentence.java +++ b/src/main/java/data/Sentence.java @@ -18,22 +18,6 @@ public class Sentence { this.taxonomy = taxonomy; } -// public Sentence(List words) { -// this.words = words; -// } - - public Sentence(List words, List taxonomy, Map properties) { - this.words = words; - this.taxonomy = taxonomy; - this.properties = properties; - } - - public Sentence(List words, List taxonomy, String type) { - this.words = words; - this.taxonomy = taxonomy; - this.type = type; - } - public List getWords() { return words; } diff --git a/src/main/java/data/Settings.java b/src/main/java/data/Settings.java index b84e5d0..0af641a 100755 --- a/src/main/java/data/Settings.java +++ b/src/main/java/data/Settings.java @@ -8,9 +8,6 @@ public class Settings { public static final int CORPUS_SENTENCE_LIMIT = 50000; public static final boolean PRINT_LOG = false; - public static final String FX_ACCENT_OK = "-fx-accent: forestgreen;"; - public static final String FX_ACCENT_NOK = "-fx-accent: red;"; - public static Collection corpus; public static File resultsFilePath; } diff --git a/src/main/java/data/Statistics.java b/src/main/java/data/Statistics.java deleted file mode 100755 index 807fadb..0000000 --- a/src/main/java/data/Statistics.java +++ /dev/null @@ -1,299 +0,0 @@ -package data; - -import java.io.UnsupportedEncodingException; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; -import java.util.regex.Pattern; - -import util.Util; -import util.db.RDB; - -public class Statistics { - private CorpusType corpusType; - private AnalysisLevel analysisLevel; - private boolean useDB; - private RDB db; - - private boolean analysisProducedResults; - - private String taxonomy; - private boolean taxonomyIsSet; - - private char JOSType; - private boolean JOSTypeIsSet; - - private String resultTitle; - public Map result = new ConcurrentHashMap<>(); - - // nGrams - private int nGramLevel; - private Integer skip; - private CalculateFor cf; - private List morphosyntacticFilter; - - // distributions - private String distributionTaxonomy; - private char distributionJosWordType; - private boolean vcc; - private Integer substringLength; - - // inflected JOS - private String inflectedJosTaxonomy; - - // GOS - boolean gosOrthMode; - - // šolar - Map solarHeadBlockFilter; - - - // for ngrams - public Statistics(AnalysisLevel al, int nGramLevel, Integer skip, CalculateFor cf) { - String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); - this.cf = cf; - this.analysisLevel = al; - this.nGramLevel = nGramLevel; - this.skip = skip == null || skip == 0 ? null : skip; - - this.resultTitle = String.format("%s%d-gram_%s_%s", - this.skip != null ? String.format("%d-%s-", skip, "skip") : "", - nGramLevel, - cf.toString(), - dateTime); - } - - // for words distributions -// public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) { -// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); -// -// this.resultTitle = String.format("%s_%s_%s", -// distributionTaxonomy != null ? distributionTaxonomy.toString() : "", -// distributionJosWordType != null ? distributionJosWordType.toString() : "", -// dateTime); -// -// this.analysisLevel = al; -// this.cf = cf; -// this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null; -// this.taxonomyIsSet = distributionTaxonomy != null; -// -// this.JOSTypeIsSet = distributionJosWordType != null; -// this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' '; -// } - - public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) { - String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); - - this.resultTitle = String.format("%s_%d_%s", - "Distribucija zaporedij samoglasnikov in soglasnikov", - substringLength, - dateTime); - - this.analysisLevel = al; - this.cf = cf; - this.substringLength = substringLength; - this.vcc = true; - } - -// public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) { -// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); -// -// this.resultTitle = String.format("InflectedJOS_%s_%s", -// distributionTaxonomy != null ? distributionTaxonomy : "", -// dateTime); -// -// this.analysisLevel = al; -// this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null; -// this.taxonomyIsSet = inflectedJosTaxonomy != null; -// } - - public Integer getSkip() { - return skip; - } - - public Integer getSubstringLength() { - return substringLength; - } - - public String getInflectedJosTaxonomy() { - return inflectedJosTaxonomy; - } - - public void setSubstringLength(Integer substringLength) { - this.substringLength = substringLength; - } - - public boolean isVcc() { - return vcc; - } - - public void setVcc(boolean vcc) { - this.vcc = vcc; - } - - public String getDistributionTaxonomy() { - return distributionTaxonomy; - } - - public void setDistributionTaxonomy(String distributionTaxonomy) { - this.distributionTaxonomy = distributionTaxonomy; - } - - public char getDistributionJosWordType() { - return distributionJosWordType; - } - - public void setDistributionJosWordType(char distributionJosWordType) { - this.distributionJosWordType = distributionJosWordType; - } - - public void setMorphosyntacticFilter(List morphosyntacticFilter) { - // change filter strings to regex patterns - this.morphosyntacticFilter = new ArrayList<>(); - for (String s : morphosyntacticFilter) { - this.morphosyntacticFilter.add(Pattern.compile(s.replaceAll("\\*", "."))); - } - } - - public List getMsd() { - return morphosyntacticFilter; - } - - public Map getResult() { - return result; - } - - public void setTaxonomy(String taxonomy) { - this.taxonomy = taxonomy; - } - - public void setTaxonomyIsSet(boolean taxonomyIsSet) { - this.taxonomyIsSet = taxonomyIsSet; - } - - public char getJOSType() { - return JOSType; - } - - public void setJOSType(char JOSType) { - this.JOSType = JOSType; - } - - public boolean isJOSTypeSet() { - return JOSTypeIsSet; - } - - public void setJOSType(boolean JOSTypeIsSet) { - this.JOSTypeIsSet = JOSTypeIsSet; - } - - public void saveResultToDisk(int... limit) throws UnsupportedEncodingException { - // Set>> stats = new HashSet<>(); - // - // if (useDB) { - // result = db.getDump(); - // db.delete(); - // } - // - // // if no results and nothing to save, return false - // if (!(result.size() > 0)) { - // analysisProducedResults = false; - // return; - // } else { - // analysisProducedResults = true; - // } - // - // stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit)))); - // Export.SetToCSV(stats); - } - - // private Map getSortedResultInflected(Map map) { - // // first convert to - // Map m = Util.sortByValue(Util.atomicInt2StringAndInt(map), 0); - // - // Map sortedM = new TreeMap<>(); - // - // sortedM.putAll(m); - // - // return sortedM; - // } - - private Map getSortedResult(Map map, int limit) { - return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit); - } - - public String getTaxonomy() { - return taxonomy; - } - - public boolean isTaxonomySet() { - return taxonomyIsSet; - } - - public int getnGramLevel() { - return nGramLevel; - } - - public CalculateFor getCf() { - return cf; - } - - public AnalysisLevel getAnalysisLevel() { - return analysisLevel; - } - - public CorpusType getCorpusType() { - return corpusType; - } - - public void setCorpusType(CorpusType corpusType) { - this.corpusType = corpusType; - } - - public boolean isGosOrthMode() { - return gosOrthMode; - } - - public void setGosOrthMode(boolean gosOrthMode) { - this.gosOrthMode = gosOrthMode; - } - - public Map getSolarHeadBlockFilter() { - return solarHeadBlockFilter; - } - - public void setSolarHeadBlockFilter(Map solarHeadBlockFilter) { - this.solarHeadBlockFilter = solarHeadBlockFilter; - } - - public boolean isUseDB() { - return useDB; - } - - public void setUseDB(boolean useDB) { - if (useDB && db == null) { - db = new RDB(); - } - this.useDB = useDB; - } - - /** - * Stores results from this batch to a database and clears results map - */ - public void storeTmpResultsToDB() { - try { - db.writeBatch(result); - result = new ConcurrentHashMap<>(); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - } - } - - public boolean isAnalysisProducedResults() { - return analysisProducedResults; - } -} diff --git a/src/main/java/data/StatisticsNew.java b/src/main/java/data/StatisticsNew.java index b670f8f..9aaa12d 100755 --- a/src/main/java/data/StatisticsNew.java +++ b/src/main/java/data/StatisticsNew.java @@ -18,7 +18,6 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import alg.inflectedJOS.WordFormation; import data.Enums.WordLevelType; import javafx.collections.ObservableList; import util.Export; diff --git a/src/main/java/data/Tax.java b/src/main/java/data/Tax.java index 2687fd1..106570f 100755 --- a/src/main/java/data/Tax.java +++ b/src/main/java/data/Tax.java @@ -166,22 +166,6 @@ public class Tax { } } - - - -// ArrayList taxonomyString = new ArrayList<>(); -// for (Taxonomy t : taxonomyResult.keySet()){ -// taxonomyString.add(t.toString()); -// } -// ObservableList taxonomyObservableString = Tax.getTaxonomyForComboBox(corpus.getCorpusType(), new HashSet<>(taxonomyString)); -// ArrayList sortedTaxonomyString = new ArrayList<>(); -// for (String t : taxonomyObservableString){ -// sortedTaxonomyString.add(t); -// } - - - - foundTaxHS.addAll(genFoundTax); // assures same relative order @@ -198,59 +182,6 @@ public class Tax { return corpusTypesWithTaxonomy; } - public static ArrayList getTaxonomyCodes(ArrayList taxonomyNames, CorpusType corpusType) { - ArrayList result = new ArrayList<>(); - - if (ValidationUtil.isEmpty(taxonomyNames)) { - return result; - } - - LinkedHashMap tax = new LinkedHashMap<>(); - - if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) { - tax = GIGAFIDA_TAXONOMY; - } else if (corpusType == CorpusType.GOS) { - tax = GOS_TAXONOMY; - } - - // for easier lookup - Map taxInversed = tax.entrySet() - .stream() - .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); - - for (Taxonomy taxonomyName : taxonomyNames) { - result.add(taxInversed.get(taxonomyName.toString())); - } - - return result; - } - -// public static ArrayList getTaxonomyFormatted(ArrayList taxonomyNames, CorpusType corpusType) { -// ArrayList result = new ArrayList<>(); -// -// if (ValidationUtil.isEmpty(taxonomyNames)) { -// return result; -// } -// -// LinkedHashMap tax = new LinkedHashMap<>(); -// -// if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) { -// tax = GIGAFIDA_TAXONOMY; -// } else if (corpusType == CorpusType.GOS) { -// tax = GOS_TAXONOMY; -// } -// -// // for easier lookup -// Map taxInversed = tax.entrySet() -// .stream() -// .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); -// -// for (String taxonomyName : taxonomyNames) { -// result.add(taxInversed.get(taxonomyName) + " - " + taxonomyName); -// } -// -// return result; -// } /** * Returns a list of proper names for codes @@ -283,13 +214,4 @@ public class Tax { return result; } - - public static String getLongTaxonomyName(String shortName){ - if (GIGAFIDA_TAXONOMY.containsKey(shortName)) - return GIGAFIDA_TAXONOMY.get(shortName); - else if(GOS_TAXONOMY.containsKey(shortName)) - return GOS_TAXONOMY.get(shortName); - else - return null; - } } diff --git a/src/main/java/data/Taxonomy.java b/src/main/java/data/Taxonomy.java index b04f762..2755fc5 100755 --- a/src/main/java/data/Taxonomy.java +++ b/src/main/java/data/Taxonomy.java @@ -28,14 +28,6 @@ enum TaxonomyEnum { // Gigafida -// KNJIZNO("knjižno", "T.K", "gigafida"), -// LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"), -// STROKOVNO("strokovno", "T.K.S", "gigafida"), -// PERIODICNO("periodično", "T.P", "gigafida"), -// CASOPIS("časopis", "T.P.C", "gigafida"), -// REVIJA("revija", "T.P.R", "gigafida"), -// INTERNET("internet", "I", "gigafida"), - SSJ_TISK("SSJ.T", "SSJ.T - tisk"), SSJ_KNJIZNO("SSJ.T.K", " SSJ.T.K - tisk-knjižno"), SSJ_LEPOSLOVNO("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno"), @@ -148,9 +140,6 @@ enum TaxonomyEnum { } // Gigafida - // if (TISK.toString().equals(tax)) { - // return TISK; - // } if (SSJ_TISK.toString().equals(tax)) { return SSJ_TISK; } @@ -339,9 +328,6 @@ enum TaxonomyEnum { } // Gigafida - // if (TISK.toString().equals(tax)) { - // return TISK; - // } if (SSJ_TISK.toLongNameString().equals(tax)) { return SSJ_TISK; } @@ -483,7 +469,6 @@ enum TaxonomyEnum { public static ArrayList taxonomySelected(TaxonomyEnum disjointTaxonomy) { ArrayList r = new ArrayList<>(); -// System.out.println(disjointTaxonomy); if(disjointTaxonomy.equals(DISKURZ)){ r.add(DISKURZ_JAVNI); r.add(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI); @@ -696,12 +681,8 @@ enum TaxonomyEnum { } public static ArrayList convertStringListToTaxonomyList(ObservableList stringList, Corpus corpus){ -// System.out.println("1."); -// System.out.println(stringList); ArrayList taxonomyList = new ArrayList<>(); -// System.out.println("INTERESTING STUFF"); -// System.out.println(stringList); for (String e : stringList) { for (Taxonomy t : corpus.getTaxonomy()){ if (t.toLongNameString().equals(e)) { @@ -709,18 +690,11 @@ enum TaxonomyEnum { } } } -// System.out.println(taxonomyList); -// System.out.println("-----------------"); return taxonomyList; } public static void modifyingTaxonomy(ArrayList taxonomy, ArrayList checkedItemsTaxonomy, Corpus corpus){ // get taxonomies that were selected/deselected by user -// System.out.println("Print here:"); -// System.out.println(taxonomy); -// System.out.println(checkedItemsTaxonomy); -// System.out.println("-------------"); - Set disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy); if (taxonomy != null) { disjointTaxonomies.addAll(taxonomy); @@ -739,7 +713,6 @@ enum TaxonomyEnum { if(!TaxonomyEnum.convertStringListToTaxonomyList(corpus.getObservableListTaxonomy(), corpus).contains(s)){ disjointTaxonomies.remove(s); disArr.remove(s); -// taxonomy.remove(s); i--; } i++; @@ -790,11 +763,6 @@ public class Taxonomy { } -// public Taxonomy(String name, String longName) { -// this.name = name; -// this.longName = longName; -// } - public String toString() { return this.name; } @@ -813,7 +781,6 @@ public class Taxonomy { return t; } return null; -// return new Taxonomy(tax, false); } public static Taxonomy factoryLongName(String tax, Corpus corpus) { @@ -822,87 +789,6 @@ public class Taxonomy { return t; } return null; -// return new Taxonomy(tax, true); - } - -// public static ArrayList taxonomySelected(Taxonomy disjointTaxonomy) { -// ArrayList rTaxonomyEnum = TaxonomyEnum.taxonomySelected(disjointTaxonomy.getTaxonomyEnum()); -// -// ArrayList r = new ArrayList<>(); -// -// for(TaxonomyEnum t : rTaxonomyEnum){ -// r.add(new Taxonomy(t.toString(), false)); -// } -// -// return r; -// } - - public static ArrayList taxonomyDeselected(Taxonomy disjointTaxonomy){ -// ArrayList r = new ArrayList<>(); -// Map connections = new ConcurrentHashMap<>(); -// connections.put(DISKURZ_JAVNI, DISKURZ); -// connections.put(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_JAVNI); -// connections.put(DISKURZ_RAZVEDRILNI, DISKURZ_JAVNI); -// connections.put(DISKURZ_NEJAVNI, DISKURZ); -// connections.put(DISKURZ_NEZASEBNI, DISKURZ_NEJAVNI); -// connections.put(DISKURZ_ZASEBNI, DISKURZ_NEJAVNI); -// connections.put(SITUACIJA_RADIO, SITUACIJA); -// connections.put(SITUACIJA_TELEVIZIJA, SITUACIJA); -// connections.put(KANAL_OSEBNI_STIK, KANAL); -// connections.put(KANAL_TELEFON, KANAL); -// connections.put(KANAL_RADIO, KANAL); -// connections.put(KANAL_TELEVIZIJA, KANAL); -// -// connections.put(SSJ_KNJIZNO, SSJ_TISK); -// connections.put(SSJ_LEPOSLOVNO, SSJ_KNJIZNO); -// connections.put(SSJ_STROKOVNO, SSJ_KNJIZNO); -// connections.put(SSJ_PERIODICNO, SSJ_TISK); -// connections.put(SSJ_CASOPIS, SSJ_PERIODICNO); -// connections.put(SSJ_REVIJA, SSJ_PERIODICNO); -// connections.put(SSJ_DRUGO, SSJ_TISK); -// -// connections.put(FT_P_GOVORNI, FT_P_PRENOSNIK); -// connections.put(FT_P_ELEKTRONSKI, FT_P_PRENOSNIK); -// connections.put(FT_P_PISNI, FT_P_PRENOSNIK); -// connections.put(FT_P_OBJAVLJENO, FT_P_PISNI); -// connections.put(FT_P_KNJIZNO, FT_P_OBJAVLJENO); -// connections.put(FT_P_PERIODICNO, FT_P_OBJAVLJENO); -// connections.put(FT_P_CASOPISNO, FT_P_OBJAVLJENO); -// connections.put(FT_P_DNEVNO, FT_P_CASOPISNO); -// connections.put(FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO); -// connections.put(FT_P_CASOPISNO_TEDENSKO, FT_P_CASOPISNO); -// connections.put(FT_P_REVIALNO, FT_P_PERIODICNO); -// connections.put(FT_P_TEDENSKO, FT_P_REVIALNO); -// connections.put(FT_P_STIRINAJSTDNEVNO, FT_P_REVIALNO); -// connections.put(FT_P_MESECNO, FT_P_REVIALNO); -// connections.put(FT_P_REDKEJE_KOT_MESECNO, FT_P_REVIALNO); -// connections.put(FT_P_OBCASNO, FT_P_REVIALNO); -// connections.put(FT_P_NEOBJAVLJENO, FT_P_PISNI); -// connections.put(FT_P_JAVNO, FT_P_NEOBJAVLJENO); -// connections.put(FT_P_INTERNO, FT_P_NEOBJAVLJENO); -// connections.put(FT_P_ZASEBNO, FT_P_NEOBJAVLJENO); -// connections.put(FT_UMETNOSTNA, FT_ZVRST); -// connections.put(FT_PESNISKA, FT_UMETNOSTNA); -// connections.put(FT_PROZNA, FT_UMETNOSTNA); -// connections.put(FT_DRAMSKA, FT_UMETNOSTNA); -// connections.put(FT_NEUMETNOSTNA, FT_ZVRST); -// connections.put(FT_STROKOVNA, FT_NEUMETNOSTNA); -// connections.put(FT_HID, FT_STROKOVNA); -// connections.put(FT_NIT, FT_STROKOVNA); -// connections.put(FT_NESTROKOVNA, FT_NEUMETNOSTNA); -// connections.put(FT_PRAVNA, FT_NEUMETNOSTNA); -// connections.put(FT_DA, FT_LEKTORIRANO); -// connections.put(FT_NE, FT_LEKTORIRANO); -// -// TaxonomyEnum currentTaxonomy = disjointTaxonomy; -// r.add(currentTaxonomy); -// while(connections.containsKey(currentTaxonomy)){ -// currentTaxonomy = connections.get(currentTaxonomy); -// r.add(currentTaxonomy); -// } -// Collections.reverse(r); -// return r; - return null; } public static ArrayList convertStringListToTaxonomyList(ObservableList stringList, Corpus corpus){ @@ -919,7 +805,6 @@ public class Taxonomy { } public static ArrayList taxonomyToTaxonomyEnum(ArrayList taxonomy){ -// System.out.println(taxonomy); if (taxonomy == null) { return null; } @@ -934,11 +819,6 @@ public class Taxonomy { } public static ArrayList taxonomyEnumToTaxonomy(ArrayList taxonomy, Corpus corpus){ -// ArrayList r = new ArrayList<>(); -// for (TaxonomyEnum t : taxonomy){ -// r.add(new Taxonomy(t)); -// } -// return r; ArrayList r = new ArrayList<>(); for (TaxonomyEnum te : taxonomy){ for (Taxonomy t : corpus.getTaxonomy()){ diff --git a/src/main/java/gui/CharacterAnalysisTab.java b/src/main/java/gui/CharacterAnalysisTab.java index 247a7b2..cef396e 100755 --- a/src/main/java/gui/CharacterAnalysisTab.java +++ b/src/main/java/gui/CharacterAnalysisTab.java @@ -15,7 +15,6 @@ import javafx.collections.ObservableList; import javafx.concurrent.Task; import javafx.fxml.FXML; import javafx.scene.control.*; -//import javafx.scene.image.Image; import javafx.scene.image.ImageView; import javafx.scene.layout.AnchorPane; import javafx.scene.layout.Pane; @@ -29,7 +28,6 @@ import java.io.UnsupportedEncodingException; import java.util.*; import java.util.regex.Pattern; -import static alg.XML_processing.readXML; import static gui.GUIController.showAlert; @SuppressWarnings("Duplicates") @@ -129,20 +127,10 @@ public class CharacterAnalysisTab { private ComboBox taxonomySetOperationCB; private String taxonomySetOperation; -// @FXML -// private ToggleGroup calculateForRB; -// private CalculateFor calculateFor; - @FXML private ComboBox calculateForCB; private CalculateFor calculateFor; - @FXML - private RadioButton lemmaRB; - - @FXML - private RadioButton varietyRB; - @FXML private Pane paneLetters; @@ -171,13 +159,12 @@ public class CharacterAnalysisTab { private Corpus corpus; private HashMap> solarFiltersMap; - private Filter filter; - private boolean useDb; private HostServices hostService; private ListChangeListener taxonomyListener; private ChangeListener msdListener; private ChangeListener minimalOccurrencesListener; private ChangeListener minimalTaxonomyListener; + private boolean useDb; private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"}; private static final ArrayList N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY)); @@ -185,14 +172,8 @@ public class CharacterAnalysisTab { private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"}; private static final ArrayList TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY)); -// private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("različnica", "lema"); - private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); - - // TODO: pass observables for taxonomy based on header scan // after header scan - private ObservableList taxonomyCCBValues; - private CorpusType currentCorpusType; public void init() { characterAnalysisTab.getStylesheets().add("style.css"); @@ -203,24 +184,11 @@ public class CharacterAnalysisTab { currentMode = MODE.LETTER; toggleMode(currentMode); -// calculateForRB.selectedToggleProperty().addListener(new ChangeListener() { -// @Override -// public void changed(ObservableValue observable, Toggle oldValue, Toggle newValue) { -// //logger.info("calculateForRB:", newValue.toString()); -// RadioButton chk = (RadioButton)newValue.getToggleGroup().getSelectedToggle(); // Cast object to radio button -// calculateFor = CalculateFor.factory(chk.getText()); -// logger.info("calculateForRB:", chk.getText()); -// //System.out.println("Selected Radio Button - "+chk.getText()); -// } -// }); - calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> { if(newValue == null){ newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS); calculateForCB.getSelectionModel().select(newValue); } -// System.out.println(oldValue); -// System.out.println(newValue); calculateFor = CalculateFor.factory(newValue); logger.info("calculateForCB:", calculateFor.toString()); }); @@ -299,7 +267,6 @@ public class CharacterAnalysisTab { public void onChanged(Change c){ if(changing) { ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); -// ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); ArrayList checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus); @@ -309,7 +276,6 @@ public class CharacterAnalysisTab { taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); - // taxonomyCCB.getCheckModel().clearChecks(); changing = false; taxonomyCCB.getCheckModel().clearChecks(); for (Taxonomy t : checkedItemsTaxonomy) { @@ -468,97 +434,6 @@ public class CharacterAnalysisTab { cancel.setVisible(false); } - /** - * case a: values for combo boxes can change after a corpus change - *
    - *
  • different corpus type - reset all fields so no old values remain
  • - *
  • same corpus type, different subset - keep
  • - *
- *

- * case b: values for combo boxes can change after a header scan - *

    - *
  • at first, fields are populated by corpus type defaults
  • - *
  • after, with gathered data
  • - *
- *

- * ngrams: 1 - * calculateFor: word - * msd: - * taxonomy: - * skip: 0 - * iscvv: false - * string length: 1 - */ -// public void populateFields() { -// // corpus changed if: current one is null (this is first run of the app) -// // or if currentCorpus != gui's corpus -// boolean corpusChanged = currentCorpusType == null -// || currentCorpusType != corpus.getCorpusType(); -// -// // TODO: check for GOS, GIGAFIDA, SOLAR... -// // refresh and: -// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset -//// if (calculateFor == null) { -//// calculateForRB.selectToggle(lemmaRB); -//// calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString()); -//// } -// -// if (!filter.hasMsd()) { -// // if current corpus doesn't have msd data, disable this field -// msd = new ArrayList<>(); -// msdTF.setText(""); -// msdTF.setDisable(true); -// logger.info("no msd data"); -// } else { -// if (ValidationUtil.isEmpty(msd) -// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { -// // msd has not been set previously -// // or msd has been set but the corpus changed -> reset -// msd = new ArrayList<>(); -// msdTF.setText(""); -// msdTF.setDisable(false); -// logger.info("msd reset"); -// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { -// // if msd has been set, but corpus type remained the same, we can keep any set msd value -// msdTF.setText(StringUtils.join(msdStrings, " ")); -// msdTF.setDisable(false); -// logger.info("msd kept"); -// } -// } -// -// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection) -// -// // keep calculateCvv -// calculatecvvCB.setSelected(calculateCvv); -// -// // keep string length if set -// if (stringLength != null) { -// stringLengthTF.setText(String.valueOf(stringLength)); -// } else { -// stringLengthTF.setText("1"); -// stringLength = 1; -// } -// -// // TODO: trigger on rescan -// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { -// // user changed corpus (by type) or by selection & triggered a rescan of headers -// // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getObservableListTaxonomy(); -// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); -// -// currentCorpusType = corpus.getCorpusType(); -// // setTaxonomyIsDirty(false); -// } else { -// -// } -// -// // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getObservableListTaxonomy(); -// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); -// taxonomyCCB.getItems().addAll(taxonomyCCBValues); -// -// } - private void addTooltipToImage(ImageView image, StringBinding stringBinding){ Tooltip tooltip = new Tooltip(); tooltip.textProperty().bind(stringBinding); @@ -719,7 +594,6 @@ public class CharacterAnalysisTab { cancel.setVisible(true); } int i = 0; -// DateFormat df = new SimpleDateFormat("hh:mm:ss"); Date startTime = new Date(); Date previousTime = new Date(); int remainingSeconds = -1; @@ -759,23 +633,16 @@ public class CharacterAnalysisTab { xml_processing.isCancelled = isCancelled(); updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100); updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds)); -// updateProgress((iFinal * 100) + (double) observable, corpusFiles.size() * 100); } }; -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); - xml_processing.progressProperty().addListener(xml_processing.progressBarListener); - -// xml_processing.progressProperty().addListener((obs, oldProgress, newProgress) -> -// updateProgress((iFinal * 100) + newProgress.doubleValue(), corpusFiles.size() * 100)); } xml_processing.readXML(f.toString(), statistic); if (isCancelled()) { updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); break; } -// readXML(f.toString(), statistic, this, corpusFiles.size(), startTime, previousTime, i); } return null; @@ -799,7 +666,6 @@ public class CharacterAnalysisTab { } ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); @@ -810,7 +676,6 @@ public class CharacterAnalysisTab { logger.error("Error while executing", e); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); @@ -820,7 +685,6 @@ public class CharacterAnalysisTab { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); diff --git a/src/main/java/gui/CorpusTab.java b/src/main/java/gui/CorpusTab.java index 7102018..1707cb8 100755 --- a/src/main/java/gui/CorpusTab.java +++ b/src/main/java/gui/CorpusTab.java @@ -2,10 +2,8 @@ package gui; import static data.CorpusType.*; import static gui.GUIController.*; -import static gui.Messages.*; import static util.Util.*; -import java.awt.*; import java.io.File; import java.io.IOException; import java.lang.reflect.Constructor; @@ -53,16 +51,11 @@ public class CorpusTab { @FXML private Button chooseCorpusLocationB; - private File chosenCorpusLocation; @FXML private CheckBox readHeaderInfoChB; private boolean readHeaderInfo; -// @FXML -// private CheckBox gosUseOrthChB; -// private boolean gosUseOrth; - @FXML private Button chooseResultsLocationB; @@ -213,11 +206,8 @@ public class CorpusTab { selectReaderCB.valueProperty().addListener((observable, oldValue, newValue) -> { if(newValue == null){ -// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS); selectReaderCB.getSelectionModel().select(newValue); } -// System.out.println(oldValue); -// System.out.println(newValue); selectReader = newValue; selectReader(); if(corpus != null && corpus.getCorpusType() != null) { @@ -236,12 +226,9 @@ public class CorpusTab { // comma / point choice punctuationCB.valueProperty().addListener((observable, oldValue, newValue) -> { if(newValue == null){ -// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS); newValue = I18N.getTranslatedValue(oldValue, PUNCTUATION); punctuationCB.getSelectionModel().select(newValue); } -// System.out.println(oldValue); -// System.out.println(newValue); punctuation = newValue; if(corpus != null) { corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION)); @@ -252,7 +239,6 @@ public class CorpusTab { // add listeners chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation()); -// chooseCorpusLocationB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_chooseCorpusLocationB"))); helpH.setOnAction(e -> openHelpWebsite()); readHeaderInfoChB.selectedProperty().addListener((observable, oldValue, newValue) -> { @@ -262,18 +248,6 @@ public class CorpusTab { } logger.info("read headers: ", readHeaderInfo); }); -// readHeaderInfoChB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_readHeaderInfoChB"))); - -// gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> { -// gosUseOrth = newValue; -// corpus.setGosOrthMode(gosUseOrth); -//// wordFormationTab.setDisable(gosUseOrth); -// satNew2Controller.toggleMode(null); -// oneWordTabController.toggleMode(null); -// catController.toggleMode(null); -// -// logger.info("gosUseOrth: ", gosUseOrth); -// }); chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null)); @@ -284,31 +258,12 @@ public class CorpusTab { I18N.setLocale(new Locale.Builder().setLanguage("sl").setRegion("SI").build()); } Messages.reload(); - -// StringBuilder sb = new StringBuilder(); -// sb.append(corpusLocation) -// .append("\n") -// .append(String.format(I18N.get("message.NOTIFICATION_FOUND_X_FILES"), corpusFilesSize)) -// .append("\n") -// .append(String.format(I18N.get("message.NOTIFICATION_CORPUS"), corpusType.toString())); -// -// chooseCorpusLabelContent = sb.toString(); -// chooseCorpusL.textProperty().unbind(); -// chooseCorpusL.setText(chooseCorpusLabelContent); Messages.updateChooseCorpusL(); logger.info("change language"); }); // set labels and toggle visibility -// toggleGosChBVisibility(); - -// chooseCorpusLabelContent = Messages.LABEL_CORPUS_LOCATION_NOT_SET; -// chooseCorpusL.setText(chooseCorpusLabelContent); -// -// chooseResultsLabelContent = Messages.LABEL_RESULTS_LOCATION_NOT_SET; -// chooseResultsL.setText(chooseResultsLabelContent); - togglePiAndSetCorpusWrapper(false); } @@ -391,11 +346,6 @@ public class CorpusTab { corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("vert", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE); Collection corpusFilesRegi = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("regi", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE); - -// if (!checkRegiFile(corpusFilesRegi)){ -// return; -// } - if (corpusFiles.size() == 0){ logger.info("alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND")); showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null); @@ -405,7 +355,6 @@ public class CorpusTab { corpusLocation = selectedDirectory.getAbsolutePath(); corpusFilesSize = String.valueOf(corpusFiles.size()); Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null); -// corpusType = VERT; corpus.setCorpusType(corpusType); @@ -446,12 +395,10 @@ public class CorpusTab { } } else { -// System.out.println(corpusLocation); corpusLocation = selectedDirectory.getAbsolutePath(); corpusFilesSize = String.valueOf(corpusFiles.size()); Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null); -// String chooseCorpusLabelContentTmp = detectCorpusType(corpusFiles); selectReader(); StringBuilder sb = new StringBuilder(); sb.append(corpusLocation) @@ -491,7 +438,6 @@ public class CorpusTab { } } } -// System.out.println(outputName); corpus.setCorpusName(outputName); corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION)); } @@ -534,7 +480,6 @@ public class CorpusTab { private void setResults() { // if everything is ok // check and enable checkbox if GOS -// toggleGosChBVisibility(); // set default results location String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath(); @@ -543,28 +488,6 @@ public class CorpusTab { Messages.setChooseCorpusL(chooseCorpusL, chooseCorpusLabelContent); } - private boolean checkRegiFile(Collection corpusFiles) { -// CorpusType corpusType = corpus.getCorpusType(); -// Collection corpusFiles = corpus.getDetectedCorpusFiles(); - - - for (File file : corpusFiles) { - // try to open .regi file - String regiPath = file.getAbsolutePath().substring(0, file.getAbsolutePath().length() - 4) + "regi"; - LineIterator regiIt; - try { - // read regi file - regiIt = FileUtils.lineIterator(new File(regiPath), "UTF-8"); - LineIterator.closeQuietly(regiIt); - } catch (IOException e) { - GUIController.showAlert(Alert.AlertType.ERROR, String.format(I18N.get("message.ERROR_NO_REGI_FILE_FOUND"), regiPath)); - return false; - } - } - return true; - - } - private void readHeaderInfo() { CorpusType corpusType = corpus.getCorpusType(); Collection corpusFiles = corpus.getDetectedCorpusFiles(); @@ -592,8 +515,6 @@ public class CorpusTab { i++; if (corpusIsSplit) { -// System.out.println(i); -// System.out.println(corpusFiles.size()); updateProgress(i, corpusFiles.size()); } } @@ -615,10 +536,7 @@ public class CorpusTab { characterLevelTab.setDisable(true); wordLevelTab.setDisable(true); filterTab.setDisable(true); -// Messages.reload(); Messages.updateChooseCorpusL(); -// chooseCorpusL.textProperty().bind(I18N.createStringBinding("message.LABEL_CORPUS_LOCATION_NOT_SET")); -// chooseResultsL.textProperty().bind(I18N.createStringBinding("message.LABEL_RESULTS_LOCATION_NOT_SET")); logger.info("No taxonomy found in headers."); GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_TAXONOMY_FOUND")); @@ -749,12 +667,6 @@ public class CorpusTab { task.setOnSucceeded(e -> { ObservableList readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue()); - - // if (ValidationUtil.isEmpty(readTaxonomy)) { - // // if no taxonomy found alert the user and keep other tabs disabled - // logger.info("No vert filters found in headers."); - // GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND")); - // } else { // set taxonomy, update label corpus.setTaxonomy(readTaxonomy); corpus.setHeaderRead(true); @@ -790,10 +702,7 @@ public class CorpusTab { characterLevelTab.setDisable(false); catController.setCorpus(corpus); catController.init(); - //wordFormationTab.setDisable(false); wordLevelTab.setDisable(false); - //wfController.setCorpus(corpus); - //wfController.init(); wlController.setCorpus(corpus); wlController.init(); @@ -824,13 +733,6 @@ public class CorpusTab { return directoryChooser.showDialog(stage); } - /** - * Hides GOS related checkbox until needed. - */ -// private void toggleGosChBVisibility() { -// gosUseOrthChB.setVisible(corpus != null && corpus.getCorpusType() != null && corpus.getCorpusType() == CorpusType.GOS); -// } - private void selectReader() { switch (selectReader) { // "vert", "Solar", "GOS", "SSJ500K", "Gigafida", "Gigafida (old)", "Kres (old)" @@ -940,18 +842,10 @@ public class CorpusTab { public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; } - /*public void setWfController(WordFormationTab wfController) { - this.wfController = wfController; - }*/ - public void setWlController(WordLevelTab wlController) { this.wlController = wlController; } - public void setWordFormationTab(Tab wordFormationTab) { - this.wordFormationTab = wordFormationTab; - } - public void setHostServices(HostServices hostServices){ this.hostService = hostServices; } diff --git a/src/main/java/gui/FiltersForSolar.java b/src/main/java/gui/FiltersForSolar.java index 20bda57..217d108 100755 --- a/src/main/java/gui/FiltersForSolar.java +++ b/src/main/java/gui/FiltersForSolar.java @@ -52,21 +52,17 @@ public class FiltersForSolar { @FXML public Label solarFilters; @FXML - public Label selectedFiltersL; - @FXML public TextArea selectedFiltersTextArea; @FXML private Button changeLanguageB; @FXML private Hyperlink helpH; -// private HashMap> selectedFilters; private Corpus corpus; private StringAnalysisTabNew2 satNew2Controller; private OneWordAnalysisTab oneWordTabController; private CharacterAnalysisTab catController; - //private WordFormationTab wfController; private WordLevelTab wlController; private HostServices hostService; @@ -172,7 +168,6 @@ public class FiltersForSolar { ArrayList values = new ArrayList<>(entry.getValue()); if (!values.isEmpty()) { -// allFilters.append(entry.getKey()) allFilters.append(I18N.get(entry.getKey() + "L")) .append(": "); @@ -202,7 +197,6 @@ public class FiltersForSolar { satNew2Controller.setSolarFiltersMap(solarFiltersMap); oneWordTabController.setSolarFiltersMap(solarFiltersMap); catController.setSolarFiltersMap(solarFiltersMap); - //wfController.setSolarFiltersMap(solarFiltersMap); wlController.setSolarFiltersMap(solarFiltersMap); } @@ -215,7 +209,6 @@ public class FiltersForSolar { satNew2Controller.setSelectedFiltersLabel(content); oneWordTabController.setSelectedFiltersLabel(content); catController.setSelectedFiltersLabel(content); - //wfController.setSelectedFiltersLabel(content); wlController.setSelectedFiltersLabel(content); } @@ -230,10 +223,6 @@ public class FiltersForSolar { public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; } - /*public void setWfController(WordFormationTab wfController) { - this.wfController = wfController; - }*/ - public void setWlController(WordLevelTab wlController) { this.wlController = wlController; } diff --git a/src/main/java/gui/GUIController.java b/src/main/java/gui/GUIController.java index a60b695..7522004 100755 --- a/src/main/java/gui/GUIController.java +++ b/src/main/java/gui/GUIController.java @@ -49,29 +49,11 @@ public class GUIController extends Application { @FXML private CorpusTab ctController; @FXML - private Parent ct; - //@FXML - //private WordFormationTab wfController; - @FXML - private Parent wf; - @FXML private WordLevelTab wlController; @FXML - private Parent wl; - @FXML private FiltersForSolar ffsController; @FXML - private Parent ffs; - @FXML - private SelectedFiltersPane sfpController; - @FXML - private Parent sfp; - @FXML - public Tab stringLevelTab; - @FXML public Tab wordLevelTab; - /*@FXML - public Tab wordFormationTab;*/ @FXML @@ -83,28 +65,9 @@ public class GUIController extends Application { @Override public void start(Stage primaryStage) throws IOException { -// File fileDir = new File("message_sl_unicode.properties"); -// -// BufferedReader in = new BufferedReader( -// new InputStreamReader( -// new FileInputStream(fileDir), "UTF8")); -// -// String str; -// -// while ((str = in.readLine()) != null) { -// System.out.println(str); -// } -// -// in.close(); - Parent root = FXMLLoader.load(getClass().getResource("/GUI.fxml")); -// Parent root = FXMLLoader.load(ResourceLookup.resources.url("GUI.fxml")); -// primaryStage.setTitle("Luščilnik"); -// StringBinding a = I18N.createStringBinding("window.title"); primaryStage.titleProperty().bind(I18N.createStringBinding("window.title")); Scene scene = new Scene(root, 800, 600); - // https://github.com/dicolar/jbootx - // scene.getStylesheets().add(GUIController.class.getResource("bootstrap3.css").toExternalForm()) primaryStage.setScene(scene); stage = primaryStage; primaryStage.show(); @@ -130,13 +93,10 @@ public class GUIController extends Application { ctController.setSatNew2Controller(satNew2Controller); ctController.setOneWordTabController(oneWordTabController); ctController.setCatController(catController); - //ctController.setWfController(wfController); ctController.setWlController(wlController); ctController.setTabPane(tabPane); ctController.setFfsController(ffsController); - //ctController.setWordFormationTab(wordFormationTab); ctController.setWordLevelTab(wordLevelTab); - //System.out.println(com.sun.javafx.runtime.VersionInfo.getRuntimeVersion()); ctController.setHostServices(getHostServices()); @@ -146,14 +106,11 @@ public class GUIController extends Application { oneWordTabController.setHostServices(getHostServices()); catController.setCorpus(corpus); catController.setHostServices(getHostServices()); - //wfController.setCorpus(corpus); - //wfController.setHostServices(getHostServices()); wlController.setCorpus(corpus); wlController.setHostServices(getHostServices()); ffsController.setSatNew2Controller(satNew2Controller); ffsController.setOneWordTabController(oneWordTabController); ffsController.setCatController(catController); - //ffsController.setWfController(wfController); ffsController.setWlController(wlController); ffsController.setHostServices(getHostServices()); diff --git a/src/main/java/gui/I18N.java b/src/main/java/gui/I18N.java index 9a6494f..74fb5e6 100644 --- a/src/main/java/gui/I18N.java +++ b/src/main/java/gui/I18N.java @@ -1,17 +1,12 @@ package gui; -import com.sun.javafx.collections.ObservableListWrapper; import javafx.beans.binding.Bindings; import javafx.beans.binding.ObjectBinding; import javafx.beans.binding.StringBinding; import javafx.beans.property.ObjectProperty; import javafx.beans.property.SimpleObjectProperty; -import javafx.beans.value.ObservableValue; import javafx.collections.FXCollections; import javafx.collections.ObservableList; -import javafx.scene.control.Button; -import javafx.scene.control.Label; -import javafx.scene.control.Tooltip; import java.io.UnsupportedEncodingException; import java.text.MessageFormat; @@ -111,22 +106,6 @@ public final class I18N { return Bindings.createStringBinding(() -> get(key, args), locale); } -// public static ObservableValue> createListStringBinding(final String key, Object... args) { -// ObservableList r = (ObservableList) new ArrayList(); -// r.add(Bindings.createStringBinding(() -> get(key, args), locale)); -// return r; -// } - - /** - * creates a Object Binding to a localized Object that is computed by calling the given func - * - * @param func - * function called on every change - * @return StringBinding - */ - public static StringBinding createStringBinding(Callable func) { - return Bindings.createStringBinding(func, locale); - } /** * creates a String binding to a localized String for the given message bundle key * @@ -138,22 +117,6 @@ public final class I18N { return Bindings.createObjectBinding(() -> getObject(keys, args), locale); } -// public static ObservableValue> createListStringBinding(final String key, Object... args) { -// ObservableList r = (ObservableList) new ArrayList(); -// r.add(Bindings.createStringBinding(() -> get(key, args), locale)); -// return r; -// } - - /** - * creates a String Binding to a localized String that is computed by calling the given func - * - * @param func - * function called on every change - * @return ObjectBinding - */ - public static ObjectBinding createObjectBinding(Callable func) { - return Bindings.createObjectBinding(func, locale); - } public static String getIndependent(final String key, Locale locale, final Object... args) { ResourceBundle bundle = ResourceBundle.getBundle("message", locale); @@ -164,7 +127,6 @@ public final class I18N { e.printStackTrace(); } return val; -// return MessageFormat.format(bundle.getString(key), args); } public static String getRootValue(String oldValue, ArrayList nGramComputeForLetters) { @@ -230,35 +192,4 @@ public final class I18N { return FXCollections.observableArrayList(translatedWords); } - - /** - * DUPLICATE OF toString() - * searches for possible values in translations and returns key of the string - * == .toString() - * - * @param w, prefix - * function called on every change - * @return ObjectBinding - */ - public static String findI18NString(String w, String prefix){ - ResourceBundle bundle = ResourceBundle.getBundle("message", getLocale()); - for (String key : bundle.keySet()){ - if(prefix.length() > key.length() || !key.substring(0, prefix.length()).equals(prefix)){ - continue; - } - String val = bundle.getString(key); - try { - String newVal = new String(val.getBytes("ISO-8859-1"), "UTF-8"); - - if (newVal.equals(w)){ - return key; - } - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - } - } - - - return null; - } } diff --git a/src/main/java/gui/Messages.java b/src/main/java/gui/Messages.java index edbec88..06b0254 100755 --- a/src/main/java/gui/Messages.java +++ b/src/main/java/gui/Messages.java @@ -10,35 +10,17 @@ import javafx.scene.control.Label; public class Messages { // warnings & errors - public static String WARNING_CORPUS_NOT_FOUND = I18N.get("message.WARNING_CORPUS_NOT_FOUND"); - public static String WARNING_RESULTS_DIR_NOT_VALID = I18N.get("message.WARNING_RESULTS_DIR_NOT_VALID"); - public static String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS = I18N.get("message.WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS"); - public static String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS_INFO = I18N.get("message.WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS_INFO"); - public static String WARNING_WORD_OR_LEMMA = I18N.get("message.WARNING_WORD_OR_LEMMA"); public static String WARNING_ONLY_NUMBERS_ALLOWED = I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"); - public static String WARNING_NUMBER_TOO_BIG = I18N.get("message.WARNING_NUMBER_TOO_BIG"); public static String WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES = I18N.get("message.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES"); public static String WARNING_MISSING_STRING_LENGTH = I18N.get("message.WARNING_MISSING_STRING_LENGTH"); - public static String WARNING_NO_TAXONOMY_FOUND = I18N.get("message.WARNING_NO_TAXONOMY_FOUND"); - public static String WARNING_NO_SOLAR_FILTERS_FOUND = I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND"); public static String ERROR_WHILE_EXECUTING = I18N.get("message.ERROR_WHILE_EXECUTING"); public static String ERROR_WHILE_SAVING_RESULTS_TO_CSV = I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"); public static String ERROR_NOT_ENOUGH_MEMORY= I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"); - // missing - public static String MISSING_NGRAM_LEVEL = I18N.get("message.MISSING_NGRAM_LEVEL"); - public static String MISSING_CALCULATE_FOR = I18N.get("message.MISSING_CALCULATE_FOR"); - public static String MISSING_SKIP = I18N.get("message.MISSING_SKIP"); - public static String MISSING_STRING_LENGTH = I18N.get("message.MISSING_STRING_LENGTH"); - public static String MISMATCHED_STRING_LENGTH_AND_MSD_REGEX = I18N.get("message.MISMATCHED_STRING_LENGTH_AND_MSD_REGEX"); - // general notifications - static content/set only once - public static String NOTIFICATION_FOUND_X_FILES = I18N.get("message.NOTIFICATION_FOUND_X_FILES"); public static String NOTIFICATION_ANALYSIS_COMPLETED = I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"); public static String NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS = I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"); - public static String RESULTS_PATH_SET_TO_DEFAULT = I18N.get("message.RESULTS_PATH_SET_TO_DEFAULT"); - public static String NOTIFICATION_ANALYSIS_CANCLED = I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED"); // ongoing notifications - displayed while processing, dynamically changing public static String ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y = I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"); @@ -47,14 +29,7 @@ public class Messages { // Labels public static String LABEL_CORPUS_LOCATION_NOT_SET = I18N.get("message.LABEL_CORPUS_LOCATION_NOT_SET"); public static String LABEL_RESULTS_LOCATION_NOT_SET = I18N.get("message.LABEL_RESULTS_LOCATION_NOT_SET"); - public static String LABEL_RESULTS_CORPUS_TYPE_NOT_SET = I18N.get("message.LABEL_RESULTS_CORPUS_TYPE_NOT_SET"); - - public static String LABEL_SCANNING_CORPUS = I18N.get("message.LABEL_SCANNING_CORPUS"); - public static String LABEL_SCANNING_SINGLE_FILE_CORPUS = I18N.get("message.LABEL_SCANNING_SINGLE_FILE_CORPUS"); - public static String COMPLETED = I18N.get("message.COMPLETED"); -// public static String TOOLTIP_chooseCorpusLocationB = I18N.get("message.TOOLTIP_chooseCorpusLocationB"); -// public static String TOOLTIP_readHeaderInfoChB = I18N.get("message.TOOLTIP_readHeaderInfoChB"); public static String TOOLTIP_readNotePunctuationsChB = I18N.get("message.TOOLTIP_readNotePunctuationsChB"); public static String TOOLTIP_readDisplayTaxonomyChB = I18N.get("message.TOOLTIP_readDisplayTaxonomyChB"); diff --git a/src/main/java/gui/OneWordAnalysisTab.java b/src/main/java/gui/OneWordAnalysisTab.java index 7ceb444..b69af79 100755 --- a/src/main/java/gui/OneWordAnalysisTab.java +++ b/src/main/java/gui/OneWordAnalysisTab.java @@ -1,12 +1,8 @@ package gui; -import alg.XML_processing; import data.*; import javafx.application.HostServices; -import javafx.beans.InvalidationListener; -import javafx.beans.Observable; import javafx.beans.binding.StringBinding; -import javafx.beans.property.ReadOnlyDoubleWrapper; import javafx.beans.value.ChangeListener; import javafx.beans.value.ObservableValue; import javafx.collections.ListChangeListener; @@ -24,12 +20,10 @@ import javafx.scene.image.ImageView; import util.Tasks; import java.io.File; -import java.io.UnsupportedEncodingException; import java.util.*; import java.util.concurrent.atomic.AtomicBoolean; import java.util.regex.Pattern; -import static alg.XML_processing.readXML; import static gui.GUIController.showAlert; @SuppressWarnings("Duplicates") @@ -38,7 +32,6 @@ public class OneWordAnalysisTab { @FXML private AnchorPane oneWordAnalysisTabPane; -// private ArrayList alsoVisualize; @FXML public TextArea selectedFiltersTextArea; @@ -197,43 +190,22 @@ public class OneWordAnalysisTab { private ChangeListener minimalTaxonomyListener; private ChangeListener minimalRelFreListener; -// private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka"); -// private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); -// private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); -// private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica"); -// private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); -// private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); -// private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); -// private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); -// private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); -// private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); - - // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica"); -// private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY)); - // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY)); - // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY)); - // private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY)); - // private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY)); - // private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY)); - // private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY)); - // private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY)); - // private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {}; private static final ArrayList ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY)); @@ -242,9 +214,6 @@ public class OneWordAnalysisTab { // TODO: pass observables for taxonomy based on header scan // after header scan - private ObservableList taxonomyCCBValues; - private CorpusType currentCorpusType; - public void init() { // add CSS style oneWordAnalysisTabPane.getStylesheets().add("style.css"); @@ -339,9 +308,6 @@ public class OneWordAnalysisTab { logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); } }; -// alsoVisualizeCCB.getCheckModel().clearChecks(); -// alsoVisualizeCCB.getItems().removeAll(); -// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA)); alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); @@ -462,18 +428,14 @@ public class OneWordAnalysisTab { public void onChanged(Change c) { if (changing) { ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); -// ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); ArrayList checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus); -// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); - taxonomy = new ArrayList<>(); taxonomy.addAll(checkedItemsTaxonomy); taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); - // taxonomyCCB.getCheckModel().clearChecks(); changing = false; taxonomyCCB.getCheckModel().clearChecks(); for (Taxonomy t : checkedItemsTaxonomy) { @@ -527,7 +489,6 @@ public class OneWordAnalysisTab { writeMsdAtTheEnd = newValue; logger.info("write msd at the end: ", writeMsdAtTheEnd); }); -// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); notePunctuations = false; // set @@ -653,84 +614,6 @@ public class OneWordAnalysisTab { cancel.setVisible(false); } - /** - * case a: values for combo boxes can change after a corpus change - *
    - *
  • different corpus type - reset all fields so no old values remain
  • - *
  • same corpus type, different subset - keep
  • - *
- *

- * case b: values for combo boxes can change after a header scan - *

    - *
  • at first, fields are populated by corpus type defaults
  • - *
  • after, with gathered data
  • - *
- *

- * ngrams: 1 - * calculateFor: word - * msd: - * taxonomy: - * skip: 0 - * iscvv: false - * string length: 1 - */ -// public void populateFields() { -// // corpus changed if: current one is null (this is first run of the app) -// // or if currentCorpus != gui's corpus -// boolean corpusChanged = currentCorpusType == null -// || currentCorpusType != corpus.getCorpusType(); -// -// -// // TODO: check for GOS, GIGAFIDA, SOLAR... -// // refresh and: -// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset -// if (calculateFor == null) { -// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); -// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); -// } -// -// if (!filter.hasMsd()) { -// // if current corpus doesn't have msd data, disable this field -// msd = new ArrayList<>(); -// msdTF.setText(""); -// msdTF.setDisable(true); -// logger.info("no msd data"); -// } else { -// if (ValidationUtil.isEmpty(msd) -// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { -// // msd has not been set previously -// // or msd has been set but the corpus changed -> reset -// msd = new ArrayList<>(); -// msdTF.setText(""); -// msdTF.setDisable(false); -// logger.info("msd reset"); -// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { -// // if msd has been set, but corpus type remained the same, we can keep any set msd value -// msdTF.setText(StringUtils.join(msdStrings, " ")); -// msdTF.setDisable(false); -// logger.info("msd kept"); -// } -// } -// -// // TODO: trigger on rescan -// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { -// // user changed corpus (by type) or by selection & triggered a rescan of headers -// // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getObservableListTaxonomy(); -// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); -// -// currentCorpusType = corpus.getCorpusType(); -// // setTaxonomyIsDirty(false); -// } else { -// -// } -// -// // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getObservableListTaxonomy(); -// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); -// taxonomyCCB.getItems().addAll(taxonomyCCBValues); -// -// } private void addTooltipToImage(ImageView image, StringBinding stringBinding){ Tooltip tooltip = new Tooltip(); tooltip.textProperty().bind(stringBinding); @@ -819,7 +702,6 @@ public class OneWordAnalysisTab { filter.setSolarFilters(solarFiltersMap); filter.setStringLength(1); filter.setMultipleKeys(alsoVisualize); -// filter.setNotePunctuations(true); filter.setNotePunctuations(notePunctuations); // setMsd must be behind alsoVisualize @@ -878,136 +760,14 @@ public class OneWordAnalysisTab { private void execute(StatisticsNew statistic) { logger.info("Started execution: ", statistic.getFilter()); - Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); - -// final Task task = new Task() { -// @SuppressWarnings("Duplicates") -// @Override -// protected Void call() throws Exception { -// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); -// if(multipleFiles){ -// cancel.setVisible(true); -// } -// int i = 0; -// Date startTime = new Date(); -// Date previousTime = new Date(); -// int remainingSeconds = -1; -// for (File f : corpusFiles) { -// final int iFinal = i; -// XML_processing xml_processing = new XML_processing(); -// xml_processing.isCancelled = false; -// i++; -// if(xml_processing.progressBarListener != null) { -// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); -// } -// if (multipleFiles) { -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000); -// previousTime = new Date(); -// } -// this.updateProgress(i, corpusFiles.size()); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); -//// if (isCancelled()) { -//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -//// break; -//// } -// } else { -// -// xml_processing.progressBarListener = new InvalidationListener() { -// int remainingSeconds = -1; -// Date previousTime = new Date(); -// @Override -// public void invalidated(Observable observable) { -// cancel.setVisible(true); -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * -// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * -// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); -// previousTime = new Date(); -// } -// xml_processing.isCancelled = isCancelled(); -// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100); -// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds)); -// } -// }; -// -// xml_processing.progressProperty().addListener(xml_processing.progressBarListener); -// } -// xml_processing.readXML(f.toString(), statistic); -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } -// } -// -// return null; -// } -// }; -// -// ngramProgressBar.progressProperty().bind(task.progressProperty()); -// progressLabel.textProperty().bind(task.messageProperty()); -// -// task.setOnSucceeded(e -> { -// try { -// boolean successullySaved = statistic.saveResultToDisk(); -// if (successullySaved) { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); -// } else { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); -// } -// } catch (UnsupportedEncodingException e1) { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); -// logger.error("Error while saving", e1); -// } -// -// ngramProgressBar.progressProperty().unbind(); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// task.setOnFailed(e -> { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); -// logger.error("Error while executing", e); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// task.setOnCancelled(e -> { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// // When cancel button is pressed cancel analysis -// cancel.setOnAction(e -> { -// task.cancel(); -// logger.info("cancel button"); -// }); - -// final Thread thread = new Thread(task, "task"); -// thread.setDaemon(true); -// thread.start(); - Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel); if (statistic.getFilter().getMinimalRelFre() > 1){ final Task mainTask = t.prepareTaskForMinRelFre(statistic); -// final Task mainTask = prepareTaskForMinRelFre(statistic); final Thread thread = new Thread(mainTask, "task"); thread.setDaemon(true); thread.start(); } else { final Task mainTask = t.prepareMainTask(statistic); -// final Task mainTask = prepareMainTask(statistic); final Thread thread = new Thread(mainTask, "task"); thread.setDaemon(true); thread.start(); diff --git a/src/main/java/gui/SelectedFiltersPane.java b/src/main/java/gui/SelectedFiltersPane.java deleted file mode 100755 index 765a8ef..0000000 --- a/src/main/java/gui/SelectedFiltersPane.java +++ /dev/null @@ -1,18 +0,0 @@ -package gui; - -import javafx.scene.control.Label; - -public class SelectedFiltersPane { - - - public Label selectedFiltersLabel; - - public Label getSelectedFiltersLabel() { - return selectedFiltersLabel; - } - - public void setSelectedFiltersLabel(String filters) { - this.selectedFiltersLabel = new Label(filters); - this.selectedFiltersLabel.setText("test?"); - } -} diff --git a/src/main/java/gui/StringAnalysisTabNew2.java b/src/main/java/gui/StringAnalysisTabNew2.java index 4f4cbbe..fafec53 100755 --- a/src/main/java/gui/StringAnalysisTabNew2.java +++ b/src/main/java/gui/StringAnalysisTabNew2.java @@ -1,21 +1,12 @@ package gui; -import static alg.XML_processing.*; import static gui.GUIController.*; -import java.io.File; -import java.io.UnsupportedEncodingException; import java.util.*; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Pattern; -import alg.XML_processing; import javafx.application.HostServices; -import javafx.beans.InvalidationListener; -import javafx.beans.Observable; import javafx.beans.binding.StringBinding; -import javafx.beans.property.ReadOnlyDoubleWrapper; import javafx.beans.value.ChangeListener; import javafx.beans.value.ObservableValue; import javafx.scene.image.ImageView; @@ -147,15 +138,6 @@ public class StringAnalysisTabNew2 { @FXML private CheckComboBox taxonomyCCB; private ArrayList taxonomy; -// -// @FXML -// private CheckBox calculatecvvCB; -// private boolean calculateCvv; - -// @FXML -// private TextField stringLengthTF; -// private Integer stringLength; - @FXML private ComboBox calculateForCB; private CalculateFor calculateFor; @@ -225,8 +207,6 @@ public class StringAnalysisTabNew2 { private Corpus corpus; private HashMap> solarFiltersMap; - private Filter filter; - private boolean useDb; private HostServices hostService; private ListChangeListener taxonomyListener; private ListChangeListener alsoVisualizeListener; @@ -236,44 +216,25 @@ public class StringAnalysisTabNew2 { private ChangeListener minimalOccurrencesListener; private ChangeListener minimalTaxonomyListener; private ChangeListener minimalRelFreListener; + private boolean useDb; -// private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka"); -// private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); -// private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); -// private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka", "normalizirana različnica"); -// private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); -// private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); -// private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); -// private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); -// private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final ObservableList COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice", "t-score", "MI", "MI3", "logDice", "simple LL"); -// private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); - - // private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY)); - // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY)); - // private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY)); - // private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY)); - // private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY)); - // private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY)); - // private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY)); - // private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY)); - // private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {}; private static final ArrayList ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY)); @@ -282,9 +243,6 @@ public class StringAnalysisTabNew2 { // TODO: pass observables for taxonomy based on header scan // after header scan - private ObservableList taxonomyCCBValues; - private CorpusType currentCorpusType; - public void init() { // add CSS style stringAnalysisTabPaneNew2.getStylesheets().add("style.css"); @@ -420,13 +378,6 @@ public class StringAnalysisTabNew2 { } else { alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_EMPTY)); } - // alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { - // alsoVisualize = new ArrayList<>(); - // ObservableList checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems(); - // alsoVisualize.addAll(checkedItems); - // logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); - // }); - // alsoVisualizeCCB.getCheckModel().clearChecks(); alsoVisualizeListener = new ListChangeListener() { @Override @@ -437,9 +388,6 @@ public class StringAnalysisTabNew2 { logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ","))); } }; -// alsoVisualizeCCB.getCheckModel().clearChecks(); -// alsoVisualizeCCB.getItems().removeAll(); -// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS)); alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); @@ -500,16 +448,6 @@ public class StringAnalysisTabNew2 { collocabilityCCB.setDisable(false); collocabilityCCB.getCheckModel().getCheckedItems().addListener(collocabilityListener); -// collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { -// collocability = new ArrayList<>(); -// ObservableList checkedItems = FXCollections.observableArrayList(); -// for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) { -// checkedItems.add(Collocability.factory(el)); -// } -// collocability.addAll(checkedItems); -// logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ","))); -// }); - // msd if (msdListener != null){ @@ -595,9 +533,6 @@ public class StringAnalysisTabNew2 { public void onChanged(ListChangeListener.Change c){ if(changing) { ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); -// ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); -// -// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); ArrayList checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus); taxonomy = new ArrayList<>(); @@ -606,7 +541,6 @@ public class StringAnalysisTabNew2 { taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); - // taxonomyCCB.getCheckModel().clearChecks(); changing = false; taxonomyCCB.getCheckModel().clearChecks(); for (Taxonomy t : checkedItemsTaxonomy) { @@ -646,32 +580,6 @@ public class StringAnalysisTabNew2 { skipValue = 0; // cvv -// calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> { -// calculateCvv = newValue; -// logger.info("calculate cvv: " + calculateCvv); -// }); - -// calculatecvvCB.setSelected(false); - - // string length -// stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> { -// if (!newValue) { -// // focus lost -// String value = stringLengthTF.getText(); -// if (!ValidationUtil.isEmpty(value)) { -// if (!ValidationUtil.isNumber(value)) { -// logAlert("stringlengthTf: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); -// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); -// } -// stringLength = Integer.parseInt(value); -// } else { -// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_MISSING_STRING_LENGTH")); -// stringLengthTF.setText("1"); -// logAlert(I18N.get("message.WARNING_MISSING_STRING_LENGTH")); -// } -// } -// }); - minimalOccurrencesTF.setText("1"); minimalOccurrences = 1; @@ -781,108 +689,6 @@ public class StringAnalysisTabNew2 { cancel.setVisible(false); } - /** - * case a: values for combo boxes can change after a corpus change - *
    - *
  • different corpus type - reset all fields so no old values remain
  • - *
  • same corpus type, different subset - keep
  • - *
- *

- * case b: values for combo boxes can change after a header scan - *

    - *
  • at first, fields are populated by corpus type defaults
  • - *
  • after, with gathered data
  • - *
- *

- * ngrams: 1 - * calculateFor: word - * msd: - * taxonomy: - * skip: 0 - * iscvv: false - * string length: 1 - */ -// public void populateFields() { -// // corpus changed if: current one is null (this is first run of the app) -// // or if currentCorpus != gui's corpus -// boolean corpusChanged = currentCorpusType == null -// || currentCorpusType != corpus.getCorpusType(); -// -// // keep ngram value if set -// if (ngramValue == null) { -// ngramValueCB.getSelectionModel().select("1"); -// ngramValue = 1; -// } -// -// // TODO: check for GOS, GIGAFIDA, SOLAR... -// // refresh and: -// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset -// if (calculateFor == null) { -// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); -// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); -// } -// -// if (!filter.hasMsd()) { -// // if current corpus doesn't have msd data, disable this field -// msd = new ArrayList<>(); -// msdTF.setText(""); -// msdTF.setDisable(true); -// logger.info("no msd data"); -// } else { -// if (ValidationUtil.isEmpty(msd) -// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { -// // msd has not been set previously -// // or msd has been set but the corpus changed -> reset -// msd = new ArrayList<>(); -// msdTF.setText(""); -// msdTF.setDisable(false); -// logger.info("msd reset"); -// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { -// // if msd has been set, but corpus type remained the same, we can keep any set msd value -// msdTF.setText(StringUtils.join(msdStrings, " ")); -// msdTF.setDisable(false); -// logger.info("msd kept"); -// } -// } -// -// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection) -// -// // keep skip value -// if (skipValue == null) { -// skipValueCB.getSelectionModel().select("0"); -// skipValue = 0; -// } -// -// // keep calculateCvv -// calculatecvvCB.setSelected(calculateCvv); -// -// // keep string length if set -// if (stringLength != null) { -// stringLengthTF.setText(String.valueOf(stringLength)); -// } else { -// stringLengthTF.setText("1"); -// stringLength = 1; -// } -// -// // TODO: trigger on rescan -// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { -// // user changed corpus (by type) or by selection & triggered a rescan of headers -// // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getObservableListTaxonomy(); -// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); -// -// currentCorpusType = corpus.getCorpusType(); -// // setTaxonomyIsDirty(false); -// } else { -// -// } -// -// // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getObservableListTaxonomy(); -// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); -// taxonomyCCB.getItems().addAll(taxonomyCCBValues); -// -// } private void addTooltipToImage(ImageView image, StringBinding stringBinding){ Tooltip tooltip = new Tooltip(); tooltip.textProperty().bind(stringBinding); @@ -898,7 +704,6 @@ public class StringAnalysisTabNew2 { calculateForL.textProperty().bind(I18N.createStringBinding("label.calculateFor")); alsoVisualizeL.textProperty().bind(I18N.createStringBinding("label.alsoVisualize")); displayTaxonomyL.textProperty().bind(I18N.createStringBinding("label.displayTaxonomy")); -// writeMsdAtTheEndL.textProperty().bind(I18N.createStringBinding("label.writeMsdAtTheEnd")); skipValueL.textProperty().bind(I18N.createStringBinding("label.skipValue")); slowSpeedWarning1L.textProperty().bind(I18N.createStringBinding("label.slowSpeedWarning")); slowSpeedWarning2L.textProperty().bind(I18N.createStringBinding("label.slowSpeedWarning")); @@ -948,10 +753,6 @@ public class StringAnalysisTabNew2 { if (mode == MODE.WORD) { paneWords.setVisible(true); -// paneLetters.setVisible(false); -// if (corpus.getCorpusType() == CorpusType.GOS) -// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS); -// else if (corpus.getCorpusType() == CorpusType.GOS) { calculateForCB.itemsProperty().unbind(); calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_GOS)); @@ -980,7 +781,6 @@ public class StringAnalysisTabNew2 { filter.setDisplayTaxonomy(displayTaxonomy); filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setSkipValue(skipValue); -// filter.setIsCvv(calculateCvv); filter.setSolarFilters(solarFiltersMap); filter.setNotePunctuations(notePunctuations); filter.setMultipleKeys(alsoVisualize); @@ -993,16 +793,11 @@ public class StringAnalysisTabNew2 { filter.setCollocability(collocability); filter.setTaxonomySetOperation(taxonomySetOperation); -// if (ngramValue != null && ngramValue == 0) { -// filter.setStringLength(stringLength); -// } - String message = Validation.validateForStringLevel(filter); if (message == null) { // no errors logger.info("Executing: ", filter.toString()); StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb); -// ADD THINGS HERE!!! execute(statistic); } else { logAlert(message); @@ -1043,547 +838,6 @@ public class StringAnalysisTabNew2 { } } -// public void calculate_collocabilities(StatisticsNew statistics, StatisticsNew oneWordStatistics) { -// statistics.updateCalculateCollocabilities(oneWordStatistics); -// -// } - -// private final Task prepareTaskForMinRelFre(StatisticsNew statistic) { -// Filter f = statistic.getFilter(); -// logger.info("Started execution: ", f); -// Task task_collocability = null; -// -// try{ -// Filter f2 = (Filter) f.clone(); -// f2.setIsMinimalRelFreScraper(true); -// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb); -// -// -//// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb); -// -// Collection corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles(); -// -// final Task task = new Task() { -// @SuppressWarnings("Duplicates") -// @Override -// protected Void call() throws Exception { -// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType()); -// if(multipleFiles){ -// cancel.setVisible(true); -// } -// Date startTime = new Date(); -// Date previousTime = new Date(); -// int remainingSeconds = -1; -// int corpusSize; -// int i; -// if(statistic.getFilter().getCollocability().size() > 0){ -// i = 0; -// corpusSize = corpusFiles.size() * 3; -// } else { -// i = 0; -// corpusSize = corpusFiles.size() * 2; -// } -// for (File f : corpusFiles) { -// final int iFinal = i; -// XML_processing xml_processing = new XML_processing(); -// xml_processing.isCancelled = false; -// i++; -// if(xml_processing.progressBarListener != null) { -// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); -// } -// if (multipleFiles) { -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); -// previousTime = new Date(); -// } -// this.updateProgress(i, corpusSize); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); -//// if (isCancelled()) { -//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -//// break; -//// } -// } else { -// xml_processing.progressBarListener = new InvalidationListener() { -// int remainingSeconds = -1; -// Date previousTime = new Date(); -// @Override -// public void invalidated(Observable observable) { -// cancel.setVisible(true); -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * -// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * -// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); -//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); -//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1); -//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); -//// System.out.println(remainingSeconds); -// previousTime = new Date(); -// } -// xml_processing.isCancelled = isCancelled(); -// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); -// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); -// } -// }; -// -// xml_processing.progressProperty().addListener(xml_processing.progressBarListener); -// } -// xml_processing.readXML(f.toString(), statisticsMinRelFre); -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } -// if(!(multipleFiles)){ -// cancel.setVisible(false); -// } -// } -// -// // add remaining minRelFre results -// if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) { -//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() + -// long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue(); -// double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; -// -// statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor); -// -// // reset all values -// for(Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()){ -// statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>()); -// } -// for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){ -// statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0)); -// } -// -//// System.out.println("asd"); -// } -// -// return null; -// } -// }; -// -// ngramProgressBar.progressProperty().bind(task.progressProperty()); -// progressLabel.textProperty().bind(task.messageProperty()); -// task.setOnSucceeded(e -> { -// statistic.updateMinimalRelFre(statisticsMinRelFre.getMinimalRelFreNgrams(), statisticsMinRelFre.getMinimalRelFre1grams()); -// final Task taskCollocability = prepareMainTask(statistic); -// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability"); -// thread_collocability.setDaemon(true); -// thread_collocability.start(); -// }); -// -// task.setOnFailed(e -> { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); -// logger.error("Error while executing", e); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -// // ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// task.setOnCancelled(e -> { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -// // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// // When cancel button is pressed cancel analysis -// cancel.setOnAction(e -> { -// task.cancel(); -// logger.info("cancel button"); -// }); -// -// return task; -// }catch(CloneNotSupportedException c){ return null; } -// } -// -// private final Task prepareMainTask(StatisticsNew statistic) { -// Filter f = statistic.getFilter(); -// logger.info("Started execution: ", f); -// Task task_collocability = null; -// -// Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); -// -// final Task task = new Task() { -// @SuppressWarnings("Duplicates") -// @Override -// protected Void call() throws Exception { -// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); -// if(multipleFiles){ -// cancel.setVisible(true); -// } -// -// -//// int i = corpusFiles.size(); -//// Date startTime = new Date(); -//// Date previousTime = new Date(); -//// int remainingSeconds = -1; -//// int corpusSize; -//// if (statistic.getFilter().getCollocability().size() > 0) { -//// corpusSize = corpusFiles.size() * 2; -//// } else { -//// corpusSize = corpusFiles.size(); -//// } -// -// Date startTime = new Date(); -// Date previousTime = new Date(); -// int remainingSeconds = -1; -// int corpusSize; -// int i; -// int taskIndex = 0; -// if(statistic.getFilter().getCollocability().size() > 0 && statistic.getFilter().getMinimalRelFre() > 1){ -// i = corpusFiles.size(); -// corpusSize = corpusFiles.size() * 3; -// } else if (statistic.getFilter().getMinimalRelFre() > 1) { -// i = corpusFiles.size(); -// corpusSize = corpusFiles.size() * 2; -// } else if (statistic.getFilter().getCollocability().size() > 0) { -// i = 0; -// corpusSize = corpusFiles.size() * 2; -// } else { -// i = 0; -// corpusSize = corpusFiles.size(); -// } -// for (File f : corpusFiles) { -// final int iFinal = i; -// XML_processing xml_processing = new XML_processing(); -// xml_processing.isCancelled = false; -// i++; -// taskIndex++; -// if(xml_processing.progressBarListener != null) { -// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); -// } -// if (multipleFiles) { -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000); -// previousTime = new Date(); -// } -// this.updateProgress(i, corpusSize); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); -// -//// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -//// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); -//// previousTime = new Date(); -//// } -//// this.updateProgress(i, corpusSize); -//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); -// -// } else { -// xml_processing.progressBarListener = new InvalidationListener() { -// int remainingSeconds = -1; -// Date previousTime = new Date(); -// @Override -// public void invalidated(Observable observable) { -// cancel.setVisible(true); -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * -// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * -// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); -//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); -//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1); -//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); -//// System.out.println(remainingSeconds); -// previousTime = new Date(); -// } -// xml_processing.isCancelled = isCancelled(); -// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); -// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); -// } -// }; -// -// xml_processing.progressProperty().addListener(xml_processing.progressBarListener); -// } -// xml_processing.readXML(f.toString(), statistic); -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } -// if(!(multipleFiles)){ -// cancel.setVisible(false); -// } -//// readXML(f.toString(), statistic); -//// i++; -//// if (isCancelled()) { -//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -//// break; -//// } -//// if (statistic.getFilter().getCollocability().size() > 0) { -//// this.updateProgress(i, corpusFiles.size() * 2); -//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); -//// } else { -//// this.updateProgress(i, corpusFiles.size()); -//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); -//// } -////// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); -// } -// // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing -// if (statistic.getFilter().getMinimalRelFre() > 1){ -//// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() + -// long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue(); -// double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; -// -// -// for(Map.Entry entry : statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()){ -// if(entry.getValue().longValue() < absToRelFactor){ -// statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).remove(entry.getKey()); -// } -// } -// statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor); -// } -// -// return null; -// } -// }; -// -// ngramProgressBar.progressProperty().bind(task.progressProperty()); -// progressLabel.textProperty().bind(task.messageProperty()); -// task.setOnSucceeded(e -> { -// if (f.getCollocability().size() > 0) { -// try{ -// Filter f2 = (Filter) f.clone(); -// f2.setNgramValue(1); -// StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb); -// final Task taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams); -// final Thread thread_collocability = new Thread(taskCollocability, "task_collocability"); -// thread_collocability.setDaemon(true); -// thread_collocability.start(); -// }catch(CloneNotSupportedException c){} -// -// -// -// } else { -// try { -//// System.out.print(statistics); -// boolean successullySaved = statistic.saveResultToDisk(); -// if (successullySaved) { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); -// } else { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); -// } -// } catch (UnsupportedEncodingException e1) { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); -// logger.error("Error while saving", e1); -// } catch (OutOfMemoryError e1) { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); -// logger.error("Out of memory error", e1); -// } -// ngramProgressBar.progressProperty().unbind(); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// } -// -// -// }); -// -// task.setOnFailed(e -> { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); -// logger.error("Error while executing", e); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// task.setOnCancelled(e -> { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// // When cancel button is pressed cancel analysis -// cancel.setOnAction(e -> { -// task.cancel(); -// logger.info("cancel button"); -// }); -// -// return task; -// } -// -// private final Task prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) { -// Collection corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles(); -// -// final Task task = new Task() { -// @SuppressWarnings("Duplicates") -// @Override -// protected Void call() throws Exception { -// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); -// if(multipleFiles){ -// cancel.setVisible(true); -// } -//// int i = corpusFiles.size(); -// Date startTime = new Date(); -// Date previousTime = new Date(); -// int remainingSeconds = -1; -//// int corpusSize; -//// if (statistic.getFilter().getCollocability().size() > 0) { -//// corpusSize = corpusFiles.size() * 2; -//// } else { -//// corpusSize = corpusFiles.size(); -//// } -// -// -// int corpusSize; -// int i; -// int taskIndex = 0; -// if(statistic.getFilter().getMinimalRelFre() > 1){ -// i = corpusFiles.size() * 2; -// corpusSize = corpusFiles.size() * 3; -// } else { -// i = corpusFiles.size(); -// corpusSize = corpusFiles.size() * 2; -// } -// -// -// -// for (File f : corpusFiles) { -// final int iFinal = i; -// XML_processing xml_processing = new XML_processing(); -// i++; -// taskIndex++; -// if(xml_processing.progressBarListener != null) { -// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); -// } -// if (multipleFiles) { -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/taskIndex) * (corpusSize - i) / 1000); -// previousTime = new Date(); -// } -// this.updateProgress(i, corpusSize); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); -//// if (isCancelled()) { -//// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -//// break; -//// } -// } else { -// xml_processing.progressBarListener = new InvalidationListener() { -// int remainingSeconds = -1; -// Date previousTime = new Date(); -// @Override -// public void invalidated(Observable observable) { -// cancel.setVisible(true); -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * -// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * -// ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); -//// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); -//// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1))); -//// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); -//// System.out.println(remainingSeconds); -// previousTime = new Date(); -// } -// xml_processing.isCancelled = isCancelled(); -// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100); -// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), 1, 1, f.getName(), remainingSeconds)); -// } -// }; -// -// xml_processing.progressProperty().addListener(xml_processing.progressBarListener); -// } -// xml_processing.isCollocability = true; -// xml_processing.readXML(f.toString(), statisticsOneGrams); -// xml_processing.isCollocability = false; -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } -//// readXML(f.toString(), statisticsOneGrams); -//// i++; -//// this.updateProgress(i, corpusFiles.size() * 2); -//// if (statistic.getFilter().getCollocability().size() > 0) { -//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); -//// } else { -//// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); -//// } -// } -// -// return null; -// } -// }; -// -// ngramProgressBar.progressProperty().bind(task.progressProperty()); -// progressLabel.textProperty().bind(task.messageProperty()); -// -// task.setOnSucceeded(e -> { -// try { -// System.out.print(statistic); -//// calculate_collocabilities(statistic, statisticsOneGrams); -// statistic.updateCalculateCollocabilities(statisticsOneGrams); -// boolean successullySaved = statistic.saveResultToDisk(); -// if (successullySaved) { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); -// } else { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); -// } -// } catch (UnsupportedEncodingException e1) { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); -// logger.error("Error while saving", e1); -// } catch (OutOfMemoryError e1) { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); -// logger.error("Out of memory error", e1); -// } -//// try { -//// boolean successullySaved = statistic.saveResultToDisk(); -//// if (successullySaved) { -//// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED); -//// } else { -//// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS); -//// } -//// } catch (UnsupportedEncodingException e1) { -//// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV); -//// logger.error("Error while saving", e1); -//// } catch (OutOfMemoryError e1){ -//// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY); -//// logger.error("Out of memory error", e1); -//// } -//// -// ngramProgressBar.progressProperty().unbind(); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// task.setOnFailed(e -> { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); -// logger.error("Error while executing", e); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// task.setOnCancelled(e -> { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// // When cancel button is pressed cancel analysis -// cancel.setOnAction(e -> { -// task.cancel(); -//// logger.info("cancel button"); -// }); -// return task; -// } - private void execute(StatisticsNew statistic) { Filter f = statistic.getFilter(); logger.info("Started execution: ", f); @@ -1591,13 +845,11 @@ public class StringAnalysisTabNew2 { Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel); if (f.getMinimalRelFre() > 1){ final Task mainTask = t.prepareTaskForMinRelFre(statistic); -// final Task mainTask = prepareTaskForMinRelFre(statistic); final Thread thread = new Thread(mainTask, "task"); thread.setDaemon(true); thread.start(); } else { final Task mainTask = t.prepareMainTask(statistic); -// final Task mainTask = prepareMainTask(statistic); final Thread thread = new Thread(mainTask, "task"); thread.setDaemon(true); thread.start(); diff --git a/src/main/java/gui/ValidationUtil.java b/src/main/java/gui/ValidationUtil.java index 44416fa..c64d22d 100755 --- a/src/main/java/gui/ValidationUtil.java +++ b/src/main/java/gui/ValidationUtil.java @@ -9,7 +9,6 @@ import org.apache.commons.lang3.math.NumberUtils; public class ValidationUtil { public static boolean isNumber(String value) { - //return NumberUtils.isCreatable(value); return NumberUtils.isNumber(value); } diff --git a/src/main/java/gui/WordFormationTab.java b/src/main/java/gui/WordFormationTab.java deleted file mode 100755 index bb3e62b..0000000 --- a/src/main/java/gui/WordFormationTab.java +++ /dev/null @@ -1,260 +0,0 @@ -//package gui; -// -//import static alg.XML_processing.*; -//import static gui.GUIController.*; -// -//import java.io.File; -//import java.io.UnsupportedEncodingException; -//import java.util.*; -// -//import javafx.application.HostServices; -//import javafx.scene.control.*; -//import org.apache.commons.lang3.StringUtils; -//import org.apache.logging.log4j.LogManager; -//import org.apache.logging.log4j.Logger; -//import org.controlsfx.control.CheckComboBox; -// -//import data.*; -//import javafx.collections.ListChangeListener; -//import javafx.collections.ObservableList; -//import javafx.concurrent.Task; -//import javafx.fxml.FXML; -//import javafx.scene.layout.AnchorPane; -// -//@SuppressWarnings("Duplicates") -//public class WordFormationTab { -// public final static Logger logger = LogManager.getLogger(WordFormationTab.class); -// -// public AnchorPane wordAnalysisTabPane; -// -// @FXML -// public Label selectedFiltersLabel; -// @FXML -// public Label solarFilters; -// -// @FXML -// private CheckComboBox taxonomyCCB; -// private ArrayList taxonomy; -// -// @FXML -// private TextField minimalOccurrencesTF; -// private Integer minimalOccurrences; -// -// @FXML -// private TextField minimalTaxonomyTF; -// private Integer minimalTaxonomy; -// -// @FXML -// private Button computeB; -// -// @FXML -// public ProgressBar ngramProgressBar; -// @FXML -// public Label progressLabel; -// -// @FXML -// private Hyperlink helpH; -// -// private Corpus corpus; -// private HashMap> solarFiltersMap; -// private HostServices hostService; -// -// // after header scan -// private ObservableList taxonomyCCBValues; -// private CorpusType currentCorpusType; -// private boolean useDb; -// -// -// public void init() { -// // taxonomy -// if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { -// taxonomyCCB.getItems().removeAll(); -// taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); -// taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { -// taxonomy = new ArrayList<>(); -// ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); -// ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus); -// taxonomy.addAll(checkedItemsTaxonomy); -// logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); -// }); -// taxonomyCCB.getCheckModel().clearChecks(); -// } else { -// taxonomyCCB.setDisable(true); -// } -// -// // set default values -// minimalOccurrencesTF.setText("1"); -// minimalOccurrences = 1; -// -// minimalTaxonomyTF.setText("1"); -// minimalTaxonomy = 1; -// -// minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> { -// if (!newValue) { -// // focus lost -// String value = minimalOccurrencesTF.getText(); -// if (!ValidationUtil.isEmpty(value)) { -// if (!ValidationUtil.isNumber(value)) { -// logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); -// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); -// } else { -// minimalOccurrences = Integer.parseInt(value); -// } -// } else { -// minimalOccurrencesTF.setText("1"); -// minimalOccurrences = 1; -// } -// } -// }); -// -// minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> { -// if (!newValue) { -// // focus lost -// String value = minimalTaxonomyTF.getText(); -// if (!ValidationUtil.isEmpty(value)) { -// if (!ValidationUtil.isNumber(value)) { -// logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); -// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); -// } else { -// minimalTaxonomy = Integer.parseInt(value); -// } -// } else { -// minimalTaxonomyTF.setText("1"); -// minimalTaxonomy = 1; -// } -// } -// }); -// -// computeB.setOnAction(e -> { -// compute(); -// logger.info("compute button"); -// }); -// -// helpH.setOnAction(e -> openHelpWebsite()); -// } -// -// private void compute() { -// Filter filter = new Filter(); -// filter.setNgramValue(1); -// filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY); -// filter.setTaxonomy(taxonomy); -// filter.setAl(AnalysisLevel.STRING_LEVEL); -// filter.setSkipValue(0); -// filter.setMsd(new ArrayList<>()); -// filter.setIsCvv(false); -// filter.setSolarFilters(solarFiltersMap); -// filter.setMinimalOccurrences(minimalOccurrences); -// filter.setMinimalTaxonomy(minimalTaxonomy); -// -// String message = Validation.validateForStringLevel(filter); -// if (message == null) { -// // no errors -// logger.info("Executing: ", filter.toString()); -// StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb); -// execute(statistic); -// } else { -// logAlert(message); -// showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message); -// } -// } -// -// private void openHelpWebsite(){ -// hostService.showDocument(Messages.HELP_URL); -// } -// -// private void execute(StatisticsNew statistic) { -// logger.info("Started execution: ", statistic.getFilter()); -// -// Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); -// -// final Task task = new Task() { -// @SuppressWarnings("Duplicates") -// @Override -// protected Void call() throws Exception { -// int i = 0; -// Date startTime = new Date(); -// Date previousTime = new Date(); -// for (File f : corpusFiles) { -// readXML(f.toString(), statistic); -// i++; -// this.updateProgress(i, corpusFiles.size()); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); -// } -// -// return null; -// } -// }; -// -// ngramProgressBar.progressProperty().bind(task.progressProperty()); -// progressLabel.textProperty().bind(task.messageProperty()); -// -// task.setOnSucceeded(e -> { -// try { -// // first, we have to recalculate all occurrences to detailed statistics -// boolean successullySaved = statistic.recalculateAndSaveResultToDisk(); -// -// if (successullySaved) { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); -// } else { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); -// } -// } catch (UnsupportedEncodingException e1) { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); -// logger.error("Error while saving", e1); -// } -// -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// }); -// -// task.setOnFailed(e -> { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); -// logger.error("Error while executing", e); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// }); -// -// final Thread thread = new Thread(task, "task"); -// thread.setDaemon(true); -// thread.start(); -// } -// -// private void logAlert(String alert) { -// logger.info("alert: " + alert); -// } -// -// -// public void setCorpus(Corpus corpus) { -// this.corpus = corpus; -// -// if (corpus.getCorpusType() != CorpusType.SOLAR) { -// setSelectedFiltersLabel(null); -// } else { -// setSelectedFiltersLabel("/"); -// } -// } -// -// public void setSelectedFiltersLabel(String content) { -// if (content != null) { -// solarFilters.setVisible(true); -// selectedFiltersLabel.setVisible(true); -// selectedFiltersLabel.setText(content); -// } else { -// solarFilters.setVisible(false); -// selectedFiltersLabel.setVisible(false); -// } -// } -// -// public void setSolarFiltersMap(HashMap> solarFiltersMap) { -// this.solarFiltersMap = solarFiltersMap; -// } -// -// public void setHostServices(HostServices hostServices){ -// this.hostService = hostServices; -// } -//} diff --git a/src/main/java/gui/WordLevelTab.java b/src/main/java/gui/WordLevelTab.java index 187f644..d4a5df2 100755 --- a/src/main/java/gui/WordLevelTab.java +++ b/src/main/java/gui/WordLevelTab.java @@ -1,12 +1,8 @@ package gui; -import alg.XML_processing; import data.*; import javafx.application.HostServices; -import javafx.beans.InvalidationListener; -import javafx.beans.Observable; import javafx.beans.binding.StringBinding; -import javafx.beans.property.ReadOnlyDoubleWrapper; import javafx.beans.value.ChangeListener; import javafx.beans.value.ObservableValue; import javafx.collections.ListChangeListener; @@ -23,12 +19,10 @@ import org.controlsfx.control.CheckComboBox; import util.Tasks; import java.io.File; -import java.io.UnsupportedEncodingException; import java.util.*; import java.util.concurrent.atomic.AtomicBoolean; import java.util.regex.Pattern; -import static alg.XML_processing.readXML; import static gui.GUIController.showAlert; @SuppressWarnings("Duplicates") @@ -165,10 +159,6 @@ public class WordLevelTab { private TextField suffixListTF; private ArrayList suffixList; -// @FXML -// private CheckBox writeMsdAtTheEndChB; -// private boolean writeMsdAtTheEnd; - @FXML private ComboBox calculateForCB; private CalculateFor calculateFor; @@ -215,7 +205,6 @@ public class WordLevelTab { private Corpus corpus; private HashMap> solarFiltersMap; - private Filter filter; private boolean useDb; private HostServices hostService; private ListChangeListener taxonomyListener; @@ -226,44 +215,31 @@ public class WordLevelTab { private ChangeListener minimalTaxonomyListener; private ChangeListener minimalRelFreListener; -// private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica"); -// private static final ObservableList N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY)); -// private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY)); -// private static final ObservableList N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY)); -// private static final ObservableList alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_LEMMA = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY)); -// private static final ObservableList alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_ARRAY)); -// private static final ObservableList alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica"); private static final String [] ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_WORDS_GOS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_WORDS_GOS_ARRAY)); -// private static final ObservableList alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka"); private static final String [] ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY = {"calculateFor.LEMMA", "calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS_ARRAY)); -// private static final ObservableList alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta"); private static final String [] ALSO_VISUALIZE_ITEMS_MSD_ARRAY = {"calculateFor.WORD_TYPE"}; private static final ArrayList ALSO_VISUALIZE_ITEMS_MSD = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_MSD_ARRAY)); private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"}; private static final ArrayList TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY)); - // private static final ObservableList alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); private static final String [] ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY = {}; private static final ArrayList ALSO_VISUALIZE_ITEMS_EMPTY = new ArrayList<>(Arrays.asList(ALSO_VISUALIZE_ITEMS_EMPTY_ARRAY)); // TODO: pass observables for taxonomy based on header scan // after header scan - private ObservableList taxonomyCCBValues; - private CorpusType currentCorpusType; - public void init() { // add CSS style wordLevelAnalysisTabPane.getStylesheets().add("style.css"); @@ -328,21 +304,12 @@ public class WordLevelTab { } else if (newValue.equals(CalculateFor.NORMALIZED_WORD.toString())) { alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_NORMALIZED_WORDS)); } else if (newValue.equals(CalculateFor.MORPHOSYNTACTIC_SPECS.toString())) { -// writeMsdAtTheEndEnableCalculateFor.set(true); -// writeMsdAtTheEndChB.setDisable(false); alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_MSD)); } else { alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_EMPTY)); } -// if (!newValue.equals("oblikoskladenjska oznaka")){ -// writeMsdAtTheEnd = false; -// writeMsdAtTheEndChB.setSelected(false); -// writeMsdAtTheEndChB.setDisable(true); -// writeMsdAtTheEndEnableCalculateFor.set(false); -// } - alsoVisualizeListener = new ListChangeListener() { @Override public void onChanged(Change c) { @@ -353,10 +320,6 @@ public class WordLevelTab { } }; -// alsoVisualizeCCB.getCheckModel().clearChecks(); -// alsoVisualizeCCB.getItems().removeAll(); -// alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA)); - alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); if (languageChanged) { @@ -442,7 +405,6 @@ public class WordLevelTab { prefixList.add(w); } } -// suffixList = value; } System.out.println(prefixList); @@ -475,7 +437,6 @@ public class WordLevelTab { suffixList.add(w); } } -// suffixList = value; } System.out.println(suffixList); if(suffixList.size() > 0){ @@ -492,8 +453,6 @@ public class WordLevelTab { computeNgramsB.setDisable(true); } }); -// prefixLengthCB.setDisable(true); - if (msdListener != null){ msdTF.focusedProperty().removeListener(msdListener); @@ -581,10 +540,8 @@ public class WordLevelTab { public void onChanged(ListChangeListener.Change c){ if(changing) { ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); -// ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); ArrayList checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus); -// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); taxonomy = new ArrayList<>(); taxonomy.addAll(checkedItemsTaxonomy); @@ -592,7 +549,6 @@ public class WordLevelTab { taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); - // taxonomyCCB.getCheckModel().clearChecks(); changing = false; taxonomyCCB.getCheckModel().clearChecks(); for (Taxonomy t : checkedItemsTaxonomy) { @@ -639,15 +595,6 @@ public class WordLevelTab { displayTaxonomyChB.setDisable(true); } -// writeMsdAtTheEnd = false; -// writeMsdAtTheEndChB.setDisable(true); -// // set -// writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> { -// writeMsdAtTheEnd = newValue; -// logger.info("write msd at the end: ", writeMsdAtTheEnd); -// }); -// writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB)); - // set default values minimalOccurrencesTF.setText("1"); minimalOccurrences = 1; @@ -764,85 +711,6 @@ public class WordLevelTab { cancel.setVisible(false); } - /** - * case a: values for combo boxes can change after a corpus change - *
    - *
  • different corpus type - reset all fields so no old values remain
  • - *
  • same corpus type, different subset - keep
  • - *
- *

- * case b: values for combo boxes can change after a header scan - *

    - *
  • at first, fields are populated by corpus type defaults
  • - *
  • after, with gathered data
  • - *
- *

- * ngrams: 1 - * calculateFor: word - * msd: - * taxonomy: - * skip: 0 - * iscvv: false - * string length: 1 - */ -// public void populateFields() { -// // corpus changed if: current one is null (this is first run of the app) -// // or if currentCorpus != gui's corpus -// boolean corpusChanged = currentCorpusType == null -// || currentCorpusType != corpus.getCorpusType(); -// -// -// // TODO: check for GOS, GIGAFIDA, SOLAR... -// // refresh and: -// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset -// if (calculateFor == null) { -// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); -// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); -// } -// -// if (!filter.hasMsd()) { -// // if current corpus doesn't have msd data, disable this field -// msd = new ArrayList<>(); -// msdTF.setText(""); -// msdTF.setDisable(true); -// logger.info("no msd data"); -// } else { -// if (ValidationUtil.isEmpty(msd) -// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { -// // msd has not been set previously -// // or msd has been set but the corpus changed -> reset -// msd = new ArrayList<>(); -// msdTF.setText(""); -// msdTF.setDisable(false); -// logger.info("msd reset"); -// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { -// // if msd has been set, but corpus type remained the same, we can keep any set msd value -// msdTF.setText(StringUtils.join(msdStrings, " ")); -// msdTF.setDisable(false); -// logger.info("msd kept"); -// } -// } -// -// // TODO: trigger on rescan -// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { -// // user changed corpus (by type) or by selection & triggered a rescan of headers -// // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getObservableListTaxonomy(); -// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); -// -// currentCorpusType = corpus.getCorpusType(); -// // setTaxonomyIsDirty(false); -// } else { -// -// } -// -// // see if we read taxonomy from headers, otherwise use default values for given corpus -// ObservableList tax = corpus.getObservableListTaxonomy(); -// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); -// taxonomyCCB.getItems().addAll(taxonomyCCBValues); -// -// } - private void addTooltipToImage(ImageView image, StringBinding stringBinding){ Tooltip tooltip = new Tooltip(); tooltip.textProperty().bind(stringBinding); @@ -911,11 +779,9 @@ public class WordLevelTab { if (corpus.getCorpusType() == CorpusType.GOS) { calculateForCB.itemsProperty().unbind(); calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_GOS)); -// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_GOS); } else { calculateForCB.itemsProperty().unbind(); calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS)); -// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS); } } @@ -923,7 +789,6 @@ public class WordLevelTab { if (corpus.isGosOrthMode()) { calculateForCB.itemsProperty().unbind(); calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_ORTH)); -// calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH); msdTF.setDisable(true); } else { msdTF.setDisable(false); @@ -954,7 +819,6 @@ public class WordLevelTab { filter.setPrefixList(prefixList); filter.setSuffixList(suffixList); filter.setTaxonomySetOperation(taxonomySetOperation); -// filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd); String message = Validation.validateForStringLevel(filter); if (message == null) { @@ -1004,134 +868,14 @@ public class WordLevelTab { private void execute(StatisticsNew statistic) { logger.info("Started execution: ", statistic.getFilter()); - Collection corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); - -// final Task task = new Task() { -// @SuppressWarnings("Duplicates") -// @Override -// protected Void call() throws Exception { -// final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType()); -// if(multipleFiles){ -// cancel.setVisible(true); -// } -// int i = 0; -// Date startTime = new Date(); -// Date previousTime = new Date(); -// int remainingSeconds = -1; -// for (File f : corpusFiles) { -// final int iFinal = i; -// XML_processing xml_processing = new XML_processing(); -// xml_processing.isCancelled = false; -// i++; -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } -// if(xml_processing.progressBarListener != null) { -// xml_processing.progressProperty().removeListener(xml_processing.progressBarListener); -// } -// if (multipleFiles) { -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000); -// previousTime = new Date(); -// } -// this.updateProgress(i, corpusFiles.size()); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds)); -// } else { -// xml_processing.progressBarListener = new InvalidationListener() { -// int remainingSeconds = -1; -// Date previousTime = new Date(); -// @Override -// public void invalidated(Observable observable) { -// cancel.setVisible(true); -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * -// (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * -// ((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); -// previousTime = new Date(); -// } -// xml_processing.isCancelled = isCancelled(); -// updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100); -// updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds)); -// } -// }; -// -// xml_processing.progressProperty().addListener(xml_processing.progressBarListener); -// } -// xml_processing.readXML(f.toString(), statistic); -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } -// } -// -// return null; -// } -// }; -// -// ngramProgressBar.progressProperty().bind(task.progressProperty()); -// progressLabel.textProperty().bind(task.messageProperty()); -// -// task.setOnSucceeded(e -> { -// try { -// boolean successullySaved = statistic.saveResultToDisk(); -// if (successullySaved) { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); -// } else { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); -// } -// } catch (UnsupportedEncodingException e1) { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); -// logger.error("Error while saving", e1); -// } -// -// ngramProgressBar.progressProperty().unbind(); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// task.setOnFailed(e -> { -// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); -// logger.error("Error while executing", e); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// task.setOnCancelled(e -> { -// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); -// ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setProgress(0.0); -//// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); -// progressLabel.textProperty().unbind(); -// progressLabel.setText(""); -// cancel.setVisible(false); -// }); -// -// // When cancel button is pressed cancel analysis -// cancel.setOnAction(e -> { -// task.cancel(); -// logger.info("cancel button"); -// }); -// -// final Thread thread = new Thread(task, "task"); -// thread.setDaemon(true); -// thread.start(); Tasks t = new Tasks(corpus, useDb, cancel, ngramProgressBar, progressLabel); if (statistic.getFilter().getMinimalRelFre() > 1){ final Task mainTask = t.prepareTaskForMinRelFre(statistic); -// final Task mainTask = prepareTaskForMinRelFre(statistic); final Thread thread = new Thread(mainTask, "task"); thread.setDaemon(true); thread.start(); } else { final Task mainTask = t.prepareMainTask(statistic); -// final Task mainTask = prepareMainTask(statistic); final Thread thread = new Thread(mainTask, "task"); thread.setDaemon(true); thread.start(); @@ -1144,5 +888,4 @@ public class WordLevelTab { public void setHostServices(HostServices hostServices){ this.hostService = hostServices; } - } diff --git a/src/main/java/util/Combinations.java b/src/main/java/util/Combinations.java deleted file mode 100755 index 3cb12dc..0000000 --- a/src/main/java/util/Combinations.java +++ /dev/null @@ -1,46 +0,0 @@ -package util; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.stream.IntStream; - -public class Combinations { - private static HashSet> result = new HashSet<>(); - - - /* arr[] ---> Input Array - data[] ---> Temporary array to store current combination - start & end ---> Staring and Ending indexes in arr[] - index ---> Current index in data[] - r ---> Size of a combination to be printed */ - static void combinationUtil(int arr[], Integer data[], int start, int end, int index, int combinationLength) { - // Current combination is ready to be printed, print it - if (index == combinationLength) { - result.add(new HashSet<>(Arrays.asList(data))); - return; - } - - // replace index with all possible elements. The condition - // "end-i+1 >= r-index" makes sure that including one element - // at index will make a combination with remaining elements - // at remaining positions - for (int i = start; i <= end && end - i + 1 >= combinationLength - index; i++) { - data[index] = arr[i]; - combinationUtil(arr, data, i + 1, end, index + 1, combinationLength); - } - } - - public static HashSet> generateIndices(int maxNOfIndices) { - result = new HashSet<>(); - int[] arr = IntStream.range(1, maxNOfIndices).toArray(); - for (int i = 1; i < maxNOfIndices - 1; i++) { - // A temporary array to store all combination one by one - combinationUtil(arr, new Integer[i], 0, arr.length - 1, 0, i); - } - - // also add an empty one for X.... (all of this type) - result.add(new HashSet<>()); - - return result; - } -} diff --git a/src/main/java/util/Export.java b/src/main/java/util/Export.java index f9ce5ef..06e4fa5 100755 --- a/src/main/java/util/Export.java +++ b/src/main/java/util/Export.java @@ -6,7 +6,6 @@ import java.io.*; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicLong; import data.*; @@ -16,49 +15,11 @@ import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVPrinter; import org.apache.commons.csv.QuoteMode; import org.apache.commons.lang3.tuple.Pair; -import org.json.simple.JSONArray; -import org.json.simple.JSONObject; import data.Enums.WordLevelType; @SuppressWarnings("unchecked") public class Export { -// public static void SetToJSON(Set>> set) { -// JSONArray wrapper = new JSONArray(); -// -// for (Pair> p : set) { -// JSONArray data_wrapper = new JSONArray(); -// JSONObject metric = new JSONObject(); -// -// String title = p.getLeft(); -// Map map = p.getRight(); -// -// if (map.isEmpty()) -// continue; -// -// long total = Util.mapSumFrequencies(map); -// -// for (Map.Entry e : map.entrySet()) { -// JSONObject data_entry = new JSONObject(); -// data_entry.put("word", e.getKey()); -// data_entry.put("frequency", e.getValue()); -// data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total)); -// -// data_wrapper.add(data_entry); -// } -// -// metric.put("Title", title); -// metric.put("data", data_wrapper); -// wrapper.add(metric); -// } -// -// try (FileWriter file = new FileWriter("statistics.json")) { -// file.write(wrapper.toJSONString()); -// } catch (IOException e) { -// e.printStackTrace(); -// } -// } - public static String SetToCSV(Set>> set, File resultsPath, LinkedHashMap headerInfoBlock, StatisticsNew statistics, Filter filter) { Map> taxonomyResults = statistics.getTaxonomyResult(); @@ -68,15 +29,6 @@ public class Export { List FILE_HEADER_AL = new ArrayList<>(); Object[] FILE_HEADER; - //Count frequencies -// long num_frequencies = 0; -// for (Pair> p : set) { -// Map map = p.getRight(); -// if (map.isEmpty()) -// continue; -// num_frequencies = Util.mapSumFrequencies(map); -// } - Map num_selected_taxonomy_frequencies = new ConcurrentHashMap<>(); for (Taxonomy taxonomyKey : taxonomyResults.keySet()) { num_selected_taxonomy_frequencies.put(taxonomyKey, (long) 0); @@ -113,7 +65,6 @@ public class Export { headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue())); headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue())); -// headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies)); for (CalculateFor otherKey : filter.getMultipleKeys()) { FILE_HEADER_AL.add(otherKey.toHeaderString(filter.getNgramValue())); @@ -163,10 +114,7 @@ public class Export { for (Pair> p : set) { String title = p.getLeft(); - -// statistics.setTimeEnding(); title = statistics.generateResultTitle(); -// statistics. fileName = title.replace(": ", "-"); fileName = fileName.replace(" ", "_").concat(".csv"); @@ -178,8 +126,6 @@ public class Export { if (map.isEmpty()) continue; -// long total = Util.mapSumFrequencies(map); - OutputStreamWriter fileWriter = null; CSVPrinter csvFilePrinter = null; @@ -289,10 +235,7 @@ public class Export { dataEntry.add(frequency.toString()); dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key), statistics.getCorpus().getPunctuation())); dataEntry.add(formatNumberForExport(((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue(), statistics.getCorpus().getPunctuation())); -// dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences())); -// dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences())); } - } if (filter.getCollocability().size() > 0){ @@ -303,39 +246,6 @@ public class Export { // Write msd separated per letters at the end of each line in csv if (filter.getWriteMsdAtTheEnd()) { -// String msd = ""; -// -// if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){ -// msd = e.getKey().getK1(); -// } else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) { -// i = 0; -// for (CalculateFor otherKey : filter.getMultipleKeys()){ -// switch(i){ -// case 0: -// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){ -// msd = e.getKey().getK2(); -// } -// break; -// case 1: -// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){ -// msd = e.getKey().getK3(); -// } -// break; -// case 2: -// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){ -// msd = e.getKey().getK4(); -// } -// break; -// case 3: -// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){ -// msd = e.getKey().getK5(); -// } -// break; -// } -// -// i++; -// } -// } String msd = e.getKey().getMsd(filter); String [] charArray = msd.split("(?!^)"); dataEntry.addAll(Arrays.asList(charArray)); @@ -372,67 +282,6 @@ public class Export { return s; } -// public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap headerInfoBlock) { -// //Delimiter used in CSV file -// String NEW_LINE_SEPARATOR = "\n"; -// -// //CSV file header -// Object[] FILE_HEADER = {"word", "frequency", "percent"}; -// -// String fileName = ""; -// -// fileName = title.replace(": ", "-"); -// fileName = fileName.replace(" ", "_").concat(".csv"); -// -// fileName = resultsPath.toString().concat(File.separator).concat(fileName); -// -// OutputStreamWriter fileWriter = null; -// CSVPrinter csvFilePrinter = null; -// -// //Create the CSVFormat object with "\n" as a record delimiter -// CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';'); -// -// try { -// //initialize FileWriter object -// fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8); -// -// //initialize CSVPrinter object -// csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat); -// -// // write info block -// printHeaderInfo(csvFilePrinter, headerInfoBlock); -// -// //Create CSV file header -// csvFilePrinter.printRecord(FILE_HEADER); -// -// for (Object[] resultEntry : result) { -// List dataEntry = new ArrayList<>(); -// dataEntry.add(resultEntry[0]); -// dataEntry.add(resultEntry[1]); -// dataEntry.add(formatNumberAsPercent(resultEntry[2]), statistics.getCorpus().getPunctuation()); -// csvFilePrinter.printRecord(dataEntry); -// } -// } catch (Exception e) { -// System.out.println("Error in CsvFileWriter!"); -// e.printStackTrace(); -// } finally { -// try { -// if (fileWriter != null) { -// fileWriter.flush(); -// fileWriter.close(); -// } -// if (csvFilePrinter != null) { -// csvFilePrinter.close(); -// } -// } catch (IOException e) { -// System.out.println("Error while flushing/closing fileWriter/csvPrinter!"); -// e.printStackTrace(); -// } -// } -// -// return fileName; -// } - public static String nestedMapToCSV(String title, Map>> result, File resultsPath, LinkedHashMap headerInfoBlock) { //Delimiter used in CSV file String NEW_LINE_SEPARATOR = "\n"; diff --git a/src/main/java/util/Key.java b/src/main/java/util/Key.java deleted file mode 100755 index 2ddaf4a..0000000 --- a/src/main/java/util/Key.java +++ /dev/null @@ -1,31 +0,0 @@ -package util; - -public class Key /*implements Comparable */ { - // private final String value; - // - // Key(String value) { - // this.value = value; - // } - // - // @Override - // public int compareTo(Key o) { - // return Objects.compare(this.value, o.value); - // } - // - // @Override - // public boolean equals(Object o) { - // if (this.equals(o)) { - // return true; - // } - // if (o == null || getClass() != o.getClass()) { - // return false; - // } - // Key key = (Key) o; - // return Objects.equals(value, key.value); - // } - // - // @Override - // public int hashCode() { - // return 0; - // } -} \ No newline at end of file diff --git a/src/main/java/util/Tasks.java b/src/main/java/util/Tasks.java index 859588c..deb444c 100644 --- a/src/main/java/util/Tasks.java +++ b/src/main/java/util/Tasks.java @@ -57,9 +57,6 @@ public class Tasks { f2.setIsMinimalRelFreScraper(true); StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, useDb); - -// StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f, useDb); - Collection corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles(); final javafx.concurrent.Task task = new javafx.concurrent.Task() { @@ -97,10 +94,6 @@ public class Tasks { } this.updateProgress(i, corpusSize); this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } } else { xml_processing.progressBarListener = new InvalidationListener() { int remainingSeconds = -1; @@ -112,10 +105,6 @@ public class Tasks { remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); -// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); -// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1); -// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); -// System.out.println(remainingSeconds); previousTime = new Date(); } xml_processing.isCancelled = isCancelled(); @@ -138,7 +127,6 @@ public class Tasks { // add remaining minRelFre results if(statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) { -// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() + long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue(); double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; @@ -151,8 +139,6 @@ public class Tasks { for(Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()){ statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0)); } - -// System.out.println("asd"); } return null; @@ -174,7 +160,6 @@ public class Tasks { logger.error("Error while executing", e); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); - // ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); @@ -184,7 +169,6 @@ public class Tasks { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); - // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); @@ -215,19 +199,6 @@ public class Tasks { if(multipleFiles){ cancel.setVisible(true); } - - -// int i = corpusFiles.size(); -// Date startTime = new Date(); -// Date previousTime = new Date(); -// int remainingSeconds = -1; -// int corpusSize; -// if (statistic.getFilter().getCollocability().size() > 0) { -// corpusSize = corpusFiles.size() * 2; -// } else { -// corpusSize = corpusFiles.size(); -// } - Date startTime = new Date(); Date previousTime = new Date(); int remainingSeconds = -1; @@ -264,13 +235,6 @@ public class Tasks { this.updateProgress(i, corpusSize); this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); -// if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){ -// remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000); -// previousTime = new Date(); -// } -// this.updateProgress(i, corpusSize); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); - } else { xml_processing.progressBarListener = new InvalidationListener() { int remainingSeconds = -1; @@ -282,10 +246,6 @@ public class Tasks { remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); -// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); -// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1); -// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); -// System.out.println(remainingSeconds); previousTime = new Date(); } xml_processing.isCancelled = isCancelled(); @@ -304,24 +264,9 @@ public class Tasks { if(!(multipleFiles)){ cancel.setVisible(false); } -// readXML(f.toString(), statistic); -// i++; -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } -// if (statistic.getFilter().getCollocability().size() > 0) { -// this.updateProgress(i, corpusFiles.size() * 2); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); -// } else { -// this.updateProgress(i, corpusFiles.size()); -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); -// } -//// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName())); } // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing if (statistic.getFilter().getMinimalRelFre() > 1){ -// long countFor1MWords = stats.getCountWordsForMinimalRelFreNgrams() + long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue(); double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords; @@ -356,7 +301,6 @@ public class Tasks { } else { try { -// System.out.print(statistics); boolean successullySaved = statistic.saveResultToDisk(); if (successullySaved) { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); @@ -371,7 +315,6 @@ public class Tasks { logger.error("Out of memory error", e1); } ngramProgressBar.progressProperty().unbind(); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); @@ -385,7 +328,6 @@ public class Tasks { logger.error("Error while executing", e); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); @@ -395,7 +337,6 @@ public class Tasks { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); @@ -421,17 +362,9 @@ public class Tasks { if(multipleFiles){ cancel.setVisible(true); } -// int i = corpusFiles.size(); Date startTime = new Date(); Date previousTime = new Date(); int remainingSeconds = -1; -// int corpusSize; -// if (statistic.getFilter().getCollocability().size() > 0) { -// corpusSize = corpusFiles.size() * 2; -// } else { -// corpusSize = corpusFiles.size(); -// } - int corpusSize; int i; @@ -461,10 +394,6 @@ public class Tasks { } this.updateProgress(i, corpusSize); this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds)); -// if (isCancelled()) { -// updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); -// break; -// } } else { xml_processing.progressBarListener = new InvalidationListener() { int remainingSeconds = -1; @@ -476,10 +405,6 @@ public class Tasks { remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) * (1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) * ((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000); -// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime())); -// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1))); -// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get())); -// System.out.println(remainingSeconds); previousTime = new Date(); } xml_processing.isCancelled = isCancelled(); @@ -497,14 +422,6 @@ public class Tasks { updateMessage(I18N.get("message.CANCELING_NOTIFICATION")); break; } -// readXML(f.toString(), statisticsOneGrams); -// i++; -// this.updateProgress(i, corpusFiles.size() * 2); -// if (statistic.getFilter().getCollocability().size() > 0) { -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName())); -// } else { -// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); -// } } return null; @@ -517,7 +434,6 @@ public class Tasks { task.setOnSucceeded(e -> { try { System.out.print(statistic); -// calculate_collocabilities(statistic, statisticsOneGrams); statistic.updateCalculateCollocabilities(statisticsOneGrams); boolean successullySaved = statistic.saveResultToDisk(); if (successullySaved) { @@ -532,21 +448,6 @@ public class Tasks { showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_NOT_ENOUGH_MEMORY")); logger.error("Out of memory error", e1); } -// try { -// boolean successullySaved = statistic.saveResultToDisk(); -// if (successullySaved) { -// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED); -// } else { -// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS); -// } -// } catch (UnsupportedEncodingException e1) { -// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV); -// logger.error("Error while saving", e1); -// } catch (OutOfMemoryError e1){ -// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY); -// logger.error("Out of memory error", e1); -// } -// ngramProgressBar.progressProperty().unbind(); // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); @@ -559,7 +460,6 @@ public class Tasks { logger.error("Error while executing", e); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); @@ -569,7 +469,6 @@ public class Tasks { showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_CANCELED")); ngramProgressBar.progressProperty().unbind(); ngramProgressBar.setProgress(0.0); -// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); progressLabel.textProperty().unbind(); progressLabel.setText(""); cancel.setVisible(false); @@ -578,7 +477,6 @@ public class Tasks { // When cancel button is pressed cancel analysis cancel.setOnAction(e -> { task.cancel(); -// logger.info("cancel button"); }); return task; } diff --git a/src/main/java/util/TimeWatch.java b/src/main/java/util/TimeWatch.java index cf30ff7..6a45ef1 100755 --- a/src/main/java/util/TimeWatch.java +++ b/src/main/java/util/TimeWatch.java @@ -49,15 +49,4 @@ public class TimeWatch { return "Elapsed Time in nano seconds: "; } - - private void exampleUsage() { - TimeWatch watch = TimeWatch.start(); - - // do something... - - System.out.println("Elapsed Time custom format: " + watch.toMinuteSeconds()); - System.out.println("Elapsed Time in seconds: " + watch.time(TimeUnit.SECONDS)); - System.out.println("Elapsed Time in nano seconds: " + watch.time()); - - } } \ No newline at end of file diff --git a/src/main/java/util/Util.java b/src/main/java/util/Util.java index 0e96c03..f91b729 100755 --- a/src/main/java/util/Util.java +++ b/src/main/java/util/Util.java @@ -20,22 +20,6 @@ import gui.ValidationUtil; public class Util { public final static Logger logger = LogManager.getLogger(Util.class); - - public static String toReadableTime(long time) { - long hours = time(TimeUnit.HOURS, time); - long minutes = time(TimeUnit.MINUTES, time) - TimeUnit.HOURS.toMinutes(hours); - long seconds = time(TimeUnit.SECONDS, time) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes); - long milliseconds = time(TimeUnit.MILLISECONDS, time) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds); - long microseconds = time(TimeUnit.MICROSECONDS, time) - TimeUnit.HOURS.toMicros(hours) - TimeUnit.MINUTES.toMicros(minutes) - TimeUnit.SECONDS.toMicros(seconds) - TimeUnit.MILLISECONDS.toMicros(milliseconds); - long nanoseconds = time(TimeUnit.NANOSECONDS, time) - TimeUnit.HOURS.toNanos(hours) - TimeUnit.MINUTES.toNanos(minutes) - TimeUnit.SECONDS.toNanos(seconds) - TimeUnit.MILLISECONDS.toNanos(milliseconds) - TimeUnit.MICROSECONDS.toNanos(microseconds); - - return String.format("%d h, %d min, %d s, %d ms, %d µs, %d ns", hours, minutes, seconds, milliseconds, microseconds, nanoseconds); - } - - private static long time(TimeUnit unit, long t) { - return unit.convert(t, TimeUnit.NANOSECONDS); - } - /** * Converts a number to a more readable format. * 12345 -> 12.345 @@ -97,12 +81,6 @@ public class Util { return types.contains(o.getClass()); } - public static void printMap(Map map) { - System.out.println("\nkey: value"); - map.forEach((k, v) -> System.out.print(String.format("%s:\t %,8d%n", k, v))); - System.out.println(); - } - /** * Generic map converter -> since AtomicLongs aren't as comparable. * Converts ConcurrentHashMap to HashMap @@ -117,23 +95,6 @@ public class Util { return m; } - public class ValueThenKeyComparator, - V extends Comparable> - implements Comparator> { - - public int compare(Map.Entry a, Map.Entry b) { - int cmp1 = a.getValue().compareTo(b.getValue()); - if (cmp1 != 0) { - return cmp1; - } else { - return a.getKey().compareTo(b.getKey()); - } - } - - } - - - /** * Sorts a map in a descending order by value. @@ -183,25 +144,6 @@ public class Util { return result; } - public static void printMap(Map map, String title, int number_of_words) { - System.out.println(String.format("\n%s\n------------\nkey: value\tpercent", title)); - map.forEach((k, v) -> - System.out.println(String.format("%s:\t %s\t %s%%", - k, - Util.formatNumberReadable(v), - Util.formatNumberReadable((double) v / number_of_words * 100)))); - System.out.println(); - } - - static long mapSumFrequencies(Map map) { - long sum = 0; - - for (long value : map.values()) { - sum += value; - } - - return sum; - } /** * Used for passing optional integer values for sorting. diff --git a/src/main/java/util/db/RDB.java b/src/main/java/util/db/RDB.java index 356f2a7..c987c82 100755 --- a/src/main/java/util/db/RDB.java +++ b/src/main/java/util/db/RDB.java @@ -84,16 +84,6 @@ public class RDB { } } - // public byte[] atomicIntToByteArray(final AtomicLong i) { - // BigInteger bigInt = BigInteger.valueOf(i.intValue()); - // - // return bigInt.toByteArray(); - // } - - public RocksDB getDb() { - return db; - } - public Map getDump() throws UnsupportedEncodingException { Map dump = new HashMap<>(); RocksDB.loadLibrary(); diff --git a/src/main/resources/gui/CharacterAnalysisTab.fxml b/src/main/resources/gui/CharacterAnalysisTab.fxml index 5c84c4f..ce92af0 100755 --- a/src/main/resources/gui/CharacterAnalysisTab.fxml +++ b/src/main/resources/gui/CharacterAnalysisTab.fxml @@ -34,17 +34,6 @@ - - - - - - - - - - - diff --git a/src/main/resources/gui/CorpusTab.fxml b/src/main/resources/gui/CorpusTab.fxml index e0c3788..848ff64 100755 --- a/src/main/resources/gui/CorpusTab.fxml +++ b/src/main/resources/gui/CorpusTab.fxml @@ -16,7 +16,6 @@ fx:controller="gui.CorpusTab"> -