From 9efe3d529b3cd68d1cc1854f54fc1addc738cd28 Mon Sep 17 00:00:00 2001 From: Luka Date: Mon, 26 Nov 2018 13:41:35 +0100 Subject: [PATCH] Taxonomy refactored --- src/main/java/alg/XML_processing.java | 36 +- .../java/alg/inflectedJOS/WordFormation.java | 3 +- src/main/java/alg/ngram/Ngrams.java | 4 +- src/main/java/data/Filter.java | 6 +- src/main/java/data/Sentence.java | 10 +- src/main/java/data/Statistics.java | 54 +- src/main/java/data/StatisticsNew.java | 80 +- src/main/java/data/Tax.java | 127 +-- src/main/java/data/Taxonomy.java | 808 +++++++++++++++--- src/main/java/gui/CharacterAnalysisTab.java | 172 ++-- src/main/java/gui/OneWordAnalysisTab.java | 150 ++-- src/main/java/gui/StringAnalysisTabNew2.java | 36 +- src/main/java/gui/WordFormationTab.java | 7 +- src/main/java/gui/WordLevelTab.java | 150 ++-- src/main/java/util/Export.java | 20 +- src/test/java/DBTest.java | 1 + 16 files changed, 1173 insertions(+), 491 deletions(-) diff --git a/src/main/java/alg/XML_processing.java b/src/main/java/alg/XML_processing.java index b080121..6779d23 100755 --- a/src/main/java/alg/XML_processing.java +++ b/src/main/java/alg/XML_processing.java @@ -536,8 +536,8 @@ public class XML_processing { boolean inWord = false; boolean inPunctuation = false; boolean taxonomyMatch = true; - ArrayList currentFiletaxonomy = new ArrayList<>(); - ArrayList currentFiletaxonomyLong = new ArrayList<>(); + ArrayList currentFiletaxonomy = new ArrayList<>(); +// ArrayList currentFiletaxonomyLong = new ArrayList<>(); String lemma = ""; String msd = ""; @@ -578,10 +578,10 @@ public class XML_processing { if (tax != null) { // keep only taxonomy properties - String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", ""); + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", "")); currentFiletaxonomy.add(currentFiletaxonomyElement); Tax taxonomy = new Tax(); - currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); +// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); } } break; @@ -637,7 +637,7 @@ public class XML_processing { // parser reached end of the current sentence if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { // count all UniGramOccurrences in sentence for statistics - stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomyLong); + stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy); // add sentence to corpus if it passes filters sentence = runFilters(sentence, stats.getFilter()); @@ -645,7 +645,7 @@ public class XML_processing { if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) { - corpus.add(new Sentence(sentence, currentFiletaxonomyLong)); + corpus.add(new Sentence(sentence, currentFiletaxonomy)); } // taxonomyMatch = true; @@ -713,8 +713,8 @@ public class XML_processing { public static boolean readXMLSSJ500K(String path, StatisticsNew stats) { boolean inWord = false; boolean inPunctuation = false; - ArrayList currentFiletaxonomy = new ArrayList<>(); - ArrayList currentFiletaxonomyLong = new ArrayList<>(); + ArrayList currentFiletaxonomy = new ArrayList<>(); +// ArrayList currentFiletaxonomyLong = new ArrayList<>(); String lemma = ""; String msd = ""; @@ -757,10 +757,10 @@ public class XML_processing { if (tax != null) { // keep only taxonomy properties - String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", ""); + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", "")); currentFiletaxonomy.add(currentFiletaxonomyElement); Tax taxonomy = new Tax(); - currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); +// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); } } break; @@ -793,7 +793,7 @@ public class XML_processing { sentence = runFilters(sentence, stats.getFilter()); if (!ValidationUtil.isEmpty(sentence)) { - corpus.add(new Sentence(sentence, currentFiletaxonomyLong)); + corpus.add(new Sentence(sentence, currentFiletaxonomy)); } // and start a new one @@ -820,7 +820,7 @@ public class XML_processing { corpus.clear(); currentFiletaxonomy = new ArrayList<>(); - currentFiletaxonomyLong = new ArrayList<>(); +// currentFiletaxonomyLong = new ArrayList<>(); } break; @@ -848,8 +848,8 @@ public class XML_processing { boolean inOrthDiv = false; boolean computeForOrth = stats.getCorpus().isGosOrthMode(); boolean inSeparatedWord = false; - ArrayList currentFiletaxonomy = new ArrayList<>(); - ArrayList currentFiletaxonomyLong = new ArrayList<>(); + ArrayList currentFiletaxonomy = new ArrayList<>(); +// ArrayList currentFiletaxonomyLong = new ArrayList<>(); String lemma = ""; String msd = ""; @@ -923,10 +923,10 @@ public class XML_processing { if (tax != null) { // keep only taxonomy properties - String currentFiletaxonomyElement = String.valueOf(tax.getValue()); + Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue())); currentFiletaxonomy.add(currentFiletaxonomyElement); Tax taxonomy = new Tax(); - currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); +// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); } } else if (qName.equalsIgnoreCase("div")) { gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); @@ -1010,7 +1010,7 @@ public class XML_processing { // add sentence to corpus if it passes filters if (includeFile && !ValidationUtil.isEmpty(sentence)) { sentence = runFilters(sentence, stats.getFilter()); - corpus.add(new Sentence(sentence, currentFiletaxonomyLong)); + corpus.add(new Sentence(sentence, currentFiletaxonomy)); } wordIndex = 0; @@ -1050,7 +1050,7 @@ public class XML_processing { corpus.clear(); currentFiletaxonomy = new ArrayList<>(); - currentFiletaxonomyLong = new ArrayList<>(); +// currentFiletaxonomyLong = new ArrayList<>(); } break; diff --git a/src/main/java/alg/inflectedJOS/WordFormation.java b/src/main/java/alg/inflectedJOS/WordFormation.java index f3c57bf..86fe16f 100755 --- a/src/main/java/alg/inflectedJOS/WordFormation.java +++ b/src/main/java/alg/inflectedJOS/WordFormation.java @@ -8,6 +8,7 @@ import java.util.stream.Collectors; import data.Enums.InflectedJosTypes; import data.StatisticsNew; +import data.Taxonomy; import gui.ValidationUtil; import util.Combinations; @@ -30,7 +31,7 @@ public class WordFormation { Map result = stat.getResult(); // 1. filter - keep only inflected types - result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.charAt(0))); + result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.toString().charAt(0))); // 2. for each inflected type get all possible subcombinations for (Character josChar : InflectedJosTypes.inflectedJosTypes) { diff --git a/src/main/java/alg/ngram/Ngrams.java b/src/main/java/alg/ngram/Ngrams.java index 8712f08..98c069c 100755 --- a/src/main/java/alg/ngram/Ngrams.java +++ b/src/main/java/alg/ngram/Ngrams.java @@ -267,7 +267,7 @@ public class Ngrams { for (Sentence s : corpus) { // stats.updateUniGramOccurrences(s.getWords().size()); for (Word w : s.getWords()) { - List taxonomy = s.getTaxonomy(); + List taxonomy = s.getTaxonomy(); //// List ngramCandidate = s.getSublist(i, i + stats.getFilter().getNgramValue()); List ngramCandidate = new ArrayList<>(); @@ -425,7 +425,7 @@ public class Ngrams { } } - private static void validateAndCountSkipgramCandidate(ArrayList skipgramCandidate, StatisticsNew stats, List taxonomy) { + private static void validateAndCountSkipgramCandidate(ArrayList skipgramCandidate, StatisticsNew stats, List taxonomy) { // count if no regex is set or if it is & candidate passes it if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) { // String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor()); diff --git a/src/main/java/data/Filter.java b/src/main/java/data/Filter.java index 84361de..d5cc07c 100755 --- a/src/main/java/data/Filter.java +++ b/src/main/java/data/Filter.java @@ -104,13 +104,13 @@ public class Filter implements Cloneable { return (Integer) filter.get(STRING_LENGTH); } - public void setTaxonomy(ArrayList taxonomy) { + public void setTaxonomy(ArrayList taxonomy) { filter.put(TAXONOMY, taxonomy); } - public ArrayList getTaxonomy() { + public ArrayList getTaxonomy() { if (filter.containsKey(TAXONOMY) && filter.get(TAXONOMY) != null) { - return (ArrayList) filter.get(TAXONOMY); + return (ArrayList) filter.get(TAXONOMY); } else { return new ArrayList<>(); } diff --git a/src/main/java/data/Sentence.java b/src/main/java/data/Sentence.java index 5213bb4..ebf4416 100755 --- a/src/main/java/data/Sentence.java +++ b/src/main/java/data/Sentence.java @@ -7,13 +7,13 @@ public class Sentence { private List words; - private List taxonomy; + private List taxonomy; // GOS private String type; private Map properties; - public Sentence(List words, List taxonomy) { + public Sentence(List words, List taxonomy) { this.words = words; this.taxonomy = taxonomy; } @@ -22,13 +22,13 @@ public class Sentence { // this.words = words; // } - public Sentence(List words, List taxonomy, Map properties) { + public Sentence(List words, List taxonomy, Map properties) { this.words = words; this.taxonomy = taxonomy; this.properties = properties; } - public Sentence(List words, List taxonomy, String type) { + public Sentence(List words, List taxonomy, String type) { this.words = words; this.taxonomy = taxonomy; this.type = type; @@ -38,7 +38,7 @@ public class Sentence { return words; } - public List getTaxonomy() { + public List getTaxonomy() { return taxonomy; } diff --git a/src/main/java/data/Statistics.java b/src/main/java/data/Statistics.java index bc76cc9..807fadb 100755 --- a/src/main/java/data/Statistics.java +++ b/src/main/java/data/Statistics.java @@ -68,22 +68,22 @@ public class Statistics { } // for words distributions - public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) { - String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); - - this.resultTitle = String.format("%s_%s_%s", - distributionTaxonomy != null ? distributionTaxonomy.toString() : "", - distributionJosWordType != null ? distributionJosWordType.toString() : "", - dateTime); - - this.analysisLevel = al; - this.cf = cf; - this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null; - this.taxonomyIsSet = distributionTaxonomy != null; - - this.JOSTypeIsSet = distributionJosWordType != null; - this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' '; - } +// public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) { +// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); +// +// this.resultTitle = String.format("%s_%s_%s", +// distributionTaxonomy != null ? distributionTaxonomy.toString() : "", +// distributionJosWordType != null ? distributionJosWordType.toString() : "", +// dateTime); +// +// this.analysisLevel = al; +// this.cf = cf; +// this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null; +// this.taxonomyIsSet = distributionTaxonomy != null; +// +// this.JOSTypeIsSet = distributionJosWordType != null; +// this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' '; +// } public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) { String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); @@ -99,17 +99,17 @@ public class Statistics { this.vcc = true; } - public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) { - String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); - - this.resultTitle = String.format("InflectedJOS_%s_%s", - distributionTaxonomy != null ? distributionTaxonomy : "", - dateTime); - - this.analysisLevel = al; - this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null; - this.taxonomyIsSet = inflectedJosTaxonomy != null; - } +// public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) { +// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); +// +// this.resultTitle = String.format("InflectedJOS_%s_%s", +// distributionTaxonomy != null ? distributionTaxonomy : "", +// dateTime); +// +// this.analysisLevel = al; +// this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null; +// this.taxonomyIsSet = inflectedJosTaxonomy != null; +// } public Integer getSkip() { return skip; diff --git a/src/main/java/data/StatisticsNew.java b/src/main/java/data/StatisticsNew.java index 3f46d19..266dc55 100755 --- a/src/main/java/data/StatisticsNew.java +++ b/src/main/java/data/StatisticsNew.java @@ -33,7 +33,7 @@ public class StatisticsNew { private String resultTitle; private Map result; - private Map> taxonomyResult; + private Map> taxonomyResult; private Object[][] resultCustom; // for when calculating percentages that don't add up to 100% private Map> resultNestedSuffix; private Map> resultNestedPrefix; @@ -43,28 +43,28 @@ public class StatisticsNew { private LocalDateTime timeBeginning; private LocalDateTime timeEnding; private Map> collocability; - private Map uniGramTaxonomyOccurrences; + private Map uniGramTaxonomyOccurrences; public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) { this.corpus = corpus; this.filter = filter; this.taxonomyResult = new ConcurrentHashMap<>(); - this.taxonomyResult.put("Total", new ConcurrentHashMap<>()); + this.taxonomyResult.put(Taxonomy.TOTAL, new ConcurrentHashMap<>()); this.collocability = new ConcurrentHashMap<>(); this.uniGramTaxonomyOccurrences = new ConcurrentHashMap<>(); - this.uniGramTaxonomyOccurrences.put("Total", new AtomicLong(0L)); + this.uniGramTaxonomyOccurrences.put(Taxonomy.TOTAL, new AtomicLong(0L)); // create table for counting word occurrences per taxonomies if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) { if (this.filter.getTaxonomy().isEmpty()) { for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) { - this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>()); + this.taxonomyResult.put(Taxonomy.factory(this.corpus.getTaxonomy().get(i)), new ConcurrentHashMap<>()); } } else { for (int i = 0; i < this.filter.getTaxonomy().size(); i++) { - Tax taxonomy = new Tax(); - this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>()); +// Tax taxonomy = new Tax(); + this.taxonomyResult.put(this.filter.getTaxonomy().get(i), new ConcurrentHashMap<>()); } } } @@ -202,15 +202,15 @@ public class StatisticsNew { /** * Stores results from this batch to a database and clears results map */ - public void storeTmpResultsToDB() { - try { - db.writeBatch(result); - result = new ConcurrentHashMap<>(); - } catch (UnsupportedEncodingException e) { - logger.error("Store tmp results to DB", e); - // e.printStackTrace(); - } - } +// public void storeTmpResultsToDB() { +// try { +// db.writeBatch(result); +// result = new ConcurrentHashMap<>(); +// } catch (UnsupportedEncodingException e) { +// logger.error("Store tmp results to DB", e); +// // e.printStackTrace(); +// } +// } public Filter getFilter() { return filter; @@ -229,16 +229,16 @@ public class StatisticsNew { } // if no results and nothing to save, return false - if (!(taxonomyResult.get("Total").size() > 0)) { + if (!(taxonomyResult.get(Taxonomy.TOTAL).size() > 0)) { analysisProducedResults = false; return false; } else { analysisProducedResults = true; } - removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences()); + removeMinimalOccurrences(taxonomyResult.get(Taxonomy.TOTAL), filter.getMinimalOccurrences()); removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy()); - stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit)))); + stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get(Taxonomy.TOTAL), Util.getValidInt(limit)))); Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter); return true; } @@ -246,18 +246,18 @@ public class StatisticsNew { /** * Removes lines, where number of different taxonomies is lower than specified number (minimalTaxonomy) */ - private void removeMinimalTaxonomy(Map> taxonomyResult, Integer minimalTaxonomy) { + private void removeMinimalTaxonomy(Map> taxonomyResult, Integer minimalTaxonomy) { if (minimalTaxonomy == 1) return; int occurances; - for (MultipleHMKeys key : taxonomyResult.get("Total").keySet()){ + for (MultipleHMKeys key : taxonomyResult.get(Taxonomy.TOTAL).keySet()){ occurances = 0; - for (String columnNameKey : taxonomyResult.keySet()){ - if(!columnNameKey.equals("Total") && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1) + for (Taxonomy columnNameKey : taxonomyResult.keySet()){ + if(!columnNameKey.equals(Taxonomy.TOTAL) && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1) occurances++; } if(occurances < minimalTaxonomy){ - taxonomyResult.get("Total").remove(key); + taxonomyResult.get(Taxonomy.TOTAL).remove(key); } } } @@ -343,9 +343,9 @@ public class StatisticsNew { return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit); } - public void updateUniGramOccurrences(int amount, ArrayList taxonomy){ - uniGramTaxonomyOccurrences.get("Total").set(uniGramTaxonomyOccurrences.get("Total").longValue() + amount); - for (String t : taxonomy){ + public void updateUniGramOccurrences(int amount, ArrayList taxonomy){ + uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).set(uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).longValue() + amount); + for (Taxonomy t : taxonomy){ if (uniGramTaxonomyOccurrences.get(t) != null){ uniGramTaxonomyOccurrences.get(t).set(uniGramTaxonomyOccurrences.get(t).longValue() + amount); } else { @@ -354,16 +354,16 @@ public class StatisticsNew { } } - public Map getUniGramOccurrences(){ -// return uniGramTaxonomyOccurrences.get("Total").longValue(); + public Map getUniGramOccurrences(){ +// return uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).longValue(); return uniGramTaxonomyOccurrences; } - public void updateTaxonomyResults(MultipleHMKeys o, List taxonomy) { - for (String key : taxonomyResult.keySet()) { + public void updateTaxonomyResults(MultipleHMKeys o, List taxonomy) { + for (Taxonomy key : taxonomyResult.keySet()) { // first word should have the same taxonomy as others - if (key.equals("Total") || taxonomy.contains(key)) { -// if (key.equals("Total") || taxonomy != null && taxonomy.contains(key)) { + if (key.equals(Taxonomy.TOTAL) || taxonomy.contains(key)) { +// if (key.equals(Taxonomy.TOTAL) || taxonomy != null && taxonomy.contains(key)) { // if taxonomy not in map and in this word AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1)); @@ -382,7 +382,7 @@ public class StatisticsNew { } - public Map> getTaxonomyResult() { + public Map> getTaxonomyResult() { return taxonomyResult; } @@ -608,7 +608,7 @@ public class StatisticsNew { } public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) { - Map> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult(); + Map> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult(); Map> collocabilityMap = new ConcurrentHashMap<>(); @@ -618,11 +618,11 @@ public class StatisticsNew { // count number of all words long N = 0; - for(AtomicLong a : oneWordTaxonomyResult.get("Total").values()){ + for(AtomicLong a : oneWordTaxonomyResult.get(Taxonomy.TOTAL).values()){ N += a.longValue(); } - for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) { + for(MultipleHMKeys hmKey : taxonomyResult.get(Taxonomy.TOTAL).keySet()) { // String[] splitedString = hmKey.getK1().split("\\s+"); long sum_fwi =0L; @@ -630,15 +630,15 @@ public class StatisticsNew { for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){ // System.out.println(smallHmKey.getK1()); - sum_fwi += oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue(); - mul_fwi *= oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue(); + sum_fwi += oneWordTaxonomyResult.get(Taxonomy.TOTAL).get(smallHmKey).longValue(); + mul_fwi *= oneWordTaxonomyResult.get(Taxonomy.TOTAL).get(smallHmKey).longValue(); } // String t = hmKey.getK1(); // if(hmKey.getK1().equals("v Slovenija")){ // System.out.println("TEST"); // // } - double O = (double)taxonomyResult.get("Total").get(hmKey).longValue(); + double O = (double)taxonomyResult.get(Taxonomy.TOTAL).get(hmKey).longValue(); double n = (double)filter.getNgramValue(); double E = (double)mul_fwi / Math.pow(N, n - 1); if (collocabilityMap.keySet().contains(Collocability.DICE)){ diff --git a/src/main/java/data/Tax.java b/src/main/java/data/Tax.java index 891fb54..f48e301 100755 --- a/src/main/java/data/Tax.java +++ b/src/main/java/data/Tax.java @@ -17,72 +17,72 @@ public class Tax { GIGAFIDA_TAXONOMY = new LinkedHashMap<>(); GIGAFIDA_TAXONOMY.put("SSJ.T", "SSJ.T - tisk"); - GIGAFIDA_TAXONOMY.put("SSJ.T.K", "SSJ.T.K - tisk-knjižno"); - GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", "SSJ.T.K.L - tisk-knjižno-leposlovno"); - GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", "SSJ.T.K.S - tisk-knjižno-strokovno"); - GIGAFIDA_TAXONOMY.put("SSJ.T.P", "SSJ.T.P - tisk-periodično"); - GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", "SSJ.T.P.C - tisk-periodično-časopis"); - GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", "SSJ.T.P.R - tisk-periodično-revija"); - GIGAFIDA_TAXONOMY.put("SSJ.T.D", "SSJ.T.D - tisk-drugo"); + GIGAFIDA_TAXONOMY.put("SSJ.T.K", " SSJ.T.K - tisk-knjižno"); + GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno"); + GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", " SSJ.T.K.S - tisk-knjižno-strokovno"); + GIGAFIDA_TAXONOMY.put("SSJ.T.P", " SSJ.T.P - tisk-periodično"); + GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", " SSJ.T.P.C - tisk-periodično-časopis"); + GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", " SSJ.T.P.R - tisk-periodično-revija"); + GIGAFIDA_TAXONOMY.put("SSJ.T.D", " SSJ.T.D - tisk-drugo"); GIGAFIDA_TAXONOMY.put("SSJ.I", "SSJ.I - internet"); GIGAFIDA_TAXONOMY.put("Ft.P", "Ft.P - prenosnik"); - GIGAFIDA_TAXONOMY.put("Ft.P.G", "Ft.P.G - prenosnik-govorni"); - GIGAFIDA_TAXONOMY.put("Ft.P.E", "Ft.P.E - prenosnik-elektronski"); - GIGAFIDA_TAXONOMY.put("Ft.P.P", "Ft.P.P - prenosnik-pisni"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O", "Ft.P.P.O - prenosnik-pisni-objavljeno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", "Ft.P.P.O.K - prenosnik-pisni-objavljeno-knjižno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", "Ft.P.P.O.P - prenosnik-pisni-objavljeno-periodično"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", "Ft.P.P.O.P.C - prenosnik-pisni-objavljeno-periodično-časopisno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", "Ft.P.P.O.P.C.D - prenosnik-pisni-objavljeno-periodično-časopisno-dnevno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", "Ft.P.P.O.P.C.V - prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", "Ft.P.P.O.P.C.T - prenosnik-pisni-objavljeno-periodično-časopisno-tedensko"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", "Ft.P.P.O.P.R - prenosnik-pisni-objavljeno-periodično-revialno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", "Ft.P.P.O.P.R.T - prenosnik-pisni-objavljeno-periodično-revialno-tedensko"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", "Ft.P.P.O.P.R.S - prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", "Ft.P.P.O.P.R.M - prenosnik-pisni-objavljeno-periodično-revialno-mesečno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", "Ft.P.P.O.P.R.D - prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", "Ft.P.P.O.P.R.O - prenosnik-pisni-objavljeno-periodično-revialno-občasno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.N", "Ft.P.P.N - prenosnik-pisni-neobjavljeno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", "Ft.P.P.N.J - prenosnik-pisni-neobjavljeno-javno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", "Ft.P.P.N.I - prenosnik-pisni-neobjavljeno-interno"); - GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", "Ft.P.P.N.Z - prenosnik-pisni-neobjavljeno-zasebno"); + GIGAFIDA_TAXONOMY.put("Ft.P.G", " Ft.P.G - prenosnik-govorni"); + GIGAFIDA_TAXONOMY.put("Ft.P.E", " Ft.P.E - prenosnik-elektronski"); + GIGAFIDA_TAXONOMY.put("Ft.P.P", " Ft.P.P - prenosnik-pisni"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O", " Ft.P.P.O - prenosnik-pisni-objavljeno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", " Ft.P.P.O.K - prenosnik-pisni-objavljeno-knjižno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", " Ft.P.P.O.P - prenosnik-pisni-objavljeno-periodično"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", " Ft.P.P.O.P.C - prenosnik-pisni-objavljeno-periodično-časopisno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", " Ft.P.P.O.P.C.D - prenosnik-pisni-objavljeno-periodično-časopisno-dnevno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", " Ft.P.P.O.P.C.V - prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", " Ft.P.P.O.P.C.T - prenosnik-pisni-objavljeno-periodično-časopisno-tedensko"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", " Ft.P.P.O.P.R - prenosnik-pisni-objavljeno-periodično-revialno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", " Ft.P.P.O.P.R.T - prenosnik-pisni-objavljeno-periodično-revialno-tedensko"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", " Ft.P.P.O.P.R.S - prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", " Ft.P.P.O.P.R.M - prenosnik-pisni-objavljeno-periodično-revialno-mesečno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", " Ft.P.P.O.P.R.D - prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", " Ft.P.P.O.P.R.O - prenosnik-pisni-objavljeno-periodično-revialno-občasno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.N", " Ft.P.P.N - prenosnik-pisni-neobjavljeno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", " Ft.P.P.N.J - prenosnik-pisni-neobjavljeno-javno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", " Ft.P.P.N.I - prenosnik-pisni-neobjavljeno-interno"); + GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", " Ft.P.P.N.Z - prenosnik-pisni-neobjavljeno-zasebno"); GIGAFIDA_TAXONOMY.put("Ft.Z", "Ft.Z - zvrst"); - GIGAFIDA_TAXONOMY.put("Ft.Z.U", "Ft.Z.U - zvrst-umetnostna"); - GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", "Ft.Z.U.P - zvrst-umetnostna-pesniška"); - GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", "Ft.Z.U.R - zvrst-umetnostna-prozna"); - GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", "Ft.Z.U.D - zvrst-umetnostna-dramska"); - GIGAFIDA_TAXONOMY.put("Ft.Z.N", "Ft.Z.N - zvrst-neumetnostna"); - GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", "Ft.Z.N.S - zvrst-neumetnostna-strokovna"); - GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", "Ft.Z.N.S.H - zvrst-neumetnostna-strokovna-humanistična in družboslovna"); - GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", "Ft.Z.N.S.N - zvrst-neumetnostna-strokovna-naravoslovna in tehnična"); - GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", "Ft.Z.N.N - zvrst-neumetnostna-nestrokovna"); - GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", "Ft.Z.N.P - zvrst-neumetnostna-pravna"); + GIGAFIDA_TAXONOMY.put("Ft.Z.U", " Ft.Z.U - zvrst-umetnostna"); + GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", " Ft.Z.U.P - zvrst-umetnostna-pesniška"); + GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", " Ft.Z.U.R - zvrst-umetnostna-prozna"); + GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", " Ft.Z.U.D - zvrst-umetnostna-dramska"); + GIGAFIDA_TAXONOMY.put("Ft.Z.N", " Ft.Z.N - zvrst-neumetnostna"); + GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", " Ft.Z.N.S - zvrst-neumetnostna-strokovna"); + GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", " Ft.Z.N.S.H - zvrst-neumetnostna-strokovna-humanistična in družboslovna"); + GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", " Ft.Z.N.S.N - zvrst-neumetnostna-strokovna-naravoslovna in tehnična"); + GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", " Ft.Z.N.N - zvrst-neumetnostna-nestrokovna"); + GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", " Ft.Z.N.P - zvrst-neumetnostna-pravna"); GIGAFIDA_TAXONOMY.put("Ft.L", "Ft.L - zvrst-lektorirano"); - GIGAFIDA_TAXONOMY.put("Ft.L.D", "Ft.L.D - zvrst-lektorirano-da"); - GIGAFIDA_TAXONOMY.put("Ft.L.N", "Ft.L.N - zvrst-lektorirano-ne"); + GIGAFIDA_TAXONOMY.put("Ft.L.D", " Ft.L.D - zvrst-lektorirano-da"); + GIGAFIDA_TAXONOMY.put("Ft.L.N", " Ft.L.N - zvrst-lektorirano-ne"); // GOS ---------------------------------- GOS_TAXONOMY = new LinkedHashMap<>(); GOS_TAXONOMY.put("gos.T", "gos.T - diskurz"); - GOS_TAXONOMY.put("gos.T.J", "gos.T.J - diskurz-javni"); - GOS_TAXONOMY.put("gos.T.J.I", "gos.T.J.I - diskurz-javni-informativno-izobraževalni"); - GOS_TAXONOMY.put("gos.T.J.R", "gos.T.J.R - diskurz-javni-razvedrilni"); - GOS_TAXONOMY.put("gos.T.N", "gos.T.N - diskurz-nejavni"); - GOS_TAXONOMY.put("gos.T.N.N", "gos.T.N.N - diskurz-nejavni-nezasebni"); - GOS_TAXONOMY.put("gos.T.N.Z", "gos.T.N.Z - diskurz-nejavni-zasebni"); + GOS_TAXONOMY.put("gos.T.J", " gos.T.J - diskurz-javni"); + GOS_TAXONOMY.put("gos.T.J.I", " gos.T.J.I - diskurz-javni-informativno-izobraževalni"); + GOS_TAXONOMY.put("gos.T.J.R", " gos.T.J.R - diskurz-javni-razvedrilni"); + GOS_TAXONOMY.put("gos.T.N", " gos.T.N - diskurz-nejavni"); + GOS_TAXONOMY.put("gos.T.N.N", " gos.T.N.N - diskurz-nejavni-nezasebni"); + GOS_TAXONOMY.put("gos.T.N.Z", " gos.T.N.Z - diskurz-nejavni-zasebni"); GOS_TAXONOMY.put("gos.S", "gos.S - situacija"); - GOS_TAXONOMY.put("gos.S.R", "gos.S.R - situacija-radio"); - GOS_TAXONOMY.put("gos.S.T", "gos.S.T - situacija-televizija"); + GOS_TAXONOMY.put("gos.S.R", " gos.S.R - situacija-radio"); + GOS_TAXONOMY.put("gos.S.T", " gos.S.T - situacija-televizija"); GOS_TAXONOMY.put("gos.K", "gos.K - kanal"); - GOS_TAXONOMY.put("gos.K.O", "gos.K.O - kanal-osebni stik"); - GOS_TAXONOMY.put("gos.K.P", "gos.K.P - kanal-telefon"); - GOS_TAXONOMY.put("gos.K.R", "gos.K.R - kanal-radio"); - GOS_TAXONOMY.put("gos.K.T", "gos.K.T - kanal-televizija"); + GOS_TAXONOMY.put("gos.K.O", " gos.K.O - kanal-osebni stik"); + GOS_TAXONOMY.put("gos.K.P", " gos.K.P - kanal-telefon"); + GOS_TAXONOMY.put("gos.K.R", " gos.K.R - kanal-radio"); + GOS_TAXONOMY.put("gos.K.T", " gos.K.T - kanal-televizija"); } /** @@ -112,6 +112,19 @@ public class Tax { ArrayList taxForCombo = new ArrayList<>(); + // adds parents taxonomy as well + HashSet genFoundTax = new HashSet<>(); + + for(String e : foundTax){ + String[] elList = e.split("\\."); + for(int i = 1; i < elList.length - 1; i++){ + String candidate = String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i)); + genFoundTax.add(candidate); + } + } + + foundTax.addAll(genFoundTax); + // assures same relative order for (String t : tax.keySet()) { if (foundTax.contains(t)) { @@ -126,7 +139,7 @@ public class Tax { return corpusTypesWithTaxonomy; } - public static ArrayList getTaxonomyCodes(ArrayList taxonomyNames, CorpusType corpusType) { + public static ArrayList getTaxonomyCodes(ArrayList taxonomyNames, CorpusType corpusType) { ArrayList result = new ArrayList<>(); if (ValidationUtil.isEmpty(taxonomyNames)) { @@ -146,8 +159,8 @@ public class Tax { .stream() .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); - for (String taxonomyName : taxonomyNames) { - result.add(taxInversed.get(taxonomyName)); + for (Taxonomy taxonomyName : taxonomyNames) { + result.add(taxInversed.get(taxonomyName.toString())); } return result; @@ -188,7 +201,7 @@ public class Tax { * * @return */ - public static ArrayList getTaxonomyForInfo(CorpusType corpusType, ArrayList taxonomy) { + public static ArrayList getTaxonomyForInfo(CorpusType corpusType, ArrayList taxonomy) { LinkedHashMap tax = new LinkedHashMap<>(); if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) { @@ -199,8 +212,8 @@ public class Tax { ArrayList result = new ArrayList<>(); - for (String t : taxonomy) { - result.add(tax.get(t)); + for (Taxonomy t : taxonomy) { + result.add(tax.get(t.toString())); } return result; diff --git a/src/main/java/data/Taxonomy.java b/src/main/java/data/Taxonomy.java index d6d408e..21663e3 100755 --- a/src/main/java/data/Taxonomy.java +++ b/src/main/java/data/Taxonomy.java @@ -1,171 +1,749 @@ package data; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.stream.Collectors; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; -import javafx.collections.FXCollections; import javafx.collections.ObservableList; public enum Taxonomy { + TOTAL("Total", "Total"), + // GOS - JAVNI("javni", "T.J", "gos"), - INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "T.J.I", "gos"), - RAZVEDRILNI("razvedrilni", "T.J.R", "gos"), - NEJAVNI("nejavni", "T.N", "gos"), - NEZASEBNI("nezasebni", "T.N.N", "gos"), - ZASEBNI("zasebni", "T.N.Z", "gos"), - OSEBNI_STIK("osebni stik", "K.O", "gos"), - TELEFON("telefon", "K.P", "gos"), - RADIO("radio", "K.R", "gos"), - TELEVIZIJA("televizija", "K.T", "gos"), + DISKURZ("gos.T", "gos.T - diskurz"), + DISKURZ_JAVNI("gos.T.J", " gos.T.J - diskurz-javni"), + DISKURZ_INFORMATIVNO_IZOBRAZEVALNI("gos.T.J.I", " gos.T.J.I - diskurz-javni-informativno-izobraževalni"), + DISKURZ_RAZVEDRILNI("gos.T.J.R", " gos.T.J.R - diskurz-javni-razvedrilni"), + DISKURZ_NEJAVNI("gos.T.N", " gos.T.N - diskurz-nejavni"), + DISKURZ_NEZASEBNI("gos.T.N.N", " gos.T.N.N - diskurz-nejavni-nezasebni"), + DISKURZ_ZASEBNI("gos.T.N.Z", " gos.T.N.Z - diskurz-nejavni-zasebni"), + SITUACIJA("gos.S", "gos.S - situacija"), + SITUACIJA_RADIO("gos.S.R", " gos.S.R - situacija-radio"), + SITUACIJA_TELEVIZIJA("gos.S.T", " gos.S.T - situacija-televizija"), + KANAL("gos.K", "gos.K - kanal"), + KANAL_OSEBNI_STIK("gos.K.O", " gos.K.O - kanal-osebni stik"), + KANAL_TELEFON("gos.K.P", " gos.K.P - kanal-telefon"), + KANAL_RADIO("gos.K.R", " gos.K.R - kanal-radio"), + KANAL_TELEVIZIJA("gos.K.T", " gos.K.T - kanal-televizija"), + + + // Gigafida - KNJIZNO("knjižno", "T.K", "gigafida"), - LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"), - STROKOVNO("strokovno", "T.K.S", "gigafida"), - PERIODICNO("periodično", "T.P", "gigafida"), - CASOPIS("časopis", "T.P.C", "gigafida"), - REVIJA("revija", "T.P.R", "gigafida"), - INTERNET("internet", "I", "gigafida"), - - SSJ_TISK("tisk", "SSJ.T", "gigafida"), - SSJ_KNJIZNO("opis", "identifikator", "gigafida"), - SSJ_LEPOSLOVNO("opis", "identifikator", "gigafida"), - SSJ_STROKOVNO("opis", "identifikator", "gigafida"), - SSJ_PERIODICNO("opis", "identifikator", "gigafida"), - SSJ_CASOPIS("opis", "identifikator", "gigafida"), - SSJ_REVIJA("opis", "identifikator", "gigafida"), - SSJ_DRUGO("opis", "identifikator", "gigafida"), - SSJ_INTERNET("opis", "identifikator", "gigafida"), - FT_P_PRENOSNIK("opis", "identifikator", "gigafida"), - FT_P_GOVORNI("opis", "identifikator", "gigafida"), - FT_P_ELEKTRONSKI("opis", "identifikator", "gigafida"), - FT_P_PISNI("opis", "identifikator", "gigafida"), - FT_P_OBJAVLJENO("opis", "identifikator", "gigafida"), - FT_P_KNJIZNO("opis", "identifikator", "gigafida"), - FT_P_PERIODICNO("opis", "identifikator", "gigafida"), - FT_P_CASOPISNO("opis", "identifikator", "gigafida"), - FT_P_DNEVNO("opis", "identifikator", "gigafida"), - FT_P_VECKRAT_TEDENSKO("opis", "identifikator", "gigafida"), - // FT_P_TEDENSKO("opis", "identifikator", "gigafida"), - FT_P_REVIALNO("opis", "identifikator", "gigafida"), - FT_P_TEDENSKO("opis", "identifikator", "gigafida"), - FT_P_STIRINAJSTDNEVNO("opis", "identifikator", "gigafida"), - FT_P_MESECNO("opis", "identifikator", "gigafida"), - FT_P_REDKEJE_KOT_MESECNO("opis", "identifikator", "gigafida"), - FT_P_OBCASNO("opis", "identifikator", "gigafida"), - FT_P_NEOBJAVLJENO("opis", "identifikator", "gigafida"), - FT_P_JAVNO("opis", "identifikator", "gigafida"), - FT_P_INTERNO("opis", "identifikator", "gigafida"), - FT_P_ZASEBNO("opis", "identifikator", "gigafida"), - FT_ZVRST("opis", "identifikator", "gigafida"), - FT_UMETNOSTNA("opis", "identifikator", "gigafida"), - FT_PESNISKA("opis", "identifikator", "gigafida"), - FT_PROZNA("opis", "identifikator", "gigafida"), - FT_DRAMSKA("opis", "identifikator", "gigafida"), - FT_NEUMETNOSTNA("opis", "identifikator", "gigafida"), - FT_STROKOVNA("opis", "identifikator", "gigafida"), - FT_HID("opis", "identifikator", "gigafida"), - FT_NIT("opis", "identifikator", "gigafida"), - FT_NESTROKOVNA("opis", "identifikator", "gigafida"), - FT_PRAVNA("opis", "identifikator", "gigafida"), - FT_LEKTORIRANO("opis", "identifikator", "gigafida"), - FT_DA("opis", "identifikator", "gigafida"), - FT_NE("opis", "identifikator", "gigafida"); +// KNJIZNO("knjižno", "T.K", "gigafida"), +// LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"), +// STROKOVNO("strokovno", "T.K.S", "gigafida"), +// PERIODICNO("periodično", "T.P", "gigafida"), +// CASOPIS("časopis", "T.P.C", "gigafida"), +// REVIJA("revija", "T.P.R", "gigafida"), +// INTERNET("internet", "I", "gigafida"), + SSJ_TISK("SSJ.T", "SSJ.T - tisk"), + SSJ_KNJIZNO("SSJ.T.K", " SSJ.T.K - tisk-knjižno"), + SSJ_LEPOSLOVNO("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno"), + SSJ_STROKOVNO("SSJ.T.K.S", " SSJ.T.K.S - tisk-knjižno-strokovno"), + SSJ_PERIODICNO("SSJ.T.P", " SSJ.T.P - tisk-periodično"), + SSJ_CASOPIS("SSJ.T.P.C", " SSJ.T.P.C - tisk-periodično-časopis"), + SSJ_REVIJA("SSJ.T.P.R", " SSJ.T.P.R - tisk-periodično-revija"), + SSJ_DRUGO("SSJ.T.D", " SSJ.T.D - tisk-drugo"), + SSJ_INTERNET("SSJ.I", "SSJ.I - internet"), + FT_P_PRENOSNIK("Ft.P", "Ft.P - prenosnik"), + FT_P_GOVORNI("Ft.P.G", " Ft.P.G - prenosnik-govorni"), + FT_P_ELEKTRONSKI("Ft.P.E", " Ft.P.E - prenosnik-elektronski"), + FT_P_PISNI("Ft.P.P", " Ft.P.P - prenosnik-pisni"), + FT_P_OBJAVLJENO("Ft.P.P.O", " Ft.P.P.O - prenosnik-pisni-objavljeno"), + FT_P_KNJIZNO("Ft.P.P.O.K", " Ft.P.P.O.K - prenosnik-pisni-objavljeno-knjižno"), + FT_P_PERIODICNO("Ft.P.P.O.P", " Ft.P.P.O.P - prenosnik-pisni-objavljeno-periodično"), + FT_P_CASOPISNO("Ft.P.P.O.P.C", " Ft.P.P.O.P.C - prenosnik-pisni-objavljeno-periodično-časopisno"), + FT_P_DNEVNO("Ft.P.P.O.P.C.D", " Ft.P.P.O.P.C.D - prenosnik-pisni-objavljeno-periodično-časopisno-dnevno"), + FT_P_VECKRAT_TEDENSKO("Ft.P.P.O.P.C.V", " Ft.P.P.O.P.C.V - prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko"), + FT_P_CASOPISNO_TEDENSKO("Ft.P.P.O.P.C.T", " Ft.P.P.O.P.C.T - prenosnik-pisni-objavljeno-periodično-časopisno-tedensko"), + FT_P_REVIALNO("Ft.P.P.O.P.R", " Ft.P.P.O.P.R - prenosnik-pisni-objavljeno-periodično-revialno"), + FT_P_TEDENSKO("Ft.P.P.O.P.R.T", " Ft.P.P.O.P.R.T - prenosnik-pisni-objavljeno-periodično-revialno-tedensko"), + FT_P_STIRINAJSTDNEVNO("Ft.P.P.O.P.R.S", " Ft.P.P.O.P.R.S - prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno"), + FT_P_MESECNO("Ft.P.P.O.P.R.M", " Ft.P.P.O.P.R.M - prenosnik-pisni-objavljeno-periodično-revialno-mesečno"), + FT_P_REDKEJE_KOT_MESECNO("Ft.P.P.O.P.R.D", " Ft.P.P.O.P.R.D - prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec"), + FT_P_OBCASNO("Ft.P.P.O.P.R.O", " Ft.P.P.O.P.R.O - prenosnik-pisni-objavljeno-periodično-revialno-občasno"), + FT_P_NEOBJAVLJENO("Ft.P.P.N", " Ft.P.P.N - prenosnik-pisni-neobjavljeno"), + FT_P_JAVNO("Ft.P.P.N.J", " Ft.P.P.N.J - prenosnik-pisni-neobjavljeno-javno"), + FT_P_INTERNO("Ft.P.P.N.I", " Ft.P.P.N.I - prenosnik-pisni-neobjavljeno-interno"), + FT_P_ZASEBNO("Ft.P.P.N.Z", " Ft.P.P.N.Z - prenosnik-pisni-neobjavljeno-zasebno"), + FT_ZVRST("Ft.Z", "Ft.Z - zvrst"), + FT_UMETNOSTNA("Ft.Z.U", " Ft.Z.U - zvrst-umetnostna"), + FT_PESNISKA("Ft.Z.U.P", " Ft.Z.U.P - zvrst-umetnostna-pesniška"), + FT_PROZNA("Ft.Z.U.R", " Ft.Z.U.R - zvrst-umetnostna-prozna"), + FT_DRAMSKA("Ft.Z.U.D", " Ft.Z.U.D - zvrst-umetnostna-dramska"), + FT_NEUMETNOSTNA("Ft.Z.N", " Ft.Z.N - zvrst-neumetnostna"), + FT_STROKOVNA("Ft.Z.N.S", " Ft.Z.N.S - zvrst-neumetnostna-strokovna"), + FT_HID("Ft.Z.N.S.H", " Ft.Z.N.S.H - zvrst-neumetnostna-strokovna-humanistična in družboslovna"), + FT_NIT("Ft.Z.N.S.N", " Ft.Z.N.S.N - zvrst-neumetnostna-strokovna-naravoslovna in tehnična"), + FT_NESTROKOVNA("Ft.Z.N.N", " Ft.Z.N.N - zvrst-neumetnostna-nestrokovna"), + FT_PRAVNA("Ft.Z.N.P", " Ft.Z.N.P - zvrst-neumetnostna-pravna"), + FT_LEKTORIRANO("Ft.L", "Ft.L - zvrst-lektorirano"), + FT_DA("Ft.L.D", " Ft.L.D - zvrst-lektorirano-da"), + FT_NE("Ft.L.N", " Ft.L.N - zvrst-lektorirano-ne"); private final String name; - private final String taxonomy; - private final String corpus; + private final String longName; - Taxonomy(String name, String taxonomy, String corpusType) { + Taxonomy(String name, String longName) { this.name = name; - this.taxonomy = taxonomy; - this.corpus = corpusType; + this.longName = longName; } public String toString() { return this.name; } - public String getTaxonomnyString() { - return this.taxonomy; + public String toLongNameString() { + return this.longName; } public static Taxonomy factory(String tax) { if (tax != null) { // GOS - if (JAVNI.toString().equals(tax)) { - return JAVNI; + if (DISKURZ.toString().equals(tax)) { + return DISKURZ; + } + if (DISKURZ_JAVNI.toString().equals(tax)) { + return DISKURZ_JAVNI; + } + if (DISKURZ_INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) { + return DISKURZ_INFORMATIVNO_IZOBRAZEVALNI; + } + if (DISKURZ_RAZVEDRILNI.toString().equals(tax)) { + return DISKURZ_RAZVEDRILNI; + } + if (DISKURZ_NEJAVNI.toString().equals(tax)) { + return DISKURZ_NEJAVNI; + } + if (DISKURZ_NEZASEBNI.toString().equals(tax)) { + return DISKURZ_NEZASEBNI; + } + if (DISKURZ_ZASEBNI.toString().equals(tax)) { + return DISKURZ_ZASEBNI; + } + if (SITUACIJA.toString().equals(tax)) { + return SITUACIJA; + } + if (SITUACIJA_RADIO.toString().equals(tax)) { + return SITUACIJA_RADIO; + } + if (SITUACIJA_TELEVIZIJA.toString().equals(tax)) { + return SITUACIJA_TELEVIZIJA; + } + if (KANAL.toString().equals(tax)) { + return KANAL; + } + if (KANAL_OSEBNI_STIK.toString().equals(tax)) { + return KANAL_OSEBNI_STIK; + } + if (KANAL_TELEFON.toString().equals(tax)) { + return KANAL_TELEFON; + } + if (KANAL_RADIO.toString().equals(tax)) { + return KANAL_RADIO; + } + if (KANAL_TELEVIZIJA.toString().equals(tax)) { + return KANAL_TELEVIZIJA; + } + + // Gigafida + // if (TISK.toString().equals(tax)) { + // return TISK; + // } + if (SSJ_TISK.toString().equals(tax)) { + return SSJ_TISK; + } + if (SSJ_KNJIZNO.toString().equals(tax)) { + return SSJ_KNJIZNO; + } + if (SSJ_LEPOSLOVNO.toString().equals(tax)) { + return SSJ_LEPOSLOVNO; + } + if (SSJ_STROKOVNO.toString().equals(tax)) { + return SSJ_STROKOVNO; + } + if (SSJ_PERIODICNO.toString().equals(tax)) { + return SSJ_PERIODICNO; + } + if (SSJ_CASOPIS.toString().equals(tax)) { + return SSJ_CASOPIS; + } + if (SSJ_REVIJA.toString().equals(tax)) { + return SSJ_REVIJA; + } + if (SSJ_DRUGO.toString().equals(tax)) { + return SSJ_DRUGO; + } + if (SSJ_INTERNET.toString().equals(tax)) { + return SSJ_INTERNET; + } + + if (FT_P_PRENOSNIK.toString().equals(tax)) { + return FT_P_PRENOSNIK; + } + if (FT_P_GOVORNI.toString().equals(tax)) { + return FT_P_GOVORNI; + } + if (FT_P_ELEKTRONSKI.toString().equals(tax)) { + return FT_P_ELEKTRONSKI; + } + if (FT_P_PISNI.toString().equals(tax)) { + return FT_P_PISNI; + } + if (FT_P_OBJAVLJENO.toString().equals(tax)) { + return FT_P_OBJAVLJENO; + } + if (FT_P_KNJIZNO.toString().equals(tax)) { + return FT_P_KNJIZNO; + } + if (FT_P_PERIODICNO.toString().equals(tax)) { + return FT_P_PERIODICNO; + } + if (FT_P_CASOPISNO.toString().equals(tax)) { + return FT_P_CASOPISNO; + } + if (FT_P_DNEVNO.toString().equals(tax)) { + return FT_P_DNEVNO; + } + if (FT_P_VECKRAT_TEDENSKO.toString().equals(tax)) { + return FT_P_VECKRAT_TEDENSKO; + } + if (FT_P_CASOPISNO_TEDENSKO.toString().equals(tax)) { + return FT_P_CASOPISNO_TEDENSKO; + } + if (FT_P_REVIALNO.toString().equals(tax)) { + return FT_P_REVIALNO; + } + if (FT_P_TEDENSKO.toString().equals(tax)) { + return FT_P_TEDENSKO; + } + if (FT_P_STIRINAJSTDNEVNO.toString().equals(tax)) { + return FT_P_STIRINAJSTDNEVNO; + } + if (FT_P_MESECNO.toString().equals(tax)) { + return FT_P_MESECNO; + } + if (FT_P_REDKEJE_KOT_MESECNO.toString().equals(tax)) { + return FT_P_REDKEJE_KOT_MESECNO; + } + if (FT_P_OBCASNO.toString().equals(tax)) { + return FT_P_OBCASNO; + } + if (FT_P_NEOBJAVLJENO.toString().equals(tax)) { + return FT_P_NEOBJAVLJENO; + } + if (FT_P_JAVNO.toString().equals(tax)) { + return FT_P_JAVNO; + } + if (FT_P_INTERNO.toString().equals(tax)) { + return FT_P_INTERNO; + } + if (FT_P_ZASEBNO.toString().equals(tax)) { + return FT_P_ZASEBNO; + } + if (FT_ZVRST.toString().equals(tax)) { + return FT_ZVRST; + } + if (FT_UMETNOSTNA.toString().equals(tax)) { + return FT_UMETNOSTNA; + } + if (FT_PESNISKA.toString().equals(tax)) { + return FT_PESNISKA; + } + if (FT_PROZNA.toString().equals(tax)) { + return FT_PROZNA; + } + if (FT_DRAMSKA.toString().equals(tax)) { + return FT_DRAMSKA; + } + if (FT_NEUMETNOSTNA.toString().equals(tax)) { + return FT_NEUMETNOSTNA; + } + if (FT_STROKOVNA.toString().equals(tax)) { + return FT_STROKOVNA; + } + if (FT_NIT.toString().equals(tax)) { + return FT_NIT; + } + if (FT_HID.toString().equals(tax)) { + return FT_HID; + } + if (FT_NESTROKOVNA.toString().equals(tax)) { + return FT_NESTROKOVNA; + } + if (FT_PRAVNA.toString().equals(tax)) { + return FT_PRAVNA; + } + if (FT_LEKTORIRANO.toString().equals(tax)) { + return FT_LEKTORIRANO; + } + if (FT_DA.toString().equals(tax)) { + return FT_DA; + } + if (FT_NE.toString().equals(tax)) { + return FT_NE; + } + + } + return null; + } + + public static Taxonomy factoryLongName(String tax) { + if (tax != null) { + // GOS + if (DISKURZ.toLongNameString().equals(tax)) { + return DISKURZ; + } + if (DISKURZ_JAVNI.toLongNameString().equals(tax)) { + return DISKURZ_JAVNI; + } + if (DISKURZ_INFORMATIVNO_IZOBRAZEVALNI.toLongNameString().equals(tax)) { + return DISKURZ_INFORMATIVNO_IZOBRAZEVALNI; + } + if (DISKURZ_RAZVEDRILNI.toLongNameString().equals(tax)) { + return DISKURZ_RAZVEDRILNI; + } + if (DISKURZ_NEJAVNI.toLongNameString().equals(tax)) { + return DISKURZ_NEJAVNI; + } + if (DISKURZ_NEZASEBNI.toLongNameString().equals(tax)) { + return DISKURZ_NEZASEBNI; } - if (INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) { - return INFORMATIVNO_IZOBRAZEVALNI; + if (DISKURZ_ZASEBNI.toLongNameString().equals(tax)) { + return DISKURZ_ZASEBNI; } - if (RAZVEDRILNI.toString().equals(tax)) { - return RAZVEDRILNI; + if (SITUACIJA.toLongNameString().equals(tax)) { + return SITUACIJA; } - if (NEJAVNI.toString().equals(tax)) { - return NEJAVNI; + if (SITUACIJA_RADIO.toLongNameString().equals(tax)) { + return SITUACIJA_RADIO; } - if (NEZASEBNI.toString().equals(tax)) { - return NEZASEBNI; + if (SITUACIJA_TELEVIZIJA.toLongNameString().equals(tax)) { + return SITUACIJA_TELEVIZIJA; } - if (ZASEBNI.toString().equals(tax)) { - return ZASEBNI; + if (KANAL.toLongNameString().equals(tax)) { + return KANAL; } - if (OSEBNI_STIK.toString().equals(tax)) { - return OSEBNI_STIK; + if (KANAL_OSEBNI_STIK.toLongNameString().equals(tax)) { + return KANAL_OSEBNI_STIK; } - if (TELEFON.toString().equals(tax)) { - return TELEFON; + if (KANAL_TELEFON.toLongNameString().equals(tax)) { + return KANAL_TELEFON; } - if (RADIO.toString().equals(tax)) { - return RADIO; + if (KANAL_RADIO.toLongNameString().equals(tax)) { + return KANAL_RADIO; } - if (TELEVIZIJA.toString().equals(tax)) { - return TELEVIZIJA; + if (KANAL_TELEVIZIJA.toLongNameString().equals(tax)) { + return KANAL_TELEVIZIJA; } // Gigafida // if (TISK.toString().equals(tax)) { // return TISK; // } - if (KNJIZNO.toString().equals(tax)) { - return KNJIZNO; + if (SSJ_TISK.toLongNameString().equals(tax)) { + return SSJ_TISK; + } + if (SSJ_KNJIZNO.toLongNameString().equals(tax)) { + return SSJ_KNJIZNO; + } + if (SSJ_LEPOSLOVNO.toLongNameString().equals(tax)) { + return SSJ_LEPOSLOVNO; + } + if (SSJ_STROKOVNO.toLongNameString().equals(tax)) { + return SSJ_STROKOVNO; + } + if (SSJ_PERIODICNO.toLongNameString().equals(tax)) { + return SSJ_PERIODICNO; + } + if (SSJ_CASOPIS.toLongNameString().equals(tax)) { + return SSJ_CASOPIS; + } + if (SSJ_REVIJA.toLongNameString().equals(tax)) { + return SSJ_REVIJA; + } + if (SSJ_DRUGO.toLongNameString().equals(tax)) { + return SSJ_DRUGO; + } + if (SSJ_INTERNET.toLongNameString().equals(tax)) { + return SSJ_INTERNET; + } + + if (FT_P_PRENOSNIK.toLongNameString().equals(tax)) { + return FT_P_PRENOSNIK; + } + if (FT_P_GOVORNI.toLongNameString().equals(tax)) { + return FT_P_GOVORNI; + } + if (FT_P_ELEKTRONSKI.toLongNameString().equals(tax)) { + return FT_P_ELEKTRONSKI; + } + if (FT_P_PISNI.toLongNameString().equals(tax)) { + return FT_P_PISNI; + } + if (FT_P_OBJAVLJENO.toLongNameString().equals(tax)) { + return FT_P_OBJAVLJENO; + } + if (FT_P_KNJIZNO.toLongNameString().equals(tax)) { + return FT_P_KNJIZNO; + } + if (FT_P_PERIODICNO.toLongNameString().equals(tax)) { + return FT_P_PERIODICNO; + } + if (FT_P_CASOPISNO.toLongNameString().equals(tax)) { + return FT_P_CASOPISNO; + } + if (FT_P_DNEVNO.toLongNameString().equals(tax)) { + return FT_P_DNEVNO; + } + if (FT_P_VECKRAT_TEDENSKO.toLongNameString().equals(tax)) { + return FT_P_VECKRAT_TEDENSKO; + } + if (FT_P_CASOPISNO_TEDENSKO.toLongNameString().equals(tax)) { + return FT_P_CASOPISNO_TEDENSKO; + } + if (FT_P_REVIALNO.toLongNameString().equals(tax)) { + return FT_P_REVIALNO; + } + if (FT_P_TEDENSKO.toLongNameString().equals(tax)) { + return FT_P_TEDENSKO; + } + if (FT_P_STIRINAJSTDNEVNO.toLongNameString().equals(tax)) { + return FT_P_STIRINAJSTDNEVNO; + } + if (FT_P_MESECNO.toLongNameString().equals(tax)) { + return FT_P_MESECNO; + } + if (FT_P_REDKEJE_KOT_MESECNO.toLongNameString().equals(tax)) { + return FT_P_REDKEJE_KOT_MESECNO; + } + if (FT_P_OBCASNO.toLongNameString().equals(tax)) { + return FT_P_OBCASNO; + } + if (FT_P_NEOBJAVLJENO.toLongNameString().equals(tax)) { + return FT_P_NEOBJAVLJENO; } - if (LEPOSLOVNO.toString().equals(tax)) { - return LEPOSLOVNO; + if (FT_P_JAVNO.toLongNameString().equals(tax)) { + return FT_P_JAVNO; } - if (STROKOVNO.toString().equals(tax)) { - return STROKOVNO; + if (FT_P_INTERNO.toLongNameString().equals(tax)) { + return FT_P_INTERNO; } - if (PERIODICNO.toString().equals(tax)) { - return PERIODICNO; + if (FT_P_ZASEBNO.toLongNameString().equals(tax)) { + return FT_P_ZASEBNO; } - if (CASOPIS.toString().equals(tax)) { - return CASOPIS; + if (FT_ZVRST.toLongNameString().equals(tax)) { + return FT_ZVRST; } - if (REVIJA.toString().equals(tax)) { - return REVIJA; + if (FT_UMETNOSTNA.toLongNameString().equals(tax)) { + return FT_UMETNOSTNA; } - if (INTERNET.toString().equals(tax)) { - return INTERNET; + if (FT_PESNISKA.toLongNameString().equals(tax)) { + return FT_PESNISKA; } + if (FT_PROZNA.toLongNameString().equals(tax)) { + return FT_PROZNA; + } + if (FT_DRAMSKA.toLongNameString().equals(tax)) { + return FT_DRAMSKA; + } + if (FT_NEUMETNOSTNA.toLongNameString().equals(tax)) { + return FT_NEUMETNOSTNA; + } + if (FT_STROKOVNA.toLongNameString().equals(tax)) { + return FT_STROKOVNA; + } + if (FT_NIT.toLongNameString().equals(tax)) { + return FT_NIT; + } + if (FT_HID.toLongNameString().equals(tax)) { + return FT_HID; + } + if (FT_NESTROKOVNA.toLongNameString().equals(tax)) { + return FT_NESTROKOVNA; + } + if (FT_PRAVNA.toLongNameString().equals(tax)) { + return FT_PRAVNA; + } + if (FT_LEKTORIRANO.toLongNameString().equals(tax)) { + return FT_LEKTORIRANO; + } + if (FT_DA.toLongNameString().equals(tax)) { + return FT_DA; + } + if (FT_NE.toLongNameString().equals(tax)) { + return FT_NE; + } + } return null; } - public static ObservableList getDefaultForComboBox(String corpusType) { - ArrayList values = Arrays.stream(Taxonomy.values()) - .filter(x -> x.corpus.equals(corpusType)) - .map(x -> x.name) - .collect(Collectors.toCollection(ArrayList::new)); + public static ArrayList taxonomySelected(Taxonomy disjointTaxonomy) { + ArrayList r = new ArrayList<>(); + + System.out.println(disjointTaxonomy); + if(disjointTaxonomy.equals(DISKURZ)){ + r.add(DISKURZ_JAVNI); + r.add(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI); + r.add(DISKURZ_RAZVEDRILNI); + r.add(DISKURZ_NEJAVNI); + r.add(DISKURZ_NEZASEBNI); + r.add(DISKURZ_ZASEBNI); + } else if(disjointTaxonomy.equals(DISKURZ_JAVNI)){ + r.add(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI); + r.add(DISKURZ_RAZVEDRILNI); + } else if(disjointTaxonomy.equals(DISKURZ_NEJAVNI)){ + r.add(DISKURZ_NEZASEBNI); + r.add(DISKURZ_ZASEBNI); + } else if(disjointTaxonomy.equals(SITUACIJA)){ + r.add(SITUACIJA_RADIO); + r.add(SITUACIJA_TELEVIZIJA); + } else if(disjointTaxonomy.equals(KANAL)){ + r.add(KANAL_OSEBNI_STIK); + r.add(KANAL_RADIO); + r.add(KANAL_TELEVIZIJA); + } else if(disjointTaxonomy.equals(SSJ_TISK)){ + r.add(SSJ_KNJIZNO); + r.add(SSJ_LEPOSLOVNO); + r.add(SSJ_STROKOVNO); + r.add(SSJ_PERIODICNO); + r.add(SSJ_CASOPIS); + r.add(SSJ_REVIJA); + r.add(SSJ_DRUGO); + } else if(disjointTaxonomy.equals(SSJ_KNJIZNO)){ + r.add(SSJ_LEPOSLOVNO); + r.add(SSJ_STROKOVNO); + } else if(disjointTaxonomy.equals(SSJ_PERIODICNO)){ + r.add(SSJ_CASOPIS); + r.add(SSJ_REVIJA); + } else if(disjointTaxonomy.equals(FT_P_PRENOSNIK)){ + r.add(FT_P_GOVORNI); + r.add(FT_P_ELEKTRONSKI); + r.add(FT_P_PISNI); + r.add(FT_P_OBJAVLJENO); + r.add(FT_P_KNJIZNO); + r.add(FT_P_PERIODICNO); + r.add(FT_P_CASOPISNO); + r.add(FT_P_DNEVNO); + r.add(FT_P_VECKRAT_TEDENSKO); + r.add(FT_P_CASOPISNO_TEDENSKO); + r.add(FT_P_REVIALNO); + r.add(FT_P_TEDENSKO); + r.add(FT_P_STIRINAJSTDNEVNO); + r.add(FT_P_MESECNO); + r.add(FT_P_REDKEJE_KOT_MESECNO); + r.add(FT_P_OBCASNO); + r.add(FT_P_NEOBJAVLJENO); + r.add(FT_P_JAVNO); + r.add(FT_P_INTERNO); + r.add(FT_P_ZASEBNO); + } else if(disjointTaxonomy.equals(FT_P_PISNI)){ + r.add(FT_P_OBJAVLJENO); + r.add(FT_P_KNJIZNO); + r.add(FT_P_PERIODICNO); + r.add(FT_P_CASOPISNO); + r.add(FT_P_DNEVNO); + r.add(FT_P_VECKRAT_TEDENSKO); + r.add(FT_P_CASOPISNO_TEDENSKO); + r.add(FT_P_REVIALNO); + r.add(FT_P_TEDENSKO); + r.add(FT_P_STIRINAJSTDNEVNO); + r.add(FT_P_MESECNO); + r.add(FT_P_REDKEJE_KOT_MESECNO); + r.add(FT_P_OBCASNO); + r.add(FT_P_NEOBJAVLJENO); + r.add(FT_P_JAVNO); + r.add(FT_P_INTERNO); + r.add(FT_P_ZASEBNO); + } else if(disjointTaxonomy.equals(FT_P_OBJAVLJENO)){ + r.add(FT_P_KNJIZNO); + r.add(FT_P_PERIODICNO); + r.add(FT_P_CASOPISNO); + r.add(FT_P_DNEVNO); + r.add(FT_P_VECKRAT_TEDENSKO); + r.add(FT_P_CASOPISNO_TEDENSKO); + r.add(FT_P_REVIALNO); + r.add(FT_P_TEDENSKO); + r.add(FT_P_STIRINAJSTDNEVNO); + r.add(FT_P_MESECNO); + r.add(FT_P_REDKEJE_KOT_MESECNO); + r.add(FT_P_OBCASNO); + } else if(disjointTaxonomy.equals(FT_P_PERIODICNO)){ + r.add(FT_P_CASOPISNO); + r.add(FT_P_DNEVNO); + r.add(FT_P_VECKRAT_TEDENSKO); + r.add(FT_P_CASOPISNO_TEDENSKO); + r.add(FT_P_REVIALNO); + r.add(FT_P_TEDENSKO); + r.add(FT_P_STIRINAJSTDNEVNO); + r.add(FT_P_MESECNO); + r.add(FT_P_REDKEJE_KOT_MESECNO); + r.add(FT_P_OBCASNO); + } else if(disjointTaxonomy.equals(FT_P_CASOPISNO)){ + r.add(FT_P_DNEVNO); + r.add(FT_P_VECKRAT_TEDENSKO); + r.add(FT_P_CASOPISNO_TEDENSKO); + } else if(disjointTaxonomy.equals(FT_P_REVIALNO)) { + r.add(FT_P_TEDENSKO); + r.add(FT_P_STIRINAJSTDNEVNO); + r.add(FT_P_MESECNO); + r.add(FT_P_REDKEJE_KOT_MESECNO); + r.add(FT_P_OBCASNO); + } else if(disjointTaxonomy.equals(FT_P_NEOBJAVLJENO)){ + r.add(FT_P_JAVNO); + r.add(FT_P_INTERNO); + r.add(FT_P_ZASEBNO); + } else if(disjointTaxonomy.equals(FT_ZVRST)){ + r.add(FT_UMETNOSTNA); + r.add(FT_PESNISKA); + r.add(FT_PROZNA); + r.add(FT_DRAMSKA); + r.add(FT_NEUMETNOSTNA); + r.add(FT_STROKOVNA); + r.add(FT_HID); + r.add(FT_NIT); + r.add(FT_NESTROKOVNA); + r.add(FT_PRAVNA); + } else if(disjointTaxonomy.equals(FT_UMETNOSTNA)){ + r.add(FT_PESNISKA); + r.add(FT_PROZNA); + r.add(FT_DRAMSKA); + } else if(disjointTaxonomy.equals(FT_NEUMETNOSTNA)){ + r.add(FT_STROKOVNA); + r.add(FT_HID); + r.add(FT_NIT); + r.add(FT_NESTROKOVNA); + r.add(FT_PRAVNA); + } else if(disjointTaxonomy.equals(FT_STROKOVNA)){ + r.add(FT_HID); + r.add(FT_NIT); + } else if(disjointTaxonomy.equals(FT_LEKTORIRANO)){ + r.add(FT_DA); + r.add(FT_NE); + } + + return r; + } + + public static ArrayList taxonomyDeselected(Taxonomy disjointTaxonomy){ + ArrayList r = new ArrayList<>(); + Map connections = new ConcurrentHashMap<>(); + connections.put(DISKURZ_JAVNI, DISKURZ); + connections.put(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_JAVNI); + connections.put(DISKURZ_RAZVEDRILNI, DISKURZ_JAVNI); + connections.put(DISKURZ_NEJAVNI, DISKURZ); + connections.put(DISKURZ_NEZASEBNI, DISKURZ_NEJAVNI); + connections.put(DISKURZ_ZASEBNI, DISKURZ_NEJAVNI); + connections.put(SITUACIJA_RADIO, SITUACIJA); + connections.put(SITUACIJA_TELEVIZIJA, SITUACIJA); + connections.put(KANAL_OSEBNI_STIK, KANAL); + connections.put(KANAL_TELEFON, KANAL); + connections.put(KANAL_RADIO, KANAL); + connections.put(KANAL_TELEVIZIJA, KANAL); + + connections.put(SSJ_KNJIZNO, SSJ_TISK); + connections.put(SSJ_LEPOSLOVNO, SSJ_KNJIZNO); + connections.put(SSJ_STROKOVNO, SSJ_KNJIZNO); + connections.put(SSJ_DRUGO, SSJ_TISK); + + connections.put(FT_P_GOVORNI, FT_P_PRENOSNIK); + connections.put(FT_P_ELEKTRONSKI, FT_P_PRENOSNIK); + connections.put(FT_P_PISNI, FT_P_PRENOSNIK); + connections.put(FT_P_OBJAVLJENO, FT_P_PISNI); + connections.put(FT_P_KNJIZNO, FT_P_OBJAVLJENO); + connections.put(FT_P_PERIODICNO, FT_P_OBJAVLJENO); + connections.put(FT_P_CASOPISNO, FT_P_OBJAVLJENO); + connections.put(FT_P_DNEVNO, FT_P_CASOPISNO); + connections.put(FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO); + connections.put(FT_P_CASOPISNO_TEDENSKO, FT_P_CASOPISNO); + connections.put(FT_P_REVIALNO, FT_P_PERIODICNO); + connections.put(FT_P_TEDENSKO, FT_P_REVIALNO); + connections.put(FT_P_STIRINAJSTDNEVNO, FT_P_REVIALNO); + connections.put(FT_P_MESECNO, FT_P_REVIALNO); + connections.put(FT_P_REDKEJE_KOT_MESECNO, FT_P_REVIALNO); + connections.put(FT_P_OBCASNO, FT_P_REVIALNO); + connections.put(FT_P_NEOBJAVLJENO, FT_P_PISNI); + connections.put(FT_P_JAVNO, FT_P_NEOBJAVLJENO); + connections.put(FT_P_INTERNO, FT_P_NEOBJAVLJENO); + connections.put(FT_P_ZASEBNO, FT_P_NEOBJAVLJENO); + connections.put(FT_UMETNOSTNA, FT_ZVRST); + connections.put(FT_PESNISKA, FT_UMETNOSTNA); + connections.put(FT_PROZNA, FT_UMETNOSTNA); + connections.put(FT_DRAMSKA, FT_UMETNOSTNA); + connections.put(FT_NEUMETNOSTNA, FT_ZVRST); + connections.put(FT_STROKOVNA, FT_NEUMETNOSTNA); + connections.put(FT_HID, FT_STROKOVNA); + connections.put(FT_NIT, FT_STROKOVNA); + connections.put(FT_NESTROKOVNA, FT_NEUMETNOSTNA); + connections.put(FT_PRAVNA, FT_NEUMETNOSTNA); + connections.put(FT_DA, FT_LEKTORIRANO); + connections.put(FT_NE, FT_LEKTORIRANO); + + Taxonomy currentTaxonomy = disjointTaxonomy; + r.add(currentTaxonomy); + while(connections.containsKey(currentTaxonomy)){ + currentTaxonomy = connections.get(currentTaxonomy); + r.add(currentTaxonomy); + } + Collections.reverse(r); + return r; + } + + public static ArrayList convertStringListToTaxonomyList(ObservableList stringList){ + ArrayList taxonomyList = new ArrayList<>(); - return FXCollections.observableArrayList(values); +// System.out.println("INTERESTING STUFF"); +// System.out.println(stringList); + for (String e : stringList) { + taxonomyList.add(factoryLongName(e)); + } +// System.out.println(taxonomyList); +// System.out.println("-----------------"); + return taxonomyList; } - public static ObservableList getDefaultForComboBox(CorpusType corpusType) { - return getDefaultForComboBox(corpusType.toString()); + public static void modifyingTaxonomy(ArrayList taxonomy, ArrayList checkedItemsTaxonomy, Corpus corpus){ + // get taxonomies that were selected/deselected by user + Set disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy); + if (taxonomy != null) { + disjointTaxonomies.addAll(taxonomy); + for (Taxonomy s : checkedItemsTaxonomy) { + if (taxonomy.contains(s)) { + disjointTaxonomies.remove(s); + } + } + } + + // remove previously selected items plus remove taxonomies that are not presented in current setup + ArrayList disArr = new ArrayList<>(disjointTaxonomies); + int i = 0; + while(i < disArr.size()){ + Taxonomy s = disArr.get(i); + if(!Taxonomy.convertStringListToTaxonomyList(corpus.getTaxonomy()).contains(s)){ + disjointTaxonomies.remove(s); + disArr.remove(s); +// taxonomy.remove(s); + i--; + } + i++; + } + + + if (disjointTaxonomies.size() > 0) { + Taxonomy disjointTaxonomy = disjointTaxonomies.iterator().next(); + + // taxonomy was selected + if (checkedItemsTaxonomy.contains(disjointTaxonomy)) { + ArrayList addTaxonomies = Taxonomy.taxonomySelected(disjointTaxonomy); + checkedItemsTaxonomy.addAll(addTaxonomies); + } else if (taxonomy.contains(disjointTaxonomy)) { + ArrayList removeTaxonomies = Taxonomy.taxonomyDeselected(disjointTaxonomy); + checkedItemsTaxonomy.removeAll(removeTaxonomies); + } + } } + + } diff --git a/src/main/java/gui/CharacterAnalysisTab.java b/src/main/java/gui/CharacterAnalysisTab.java index 941b1de..b6481c0 100755 --- a/src/main/java/gui/CharacterAnalysisTab.java +++ b/src/main/java/gui/CharacterAnalysisTab.java @@ -45,7 +45,7 @@ public class CharacterAnalysisTab { @FXML private CheckComboBox taxonomyCCB; - private ArrayList taxonomy; + private ArrayList taxonomy; @FXML private CheckBox displayTaxonomyChB; @@ -183,11 +183,33 @@ public class CharacterAnalysisTab { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { - taxonomy = new ArrayList<>(); - ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - taxonomy.addAll(checkedItems); - logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener() { + boolean changing = true; + + @Override + public void onChanged(ListChangeListener.Change c){ + if(changing) { + ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); + ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); + + Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); + + taxonomy = new ArrayList<>(); + taxonomy.addAll(checkedItemsTaxonomy); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); + + // taxonomyCCB.getCheckModel().clearChecks(); + changing = false; + taxonomyCCB.getCheckModel().clearChecks(); + for (Taxonomy t : checkedItemsTaxonomy) { + taxonomyCCB.getCheckModel().check(t.toLongNameString()); + } + changing = true; + logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + } + } }); taxonomyCCB.getCheckModel().clearChecks(); } else { @@ -313,75 +335,75 @@ public class CharacterAnalysisTab { * iscvv: false * string length: 1 */ - public void populateFields() { - // corpus changed if: current one is null (this is first run of the app) - // or if currentCorpus != gui's corpus - boolean corpusChanged = currentCorpusType == null - || currentCorpusType != corpus.getCorpusType(); - - // TODO: check for GOS, GIGAFIDA, SOLAR... - // refresh and: - // TODO if current value != null && is in new calculateFor ? keep : otherwise reset -// if (calculateFor == null) { -// calculateForRB.selectToggle(lemmaRB); -// calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString()); +// public void populateFields() { +// // corpus changed if: current one is null (this is first run of the app) +// // or if currentCorpus != gui's corpus +// boolean corpusChanged = currentCorpusType == null +// || currentCorpusType != corpus.getCorpusType(); +// +// // TODO: check for GOS, GIGAFIDA, SOLAR... +// // refresh and: +// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset +//// if (calculateFor == null) { +//// calculateForRB.selectToggle(lemmaRB); +//// calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString()); +//// } +// +// if (!filter.hasMsd()) { +// // if current corpus doesn't have msd data, disable this field +// msd = new ArrayList<>(); +// msdTF.setText(""); +// msdTF.setDisable(true); +// logger.info("no msd data"); +// } else { +// if (ValidationUtil.isEmpty(msd) +// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { +// // msd has not been set previously +// // or msd has been set but the corpus changed -> reset +// msd = new ArrayList<>(); +// msdTF.setText(""); +// msdTF.setDisable(false); +// logger.info("msd reset"); +// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { +// // if msd has been set, but corpus type remained the same, we can keep any set msd value +// msdTF.setText(StringUtils.join(msdStrings, " ")); +// msdTF.setDisable(false); +// logger.info("msd kept"); +// } // } - - if (!filter.hasMsd()) { - // if current corpus doesn't have msd data, disable this field - msd = new ArrayList<>(); - msdTF.setText(""); - msdTF.setDisable(true); - logger.info("no msd data"); - } else { - if (ValidationUtil.isEmpty(msd) - || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { - // msd has not been set previously - // or msd has been set but the corpus changed -> reset - msd = new ArrayList<>(); - msdTF.setText(""); - msdTF.setDisable(false); - logger.info("msd reset"); - } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { - // if msd has been set, but corpus type remained the same, we can keep any set msd value - msdTF.setText(StringUtils.join(msdStrings, " ")); - msdTF.setDisable(false); - logger.info("msd kept"); - } - } - - // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection) - - // keep calculateCvv - calculatecvvCB.setSelected(calculateCvv); - - // keep string length if set - if (stringLength != null) { - stringLengthTF.setText(String.valueOf(stringLength)); - } else { - stringLengthTF.setText("1"); - stringLength = 1; - } - - // TODO: trigger on rescan - if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { - // user changed corpus (by type) or by selection & triggered a rescan of headers - // see if we read taxonomy from headers, otherwise use default values for given corpus - ObservableList tax = corpus.getTaxonomy(); - taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); - - currentCorpusType = corpus.getCorpusType(); - // setTaxonomyIsDirty(false); - } else { - - } - - // see if we read taxonomy from headers, otherwise use default values for given corpus - ObservableList tax = corpus.getTaxonomy(); - taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); - taxonomyCCB.getItems().addAll(taxonomyCCBValues); - - } +// +// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection) +// +// // keep calculateCvv +// calculatecvvCB.setSelected(calculateCvv); +// +// // keep string length if set +// if (stringLength != null) { +// stringLengthTF.setText(String.valueOf(stringLength)); +// } else { +// stringLengthTF.setText("1"); +// stringLength = 1; +// } +// +// // TODO: trigger on rescan +// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { +// // user changed corpus (by type) or by selection & triggered a rescan of headers +// // see if we read taxonomy from headers, otherwise use default values for given corpus +// ObservableList tax = corpus.getTaxonomy(); +// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); +// +// currentCorpusType = corpus.getCorpusType(); +// // setTaxonomyIsDirty(false); +// } else { +// +// } +// +// // see if we read taxonomy from headers, otherwise use default values for given corpus +// ObservableList tax = corpus.getTaxonomy(); +// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); +// taxonomyCCB.getItems().addAll(taxonomyCCBValues); +// +// } /** * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc., @@ -434,7 +456,7 @@ public class CharacterAnalysisTab { filter.setMultipleKeys(new ArrayList<>()); filter.setMsd(msd); - filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); + filter.setTaxonomy(taxonomy); filter.setDisplayTaxonomy(displayTaxonomy); filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setSkipValue(0); diff --git a/src/main/java/gui/OneWordAnalysisTab.java b/src/main/java/gui/OneWordAnalysisTab.java index f8abee5..4e3fa82 100755 --- a/src/main/java/gui/OneWordAnalysisTab.java +++ b/src/main/java/gui/OneWordAnalysisTab.java @@ -49,7 +49,7 @@ public class OneWordAnalysisTab { @FXML private CheckComboBox taxonomyCCB; - private ArrayList taxonomy; + private ArrayList taxonomy; @FXML private CheckBox displayTaxonomyChB; @@ -222,11 +222,33 @@ public class OneWordAnalysisTab { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { - taxonomy = new ArrayList<>(); - ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - taxonomy.addAll(checkedItems); - logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener() { + boolean changing = true; + + @Override + public void onChanged(ListChangeListener.Change c){ + if(changing) { + ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); + ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); + + Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); + + taxonomy = new ArrayList<>(); + taxonomy.addAll(checkedItemsTaxonomy); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); + + // taxonomyCCB.getCheckModel().clearChecks(); + changing = false; + taxonomyCCB.getCheckModel().clearChecks(); + for (Taxonomy t : checkedItemsTaxonomy) { + taxonomyCCB.getCheckModel().check(t.toLongNameString()); + } + changing = true; + logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + } + } }); taxonomyCCB.getCheckModel().clearChecks(); } else { @@ -333,63 +355,63 @@ public class OneWordAnalysisTab { * iscvv: false * string length: 1 */ - public void populateFields() { - // corpus changed if: current one is null (this is first run of the app) - // or if currentCorpus != gui's corpus - boolean corpusChanged = currentCorpusType == null - || currentCorpusType != corpus.getCorpusType(); - - - // TODO: check for GOS, GIGAFIDA, SOLAR... - // refresh and: - // TODO if current value != null && is in new calculateFor ? keep : otherwise reset - if (calculateFor == null) { - calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); - calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); - } - - if (!filter.hasMsd()) { - // if current corpus doesn't have msd data, disable this field - msd = new ArrayList<>(); - msdTF.setText(""); - msdTF.setDisable(true); - logger.info("no msd data"); - } else { - if (ValidationUtil.isEmpty(msd) - || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { - // msd has not been set previously - // or msd has been set but the corpus changed -> reset - msd = new ArrayList<>(); - msdTF.setText(""); - msdTF.setDisable(false); - logger.info("msd reset"); - } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { - // if msd has been set, but corpus type remained the same, we can keep any set msd value - msdTF.setText(StringUtils.join(msdStrings, " ")); - msdTF.setDisable(false); - logger.info("msd kept"); - } - } - - // TODO: trigger on rescan - if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { - // user changed corpus (by type) or by selection & triggered a rescan of headers - // see if we read taxonomy from headers, otherwise use default values for given corpus - ObservableList tax = corpus.getTaxonomy(); - taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); - - currentCorpusType = corpus.getCorpusType(); - // setTaxonomyIsDirty(false); - } else { - - } - - // see if we read taxonomy from headers, otherwise use default values for given corpus - ObservableList tax = corpus.getTaxonomy(); - taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); - taxonomyCCB.getItems().addAll(taxonomyCCBValues); - - } +// public void populateFields() { +// // corpus changed if: current one is null (this is first run of the app) +// // or if currentCorpus != gui's corpus +// boolean corpusChanged = currentCorpusType == null +// || currentCorpusType != corpus.getCorpusType(); +// +// +// // TODO: check for GOS, GIGAFIDA, SOLAR... +// // refresh and: +// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset +// if (calculateFor == null) { +// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); +// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); +// } +// +// if (!filter.hasMsd()) { +// // if current corpus doesn't have msd data, disable this field +// msd = new ArrayList<>(); +// msdTF.setText(""); +// msdTF.setDisable(true); +// logger.info("no msd data"); +// } else { +// if (ValidationUtil.isEmpty(msd) +// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { +// // msd has not been set previously +// // or msd has been set but the corpus changed -> reset +// msd = new ArrayList<>(); +// msdTF.setText(""); +// msdTF.setDisable(false); +// logger.info("msd reset"); +// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { +// // if msd has been set, but corpus type remained the same, we can keep any set msd value +// msdTF.setText(StringUtils.join(msdStrings, " ")); +// msdTF.setDisable(false); +// logger.info("msd kept"); +// } +// } +// +// // TODO: trigger on rescan +// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { +// // user changed corpus (by type) or by selection & triggered a rescan of headers +// // see if we read taxonomy from headers, otherwise use default values for given corpus +// ObservableList tax = corpus.getTaxonomy(); +// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); +// +// currentCorpusType = corpus.getCorpusType(); +// // setTaxonomyIsDirty(false); +// } else { +// +// } +// +// // see if we read taxonomy from headers, otherwise use default values for given corpus +// ObservableList tax = corpus.getTaxonomy(); +// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); +// taxonomyCCB.getItems().addAll(taxonomyCCBValues); +// +// } /** * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc., @@ -435,7 +457,7 @@ public class OneWordAnalysisTab { Filter filter = new Filter(); filter.setNgramValue(1); filter.setCalculateFor(calculateFor); - filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); + filter.setTaxonomy(taxonomy); filter.setDisplayTaxonomy(displayTaxonomy); filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setSkipValue(0); diff --git a/src/main/java/gui/StringAnalysisTabNew2.java b/src/main/java/gui/StringAnalysisTabNew2.java index 69452ef..c58f754 100755 --- a/src/main/java/gui/StringAnalysisTabNew2.java +++ b/src/main/java/gui/StringAnalysisTabNew2.java @@ -52,7 +52,7 @@ public class StringAnalysisTabNew2 { @FXML private CheckComboBox taxonomyCCB; - private ArrayList taxonomy; + private ArrayList taxonomy; @FXML private CheckBox calculatecvvCB; @@ -308,11 +308,33 @@ public class StringAnalysisTabNew2 { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { - taxonomy = new ArrayList<>(); - ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - taxonomy.addAll(checkedItems); - logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener() { + boolean changing = true; + + @Override + public void onChanged(ListChangeListener.Change c){ + if(changing) { + ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); + ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); + + Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); + + taxonomy = new ArrayList<>(); + taxonomy.addAll(checkedItemsTaxonomy); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); + + // taxonomyCCB.getCheckModel().clearChecks(); + changing = false; + taxonomyCCB.getCheckModel().clearChecks(); + for (Taxonomy t : checkedItemsTaxonomy) { + taxonomyCCB.getCheckModel().check(t.toLongNameString()); + } + changing = true; + logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + } + } }); taxonomyCCB.getCheckModel().clearChecks(); } else { @@ -563,7 +585,7 @@ public class StringAnalysisTabNew2 { Filter filter = new Filter(); filter.setNgramValue(ngramValue); filter.setCalculateFor(calculateFor); - filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); + filter.setTaxonomy(taxonomy); filter.setDisplayTaxonomy(displayTaxonomy); filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setSkipValue(skipValue); diff --git a/src/main/java/gui/WordFormationTab.java b/src/main/java/gui/WordFormationTab.java index 49f51c0..7c7d436 100755 --- a/src/main/java/gui/WordFormationTab.java +++ b/src/main/java/gui/WordFormationTab.java @@ -38,7 +38,7 @@ public class WordFormationTab { @FXML private CheckComboBox taxonomyCCB; - private ArrayList taxonomy; + private ArrayList taxonomy; @FXML private TextField minimalOccurrencesTF; @@ -77,7 +77,8 @@ public class WordFormationTab { taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { taxonomy = new ArrayList<>(); ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - taxonomy.addAll(checkedItems); + ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); + taxonomy.addAll(checkedItemsTaxonomy); logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); }); taxonomyCCB.getCheckModel().clearChecks(); @@ -140,7 +141,7 @@ public class WordFormationTab { Filter filter = new Filter(); filter.setNgramValue(1); filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY); - filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); + filter.setTaxonomy(taxonomy); filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setSkipValue(0); filter.setMsd(new ArrayList<>()); diff --git a/src/main/java/gui/WordLevelTab.java b/src/main/java/gui/WordLevelTab.java index cb1bfcc..0415c66 100755 --- a/src/main/java/gui/WordLevelTab.java +++ b/src/main/java/gui/WordLevelTab.java @@ -47,7 +47,7 @@ public class WordLevelTab { @FXML private CheckComboBox taxonomyCCB; - private ArrayList taxonomy; + private ArrayList taxonomy; @FXML private CheckBox displayTaxonomyChB; @@ -345,11 +345,33 @@ public class WordLevelTab { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); - taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> { - taxonomy = new ArrayList<>(); - ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); - taxonomy.addAll(checkedItems); - logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener() { + boolean changing = true; + + @Override + public void onChanged(ListChangeListener.Change c){ + if(changing) { + ObservableList checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); + ArrayList checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems); + + Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus); + + taxonomy = new ArrayList<>(); + taxonomy.addAll(checkedItemsTaxonomy); + + taxonomyCCB.getItems().removeAll(); + taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); + + // taxonomyCCB.getCheckModel().clearChecks(); + changing = false; + taxonomyCCB.getCheckModel().clearChecks(); + for (Taxonomy t : checkedItemsTaxonomy) { + taxonomyCCB.getCheckModel().check(t.toLongNameString()); + } + changing = true; + logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); + } + } }); taxonomyCCB.getCheckModel().clearChecks(); } else { @@ -458,63 +480,63 @@ public class WordLevelTab { * iscvv: false * string length: 1 */ - public void populateFields() { - // corpus changed if: current one is null (this is first run of the app) - // or if currentCorpus != gui's corpus - boolean corpusChanged = currentCorpusType == null - || currentCorpusType != corpus.getCorpusType(); - - - // TODO: check for GOS, GIGAFIDA, SOLAR... - // refresh and: - // TODO if current value != null && is in new calculateFor ? keep : otherwise reset - if (calculateFor == null) { - calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); - calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); - } - - if (!filter.hasMsd()) { - // if current corpus doesn't have msd data, disable this field - msd = new ArrayList<>(); - msdTF.setText(""); - msdTF.setDisable(true); - logger.info("no msd data"); - } else { - if (ValidationUtil.isEmpty(msd) - || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { - // msd has not been set previously - // or msd has been set but the corpus changed -> reset - msd = new ArrayList<>(); - msdTF.setText(""); - msdTF.setDisable(false); - logger.info("msd reset"); - } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { - // if msd has been set, but corpus type remained the same, we can keep any set msd value - msdTF.setText(StringUtils.join(msdStrings, " ")); - msdTF.setDisable(false); - logger.info("msd kept"); - } - } - - // TODO: trigger on rescan - if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { - // user changed corpus (by type) or by selection & triggered a rescan of headers - // see if we read taxonomy from headers, otherwise use default values for given corpus - ObservableList tax = corpus.getTaxonomy(); - taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); - - currentCorpusType = corpus.getCorpusType(); - // setTaxonomyIsDirty(false); - } else { - - } - - // see if we read taxonomy from headers, otherwise use default values for given corpus - ObservableList tax = corpus.getTaxonomy(); - taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); - taxonomyCCB.getItems().addAll(taxonomyCCBValues); - - } +// public void populateFields() { +// // corpus changed if: current one is null (this is first run of the app) +// // or if currentCorpus != gui's corpus +// boolean corpusChanged = currentCorpusType == null +// || currentCorpusType != corpus.getCorpusType(); +// +// +// // TODO: check for GOS, GIGAFIDA, SOLAR... +// // refresh and: +// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset +// if (calculateFor == null) { +// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); +// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); +// } +// +// if (!filter.hasMsd()) { +// // if current corpus doesn't have msd data, disable this field +// msd = new ArrayList<>(); +// msdTF.setText(""); +// msdTF.setDisable(true); +// logger.info("no msd data"); +// } else { +// if (ValidationUtil.isEmpty(msd) +// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) { +// // msd has not been set previously +// // or msd has been set but the corpus changed -> reset +// msd = new ArrayList<>(); +// msdTF.setText(""); +// msdTF.setDisable(false); +// logger.info("msd reset"); +// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { +// // if msd has been set, but corpus type remained the same, we can keep any set msd value +// msdTF.setText(StringUtils.join(msdStrings, " ")); +// msdTF.setDisable(false); +// logger.info("msd kept"); +// } +// } +// +// // TODO: trigger on rescan +// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { +// // user changed corpus (by type) or by selection & triggered a rescan of headers +// // see if we read taxonomy from headers, otherwise use default values for given corpus +// ObservableList tax = corpus.getTaxonomy(); +// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); +// +// currentCorpusType = corpus.getCorpusType(); +// // setTaxonomyIsDirty(false); +// } else { +// +// } +// +// // see if we read taxonomy from headers, otherwise use default values for given corpus +// ObservableList tax = corpus.getTaxonomy(); +// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); +// taxonomyCCB.getItems().addAll(taxonomyCCBValues); +// +// } /** * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc., @@ -560,7 +582,7 @@ public class WordLevelTab { Filter filter = new Filter(); filter.setNgramValue(1); filter.setCalculateFor(calculateFor); - filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); + filter.setTaxonomy(taxonomy); filter.setDisplayTaxonomy(displayTaxonomy); filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setSkipValue(0); diff --git a/src/main/java/util/Export.java b/src/main/java/util/Export.java index 3273bb0..6dbdd2f 100755 --- a/src/main/java/util/Export.java +++ b/src/main/java/util/Export.java @@ -60,7 +60,7 @@ public class Export { public static String SetToCSV(Set>> set, File resultsPath, LinkedHashMap headerInfoBlock, StatisticsNew statistics, Filter filter) { - Map> taxonomyResults = statistics.getTaxonomyResult(); + Map> taxonomyResults = statistics.getTaxonomyResult(); //Delimiter used in CSV file String NEW_LINE_SEPARATOR = "\n"; @@ -85,7 +85,7 @@ public class Export { // num_taxonomy_frequencies.put(taxonomyKey, val); // } // } - Map num_taxonomy_frequencies = statistics.getUniGramOccurrences(); + Map num_taxonomy_frequencies = statistics.getUniGramOccurrences(); //CSV file header @@ -106,7 +106,7 @@ public class Export { } } - headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences().get("Total").longValue())); + headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences().get(Taxonomy.TOTAL).longValue())); // headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies)); for (CalculateFor otherKey : filter.getMultipleKeys()) { @@ -127,11 +127,11 @@ public class Export { } } - for (String key : taxonomyResults.keySet()) { - if(!key.equals("Total") && num_taxonomy_frequencies.get(key).longValue() > 0) { - FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]"); - FILE_HEADER_AL.add("Delež [" + key + "]"); - FILE_HEADER_AL.add("Relativna pogostost [" + key + "]"); + for (Taxonomy key : taxonomyResults.keySet()) { + if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { + FILE_HEADER_AL.add("Absolutna pogostost [" + key.toString() + "]"); + FILE_HEADER_AL.add("Delež [" + key.toString() + "]"); + FILE_HEADER_AL.add("Relativna pogostost [" + key.toString() + "]"); } } @@ -270,8 +270,8 @@ public class Export { dataEntry.add(e.getValue().toString()); dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies)); dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies)); - for (String key : taxonomyResults.keySet()){ - if(!key.equals("Total") && num_taxonomy_frequencies.get(key).longValue() > 0) { + for (Taxonomy key : taxonomyResults.keySet()){ + if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { AtomicLong frequency = taxonomyResults.get(key).get(e.getKey()); dataEntry.add(frequency.toString()); dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key).longValue())); diff --git a/src/test/java/DBTest.java b/src/test/java/DBTest.java index 9b20899..dceee80 100755 --- a/src/test/java/DBTest.java +++ b/src/test/java/DBTest.java @@ -5,6 +5,7 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; +import data.Taxonomy; import org.rocksdb.RocksDB; import util.db.RDB;