Taxonomy refactored

This commit is contained in:
Luka 2018-11-26 13:41:35 +01:00
parent a7f3bdb925
commit 9efe3d529b
16 changed files with 1173 additions and 491 deletions

View File

@ -536,8 +536,8 @@ public class XML_processing {
boolean inWord = false;
boolean inPunctuation = false;
boolean taxonomyMatch = true;
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
// ArrayList<Taxonomy> currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
String msd = "";
@ -578,10 +578,10 @@ public class XML_processing {
if (tax != null) {
// keep only taxonomy properties
String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", "");
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""));
currentFiletaxonomy.add(currentFiletaxonomyElement);
Tax taxonomy = new Tax();
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
}
}
break;
@ -637,7 +637,7 @@ public class XML_processing {
// parser reached end of the current sentence
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
// count all UniGramOccurrences in sentence for statistics
stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomyLong);
stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy);
// add sentence to corpus if it passes filters
sentence = runFilters(sentence, stats.getFilter());
@ -645,7 +645,7 @@ public class XML_processing {
if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) {
corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
corpus.add(new Sentence(sentence, currentFiletaxonomy));
}
// taxonomyMatch = true;
@ -713,8 +713,8 @@ public class XML_processing {
public static boolean readXMLSSJ500K(String path, StatisticsNew stats) {
boolean inWord = false;
boolean inPunctuation = false;
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
String msd = "";
@ -757,10 +757,10 @@ public class XML_processing {
if (tax != null) {
// keep only taxonomy properties
String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", "");
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""));
currentFiletaxonomy.add(currentFiletaxonomyElement);
Tax taxonomy = new Tax();
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
}
}
break;
@ -793,7 +793,7 @@ public class XML_processing {
sentence = runFilters(sentence, stats.getFilter());
if (!ValidationUtil.isEmpty(sentence)) {
corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
corpus.add(new Sentence(sentence, currentFiletaxonomy));
}
// and start a new one
@ -820,7 +820,7 @@ public class XML_processing {
corpus.clear();
currentFiletaxonomy = new ArrayList<>();
currentFiletaxonomyLong = new ArrayList<>();
// currentFiletaxonomyLong = new ArrayList<>();
}
break;
@ -848,8 +848,8 @@ public class XML_processing {
boolean inOrthDiv = false;
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
boolean inSeparatedWord = false;
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
String msd = "";
@ -923,10 +923,10 @@ public class XML_processing {
if (tax != null) {
// keep only taxonomy properties
String currentFiletaxonomyElement = String.valueOf(tax.getValue());
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()));
currentFiletaxonomy.add(currentFiletaxonomyElement);
Tax taxonomy = new Tax();
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
}
} else if (qName.equalsIgnoreCase("div")) {
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
@ -1010,7 +1010,7 @@ public class XML_processing {
// add sentence to corpus if it passes filters
if (includeFile && !ValidationUtil.isEmpty(sentence)) {
sentence = runFilters(sentence, stats.getFilter());
corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
corpus.add(new Sentence(sentence, currentFiletaxonomy));
}
wordIndex = 0;
@ -1050,7 +1050,7 @@ public class XML_processing {
corpus.clear();
currentFiletaxonomy = new ArrayList<>();
currentFiletaxonomyLong = new ArrayList<>();
// currentFiletaxonomyLong = new ArrayList<>();
}
break;

View File

@ -8,6 +8,7 @@ import java.util.stream.Collectors;
import data.Enums.InflectedJosTypes;
import data.StatisticsNew;
import data.Taxonomy;
import gui.ValidationUtil;
import util.Combinations;
@ -30,7 +31,7 @@ public class WordFormation {
Map<String, AtomicLong> result = stat.getResult();
// 1. filter - keep only inflected types
result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.charAt(0)));
result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.toString().charAt(0)));
// 2. for each inflected type get all possible subcombinations
for (Character josChar : InflectedJosTypes.inflectedJosTypes) {

View File

@ -267,7 +267,7 @@ public class Ngrams {
for (Sentence s : corpus) {
// stats.updateUniGramOccurrences(s.getWords().size());
for (Word w : s.getWords()) {
List<String> taxonomy = s.getTaxonomy();
List<Taxonomy> taxonomy = s.getTaxonomy();
//// List<Word> ngramCandidate = s.getSublist(i, i + stats.getFilter().getNgramValue());
List<Word> ngramCandidate = new ArrayList<>();
@ -425,7 +425,7 @@ public class Ngrams {
}
}
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<String> taxonomy) {
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<Taxonomy> taxonomy) {
// count if no regex is set or if it is & candidate passes it
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) {
// String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());

View File

@ -104,13 +104,13 @@ public class Filter implements Cloneable {
return (Integer) filter.get(STRING_LENGTH);
}
/**
 * Stores the given taxonomy list in the {@code filter} map under the
 * {@code TAXONOMY} key. The list reference is stored as passed in.
 *
 * NOTE(review): this span is a diff hunk rendered without +/- markers, so two
 * signature lines appear; the {@code ArrayList<String>} one is presumably the
 * removed line and the {@code ArrayList<Taxonomy>} one the added line —
 * confirm against the commit.
 *
 * @param taxonomy taxonomy entries to filter by
 */
public void setTaxonomy(ArrayList<String> taxonomy) {
public void setTaxonomy(ArrayList<Taxonomy> taxonomy) {
filter.put(TAXONOMY, taxonomy);
}
public ArrayList<String> getTaxonomy() {
public ArrayList<Taxonomy> getTaxonomy() {
if (filter.containsKey(TAXONOMY) && filter.get(TAXONOMY) != null) {
return (ArrayList<String>) filter.get(TAXONOMY);
return (ArrayList<Taxonomy>) filter.get(TAXONOMY);
} else {
return new ArrayList<>();
}

View File

@ -7,13 +7,13 @@ public class Sentence {
private List<Word> words;
private List<String> taxonomy;
private List<Taxonomy> taxonomy;
// GOS
private String type;
private Map<String, String> properties;
/**
 * Creates a sentence from its words and the taxonomy entries attached to it.
 * Both lists are stored by reference (no copy is made).
 *
 * NOTE(review): this span is a diff hunk rendered without +/- markers; the
 * {@code List<String>} signature is presumably the removed line and the
 * {@code List<Taxonomy>} signature the added one — confirm against the commit.
 *
 * @param words    words of the sentence
 * @param taxonomy taxonomy entries associated with the sentence
 */
public Sentence(List<Word> words, List<String> taxonomy) {
public Sentence(List<Word> words, List<Taxonomy> taxonomy) {
this.words = words;
this.taxonomy = taxonomy;
}
@ -22,13 +22,13 @@ public class Sentence {
// this.words = words;
// }
/**
 * Creates a sentence from its words, taxonomy entries and a string-to-string
 * property map. All three arguments are stored by reference (no copy is made).
 *
 * NOTE(review): this span is a diff hunk rendered without +/- markers; the
 * {@code List<String>} signature is presumably the removed line and the
 * {@code List<Taxonomy>} signature the added one — confirm against the commit.
 *
 * @param words      words of the sentence
 * @param taxonomy   taxonomy entries associated with the sentence
 * @param properties additional properties of the sentence
 */
public Sentence(List<Word> words, List<String> taxonomy, Map<String, String> properties) {
public Sentence(List<Word> words, List<Taxonomy> taxonomy, Map<String, String> properties) {
this.words = words;
this.taxonomy = taxonomy;
this.properties = properties;
}
public Sentence(List<Word> words, List<String> taxonomy, String type) {
public Sentence(List<Word> words, List<Taxonomy> taxonomy, String type) {
this.words = words;
this.taxonomy = taxonomy;
this.type = type;
@ -38,7 +38,7 @@ public class Sentence {
return words;
}
/**
 * Returns the taxonomy list held by this sentence. The internal list itself
 * is returned, not a copy.
 *
 * NOTE(review): this span is a diff hunk rendered without +/- markers; the
 * {@code List<String>} signature is presumably the removed line and the
 * {@code List<Taxonomy>} signature the added one — confirm against the commit.
 *
 * @return the taxonomy entries of this sentence
 */
public List<String> getTaxonomy() {
public List<Taxonomy> getTaxonomy() {
return taxonomy;
}

View File

@ -68,22 +68,22 @@ public class Statistics {
}
// for words distributions
/**
 * Builds a statistics object whose result title is
 * {@code <taxonomy>_<josWordType>_<timestamp>}; a null taxonomy or word type
 * contributes an empty string to the title.
 *
 * NOTE(review): in this rendered diff the same constructor appears again
 * directly below as commented-out code, so this uncommented copy is presumably
 * the pre-commit version — confirm against the commit.
 *
 * @param al                      analysis level to store
 * @param distributionTaxonomy    taxonomy to restrict to, may be null
 * @param distributionJosWordType JOS word type to restrict to, may be null
 * @param cf                      what the statistics are calculated for
 */
public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
// NOTE(review): "hh" is the 12-hour clock-hour pattern letter and no am/pm
// marker is included, so morning and afternoon runs can yield the same
// timestamp — confirm whether "HH" was intended.
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
this.resultTitle = String.format("%s_%s_%s",
distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
distributionJosWordType != null ? distributionJosWordType.toString() : "",
dateTime);
this.analysisLevel = al;
this.cf = cf;
// Keep the taxonomy as the string returned by getTaxonomnyString(), or null when none was given.
this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
this.taxonomyIsSet = distributionTaxonomy != null;
this.JOSTypeIsSet = distributionJosWordType != null;
// A single space character is stored when no JOS word type was given.
this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
}
// public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
//
// this.resultTitle = String.format("%s_%s_%s",
// distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
// distributionJosWordType != null ? distributionJosWordType.toString() : "",
// dateTime);
//
// this.analysisLevel = al;
// this.cf = cf;
// this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
// this.taxonomyIsSet = distributionTaxonomy != null;
//
// this.JOSTypeIsSet = distributionJosWordType != null;
// this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
// }
public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) {
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
@ -99,17 +99,17 @@ public class Statistics {
this.vcc = true;
}
/**
 * Builds a statistics object for inflected JOS analysis; the result title has
 * the form {@code InflectedJOS_<taxonomy>_<timestamp>}.
 *
 * NOTE(review): in this rendered diff the same constructor appears again
 * directly below as commented-out code, so this uncommented copy is presumably
 * the pre-commit version — confirm against the commit.
 *
 * @param al                   analysis level to store
 * @param inflectedJosTaxonomy taxonomy to restrict to, may be null
 */
public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
// NOTE(review): "hh" is the 12-hour clock-hour pattern letter and no am/pm
// marker is included — confirm whether "HH" was intended.
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
// NOTE(review): the title reads `distributionTaxonomy`, which is not a
// parameter of this constructor and therefore resolves to a member that this
// constructor never assigns; `inflectedJosTaxonomy` looks like the intended
// value — confirm.
this.resultTitle = String.format("InflectedJOS_%s_%s",
distributionTaxonomy != null ? distributionTaxonomy : "",
dateTime);
this.analysisLevel = al;
// Keep the taxonomy as the string returned by getTaxonomnyString(), or null when none was given.
this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
this.taxonomyIsSet = inflectedJosTaxonomy != null;
}
// public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
//
// this.resultTitle = String.format("InflectedJOS_%s_%s",
// distributionTaxonomy != null ? distributionTaxonomy : "",
// dateTime);
//
// this.analysisLevel = al;
// this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
// this.taxonomyIsSet = inflectedJosTaxonomy != null;
// }
public Integer getSkip() {
return skip;

View File

@ -33,7 +33,7 @@ public class StatisticsNew {
private String resultTitle;
private Map<String, AtomicLong> result;
private Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;
private Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;
private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedSuffix;
private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedPrefix;
@ -43,28 +43,28 @@ public class StatisticsNew {
private LocalDateTime timeBeginning;
private LocalDateTime timeEnding;
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
private Map<String, AtomicLong> uniGramTaxonomyOccurrences;
private Map<Taxonomy, AtomicLong> uniGramTaxonomyOccurrences;
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
this.corpus = corpus;
this.filter = filter;
this.taxonomyResult = new ConcurrentHashMap<>();
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
this.taxonomyResult.put(Taxonomy.TOTAL, new ConcurrentHashMap<>());
this.collocability = new ConcurrentHashMap<>();
this.uniGramTaxonomyOccurrences = new ConcurrentHashMap<>();
this.uniGramTaxonomyOccurrences.put("Total", new AtomicLong(0L));
this.uniGramTaxonomyOccurrences.put(Taxonomy.TOTAL, new AtomicLong(0L));
// create table for counting word occurrences per taxonomies
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());
this.taxonomyResult.put(Taxonomy.factory(this.corpus.getTaxonomy().get(i)), new ConcurrentHashMap<>());
}
} else {
for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
Tax taxonomy = new Tax();
this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>());
// Tax taxonomy = new Tax();
this.taxonomyResult.put(this.filter.getTaxonomy().get(i), new ConcurrentHashMap<>());
}
}
}
@ -202,15 +202,15 @@ public class StatisticsNew {
/**
* Stores results from this batch to a database and clears results map
*/
// NOTE(review): in this rendered diff the same method appears again directly
// below as commented-out code, so this uncommented copy is presumably the
// pre-commit version — confirm against the commit.
public void storeTmpResultsToDB() {
try {
// Hand the current result map to the database writer ...
db.writeBatch(result);
// ... and start over with an empty map. This line is skipped when
// writeBatch throws, so the old map is kept in that case.
result = new ConcurrentHashMap<>();
} catch (UnsupportedEncodingException e) {
// The failure is only logged; nothing is rethrown to the caller.
logger.error("Store tmp results to DB", e);
// e.printStackTrace();
}
}
// public void storeTmpResultsToDB() {
// try {
// db.writeBatch(result);
// result = new ConcurrentHashMap<>();
// } catch (UnsupportedEncodingException e) {
// logger.error("Store tmp results to DB", e);
// // e.printStackTrace();
// }
// }
public Filter getFilter() {
return filter;
@ -229,16 +229,16 @@ public class StatisticsNew {
}
// if no results and nothing to save, return false
if (!(taxonomyResult.get("Total").size() > 0)) {
if (!(taxonomyResult.get(Taxonomy.TOTAL).size() > 0)) {
analysisProducedResults = false;
return false;
} else {
analysisProducedResults = true;
}
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
removeMinimalOccurrences(taxonomyResult.get(Taxonomy.TOTAL), filter.getMinimalOccurrences());
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get(Taxonomy.TOTAL), Util.getValidInt(limit))));
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
return true;
}
@ -246,18 +246,18 @@ public class StatisticsNew {
/**
* Removes lines, where number of different taxonomies is lower than specified number (minimalTaxonomy)
*/
// NOTE(review): this span is a diff hunk rendered without +/- markers, so
// several statements appear twice; the String/"Total" variants are presumably
// the removed lines and the Taxonomy/Taxonomy.TOTAL variants the added ones —
// confirm against the commit.
private void removeMinimalTaxonomy(Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult, Integer minimalTaxonomy) {
private void removeMinimalTaxonomy(Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResult, Integer minimalTaxonomy) {
// A threshold of exactly 1 skips the filtering altogether.
// minimalTaxonomy is unboxed here, so a null argument would throw a NullPointerException.
if (minimalTaxonomy == 1)
return;
int occurances;
// Walk every key of the total map and count in how many other taxonomy maps it occurs.
// NOTE(review): entries are removed from the total map while its key set is
// being iterated; that is only safe for a concurrent map implementation —
// confirm the map type the callers pass in.
for (MultipleHMKeys key : taxonomyResult.get("Total").keySet()){
for (MultipleHMKeys key : taxonomyResult.get(Taxonomy.TOTAL).keySet()){
occurances = 0;
// Count the non-total taxonomy columns in which this key has a count of at least 1.
// NOTE(review): `.get(key)` is dereferenced without a null check, so this
// assumes every taxonomy map holds an entry for every key of the total map —
// confirm.
for (String columnNameKey : taxonomyResult.keySet()){
if(!columnNameKey.equals("Total") && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1)
for (Taxonomy columnNameKey : taxonomyResult.keySet()){
if(!columnNameKey.equals(Taxonomy.TOTAL) && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1)
occurances++;
}
// Keys seen in too few taxonomies are dropped from the total map only;
// the per-taxonomy maps are left untouched.
if(occurances < minimalTaxonomy){
taxonomyResult.get("Total").remove(key);
taxonomyResult.get(Taxonomy.TOTAL).remove(key);
}
}
}
@ -343,9 +343,9 @@ public class StatisticsNew {
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
}
public void updateUniGramOccurrences(int amount, ArrayList<String> taxonomy){
uniGramTaxonomyOccurrences.get("Total").set(uniGramTaxonomyOccurrences.get("Total").longValue() + amount);
for (String t : taxonomy){
public void updateUniGramOccurrences(int amount, ArrayList<Taxonomy> taxonomy){
uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).set(uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).longValue() + amount);
for (Taxonomy t : taxonomy){
if (uniGramTaxonomyOccurrences.get(t) != null){
uniGramTaxonomyOccurrences.get(t).set(uniGramTaxonomyOccurrences.get(t).longValue() + amount);
} else {
@ -354,16 +354,16 @@ public class StatisticsNew {
}
}
/**
 * Returns the unigram occurrence counters keyed by taxonomy. The internal map
 * itself is returned, not a copy.
 *
 * NOTE(review): this span is a diff hunk rendered without +/- markers; the
 * {@code Map<String, AtomicLong>} signature is presumably the removed line and
 * the {@code Map<Taxonomy, AtomicLong>} signature the added one — confirm
 * against the commit.
 *
 * @return map from taxonomy to its unigram occurrence counter
 */
public Map<String, AtomicLong> getUniGramOccurrences(){
// return uniGramTaxonomyOccurrences.get("Total").longValue();
public Map<Taxonomy, AtomicLong> getUniGramOccurrences(){
// return uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).longValue();
return uniGramTaxonomyOccurrences;
}
public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
for (String key : taxonomyResult.keySet()) {
public void updateTaxonomyResults(MultipleHMKeys o, List<Taxonomy> taxonomy) {
for (Taxonomy key : taxonomyResult.keySet()) {
// first word should have the same taxonomy as others
if (key.equals("Total") || taxonomy.contains(key)) {
// if (key.equals("Total") || taxonomy != null && taxonomy.contains(key)) {
if (key.equals(Taxonomy.TOTAL) || taxonomy.contains(key)) {
// if (key.equals(Taxonomy.TOTAL) || taxonomy != null && taxonomy.contains(key)) {
// if taxonomy not in map and in this word
AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));
@ -382,7 +382,7 @@ public class StatisticsNew {
}
/**
 * Returns the per-taxonomy result maps. The internal map itself is returned,
 * not a copy.
 *
 * NOTE(review): this span is a diff hunk rendered without +/- markers; the
 * String-keyed signature is presumably the removed line and the
 * Taxonomy-keyed signature the added one — confirm against the commit.
 *
 * @return map from taxonomy to the counters collected for it
 */
public Map<String, Map<MultipleHMKeys, AtomicLong>> getTaxonomyResult() {
public Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> getTaxonomyResult() {
return taxonomyResult;
}
@ -608,7 +608,7 @@ public class StatisticsNew {
}
public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
Map<Collocability, Map<MultipleHMKeys, Double>> collocabilityMap = new ConcurrentHashMap<>();
@ -618,11 +618,11 @@ public class StatisticsNew {
// count number of all words
long N = 0;
for(AtomicLong a : oneWordTaxonomyResult.get("Total").values()){
for(AtomicLong a : oneWordTaxonomyResult.get(Taxonomy.TOTAL).values()){
N += a.longValue();
}
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
for(MultipleHMKeys hmKey : taxonomyResult.get(Taxonomy.TOTAL).keySet()) {
// String[] splitedString = hmKey.getK1().split("\\s+");
long sum_fwi =0L;
@ -630,15 +630,15 @@ public class StatisticsNew {
for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){
// System.out.println(smallHmKey.getK1());
sum_fwi += oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue();
mul_fwi *= oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue();
sum_fwi += oneWordTaxonomyResult.get(Taxonomy.TOTAL).get(smallHmKey).longValue();
mul_fwi *= oneWordTaxonomyResult.get(Taxonomy.TOTAL).get(smallHmKey).longValue();
}
// String t = hmKey.getK1();
// if(hmKey.getK1().equals("v Slovenija")){
// System.out.println("TEST");
//
// }
double O = (double)taxonomyResult.get("Total").get(hmKey).longValue();
double O = (double)taxonomyResult.get(Taxonomy.TOTAL).get(hmKey).longValue();
double n = (double)filter.getNgramValue();
double E = (double)mul_fwi / Math.pow(N, n - 1);
if (collocabilityMap.keySet().contains(Collocability.DICE)){

View File

@ -17,72 +17,72 @@ public class Tax {
GIGAFIDA_TAXONOMY = new LinkedHashMap<>();
GIGAFIDA_TAXONOMY.put("SSJ.T", "SSJ.T - tisk");
GIGAFIDA_TAXONOMY.put("SSJ.T.K", "SSJ.T.K - tisk-knjižno");
GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", "SSJ.T.K.L - tisk-knjižno-leposlovno");
GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", "SSJ.T.K.S - tisk-knjižno-strokovno");
GIGAFIDA_TAXONOMY.put("SSJ.T.P", "SSJ.T.P - tisk-periodično");
GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", "SSJ.T.P.C - tisk-periodično-časopis");
GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", "SSJ.T.P.R - tisk-periodično-revija");
GIGAFIDA_TAXONOMY.put("SSJ.T.D", "SSJ.T.D - tisk-drugo");
GIGAFIDA_TAXONOMY.put("SSJ.T.K", " SSJ.T.K - tisk-knjižno");
GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno");
GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", " SSJ.T.K.S - tisk-knjižno-strokovno");
GIGAFIDA_TAXONOMY.put("SSJ.T.P", " SSJ.T.P - tisk-periodično");
GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", " SSJ.T.P.C - tisk-periodično-časopis");
GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", " SSJ.T.P.R - tisk-periodično-revija");
GIGAFIDA_TAXONOMY.put("SSJ.T.D", " SSJ.T.D - tisk-drugo");
GIGAFIDA_TAXONOMY.put("SSJ.I", "SSJ.I - internet");
GIGAFIDA_TAXONOMY.put("Ft.P", "Ft.P - prenosnik");
GIGAFIDA_TAXONOMY.put("Ft.P.G", "Ft.P.G - prenosnik-govorni");
GIGAFIDA_TAXONOMY.put("Ft.P.E", "Ft.P.E - prenosnik-elektronski");
GIGAFIDA_TAXONOMY.put("Ft.P.P", "Ft.P.P - prenosnik-pisni");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O", "Ft.P.P.O - prenosnik-pisni-objavljeno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", "Ft.P.P.O.K - prenosnik-pisni-objavljeno-knjižno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", "Ft.P.P.O.P - prenosnik-pisni-objavljeno-periodično");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", "Ft.P.P.O.P.C - prenosnik-pisni-objavljeno-periodično-časopisno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", "Ft.P.P.O.P.C.D - prenosnik-pisni-objavljeno-periodično-časopisno-dnevno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", "Ft.P.P.O.P.C.V - prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", "Ft.P.P.O.P.C.T - prenosnik-pisni-objavljeno-periodično-časopisno-tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", "Ft.P.P.O.P.R - prenosnik-pisni-objavljeno-periodično-revialno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", "Ft.P.P.O.P.R.T - prenosnik-pisni-objavljeno-periodično-revialno-tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", "Ft.P.P.O.P.R.S - prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", "Ft.P.P.O.P.R.M - prenosnik-pisni-objavljeno-periodično-revialno-mesečno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", "Ft.P.P.O.P.R.D - prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", "Ft.P.P.O.P.R.O - prenosnik-pisni-objavljeno-periodično-revialno-občasno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N", "Ft.P.P.N - prenosnik-pisni-neobjavljeno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", "Ft.P.P.N.J - prenosnik-pisni-neobjavljeno-javno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", "Ft.P.P.N.I - prenosnik-pisni-neobjavljeno-interno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", "Ft.P.P.N.Z - prenosnik-pisni-neobjavljeno-zasebno");
GIGAFIDA_TAXONOMY.put("Ft.P.G", " Ft.P.G - prenosnik-govorni");
GIGAFIDA_TAXONOMY.put("Ft.P.E", " Ft.P.E - prenosnik-elektronski");
GIGAFIDA_TAXONOMY.put("Ft.P.P", " Ft.P.P - prenosnik-pisni");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O", " Ft.P.P.O - prenosnik-pisni-objavljeno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", " Ft.P.P.O.K - prenosnik-pisni-objavljeno-knjižno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", " Ft.P.P.O.P - prenosnik-pisni-objavljeno-periodično");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", " Ft.P.P.O.P.C - prenosnik-pisni-objavljeno-periodično-časopisno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", " Ft.P.P.O.P.C.D - prenosnik-pisni-objavljeno-periodično-časopisno-dnevno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", " Ft.P.P.O.P.C.V - prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", " Ft.P.P.O.P.C.T - prenosnik-pisni-objavljeno-periodično-časopisno-tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", " Ft.P.P.O.P.R - prenosnik-pisni-objavljeno-periodično-revialno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", " Ft.P.P.O.P.R.T - prenosnik-pisni-objavljeno-periodično-revialno-tedensko");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", " Ft.P.P.O.P.R.S - prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", " Ft.P.P.O.P.R.M - prenosnik-pisni-objavljeno-periodično-revialno-mesečno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", " Ft.P.P.O.P.R.D - prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec");
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", " Ft.P.P.O.P.R.O - prenosnik-pisni-objavljeno-periodično-revialno-občasno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N", " Ft.P.P.N - prenosnik-pisni-neobjavljeno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", " Ft.P.P.N.J - prenosnik-pisni-neobjavljeno-javno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", " Ft.P.P.N.I - prenosnik-pisni-neobjavljeno-interno");
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", " Ft.P.P.N.Z - prenosnik-pisni-neobjavljeno-zasebno");
GIGAFIDA_TAXONOMY.put("Ft.Z", "Ft.Z - zvrst");
GIGAFIDA_TAXONOMY.put("Ft.Z.U", "Ft.Z.U - zvrst-umetnostna");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", "Ft.Z.U.P - zvrst-umetnostna-pesniška");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", "Ft.Z.U.R - zvrst-umetnostna-prozna");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", "Ft.Z.U.D - zvrst-umetnostna-dramska");
GIGAFIDA_TAXONOMY.put("Ft.Z.N", "Ft.Z.N - zvrst-neumetnostna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", "Ft.Z.N.S - zvrst-neumetnostna-strokovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", "Ft.Z.N.S.H - zvrst-neumetnostna-strokovna-humanistična in družboslovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", "Ft.Z.N.S.N - zvrst-neumetnostna-strokovna-naravoslovna in tehnična");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", "Ft.Z.N.N - zvrst-neumetnostna-nestrokovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", "Ft.Z.N.P - zvrst-neumetnostna-pravna");
GIGAFIDA_TAXONOMY.put("Ft.Z.U", " Ft.Z.U - zvrst-umetnostna");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", " Ft.Z.U.P - zvrst-umetnostna-pesniška");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", " Ft.Z.U.R - zvrst-umetnostna-prozna");
GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", " Ft.Z.U.D - zvrst-umetnostna-dramska");
GIGAFIDA_TAXONOMY.put("Ft.Z.N", " Ft.Z.N - zvrst-neumetnostna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", " Ft.Z.N.S - zvrst-neumetnostna-strokovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", " Ft.Z.N.S.H - zvrst-neumetnostna-strokovna-humanistična in družboslovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", " Ft.Z.N.S.N - zvrst-neumetnostna-strokovna-naravoslovna in tehnična");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", " Ft.Z.N.N - zvrst-neumetnostna-nestrokovna");
GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", " Ft.Z.N.P - zvrst-neumetnostna-pravna");
GIGAFIDA_TAXONOMY.put("Ft.L", "Ft.L - zvrst-lektorirano");
GIGAFIDA_TAXONOMY.put("Ft.L.D", "Ft.L.D - zvrst-lektorirano-da");
GIGAFIDA_TAXONOMY.put("Ft.L.N", "Ft.L.N - zvrst-lektorirano-ne");
GIGAFIDA_TAXONOMY.put("Ft.L.D", " Ft.L.D - zvrst-lektorirano-da");
GIGAFIDA_TAXONOMY.put("Ft.L.N", " Ft.L.N - zvrst-lektorirano-ne");
// GOS ----------------------------------
GOS_TAXONOMY = new LinkedHashMap<>();
GOS_TAXONOMY.put("gos.T", "gos.T - diskurz");
GOS_TAXONOMY.put("gos.T.J", "gos.T.J - diskurz-javni");
GOS_TAXONOMY.put("gos.T.J.I", "gos.T.J.I - diskurz-javni-informativno-izobraževalni");
GOS_TAXONOMY.put("gos.T.J.R", "gos.T.J.R - diskurz-javni-razvedrilni");
GOS_TAXONOMY.put("gos.T.N", "gos.T.N - diskurz-nejavni");
GOS_TAXONOMY.put("gos.T.N.N", "gos.T.N.N - diskurz-nejavni-nezasebni");
GOS_TAXONOMY.put("gos.T.N.Z", "gos.T.N.Z - diskurz-nejavni-zasebni");
GOS_TAXONOMY.put("gos.T.J", " gos.T.J - diskurz-javni");
GOS_TAXONOMY.put("gos.T.J.I", " gos.T.J.I - diskurz-javni-informativno-izobraževalni");
GOS_TAXONOMY.put("gos.T.J.R", " gos.T.J.R - diskurz-javni-razvedrilni");
GOS_TAXONOMY.put("gos.T.N", " gos.T.N - diskurz-nejavni");
GOS_TAXONOMY.put("gos.T.N.N", " gos.T.N.N - diskurz-nejavni-nezasebni");
GOS_TAXONOMY.put("gos.T.N.Z", " gos.T.N.Z - diskurz-nejavni-zasebni");
GOS_TAXONOMY.put("gos.S", "gos.S - situacija");
GOS_TAXONOMY.put("gos.S.R", "gos.S.R - situacija-radio");
GOS_TAXONOMY.put("gos.S.T", "gos.S.T - situacija-televizija");
GOS_TAXONOMY.put("gos.S.R", " gos.S.R - situacija-radio");
GOS_TAXONOMY.put("gos.S.T", " gos.S.T - situacija-televizija");
GOS_TAXONOMY.put("gos.K", "gos.K - kanal");
GOS_TAXONOMY.put("gos.K.O", "gos.K.O - kanal-osebni stik");
GOS_TAXONOMY.put("gos.K.P", "gos.K.P - kanal-telefon");
GOS_TAXONOMY.put("gos.K.R", "gos.K.R - kanal-radio");
GOS_TAXONOMY.put("gos.K.T", "gos.K.T - kanal-televizija");
GOS_TAXONOMY.put("gos.K.O", " gos.K.O - kanal-osebni stik");
GOS_TAXONOMY.put("gos.K.P", " gos.K.P - kanal-telefon");
GOS_TAXONOMY.put("gos.K.R", " gos.K.R - kanal-radio");
GOS_TAXONOMY.put("gos.K.T", " gos.K.T - kanal-televizija");
}
/**
@ -112,6 +112,19 @@ public class Tax {
ArrayList<String> taxForCombo = new ArrayList<>();
// adds parents taxonomy as well
HashSet<String> genFoundTax = new HashSet<>();
for(String e : foundTax){
String[] elList = e.split("\\.");
for(int i = 1; i < elList.length - 1; i++){
String candidate = String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i));
genFoundTax.add(candidate);
}
}
foundTax.addAll(genFoundTax);
// assures same relative order
for (String t : tax.keySet()) {
if (foundTax.contains(t)) {
@ -126,7 +139,7 @@ public class Tax {
return corpusTypesWithTaxonomy;
}
public static ArrayList<String> getTaxonomyCodes(ArrayList<String> taxonomyNames, CorpusType corpusType) {
public static ArrayList<String> getTaxonomyCodes(ArrayList<Taxonomy> taxonomyNames, CorpusType corpusType) {
ArrayList<String> result = new ArrayList<>();
if (ValidationUtil.isEmpty(taxonomyNames)) {
@ -146,8 +159,8 @@ public class Tax {
.stream()
.collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
for (String taxonomyName : taxonomyNames) {
result.add(taxInversed.get(taxonomyName));
for (Taxonomy taxonomyName : taxonomyNames) {
result.add(taxInversed.get(taxonomyName.toString()));
}
return result;
@ -188,7 +201,7 @@ public class Tax {
*
* @return
*/
public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<String> taxonomy) {
public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<Taxonomy> taxonomy) {
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
@ -199,8 +212,8 @@ public class Tax {
ArrayList<String> result = new ArrayList<>();
for (String t : taxonomy) {
result.add(tax.get(t));
for (Taxonomy t : taxonomy) {
result.add(tax.get(t.toString()));
}
return result;

View File

@ -1,171 +1,749 @@
package data;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
public enum Taxonomy {
TOTAL("Total", "Total"),
// GOS
JAVNI("javni", "T.J", "gos"),
INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "T.J.I", "gos"),
RAZVEDRILNI("razvedrilni", "T.J.R", "gos"),
NEJAVNI("nejavni", "T.N", "gos"),
NEZASEBNI("nezasebni", "T.N.N", "gos"),
ZASEBNI("zasebni", "T.N.Z", "gos"),
OSEBNI_STIK("osebni stik", "K.O", "gos"),
TELEFON("telefon", "K.P", "gos"),
RADIO("radio", "K.R", "gos"),
TELEVIZIJA("televizija", "K.T", "gos"),
DISKURZ("gos.T", "gos.T - diskurz"),
DISKURZ_JAVNI("gos.T.J", " gos.T.J - diskurz-javni"),
DISKURZ_INFORMATIVNO_IZOBRAZEVALNI("gos.T.J.I", " gos.T.J.I - diskurz-javni-informativno-izobraževalni"),
DISKURZ_RAZVEDRILNI("gos.T.J.R", " gos.T.J.R - diskurz-javni-razvedrilni"),
DISKURZ_NEJAVNI("gos.T.N", " gos.T.N - diskurz-nejavni"),
DISKURZ_NEZASEBNI("gos.T.N.N", " gos.T.N.N - diskurz-nejavni-nezasebni"),
DISKURZ_ZASEBNI("gos.T.N.Z", " gos.T.N.Z - diskurz-nejavni-zasebni"),
SITUACIJA("gos.S", "gos.S - situacija"),
SITUACIJA_RADIO("gos.S.R", " gos.S.R - situacija-radio"),
SITUACIJA_TELEVIZIJA("gos.S.T", " gos.S.T - situacija-televizija"),
KANAL("gos.K", "gos.K - kanal"),
KANAL_OSEBNI_STIK("gos.K.O", " gos.K.O - kanal-osebni stik"),
KANAL_TELEFON("gos.K.P", " gos.K.P - kanal-telefon"),
KANAL_RADIO("gos.K.R", " gos.K.R - kanal-radio"),
KANAL_TELEVIZIJA("gos.K.T", " gos.K.T - kanal-televizija"),
// Gigafida
KNJIZNO("knjižno", "T.K", "gigafida"),
LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"),
STROKOVNO("strokovno", "T.K.S", "gigafida"),
PERIODICNO("periodično", "T.P", "gigafida"),
CASOPIS("časopis", "T.P.C", "gigafida"),
REVIJA("revija", "T.P.R", "gigafida"),
INTERNET("internet", "I", "gigafida"),
SSJ_TISK("tisk", "SSJ.T", "gigafida"),
SSJ_KNJIZNO("opis", "identifikator", "gigafida"),
SSJ_LEPOSLOVNO("opis", "identifikator", "gigafida"),
SSJ_STROKOVNO("opis", "identifikator", "gigafida"),
SSJ_PERIODICNO("opis", "identifikator", "gigafida"),
SSJ_CASOPIS("opis", "identifikator", "gigafida"),
SSJ_REVIJA("opis", "identifikator", "gigafida"),
SSJ_DRUGO("opis", "identifikator", "gigafida"),
SSJ_INTERNET("opis", "identifikator", "gigafida"),
FT_P_PRENOSNIK("opis", "identifikator", "gigafida"),
FT_P_GOVORNI("opis", "identifikator", "gigafida"),
FT_P_ELEKTRONSKI("opis", "identifikator", "gigafida"),
FT_P_PISNI("opis", "identifikator", "gigafida"),
FT_P_OBJAVLJENO("opis", "identifikator", "gigafida"),
FT_P_KNJIZNO("opis", "identifikator", "gigafida"),
FT_P_PERIODICNO("opis", "identifikator", "gigafida"),
FT_P_CASOPISNO("opis", "identifikator", "gigafida"),
FT_P_DNEVNO("opis", "identifikator", "gigafida"),
FT_P_VECKRAT_TEDENSKO("opis", "identifikator", "gigafida"),
// FT_P_TEDENSKO("opis", "identifikator", "gigafida"),
FT_P_REVIALNO("opis", "identifikator", "gigafida"),
FT_P_TEDENSKO("opis", "identifikator", "gigafida"),
FT_P_STIRINAJSTDNEVNO("opis", "identifikator", "gigafida"),
FT_P_MESECNO("opis", "identifikator", "gigafida"),
FT_P_REDKEJE_KOT_MESECNO("opis", "identifikator", "gigafida"),
FT_P_OBCASNO("opis", "identifikator", "gigafida"),
FT_P_NEOBJAVLJENO("opis", "identifikator", "gigafida"),
FT_P_JAVNO("opis", "identifikator", "gigafida"),
FT_P_INTERNO("opis", "identifikator", "gigafida"),
FT_P_ZASEBNO("opis", "identifikator", "gigafida"),
FT_ZVRST("opis", "identifikator", "gigafida"),
FT_UMETNOSTNA("opis", "identifikator", "gigafida"),
FT_PESNISKA("opis", "identifikator", "gigafida"),
FT_PROZNA("opis", "identifikator", "gigafida"),
FT_DRAMSKA("opis", "identifikator", "gigafida"),
FT_NEUMETNOSTNA("opis", "identifikator", "gigafida"),
FT_STROKOVNA("opis", "identifikator", "gigafida"),
FT_HID("opis", "identifikator", "gigafida"),
FT_NIT("opis", "identifikator", "gigafida"),
FT_NESTROKOVNA("opis", "identifikator", "gigafida"),
FT_PRAVNA("opis", "identifikator", "gigafida"),
FT_LEKTORIRANO("opis", "identifikator", "gigafida"),
FT_DA("opis", "identifikator", "gigafida"),
FT_NE("opis", "identifikator", "gigafida");
// KNJIZNO("knjižno", "T.K", "gigafida"),
// LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"),
// STROKOVNO("strokovno", "T.K.S", "gigafida"),
// PERIODICNO("periodično", "T.P", "gigafida"),
// CASOPIS("časopis", "T.P.C", "gigafida"),
// REVIJA("revija", "T.P.R", "gigafida"),
// INTERNET("internet", "I", "gigafida"),
SSJ_TISK("SSJ.T", "SSJ.T - tisk"),
SSJ_KNJIZNO("SSJ.T.K", " SSJ.T.K - tisk-knjižno"),
SSJ_LEPOSLOVNO("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno"),
SSJ_STROKOVNO("SSJ.T.K.S", " SSJ.T.K.S - tisk-knjižno-strokovno"),
SSJ_PERIODICNO("SSJ.T.P", " SSJ.T.P - tisk-periodično"),
SSJ_CASOPIS("SSJ.T.P.C", " SSJ.T.P.C - tisk-periodično-časopis"),
SSJ_REVIJA("SSJ.T.P.R", " SSJ.T.P.R - tisk-periodično-revija"),
SSJ_DRUGO("SSJ.T.D", " SSJ.T.D - tisk-drugo"),
SSJ_INTERNET("SSJ.I", "SSJ.I - internet"),
FT_P_PRENOSNIK("Ft.P", "Ft.P - prenosnik"),
FT_P_GOVORNI("Ft.P.G", " Ft.P.G - prenosnik-govorni"),
FT_P_ELEKTRONSKI("Ft.P.E", " Ft.P.E - prenosnik-elektronski"),
FT_P_PISNI("Ft.P.P", " Ft.P.P - prenosnik-pisni"),
FT_P_OBJAVLJENO("Ft.P.P.O", " Ft.P.P.O - prenosnik-pisni-objavljeno"),
FT_P_KNJIZNO("Ft.P.P.O.K", " Ft.P.P.O.K - prenosnik-pisni-objavljeno-knjižno"),
FT_P_PERIODICNO("Ft.P.P.O.P", " Ft.P.P.O.P - prenosnik-pisni-objavljeno-periodično"),
FT_P_CASOPISNO("Ft.P.P.O.P.C", " Ft.P.P.O.P.C - prenosnik-pisni-objavljeno-periodično-časopisno"),
FT_P_DNEVNO("Ft.P.P.O.P.C.D", " Ft.P.P.O.P.C.D - prenosnik-pisni-objavljeno-periodično-časopisno-dnevno"),
FT_P_VECKRAT_TEDENSKO("Ft.P.P.O.P.C.V", " Ft.P.P.O.P.C.V - prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko"),
FT_P_CASOPISNO_TEDENSKO("Ft.P.P.O.P.C.T", " Ft.P.P.O.P.C.T - prenosnik-pisni-objavljeno-periodično-časopisno-tedensko"),
FT_P_REVIALNO("Ft.P.P.O.P.R", " Ft.P.P.O.P.R - prenosnik-pisni-objavljeno-periodično-revialno"),
FT_P_TEDENSKO("Ft.P.P.O.P.R.T", " Ft.P.P.O.P.R.T - prenosnik-pisni-objavljeno-periodično-revialno-tedensko"),
FT_P_STIRINAJSTDNEVNO("Ft.P.P.O.P.R.S", " Ft.P.P.O.P.R.S - prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno"),
FT_P_MESECNO("Ft.P.P.O.P.R.M", " Ft.P.P.O.P.R.M - prenosnik-pisni-objavljeno-periodično-revialno-mesečno"),
FT_P_REDKEJE_KOT_MESECNO("Ft.P.P.O.P.R.D", " Ft.P.P.O.P.R.D - prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec"),
FT_P_OBCASNO("Ft.P.P.O.P.R.O", " Ft.P.P.O.P.R.O - prenosnik-pisni-objavljeno-periodično-revialno-občasno"),
FT_P_NEOBJAVLJENO("Ft.P.P.N", " Ft.P.P.N - prenosnik-pisni-neobjavljeno"),
FT_P_JAVNO("Ft.P.P.N.J", " Ft.P.P.N.J - prenosnik-pisni-neobjavljeno-javno"),
FT_P_INTERNO("Ft.P.P.N.I", " Ft.P.P.N.I - prenosnik-pisni-neobjavljeno-interno"),
FT_P_ZASEBNO("Ft.P.P.N.Z", " Ft.P.P.N.Z - prenosnik-pisni-neobjavljeno-zasebno"),
FT_ZVRST("Ft.Z", "Ft.Z - zvrst"),
FT_UMETNOSTNA("Ft.Z.U", " Ft.Z.U - zvrst-umetnostna"),
FT_PESNISKA("Ft.Z.U.P", " Ft.Z.U.P - zvrst-umetnostna-pesniška"),
FT_PROZNA("Ft.Z.U.R", " Ft.Z.U.R - zvrst-umetnostna-prozna"),
FT_DRAMSKA("Ft.Z.U.D", " Ft.Z.U.D - zvrst-umetnostna-dramska"),
FT_NEUMETNOSTNA("Ft.Z.N", " Ft.Z.N - zvrst-neumetnostna"),
FT_STROKOVNA("Ft.Z.N.S", " Ft.Z.N.S - zvrst-neumetnostna-strokovna"),
FT_HID("Ft.Z.N.S.H", " Ft.Z.N.S.H - zvrst-neumetnostna-strokovna-humanistična in družboslovna"),
FT_NIT("Ft.Z.N.S.N", " Ft.Z.N.S.N - zvrst-neumetnostna-strokovna-naravoslovna in tehnična"),
FT_NESTROKOVNA("Ft.Z.N.N", " Ft.Z.N.N - zvrst-neumetnostna-nestrokovna"),
FT_PRAVNA("Ft.Z.N.P", " Ft.Z.N.P - zvrst-neumetnostna-pravna"),
FT_LEKTORIRANO("Ft.L", "Ft.L - zvrst-lektorirano"),
FT_DA("Ft.L.D", " Ft.L.D - zvrst-lektorirano-da"),
FT_NE("Ft.L.N", " Ft.L.N - zvrst-lektorirano-ne");
private final String name;
private final String taxonomy;
private final String corpus;
private final String longName;
Taxonomy(String name, String taxonomy, String corpusType) {
Taxonomy(String name, String longName) {
this.name = name;
this.taxonomy = taxonomy;
this.corpus = corpusType;
this.longName = longName;
}
/**
 * Returns the short name of this taxonomy entry, i.e. the value stored in
 * the {@code name} field by the constructor.
 *
 * @return the short name of this entry
 */
@Override
public String toString() {
    return this.name;
}
public String getTaxonomnyString() {
return this.taxonomy;
public String toLongNameString() {
return this.longName;
}
public static Taxonomy factory(String tax) {
if (tax != null) {
// GOS
if (JAVNI.toString().equals(tax)) {
return JAVNI;
if (DISKURZ.toString().equals(tax)) {
return DISKURZ;
}
if (INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) {
return INFORMATIVNO_IZOBRAZEVALNI;
if (DISKURZ_JAVNI.toString().equals(tax)) {
return DISKURZ_JAVNI;
}
if (RAZVEDRILNI.toString().equals(tax)) {
return RAZVEDRILNI;
if (DISKURZ_INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) {
return DISKURZ_INFORMATIVNO_IZOBRAZEVALNI;
}
if (NEJAVNI.toString().equals(tax)) {
return NEJAVNI;
if (DISKURZ_RAZVEDRILNI.toString().equals(tax)) {
return DISKURZ_RAZVEDRILNI;
}
if (NEZASEBNI.toString().equals(tax)) {
return NEZASEBNI;
if (DISKURZ_NEJAVNI.toString().equals(tax)) {
return DISKURZ_NEJAVNI;
}
if (ZASEBNI.toString().equals(tax)) {
return ZASEBNI;
if (DISKURZ_NEZASEBNI.toString().equals(tax)) {
return DISKURZ_NEZASEBNI;
}
if (OSEBNI_STIK.toString().equals(tax)) {
return OSEBNI_STIK;
if (DISKURZ_ZASEBNI.toString().equals(tax)) {
return DISKURZ_ZASEBNI;
}
if (TELEFON.toString().equals(tax)) {
return TELEFON;
if (SITUACIJA.toString().equals(tax)) {
return SITUACIJA;
}
if (RADIO.toString().equals(tax)) {
return RADIO;
if (SITUACIJA_RADIO.toString().equals(tax)) {
return SITUACIJA_RADIO;
}
if (TELEVIZIJA.toString().equals(tax)) {
return TELEVIZIJA;
if (SITUACIJA_TELEVIZIJA.toString().equals(tax)) {
return SITUACIJA_TELEVIZIJA;
}
if (KANAL.toString().equals(tax)) {
return KANAL;
}
if (KANAL_OSEBNI_STIK.toString().equals(tax)) {
return KANAL_OSEBNI_STIK;
}
if (KANAL_TELEFON.toString().equals(tax)) {
return KANAL_TELEFON;
}
if (KANAL_RADIO.toString().equals(tax)) {
return KANAL_RADIO;
}
if (KANAL_TELEVIZIJA.toString().equals(tax)) {
return KANAL_TELEVIZIJA;
}
// Gigafida
// if (TISK.toString().equals(tax)) {
// return TISK;
// }
if (KNJIZNO.toString().equals(tax)) {
return KNJIZNO;
if (SSJ_TISK.toString().equals(tax)) {
return SSJ_TISK;
}
if (LEPOSLOVNO.toString().equals(tax)) {
return LEPOSLOVNO;
if (SSJ_KNJIZNO.toString().equals(tax)) {
return SSJ_KNJIZNO;
}
if (STROKOVNO.toString().equals(tax)) {
return STROKOVNO;
if (SSJ_LEPOSLOVNO.toString().equals(tax)) {
return SSJ_LEPOSLOVNO;
}
if (PERIODICNO.toString().equals(tax)) {
return PERIODICNO;
if (SSJ_STROKOVNO.toString().equals(tax)) {
return SSJ_STROKOVNO;
}
if (CASOPIS.toString().equals(tax)) {
return CASOPIS;
if (SSJ_PERIODICNO.toString().equals(tax)) {
return SSJ_PERIODICNO;
}
if (REVIJA.toString().equals(tax)) {
return REVIJA;
if (SSJ_CASOPIS.toString().equals(tax)) {
return SSJ_CASOPIS;
}
if (INTERNET.toString().equals(tax)) {
return INTERNET;
if (SSJ_REVIJA.toString().equals(tax)) {
return SSJ_REVIJA;
}
if (SSJ_DRUGO.toString().equals(tax)) {
return SSJ_DRUGO;
}
if (SSJ_INTERNET.toString().equals(tax)) {
return SSJ_INTERNET;
}
if (FT_P_PRENOSNIK.toString().equals(tax)) {
return FT_P_PRENOSNIK;
}
if (FT_P_GOVORNI.toString().equals(tax)) {
return FT_P_GOVORNI;
}
if (FT_P_ELEKTRONSKI.toString().equals(tax)) {
return FT_P_ELEKTRONSKI;
}
if (FT_P_PISNI.toString().equals(tax)) {
return FT_P_PISNI;
}
if (FT_P_OBJAVLJENO.toString().equals(tax)) {
return FT_P_OBJAVLJENO;
}
if (FT_P_KNJIZNO.toString().equals(tax)) {
return FT_P_KNJIZNO;
}
if (FT_P_PERIODICNO.toString().equals(tax)) {
return FT_P_PERIODICNO;
}
if (FT_P_CASOPISNO.toString().equals(tax)) {
return FT_P_CASOPISNO;
}
if (FT_P_DNEVNO.toString().equals(tax)) {
return FT_P_DNEVNO;
}
if (FT_P_VECKRAT_TEDENSKO.toString().equals(tax)) {
return FT_P_VECKRAT_TEDENSKO;
}
if (FT_P_CASOPISNO_TEDENSKO.toString().equals(tax)) {
return FT_P_CASOPISNO_TEDENSKO;
}
if (FT_P_REVIALNO.toString().equals(tax)) {
return FT_P_REVIALNO;
}
if (FT_P_TEDENSKO.toString().equals(tax)) {
return FT_P_TEDENSKO;
}
if (FT_P_STIRINAJSTDNEVNO.toString().equals(tax)) {
return FT_P_STIRINAJSTDNEVNO;
}
if (FT_P_MESECNO.toString().equals(tax)) {
return FT_P_MESECNO;
}
if (FT_P_REDKEJE_KOT_MESECNO.toString().equals(tax)) {
return FT_P_REDKEJE_KOT_MESECNO;
}
if (FT_P_OBCASNO.toString().equals(tax)) {
return FT_P_OBCASNO;
}
if (FT_P_NEOBJAVLJENO.toString().equals(tax)) {
return FT_P_NEOBJAVLJENO;
}
if (FT_P_JAVNO.toString().equals(tax)) {
return FT_P_JAVNO;
}
if (FT_P_INTERNO.toString().equals(tax)) {
return FT_P_INTERNO;
}
if (FT_P_ZASEBNO.toString().equals(tax)) {
return FT_P_ZASEBNO;
}
if (FT_ZVRST.toString().equals(tax)) {
return FT_ZVRST;
}
if (FT_UMETNOSTNA.toString().equals(tax)) {
return FT_UMETNOSTNA;
}
if (FT_PESNISKA.toString().equals(tax)) {
return FT_PESNISKA;
}
if (FT_PROZNA.toString().equals(tax)) {
return FT_PROZNA;
}
if (FT_DRAMSKA.toString().equals(tax)) {
return FT_DRAMSKA;
}
if (FT_NEUMETNOSTNA.toString().equals(tax)) {
return FT_NEUMETNOSTNA;
}
if (FT_STROKOVNA.toString().equals(tax)) {
return FT_STROKOVNA;
}
if (FT_NIT.toString().equals(tax)) {
return FT_NIT;
}
if (FT_HID.toString().equals(tax)) {
return FT_HID;
}
if (FT_NESTROKOVNA.toString().equals(tax)) {
return FT_NESTROKOVNA;
}
if (FT_PRAVNA.toString().equals(tax)) {
return FT_PRAVNA;
}
if (FT_LEKTORIRANO.toString().equals(tax)) {
return FT_LEKTORIRANO;
}
if (FT_DA.toString().equals(tax)) {
return FT_DA;
}
if (FT_NE.toString().equals(tax)) {
return FT_NE;
}
}
return null;
}
public static ObservableList<String> getDefaultForComboBox(String corpusType) {
ArrayList<String> values = Arrays.stream(Taxonomy.values())
.filter(x -> x.corpus.equals(corpusType))
.map(x -> x.name)
.collect(Collectors.toCollection(ArrayList::new));
return FXCollections.observableArrayList(values);
public static Taxonomy factoryLongName(String tax) {
if (tax != null) {
// GOS
if (DISKURZ.toLongNameString().equals(tax)) {
return DISKURZ;
}
if (DISKURZ_JAVNI.toLongNameString().equals(tax)) {
return DISKURZ_JAVNI;
}
if (DISKURZ_INFORMATIVNO_IZOBRAZEVALNI.toLongNameString().equals(tax)) {
return DISKURZ_INFORMATIVNO_IZOBRAZEVALNI;
}
if (DISKURZ_RAZVEDRILNI.toLongNameString().equals(tax)) {
return DISKURZ_RAZVEDRILNI;
}
if (DISKURZ_NEJAVNI.toLongNameString().equals(tax)) {
return DISKURZ_NEJAVNI;
}
if (DISKURZ_NEZASEBNI.toLongNameString().equals(tax)) {
return DISKURZ_NEZASEBNI;
}
if (DISKURZ_ZASEBNI.toLongNameString().equals(tax)) {
return DISKURZ_ZASEBNI;
}
if (SITUACIJA.toLongNameString().equals(tax)) {
return SITUACIJA;
}
if (SITUACIJA_RADIO.toLongNameString().equals(tax)) {
return SITUACIJA_RADIO;
}
if (SITUACIJA_TELEVIZIJA.toLongNameString().equals(tax)) {
return SITUACIJA_TELEVIZIJA;
}
if (KANAL.toLongNameString().equals(tax)) {
return KANAL;
}
if (KANAL_OSEBNI_STIK.toLongNameString().equals(tax)) {
return KANAL_OSEBNI_STIK;
}
if (KANAL_TELEFON.toLongNameString().equals(tax)) {
return KANAL_TELEFON;
}
if (KANAL_RADIO.toLongNameString().equals(tax)) {
return KANAL_RADIO;
}
if (KANAL_TELEVIZIJA.toLongNameString().equals(tax)) {
return KANAL_TELEVIZIJA;
}
public static ObservableList<String> getDefaultForComboBox(CorpusType corpusType) {
return getDefaultForComboBox(corpusType.toString());
// Gigafida
// if (TISK.toString().equals(tax)) {
// return TISK;
// }
if (SSJ_TISK.toLongNameString().equals(tax)) {
return SSJ_TISK;
}
if (SSJ_KNJIZNO.toLongNameString().equals(tax)) {
return SSJ_KNJIZNO;
}
if (SSJ_LEPOSLOVNO.toLongNameString().equals(tax)) {
return SSJ_LEPOSLOVNO;
}
if (SSJ_STROKOVNO.toLongNameString().equals(tax)) {
return SSJ_STROKOVNO;
}
if (SSJ_PERIODICNO.toLongNameString().equals(tax)) {
return SSJ_PERIODICNO;
}
if (SSJ_CASOPIS.toLongNameString().equals(tax)) {
return SSJ_CASOPIS;
}
if (SSJ_REVIJA.toLongNameString().equals(tax)) {
return SSJ_REVIJA;
}
if (SSJ_DRUGO.toLongNameString().equals(tax)) {
return SSJ_DRUGO;
}
if (SSJ_INTERNET.toLongNameString().equals(tax)) {
return SSJ_INTERNET;
}
if (FT_P_PRENOSNIK.toLongNameString().equals(tax)) {
return FT_P_PRENOSNIK;
}
if (FT_P_GOVORNI.toLongNameString().equals(tax)) {
return FT_P_GOVORNI;
}
if (FT_P_ELEKTRONSKI.toLongNameString().equals(tax)) {
return FT_P_ELEKTRONSKI;
}
if (FT_P_PISNI.toLongNameString().equals(tax)) {
return FT_P_PISNI;
}
if (FT_P_OBJAVLJENO.toLongNameString().equals(tax)) {
return FT_P_OBJAVLJENO;
}
if (FT_P_KNJIZNO.toLongNameString().equals(tax)) {
return FT_P_KNJIZNO;
}
if (FT_P_PERIODICNO.toLongNameString().equals(tax)) {
return FT_P_PERIODICNO;
}
if (FT_P_CASOPISNO.toLongNameString().equals(tax)) {
return FT_P_CASOPISNO;
}
if (FT_P_DNEVNO.toLongNameString().equals(tax)) {
return FT_P_DNEVNO;
}
if (FT_P_VECKRAT_TEDENSKO.toLongNameString().equals(tax)) {
return FT_P_VECKRAT_TEDENSKO;
}
if (FT_P_CASOPISNO_TEDENSKO.toLongNameString().equals(tax)) {
return FT_P_CASOPISNO_TEDENSKO;
}
if (FT_P_REVIALNO.toLongNameString().equals(tax)) {
return FT_P_REVIALNO;
}
if (FT_P_TEDENSKO.toLongNameString().equals(tax)) {
return FT_P_TEDENSKO;
}
if (FT_P_STIRINAJSTDNEVNO.toLongNameString().equals(tax)) {
return FT_P_STIRINAJSTDNEVNO;
}
if (FT_P_MESECNO.toLongNameString().equals(tax)) {
return FT_P_MESECNO;
}
if (FT_P_REDKEJE_KOT_MESECNO.toLongNameString().equals(tax)) {
return FT_P_REDKEJE_KOT_MESECNO;
}
if (FT_P_OBCASNO.toLongNameString().equals(tax)) {
return FT_P_OBCASNO;
}
if (FT_P_NEOBJAVLJENO.toLongNameString().equals(tax)) {
return FT_P_NEOBJAVLJENO;
}
if (FT_P_JAVNO.toLongNameString().equals(tax)) {
return FT_P_JAVNO;
}
if (FT_P_INTERNO.toLongNameString().equals(tax)) {
return FT_P_INTERNO;
}
if (FT_P_ZASEBNO.toLongNameString().equals(tax)) {
return FT_P_ZASEBNO;
}
if (FT_ZVRST.toLongNameString().equals(tax)) {
return FT_ZVRST;
}
if (FT_UMETNOSTNA.toLongNameString().equals(tax)) {
return FT_UMETNOSTNA;
}
if (FT_PESNISKA.toLongNameString().equals(tax)) {
return FT_PESNISKA;
}
if (FT_PROZNA.toLongNameString().equals(tax)) {
return FT_PROZNA;
}
if (FT_DRAMSKA.toLongNameString().equals(tax)) {
return FT_DRAMSKA;
}
if (FT_NEUMETNOSTNA.toLongNameString().equals(tax)) {
return FT_NEUMETNOSTNA;
}
if (FT_STROKOVNA.toLongNameString().equals(tax)) {
return FT_STROKOVNA;
}
if (FT_NIT.toLongNameString().equals(tax)) {
return FT_NIT;
}
if (FT_HID.toLongNameString().equals(tax)) {
return FT_HID;
}
if (FT_NESTROKOVNA.toLongNameString().equals(tax)) {
return FT_NESTROKOVNA;
}
if (FT_PRAVNA.toLongNameString().equals(tax)) {
return FT_PRAVNA;
}
if (FT_LEKTORIRANO.toLongNameString().equals(tax)) {
return FT_LEKTORIRANO;
}
if (FT_DA.toLongNameString().equals(tax)) {
return FT_DA;
}
if (FT_NE.toLongNameString().equals(tax)) {
return FT_NE;
}
}
return null;
}
/**
 * Lists every taxonomy entry that lies below the given entry in the
 * taxonomy hierarchy (children, grandchildren, ...), in declaration order.
 * The given entry itself is not part of the result.
 *
 * @param disjointTaxonomy the entry whose descendants are requested; must not be {@code null}
 * @return a new, mutable list of descendants; empty when the entry has none
 * @throws NullPointerException if {@code disjointTaxonomy} is {@code null}
 */
public static ArrayList<Taxonomy> taxonomySelected(Taxonomy disjointTaxonomy) {
    ArrayList<Taxonomy> r = new ArrayList<>();
    switch (disjointTaxonomy) {
        // GOS
        case DISKURZ:
            Collections.addAll(r,
                    DISKURZ_JAVNI, DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_RAZVEDRILNI,
                    DISKURZ_NEJAVNI, DISKURZ_NEZASEBNI, DISKURZ_ZASEBNI);
            break;
        case DISKURZ_JAVNI:
            Collections.addAll(r, DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_RAZVEDRILNI);
            break;
        case DISKURZ_NEJAVNI:
            Collections.addAll(r, DISKURZ_NEZASEBNI, DISKURZ_ZASEBNI);
            break;
        case SITUACIJA:
            Collections.addAll(r, SITUACIJA_RADIO, SITUACIJA_TELEVIZIJA);
            break;
        case KANAL:
            // KANAL_TELEFON (gos.K.P) is a child of gos.K as well and must be included.
            Collections.addAll(r, KANAL_OSEBNI_STIK, KANAL_TELEFON, KANAL_RADIO, KANAL_TELEVIZIJA);
            break;

        // Gigafida - SSJ
        case SSJ_TISK:
            Collections.addAll(r,
                    SSJ_KNJIZNO, SSJ_LEPOSLOVNO, SSJ_STROKOVNO,
                    SSJ_PERIODICNO, SSJ_CASOPIS, SSJ_REVIJA, SSJ_DRUGO);
            break;
        case SSJ_KNJIZNO:
            Collections.addAll(r, SSJ_LEPOSLOVNO, SSJ_STROKOVNO);
            break;
        case SSJ_PERIODICNO:
            Collections.addAll(r, SSJ_CASOPIS, SSJ_REVIJA);
            break;

        // Gigafida - Ft.P
        case FT_P_PRENOSNIK:
            Collections.addAll(r,
                    FT_P_GOVORNI, FT_P_ELEKTRONSKI, FT_P_PISNI,
                    FT_P_OBJAVLJENO, FT_P_KNJIZNO, FT_P_PERIODICNO,
                    FT_P_CASOPISNO, FT_P_DNEVNO, FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO_TEDENSKO,
                    FT_P_REVIALNO, FT_P_TEDENSKO, FT_P_STIRINAJSTDNEVNO, FT_P_MESECNO,
                    FT_P_REDKEJE_KOT_MESECNO, FT_P_OBCASNO,
                    FT_P_NEOBJAVLJENO, FT_P_JAVNO, FT_P_INTERNO, FT_P_ZASEBNO);
            break;
        case FT_P_PISNI:
            Collections.addAll(r,
                    FT_P_OBJAVLJENO, FT_P_KNJIZNO, FT_P_PERIODICNO,
                    FT_P_CASOPISNO, FT_P_DNEVNO, FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO_TEDENSKO,
                    FT_P_REVIALNO, FT_P_TEDENSKO, FT_P_STIRINAJSTDNEVNO, FT_P_MESECNO,
                    FT_P_REDKEJE_KOT_MESECNO, FT_P_OBCASNO,
                    FT_P_NEOBJAVLJENO, FT_P_JAVNO, FT_P_INTERNO, FT_P_ZASEBNO);
            break;
        case FT_P_OBJAVLJENO:
            Collections.addAll(r,
                    FT_P_KNJIZNO, FT_P_PERIODICNO,
                    FT_P_CASOPISNO, FT_P_DNEVNO, FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO_TEDENSKO,
                    FT_P_REVIALNO, FT_P_TEDENSKO, FT_P_STIRINAJSTDNEVNO, FT_P_MESECNO,
                    FT_P_REDKEJE_KOT_MESECNO, FT_P_OBCASNO);
            break;
        case FT_P_PERIODICNO:
            Collections.addAll(r,
                    FT_P_CASOPISNO, FT_P_DNEVNO, FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO_TEDENSKO,
                    FT_P_REVIALNO, FT_P_TEDENSKO, FT_P_STIRINAJSTDNEVNO, FT_P_MESECNO,
                    FT_P_REDKEJE_KOT_MESECNO, FT_P_OBCASNO);
            break;
        case FT_P_CASOPISNO:
            Collections.addAll(r, FT_P_DNEVNO, FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO_TEDENSKO);
            break;
        case FT_P_REVIALNO:
            Collections.addAll(r,
                    FT_P_TEDENSKO, FT_P_STIRINAJSTDNEVNO, FT_P_MESECNO,
                    FT_P_REDKEJE_KOT_MESECNO, FT_P_OBCASNO);
            break;
        case FT_P_NEOBJAVLJENO:
            Collections.addAll(r, FT_P_JAVNO, FT_P_INTERNO, FT_P_ZASEBNO);
            break;

        // Gigafida - Ft.Z
        case FT_ZVRST:
            Collections.addAll(r,
                    FT_UMETNOSTNA, FT_PESNISKA, FT_PROZNA, FT_DRAMSKA,
                    FT_NEUMETNOSTNA, FT_STROKOVNA, FT_HID, FT_NIT,
                    FT_NESTROKOVNA, FT_PRAVNA);
            break;
        case FT_UMETNOSTNA:
            Collections.addAll(r, FT_PESNISKA, FT_PROZNA, FT_DRAMSKA);
            break;
        case FT_NEUMETNOSTNA:
            Collections.addAll(r, FT_STROKOVNA, FT_HID, FT_NIT, FT_NESTROKOVNA, FT_PRAVNA);
            break;
        case FT_STROKOVNA:
            Collections.addAll(r, FT_HID, FT_NIT);
            break;

        // Gigafida - Ft.L
        case FT_LEKTORIRANO:
            Collections.addAll(r, FT_DA, FT_NE);
            break;

        default:
            // leaf entries (and TOTAL) have no descendants
            break;
    }
    return r;
}
/**
 * Builds the chain of entries that lead from the top of the hierarchy down
 * to the given entry: all of its ancestors followed by the entry itself.
 *
 * @param disjointTaxonomy the entry whose ancestor chain is requested; must not be {@code null}
 * @return a new, mutable list ordered from the top-most ancestor down to
 *         {@code disjointTaxonomy}; contains only the entry itself when it has no parent
 * @throws NullPointerException if {@code disjointTaxonomy} is {@code null}
 */
public static ArrayList<Taxonomy> taxonomyDeselected(Taxonomy disjointTaxonomy){
    Objects.requireNonNull(disjointTaxonomy, "disjointTaxonomy");

    // child -> direct parent
    Map<Taxonomy, Taxonomy> connections = new EnumMap<>(Taxonomy.class);

    // GOS
    connections.put(DISKURZ_JAVNI, DISKURZ);
    connections.put(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_JAVNI);
    connections.put(DISKURZ_RAZVEDRILNI, DISKURZ_JAVNI);
    connections.put(DISKURZ_NEJAVNI, DISKURZ);
    connections.put(DISKURZ_NEZASEBNI, DISKURZ_NEJAVNI);
    connections.put(DISKURZ_ZASEBNI, DISKURZ_NEJAVNI);
    connections.put(SITUACIJA_RADIO, SITUACIJA);
    connections.put(SITUACIJA_TELEVIZIJA, SITUACIJA);
    connections.put(KANAL_OSEBNI_STIK, KANAL);
    connections.put(KANAL_TELEFON, KANAL);
    connections.put(KANAL_RADIO, KANAL);
    connections.put(KANAL_TELEVIZIJA, KANAL);

    // Gigafida - SSJ
    connections.put(SSJ_KNJIZNO, SSJ_TISK);
    connections.put(SSJ_LEPOSLOVNO, SSJ_KNJIZNO);
    connections.put(SSJ_STROKOVNO, SSJ_KNJIZNO);
    // SSJ.T.P and its children take part in the hierarchy too (see taxonomySelected)
    connections.put(SSJ_PERIODICNO, SSJ_TISK);
    connections.put(SSJ_CASOPIS, SSJ_PERIODICNO);
    connections.put(SSJ_REVIJA, SSJ_PERIODICNO);
    connections.put(SSJ_DRUGO, SSJ_TISK);

    // Gigafida - Ft.P
    connections.put(FT_P_GOVORNI, FT_P_PRENOSNIK);
    connections.put(FT_P_ELEKTRONSKI, FT_P_PRENOSNIK);
    connections.put(FT_P_PISNI, FT_P_PRENOSNIK);
    connections.put(FT_P_OBJAVLJENO, FT_P_PISNI);
    connections.put(FT_P_KNJIZNO, FT_P_OBJAVLJENO);
    connections.put(FT_P_PERIODICNO, FT_P_OBJAVLJENO);
    // Ft.P.P.O.P.C hangs below Ft.P.P.O.P, not directly below Ft.P.P.O
    connections.put(FT_P_CASOPISNO, FT_P_PERIODICNO);
    connections.put(FT_P_DNEVNO, FT_P_CASOPISNO);
    connections.put(FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO);
    connections.put(FT_P_CASOPISNO_TEDENSKO, FT_P_CASOPISNO);
    connections.put(FT_P_REVIALNO, FT_P_PERIODICNO);
    connections.put(FT_P_TEDENSKO, FT_P_REVIALNO);
    connections.put(FT_P_STIRINAJSTDNEVNO, FT_P_REVIALNO);
    connections.put(FT_P_MESECNO, FT_P_REVIALNO);
    connections.put(FT_P_REDKEJE_KOT_MESECNO, FT_P_REVIALNO);
    connections.put(FT_P_OBCASNO, FT_P_REVIALNO);
    connections.put(FT_P_NEOBJAVLJENO, FT_P_PISNI);
    connections.put(FT_P_JAVNO, FT_P_NEOBJAVLJENO);
    connections.put(FT_P_INTERNO, FT_P_NEOBJAVLJENO);
    connections.put(FT_P_ZASEBNO, FT_P_NEOBJAVLJENO);

    // Gigafida - Ft.Z
    connections.put(FT_UMETNOSTNA, FT_ZVRST);
    connections.put(FT_PESNISKA, FT_UMETNOSTNA);
    connections.put(FT_PROZNA, FT_UMETNOSTNA);
    connections.put(FT_DRAMSKA, FT_UMETNOSTNA);
    connections.put(FT_NEUMETNOSTNA, FT_ZVRST);
    connections.put(FT_STROKOVNA, FT_NEUMETNOSTNA);
    connections.put(FT_HID, FT_STROKOVNA);
    connections.put(FT_NIT, FT_STROKOVNA);
    connections.put(FT_NESTROKOVNA, FT_NEUMETNOSTNA);
    connections.put(FT_PRAVNA, FT_NEUMETNOSTNA);

    // Gigafida - Ft.L
    connections.put(FT_DA, FT_LEKTORIRANO);
    connections.put(FT_NE, FT_LEKTORIRANO);

    // walk upwards from the entry to its top-most ancestor ...
    ArrayList<Taxonomy> r = new ArrayList<>();
    for (Taxonomy current = disjointTaxonomy; current != null; current = connections.get(current)) {
        r.add(current);
    }

    // ... and present the chain top-down
    Collections.reverse(r);
    return r;
}
/**
 * Converts a list of long taxonomy names into the matching enum entries,
 * keeping the order of the input. Each name is resolved through
 * {@link #factoryLongName(String)}, so a name that it does not recognise
 * ends up as a {@code null} element in the result.
 *
 * @param stringList long taxonomy names to convert
 * @return a new, mutable list with one element per input name
 */
public static ArrayList<Taxonomy> convertStringListToTaxonomyList(ObservableList<String> stringList){
    ArrayList<Taxonomy> converted = new ArrayList<>();
    for (String longName : stringList) {
        Taxonomy resolved = factoryLongName(longName);
        converted.add(resolved);
    }
    return converted;
}
/**
 * Propagates one user (de)selection through the taxonomy hierarchy by
 * editing {@code checkedItemsTaxonomy} in place.
 *
 * <p>The entries that differ between the previous selection
 * ({@code taxonomy}) and the current one ({@code checkedItemsTaxonomy}) are
 * determined and restricted to entries offered by the corpus. One of the
 * remaining entries is then processed: if it is newly checked, everything
 * returned by {@link #taxonomySelected(Taxonomy)} is added to the checked
 * items; if it was unchecked, everything returned by
 * {@link #taxonomyDeselected(Taxonomy)} is removed from them.
 *
 * @param taxonomy             previously selected entries; may be {@code null}
 * @param checkedItemsTaxonomy currently checked entries; modified by this method
 * @param corpus               corpus whose {@code getTaxonomy()} names define the available entries
 */
public static void modifyingTaxonomy(ArrayList<Taxonomy> taxonomy, ArrayList<Taxonomy> checkedItemsTaxonomy, Corpus corpus){
    // entries that are in exactly one of the two selections (selected or deselected by the user)
    Set<Taxonomy> disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy);
    if (taxonomy != null) {
        disjointTaxonomies.addAll(taxonomy);
        disjointTaxonomies.removeIf(t -> checkedItemsTaxonomy.contains(t) && taxonomy.contains(t));
    }

    if (disjointTaxonomies.isEmpty()) {
        return;
    }

    // drop entries that are not present in the current corpus; the corpus
    // list is converted once instead of once per examined entry
    Set<Taxonomy> available = new HashSet<>(Taxonomy.convertStringListToTaxonomyList(corpus.getTaxonomy()));
    disjointTaxonomies.retainAll(available);

    if (disjointTaxonomies.isEmpty()) {
        return;
    }

    Taxonomy disjointTaxonomy = disjointTaxonomies.iterator().next();

    if (checkedItemsTaxonomy.contains(disjointTaxonomy)) {
        // taxonomy was selected
        checkedItemsTaxonomy.addAll(Taxonomy.taxonomySelected(disjointTaxonomy));
    } else if (taxonomy != null && taxonomy.contains(disjointTaxonomy)) {
        // taxonomy was deselected
        checkedItemsTaxonomy.removeAll(Taxonomy.taxonomyDeselected(disjointTaxonomy));
    }
}
}

View File

@ -45,7 +45,7 @@ public class CharacterAnalysisTab {
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
private ArrayList<Taxonomy> taxonomy;
@FXML
private CheckBox displayTaxonomyChB;
@ -183,11 +183,33 @@ public class CharacterAnalysisTab {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
taxonomy = new ArrayList<>();
taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
});
taxonomyCCB.getCheckModel().clearChecks();
} else {
@ -313,75 +335,75 @@ public class CharacterAnalysisTab {
* iscvv: false
* string length: 1
*/
public void populateFields() {
// corpus changed if: current one is null (this is first run of the app)
// or if currentCorpus != gui's corpus
boolean corpusChanged = currentCorpusType == null
|| currentCorpusType != corpus.getCorpusType();
// TODO: check for GOS, GIGAFIDA, SOLAR...
// refresh and:
// TODO if current value != null && is in new calculateFor ? keep : otherwise reset
// if (calculateFor == null) {
// calculateForRB.selectToggle(lemmaRB);
// calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString());
// public void populateFields() {
// // corpus changed if: current one is null (this is first run of the app)
// // or if currentCorpus != gui's corpus
// boolean corpusChanged = currentCorpusType == null
// || currentCorpusType != corpus.getCorpusType();
//
// // TODO: check for GOS, GIGAFIDA, SOLAR...
// // refresh and:
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
//// if (calculateFor == null) {
//// calculateForRB.selectToggle(lemmaRB);
//// calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString());
//// }
//
// if (!filter.hasMsd()) {
// // if current corpus doesn't have msd data, disable this field
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(true);
// logger.info("no msd data");
// } else {
// if (ValidationUtil.isEmpty(msd)
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// // msd has not been set previously
// // or msd has been set but the corpus changed -> reset
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(false);
// logger.info("msd reset");
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
// msdTF.setText(StringUtils.join(msdStrings, " "));
// msdTF.setDisable(false);
// logger.info("msd kept");
// }
// }
//
// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
//
// // keep calculateCvv
// calculatecvvCB.setSelected(calculateCvv);
//
// // keep string length if set
// if (stringLength != null) {
// stringLengthTF.setText(String.valueOf(stringLength));
// } else {
// stringLengthTF.setText("1");
// stringLength = 1;
// }
//
// // TODO: trigger on rescan
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
//
// currentCorpusType = corpus.getCorpusType();
// // setTaxonomyIsDirty(false);
// } else {
//
// }
//
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
//
// }
if (!filter.hasMsd()) {
// if current corpus doesn't have msd data, disable this field
msd = new ArrayList<>();
msdTF.setText("");
msdTF.setDisable(true);
logger.info("no msd data");
} else {
if (ValidationUtil.isEmpty(msd)
|| (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// msd has not been set previously
// or msd has been set but the corpus changed -> reset
msd = new ArrayList<>();
msdTF.setText("");
msdTF.setDisable(false);
logger.info("msd reset");
} else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// if msd has been set, but corpus type remained the same, we can keep any set msd value
msdTF.setText(StringUtils.join(msdStrings, " "));
msdTF.setDisable(false);
logger.info("msd kept");
}
}
// TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
// keep calculateCvv
calculatecvvCB.setSelected(calculateCvv);
// keep string length if set
if (stringLength != null) {
stringLengthTF.setText(String.valueOf(stringLength));
} else {
stringLengthTF.setText("1");
stringLength = 1;
}
// TODO: trigger on rescan
if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// user changed corpus (by type) or by selection & triggered a rescan of headers
// see if we read taxonomy from headers, otherwise use default values for given corpus
ObservableList<String> tax = corpus.getTaxonomy();
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
currentCorpusType = corpus.getCorpusType();
// setTaxonomyIsDirty(false);
} else {
}
// see if we read taxonomy from headers, otherwise use default values for given corpus
ObservableList<String> tax = corpus.getTaxonomy();
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
taxonomyCCB.getItems().addAll(taxonomyCCBValues);
}
/**
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
@ -434,7 +456,7 @@ public class CharacterAnalysisTab {
filter.setMultipleKeys(new ArrayList<>());
filter.setMsd(msd);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setTaxonomy(taxonomy);
filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0);

View File

@ -49,7 +49,7 @@ public class OneWordAnalysisTab {
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
private ArrayList<Taxonomy> taxonomy;
@FXML
private CheckBox displayTaxonomyChB;
@ -222,11 +222,33 @@ public class OneWordAnalysisTab {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
taxonomy = new ArrayList<>();
taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
});
taxonomyCCB.getCheckModel().clearChecks();
} else {
@ -333,63 +355,63 @@ public class OneWordAnalysisTab {
* iscvv: false
* string length: 1
*/
public void populateFields() {
    // Refreshes the tab's input controls from the current corpus and filter:
    // the calculate-for selection, the MSD text field and the taxonomy choices.

    // corpus changed if: current one is null (this is first run of the app)
    // or if currentCorpus != gui's corpus
    boolean corpusChanged = currentCorpusType == null
            || currentCorpusType != corpus.getCorpusType();

    // TODO: check for GOS, GIGAFIDA, SOLAR...
    // refresh and:
    // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
    if (calculateFor == null) {
        // nothing selected yet -> fall back to the first available option
        calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
        calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
    }

    if (!filter.hasMsd()) {
        // if current corpus doesn't have msd data, clear and disable this field
        msd = new ArrayList<>();
        msdTF.setText("");
        msdTF.setDisable(true);
        logger.info("no msd data");
    } else if (ValidationUtil.isEmpty(msd) || corpusChanged) {
        // msd has not been set previously
        // or msd has been set but the corpus changed -> reset
        msd = new ArrayList<>();
        msdTF.setText("");
        msdTF.setDisable(false);
        logger.info("msd reset");
    } else {
        // msd has been set and the corpus type remained the same -> keep the set msd value
        msdTF.setText(StringUtils.join(msdStrings, " "));
        msdTF.setDisable(false);
        logger.info("msd kept");
    }

    // TODO: trigger on rescan
    if (currentCorpusType != null && corpusChanged) {
        // user changed corpus (by type) or by selection & triggered a rescan of headers:
        // remember the new corpus type
        currentCorpusType = corpus.getCorpusType();
        // setTaxonomyIsDirty(false);
    }
    // NOTE(review): currentCorpusType is only updated here when it is already non-null;
    // presumably it is initialised elsewhere -- confirm, otherwise corpusChanged stays true.

    // see if we read taxonomy from headers, otherwise use default values for given corpus
    ObservableList<String> tax = corpus.getTaxonomy();
    taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
    // NOTE(review): addAll appends to whatever items the combo box already holds;
    // verify that repeated calls cannot produce duplicate entries.
    taxonomyCCB.getItems().addAll(taxonomyCCBValues);
}
// public void populateFields() {
// // corpus changed if: current one is null (this is first run of the app)
// // or if currentCorpus != gui's corpus
// boolean corpusChanged = currentCorpusType == null
// || currentCorpusType != corpus.getCorpusType();
//
//
// // TODO: check for GOS, GIGAFIDA, SOLAR...
// // refresh and:
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
// if (calculateFor == null) {
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
// }
//
// if (!filter.hasMsd()) {
// // if current corpus doesn't have msd data, disable this field
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(true);
// logger.info("no msd data");
// } else {
// if (ValidationUtil.isEmpty(msd)
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// // msd has not been set previously
// // or msd has been set but the corpus changed -> reset
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(false);
// logger.info("msd reset");
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
// msdTF.setText(StringUtils.join(msdStrings, " "));
// msdTF.setDisable(false);
// logger.info("msd kept");
// }
// }
//
// // TODO: trigger on rescan
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
//
// currentCorpusType = corpus.getCorpusType();
// // setTaxonomyIsDirty(false);
// } else {
//
// }
//
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
//
// }
/**
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
@ -435,7 +457,7 @@ public class OneWordAnalysisTab {
Filter filter = new Filter();
filter.setNgramValue(1);
filter.setCalculateFor(calculateFor);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setTaxonomy(taxonomy);
filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0);

View File

@ -52,7 +52,7 @@ public class StringAnalysisTabNew2 {
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
private ArrayList<Taxonomy> taxonomy;
@FXML
private CheckBox calculatecvvCB;
@ -308,11 +308,33 @@ public class StringAnalysisTabNew2 {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
taxonomy = new ArrayList<>();
taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
});
taxonomyCCB.getCheckModel().clearChecks();
} else {
@ -563,7 +585,7 @@ public class StringAnalysisTabNew2 {
Filter filter = new Filter();
filter.setNgramValue(ngramValue);
filter.setCalculateFor(calculateFor);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setTaxonomy(taxonomy);
filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(skipValue);

View File

@ -38,7 +38,7 @@ public class WordFormationTab {
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
private ArrayList<Taxonomy> taxonomy;
@FXML
private TextField minimalOccurrencesTF;
@ -77,7 +77,8 @@ public class WordFormationTab {
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
taxonomy = new ArrayList<>();
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
taxonomy.addAll(checkedItemsTaxonomy);
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
});
taxonomyCCB.getCheckModel().clearChecks();
@ -140,7 +141,7 @@ public class WordFormationTab {
Filter filter = new Filter();
filter.setNgramValue(1);
filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setTaxonomy(taxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0);
filter.setMsd(new ArrayList<>());

View File

@ -47,7 +47,7 @@ public class WordLevelTab {
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
private ArrayList<Taxonomy> taxonomy;
@FXML
private CheckBox displayTaxonomyChB;
@ -345,11 +345,33 @@ public class WordLevelTab {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
taxonomy = new ArrayList<>();
taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
});
taxonomyCCB.getCheckModel().clearChecks();
} else {
@ -458,63 +480,63 @@ public class WordLevelTab {
* iscvv: false
* string length: 1
*/
public void populateFields() {
    // Refreshes the tab's input controls from the current corpus and filter:
    // the calculate-for selection, the MSD text field and the taxonomy choices.

    // corpus changed if: current one is null (this is first run of the app)
    // or if currentCorpus != gui's corpus
    boolean corpusChanged = currentCorpusType == null
            || currentCorpusType != corpus.getCorpusType();

    // TODO: check for GOS, GIGAFIDA, SOLAR...
    // refresh and:
    // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
    if (calculateFor == null) {
        // nothing selected yet -> fall back to the first available option
        calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
        calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
    }

    if (!filter.hasMsd()) {
        // if current corpus doesn't have msd data, clear and disable this field
        msd = new ArrayList<>();
        msdTF.setText("");
        msdTF.setDisable(true);
        logger.info("no msd data");
    } else if (ValidationUtil.isEmpty(msd) || corpusChanged) {
        // msd has not been set previously
        // or msd has been set but the corpus changed -> reset
        msd = new ArrayList<>();
        msdTF.setText("");
        msdTF.setDisable(false);
        logger.info("msd reset");
    } else {
        // msd has been set and the corpus type remained the same -> keep the set msd value
        msdTF.setText(StringUtils.join(msdStrings, " "));
        msdTF.setDisable(false);
        logger.info("msd kept");
    }

    // TODO: trigger on rescan
    if (currentCorpusType != null && corpusChanged) {
        // user changed corpus (by type) or by selection & triggered a rescan of headers:
        // remember the new corpus type
        currentCorpusType = corpus.getCorpusType();
        // setTaxonomyIsDirty(false);
    }
    // NOTE(review): currentCorpusType is only updated here when it is already non-null;
    // presumably it is initialised elsewhere -- confirm, otherwise corpusChanged stays true.

    // see if we read taxonomy from headers, otherwise use default values for given corpus
    ObservableList<String> tax = corpus.getTaxonomy();
    taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
    // NOTE(review): addAll appends to whatever items the combo box already holds;
    // verify that repeated calls cannot produce duplicate entries.
    taxonomyCCB.getItems().addAll(taxonomyCCBValues);
}
// public void populateFields() {
// // corpus changed if: current one is null (this is first run of the app)
// // or if currentCorpus != gui's corpus
// boolean corpusChanged = currentCorpusType == null
// || currentCorpusType != corpus.getCorpusType();
//
//
// // TODO: check for GOS, GIGAFIDA, SOLAR...
// // refresh and:
// // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
// if (calculateFor == null) {
// calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
// calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
// }
//
// if (!filter.hasMsd()) {
// // if current corpus doesn't have msd data, disable this field
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(true);
// logger.info("no msd data");
// } else {
// if (ValidationUtil.isEmpty(msd)
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// // msd has not been set previously
// // or msd has been set but the corpus changed -> reset
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(false);
// logger.info("msd reset");
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
// msdTF.setText(StringUtils.join(msdStrings, " "));
// msdTF.setDisable(false);
// logger.info("msd kept");
// }
// }
//
// // TODO: trigger on rescan
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
//
// currentCorpusType = corpus.getCorpusType();
// // setTaxonomyIsDirty(false);
// } else {
//
// }
//
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
//
// }
/**
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
@ -560,7 +582,7 @@ public class WordLevelTab {
Filter filter = new Filter();
filter.setNgramValue(1);
filter.setCalculateFor(calculateFor);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setTaxonomy(taxonomy);
filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0);

View File

@ -60,7 +60,7 @@ public class Export {
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
StatisticsNew statistics, Filter filter) {
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
//Delimiter used in CSV file
String NEW_LINE_SEPARATOR = "\n";
@ -85,7 +85,7 @@ public class Export {
// num_taxonomy_frequencies.put(taxonomyKey, val);
// }
// }
Map<String, AtomicLong> num_taxonomy_frequencies = statistics.getUniGramOccurrences();
Map<Taxonomy, AtomicLong> num_taxonomy_frequencies = statistics.getUniGramOccurrences();
//CSV file header
@ -106,7 +106,7 @@ public class Export {
}
}
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences().get("Total").longValue()));
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences().get(Taxonomy.TOTAL).longValue()));
// headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
for (CalculateFor otherKey : filter.getMultipleKeys()) {
@ -127,11 +127,11 @@ public class Export {
}
}
for (String key : taxonomyResults.keySet()) {
if(!key.equals("Total") && num_taxonomy_frequencies.get(key).longValue() > 0) {
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
FILE_HEADER_AL.add("Delež [" + key + "]");
FILE_HEADER_AL.add("Relativna pogostost [" + key + "]");
for (Taxonomy key : taxonomyResults.keySet()) {
if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
FILE_HEADER_AL.add("Absolutna pogostost [" + key.toString() + "]");
FILE_HEADER_AL.add("Delež [" + key.toString() + "]");
FILE_HEADER_AL.add("Relativna pogostost [" + key.toString() + "]");
}
}
@ -270,8 +270,8 @@ public class Export {
dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
for (String key : taxonomyResults.keySet()){
if(!key.equals("Total") && num_taxonomy_frequencies.get(key).longValue() > 0) {
for (Taxonomy key : taxonomyResults.keySet()){
if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
dataEntry.add(frequency.toString());
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key).longValue()));

View File

@ -5,6 +5,7 @@ import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import data.Taxonomy;
import org.rocksdb.RocksDB;
import util.db.RDB;