Taxonomy refactored

This commit is contained in:
Luka 2018-11-26 13:41:35 +01:00
parent a7f3bdb925
commit 9efe3d529b
16 changed files with 1173 additions and 491 deletions

View File

@ -536,8 +536,8 @@ public class XML_processing {
boolean inWord = false; boolean inWord = false;
boolean inPunctuation = false; boolean inPunctuation = false;
boolean taxonomyMatch = true; boolean taxonomyMatch = true;
ArrayList<String> currentFiletaxonomy = new ArrayList<>(); ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>(); // ArrayList<Taxonomy> currentFiletaxonomyLong = new ArrayList<>();
String lemma = ""; String lemma = "";
String msd = ""; String msd = "";
@ -578,10 +578,10 @@ public class XML_processing {
if (tax != null) { if (tax != null) {
// keep only taxonomy properties // keep only taxonomy properties
String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", ""); Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""));
currentFiletaxonomy.add(currentFiletaxonomyElement); currentFiletaxonomy.add(currentFiletaxonomyElement);
Tax taxonomy = new Tax(); Tax taxonomy = new Tax();
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); // currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
} }
} }
break; break;
@ -637,7 +637,7 @@ public class XML_processing {
// parser reached end of the current sentence // parser reached end of the current sentence
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
// count all UniGramOccurrences in sentence for statistics // count all UniGramOccurrences in sentence for statistics
stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomyLong); stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy);
// add sentence to corpus if it passes filters // add sentence to corpus if it passes filters
sentence = runFilters(sentence, stats.getFilter()); sentence = runFilters(sentence, stats.getFilter());
@ -645,7 +645,7 @@ public class XML_processing {
if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) { if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) {
corpus.add(new Sentence(sentence, currentFiletaxonomyLong)); corpus.add(new Sentence(sentence, currentFiletaxonomy));
} }
// taxonomyMatch = true; // taxonomyMatch = true;
@ -713,8 +713,8 @@ public class XML_processing {
public static boolean readXMLSSJ500K(String path, StatisticsNew stats) { public static boolean readXMLSSJ500K(String path, StatisticsNew stats) {
boolean inWord = false; boolean inWord = false;
boolean inPunctuation = false; boolean inPunctuation = false;
ArrayList<String> currentFiletaxonomy = new ArrayList<>(); ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>(); // ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = ""; String lemma = "";
String msd = ""; String msd = "";
@ -757,10 +757,10 @@ public class XML_processing {
if (tax != null) { if (tax != null) {
// keep only taxonomy properties // keep only taxonomy properties
String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", ""); Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""));
currentFiletaxonomy.add(currentFiletaxonomyElement); currentFiletaxonomy.add(currentFiletaxonomyElement);
Tax taxonomy = new Tax(); Tax taxonomy = new Tax();
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); // currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
} }
} }
break; break;
@ -793,7 +793,7 @@ public class XML_processing {
sentence = runFilters(sentence, stats.getFilter()); sentence = runFilters(sentence, stats.getFilter());
if (!ValidationUtil.isEmpty(sentence)) { if (!ValidationUtil.isEmpty(sentence)) {
corpus.add(new Sentence(sentence, currentFiletaxonomyLong)); corpus.add(new Sentence(sentence, currentFiletaxonomy));
} }
// and start a new one // and start a new one
@ -820,7 +820,7 @@ public class XML_processing {
corpus.clear(); corpus.clear();
currentFiletaxonomy = new ArrayList<>(); currentFiletaxonomy = new ArrayList<>();
currentFiletaxonomyLong = new ArrayList<>(); // currentFiletaxonomyLong = new ArrayList<>();
} }
break; break;
@ -848,8 +848,8 @@ public class XML_processing {
boolean inOrthDiv = false; boolean inOrthDiv = false;
boolean computeForOrth = stats.getCorpus().isGosOrthMode(); boolean computeForOrth = stats.getCorpus().isGosOrthMode();
boolean inSeparatedWord = false; boolean inSeparatedWord = false;
ArrayList<String> currentFiletaxonomy = new ArrayList<>(); ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>(); // ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = ""; String lemma = "";
String msd = ""; String msd = "";
@ -923,10 +923,10 @@ public class XML_processing {
if (tax != null) { if (tax != null) {
// keep only taxonomy properties // keep only taxonomy properties
String currentFiletaxonomyElement = String.valueOf(tax.getValue()); Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()));
currentFiletaxonomy.add(currentFiletaxonomyElement); currentFiletaxonomy.add(currentFiletaxonomyElement);
Tax taxonomy = new Tax(); Tax taxonomy = new Tax();
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement)); // currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
} }
} else if (qName.equalsIgnoreCase("div")) { } else if (qName.equalsIgnoreCase("div")) {
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
@ -1010,7 +1010,7 @@ public class XML_processing {
// add sentence to corpus if it passes filters // add sentence to corpus if it passes filters
if (includeFile && !ValidationUtil.isEmpty(sentence)) { if (includeFile && !ValidationUtil.isEmpty(sentence)) {
sentence = runFilters(sentence, stats.getFilter()); sentence = runFilters(sentence, stats.getFilter());
corpus.add(new Sentence(sentence, currentFiletaxonomyLong)); corpus.add(new Sentence(sentence, currentFiletaxonomy));
} }
wordIndex = 0; wordIndex = 0;
@ -1050,7 +1050,7 @@ public class XML_processing {
corpus.clear(); corpus.clear();
currentFiletaxonomy = new ArrayList<>(); currentFiletaxonomy = new ArrayList<>();
currentFiletaxonomyLong = new ArrayList<>(); // currentFiletaxonomyLong = new ArrayList<>();
} }
break; break;

View File

@ -8,6 +8,7 @@ import java.util.stream.Collectors;
import data.Enums.InflectedJosTypes; import data.Enums.InflectedJosTypes;
import data.StatisticsNew; import data.StatisticsNew;
import data.Taxonomy;
import gui.ValidationUtil; import gui.ValidationUtil;
import util.Combinations; import util.Combinations;
@ -30,7 +31,7 @@ public class WordFormation {
Map<String, AtomicLong> result = stat.getResult(); Map<String, AtomicLong> result = stat.getResult();
// 1. filter - keep only inflected types // 1. filter - keep only inflected types
result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.charAt(0))); result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.toString().charAt(0)));
// 2. for each inflected type get all possible subcombinations // 2. for each inflected type get all possible subcombinations
for (Character josChar : InflectedJosTypes.inflectedJosTypes) { for (Character josChar : InflectedJosTypes.inflectedJosTypes) {

View File

@ -267,7 +267,7 @@ public class Ngrams {
for (Sentence s : corpus) { for (Sentence s : corpus) {
// stats.updateUniGramOccurrences(s.getWords().size()); // stats.updateUniGramOccurrences(s.getWords().size());
for (Word w : s.getWords()) { for (Word w : s.getWords()) {
List<String> taxonomy = s.getTaxonomy(); List<Taxonomy> taxonomy = s.getTaxonomy();
//// List<Word> ngramCandidate = s.getSublist(i, i + stats.getFilter().getNgramValue()); //// List<Word> ngramCandidate = s.getSublist(i, i + stats.getFilter().getNgramValue());
List<Word> ngramCandidate = new ArrayList<>(); List<Word> ngramCandidate = new ArrayList<>();
@ -425,7 +425,7 @@ public class Ngrams {
} }
} }
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<String> taxonomy) { private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<Taxonomy> taxonomy) {
// count if no regex is set or if it is & candidate passes it // count if no regex is set or if it is & candidate passes it
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) { if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) {
// String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor()); // String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());

View File

@ -104,13 +104,13 @@ public class Filter implements Cloneable {
return (Integer) filter.get(STRING_LENGTH); return (Integer) filter.get(STRING_LENGTH);
} }
public void setTaxonomy(ArrayList<String> taxonomy) { public void setTaxonomy(ArrayList<Taxonomy> taxonomy) {
filter.put(TAXONOMY, taxonomy); filter.put(TAXONOMY, taxonomy);
} }
public ArrayList<String> getTaxonomy() { public ArrayList<Taxonomy> getTaxonomy() {
if (filter.containsKey(TAXONOMY) && filter.get(TAXONOMY) != null) { if (filter.containsKey(TAXONOMY) && filter.get(TAXONOMY) != null) {
return (ArrayList<String>) filter.get(TAXONOMY); return (ArrayList<Taxonomy>) filter.get(TAXONOMY);
} else { } else {
return new ArrayList<>(); return new ArrayList<>();
} }

View File

@ -7,13 +7,13 @@ public class Sentence {
private List<Word> words; private List<Word> words;
private List<String> taxonomy; private List<Taxonomy> taxonomy;
// GOS // GOS
private String type; private String type;
private Map<String, String> properties; private Map<String, String> properties;
public Sentence(List<Word> words, List<String> taxonomy) { public Sentence(List<Word> words, List<Taxonomy> taxonomy) {
this.words = words; this.words = words;
this.taxonomy = taxonomy; this.taxonomy = taxonomy;
} }
@ -22,13 +22,13 @@ public class Sentence {
// this.words = words; // this.words = words;
// } // }
public Sentence(List<Word> words, List<String> taxonomy, Map<String, String> properties) { public Sentence(List<Word> words, List<Taxonomy> taxonomy, Map<String, String> properties) {
this.words = words; this.words = words;
this.taxonomy = taxonomy; this.taxonomy = taxonomy;
this.properties = properties; this.properties = properties;
} }
public Sentence(List<Word> words, List<String> taxonomy, String type) { public Sentence(List<Word> words, List<Taxonomy> taxonomy, String type) {
this.words = words; this.words = words;
this.taxonomy = taxonomy; this.taxonomy = taxonomy;
this.type = type; this.type = type;
@ -38,7 +38,7 @@ public class Sentence {
return words; return words;
} }
public List<String> getTaxonomy() { public List<Taxonomy> getTaxonomy() {
return taxonomy; return taxonomy;
} }

View File

@ -68,22 +68,22 @@ public class Statistics {
} }
// for words distributions // for words distributions
public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) { // public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); // String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
//
this.resultTitle = String.format("%s_%s_%s", // this.resultTitle = String.format("%s_%s_%s",
distributionTaxonomy != null ? distributionTaxonomy.toString() : "", // distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
distributionJosWordType != null ? distributionJosWordType.toString() : "", // distributionJosWordType != null ? distributionJosWordType.toString() : "",
dateTime); // dateTime);
//
this.analysisLevel = al; // this.analysisLevel = al;
this.cf = cf; // this.cf = cf;
this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null; // this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
this.taxonomyIsSet = distributionTaxonomy != null; // this.taxonomyIsSet = distributionTaxonomy != null;
//
this.JOSTypeIsSet = distributionJosWordType != null; // this.JOSTypeIsSet = distributionJosWordType != null;
this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' '; // this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
} // }
public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) { public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) {
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
@ -99,17 +99,17 @@ public class Statistics {
this.vcc = true; this.vcc = true;
} }
public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) { // public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); // String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
//
this.resultTitle = String.format("InflectedJOS_%s_%s", // this.resultTitle = String.format("InflectedJOS_%s_%s",
distributionTaxonomy != null ? distributionTaxonomy : "", // distributionTaxonomy != null ? distributionTaxonomy : "",
dateTime); // dateTime);
//
this.analysisLevel = al; // this.analysisLevel = al;
this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null; // this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
this.taxonomyIsSet = inflectedJosTaxonomy != null; // this.taxonomyIsSet = inflectedJosTaxonomy != null;
} // }
public Integer getSkip() { public Integer getSkip() {
return skip; return skip;

View File

@ -33,7 +33,7 @@ public class StatisticsNew {
private String resultTitle; private String resultTitle;
private Map<String, AtomicLong> result; private Map<String, AtomicLong> result;
private Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult; private Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;
private Object[][] resultCustom; // for when calculating percentages that don't add up to 100% private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedSuffix; private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedSuffix;
private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedPrefix; private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedPrefix;
@ -43,28 +43,28 @@ public class StatisticsNew {
private LocalDateTime timeBeginning; private LocalDateTime timeBeginning;
private LocalDateTime timeEnding; private LocalDateTime timeEnding;
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability; private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
private Map<String, AtomicLong> uniGramTaxonomyOccurrences; private Map<Taxonomy, AtomicLong> uniGramTaxonomyOccurrences;
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) { public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
this.corpus = corpus; this.corpus = corpus;
this.filter = filter; this.filter = filter;
this.taxonomyResult = new ConcurrentHashMap<>(); this.taxonomyResult = new ConcurrentHashMap<>();
this.taxonomyResult.put("Total", new ConcurrentHashMap<>()); this.taxonomyResult.put(Taxonomy.TOTAL, new ConcurrentHashMap<>());
this.collocability = new ConcurrentHashMap<>(); this.collocability = new ConcurrentHashMap<>();
this.uniGramTaxonomyOccurrences = new ConcurrentHashMap<>(); this.uniGramTaxonomyOccurrences = new ConcurrentHashMap<>();
this.uniGramTaxonomyOccurrences.put("Total", new AtomicLong(0L)); this.uniGramTaxonomyOccurrences.put(Taxonomy.TOTAL, new AtomicLong(0L));
// create table for counting word occurrences per taxonomies // create table for counting word occurrences per taxonomies
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) { if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
if (this.filter.getTaxonomy().isEmpty()) { if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) { for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>()); this.taxonomyResult.put(Taxonomy.factory(this.corpus.getTaxonomy().get(i)), new ConcurrentHashMap<>());
} }
} else { } else {
for (int i = 0; i < this.filter.getTaxonomy().size(); i++) { for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
Tax taxonomy = new Tax(); // Tax taxonomy = new Tax();
this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>()); this.taxonomyResult.put(this.filter.getTaxonomy().get(i), new ConcurrentHashMap<>());
} }
} }
} }
@ -202,15 +202,15 @@ public class StatisticsNew {
/** /**
* Stores results from this batch to a database and clears results map * Stores results from this batch to a database and clears results map
*/ */
public void storeTmpResultsToDB() { // public void storeTmpResultsToDB() {
try { // try {
db.writeBatch(result); // db.writeBatch(result);
result = new ConcurrentHashMap<>(); // result = new ConcurrentHashMap<>();
} catch (UnsupportedEncodingException e) { // } catch (UnsupportedEncodingException e) {
logger.error("Store tmp results to DB", e); // logger.error("Store tmp results to DB", e);
// e.printStackTrace(); // // e.printStackTrace();
} // }
} // }
public Filter getFilter() { public Filter getFilter() {
return filter; return filter;
@ -229,16 +229,16 @@ public class StatisticsNew {
} }
// if no results and nothing to save, return false // if no results and nothing to save, return false
if (!(taxonomyResult.get("Total").size() > 0)) { if (!(taxonomyResult.get(Taxonomy.TOTAL).size() > 0)) {
analysisProducedResults = false; analysisProducedResults = false;
return false; return false;
} else { } else {
analysisProducedResults = true; analysisProducedResults = true;
} }
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences()); removeMinimalOccurrences(taxonomyResult.get(Taxonomy.TOTAL), filter.getMinimalOccurrences());
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy()); removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit)))); stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get(Taxonomy.TOTAL), Util.getValidInt(limit))));
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter); Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
return true; return true;
} }
@ -246,18 +246,18 @@ public class StatisticsNew {
/** /**
* Removes lines, where number of different taxonomies is lower than specified number (minimalTaxonomy) * Removes lines, where number of different taxonomies is lower than specified number (minimalTaxonomy)
*/ */
private void removeMinimalTaxonomy(Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult, Integer minimalTaxonomy) { private void removeMinimalTaxonomy(Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResult, Integer minimalTaxonomy) {
if (minimalTaxonomy == 1) if (minimalTaxonomy == 1)
return; return;
int occurances; int occurances;
for (MultipleHMKeys key : taxonomyResult.get("Total").keySet()){ for (MultipleHMKeys key : taxonomyResult.get(Taxonomy.TOTAL).keySet()){
occurances = 0; occurances = 0;
for (String columnNameKey : taxonomyResult.keySet()){ for (Taxonomy columnNameKey : taxonomyResult.keySet()){
if(!columnNameKey.equals("Total") && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1) if(!columnNameKey.equals(Taxonomy.TOTAL) && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1)
occurances++; occurances++;
} }
if(occurances < minimalTaxonomy){ if(occurances < minimalTaxonomy){
taxonomyResult.get("Total").remove(key); taxonomyResult.get(Taxonomy.TOTAL).remove(key);
} }
} }
} }
@ -343,9 +343,9 @@ public class StatisticsNew {
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit); return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
} }
public void updateUniGramOccurrences(int amount, ArrayList<String> taxonomy){ public void updateUniGramOccurrences(int amount, ArrayList<Taxonomy> taxonomy){
uniGramTaxonomyOccurrences.get("Total").set(uniGramTaxonomyOccurrences.get("Total").longValue() + amount); uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).set(uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).longValue() + amount);
for (String t : taxonomy){ for (Taxonomy t : taxonomy){
if (uniGramTaxonomyOccurrences.get(t) != null){ if (uniGramTaxonomyOccurrences.get(t) != null){
uniGramTaxonomyOccurrences.get(t).set(uniGramTaxonomyOccurrences.get(t).longValue() + amount); uniGramTaxonomyOccurrences.get(t).set(uniGramTaxonomyOccurrences.get(t).longValue() + amount);
} else { } else {
@ -354,16 +354,16 @@ public class StatisticsNew {
} }
} }
public Map<String, AtomicLong> getUniGramOccurrences(){ public Map<Taxonomy, AtomicLong> getUniGramOccurrences(){
// return uniGramTaxonomyOccurrences.get("Total").longValue(); // return uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).longValue();
return uniGramTaxonomyOccurrences; return uniGramTaxonomyOccurrences;
} }
public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) { public void updateTaxonomyResults(MultipleHMKeys o, List<Taxonomy> taxonomy) {
for (String key : taxonomyResult.keySet()) { for (Taxonomy key : taxonomyResult.keySet()) {
// first word should have the same taxonomy as others // first word should have the same taxonomy as others
if (key.equals("Total") || taxonomy.contains(key)) { if (key.equals(Taxonomy.TOTAL) || taxonomy.contains(key)) {
// if (key.equals("Total") || taxonomy != null && taxonomy.contains(key)) { // if (key.equals(Taxonomy.TOTAL) || taxonomy != null && taxonomy.contains(key)) {
// if taxonomy not in map and in this word // if taxonomy not in map and in this word
AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1)); AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));
@ -382,7 +382,7 @@ public class StatisticsNew {
} }
public Map<String, Map<MultipleHMKeys, AtomicLong>> getTaxonomyResult() { public Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> getTaxonomyResult() {
return taxonomyResult; return taxonomyResult;
} }
@ -608,7 +608,7 @@ public class StatisticsNew {
} }
public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) { public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult(); Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
Map<Collocability, Map<MultipleHMKeys, Double>> collocabilityMap = new ConcurrentHashMap<>(); Map<Collocability, Map<MultipleHMKeys, Double>> collocabilityMap = new ConcurrentHashMap<>();
@ -618,11 +618,11 @@ public class StatisticsNew {
// count number of all words // count number of all words
long N = 0; long N = 0;
for(AtomicLong a : oneWordTaxonomyResult.get("Total").values()){ for(AtomicLong a : oneWordTaxonomyResult.get(Taxonomy.TOTAL).values()){
N += a.longValue(); N += a.longValue();
} }
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) { for(MultipleHMKeys hmKey : taxonomyResult.get(Taxonomy.TOTAL).keySet()) {
// String[] splitedString = hmKey.getK1().split("\\s+"); // String[] splitedString = hmKey.getK1().split("\\s+");
long sum_fwi =0L; long sum_fwi =0L;
@ -630,15 +630,15 @@ public class StatisticsNew {
for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){ for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){
// System.out.println(smallHmKey.getK1()); // System.out.println(smallHmKey.getK1());
sum_fwi += oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue(); sum_fwi += oneWordTaxonomyResult.get(Taxonomy.TOTAL).get(smallHmKey).longValue();
mul_fwi *= oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue(); mul_fwi *= oneWordTaxonomyResult.get(Taxonomy.TOTAL).get(smallHmKey).longValue();
} }
// String t = hmKey.getK1(); // String t = hmKey.getK1();
// if(hmKey.getK1().equals("v Slovenija")){ // if(hmKey.getK1().equals("v Slovenija")){
// System.out.println("TEST"); // System.out.println("TEST");
// //
// } // }
double O = (double)taxonomyResult.get("Total").get(hmKey).longValue(); double O = (double)taxonomyResult.get(Taxonomy.TOTAL).get(hmKey).longValue();
double n = (double)filter.getNgramValue(); double n = (double)filter.getNgramValue();
double E = (double)mul_fwi / Math.pow(N, n - 1); double E = (double)mul_fwi / Math.pow(N, n - 1);
if (collocabilityMap.keySet().contains(Collocability.DICE)){ if (collocabilityMap.keySet().contains(Collocability.DICE)){

View File

@ -112,6 +112,19 @@ public class Tax {
ArrayList<String> taxForCombo = new ArrayList<>(); ArrayList<String> taxForCombo = new ArrayList<>();
// adds parents taxonomy as well
HashSet<String> genFoundTax = new HashSet<>();
for(String e : foundTax){
String[] elList = e.split("\\.");
for(int i = 1; i < elList.length - 1; i++){
String candidate = String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i));
genFoundTax.add(candidate);
}
}
foundTax.addAll(genFoundTax);
// assures same relative order // assures same relative order
for (String t : tax.keySet()) { for (String t : tax.keySet()) {
if (foundTax.contains(t)) { if (foundTax.contains(t)) {
@ -126,7 +139,7 @@ public class Tax {
return corpusTypesWithTaxonomy; return corpusTypesWithTaxonomy;
} }
public static ArrayList<String> getTaxonomyCodes(ArrayList<String> taxonomyNames, CorpusType corpusType) { public static ArrayList<String> getTaxonomyCodes(ArrayList<Taxonomy> taxonomyNames, CorpusType corpusType) {
ArrayList<String> result = new ArrayList<>(); ArrayList<String> result = new ArrayList<>();
if (ValidationUtil.isEmpty(taxonomyNames)) { if (ValidationUtil.isEmpty(taxonomyNames)) {
@ -146,8 +159,8 @@ public class Tax {
.stream() .stream()
.collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
for (String taxonomyName : taxonomyNames) { for (Taxonomy taxonomyName : taxonomyNames) {
result.add(taxInversed.get(taxonomyName)); result.add(taxInversed.get(taxonomyName.toString()));
} }
return result; return result;
@ -188,7 +201,7 @@ public class Tax {
* *
* @return * @return
*/ */
public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<String> taxonomy) { public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<Taxonomy> taxonomy) {
LinkedHashMap<String, String> tax = new LinkedHashMap<>(); LinkedHashMap<String, String> tax = new LinkedHashMap<>();
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) { if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
@ -199,8 +212,8 @@ public class Tax {
ArrayList<String> result = new ArrayList<>(); ArrayList<String> result = new ArrayList<>();
for (String t : taxonomy) { for (Taxonomy t : taxonomy) {
result.add(tax.get(t)); result.add(tax.get(t.toString()));
} }
return result; return result;

View File

@ -1,171 +1,749 @@
package data; package data;
import java.util.ArrayList; import java.util.*;
import java.util.Arrays; import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList; import javafx.collections.ObservableList;
public enum Taxonomy { public enum Taxonomy {
TOTAL("Total", "Total"),
// GOS // GOS
JAVNI("javni", "T.J", "gos"), DISKURZ("gos.T", "gos.T - diskurz"),
INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "T.J.I", "gos"), DISKURZ_JAVNI("gos.T.J", " gos.T.J - diskurz-javni"),
RAZVEDRILNI("razvedrilni", "T.J.R", "gos"), DISKURZ_INFORMATIVNO_IZOBRAZEVALNI("gos.T.J.I", " gos.T.J.I - diskurz-javni-informativno-izobraževalni"),
NEJAVNI("nejavni", "T.N", "gos"), DISKURZ_RAZVEDRILNI("gos.T.J.R", " gos.T.J.R - diskurz-javni-razvedrilni"),
NEZASEBNI("nezasebni", "T.N.N", "gos"), DISKURZ_NEJAVNI("gos.T.N", " gos.T.N - diskurz-nejavni"),
ZASEBNI("zasebni", "T.N.Z", "gos"), DISKURZ_NEZASEBNI("gos.T.N.N", " gos.T.N.N - diskurz-nejavni-nezasebni"),
OSEBNI_STIK("osebni stik", "K.O", "gos"), DISKURZ_ZASEBNI("gos.T.N.Z", " gos.T.N.Z - diskurz-nejavni-zasebni"),
TELEFON("telefon", "K.P", "gos"), SITUACIJA("gos.S", "gos.S - situacija"),
RADIO("radio", "K.R", "gos"), SITUACIJA_RADIO("gos.S.R", " gos.S.R - situacija-radio"),
TELEVIZIJA("televizija", "K.T", "gos"), SITUACIJA_TELEVIZIJA("gos.S.T", " gos.S.T - situacija-televizija"),
KANAL("gos.K", "gos.K - kanal"),
KANAL_OSEBNI_STIK("gos.K.O", " gos.K.O - kanal-osebni stik"),
KANAL_TELEFON("gos.K.P", " gos.K.P - kanal-telefon"),
KANAL_RADIO("gos.K.R", " gos.K.R - kanal-radio"),
KANAL_TELEVIZIJA("gos.K.T", " gos.K.T - kanal-televizija"),
// Gigafida // Gigafida
KNJIZNO("knjižno", "T.K", "gigafida"), // KNJIZNO("knjižno", "T.K", "gigafida"),
LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"), // LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"),
STROKOVNO("strokovno", "T.K.S", "gigafida"), // STROKOVNO("strokovno", "T.K.S", "gigafida"),
PERIODICNO("periodično", "T.P", "gigafida"), // PERIODICNO("periodično", "T.P", "gigafida"),
CASOPIS("časopis", "T.P.C", "gigafida"), // CASOPIS("časopis", "T.P.C", "gigafida"),
REVIJA("revija", "T.P.R", "gigafida"), // REVIJA("revija", "T.P.R", "gigafida"),
INTERNET("internet", "I", "gigafida"), // INTERNET("internet", "I", "gigafida"),
SSJ_TISK("tisk", "SSJ.T", "gigafida"),
SSJ_KNJIZNO("opis", "identifikator", "gigafida"),
SSJ_LEPOSLOVNO("opis", "identifikator", "gigafida"),
SSJ_STROKOVNO("opis", "identifikator", "gigafida"),
SSJ_PERIODICNO("opis", "identifikator", "gigafida"),
SSJ_CASOPIS("opis", "identifikator", "gigafida"),
SSJ_REVIJA("opis", "identifikator", "gigafida"),
SSJ_DRUGO("opis", "identifikator", "gigafida"),
SSJ_INTERNET("opis", "identifikator", "gigafida"),
FT_P_PRENOSNIK("opis", "identifikator", "gigafida"),
FT_P_GOVORNI("opis", "identifikator", "gigafida"),
FT_P_ELEKTRONSKI("opis", "identifikator", "gigafida"),
FT_P_PISNI("opis", "identifikator", "gigafida"),
FT_P_OBJAVLJENO("opis", "identifikator", "gigafida"),
FT_P_KNJIZNO("opis", "identifikator", "gigafida"),
FT_P_PERIODICNO("opis", "identifikator", "gigafida"),
FT_P_CASOPISNO("opis", "identifikator", "gigafida"),
FT_P_DNEVNO("opis", "identifikator", "gigafida"),
FT_P_VECKRAT_TEDENSKO("opis", "identifikator", "gigafida"),
// FT_P_TEDENSKO("opis", "identifikator", "gigafida"),
FT_P_REVIALNO("opis", "identifikator", "gigafida"),
FT_P_TEDENSKO("opis", "identifikator", "gigafida"),
FT_P_STIRINAJSTDNEVNO("opis", "identifikator", "gigafida"),
FT_P_MESECNO("opis", "identifikator", "gigafida"),
FT_P_REDKEJE_KOT_MESECNO("opis", "identifikator", "gigafida"),
FT_P_OBCASNO("opis", "identifikator", "gigafida"),
FT_P_NEOBJAVLJENO("opis", "identifikator", "gigafida"),
FT_P_JAVNO("opis", "identifikator", "gigafida"),
FT_P_INTERNO("opis", "identifikator", "gigafida"),
FT_P_ZASEBNO("opis", "identifikator", "gigafida"),
FT_ZVRST("opis", "identifikator", "gigafida"),
FT_UMETNOSTNA("opis", "identifikator", "gigafida"),
FT_PESNISKA("opis", "identifikator", "gigafida"),
FT_PROZNA("opis", "identifikator", "gigafida"),
FT_DRAMSKA("opis", "identifikator", "gigafida"),
FT_NEUMETNOSTNA("opis", "identifikator", "gigafida"),
FT_STROKOVNA("opis", "identifikator", "gigafida"),
FT_HID("opis", "identifikator", "gigafida"),
FT_NIT("opis", "identifikator", "gigafida"),
FT_NESTROKOVNA("opis", "identifikator", "gigafida"),
FT_PRAVNA("opis", "identifikator", "gigafida"),
FT_LEKTORIRANO("opis", "identifikator", "gigafida"),
FT_DA("opis", "identifikator", "gigafida"),
FT_NE("opis", "identifikator", "gigafida");
SSJ_TISK("SSJ.T", "SSJ.T - tisk"),
SSJ_KNJIZNO("SSJ.T.K", " SSJ.T.K - tisk-knjižno"),
SSJ_LEPOSLOVNO("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno"),
SSJ_STROKOVNO("SSJ.T.K.S", " SSJ.T.K.S - tisk-knjižno-strokovno"),
SSJ_PERIODICNO("SSJ.T.P", " SSJ.T.P - tisk-periodično"),
SSJ_CASOPIS("SSJ.T.P.C", " SSJ.T.P.C - tisk-periodično-časopis"),
SSJ_REVIJA("SSJ.T.P.R", " SSJ.T.P.R - tisk-periodično-revija"),
SSJ_DRUGO("SSJ.T.D", " SSJ.T.D - tisk-drugo"),
SSJ_INTERNET("SSJ.I", "SSJ.I - internet"),
FT_P_PRENOSNIK("Ft.P", "Ft.P - prenosnik"),
FT_P_GOVORNI("Ft.P.G", " Ft.P.G - prenosnik-govorni"),
FT_P_ELEKTRONSKI("Ft.P.E", " Ft.P.E - prenosnik-elektronski"),
FT_P_PISNI("Ft.P.P", " Ft.P.P - prenosnik-pisni"),
FT_P_OBJAVLJENO("Ft.P.P.O", " Ft.P.P.O - prenosnik-pisni-objavljeno"),
FT_P_KNJIZNO("Ft.P.P.O.K", " Ft.P.P.O.K - prenosnik-pisni-objavljeno-knjižno"),
FT_P_PERIODICNO("Ft.P.P.O.P", " Ft.P.P.O.P - prenosnik-pisni-objavljeno-periodično"),
FT_P_CASOPISNO("Ft.P.P.O.P.C", " Ft.P.P.O.P.C - prenosnik-pisni-objavljeno-periodično-časopisno"),
FT_P_DNEVNO("Ft.P.P.O.P.C.D", " Ft.P.P.O.P.C.D - prenosnik-pisni-objavljeno-periodično-časopisno-dnevno"),
FT_P_VECKRAT_TEDENSKO("Ft.P.P.O.P.C.V", " Ft.P.P.O.P.C.V - prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko"),
FT_P_CASOPISNO_TEDENSKO("Ft.P.P.O.P.C.T", " Ft.P.P.O.P.C.T - prenosnik-pisni-objavljeno-periodično-časopisno-tedensko"),
FT_P_REVIALNO("Ft.P.P.O.P.R", " Ft.P.P.O.P.R - prenosnik-pisni-objavljeno-periodično-revialno"),
FT_P_TEDENSKO("Ft.P.P.O.P.R.T", " Ft.P.P.O.P.R.T - prenosnik-pisni-objavljeno-periodično-revialno-tedensko"),
FT_P_STIRINAJSTDNEVNO("Ft.P.P.O.P.R.S", " Ft.P.P.O.P.R.S - prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno"),
FT_P_MESECNO("Ft.P.P.O.P.R.M", " Ft.P.P.O.P.R.M - prenosnik-pisni-objavljeno-periodično-revialno-mesečno"),
FT_P_REDKEJE_KOT_MESECNO("Ft.P.P.O.P.R.D", " Ft.P.P.O.P.R.D - prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec"),
FT_P_OBCASNO("Ft.P.P.O.P.R.O", " Ft.P.P.O.P.R.O - prenosnik-pisni-objavljeno-periodično-revialno-občasno"),
FT_P_NEOBJAVLJENO("Ft.P.P.N", " Ft.P.P.N - prenosnik-pisni-neobjavljeno"),
FT_P_JAVNO("Ft.P.P.N.J", " Ft.P.P.N.J - prenosnik-pisni-neobjavljeno-javno"),
FT_P_INTERNO("Ft.P.P.N.I", " Ft.P.P.N.I - prenosnik-pisni-neobjavljeno-interno"),
FT_P_ZASEBNO("Ft.P.P.N.Z", " Ft.P.P.N.Z - prenosnik-pisni-neobjavljeno-zasebno"),
FT_ZVRST("Ft.Z", "Ft.Z - zvrst"),
FT_UMETNOSTNA("Ft.Z.U", " Ft.Z.U - zvrst-umetnostna"),
FT_PESNISKA("Ft.Z.U.P", " Ft.Z.U.P - zvrst-umetnostna-pesniška"),
FT_PROZNA("Ft.Z.U.R", " Ft.Z.U.R - zvrst-umetnostna-prozna"),
FT_DRAMSKA("Ft.Z.U.D", " Ft.Z.U.D - zvrst-umetnostna-dramska"),
FT_NEUMETNOSTNA("Ft.Z.N", " Ft.Z.N - zvrst-neumetnostna"),
FT_STROKOVNA("Ft.Z.N.S", " Ft.Z.N.S - zvrst-neumetnostna-strokovna"),
FT_HID("Ft.Z.N.S.H", " Ft.Z.N.S.H - zvrst-neumetnostna-strokovna-humanistična in družboslovna"),
FT_NIT("Ft.Z.N.S.N", " Ft.Z.N.S.N - zvrst-neumetnostna-strokovna-naravoslovna in tehnična"),
FT_NESTROKOVNA("Ft.Z.N.N", " Ft.Z.N.N - zvrst-neumetnostna-nestrokovna"),
FT_PRAVNA("Ft.Z.N.P", " Ft.Z.N.P - zvrst-neumetnostna-pravna"),
FT_LEKTORIRANO("Ft.L", "Ft.L - zvrst-lektorirano"),
FT_DA("Ft.L.D", " Ft.L.D - zvrst-lektorirano-da"),
FT_NE("Ft.L.N", " Ft.L.N - zvrst-lektorirano-ne");
private final String name; private final String name;
private final String taxonomy; private final String longName;
private final String corpus;
Taxonomy(String name, String taxonomy, String corpusType) { Taxonomy(String name, String longName) {
this.name = name; this.name = name;
this.taxonomy = taxonomy; this.longName = longName;
this.corpus = corpusType;
} }
public String toString() { public String toString() {
return this.name; return this.name;
} }
public String getTaxonomnyString() { public String toLongNameString() {
return this.taxonomy; return this.longName;
} }
public static Taxonomy factory(String tax) { public static Taxonomy factory(String tax) {
if (tax != null) { if (tax != null) {
// GOS // GOS
if (JAVNI.toString().equals(tax)) { if (DISKURZ.toString().equals(tax)) {
return JAVNI; return DISKURZ;
} }
if (INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) { if (DISKURZ_JAVNI.toString().equals(tax)) {
return INFORMATIVNO_IZOBRAZEVALNI; return DISKURZ_JAVNI;
} }
if (RAZVEDRILNI.toString().equals(tax)) { if (DISKURZ_INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) {
return RAZVEDRILNI; return DISKURZ_INFORMATIVNO_IZOBRAZEVALNI;
} }
if (NEJAVNI.toString().equals(tax)) { if (DISKURZ_RAZVEDRILNI.toString().equals(tax)) {
return NEJAVNI; return DISKURZ_RAZVEDRILNI;
} }
if (NEZASEBNI.toString().equals(tax)) { if (DISKURZ_NEJAVNI.toString().equals(tax)) {
return NEZASEBNI; return DISKURZ_NEJAVNI;
} }
if (ZASEBNI.toString().equals(tax)) { if (DISKURZ_NEZASEBNI.toString().equals(tax)) {
return ZASEBNI; return DISKURZ_NEZASEBNI;
} }
if (OSEBNI_STIK.toString().equals(tax)) { if (DISKURZ_ZASEBNI.toString().equals(tax)) {
return OSEBNI_STIK; return DISKURZ_ZASEBNI;
} }
if (TELEFON.toString().equals(tax)) { if (SITUACIJA.toString().equals(tax)) {
return TELEFON; return SITUACIJA;
} }
if (RADIO.toString().equals(tax)) { if (SITUACIJA_RADIO.toString().equals(tax)) {
return RADIO; return SITUACIJA_RADIO;
} }
if (TELEVIZIJA.toString().equals(tax)) { if (SITUACIJA_TELEVIZIJA.toString().equals(tax)) {
return TELEVIZIJA; return SITUACIJA_TELEVIZIJA;
}
if (KANAL.toString().equals(tax)) {
return KANAL;
}
if (KANAL_OSEBNI_STIK.toString().equals(tax)) {
return KANAL_OSEBNI_STIK;
}
if (KANAL_TELEFON.toString().equals(tax)) {
return KANAL_TELEFON;
}
if (KANAL_RADIO.toString().equals(tax)) {
return KANAL_RADIO;
}
if (KANAL_TELEVIZIJA.toString().equals(tax)) {
return KANAL_TELEVIZIJA;
} }
// Gigafida // Gigafida
// if (TISK.toString().equals(tax)) { // if (TISK.toString().equals(tax)) {
// return TISK; // return TISK;
// } // }
if (KNJIZNO.toString().equals(tax)) { if (SSJ_TISK.toString().equals(tax)) {
return KNJIZNO; return SSJ_TISK;
} }
if (LEPOSLOVNO.toString().equals(tax)) { if (SSJ_KNJIZNO.toString().equals(tax)) {
return LEPOSLOVNO; return SSJ_KNJIZNO;
} }
if (STROKOVNO.toString().equals(tax)) { if (SSJ_LEPOSLOVNO.toString().equals(tax)) {
return STROKOVNO; return SSJ_LEPOSLOVNO;
} }
if (PERIODICNO.toString().equals(tax)) { if (SSJ_STROKOVNO.toString().equals(tax)) {
return PERIODICNO; return SSJ_STROKOVNO;
} }
if (CASOPIS.toString().equals(tax)) { if (SSJ_PERIODICNO.toString().equals(tax)) {
return CASOPIS; return SSJ_PERIODICNO;
} }
if (REVIJA.toString().equals(tax)) { if (SSJ_CASOPIS.toString().equals(tax)) {
return REVIJA; return SSJ_CASOPIS;
} }
if (INTERNET.toString().equals(tax)) { if (SSJ_REVIJA.toString().equals(tax)) {
return INTERNET; return SSJ_REVIJA;
} }
if (SSJ_DRUGO.toString().equals(tax)) {
return SSJ_DRUGO;
}
if (SSJ_INTERNET.toString().equals(tax)) {
return SSJ_INTERNET;
}
if (FT_P_PRENOSNIK.toString().equals(tax)) {
return FT_P_PRENOSNIK;
}
if (FT_P_GOVORNI.toString().equals(tax)) {
return FT_P_GOVORNI;
}
if (FT_P_ELEKTRONSKI.toString().equals(tax)) {
return FT_P_ELEKTRONSKI;
}
if (FT_P_PISNI.toString().equals(tax)) {
return FT_P_PISNI;
}
if (FT_P_OBJAVLJENO.toString().equals(tax)) {
return FT_P_OBJAVLJENO;
}
if (FT_P_KNJIZNO.toString().equals(tax)) {
return FT_P_KNJIZNO;
}
if (FT_P_PERIODICNO.toString().equals(tax)) {
return FT_P_PERIODICNO;
}
if (FT_P_CASOPISNO.toString().equals(tax)) {
return FT_P_CASOPISNO;
}
if (FT_P_DNEVNO.toString().equals(tax)) {
return FT_P_DNEVNO;
}
if (FT_P_VECKRAT_TEDENSKO.toString().equals(tax)) {
return FT_P_VECKRAT_TEDENSKO;
}
if (FT_P_CASOPISNO_TEDENSKO.toString().equals(tax)) {
return FT_P_CASOPISNO_TEDENSKO;
}
if (FT_P_REVIALNO.toString().equals(tax)) {
return FT_P_REVIALNO;
}
if (FT_P_TEDENSKO.toString().equals(tax)) {
return FT_P_TEDENSKO;
}
if (FT_P_STIRINAJSTDNEVNO.toString().equals(tax)) {
return FT_P_STIRINAJSTDNEVNO;
}
if (FT_P_MESECNO.toString().equals(tax)) {
return FT_P_MESECNO;
}
if (FT_P_REDKEJE_KOT_MESECNO.toString().equals(tax)) {
return FT_P_REDKEJE_KOT_MESECNO;
}
if (FT_P_OBCASNO.toString().equals(tax)) {
return FT_P_OBCASNO;
}
if (FT_P_NEOBJAVLJENO.toString().equals(tax)) {
return FT_P_NEOBJAVLJENO;
}
if (FT_P_JAVNO.toString().equals(tax)) {
return FT_P_JAVNO;
}
if (FT_P_INTERNO.toString().equals(tax)) {
return FT_P_INTERNO;
}
if (FT_P_ZASEBNO.toString().equals(tax)) {
return FT_P_ZASEBNO;
}
if (FT_ZVRST.toString().equals(tax)) {
return FT_ZVRST;
}
if (FT_UMETNOSTNA.toString().equals(tax)) {
return FT_UMETNOSTNA;
}
if (FT_PESNISKA.toString().equals(tax)) {
return FT_PESNISKA;
}
if (FT_PROZNA.toString().equals(tax)) {
return FT_PROZNA;
}
if (FT_DRAMSKA.toString().equals(tax)) {
return FT_DRAMSKA;
}
if (FT_NEUMETNOSTNA.toString().equals(tax)) {
return FT_NEUMETNOSTNA;
}
if (FT_STROKOVNA.toString().equals(tax)) {
return FT_STROKOVNA;
}
if (FT_NIT.toString().equals(tax)) {
return FT_NIT;
}
if (FT_HID.toString().equals(tax)) {
return FT_HID;
}
if (FT_NESTROKOVNA.toString().equals(tax)) {
return FT_NESTROKOVNA;
}
if (FT_PRAVNA.toString().equals(tax)) {
return FT_PRAVNA;
}
if (FT_LEKTORIRANO.toString().equals(tax)) {
return FT_LEKTORIRANO;
}
if (FT_DA.toString().equals(tax)) {
return FT_DA;
}
if (FT_NE.toString().equals(tax)) {
return FT_NE;
}
} }
return null; return null;
} }
public static ObservableList<String> getDefaultForComboBox(String corpusType) { public static Taxonomy factoryLongName(String tax) {
ArrayList<String> values = Arrays.stream(Taxonomy.values()) if (tax != null) {
.filter(x -> x.corpus.equals(corpusType)) // GOS
.map(x -> x.name) if (DISKURZ.toLongNameString().equals(tax)) {
.collect(Collectors.toCollection(ArrayList::new)); return DISKURZ;
}
return FXCollections.observableArrayList(values); if (DISKURZ_JAVNI.toLongNameString().equals(tax)) {
return DISKURZ_JAVNI;
}
if (DISKURZ_INFORMATIVNO_IZOBRAZEVALNI.toLongNameString().equals(tax)) {
return DISKURZ_INFORMATIVNO_IZOBRAZEVALNI;
}
if (DISKURZ_RAZVEDRILNI.toLongNameString().equals(tax)) {
return DISKURZ_RAZVEDRILNI;
}
if (DISKURZ_NEJAVNI.toLongNameString().equals(tax)) {
return DISKURZ_NEJAVNI;
}
if (DISKURZ_NEZASEBNI.toLongNameString().equals(tax)) {
return DISKURZ_NEZASEBNI;
}
if (DISKURZ_ZASEBNI.toLongNameString().equals(tax)) {
return DISKURZ_ZASEBNI;
}
if (SITUACIJA.toLongNameString().equals(tax)) {
return SITUACIJA;
}
if (SITUACIJA_RADIO.toLongNameString().equals(tax)) {
return SITUACIJA_RADIO;
}
if (SITUACIJA_TELEVIZIJA.toLongNameString().equals(tax)) {
return SITUACIJA_TELEVIZIJA;
}
if (KANAL.toLongNameString().equals(tax)) {
return KANAL;
}
if (KANAL_OSEBNI_STIK.toLongNameString().equals(tax)) {
return KANAL_OSEBNI_STIK;
}
if (KANAL_TELEFON.toLongNameString().equals(tax)) {
return KANAL_TELEFON;
}
if (KANAL_RADIO.toLongNameString().equals(tax)) {
return KANAL_RADIO;
}
if (KANAL_TELEVIZIJA.toLongNameString().equals(tax)) {
return KANAL_TELEVIZIJA;
} }
public static ObservableList<String> getDefaultForComboBox(CorpusType corpusType) { // Gigafida
return getDefaultForComboBox(corpusType.toString()); // if (TISK.toString().equals(tax)) {
// return TISK;
// }
if (SSJ_TISK.toLongNameString().equals(tax)) {
return SSJ_TISK;
}
if (SSJ_KNJIZNO.toLongNameString().equals(tax)) {
return SSJ_KNJIZNO;
}
if (SSJ_LEPOSLOVNO.toLongNameString().equals(tax)) {
return SSJ_LEPOSLOVNO;
}
if (SSJ_STROKOVNO.toLongNameString().equals(tax)) {
return SSJ_STROKOVNO;
}
if (SSJ_PERIODICNO.toLongNameString().equals(tax)) {
return SSJ_PERIODICNO;
}
if (SSJ_CASOPIS.toLongNameString().equals(tax)) {
return SSJ_CASOPIS;
}
if (SSJ_REVIJA.toLongNameString().equals(tax)) {
return SSJ_REVIJA;
}
if (SSJ_DRUGO.toLongNameString().equals(tax)) {
return SSJ_DRUGO;
}
if (SSJ_INTERNET.toLongNameString().equals(tax)) {
return SSJ_INTERNET;
}
if (FT_P_PRENOSNIK.toLongNameString().equals(tax)) {
return FT_P_PRENOSNIK;
}
if (FT_P_GOVORNI.toLongNameString().equals(tax)) {
return FT_P_GOVORNI;
}
if (FT_P_ELEKTRONSKI.toLongNameString().equals(tax)) {
return FT_P_ELEKTRONSKI;
}
if (FT_P_PISNI.toLongNameString().equals(tax)) {
return FT_P_PISNI;
}
if (FT_P_OBJAVLJENO.toLongNameString().equals(tax)) {
return FT_P_OBJAVLJENO;
}
if (FT_P_KNJIZNO.toLongNameString().equals(tax)) {
return FT_P_KNJIZNO;
}
if (FT_P_PERIODICNO.toLongNameString().equals(tax)) {
return FT_P_PERIODICNO;
}
if (FT_P_CASOPISNO.toLongNameString().equals(tax)) {
return FT_P_CASOPISNO;
}
if (FT_P_DNEVNO.toLongNameString().equals(tax)) {
return FT_P_DNEVNO;
}
if (FT_P_VECKRAT_TEDENSKO.toLongNameString().equals(tax)) {
return FT_P_VECKRAT_TEDENSKO;
}
if (FT_P_CASOPISNO_TEDENSKO.toLongNameString().equals(tax)) {
return FT_P_CASOPISNO_TEDENSKO;
}
if (FT_P_REVIALNO.toLongNameString().equals(tax)) {
return FT_P_REVIALNO;
}
if (FT_P_TEDENSKO.toLongNameString().equals(tax)) {
return FT_P_TEDENSKO;
}
if (FT_P_STIRINAJSTDNEVNO.toLongNameString().equals(tax)) {
return FT_P_STIRINAJSTDNEVNO;
}
if (FT_P_MESECNO.toLongNameString().equals(tax)) {
return FT_P_MESECNO;
}
if (FT_P_REDKEJE_KOT_MESECNO.toLongNameString().equals(tax)) {
return FT_P_REDKEJE_KOT_MESECNO;
}
if (FT_P_OBCASNO.toLongNameString().equals(tax)) {
return FT_P_OBCASNO;
}
if (FT_P_NEOBJAVLJENO.toLongNameString().equals(tax)) {
return FT_P_NEOBJAVLJENO;
}
if (FT_P_JAVNO.toLongNameString().equals(tax)) {
return FT_P_JAVNO;
}
if (FT_P_INTERNO.toLongNameString().equals(tax)) {
return FT_P_INTERNO;
}
if (FT_P_ZASEBNO.toLongNameString().equals(tax)) {
return FT_P_ZASEBNO;
}
if (FT_ZVRST.toLongNameString().equals(tax)) {
return FT_ZVRST;
}
if (FT_UMETNOSTNA.toLongNameString().equals(tax)) {
return FT_UMETNOSTNA;
}
if (FT_PESNISKA.toLongNameString().equals(tax)) {
return FT_PESNISKA;
}
if (FT_PROZNA.toLongNameString().equals(tax)) {
return FT_PROZNA;
}
if (FT_DRAMSKA.toLongNameString().equals(tax)) {
return FT_DRAMSKA;
}
if (FT_NEUMETNOSTNA.toLongNameString().equals(tax)) {
return FT_NEUMETNOSTNA;
}
if (FT_STROKOVNA.toLongNameString().equals(tax)) {
return FT_STROKOVNA;
}
if (FT_NIT.toLongNameString().equals(tax)) {
return FT_NIT;
}
if (FT_HID.toLongNameString().equals(tax)) {
return FT_HID;
}
if (FT_NESTROKOVNA.toLongNameString().equals(tax)) {
return FT_NESTROKOVNA;
}
if (FT_PRAVNA.toLongNameString().equals(tax)) {
return FT_PRAVNA;
}
if (FT_LEKTORIRANO.toLongNameString().equals(tax)) {
return FT_LEKTORIRANO;
}
if (FT_DA.toLongNameString().equals(tax)) {
return FT_DA;
}
if (FT_NE.toLongNameString().equals(tax)) {
return FT_NE;
}
}
return null;
}
/**
 * Lists the taxonomies that have to be checked together with a taxonomy the user just selected:
 * every descendant of {@code disjointTaxonomy} in the taxonomy hierarchy. The selected taxonomy
 * itself is not part of the result.
 *
 * @param disjointTaxonomy taxonomy that was newly selected; may be {@code null}
 * @return a new mutable list with all descendants of {@code disjointTaxonomy}; empty when the
 *         taxonomy has no descendants or is {@code null}
 */
public static ArrayList<Taxonomy> taxonomySelected(Taxonomy disjointTaxonomy) {
    ArrayList<Taxonomy> r = new ArrayList<>();
    if (disjointTaxonomy == null) {
        // nothing selected -> nothing to add (previously this ended in a NullPointerException)
        return r;
    }

    switch (disjointTaxonomy) {
        // GOS
        case DISKURZ:
            Collections.addAll(r, DISKURZ_JAVNI, DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_RAZVEDRILNI,
                    DISKURZ_NEJAVNI, DISKURZ_NEZASEBNI, DISKURZ_ZASEBNI);
            break;
        case DISKURZ_JAVNI:
            Collections.addAll(r, DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_RAZVEDRILNI);
            break;
        case DISKURZ_NEJAVNI:
            Collections.addAll(r, DISKURZ_NEZASEBNI, DISKURZ_ZASEBNI);
            break;
        case SITUACIJA:
            Collections.addAll(r, SITUACIJA_RADIO, SITUACIJA_TELEVIZIJA);
            break;
        case KANAL:
            // KANAL_TELEFON was missing here although it is a child of KANAL like the other three
            Collections.addAll(r, KANAL_OSEBNI_STIK, KANAL_TELEFON, KANAL_RADIO, KANAL_TELEVIZIJA);
            break;

        // Gigafida - SSJ
        case SSJ_TISK:
            Collections.addAll(r, SSJ_KNJIZNO, SSJ_LEPOSLOVNO, SSJ_STROKOVNO, SSJ_PERIODICNO,
                    SSJ_CASOPIS, SSJ_REVIJA, SSJ_DRUGO);
            break;
        case SSJ_KNJIZNO:
            Collections.addAll(r, SSJ_LEPOSLOVNO, SSJ_STROKOVNO);
            break;
        case SSJ_PERIODICNO:
            Collections.addAll(r, SSJ_CASOPIS, SSJ_REVIJA);
            break;

        // Gigafida - Ft.P (prenosnik)
        case FT_P_PRENOSNIK:
            Collections.addAll(r, FT_P_GOVORNI, FT_P_ELEKTRONSKI, FT_P_PISNI, FT_P_OBJAVLJENO,
                    FT_P_KNJIZNO, FT_P_PERIODICNO, FT_P_CASOPISNO, FT_P_DNEVNO, FT_P_VECKRAT_TEDENSKO,
                    FT_P_CASOPISNO_TEDENSKO, FT_P_REVIALNO, FT_P_TEDENSKO, FT_P_STIRINAJSTDNEVNO,
                    FT_P_MESECNO, FT_P_REDKEJE_KOT_MESECNO, FT_P_OBCASNO, FT_P_NEOBJAVLJENO,
                    FT_P_JAVNO, FT_P_INTERNO, FT_P_ZASEBNO);
            break;
        case FT_P_PISNI:
            Collections.addAll(r, FT_P_OBJAVLJENO, FT_P_KNJIZNO, FT_P_PERIODICNO, FT_P_CASOPISNO,
                    FT_P_DNEVNO, FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO_TEDENSKO, FT_P_REVIALNO,
                    FT_P_TEDENSKO, FT_P_STIRINAJSTDNEVNO, FT_P_MESECNO, FT_P_REDKEJE_KOT_MESECNO,
                    FT_P_OBCASNO, FT_P_NEOBJAVLJENO, FT_P_JAVNO, FT_P_INTERNO, FT_P_ZASEBNO);
            break;
        case FT_P_OBJAVLJENO:
            Collections.addAll(r, FT_P_KNJIZNO, FT_P_PERIODICNO, FT_P_CASOPISNO, FT_P_DNEVNO,
                    FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO_TEDENSKO, FT_P_REVIALNO, FT_P_TEDENSKO,
                    FT_P_STIRINAJSTDNEVNO, FT_P_MESECNO, FT_P_REDKEJE_KOT_MESECNO, FT_P_OBCASNO);
            break;
        case FT_P_PERIODICNO:
            Collections.addAll(r, FT_P_CASOPISNO, FT_P_DNEVNO, FT_P_VECKRAT_TEDENSKO,
                    FT_P_CASOPISNO_TEDENSKO, FT_P_REVIALNO, FT_P_TEDENSKO, FT_P_STIRINAJSTDNEVNO,
                    FT_P_MESECNO, FT_P_REDKEJE_KOT_MESECNO, FT_P_OBCASNO);
            break;
        case FT_P_CASOPISNO:
            Collections.addAll(r, FT_P_DNEVNO, FT_P_VECKRAT_TEDENSKO, FT_P_CASOPISNO_TEDENSKO);
            break;
        case FT_P_REVIALNO:
            Collections.addAll(r, FT_P_TEDENSKO, FT_P_STIRINAJSTDNEVNO, FT_P_MESECNO,
                    FT_P_REDKEJE_KOT_MESECNO, FT_P_OBCASNO);
            break;
        case FT_P_NEOBJAVLJENO:
            Collections.addAll(r, FT_P_JAVNO, FT_P_INTERNO, FT_P_ZASEBNO);
            break;

        // Gigafida - Ft.Z (zvrst)
        case FT_ZVRST:
            Collections.addAll(r, FT_UMETNOSTNA, FT_PESNISKA, FT_PROZNA, FT_DRAMSKA, FT_NEUMETNOSTNA,
                    FT_STROKOVNA, FT_HID, FT_NIT, FT_NESTROKOVNA, FT_PRAVNA);
            break;
        case FT_UMETNOSTNA:
            Collections.addAll(r, FT_PESNISKA, FT_PROZNA, FT_DRAMSKA);
            break;
        case FT_NEUMETNOSTNA:
            Collections.addAll(r, FT_STROKOVNA, FT_HID, FT_NIT, FT_NESTROKOVNA, FT_PRAVNA);
            break;
        case FT_STROKOVNA:
            Collections.addAll(r, FT_HID, FT_NIT);
            break;

        // Gigafida - Ft.L (lektorirano)
        case FT_LEKTORIRANO:
            Collections.addAll(r, FT_DA, FT_NE);
            break;

        default:
            // leaf taxonomy: selecting it does not pull in anything else
            break;
    }

    return r;
}
/**
 * Lists the taxonomies that have to be unchecked when the user deselects a taxonomy: the
 * deselected taxonomy itself plus all of its ancestors, ordered from the top-most ancestor
 * down to {@code disjointTaxonomy}.
 *
 * @param disjointTaxonomy taxonomy that was deselected; may be {@code null}
 * @return a new mutable list holding the ancestor chain (root first, {@code disjointTaxonomy}
 *         last); empty when {@code disjointTaxonomy} is {@code null}
 */
public static ArrayList<Taxonomy> taxonomyDeselected(Taxonomy disjointTaxonomy){
    ArrayList<Taxonomy> r = new ArrayList<>();

    // walk up the hierarchy until a root (taxonomy without parent) has been added
    for (Taxonomy current = disjointTaxonomy; current != null; current = parentTaxonomy(current)) {
        r.add(current);
    }

    // collected leaf -> root, callers get root -> leaf
    Collections.reverse(r);
    return r;
}

/**
 * Returns the direct parent of a taxonomy in the hierarchy, or {@code null} for a root entry.
 */
private static Taxonomy parentTaxonomy(Taxonomy child) {
    switch (child) {
        // GOS
        case DISKURZ_JAVNI:
        case DISKURZ_NEJAVNI:
            return DISKURZ;
        case DISKURZ_INFORMATIVNO_IZOBRAZEVALNI:
        case DISKURZ_RAZVEDRILNI:
            return DISKURZ_JAVNI;
        case DISKURZ_NEZASEBNI:
        case DISKURZ_ZASEBNI:
            return DISKURZ_NEJAVNI;
        case SITUACIJA_RADIO:
        case SITUACIJA_TELEVIZIJA:
            return SITUACIJA;
        case KANAL_OSEBNI_STIK:
        case KANAL_TELEFON:
        case KANAL_RADIO:
        case KANAL_TELEVIZIJA:
            return KANAL;

        // Gigafida - SSJ
        case SSJ_KNJIZNO:
        case SSJ_PERIODICNO: // was missing: SSJ.T.P is a child of SSJ.T
        case SSJ_DRUGO:
            return SSJ_TISK;
        case SSJ_LEPOSLOVNO:
        case SSJ_STROKOVNO:
            return SSJ_KNJIZNO;
        case SSJ_CASOPIS: // was missing: SSJ.T.P.C is a child of SSJ.T.P
        case SSJ_REVIJA:  // was missing: SSJ.T.P.R is a child of SSJ.T.P
            return SSJ_PERIODICNO;

        // Gigafida - Ft.P (prenosnik)
        case FT_P_GOVORNI:
        case FT_P_ELEKTRONSKI:
        case FT_P_PISNI:
            return FT_P_PRENOSNIK;
        case FT_P_OBJAVLJENO:
        case FT_P_NEOBJAVLJENO:
            return FT_P_PISNI;
        case FT_P_KNJIZNO:
        case FT_P_PERIODICNO:
            return FT_P_OBJAVLJENO;
        case FT_P_CASOPISNO: // Ft.P.P.O.P.C: parent is periodicno, not objavljeno
        case FT_P_REVIALNO:
            return FT_P_PERIODICNO;
        case FT_P_DNEVNO:
        case FT_P_VECKRAT_TEDENSKO:
        case FT_P_CASOPISNO_TEDENSKO:
            return FT_P_CASOPISNO;
        case FT_P_TEDENSKO:
        case FT_P_STIRINAJSTDNEVNO:
        case FT_P_MESECNO:
        case FT_P_REDKEJE_KOT_MESECNO:
        case FT_P_OBCASNO:
            return FT_P_REVIALNO;
        case FT_P_JAVNO:
        case FT_P_INTERNO:
        case FT_P_ZASEBNO:
            return FT_P_NEOBJAVLJENO;

        // Gigafida - Ft.Z (zvrst)
        case FT_UMETNOSTNA:
        case FT_NEUMETNOSTNA:
            return FT_ZVRST;
        case FT_PESNISKA:
        case FT_PROZNA:
        case FT_DRAMSKA:
            return FT_UMETNOSTNA;
        case FT_STROKOVNA:
        case FT_NESTROKOVNA:
        case FT_PRAVNA:
            return FT_NEUMETNOSTNA;
        case FT_HID:
        case FT_NIT:
            return FT_STROKOVNA;

        // Gigafida - Ft.L (lektorirano)
        case FT_DA:
        case FT_NE:
            return FT_LEKTORIRANO;

        default:
            // root of a hierarchy (or a taxonomy without a registered parent)
            return null;
    }
}
/**
 * Converts a list of long taxonomy names into the matching {@code Taxonomy} values by passing
 * every element through {@code factoryLongName}, keeping the order of {@code stringList}.
 *
 * @param stringList long taxonomy names to convert
 * @return a new mutable list with one entry per input element; an entry is whatever
 *         {@code factoryLongName} returned for that name
 */
public static ArrayList<Taxonomy> convertStringListToTaxonomyList(ObservableList<String> stringList){
    ArrayList<Taxonomy> converted = new ArrayList<>(stringList.size());
    for (String longName : stringList) {
        Taxonomy match = factoryLongName(longName);
        converted.add(match);
    }
    return converted;
}
public static void modifyingTaxonomy(ArrayList<Taxonomy> taxonomy, ArrayList<Taxonomy> checkedItemsTaxonomy, Corpus corpus){
// get taxonomies that were selected/deselected by user
Set<Taxonomy> disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy);
if (taxonomy != null) {
disjointTaxonomies.addAll(taxonomy);
for (Taxonomy s : checkedItemsTaxonomy) {
if (taxonomy.contains(s)) {
disjointTaxonomies.remove(s);
} }
} }
}
// remove previously selected items plus remove taxonomies that are not presented in current setup
ArrayList<Taxonomy> disArr = new ArrayList<>(disjointTaxonomies);
int i = 0;
while(i < disArr.size()){
Taxonomy s = disArr.get(i);
if(!Taxonomy.convertStringListToTaxonomyList(corpus.getTaxonomy()).contains(s)){
disjointTaxonomies.remove(s);
disArr.remove(s);
// taxonomy.remove(s);
i--;
}
i++;
}
if (disjointTaxonomies.size() > 0) {
Taxonomy disjointTaxonomy = disjointTaxonomies.iterator().next();
// taxonomy was selected
if (checkedItemsTaxonomy.contains(disjointTaxonomy)) {
ArrayList<Taxonomy> addTaxonomies = Taxonomy.taxonomySelected(disjointTaxonomy);
checkedItemsTaxonomy.addAll(addTaxonomies);
} else if (taxonomy.contains(disjointTaxonomy)) {
ArrayList<Taxonomy> removeTaxonomies = Taxonomy.taxonomyDeselected(disjointTaxonomy);
checkedItemsTaxonomy.removeAll(removeTaxonomies);
}
}
}
}

View File

@ -45,7 +45,7 @@ public class CharacterAnalysisTab {
@FXML @FXML
private CheckComboBox<String> taxonomyCCB; private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy; private ArrayList<Taxonomy> taxonomy;
@FXML @FXML
private CheckBox displayTaxonomyChB; private CheckBox displayTaxonomyChB;
@ -183,11 +183,33 @@ public class CharacterAnalysisTab {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> { taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
taxonomy = new ArrayList<>(); boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems); ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
}); });
taxonomyCCB.getCheckModel().clearChecks(); taxonomyCCB.getCheckModel().clearChecks();
} else { } else {
@ -313,75 +335,75 @@ public class CharacterAnalysisTab {
* iscvv: false * iscvv: false
* string length: 1 * string length: 1
*/ */
public void populateFields() { // public void populateFields() {
// corpus changed if: current one is null (this is first run of the app) // // corpus changed if: current one is null (this is first run of the app)
// or if currentCorpus != gui's corpus // // or if currentCorpus != gui's corpus
boolean corpusChanged = currentCorpusType == null // boolean corpusChanged = currentCorpusType == null
|| currentCorpusType != corpus.getCorpusType(); // || currentCorpusType != corpus.getCorpusType();
//
// TODO: check for GOS, GIGAFIDA, SOLAR... // // TODO: check for GOS, GIGAFIDA, SOLAR...
// refresh and: // // refresh and:
// TODO if current value != null && is in new calculateFor ? keep : otherwise reset // // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
// if (calculateFor == null) { //// if (calculateFor == null) {
// calculateForRB.selectToggle(lemmaRB); //// calculateForRB.selectToggle(lemmaRB);
// calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString()); //// calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString());
//// }
//
// if (!filter.hasMsd()) {
// // if current corpus doesn't have msd data, disable this field
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(true);
// logger.info("no msd data");
// } else {
// if (ValidationUtil.isEmpty(msd)
// || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// // msd has not been set previously
// // or msd has been set but the corpus changed -> reset
// msd = new ArrayList<>();
// msdTF.setText("");
// msdTF.setDisable(false);
// logger.info("msd reset");
// } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// // if msd has been set, but corpus type remained the same, we can keep any set msd value
// msdTF.setText(StringUtils.join(msdStrings, " "));
// msdTF.setDisable(false);
// logger.info("msd kept");
// }
// }
//
// // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
//
// // keep calculateCvv
// calculatecvvCB.setSelected(calculateCvv);
//
// // keep string length if set
// if (stringLength != null) {
// stringLengthTF.setText(String.valueOf(stringLength));
// } else {
// stringLengthTF.setText("1");
// stringLength = 1;
// }
//
// // TODO: trigger on rescan
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
//
// currentCorpusType = corpus.getCorpusType();
// // setTaxonomyIsDirty(false);
// } else {
//
// }
//
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
//
// } // }
if (!filter.hasMsd()) {
// if current corpus doesn't have msd data, disable this field
msd = new ArrayList<>();
msdTF.setText("");
msdTF.setDisable(true);
logger.info("no msd data");
} else {
if (ValidationUtil.isEmpty(msd)
|| (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// msd has not been set previously
// or msd has been set but the corpus changed -> reset
msd = new ArrayList<>();
msdTF.setText("");
msdTF.setDisable(false);
logger.info("msd reset");
} else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// if msd has been set, but corpus type remained the same, we can keep any set msd value
msdTF.setText(StringUtils.join(msdStrings, " "));
msdTF.setDisable(false);
logger.info("msd kept");
}
}
// TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
// keep calculateCvv
calculatecvvCB.setSelected(calculateCvv);
// keep string length if set
if (stringLength != null) {
stringLengthTF.setText(String.valueOf(stringLength));
} else {
stringLengthTF.setText("1");
stringLength = 1;
}
// TODO: trigger on rescan
if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// user changed corpus (by type) or by selection & triggered a rescan of headers
// see if we read taxonomy from headers, otherwise use default values for given corpus
ObservableList<String> tax = corpus.getTaxonomy();
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
currentCorpusType = corpus.getCorpusType();
// setTaxonomyIsDirty(false);
} else {
}
// see if we read taxonomy from headers, otherwise use default values for given corpus
ObservableList<String> tax = corpus.getTaxonomy();
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
taxonomyCCB.getItems().addAll(taxonomyCCBValues);
}
/** /**
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc., * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
@ -434,7 +456,7 @@ public class CharacterAnalysisTab {
filter.setMultipleKeys(new ArrayList<>()); filter.setMultipleKeys(new ArrayList<>());
filter.setMsd(msd); filter.setMsd(msd);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); filter.setTaxonomy(taxonomy);
filter.setDisplayTaxonomy(displayTaxonomy); filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0); filter.setSkipValue(0);

View File

@ -49,7 +49,7 @@ public class OneWordAnalysisTab {
@FXML @FXML
private CheckComboBox<String> taxonomyCCB; private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy; private ArrayList<Taxonomy> taxonomy;
@FXML @FXML
private CheckBox displayTaxonomyChB; private CheckBox displayTaxonomyChB;
@ -222,11 +222,33 @@ public class OneWordAnalysisTab {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> { taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
taxonomy = new ArrayList<>(); boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems); ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
}); });
taxonomyCCB.getCheckModel().clearChecks(); taxonomyCCB.getCheckModel().clearChecks();
} else { } else {
@ -333,63 +355,63 @@ public class OneWordAnalysisTab {
* iscvv: false * iscvv: false
* string length: 1 * string length: 1
*/ */
public void populateFields() { // public void populateFields() {
// Refreshes this tab's input controls from the current corpus/filter state:
// the calculate-for selection, the MSD text field and the taxonomy combo box items.
// corpus changed if: current one is null (this is first run of the app) // // corpus changed if: current one is null (this is first run of the app)
// or if currentCorpus != gui's corpus // // or if currentCorpus != gui's corpus
boolean corpusChanged = currentCorpusType == null // boolean corpusChanged = currentCorpusType == null
|| currentCorpusType != corpus.getCorpusType(); // || currentCorpusType != corpus.getCorpusType();
//
//
// TODO: check for GOS, GIGAFIDA, SOLAR... // // TODO: check for GOS, GIGAFIDA, SOLAR...
// refresh and: // // refresh and:
// TODO if current value != null && is in new calculateFor ? keep : otherwise reset // // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
// Nothing chosen yet: select the first entry of calculateForCB and derive calculateFor from it.
if (calculateFor == null) { // if (calculateFor == null) {
calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); // calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); // calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
} // }
//
if (!filter.hasMsd()) { // if (!filter.hasMsd()) {
// if current corpus doesn't have msd data, disable this field // // if current corpus doesn't have msd data, disable this field
msd = new ArrayList<>(); // msd = new ArrayList<>();
msdTF.setText(""); // msdTF.setText("");
msdTF.setDisable(true); // msdTF.setDisable(true);
logger.info("no msd data"); // logger.info("no msd data");
} else { // } else {
// NOTE(review): the condition below is logically equivalent to "msd is empty OR corpusChanged".
if (ValidationUtil.isEmpty(msd) // if (ValidationUtil.isEmpty(msd)
|| (!ValidationUtil.isEmpty(msd) && corpusChanged)) { // || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// msd has not been set previously // // msd has not been set previously
// or msd has been set but the corpus changed -> reset // // or msd has been set but the corpus changed -> reset
msd = new ArrayList<>(); // msd = new ArrayList<>();
msdTF.setText(""); // msdTF.setText("");
msdTF.setDisable(false); // msdTF.setDisable(false);
logger.info("msd reset"); // logger.info("msd reset");
} else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { // } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// if msd has been set, but corpus type remained the same, we can keep any set msd value // // if msd has been set, but corpus type remained the same, we can keep any set msd value
msdTF.setText(StringUtils.join(msdStrings, " ")); // msdTF.setText(StringUtils.join(msdStrings, " "));
msdTF.setDisable(false); // msdTF.setDisable(false);
logger.info("msd kept"); // logger.info("msd kept");
} // }
} // }
//
// TODO: trigger on rescan // // TODO: trigger on rescan
// NOTE(review): currentCorpusType is only assigned inside this branch, which requires it to be
// non-null already; this method never initialises it from null - confirm it is set elsewhere.
// NOTE(review): the taxonomyCCBValues computed in this branch is overwritten by the
// unconditional assignment that follows the if/else.
if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { // if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// user changed corpus (by type) or by selection & triggered a rescan of headers // // user changed corpus (by type) or by selection & triggered a rescan of headers
// see if we read taxonomy from headers, otherwise use default values for given corpus // // see if we read taxonomy from headers, otherwise use default values for given corpus
ObservableList<String> tax = corpus.getTaxonomy(); // ObservableList<String> tax = corpus.getTaxonomy();
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
//
currentCorpusType = corpus.getCorpusType(); // currentCorpusType = corpus.getCorpusType();
// setTaxonomyIsDirty(false); // // setTaxonomyIsDirty(false);
} else { // } else {
//
} // }
//
// see if we read taxonomy from headers, otherwise use default values for given corpus // // see if we read taxonomy from headers, otherwise use default values for given corpus
ObservableList<String> tax = corpus.getTaxonomy(); // ObservableList<String> tax = corpus.getTaxonomy();
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// NOTE(review): addAll appends to whatever items the combo box already holds; confirm the
// items are cleared elsewhere if this method can run more than once.
taxonomyCCB.getItems().addAll(taxonomyCCBValues); // taxonomyCCB.getItems().addAll(taxonomyCCBValues);
//
} // }
/** /**
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc., * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
@ -435,7 +457,7 @@ public class OneWordAnalysisTab {
Filter filter = new Filter(); Filter filter = new Filter();
filter.setNgramValue(1); filter.setNgramValue(1);
filter.setCalculateFor(calculateFor); filter.setCalculateFor(calculateFor);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); filter.setTaxonomy(taxonomy);
filter.setDisplayTaxonomy(displayTaxonomy); filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0); filter.setSkipValue(0);

View File

@ -52,7 +52,7 @@ public class StringAnalysisTabNew2 {
@FXML @FXML
private CheckComboBox<String> taxonomyCCB; private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy; private ArrayList<Taxonomy> taxonomy;
@FXML @FXML
private CheckBox calculatecvvCB; private CheckBox calculatecvvCB;
@ -308,11 +308,33 @@ public class StringAnalysisTabNew2 {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> { taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
taxonomy = new ArrayList<>(); boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems); ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
}); });
taxonomyCCB.getCheckModel().clearChecks(); taxonomyCCB.getCheckModel().clearChecks();
} else { } else {
@ -563,7 +585,7 @@ public class StringAnalysisTabNew2 {
Filter filter = new Filter(); Filter filter = new Filter();
filter.setNgramValue(ngramValue); filter.setNgramValue(ngramValue);
filter.setCalculateFor(calculateFor); filter.setCalculateFor(calculateFor);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); filter.setTaxonomy(taxonomy);
filter.setDisplayTaxonomy(displayTaxonomy); filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(skipValue); filter.setSkipValue(skipValue);

View File

@ -38,7 +38,7 @@ public class WordFormationTab {
@FXML @FXML
private CheckComboBox<String> taxonomyCCB; private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy; private ArrayList<Taxonomy> taxonomy;
@FXML @FXML
private TextField minimalOccurrencesTF; private TextField minimalOccurrencesTF;
@ -77,7 +77,8 @@ public class WordFormationTab {
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> { taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
taxonomy = new ArrayList<>(); taxonomy = new ArrayList<>();
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems); ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
taxonomy.addAll(checkedItemsTaxonomy);
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}); });
taxonomyCCB.getCheckModel().clearChecks(); taxonomyCCB.getCheckModel().clearChecks();
@ -140,7 +141,7 @@ public class WordFormationTab {
Filter filter = new Filter(); Filter filter = new Filter();
filter.setNgramValue(1); filter.setNgramValue(1);
filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY); filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); filter.setTaxonomy(taxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0); filter.setSkipValue(0);
filter.setMsd(new ArrayList<>()); filter.setMsd(new ArrayList<>());

View File

@ -47,7 +47,7 @@ public class WordLevelTab {
@FXML @FXML
private CheckComboBox<String> taxonomyCCB; private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy; private ArrayList<Taxonomy> taxonomy;
@FXML @FXML
private CheckBox displayTaxonomyChB; private CheckBox displayTaxonomyChB;
@ -345,11 +345,33 @@ public class WordLevelTab {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll(); taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy()); taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> { taxonomyCCB.getCheckModel().getCheckedItems().addListener(new ListChangeListener<String>() {
taxonomy = new ArrayList<>(); boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
taxonomy.addAll(checkedItems); ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
}); });
taxonomyCCB.getCheckModel().clearChecks(); taxonomyCCB.getCheckModel().clearChecks();
} else { } else {
@ -458,63 +480,63 @@ public class WordLevelTab {
* iscvv: false * iscvv: false
* string length: 1 * string length: 1
*/ */
public void populateFields() { // public void populateFields() {
// Refreshes this tab's input controls from the current corpus/filter state:
// the calculate-for selection, the MSD text field and the taxonomy combo box items.
// corpus changed if: current one is null (this is first run of the app) // // corpus changed if: current one is null (this is first run of the app)
// or if currentCorpus != gui's corpus // // or if currentCorpus != gui's corpus
boolean corpusChanged = currentCorpusType == null // boolean corpusChanged = currentCorpusType == null
|| currentCorpusType != corpus.getCorpusType(); // || currentCorpusType != corpus.getCorpusType();
//
//
// TODO: check for GOS, GIGAFIDA, SOLAR... // // TODO: check for GOS, GIGAFIDA, SOLAR...
// refresh and: // // refresh and:
// TODO if current value != null && is in new calculateFor ? keep : otherwise reset // // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
// Nothing chosen yet: select the first entry of calculateForCB and derive calculateFor from it.
if (calculateFor == null) { // if (calculateFor == null) {
calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0)); // calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0)); // calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
} // }
//
if (!filter.hasMsd()) { // if (!filter.hasMsd()) {
// if current corpus doesn't have msd data, disable this field // // if current corpus doesn't have msd data, disable this field
msd = new ArrayList<>(); // msd = new ArrayList<>();
msdTF.setText(""); // msdTF.setText("");
msdTF.setDisable(true); // msdTF.setDisable(true);
logger.info("no msd data"); // logger.info("no msd data");
} else { // } else {
// NOTE(review): the condition below is logically equivalent to "msd is empty OR corpusChanged".
if (ValidationUtil.isEmpty(msd) // if (ValidationUtil.isEmpty(msd)
|| (!ValidationUtil.isEmpty(msd) && corpusChanged)) { // || (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
// msd has not been set previously // // msd has not been set previously
// or msd has been set but the corpus changed -> reset // // or msd has been set but the corpus changed -> reset
msd = new ArrayList<>(); // msd = new ArrayList<>();
msdTF.setText(""); // msdTF.setText("");
msdTF.setDisable(false); // msdTF.setDisable(false);
logger.info("msd reset"); // logger.info("msd reset");
} else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) { // } else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
// if msd has been set, but corpus type remained the same, we can keep any set msd value // // if msd has been set, but corpus type remained the same, we can keep any set msd value
msdTF.setText(StringUtils.join(msdStrings, " ")); // msdTF.setText(StringUtils.join(msdStrings, " "));
msdTF.setDisable(false); // msdTF.setDisable(false);
logger.info("msd kept"); // logger.info("msd kept");
} // }
} // }
//
// TODO: trigger on rescan // // TODO: trigger on rescan
// NOTE(review): currentCorpusType is only assigned inside this branch, which requires it to be
// non-null already; this method never initialises it from null - confirm it is set elsewhere.
// NOTE(review): the taxonomyCCBValues computed in this branch is overwritten by the
// unconditional assignment that follows the if/else.
if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) { // if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// user changed corpus (by type) or by selection & triggered a rescan of headers // // user changed corpus (by type) or by selection & triggered a rescan of headers
// see if we read taxonomy from headers, otherwise use default values for given corpus // // see if we read taxonomy from headers, otherwise use default values for given corpus
ObservableList<String> tax = corpus.getTaxonomy(); // ObservableList<String> tax = corpus.getTaxonomy();
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
//
currentCorpusType = corpus.getCorpusType(); // currentCorpusType = corpus.getCorpusType();
// setTaxonomyIsDirty(false); // // setTaxonomyIsDirty(false);
} else { // } else {
//
} // }
//
// see if we read taxonomy from headers, otherwise use default values for given corpus // // see if we read taxonomy from headers, otherwise use default values for given corpus
ObservableList<String> tax = corpus.getTaxonomy(); // ObservableList<String> tax = corpus.getTaxonomy();
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType()); // taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// NOTE(review): addAll appends to whatever items the combo box already holds; confirm the
// items are cleared elsewhere if this method can run more than once.
taxonomyCCB.getItems().addAll(taxonomyCCBValues); // taxonomyCCB.getItems().addAll(taxonomyCCBValues);
//
} // }
/** /**
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc., * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
@ -560,7 +582,7 @@ public class WordLevelTab {
Filter filter = new Filter(); Filter filter = new Filter();
filter.setNgramValue(1); filter.setNgramValue(1);
filter.setCalculateFor(calculateFor); filter.setCalculateFor(calculateFor);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType())); filter.setTaxonomy(taxonomy);
filter.setDisplayTaxonomy(displayTaxonomy); filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL); filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0); filter.setSkipValue(0);

View File

@ -60,7 +60,7 @@ public class Export {
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock, public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
StatisticsNew statistics, Filter filter) { StatisticsNew statistics, Filter filter) {
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult(); Map<Taxonomy, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
//Delimiter used in CSV file //Delimiter used in CSV file
String NEW_LINE_SEPARATOR = "\n"; String NEW_LINE_SEPARATOR = "\n";
@ -85,7 +85,7 @@ public class Export {
// num_taxonomy_frequencies.put(taxonomyKey, val); // num_taxonomy_frequencies.put(taxonomyKey, val);
// } // }
// } // }
Map<String, AtomicLong> num_taxonomy_frequencies = statistics.getUniGramOccurrences(); Map<Taxonomy, AtomicLong> num_taxonomy_frequencies = statistics.getUniGramOccurrences();
//CSV file header //CSV file header
@ -106,7 +106,7 @@ public class Export {
} }
} }
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences().get("Total").longValue())); headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences().get(Taxonomy.TOTAL).longValue()));
// headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies)); // headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
for (CalculateFor otherKey : filter.getMultipleKeys()) { for (CalculateFor otherKey : filter.getMultipleKeys()) {
@ -127,11 +127,11 @@ public class Export {
} }
} }
for (String key : taxonomyResults.keySet()) { for (Taxonomy key : taxonomyResults.keySet()) {
if(!key.equals("Total") && num_taxonomy_frequencies.get(key).longValue() > 0) { if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]"); FILE_HEADER_AL.add("Absolutna pogostost [" + key.toString() + "]");
FILE_HEADER_AL.add("Delež [" + key + "]"); FILE_HEADER_AL.add("Delež [" + key.toString() + "]");
FILE_HEADER_AL.add("Relativna pogostost [" + key + "]"); FILE_HEADER_AL.add("Relativna pogostost [" + key.toString() + "]");
} }
} }
@ -270,8 +270,8 @@ public class Export {
dataEntry.add(e.getValue().toString()); dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies)); dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies)); dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
for (String key : taxonomyResults.keySet()){ for (Taxonomy key : taxonomyResults.keySet()){
if(!key.equals("Total") && num_taxonomy_frequencies.get(key).longValue() > 0) { if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey()); AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
dataEntry.add(frequency.toString()); dataEntry.add(frequency.toString());
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key).longValue())); dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key).longValue()));

View File

@ -5,6 +5,7 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import data.Taxonomy;
import org.rocksdb.RocksDB; import org.rocksdb.RocksDB;
import util.db.RDB; import util.db.RDB;