Added heuristics (time predictions for calculations)

This commit is contained in:
Luka 2019-01-07 12:55:11 +01:00
parent bb9f3f0fb9
commit 5af79e9670
19 changed files with 2232 additions and 1247 deletions

File diff suppressed because it is too large Load Diff

View File

@ -74,7 +74,7 @@
// // public static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
// // for (Sentence s : corpus) {
// // // disregard if wrong taxonomy
// // if (!(s.getTaxonomy().startsWith(taxonomy))) {
// // if (!(s.getObservableListTaxonomy().startsWith(taxonomy))) {
// // continue;
// // }
// //
@ -122,7 +122,7 @@
// static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
// for (Sentence s : corpus) {
// // disregard if wrong taxonomy
//// if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
//// if (taxonomy != null && !(s.getObservableListTaxonomy().startsWith(taxonomy))) {
//// continue;
//// }
//

View File

@ -432,7 +432,7 @@ public class Ngrams {
// String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
// key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
// stats.updateTaxonomyResults(new MultipleHMKeys1(key),
// stats.getCorpus().getTaxonomy());
// stats.getCorpus().getObservableListTaxonomy());
ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();

View File

@ -91,7 +91,7 @@ import data.Word;
// private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
// for (Sentence s : corpus) {
// if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
// List<String> sentence = new ArrayList<>(s.getWords().size());
// List<Word> filteredWords = new ArrayList<>();
//
@ -122,7 +122,7 @@ import data.Word;
// private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
// for (Sentence s : corpus) {
// if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
// if (s.getObservableListTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
// List<String> sentence = new ArrayList<>(s.getWords().size());
//
// if (stats.getCf() == CalculateFor.LEMMA) {

View File

@ -27,7 +27,8 @@ public class Corpus {
private File chosenCorpusLocation;
private Collection<File> detectedCorpusFiles;
boolean headerRead;
private ObservableList<String> taxonomy; // if gigafida or gos
private ArrayList<Taxonomy> taxonomy; // if gigafida or gos
private Taxonomy taxonomyTotal;
private HashMap<String, ObservableList<String>> solarFilters; // if solar
private HashMap<String, HashSet<String>> solarFiltersForXML; // if solar - used while parsing xml
private boolean gosOrthMode;
@ -36,6 +37,7 @@ public class Corpus {
public Corpus() {
validationErrors = new ArrayList<>();
setTotal();
}
public CorpusType getCorpusType() {
@ -82,9 +84,25 @@ public class Corpus {
this.headerRead = headerRead;
}
public ObservableList<String> getTaxonomy() {
/**
 * Returns the corpus-wide "Total" taxonomy entry held in {@code taxonomyTotal}.
 *
 * @return the instance most recently created by {@link #setTotal()}
 */
public Taxonomy getTotal() {
return taxonomyTotal;
}
/**
 * (Re)creates the "Total" taxonomy entry. Every call replaces {@code taxonomyTotal}
 * with a new {@link Taxonomy} built from the name "Total" (second argument
 * {@code false}: the string is passed as a short name, not a long name).
 */
public void setTotal() {
taxonomyTotal = new Taxonomy("Total", false);
}
/**
 * Returns the taxonomy entries stored for this corpus.
 *
 * @return the internal list itself (not a copy); whatever is currently held in the
 *         {@code taxonomy} field
 */
public ArrayList<Taxonomy> getTaxonomy() {
return taxonomy;
}
/**
 * Returns the long names of this corpus' taxonomy entries as a fresh observable list,
 * in the same order as the stored entries.
 *
 * <p>When no taxonomy has been stored yet (the {@code taxonomy} field is still
 * {@code null}) an empty list is returned instead of failing with a
 * {@link NullPointerException}, so callers may safely call {@code size()} on the result.
 *
 * @return a new, independent observable list of long names; never {@code null}
 */
public ObservableList<String> getObservableListTaxonomy() {
    ArrayList<String> longNames = new ArrayList<>();
    if (this.taxonomy != null) {
        for (Taxonomy t : this.taxonomy) {
            longNames.add(t.toLongNameString());
        }
    }
    return FXCollections.observableArrayList(longNames);
}
//
// public ObservableList<String> getFormattedTaxonomy() {
// ArrayList<String> al = Tax.getTaxonomyFormatted(new ArrayList<>(taxonomy), corpusType);
@ -92,7 +110,10 @@ public class Corpus {
// }
public void setTaxonomy(ObservableList<String> taxonomy) {
this.taxonomy = taxonomy;
this.taxonomy = new ArrayList<>();
for(String t : taxonomy){
this.taxonomy.add(new Taxonomy(t, true));
}
logger.info("Corpus.set: ", taxonomy);
}
@ -151,7 +172,8 @@ public class Corpus {
if (!headerRead && corpusType != null) {
// if user didn't opt into reading the headers, set default taxonomy or solar filters
if (Tax.getCorpusTypesWithTaxonomy().contains(corpusType)) {
taxonomy = Tax.getTaxonomyForComboBox(corpusType);
Tax.getTaxonomyForComboBox(corpusType);
setTaxonomy(Tax.getTaxonomyForComboBox(corpusType));
} else if (corpusType == CorpusType.SOLAR && solarFilters == null) {
setSolarFilters(SolarFilters.getFiltersForComboBoxes());
}

View File

@ -2,6 +2,7 @@ package data;
public enum CorpusType {
GIGAFIDA("Gigafida", "gigafida"),
GIGAFIDA2("Gigafida2.0", "gigafida2.0"),
CCKRES("ccKres ", "cckres"),
SOLAR("Šolar", "šolar"),
GOS("GOS", "gos"),

View File

@ -10,7 +10,6 @@ import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import gui.I18N;
import org.apache.commons.lang3.StringUtils;
@ -51,17 +50,17 @@ public class StatisticsNew {
this.corpus = corpus;
this.filter = filter;
this.taxonomyResult = new ConcurrentHashMap<>();
this.taxonomyResult.put(Taxonomy.TOTAL, new ConcurrentHashMap<>());
this.taxonomyResult.put(corpus.getTotal(), new ConcurrentHashMap<>());
this.collocability = new ConcurrentHashMap<>();
this.uniGramTaxonomyOccurrences = new ConcurrentHashMap<>();
this.uniGramTaxonomyOccurrences.put(Taxonomy.TOTAL, new AtomicLong(0L));
this.uniGramTaxonomyOccurrences.put(corpus.getTotal(), new AtomicLong(0L));
// create table for counting word occurrences per taxonomies
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
if (this.corpus.getObservableListTaxonomy() != null && filter.getDisplayTaxonomy()) {
if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
this.taxonomyResult.put(Taxonomy.factoryLongName(this.corpus.getTaxonomy().get(i)), new ConcurrentHashMap<>());
for (int i = 0; i < this.corpus.getObservableListTaxonomy().size(); i++) {
this.taxonomyResult.put(Taxonomy.factoryLongName(this.corpus.getObservableListTaxonomy().get(i), corpus), new ConcurrentHashMap<>());
}
} else {
for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
@ -234,14 +233,14 @@ public class StatisticsNew {
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
// if no results and nothing to save, return false
if (!(taxonomyResult.get(Taxonomy.TOTAL).size() > 0)) {
if (!(taxonomyResult.get(corpus.getTotal()).size() > 0)) {
analysisProducedResults = false;
return false;
} else {
analysisProducedResults = true;
}
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get(Taxonomy.TOTAL), Util.getValidInt(limit))));
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get(corpus.getTotal()), Util.getValidInt(limit))));
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
return true;
}
@ -253,14 +252,14 @@ public class StatisticsNew {
if (minimalTaxonomy == 1)
return;
int occurances;
for (MultipleHMKeys key : taxonomyResult.get(Taxonomy.TOTAL).keySet()){
for (MultipleHMKeys key : taxonomyResult.get(corpus.getTotal()).keySet()){
occurances = 0;
for (Taxonomy columnNameKey : taxonomyResult.keySet()){
if(!columnNameKey.equals(Taxonomy.TOTAL) && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1)
if(!columnNameKey.equals(corpus.getTotal()) && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1)
occurances++;
}
if(occurances < minimalTaxonomy){
taxonomyResult.get(Taxonomy.TOTAL).remove(key);
taxonomyResult.get(corpus.getTotal()).remove(key);
}
}
}
@ -271,8 +270,8 @@ public class StatisticsNew {
private void removeMinimalOccurrences(Integer minimalOccurrences) {
if (minimalOccurrences == 0)
return;
for (MultipleHMKeys key : taxonomyResult.get(Taxonomy.TOTAL).keySet()){
if(taxonomyResult.get(Taxonomy.TOTAL).get(key).intValue() < minimalOccurrences){
for (MultipleHMKeys key : taxonomyResult.get(corpus.getTotal()).keySet()){
if(taxonomyResult.get(corpus.getTotal()).get(key).intValue() < minimalOccurrences){
for (Taxonomy t : taxonomyResult.keySet()){
taxonomyResult.get(t).remove(key);
}
@ -349,7 +348,7 @@ public class StatisticsNew {
}
public void updateUniGramOccurrences(int amount, ArrayList<Taxonomy> taxonomy){
uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).set(uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).longValue() + amount);
uniGramTaxonomyOccurrences.get(corpus.getTotal()).set(uniGramTaxonomyOccurrences.get(corpus.getTotal()).longValue() + amount);
for (Taxonomy t : taxonomy){
if (uniGramTaxonomyOccurrences.get(t) != null){
uniGramTaxonomyOccurrences.get(t).set(uniGramTaxonomyOccurrences.get(t).longValue() + amount);
@ -360,15 +359,15 @@ public class StatisticsNew {
}
public Map<Taxonomy, AtomicLong> getUniGramOccurrences(){
// return uniGramTaxonomyOccurrences.get(Taxonomy.TOTAL).longValue();
// return uniGramTaxonomyOccurrences.get(corpus.getTotal()).longValue();
return uniGramTaxonomyOccurrences;
}
public void updateTaxonomyResults(MultipleHMKeys o, List<Taxonomy> taxonomy) {
for (Taxonomy key : taxonomyResult.keySet()) {
// first word should have the same taxonomy as others
if (key.equals(Taxonomy.TOTAL) || taxonomy.contains(key)) {
// if (key.equals(Taxonomy.TOTAL) || taxonomy != null && taxonomy.contains(key)) {
if (key.equals(corpus.getTotal()) || taxonomy.contains(key)) {
// if (key.equals(corpus.getTotal()) || taxonomy != null && taxonomy.contains(key)) {
// if taxonomy not in map and in this word
AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));
@ -607,7 +606,7 @@ public class StatisticsNew {
// sortedTaxonomyString.add(t);
// }
// getTaxonomyForTaxonomyResult
tax = Tax.getTaxonomyForTaxonomyResult(corpus.getCorpusType(), taxonomyResult.keySet());
tax = Tax.getTaxonomyForTaxonomyResult(corpus, taxonomyResult.keySet());
}
// String sep = "";
@ -618,11 +617,11 @@ public class StatisticsNew {
}
// info.put(sep = sep + " ", s);
if (uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s)) == null) {
if (uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s, corpus)) == null) {
info.put(s, "");
continue;
}
int n = uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s)).intValue();
int n = uniGramTaxonomyOccurrences.get(Taxonomy.factoryLongName(s, corpus)).intValue();
if (n == 0) {
info.put(s, "");
} else {
@ -662,11 +661,11 @@ public class StatisticsNew {
// count number of all words
long N = 0;
for(AtomicLong a : oneWordTaxonomyResult.get(Taxonomy.TOTAL).values()){
for(AtomicLong a : oneWordTaxonomyResult.get(corpus.getTotal()).values()){
N += a.longValue();
}
for(MultipleHMKeys hmKey : taxonomyResult.get(Taxonomy.TOTAL).keySet()) {
for(MultipleHMKeys hmKey : taxonomyResult.get(corpus.getTotal()).keySet()) {
// String[] splitedString = hmKey.getK1().split("\\s+");
long sum_fwi =0L;
@ -674,15 +673,15 @@ public class StatisticsNew {
for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){
// System.out.println(smallHmKey.getK1());
sum_fwi += oneWordTaxonomyResult.get(Taxonomy.TOTAL).get(smallHmKey).longValue();
mul_fwi *= oneWordTaxonomyResult.get(Taxonomy.TOTAL).get(smallHmKey).longValue();
sum_fwi += oneWordTaxonomyResult.get(corpus.getTotal()).get(smallHmKey).longValue();
mul_fwi *= oneWordTaxonomyResult.get(corpus.getTotal()).get(smallHmKey).longValue();
}
// String t = hmKey.getK1();
// if(hmKey.getK1().equals("v Slovenija")){
// System.out.println("TEST");
//
// }
double O = (double)taxonomyResult.get(Taxonomy.TOTAL).get(hmKey).longValue();
double O = (double)taxonomyResult.get(corpus.getTotal()).get(hmKey).longValue();
double n = (double)filter.getNgramValue();
double E = (double)mul_fwi / Math.pow(N, n - 1);
if (collocabilityMap.keySet().contains(Collocability.DICE)){

View File

@ -10,7 +10,7 @@ import javafx.collections.ObservableList;
public class Tax {
private static LinkedHashMap<String, String> GIGAFIDA_TAXONOMY;
private static LinkedHashMap<String, String> GOS_TAXONOMY;
private static final HashSet<CorpusType> corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES, CorpusType.SSJ500K, CorpusType.VERT));
private static final HashSet<CorpusType> corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES, CorpusType.SSJ500K, CorpusType.GIGAFIDA2, CorpusType.VERT));
static {
// GIGAFIDA ----------------------------
@ -104,7 +104,7 @@ public class Tax {
public static ObservableList<String> getTaxonomyForComboBox(CorpusType corpusType, HashSet<String> foundTax) {
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K || corpusType == CorpusType.GIGAFIDA2) {
tax = GIGAFIDA_TAXONOMY;
} else if (corpusType == CorpusType.GOS) {
tax = GOS_TAXONOMY;
@ -143,13 +143,13 @@ public class Tax {
/**
* Returns taxonomy names only for items found in headers
*/
public static ArrayList<String> getTaxonomyForTaxonomyResult(CorpusType corpusType, Set<Taxonomy> foundTax) {
public static ArrayList<String> getTaxonomyForTaxonomyResult(Corpus corpus, Set<Taxonomy> foundTax) {
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
Set<Taxonomy> foundTaxHS= new HashSet<>(foundTax);
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
if (corpus.getCorpusType() == CorpusType.GIGAFIDA || corpus.getCorpusType() == CorpusType.CCKRES || corpus.getCorpusType() == CorpusType.SSJ500K || corpus.getCorpusType() == CorpusType.GIGAFIDA2) {
tax = GIGAFIDA_TAXONOMY;
} else if (corpusType == CorpusType.GOS) {
} else if (corpus.getCorpusType() == CorpusType.GOS) {
tax = GOS_TAXONOMY;
}
@ -161,7 +161,7 @@ public class Tax {
for(Taxonomy e : foundTaxHS){
String[] elList = e.toString().split("\\.");
for(int i = 1; i < elList.length - 1; i++){
Taxonomy candidate = Taxonomy.factory(String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i)));
Taxonomy candidate = Taxonomy.factory(String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i)), corpus);
genFoundTax.add(candidate);
}
}
@ -186,7 +186,7 @@ public class Tax {
// assures same relative order
for (String t : tax.keySet()) {
if (foundTaxHS.contains(Taxonomy.factory(t))) {
if (foundTaxHS.contains(Taxonomy.factory(t, corpus))) {
taxForCombo.add(tax.get(t));
}
}
@ -263,13 +263,19 @@ public class Tax {
public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<Taxonomy> taxonomy) {
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
ArrayList<String> result = new ArrayList<>();
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K || corpusType == CorpusType.GIGAFIDA2) {
tax = GIGAFIDA_TAXONOMY;
} else if (corpusType == CorpusType.GOS) {
tax = GOS_TAXONOMY;
} else if (corpusType == CorpusType.VERT) {
for (Taxonomy t : taxonomy) {
result.add(t.toLongNameString());
}
return result;
}
ArrayList<String> result = new ArrayList<>();
for (Taxonomy t : taxonomy) {
result.add(tax.get(t.toString()));

View File

@ -5,7 +5,7 @@ import java.util.concurrent.ConcurrentHashMap;
import javafx.collections.ObservableList;
public enum Taxonomy {
enum TaxonomyEnum {
TOTAL("Total", "Total"),
// GOS
@ -85,7 +85,7 @@ public enum Taxonomy {
private final String name;
private final String longName;
Taxonomy(String name, String longName) {
TaxonomyEnum(String name, String longName) {
this.name = name;
this.longName = longName;
}
@ -98,7 +98,7 @@ public enum Taxonomy {
return this.longName;
}
public static Taxonomy factory(String tax) {
public static TaxonomyEnum factory(String tax) {
if (tax != null) {
// GOS
if (DISKURZ.toString().equals(tax)) {
@ -289,7 +289,7 @@ public enum Taxonomy {
return null;
}
public static Taxonomy factoryLongName(String tax) {
public static TaxonomyEnum factoryLongName(String tax) {
if (tax != null) {
// GOS
if (DISKURZ.toLongNameString().equals(tax)) {
@ -477,11 +477,15 @@ public enum Taxonomy {
}
}
// return new Taxonomy(tax, tax);
System.out.println("2.");
System.out.println(tax);
return null;
}
public static ArrayList<Taxonomy> taxonomySelected(Taxonomy disjointTaxonomy) {
ArrayList<Taxonomy> r = new ArrayList<>();
public static ArrayList<TaxonomyEnum> taxonomySelected(TaxonomyEnum disjointTaxonomy) {
ArrayList<TaxonomyEnum> r = new ArrayList<>();
System.out.println(disjointTaxonomy);
if(disjointTaxonomy.equals(DISKURZ)){
@ -628,9 +632,9 @@ public enum Taxonomy {
return r;
}
public static ArrayList<Taxonomy> taxonomyDeselected(Taxonomy disjointTaxonomy){
ArrayList<Taxonomy> r = new ArrayList<>();
Map<Taxonomy, Taxonomy> connections = new ConcurrentHashMap<>();
public static ArrayList<TaxonomyEnum> taxonomyDeselected(TaxonomyEnum disjointTaxonomy){
ArrayList<TaxonomyEnum> r = new ArrayList<>();
Map<TaxonomyEnum, TaxonomyEnum> connections = new ConcurrentHashMap<>();
connections.put(DISKURZ_JAVNI, DISKURZ);
connections.put(DISKURZ_INFORMATIVNO_IZOBRAZEVALNI, DISKURZ_JAVNI);
connections.put(DISKURZ_RAZVEDRILNI, DISKURZ_JAVNI);
@ -685,7 +689,7 @@ public enum Taxonomy {
connections.put(FT_DA, FT_LEKTORIRANO);
connections.put(FT_NE, FT_LEKTORIRANO);
Taxonomy currentTaxonomy = disjointTaxonomy;
TaxonomyEnum currentTaxonomy = disjointTaxonomy;
r.add(currentTaxonomy);
while(connections.containsKey(currentTaxonomy)){
currentTaxonomy = connections.get(currentTaxonomy);
@ -695,29 +699,36 @@ public enum Taxonomy {
return r;
}
public static ArrayList<Taxonomy> convertStringListToTaxonomyList(ObservableList<String> stringList){
public static ArrayList<TaxonomyEnum> convertStringListToTaxonomyList(ObservableList<String> stringList, Corpus corpus){
System.out.println("1.");
System.out.println(stringList);
ArrayList<Taxonomy> taxonomyList = new ArrayList<>();
ArrayList<TaxonomyEnum> taxonomyList = new ArrayList<>();
// System.out.println("INTERESTING STUFF");
// System.out.println(stringList);
for (String e : stringList) {
taxonomyList.add(factoryLongName(e));
for (Taxonomy t : corpus.getTaxonomy()){
if (t.toLongNameString().equals(e)) {
taxonomyList.add(t.getTaxonomyEnum());
}
}
}
// System.out.println(taxonomyList);
// System.out.println("-----------------");
return taxonomyList;
}
public static void modifyingTaxonomy(ArrayList<Taxonomy> taxonomy, ArrayList<Taxonomy> checkedItemsTaxonomy, Corpus corpus){
public static void modifyingTaxonomy(ArrayList<TaxonomyEnum> taxonomy, ArrayList<TaxonomyEnum> checkedItemsTaxonomy, Corpus corpus){
// get taxonomies that were selected/deselected by user
// System.out.println(taxonomy);
// System.out.println(checkedItemsTaxonomy);
System.out.println("Print here:");
System.out.println(taxonomy);
System.out.println(checkedItemsTaxonomy);
System.out.println("-------------");
Set<Taxonomy> disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy);
Set<TaxonomyEnum> disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy);
if (taxonomy != null) {
disjointTaxonomies.addAll(taxonomy);
for (Taxonomy s : checkedItemsTaxonomy) {
for (TaxonomyEnum s : checkedItemsTaxonomy) {
if (taxonomy.contains(s)) {
disjointTaxonomies.remove(s);
}
@ -725,11 +736,11 @@ public enum Taxonomy {
}
// remove previously selected items plus remove taxonomies that are not presented in current setup
ArrayList<Taxonomy> disArr = new ArrayList<>(disjointTaxonomies);
ArrayList<TaxonomyEnum> disArr = new ArrayList<>(disjointTaxonomies);
int i = 0;
while(i < disArr.size()){
Taxonomy s = disArr.get(i);
if(!Taxonomy.convertStringListToTaxonomyList(corpus.getTaxonomy()).contains(s)){
TaxonomyEnum s = disArr.get(i);
if(!TaxonomyEnum.convertStringListToTaxonomyList(corpus.getObservableListTaxonomy(), corpus).contains(s)){
disjointTaxonomies.remove(s);
disArr.remove(s);
// taxonomy.remove(s);
@ -740,14 +751,14 @@ public enum Taxonomy {
if (disjointTaxonomies.size() > 0) {
Taxonomy disjointTaxonomy = disjointTaxonomies.iterator().next();
TaxonomyEnum disjointTaxonomy = disjointTaxonomies.iterator().next();
// taxonomy was selected
if (checkedItemsTaxonomy.contains(disjointTaxonomy)) {
ArrayList<Taxonomy> addTaxonomies = Taxonomy.taxonomySelected(disjointTaxonomy);
ArrayList<TaxonomyEnum> addTaxonomies = TaxonomyEnum.taxonomySelected(disjointTaxonomy);
checkedItemsTaxonomy.addAll(addTaxonomies);
} else if (taxonomy.contains(disjointTaxonomy)) {
ArrayList<Taxonomy> removeTaxonomies = Taxonomy.taxonomyDeselected(disjointTaxonomy);
ArrayList<TaxonomyEnum> removeTaxonomies = TaxonomyEnum.taxonomyDeselected(disjointTaxonomy);
checkedItemsTaxonomy.removeAll(removeTaxonomies);
}
}
@ -755,3 +766,203 @@ public enum Taxonomy {
}
/**
 * A taxonomy label of a corpus.
 *
 * <p>An instance wraps a {@link TaxonomyEnum} constant when the label is one of the
 * predefined taxonomies; otherwise the enum is {@code null} and the raw string serves
 * as both the short and the long name.
 *
 * <p>The static lookup helpers return the instances already held by the given
 * {@link Corpus} rather than creating new ones. This class does not override
 * {@code equals}/{@code hashCode}, so two separately constructed instances with the
 * same name are not equal.
 */
public class Taxonomy {
    private final String name;
    private final String longName;
    /** Matching predefined constant, or {@code null} when the label is not predefined. */
    private final TaxonomyEnum taxonomyEnum;

    /**
     * Builds a taxonomy from its textual form.
     *
     * @param tax      the taxonomy text
     * @param longName {@code true} if {@code tax} is a long name, {@code false} if it is
     *                 a short name; selects which {@link TaxonomyEnum} lookup is used
     */
    public Taxonomy(String tax, boolean longName) {
        if (!longName) {
            this.taxonomyEnum = TaxonomyEnum.factory(tax);
        } else {
            this.taxonomyEnum = TaxonomyEnum.factoryLongName(tax);
        }

        if (this.taxonomyEnum != null) {
            this.name = this.taxonomyEnum.toString();
            this.longName = this.taxonomyEnum.toLongNameString();
        } else {
            // Not a predefined taxonomy: keep the raw text for both names.
            this.name = tax;
            this.longName = tax;
        }
    }

    /**
     * Builds a taxonomy from a predefined constant.
     *
     * @param taxonomyEnum the constant to wrap; must not be {@code null}
     */
    public Taxonomy(TaxonomyEnum taxonomyEnum) {
        this.taxonomyEnum = taxonomyEnum;
        this.name = this.taxonomyEnum.toString();
        this.longName = this.taxonomyEnum.toLongNameString();
    }

    /** Returns the short name. */
    @Override
    public String toString() {
        return this.name;
    }

    /** Returns the long name. */
    public String toLongNameString() {
        return this.longName;
    }

    /** Returns the wrapped constant, or {@code null} for a non-predefined taxonomy. */
    public TaxonomyEnum getTaxonomyEnum() {
        return this.taxonomyEnum;
    }

    /**
     * Finds the corpus' taxonomy whose short name equals {@code tax}.
     *
     * @return the instance held by {@code corpus}, or {@code null} when {@code tax} is
     *         {@code null}, the corpus has no taxonomy list, or nothing matches
     */
    public static Taxonomy factory(String tax, Corpus corpus) {
        if (tax == null) {
            return null;
        }
        for (Taxonomy t : taxonomiesOf(corpus)) {
            if (tax.equals(t.toString())) {
                return t;
            }
        }
        return null;
    }

    /**
     * Finds the corpus' taxonomy whose long name equals {@code tax}.
     *
     * @return the instance held by {@code corpus}, or {@code null} when {@code tax} is
     *         {@code null}, the corpus has no taxonomy list, or nothing matches
     */
    public static Taxonomy factoryLongName(String tax, Corpus corpus) {
        if (tax == null) {
            return null;
        }
        for (Taxonomy t : taxonomiesOf(corpus)) {
            if (tax.equals(t.toLongNameString())) {
                return t;
            }
        }
        return null;
    }

    /**
     * Placeholder that is not implemented: always returns {@code null}.
     * The parent-chain computation exists only for enum constants, in
     * {@code TaxonomyEnum.taxonomyDeselected}.
     *
     * @param disjointTaxonomy ignored
     * @return always {@code null}
     */
    public static ArrayList<Taxonomy> taxonomyDeselected(Taxonomy disjointTaxonomy) {
        return null;
    }

    /**
     * Maps long-name strings to the corpus' taxonomy instances.
     *
     * <p>Strings without a matching corpus taxonomy are skipped; a string matching
     * several corpus entries contributes each of them.
     *
     * @return a new list, ordered by {@code stringList}; empty when {@code stringList}
     *         is {@code null} or the corpus has no taxonomy list
     */
    public static ArrayList<Taxonomy> convertStringListToTaxonomyList(ObservableList<String> stringList, Corpus corpus) {
        ArrayList<Taxonomy> taxonomyList = new ArrayList<>();
        if (stringList == null) {
            return taxonomyList;
        }
        ArrayList<Taxonomy> known = taxonomiesOf(corpus);
        for (String e : stringList) {
            for (Taxonomy t : known) {
                if (t.toLongNameString().equals(e)) {
                    taxonomyList.add(t);
                }
            }
        }
        return taxonomyList;
    }

    /**
     * Extracts the enum constants of the given taxonomies.
     *
     * @return the constants in input order, or {@code null} when {@code taxonomy} is
     *         {@code null} or any element has no enum constant
     */
    public static ArrayList<TaxonomyEnum> taxonomyToTaxonomyEnum(ArrayList<Taxonomy> taxonomy) {
        if (taxonomy == null) {
            return null;
        }
        ArrayList<TaxonomyEnum> r = new ArrayList<>();
        for (Taxonomy t : taxonomy) {
            if (t.taxonomyEnum == null) {
                // A single non-predefined entry makes the whole list unconvertible.
                return null;
            }
            r.add(t.taxonomyEnum);
        }
        return r;
    }

    /**
     * Maps enum constants back to the corpus' taxonomy instances (first match per
     * constant). Constants without a matching corpus entry are skipped.
     *
     * @return a new list; empty when {@code taxonomy} is {@code null} or the corpus has
     *         no taxonomy list
     */
    public static ArrayList<Taxonomy> taxonomyEnumToTaxonomy(ArrayList<TaxonomyEnum> taxonomy, Corpus corpus) {
        ArrayList<Taxonomy> r = new ArrayList<>();
        if (taxonomy == null) {
            return r;
        }
        ArrayList<Taxonomy> known = taxonomiesOf(corpus);
        for (TaxonomyEnum te : taxonomy) {
            if (te == null) {
                continue;
            }
            for (Taxonomy t : known) {
                // Identity comparison is correct for enum constants and, unlike
                // t.taxonomyEnum.equals(te), is safe when t has no enum constant.
                if (t.taxonomyEnum == te) {
                    r.add(t);
                    break;
                }
            }
        }
        return r;
    }

    /**
     * Computes the new checked taxonomy list after the user changed the selection.
     *
     * <p>For every corpus type except {@code CorpusType.VERT} the change is passed to
     * {@code TaxonomyEnum.modifyingTaxonomy}, which adjusts the checked enum list in
     * place, and the result is mapped back to corpus taxonomies. For VERT corpora the
     * checked strings are mapped directly.
     *
     * @param taxonomy     previously selected taxonomies; may be {@code null}
     * @param checkedItems long names currently checked in the UI
     * @param corpus       corpus supplying the known taxonomies and the corpus type
     * @return the resulting list of checked taxonomies
     */
    public static ArrayList<Taxonomy> modifyingTaxonomy(ArrayList<Taxonomy> taxonomy, ObservableList<String> checkedItems, Corpus corpus) {
        ArrayList<TaxonomyEnum> checkedItemsTaxonomy = TaxonomyEnum.convertStringListToTaxonomyList(checkedItems, corpus);
        if (checkedItemsTaxonomy != null && corpus.getCorpusType() != CorpusType.VERT) {
            TaxonomyEnum.modifyingTaxonomy(Taxonomy.taxonomyToTaxonomyEnum(taxonomy), checkedItemsTaxonomy, corpus);
            return taxonomyEnumToTaxonomy(checkedItemsTaxonomy, corpus);
        } else {
            return convertStringListToTaxonomyList(checkedItems, corpus);
        }
    }

    /** Returns the corpus' taxonomy list, or an empty list when corpus or list is null. */
    private static ArrayList<Taxonomy> taxonomiesOf(Corpus corpus) {
        ArrayList<Taxonomy> known = (corpus == null) ? null : corpus.getTaxonomy();
        return (known == null) ? new ArrayList<Taxonomy>() : known;
    }
}

View File

@ -1,8 +1,11 @@
package gui;
import alg.XML_processing;
import data.*;
import javafx.application.HostServices;
import javafx.beans.binding.Bindings;
import javafx.beans.InvalidationListener;
import javafx.beans.Observable;
import javafx.beans.property.ReadOnlyDoubleWrapper;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.collections.FXCollections;
@ -25,7 +28,6 @@ import java.util.regex.Pattern;
import static alg.XML_processing.readXML;
import static gui.GUIController.showAlert;
import static gui.Messages.*;
@SuppressWarnings("Duplicates")
public class CharacterAnalysisTab {
@ -160,6 +162,7 @@ public class CharacterAnalysisTab {
private boolean useDb;
private HostServices hostService;
private ListChangeListener<String> taxonomyListener;
private InvalidationListener progressBarListener;
private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));
@ -241,53 +244,56 @@ public class CharacterAnalysisTab {
msd = new ArrayList<>();
// taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) {
if (taxonomyListener != null){
taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener);
}
taxonomyListener = new ListChangeListener<String>() {
boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
};
taxonomyCCB.getCheckModel().clearChecks();
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) {
taxonomyCCB.setDisable(false);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener);
} else {
taxonomyCCB.setDisable(true);
}
if (taxonomyListener != null){
taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener);
}
taxonomyListener = new ListChangeListener<String>() {
boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
};
taxonomyCCB.getCheckModel().clearChecks();
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener);
displayTaxonomy = false;
displayTaxonomyChB.setSelected(false);
// set
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) {
displayTaxonomyChB.setDisable(false);
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
displayTaxonomy = newValue;
@ -475,7 +481,7 @@ public class CharacterAnalysisTab {
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
//
// currentCorpusType = corpus.getCorpusType();
@ -485,7 +491,7 @@ public class CharacterAnalysisTab {
// }
//
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
//
@ -548,7 +554,7 @@ public class CharacterAnalysisTab {
// if calculateFor was selected for something other than a word or a lemma -> reset
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
// if the user selected something else before selecting ngram for letters, reset that choice
calculateFor = CalculateFor.LEMMA;
calculateFor = CalculateFor.WORD;
calculateForCB.getSelectionModel().select(0);
}
@ -637,16 +643,66 @@ public class CharacterAnalysisTab {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
if(corpusFiles.size() > 1){
cancel.setVisible(true);
}
int i = 0;
// DateFormat df = new SimpleDateFormat("hh:mm:ss");
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
for (File f : corpusFiles) {
readXML(f.toString(), statistic);
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
i++;
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
if (corpusFiles.size() > 1) {
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
} else {
if(progressBarListener != null) {
xml_processing.progressProperty().removeListener(progressBarListener);
}
progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
cancel.setVisible(true);
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
previousTime = new Date();
}
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
// updateProgress((iFinal * 100) + (double) observable, corpusFiles.size() * 100);
}
};
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
xml_processing.progressProperty().addListener(progressBarListener);
// xml_processing.progressProperty().addListener((obs, oldProgress, newProgress) ->
// updateProgress((iFinal * 100) + newProgress.doubleValue(), corpusFiles.size() * 100));
}
xml_processing.readXML(f.toString(), statistic);
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
// readXML(f.toString(), statistic, this, corpusFiles.size(), startTime, previousTime, i);
}
return null;
@ -703,8 +759,6 @@ public class CharacterAnalysisTab {
logger.info("cancel button");
});
cancel.setVisible(true);
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();

View File

@ -6,11 +6,13 @@ import static gui.Messages.*;
import static util.Util.*;
import java.io.File;
import java.io.IOException;
import java.util.*;
import javafx.scene.layout.AnchorPane;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOCase;
import org.apache.commons.io.LineIterator;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.logging.log4j.LogManager;
@ -205,9 +207,6 @@ public class CorpusTab {
// scan for xml files
Collection<File> corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("xml", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
corpusLocation = selectedDirectory.getAbsolutePath();
corpusFilesSize = String.valueOf(corpusFiles.size());
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
// make sure there are corpus files in selected directory or notify the user about it
if (corpusFiles.size() == 0) {
@ -215,10 +214,20 @@ public class CorpusTab {
corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("vert", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
Collection<File> corpusFilesRegi = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("regi", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
// if (!checkRegiFile(corpusFilesRegi)){
// return;
// }
if (corpusFiles.size() == 0){
logger.info("alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND"));
showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null);
} else if (corpusFilesRegi.size() == 0){
GUIController.showAlert(Alert.AlertType.ERROR, String.format(I18N.get("message.ERROR_NO_REGI_FILE_FOUND"), selectedDirectory.getAbsolutePath()));
} else {
corpusLocation = selectedDirectory.getAbsolutePath();
corpusFilesSize = String.valueOf(corpusFiles.size());
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
corpusType = VERT;
corpus.setCorpusType(corpusType);
@ -255,12 +264,17 @@ public class CorpusTab {
}
} else {
corpusLocation = selectedDirectory.getAbsolutePath();
corpusFilesSize = String.valueOf(corpusFiles.size());
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType != null ? corpusType.toString() : null);
String chooseCorpusLabelContentTmp = detectCorpusType(corpusFiles);
if (chooseCorpusLabelContentTmp == null) {
logger.info("alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND"));
showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null);
} else {
initNewCorpus(selectedDirectory, corpusFiles);
Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType.toString());
@ -330,6 +344,28 @@ public class CorpusTab {
Messages.setChooseCorpusL(chooseCorpusL, chooseCorpusLabelContent);
}
/**
 * Checks that a companion ".regi" file can be opened for every given corpus file.
 *
 * <p>The companion path is built by replacing the last four characters of the corpus
 * file's absolute path with {@code "regi"}. On the first companion file that cannot be
 * opened an error alert naming that path is shown and the check stops.
 *
 * @param corpusFiles corpus files whose companion files are probed
 * @return {@code true} if every companion file could be opened, {@code false} otherwise
 */
private boolean checkRegiFile(Collection<File> corpusFiles) {
    for (File corpusFile : corpusFiles) {
        // swap the trailing four characters of the path for "regi"
        String absolutePath = corpusFile.getAbsolutePath();
        String regiPath = absolutePath.substring(0, absolutePath.length() - 4) + "regi";

        try {
            // opening the iterator is only a readability probe; it is closed right away
            LineIterator regiLines = FileUtils.lineIterator(new File(regiPath), "UTF-8");
            LineIterator.closeQuietly(regiLines);
        } catch (IOException e) {
            String alertText = String.format(I18N.get("message.ERROR_NO_REGI_FILE_FOUND"), regiPath);
            GUIController.showAlert(Alert.AlertType.ERROR, alertText);
            return false;
        }
    }

    return true;
}
private void readHeaderInfo() {
CorpusType corpusType = corpus.getCorpusType();
Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();
@ -339,7 +375,7 @@ public class CorpusTab {
logger.info("reading header data for ", corpusType.toString());
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K || corpusType == CorpusType.GIGAFIDA2) {
boolean corpusIsSplit = corpusFiles.size() > 1;
final Task<HashSet<String>> task = new Task<HashSet<String>>() {
@ -505,26 +541,27 @@ public class CorpusTab {
task.setOnSucceeded(e -> {
ObservableList<String> readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue());
if (ValidationUtil.isEmpty(readTaxonomy)) {
// if no taxonomy found alert the user and keep other tabs disabled
logger.info("No vert filters found in headers.");
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND"));
} else {
// if (ValidationUtil.isEmpty(readTaxonomy)) {
// // if no taxonomy found alert the user and keep other tabs disabled
// logger.info("No vert filters found in headers.");
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND"));
// } else {
// set taxonomy, update label
corpus.setTaxonomy(readTaxonomy);
corpus.setHeaderRead(true);
Messages.setChooseCorpusL(chooseCorpusL, chooseCorpusLabelContent);
setResults();
setCorpusForAnalysis();
}
// }
togglePiAndSetCorpusWrapper(false);
togglePiAndSetCorpusWrapper(false);
});
task.setOnCancelled(e -> togglePiAndSetCorpusWrapper(false));
task.setOnFailed(e -> togglePiAndSetCorpusWrapper(false));
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();
@ -599,7 +636,12 @@ public class CorpusTab {
if (title.contains(SOLAR.getNameLowerCase())) {
corpusType = SOLAR;
} else if (title.contains(GIGAFIDA.getNameLowerCase())) {
corpusType = GIGAFIDA;
String edition = XML_processing.readXMLHeaderTag(f.getAbsolutePath(), "edition").toLowerCase();
if (Double.valueOf(edition) < 2.0) {
corpusType = GIGAFIDA;
} else {
corpusType = GIGAFIDA2;
}
} else if (title.contains(CCKRES.getNameLowerCase())) {
corpusType = CCKRES;
} else if (title.contains(GOS.getNameLowerCase())) {

View File

@ -114,8 +114,10 @@ public class Messages {
.append(String.format(I18N.get("message.NOTIFICATION_CORPUS"), chooseCorpusLabelProperties[2]));
chooseCorpusLabelContent = sb.toString();
chooseCorpusL.textProperty().unbind();
chooseCorpusL.setText(chooseCorpusLabelContent);
if (chooseCorpusL != null) {
chooseCorpusL.textProperty().unbind();
chooseCorpusL.setText(chooseCorpusLabelContent);
}
}
}
}

View File

@ -1,22 +1,23 @@
package gui;
import alg.XML_processing;
import data.*;
import javafx.application.HostServices;
import javafx.beans.InvalidationListener;
import javafx.beans.Observable;
import javafx.beans.property.ReadOnlyDoubleWrapper;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.collections.FXCollections;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.Scene;
import javafx.scene.control.*;
import javafx.scene.layout.AnchorPane;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;
import org.controlsfx.control.IndexedCheckModel;
import java.io.File;
import java.io.UnsupportedEncodingException;
@ -26,7 +27,6 @@ import java.util.regex.Pattern;
import static alg.XML_processing.readXML;
import static gui.GUIController.showAlert;
import static gui.Messages.*;
@SuppressWarnings("Duplicates")
public class OneWordAnalysisTab {
@ -158,6 +158,7 @@ public class OneWordAnalysisTab {
private ListChangeListener<String> taxonomyListener;
private ListChangeListener<String> alsoVisualizeListener;
private ChangeListener<String> calculateForListener;
private InvalidationListener progressBarListener;
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
@ -383,54 +384,57 @@ public class OneWordAnalysisTab {
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
// taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) {
if (taxonomyListener != null){
taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener);
}
taxonomyListener = new ListChangeListener<String>() {
public boolean changing = true;
@Override
public void onChanged(Change<? extends String> c) {
if (changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
};
taxonomyCCB.getCheckModel().clearChecks();
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) {
taxonomyCCB.setDisable(false);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener);
} else {
taxonomyCCB.setDisable(true);
}
if (taxonomyListener != null){
taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener);
}
// Listener attached to the checked items of taxonomyCCB: every time the set of checked
// taxonomy labels changes, it recomputes the selected taxonomy and rebuilds the checks.
taxonomyListener = new ListChangeListener<String>() {
// Re-entrancy guard: switched to false while this listener rewrites the checks itself,
// so the change events caused by clearChecks()/check() below are ignored.
public boolean changing = true;
@Override
public void onChanged(Change<? extends String> c) {
if (changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
// NOTE(review): modifyingTaxonomy presumably reconciles the previous selection with the
// newly checked labels for this corpus - confirm against Taxonomy.modifyingTaxonomy.
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
// keep a fresh copy of the computed selection in the tab's taxonomy field
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
// NOTE(review): removeAll() is called without arguments, so it likely removes nothing;
// the setAll(...) on the next line replaces the items either way - confirm and drop if redundant.
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
// mute this listener while the checks are rebuilt from the computed selection
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
// NOTE(review): if clearChecks()/check(...) throws, this line is skipped and the listener
// stays muted - consider restoring the flag in a finally block.
changing = true;
// checkedItems was obtained from the check model before the rebuild; presumably it is a
// live view and therefore logs the rebuilt selection - TODO confirm.
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
};
taxonomyCCB.getCheckModel().clearChecks();
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener);
displayTaxonomy = false;
displayTaxonomyChB.setSelected(false);
// set
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) {
displayTaxonomyChB.setDisable(false);
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
displayTaxonomy = newValue;
@ -586,7 +590,7 @@ public class OneWordAnalysisTab {
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
//
// currentCorpusType = corpus.getCorpusType();
@ -596,7 +600,7 @@ public class OneWordAnalysisTab {
// }
//
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
//
@ -733,22 +737,63 @@ public class OneWordAnalysisTab {
logger.info("Started execution: ", statistic.getFilter());
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
boolean corpusIsSplit = corpusFiles.size() > 1;
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
if(corpusFiles.size() > 1){
cancel.setVisible(true);
}
int i = 0;
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
for (File f : corpusFiles) {
readXML(f.toString(), statistic);
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
i++;
if (corpusFiles.size() > 1) {
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
} else {
if(progressBarListener != null) {
xml_processing.progressProperty().removeListener(progressBarListener);
}
progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
cancel.setVisible(true);
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
previousTime = new Date();
}
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
}
};
xml_processing.progressProperty().addListener(progressBarListener);
}
xml_processing.readXML(f.toString(), statistic);
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
}
return null;
@ -805,7 +850,6 @@ public class OneWordAnalysisTab {
logger.info("cancel button");
});
cancel.setVisible(true);
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();

View File

@ -2,21 +2,20 @@ package gui;
import static alg.XML_processing.*;
import static gui.GUIController.*;
import static gui.Messages.*;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import alg.XML_processing;
import javafx.application.HostServices;
import javafx.beans.InvalidationListener;
import javafx.beans.Observable;
import javafx.beans.property.ReadOnlyDoubleWrapper;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.scene.layout.AnchorPane;
import org.apache.commons.lang3.SerializationUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -208,6 +207,7 @@ public class StringAnalysisTabNew2 {
private ListChangeListener<String> alsoVisualizeListener;
private ListChangeListener<String> collocabilityListener;
private ChangeListener<String> calculateForListener;
private InvalidationListener progressBarListener;
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
@ -306,13 +306,14 @@ public class StringAnalysisTabNew2 {
notePunctuations = newValue;
logger.info("note punctuations: ", notePunctuations);
});
notePunctuationsChB.setSelected(false);
notePunctuationsChB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_readNotePunctuationsChB")));
displayTaxonomy = false;
displayTaxonomyChB.setSelected(false);
// set
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) {
displayTaxonomyChB.setDisable(false);
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
displayTaxonomy = newValue;
@ -515,49 +516,52 @@ public class StringAnalysisTabNew2 {
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
// taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) {
if (taxonomyListener != null){
taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener);
}
taxonomyListener = new ListChangeListener<String>() {
boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
};
taxonomyCCB.getCheckModel().clearChecks();
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) {
taxonomyCCB.setDisable(false);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener);
} else {
taxonomyCCB.setDisable(true);
}
if (taxonomyListener != null){
taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener);
}
// Listener attached to the checked items of taxonomyCCB: every time the set of checked
// taxonomy labels changes, it recomputes the selected taxonomy and rebuilds the checks.
taxonomyListener = new ListChangeListener<String>() {
// Re-entrancy guard: switched to false while this listener rewrites the checks itself,
// so the change events caused by clearChecks()/check() below are ignored.
boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
//
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
// NOTE(review): modifyingTaxonomy presumably reconciles the previous selection with the
// newly checked labels for this corpus - confirm against Taxonomy.modifyingTaxonomy.
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
// keep a fresh copy of the computed selection in the tab's taxonomy field
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
// NOTE(review): removeAll() is called without arguments, so it likely removes nothing;
// the setAll(...) on the next line replaces the items either way - confirm and drop if redundant.
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
// mute this listener while the checks are rebuilt from the computed selection
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
// NOTE(review): if clearChecks()/check(...) throws, this line is skipped and the listener
// stays muted - consider restoring the flag in a finally block.
changing = true;
// checkedItems was obtained from the check model before the rebuild; presumably it is a
// live view and therefore logs the rebuilt selection - TODO confirm.
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
};
taxonomyCCB.getCheckModel().clearChecks();
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener);
// skip
skipValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
skipValue = Integer.valueOf(newValue);
@ -738,7 +742,7 @@ public class StringAnalysisTabNew2 {
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
//
// currentCorpusType = corpus.getCorpusType();
@ -748,7 +752,7 @@ public class StringAnalysisTabNew2 {
// }
//
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
//
@ -913,16 +917,78 @@ public class StringAnalysisTabNew2 {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = corpusFiles.size();
if(corpusFiles.size() > 1){
cancel.setVisible(true);
}
int i = corpusFiles.size();
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
int corpusSize;
if (statistic.getFilter().getCollocability().size() > 0) {
corpusSize = corpusFiles.size() * 2;
} else {
corpusSize = corpusFiles.size();
}
for (File f : corpusFiles) {
readXML(f.toString(), statisticsOneGrams);
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
i++;
this.updateProgress(i, corpusFiles.size() * 2);
if (statistic.getFilter().getCollocability().size() > 0) {
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
if (corpusFiles.size() > 1) {
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusSize);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
} else {
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
if(progressBarListener != null) {
xml_processing.progressProperty().removeListener(progressBarListener);
}
progressBarListener = new InvalidationListener() {
int remainingSeconds = -1;
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
cancel.setVisible(true);
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)));
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
// System.out.println(remainingSeconds);
previousTime = new Date();
}
xml_processing.isCancelled = isCancelled();
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusSize, f.getName(), remainingSeconds));
}
};
xml_processing.progressProperty().addListener(progressBarListener);
}
xml_processing.isCollocability = true;
xml_processing.readXML(f.toString(), statisticsOneGrams);
xml_processing.isCollocability = false;
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
// readXML(f.toString(), statisticsOneGrams);
// i++;
// this.updateProgress(i, corpusFiles.size() * 2);
// if (statistic.getFilter().getCollocability().size() > 0) {
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
// } else {
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
// }
}
return null;
@ -998,8 +1064,6 @@ public class StringAnalysisTabNew2 {
task.cancel();
// logger.info("cancel button");
});
// cancel.setVisible(true);
return task;
}
@ -1009,28 +1073,90 @@ public class StringAnalysisTabNew2 {
// Task<Void> task_collocability = null;
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
boolean corpusIsSplit = corpusFiles.size() > 1;
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
if(corpusFiles.size() > 1){
cancel.setVisible(true);
}
int i = 0;
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
int corpusSize;
if (statistic.getFilter().getCollocability().size() > 0) {
corpusSize = corpusFiles.size() * 2;
} else {
corpusSize = corpusFiles.size();
}
for (File f : corpusFiles) {
readXML(f.toString(), statistic);
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
i++;
if (corpusFiles.size() > 1) {
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusSize - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusSize);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusSize, f.getName(), remainingSeconds));
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
} else {
if(progressBarListener != null) {
xml_processing.progressProperty().removeListener(progressBarListener);
}
// Listener that turns progress notifications from xml_processing into task
// progress, a status message and an estimated remaining time in seconds.
progressBarListener = new InvalidationListener() {
// Last computed estimate in seconds; -1 means "not computed yet" and forces
// the first computation regardless of the 500 ms throttle below.
int remainingSeconds = -1;
// Time of the last estimate; used to throttle re-computation.
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
// NOTE(review): this touches a UI control from whichever thread fires the
// invalidation - confirm that this is the JavaFX application thread.
cancel.setVisible(true);
// Recompute the estimate on the first call, and afterwards at most once
// every 500 ms.
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// estimate = elapsed ms since xml_processing.startTime
//            * 1 / (work done so far)
//            * (work still to do)
//            / 1000 (ms -> s)
// Work is counted as 100 units per file: done = iFinal * 100 + current
// value + 1, remaining = (corpusSize - iFinal - 1) * 100 + 100 - current
// value. Presumably the observed value is a percentage in 0..100 - TODO
// confirm. The cast assumes the observable is a ReadOnlyDoubleWrapper.
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
// System.out.println(((new Date()).getTime() - xml_processing.startTime.getTime()));
// System.out.println((1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get())) + 1);
// System.out.println(((corpusSize - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()));
// System.out.println(remainingSeconds);
previousTime = new Date();
}
// Copy the task's cancellation state into the reader's isCancelled field.
xml_processing.isCancelled = isCancelled();
// Report progress on the same 100-units-per-file scale as the estimate.
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusSize * 100);
// The message is built with four arguments: file index (1-based), total
// count, file name and the remaining-seconds estimate.
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusSize, f.getName(), remainingSeconds));
}
};
xml_processing.progressProperty().addListener(progressBarListener);
}
xml_processing.readXML(f.toString(), statistic);
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
if (statistic.getFilter().getCollocability().size() > 0) {
this.updateProgress(i, corpusFiles.size() * 2);
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
} else {
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
if(!(corpusFiles.size() > 1)){
cancel.setVisible(false);
}
// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
// readXML(f.toString(), statistic);
// i++;
// if (isCancelled()) {
// updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
// break;
// }
// if (statistic.getFilter().getCollocability().size() > 0) {
// this.updateProgress(i, corpusFiles.size() * 2);
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size() * 2, f.getName()));
// } else {
// this.updateProgress(i, corpusFiles.size());
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
// }
//// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size() * 2, f.getName()));
}
return null;
@ -1106,8 +1232,6 @@ public class StringAnalysisTabNew2 {
logger.info("cancel button");
});
cancel.setVisible(true);
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();

View File

@ -2,14 +2,10 @@ package gui;
import static alg.XML_processing.*;
import static gui.GUIController.*;
import static gui.Messages.*;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.*;
import javafx.application.HostServices;
import javafx.scene.control.*;
@ -73,11 +69,11 @@ public class WordFormationTab {
// taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
taxonomy = new ArrayList<>();
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus);
taxonomy.addAll(checkedItemsTaxonomy);
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
});
@ -175,7 +171,9 @@ public class WordFormationTab {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
int i = 0;
Date startTime = new Date();
Date previousTime = new Date();
for (File f : corpusFiles) {
readXML(f.toString(), statistic);
i++;

View File

@ -1,10 +1,13 @@
package gui;
import alg.XML_processing;
import data.*;
import javafx.application.HostServices;
import javafx.beans.InvalidationListener;
import javafx.beans.Observable;
import javafx.beans.property.ReadOnlyDoubleWrapper;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.collections.FXCollections;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
@ -24,7 +27,6 @@ import java.util.regex.Pattern;
import static alg.XML_processing.readXML;
import static gui.GUIController.showAlert;
import static gui.Messages.*;
@SuppressWarnings("Duplicates")
public class WordLevelTab {
@ -196,6 +198,7 @@ public class WordLevelTab {
private ListChangeListener<String> taxonomyListener;
private ListChangeListener<String> alsoVisualizeListener;
private ChangeListener<String> calculateForListener;
private InvalidationListener progressBarListener;
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
@ -509,54 +512,57 @@ public class WordLevelTab {
alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener);
// taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) {
if (taxonomyListener != null){
taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener);
}
taxonomyListener = new ListChangeListener<String>() {
boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
};
taxonomyCCB.getCheckModel().clearChecks();
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) {
taxonomyCCB.setDisable(false);
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener);
} else {
taxonomyCCB.setDisable(true);
}
if (taxonomyListener != null){
taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener);
}
// Listener for changes to the checked taxonomy items: recomputes the selected
// taxonomy, reloads the combo-box items from the corpus and re-applies the
// checks.
taxonomyListener = new ListChangeListener<String>() {
// Guard flag: while true, change events are handled. It is set to false
// while this listener itself modifies the checks below, so the change
// events caused by those modifications are ignored.
boolean changing = true;
@Override
public void onChanged(ListChangeListener.Change<? extends String> c){
if(changing) {
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems);
// The new selection is produced by Taxonomy.modifyingTaxonomy from the
// previous selection, the currently checked names and the corpus.
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.modifyingTaxonomy(taxonomy, checkedItems, corpus);
// Taxonomy.modifyingTaxonomy(taxonomy, checkedItemsTaxonomy, corpus);
// Replace the stored selection with a fresh copy of the new one.
taxonomy = new ArrayList<>();
taxonomy.addAll(checkedItemsTaxonomy);
// NOTE(review): removeAll() is called without arguments here, which
// presumably removes nothing; the setAll(...) on the next line is what
// replaces the items - confirm and consider dropping this call.
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
// taxonomyCCB.getCheckModel().clearChecks();
// Disable handling while the checks are rebuilt programmatically.
changing = false;
taxonomyCCB.getCheckModel().clearChecks();
for (Taxonomy t : checkedItemsTaxonomy) {
taxonomyCCB.getCheckModel().check(t.toLongNameString());
}
// Re-enable handling of subsequent changes.
changing = true;
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}
}
};
taxonomyCCB.getCheckModel().clearChecks();
taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener(taxonomyListener);
displayTaxonomy = false;
displayTaxonomyChB.setSelected(false);
// set
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) {
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getObservableListTaxonomy().size() > 0) {
displayTaxonomyChB.setDisable(false);
displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
displayTaxonomy = newValue;
@ -714,7 +720,7 @@ public class WordLevelTab {
// if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
// // user changed corpus (by type) or by selection & triggered a rescan of headers
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
//
// currentCorpusType = corpus.getCorpusType();
@ -724,7 +730,7 @@ public class WordLevelTab {
// }
//
// // see if we read taxonomy from headers, otherwise use default values for given corpus
// ObservableList<String> tax = corpus.getTaxonomy();
// ObservableList<String> tax = corpus.getObservableListTaxonomy();
// taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
// taxonomyCCB.getItems().addAll(taxonomyCCBValues);
//
@ -879,22 +885,63 @@ public class WordLevelTab {
logger.info("Started execution: ", statistic.getFilter());
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
boolean corpusIsSplit = corpusFiles.size() > 1;
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
if(corpusFiles.size() > 1){
cancel.setVisible(true);
}
int i = 0;
Date startTime = new Date();
Date previousTime = new Date();
int remainingSeconds = -1;
for (File f : corpusFiles) {
readXML(f.toString(), statistic);
final int iFinal = i;
XML_processing xml_processing = new XML_processing();
i++;
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
if (corpusFiles.size() > 1) {
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0/i) * (corpusFiles.size() - i) / 1000);
previousTime = new Date();
}
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName(), remainingSeconds));
} else {
if(progressBarListener != null) {
xml_processing.progressProperty().removeListener(progressBarListener);
}
// Listener that turns progress notifications from xml_processing into task
// progress, a status message and an estimated remaining time in seconds.
progressBarListener = new InvalidationListener() {
// Last computed estimate in seconds; -1 means "not computed yet" and forces
// the first computation regardless of the 500 ms throttle below.
int remainingSeconds = -1;
// Time of the last estimate; used to throttle re-computation.
Date previousTime = new Date();
@Override
public void invalidated(Observable observable) {
// NOTE(review): this touches a UI control from whichever thread fires the
// invalidation - confirm that this is the JavaFX application thread.
cancel.setVisible(true);
// Recompute the estimate on the first call, and afterwards at most once
// every 500 ms.
if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1){
// estimate = elapsed ms since xml_processing.startTime
//            * 1 / (work done so far)
//            * (work still to do)
//            / 1000 (ms -> s)
// Work is counted as 100 units per file: done = iFinal * 100 + current
// value + 1, remaining = (corpusFiles.size() - iFinal - 1) * 100 + 100 -
// current value. Presumably the observed value is a percentage in 0..100
// - TODO confirm. The cast assumes the observable is a
// ReadOnlyDoubleWrapper.
remainingSeconds = (int) (((new Date()).getTime() - xml_processing.startTime.getTime()) *
(1.0/(iFinal * 100 + ((ReadOnlyDoubleWrapper) observable).get() + 1)) *
((corpusFiles.size() - iFinal - 1) * 100 + 100 - ((ReadOnlyDoubleWrapper) observable).get()) / 1000);
previousTime = new Date();
}
// Copy the task's cancellation state into the reader's isCancelled field.
xml_processing.isCancelled = isCancelled();
// Report progress on the same 100-units-per-file scale as the estimate.
updateProgress((iFinal * 100) + ((ReadOnlyDoubleWrapper) observable).get() + 1, corpusFiles.size() * 100);
// The message is built with four arguments: file index (1-based), total
// count, file name and the remaining-seconds estimate.
updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), iFinal + 1, corpusFiles.size(), f.getName(), remainingSeconds));
}
};
xml_processing.progressProperty().addListener(progressBarListener);
}
xml_processing.readXML(f.toString(), statistic);
if (isCancelled()) {
updateMessage(I18N.get("message.CANCELING_NOTIFICATION"));
break;
}
}
return null;
@ -951,7 +998,6 @@ public class WordLevelTab {
logger.info("cancel button");
});
cancel.setVisible(true);
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();

View File

@ -111,8 +111,8 @@ public class Export {
}
}
headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
// headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
for (CalculateFor otherKey : filter.getMultipleKeys()) {
@ -134,7 +134,7 @@ public class Export {
}
for (Taxonomy key : taxonomyResults.keySet()) {
if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
FILE_HEADER_AL.add(I18N.get("exportTable.absoluteFrequency") + " [" + key.toString() + "]");
FILE_HEADER_AL.add(I18N.get("exportTable.percentage") + " [" + key.toString() + "]");
FILE_HEADER_AL.add(I18N.get("exportTable.relativeFrequency") + " [" + key.toString() + "]");
@ -280,10 +280,10 @@ public class Export {
dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(Taxonomy.TOTAL)));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal())));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
for (Taxonomy key : taxonomyResults.keySet()){
if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
dataEntry.add(frequency.toString());
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key)));

View File

@ -118,6 +118,7 @@ message.WARNING_NO_SOLAR_FILTERS_FOUND=We weren't able to read filters from corp
message.ERROR_WHILE_EXECUTING=Error in program execution.
message.ERROR_WHILE_SAVING_RESULTS_TO_CSV=Error while saving results.
message.ERROR_NOT_ENOUGH_MEMORY=You do not have sufficient RAM for analyzing such an amount of data. You can try changing filters.
message.ERROR_NO_REGI_FILE_FOUND=Missing file \"%s\".
message.MISSING_NGRAM_LEVEL=N-gram level
message.MISSING_CALCULATE_FOR=Calculate for
@ -132,7 +133,7 @@ message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS=Analysis completed, however n
message.RESULTS_PATH_SET_TO_DEFAULT=Save location is set on corpus location.
message.NOTIFICATION_ANALYSIS_CANCELED=Analysis was canceled.
message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y=Analyzing file %d of %d (%s)
message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y=Analyzing file %d of %d (%s) - Estimated time remaining %d s
message.CANCELING_NOTIFICATION=Canceled
message.LABEL_CORPUS_LOCATION_NOT_SET=Corpus location is not set

View File

@ -118,6 +118,7 @@ message.WARNING_NO_SOLAR_FILTERS_FOUND=Iz korpusnih datotek ni bilo moč razbrat
message.ERROR_WHILE_EXECUTING=Prišlo je do napake med izvajanjem.
message.ERROR_WHILE_SAVING_RESULTS_TO_CSV=Prišlo je do napake med shranjevanjem rezultatov.
message.ERROR_NOT_ENOUGH_MEMORY=Na voljo imate premalo pomnilnika (RAM-a) za analizo takšne količine podatkov.
message.ERROR_NO_REGI_FILE_FOUND=Manjka datoteka \"%s\".
message.MISSING_NGRAM_LEVEL=N-gram nivo
message.MISSING_CALCULATE_FOR=Izračunaj za
@ -132,7 +133,7 @@ message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS=Analiza je zaključena, venda
message.RESULTS_PATH_SET_TO_DEFAULT=Lokacija za shranjevanje rezultatov je nastavljena na lokacijo korpusa.
message.NOTIFICATION_ANALYSIS_CANCELED=Analiziranje je bilo prekinjeno.
message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y=Analiziram datoteko %d od %d (%s)
message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y=Analiziram datoteko %d od %d (%s) - Preostali čas %d s
message.CANCELING_NOTIFICATION=Prekinjeno
message.LABEL_CORPUS_LOCATION_NOT_SET=Lokacija korpusa ni nastavljena