diff --git a/pom.xml b/pom.xml
index 1f4b81a..17a3e90 100644
--- a/pom.xml
+++ b/pom.xml
@@ -93,7 +93,7 @@
com.zenjava
javafx-maven-plugin
- 8.6.0
+ 8.8.3
gui.GUIController
true
diff --git a/src/main/java/alg/XML_processing.java b/src/main/java/alg/XML_processing.java
index 94c94cc..b47c190 100644
--- a/src/main/java/alg/XML_processing.java
+++ b/src/main/java/alg/XML_processing.java
@@ -473,6 +473,7 @@ public class XML_processing {
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
boolean inWord = false;
ArrayList currentFiletaxonomy = new ArrayList<>();
+ ArrayList currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
String msd = "";
@@ -508,7 +509,10 @@ public class XML_processing {
if (tax != null) {
// keep only taxonomy properties
- currentFiletaxonomy.add(String.valueOf(tax.getValue()).replace("#", ""));
+ String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", "");
+ currentFiletaxonomy.add(currentFiletaxonomyElement);
+ Tax taxonomy = new Tax();
+ currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
}
}
break;
@@ -519,7 +523,7 @@ public class XML_processing {
// "word" node value
if (inWord) {
String word = characters.getData();
- sentence.add(new Word(word, lemma, msd));
+ sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
inWord = false;
}
break;
@@ -570,6 +574,7 @@ public class XML_processing {
// fallback
else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
+ // join corpus and stats
fj(corpus, stats);
corpus.clear();
diff --git a/src/main/java/alg/ngram/Ngrams.java b/src/main/java/alg/ngram/Ngrams.java
index 4b0f930..429b5a6 100644
--- a/src/main/java/alg/ngram/Ngrams.java
+++ b/src/main/java/alg/ngram/Ngrams.java
@@ -45,6 +45,8 @@ public class Ngrams {
continue;
}
+ // update the per-taxonomy counts for this n-gram
+ stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate);
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
}
}
@@ -60,7 +62,8 @@ public class Ngrams {
}
for (int i = 0; i < regex.size(); i++) {
- if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
+ //if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
+ if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern() + ".*")) {
return false;
}
}
diff --git a/src/main/java/data/StatisticsNew.java b/src/main/java/data/StatisticsNew.java
index 618f85d..6c27265 100644
--- a/src/main/java/data/StatisticsNew.java
+++ b/src/main/java/data/StatisticsNew.java
@@ -32,6 +32,7 @@ public class StatisticsNew {
private String resultTitle;
private Map result;
+ private Map> taxonomyResult;
private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
private Map> resultNestedSuffix;
private Map> resultNestedPrefix;
@@ -43,6 +44,20 @@ public class StatisticsNew {
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
this.corpus = corpus;
this.filter = filter;
+ this.taxonomyResult = new ConcurrentHashMap<>();
+
+ // create table for counting word occurrences per taxonomy
+
+ if (this.filter.getTaxonomy().isEmpty()) {
+ for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
+ this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());
+ }
+ } else {
+ for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
+ Tax taxonomy = new Tax();
+ this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>());
+ }
+ }
if (useDB) {
this.useDB = true;
@@ -189,7 +204,7 @@ public class StatisticsNew {
}
stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
- Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock());
+ Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult);
return true;
}
@@ -260,6 +275,28 @@ public class StatisticsNew {
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
}
+ /**
+  * Updates the per-taxonomy occurrence counters for one n-gram.
+  * <p>
+  * For every taxonomy key in {@code taxonomyResult}: the counter of {@code o} is incremented when
+  * the first word of the n-gram lists that taxonomy; otherwise {@code o} is registered with a
+  * count of 0 unless it is already present.
+  *
+  * @param o              string form of the n-gram, used as the counter key
+  * @param ngramCandidate words of the n-gram; only the first word's taxonomy is consulted
+  */
+ public void updateTaxonomyResults(String o, List ngramCandidate) {
+     // A Word created through a constructor that does not set the taxonomy list returns null
+     // from getTaxonomy(). Treat a missing taxonomy (or an empty n-gram) as "belongs to no
+     // taxonomy" instead of failing with a NullPointerException / IndexOutOfBoundsException.
+     boolean hasTaxonomy = ngramCandidate != null && !ngramCandidate.isEmpty()
+             && ngramCandidate.get(0).getTaxonomy() != null;
+
+     for (String key : taxonomyResult.keySet()) {
+         // first word should have the same taxonomy as others
+         if (hasTaxonomy && ngramCandidate.get(0).getTaxonomy().contains(key)) {
+             // create the counter at 0 if missing, then count this occurrence
+             taxonomyResult.get(key).computeIfAbsent(o, k -> new AtomicLong()).incrementAndGet();
+         } else {
+             // make sure the n-gram is listed under this taxonomy, with a count of 0 if new
+             taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(0));
+         }
+     }
+ }
+
public void updateResults(String o) {
// if not in map
AtomicLong r = result.putIfAbsent(o, new AtomicLong(1));
@@ -377,22 +414,22 @@ public class StatisticsNew {
}
// taksonomija
- if (!isEmpty(filter.getTaxonomy())) {
- info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
- }
+// if (!isEmpty(filter.getTaxonomy())) {
+// info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
+// }
}
-// if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
-// ArrayList tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
-//
-// info.put("Taksonomija: ", "");
-// String sep = "";
-// for (String s : tax) {
-// info.put(sep = sep + " ", s);
-// }
-// }
+ if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+ ArrayList tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
+
+ info.put("Taksonomija: ", "");
+ String sep = "";
+ for (String s : tax) {
+ info.put(sep = sep + " ", s);
+ }
+ }
if (corpus.getCorpusType() == CorpusType.SOLAR) {
HashMap> filters = corpus.getSolarFilters();
diff --git a/src/main/java/data/Tax.java b/src/main/java/data/Tax.java
index c1d6251..6324fd0 100644
--- a/src/main/java/data/Tax.java
+++ b/src/main/java/data/Tax.java
@@ -172,4 +172,13 @@ public class Tax {
return result;
}
+
+ /**
+  * Looks up the long taxonomy name registered for a short name.
+  * <p>
+  * {@code GIGAFIDA_TAXONOMY} is consulted first, then {@code GOS_TAXONOMY}.
+  *
+  * @param shortName key to look up in the taxonomy maps
+  * @return the value mapped to {@code shortName} in the first map that contains it, or
+  *         {@code null} when neither map contains it
+  */
+ public static String getLongTaxonomyName(String shortName){
+     if (GIGAFIDA_TAXONOMY.containsKey(shortName)) {
+         return GIGAFIDA_TAXONOMY.get(shortName);
+     }
+     if (!GOS_TAXONOMY.containsKey(shortName)) {
+         return null;
+     }
+     return GOS_TAXONOMY.get(shortName);
+ }
}
diff --git a/src/main/java/data/Word.java b/src/main/java/data/Word.java
index 154dd24..5cff321 100644
--- a/src/main/java/data/Word.java
+++ b/src/main/java/data/Word.java
@@ -3,6 +3,7 @@ package data;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
+import java.util.List;
import org.apache.commons.lang3.StringUtils;
@@ -15,6 +16,7 @@ public class Word implements Serializable {
private String word;
private String lemma;
private String msd;
+ private List taxonomy;
private final HashSet VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));
/**
@@ -50,6 +52,22 @@ public class Word implements Serializable {
}
}
+ //private char besedna_vrsta;
+ /**
+  * Creates a word that also carries a taxonomy list.
+  * <p>
+  * The MSD is stored in the form returned by {@code normalizeMsd}. The surface form is
+  * lower-cased unless {@code ValidationUtil.isEmpty} reports the normalized MSD as empty or the
+  * normalized MSD starts with {@code "Sl"}.
+  *
+  * @param word     surface form of the word
+  * @param lemma    lemma of the word
+  * @param msd      MSD tag; normalized before being stored
+  * @param taxonomy taxonomy list, stored by reference (not copied)
+  */
+ public Word(String word, String lemma, String msd, List taxonomy) {
+     this.lemma = lemma;
+     this.msd = normalizeMsd(msd);
+     this.taxonomy = taxonomy;
+
+     // keep the capital initial only for proper nouns
+     boolean keepOriginalCase = ValidationUtil.isEmpty(this.msd)
+             || (this.msd.charAt(0) == 'S'
+             && this.msd.length() >= 2
+             && this.msd.charAt(1) == 'l');
+     this.word = keepOriginalCase ? word : word.toLowerCase();
+ }
+
public Word() {
}
@@ -99,6 +117,10 @@ public class Word implements Serializable {
this.word = word;
}
+ /**
+  * Returns the taxonomy list stored on this word.
+  *
+  * @return the stored list itself (not a copy); {@code null} when none was assigned
+  */
+ public List getTaxonomy() {
+     return this.taxonomy;
+ }
+
public String getLemma() {
return lemma;
}
diff --git a/src/main/java/gui/CharacterAnalysisTab.java b/src/main/java/gui/CharacterAnalysisTab.java
index cd1f6e3..12327a7 100644
--- a/src/main/java/gui/CharacterAnalysisTab.java
+++ b/src/main/java/gui/CharacterAnalysisTab.java
@@ -67,6 +67,9 @@ public class CharacterAnalysisTab {
@FXML
private Button computeNgramsB;
+ @FXML
+ private Button cancel;
+
@FXML
public ProgressBar ngramProgressBar;
@FXML
@@ -192,6 +195,8 @@ public class CharacterAnalysisTab {
});
helpH.setOnAction(e -> openHelpWebsite());
+
+ cancel.setVisible(false);
}
/**
@@ -399,6 +404,10 @@ public class CharacterAnalysisTab {
for (File f : corpusFiles) {
readXML(f.toString(), statistic);
i++;
+ if (isCancelled()) {
+ updateMessage(CANCELING_NOTIFICATION);
+ break;
+ }
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
}
@@ -427,6 +436,7 @@ public class CharacterAnalysisTab {
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
+ cancel.setVisible(false);
});
task.setOnFailed(e -> {
@@ -437,8 +447,27 @@ public class CharacterAnalysisTab {
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
+ cancel.setVisible(false);
+ });
+
+ task.setOnCancelled(e -> {
+ showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
+ ngramProgressBar.progressProperty().unbind();
+ ngramProgressBar.setProgress(0.0);
+ ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
+ progressLabel.textProperty().unbind();
+ progressLabel.setText("");
+ cancel.setVisible(false);
});
+ // When cancel button is pressed cancel analysis
+ cancel.setOnAction(e -> {
+ task.cancel();
+ logger.info("cancel button");
+ });
+
+ cancel.setVisible(true);
+
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();
diff --git a/src/main/java/gui/CorpusTab.java b/src/main/java/gui/CorpusTab.java
index 48ce9a0..5171b02 100644
--- a/src/main/java/gui/CorpusTab.java
+++ b/src/main/java/gui/CorpusTab.java
@@ -83,7 +83,7 @@ public class CorpusTab {
private OneWordAnalysisTab oneWordTabController;
private CharacterAnalysisTab catController;
private FiltersForSolar ffsController;
- //private WordFormationTab wfController;
+ private WordFormationTab wfController;
private WordLevelTab wlController;
private HostServices hostService;
@@ -383,7 +383,7 @@ public class CorpusTab {
characterLevelTab.setDisable(false);
catController.setCorpus(corpus);
catController.init();
- wordFormationTab.setDisable(false);
+ //wordFormationTab.setDisable(false);
wordLevelTab.setDisable(false);
//wfController.setCorpus(corpus);
//wfController.init();
diff --git a/src/main/java/gui/OneWordAnalysisTab.java b/src/main/java/gui/OneWordAnalysisTab.java
index 2f7572d..0755ede 100755
--- a/src/main/java/gui/OneWordAnalysisTab.java
+++ b/src/main/java/gui/OneWordAnalysisTab.java
@@ -164,6 +164,8 @@ public class OneWordAnalysisTab {
logger.info("compute button");
});
helpH.setOnAction(e -> openHelpWebsite());
+
+ cancel.setVisible(false);
}
/**
@@ -384,6 +386,7 @@ public class OneWordAnalysisTab {
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
+ cancel.setVisible(false);
});
task.setOnFailed(e -> {
@@ -394,6 +397,7 @@ public class OneWordAnalysisTab {
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
+ cancel.setVisible(false);
});
task.setOnCancelled(e -> {
@@ -403,6 +407,7 @@ public class OneWordAnalysisTab {
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
+ cancel.setVisible(false);
});
// When cancel button is pressed cancel analysis
@@ -411,6 +416,7 @@ public class OneWordAnalysisTab {
logger.info("cancel button");
});
+ cancel.setVisible(true);
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();
diff --git a/src/main/java/gui/StringAnalysisTabNew2.java b/src/main/java/gui/StringAnalysisTabNew2.java
index 1db9293..b861d3b 100755
--- a/src/main/java/gui/StringAnalysisTabNew2.java
+++ b/src/main/java/gui/StringAnalysisTabNew2.java
@@ -71,6 +71,9 @@ public class StringAnalysisTabNew2 {
@FXML
private Button computeNgramsB;
+ @FXML
+ private Button cancel;
+
@FXML
public ProgressBar ngramProgressBar;
@FXML
@@ -231,6 +234,8 @@ public class StringAnalysisTabNew2 {
});
helpH.setOnAction(e -> openHelpWebsite());
+
+ cancel.setVisible(false);
}
/**
@@ -457,6 +462,10 @@ public class StringAnalysisTabNew2 {
for (File f : corpusFiles) {
readXML(f.toString(), statistic);
i++;
+ if (isCancelled()) {
+ updateMessage(CANCELING_NOTIFICATION);
+ break;
+ }
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
}
@@ -485,6 +494,7 @@ public class StringAnalysisTabNew2 {
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
+ cancel.setVisible(false);
});
task.setOnFailed(e -> {
@@ -495,8 +505,27 @@ public class StringAnalysisTabNew2 {
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
+ cancel.setVisible(false);
+ });
+
+ task.setOnCancelled(e -> {
+ showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
+ ngramProgressBar.progressProperty().unbind();
+ ngramProgressBar.setProgress(0.0);
+ ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
+ progressLabel.textProperty().unbind();
+ progressLabel.setText("");
+ cancel.setVisible(false);
});
+ // When cancel button is pressed cancel analysis
+ cancel.setOnAction(e -> {
+ task.cancel();
+ logger.info("cancel button");
+ });
+
+ cancel.setVisible(true);
+
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();
diff --git a/src/main/java/util/Export.java b/src/main/java/util/Export.java
index 9b5e3be..1627312 100644
--- a/src/main/java/util/Export.java
+++ b/src/main/java/util/Export.java
@@ -5,7 +5,11 @@ import static util.Util.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.atomic.AtomicLong;
+import data.Filter;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.lang3.tuple.Pair;
@@ -52,17 +56,29 @@ public class Export {
}
}
- public static String SetToCSV(Set>> set, File resultsPath, LinkedHashMap headerInfoBlock) {
+ public static String SetToCSV(Set>> set, File resultsPath, LinkedHashMap headerInfoBlock,
+ Map> taxonomyResults) {
//Delimiter used in CSV file
String NEW_LINE_SEPARATOR = "\n";
+ List