Added taxonomy presentation in results
This commit is contained in:
parent
d5d06fd7c5
commit
8d7cce6c77
2
pom.xml
2
pom.xml
|
@ -93,7 +93,7 @@
|
||||||
<!-- JavaFX -->
|
<!-- JavaFX -->
|
||||||
<groupId>com.zenjava</groupId>
|
<groupId>com.zenjava</groupId>
|
||||||
<artifactId>javafx-maven-plugin</artifactId>
|
<artifactId>javafx-maven-plugin</artifactId>
|
||||||
<version>8.6.0</version>
|
<version>8.8.3</version>
|
||||||
<configuration>
|
<configuration>
|
||||||
<mainClass>gui.GUIController</mainClass>
|
<mainClass>gui.GUIController</mainClass>
|
||||||
<verbose>true</verbose>
|
<verbose>true</verbose>
|
||||||
|
|
|
@ -473,6 +473,7 @@ public class XML_processing {
|
||||||
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
|
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
|
||||||
boolean inWord = false;
|
boolean inWord = false;
|
||||||
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
||||||
|
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||||
String lemma = "";
|
String lemma = "";
|
||||||
String msd = "";
|
String msd = "";
|
||||||
|
|
||||||
|
@ -508,7 +509,10 @@ public class XML_processing {
|
||||||
|
|
||||||
if (tax != null) {
|
if (tax != null) {
|
||||||
// keep only taxonomy properties
|
// keep only taxonomy properties
|
||||||
currentFiletaxonomy.add(String.valueOf(tax.getValue()).replace("#", ""));
|
String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", "");
|
||||||
|
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||||
|
Tax taxonomy = new Tax();
|
||||||
|
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -519,7 +523,7 @@ public class XML_processing {
|
||||||
// "word" node value
|
// "word" node value
|
||||||
if (inWord) {
|
if (inWord) {
|
||||||
String word = characters.getData();
|
String word = characters.getData();
|
||||||
sentence.add(new Word(word, lemma, msd));
|
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
|
||||||
inWord = false;
|
inWord = false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -570,6 +574,7 @@ public class XML_processing {
|
||||||
|
|
||||||
// fallback
|
// fallback
|
||||||
else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
|
else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
|
||||||
|
// join corpus and stats
|
||||||
fj(corpus, stats);
|
fj(corpus, stats);
|
||||||
corpus.clear();
|
corpus.clear();
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,8 @@ public class Ngrams {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UPDATE TAXONOMY HERE!!!
|
||||||
|
stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate);
|
||||||
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -60,7 +62,8 @@ public class Ngrams {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < regex.size(); i++) {
|
for (int i = 0; i < regex.size(); i++) {
|
||||||
if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
|
//if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
|
||||||
|
if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern() + ".*")) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,7 @@ public class StatisticsNew {
|
||||||
|
|
||||||
private String resultTitle;
|
private String resultTitle;
|
||||||
private Map<String, AtomicLong> result;
|
private Map<String, AtomicLong> result;
|
||||||
|
private Map<String, Map<String, AtomicLong>> taxonomyResult;
|
||||||
private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
|
private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
|
||||||
private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedSuffix;
|
private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedSuffix;
|
||||||
private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedPrefix;
|
private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedPrefix;
|
||||||
|
@ -43,6 +44,20 @@ public class StatisticsNew {
|
||||||
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
||||||
this.corpus = corpus;
|
this.corpus = corpus;
|
||||||
this.filter = filter;
|
this.filter = filter;
|
||||||
|
this.taxonomyResult = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
// create table for counting word occurrences per taxonomy
|
||||||
|
|
||||||
|
if (this.filter.getTaxonomy().isEmpty()) {
|
||||||
|
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
|
||||||
|
this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
|
||||||
|
Tax taxonomy = new Tax();
|
||||||
|
this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (useDB) {
|
if (useDB) {
|
||||||
this.useDB = true;
|
this.useDB = true;
|
||||||
|
@ -189,7 +204,7 @@ public class StatisticsNew {
|
||||||
}
|
}
|
||||||
|
|
||||||
stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
|
stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
|
||||||
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock());
|
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -260,6 +275,28 @@ public class StatisticsNew {
|
||||||
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void updateTaxonomyResults(String o, List<Word> ngramCandidate) {
|
||||||
|
for (String key : taxonomyResult.keySet()) {
|
||||||
|
// first word should have the same taxonomy as others
|
||||||
|
if (ngramCandidate.get(0).getTaxonomy().contains(key)) {
|
||||||
|
// if taxonomy not in map and in this word
|
||||||
|
AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));
|
||||||
|
|
||||||
|
if (r != null)
|
||||||
|
taxonomyResult.get(key).get(o).incrementAndGet();
|
||||||
|
} else {
|
||||||
|
// if taxonomy not in map and not in this word
|
||||||
|
AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if not in map
|
||||||
|
|
||||||
|
|
||||||
|
// else
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public void updateResults(String o) {
|
public void updateResults(String o) {
|
||||||
// if not in map
|
// if not in map
|
||||||
AtomicLong r = result.putIfAbsent(o, new AtomicLong(1));
|
AtomicLong r = result.putIfAbsent(o, new AtomicLong(1));
|
||||||
|
@ -377,22 +414,22 @@ public class StatisticsNew {
|
||||||
}
|
}
|
||||||
|
|
||||||
// taksonomija
|
// taksonomija
|
||||||
if (!isEmpty(filter.getTaxonomy())) {
|
// if (!isEmpty(filter.getTaxonomy())) {
|
||||||
info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
|
// info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
|
||||||
}
|
// }
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
||||||
// ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
|
ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
|
||||||
//
|
|
||||||
// info.put("Taksonomija: ", "");
|
info.put("Taksonomija: ", "");
|
||||||
// String sep = "";
|
String sep = "";
|
||||||
// for (String s : tax) {
|
for (String s : tax) {
|
||||||
// info.put(sep = sep + " ", s);
|
info.put(sep = sep + " ", s);
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
|
|
||||||
if (corpus.getCorpusType() == CorpusType.SOLAR) {
|
if (corpus.getCorpusType() == CorpusType.SOLAR) {
|
||||||
HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
|
HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
|
||||||
|
|
|
@ -172,4 +172,13 @@ public class Tax {
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String getLongTaxonomyName(String shortName){
|
||||||
|
if (GIGAFIDA_TAXONOMY.containsKey(shortName))
|
||||||
|
return GIGAFIDA_TAXONOMY.get(shortName);
|
||||||
|
else if(GOS_TAXONOMY.containsKey(shortName))
|
||||||
|
return GOS_TAXONOMY.get(shortName);
|
||||||
|
else
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ package data;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
@ -15,6 +16,7 @@ public class Word implements Serializable {
|
||||||
private String word;
|
private String word;
|
||||||
private String lemma;
|
private String lemma;
|
||||||
private String msd;
|
private String msd;
|
||||||
|
private List<String> taxonomy;
|
||||||
private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));
|
private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -50,6 +52,22 @@ public class Word implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//private char besedna_vrsta;
|
||||||
|
public Word(String word, String lemma, String msd, List<String> taxonomy) {
|
||||||
|
this.lemma = lemma;
|
||||||
|
this.msd = normalizeMsd(msd);
|
||||||
|
this.taxonomy = taxonomy;
|
||||||
|
|
||||||
|
// veliko zacetnico ohranimo samo za lastna imena
|
||||||
|
if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S'
|
||||||
|
&& this.msd.length() >= 2
|
||||||
|
&& this.msd.charAt(1) == 'l')) {
|
||||||
|
this.word = word.toLowerCase();
|
||||||
|
} else {
|
||||||
|
this.word = word;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public Word() {
|
public Word() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,6 +117,10 @@ public class Word implements Serializable {
|
||||||
this.word = word;
|
this.word = word;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<String> getTaxonomy() {
|
||||||
|
return taxonomy;
|
||||||
|
}
|
||||||
|
|
||||||
public String getLemma() {
|
public String getLemma() {
|
||||||
return lemma;
|
return lemma;
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,6 +67,9 @@ public class CharacterAnalysisTab {
|
||||||
@FXML
|
@FXML
|
||||||
private Button computeNgramsB;
|
private Button computeNgramsB;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private Button cancel;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
public ProgressBar ngramProgressBar;
|
public ProgressBar ngramProgressBar;
|
||||||
@FXML
|
@FXML
|
||||||
|
@ -192,6 +195,8 @@ public class CharacterAnalysisTab {
|
||||||
});
|
});
|
||||||
|
|
||||||
helpH.setOnAction(e -> openHelpWebsite());
|
helpH.setOnAction(e -> openHelpWebsite());
|
||||||
|
|
||||||
|
cancel.setVisible(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -399,6 +404,10 @@ public class CharacterAnalysisTab {
|
||||||
for (File f : corpusFiles) {
|
for (File f : corpusFiles) {
|
||||||
readXML(f.toString(), statistic);
|
readXML(f.toString(), statistic);
|
||||||
i++;
|
i++;
|
||||||
|
if (isCancelled()) {
|
||||||
|
updateMessage(CANCELING_NOTIFICATION);
|
||||||
|
break;
|
||||||
|
}
|
||||||
this.updateProgress(i, corpusFiles.size());
|
this.updateProgress(i, corpusFiles.size());
|
||||||
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||||
}
|
}
|
||||||
|
@ -427,6 +436,7 @@ public class CharacterAnalysisTab {
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
task.setOnFailed(e -> {
|
task.setOnFailed(e -> {
|
||||||
|
@ -437,8 +447,27 @@ public class CharacterAnalysisTab {
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
task.setOnCancelled(e -> {
|
||||||
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
|
||||||
|
ngramProgressBar.progressProperty().unbind();
|
||||||
|
ngramProgressBar.setProgress(0.0);
|
||||||
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
|
progressLabel.textProperty().unbind();
|
||||||
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
// When cancel button is pressed cancel analysis
|
||||||
|
cancel.setOnAction(e -> {
|
||||||
|
task.cancel();
|
||||||
|
logger.info("cancel button");
|
||||||
|
});
|
||||||
|
|
||||||
|
cancel.setVisible(true);
|
||||||
|
|
||||||
final Thread thread = new Thread(task, "task");
|
final Thread thread = new Thread(task, "task");
|
||||||
thread.setDaemon(true);
|
thread.setDaemon(true);
|
||||||
thread.start();
|
thread.start();
|
||||||
|
|
|
@ -83,7 +83,7 @@ public class CorpusTab {
|
||||||
private OneWordAnalysisTab oneWordTabController;
|
private OneWordAnalysisTab oneWordTabController;
|
||||||
private CharacterAnalysisTab catController;
|
private CharacterAnalysisTab catController;
|
||||||
private FiltersForSolar ffsController;
|
private FiltersForSolar ffsController;
|
||||||
//private WordFormationTab wfController;
|
private WordFormationTab wfController;
|
||||||
private WordLevelTab wlController;
|
private WordLevelTab wlController;
|
||||||
private HostServices hostService;
|
private HostServices hostService;
|
||||||
|
|
||||||
|
@ -383,7 +383,7 @@ public class CorpusTab {
|
||||||
characterLevelTab.setDisable(false);
|
characterLevelTab.setDisable(false);
|
||||||
catController.setCorpus(corpus);
|
catController.setCorpus(corpus);
|
||||||
catController.init();
|
catController.init();
|
||||||
wordFormationTab.setDisable(false);
|
//wordFormationTab.setDisable(false);
|
||||||
wordLevelTab.setDisable(false);
|
wordLevelTab.setDisable(false);
|
||||||
//wfController.setCorpus(corpus);
|
//wfController.setCorpus(corpus);
|
||||||
//wfController.init();
|
//wfController.init();
|
||||||
|
|
|
@ -164,6 +164,8 @@ public class OneWordAnalysisTab {
|
||||||
logger.info("compute button");
|
logger.info("compute button");
|
||||||
});
|
});
|
||||||
helpH.setOnAction(e -> openHelpWebsite());
|
helpH.setOnAction(e -> openHelpWebsite());
|
||||||
|
|
||||||
|
cancel.setVisible(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -384,6 +386,7 @@ public class OneWordAnalysisTab {
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
task.setOnFailed(e -> {
|
task.setOnFailed(e -> {
|
||||||
|
@ -394,6 +397,7 @@ public class OneWordAnalysisTab {
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
task.setOnCancelled(e -> {
|
task.setOnCancelled(e -> {
|
||||||
|
@ -403,6 +407,7 @@ public class OneWordAnalysisTab {
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
// When cancel button is pressed cancel analysis
|
// When cancel button is pressed cancel analysis
|
||||||
|
@ -411,6 +416,7 @@ public class OneWordAnalysisTab {
|
||||||
logger.info("cancel button");
|
logger.info("cancel button");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
cancel.setVisible(true);
|
||||||
final Thread thread = new Thread(task, "task");
|
final Thread thread = new Thread(task, "task");
|
||||||
thread.setDaemon(true);
|
thread.setDaemon(true);
|
||||||
thread.start();
|
thread.start();
|
||||||
|
|
|
@ -71,6 +71,9 @@ public class StringAnalysisTabNew2 {
|
||||||
@FXML
|
@FXML
|
||||||
private Button computeNgramsB;
|
private Button computeNgramsB;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
private Button cancel;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
public ProgressBar ngramProgressBar;
|
public ProgressBar ngramProgressBar;
|
||||||
@FXML
|
@FXML
|
||||||
|
@ -231,6 +234,8 @@ public class StringAnalysisTabNew2 {
|
||||||
});
|
});
|
||||||
|
|
||||||
helpH.setOnAction(e -> openHelpWebsite());
|
helpH.setOnAction(e -> openHelpWebsite());
|
||||||
|
|
||||||
|
cancel.setVisible(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -457,6 +462,10 @@ public class StringAnalysisTabNew2 {
|
||||||
for (File f : corpusFiles) {
|
for (File f : corpusFiles) {
|
||||||
readXML(f.toString(), statistic);
|
readXML(f.toString(), statistic);
|
||||||
i++;
|
i++;
|
||||||
|
if (isCancelled()) {
|
||||||
|
updateMessage(CANCELING_NOTIFICATION);
|
||||||
|
break;
|
||||||
|
}
|
||||||
this.updateProgress(i, corpusFiles.size());
|
this.updateProgress(i, corpusFiles.size());
|
||||||
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||||
}
|
}
|
||||||
|
@ -485,6 +494,7 @@ public class StringAnalysisTabNew2 {
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
task.setOnFailed(e -> {
|
task.setOnFailed(e -> {
|
||||||
|
@ -495,8 +505,27 @@ public class StringAnalysisTabNew2 {
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||||
progressLabel.textProperty().unbind();
|
progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
task.setOnCancelled(e -> {
|
||||||
|
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
|
||||||
|
ngramProgressBar.progressProperty().unbind();
|
||||||
|
ngramProgressBar.setProgress(0.0);
|
||||||
|
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
|
progressLabel.textProperty().unbind();
|
||||||
|
progressLabel.setText("");
|
||||||
|
cancel.setVisible(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
// When cancel button is pressed cancel analysis
|
||||||
|
cancel.setOnAction(e -> {
|
||||||
|
task.cancel();
|
||||||
|
logger.info("cancel button");
|
||||||
|
});
|
||||||
|
|
||||||
|
cancel.setVisible(true);
|
||||||
|
|
||||||
final Thread thread = new Thread(task, "task");
|
final Thread thread = new Thread(task, "task");
|
||||||
thread.setDaemon(true);
|
thread.setDaemon(true);
|
||||||
thread.start();
|
thread.start();
|
||||||
|
|
|
@ -5,7 +5,11 @@ import static util.Util.*;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.ConcurrentMap;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
|
import data.Filter;
|
||||||
import org.apache.commons.csv.CSVFormat;
|
import org.apache.commons.csv.CSVFormat;
|
||||||
import org.apache.commons.csv.CSVPrinter;
|
import org.apache.commons.csv.CSVPrinter;
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
@ -52,17 +56,29 @@ public class Export {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||||
|
Map<String, Map<String, AtomicLong>> taxonomyResults) {
|
||||||
//Delimiter used in CSV file
|
//Delimiter used in CSV file
|
||||||
String NEW_LINE_SEPARATOR = "\n";
|
String NEW_LINE_SEPARATOR = "\n";
|
||||||
|
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
||||||
Object[] FILE_HEADER;
|
Object[] FILE_HEADER;
|
||||||
|
|
||||||
//Count frequencies
|
//Count frequencies
|
||||||
int num_frequencies = 0;
|
long num_frequencies = 0;
|
||||||
for (Pair<String, Map<String, Long>> p : set) {
|
for (Pair<String, Map<String, Long>> p : set) {
|
||||||
Map<String, Long> map = p.getRight();
|
Map<String, Long> map = p.getRight();
|
||||||
for (Map.Entry<String, Long> e : map.entrySet()) {
|
if (map.isEmpty())
|
||||||
num_frequencies += e.getValue();
|
continue;
|
||||||
|
num_frequencies = Util.mapSumFrequencies(map);
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, Long> num_taxonomy_frequencies = new ConcurrentHashMap<>();
|
||||||
|
for (String taxonomyKey : taxonomyResults.keySet()) {
|
||||||
|
num_taxonomy_frequencies.put(taxonomyKey, (long) 0);
|
||||||
|
for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){
|
||||||
|
long val = num_taxonomy_frequencies.get(taxonomyKey);
|
||||||
|
val += value.get();
|
||||||
|
num_taxonomy_frequencies.put(taxonomyKey, val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,19 +87,36 @@ public class Export {
|
||||||
if (headerInfoBlock.containsKey("Analiza") && headerInfoBlock.get("Analiza").equals("Besede")) {
|
if (headerInfoBlock.containsKey("Analiza") && headerInfoBlock.get("Analiza").equals("Besede")) {
|
||||||
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
||||||
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||||
FILE_HEADER = new Object[]{"Različnica", "Skupna absolutna pogostost", "Delež glede na vse različnice"};
|
FILE_HEADER_AL.add("Različnica");
|
||||||
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||||
|
FILE_HEADER_AL.add("Delež glede na vse različnice");
|
||||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
||||||
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
||||||
FILE_HEADER = new Object[]{"Lema", "Skupna absolutna pogostost", "Delež glede na vse leme"};
|
FILE_HEADER_AL.add("Lema");
|
||||||
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||||
|
FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||||
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
||||||
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
||||||
FILE_HEADER = new Object[]{"Oblikoskladenjska oznaka", "Skupna absolutna pogostost", "Delež glede na vse oblikoskladenjske oznake"};
|
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
||||||
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||||
|
FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
|
||||||
} else {
|
} else {
|
||||||
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||||
FILE_HEADER = new Object[]{"Lema", "Skupna pogostost", "Delež glede na leme"};
|
FILE_HEADER_AL.add("Lema");
|
||||||
|
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||||
|
FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||||
}
|
}
|
||||||
} else
|
FILE_HEADER_AL.add("Skupna relativna pogostost");
|
||||||
|
for (String key : taxonomyResults.keySet()) {
|
||||||
|
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
|
||||||
|
FILE_HEADER_AL.add("Delež [" + key + "]");
|
||||||
|
FILE_HEADER_AL.add("Relativna pogostost [" + key + "]");
|
||||||
|
}
|
||||||
|
FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
|
||||||
|
FILE_HEADER_AL.toArray(FILE_HEADER);
|
||||||
|
} else {
|
||||||
FILE_HEADER = new Object[]{"word", "frequency", "percent"};
|
FILE_HEADER = new Object[]{"word", "frequency", "percent"};
|
||||||
|
}
|
||||||
|
|
||||||
String fileName = "";
|
String fileName = "";
|
||||||
|
|
||||||
|
@ -99,7 +132,7 @@ public class Export {
|
||||||
if (map.isEmpty())
|
if (map.isEmpty())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
long total = Util.mapSumFrequencies(map);
|
// long total = Util.mapSumFrequencies(map);
|
||||||
|
|
||||||
OutputStreamWriter fileWriter = null;
|
OutputStreamWriter fileWriter = null;
|
||||||
CSVPrinter csvFilePrinter = null;
|
CSVPrinter csvFilePrinter = null;
|
||||||
|
@ -124,7 +157,16 @@ public class Export {
|
||||||
List dataEntry = new ArrayList<>();
|
List dataEntry = new ArrayList<>();
|
||||||
dataEntry.add(e.getKey());
|
dataEntry.add(e.getKey());
|
||||||
dataEntry.add(e.getValue().toString());
|
dataEntry.add(e.getValue().toString());
|
||||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / total));
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
||||||
|
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
||||||
|
for (String key : taxonomyResults.keySet()){
|
||||||
|
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
|
||||||
|
dataEntry.add(frequency.toString());
|
||||||
|
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
|
||||||
|
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key)));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
csvFilePrinter.printRecord(dataEntry);
|
csvFilePrinter.printRecord(dataEntry);
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user