Added taxonomy presentation in results

Author: Luka, 2018-06-29 12:53:29 +02:00
parent d5d06fd7c5
commit 8d7cce6c77
11 changed files with 212 additions and 30 deletions

View File

@@ -93,7 +93,7 @@
 <!-- JavaFX -->
 <groupId>com.zenjava</groupId>
 <artifactId>javafx-maven-plugin</artifactId>
-<version>8.6.0</version>
+<version>8.8.3</version>
 <configuration>
 <mainClass>gui.GUIController</mainClass>
 <verbose>true</verbose>

View File

@@ -473,6 +473,7 @@ public class XML_processing {
 public static boolean readXMLGigafida(String path, StatisticsNew stats) {
 boolean inWord = false;
 ArrayList<String> currentFiletaxonomy = new ArrayList<>();
+ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
 String lemma = "";
 String msd = "";
@@ -508,7 +509,10 @@ public class XML_processing {
 if (tax != null) {
 // keep only taxonomy properties
-currentFiletaxonomy.add(String.valueOf(tax.getValue()).replace("#", ""));
+String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", "");
+currentFiletaxonomy.add(currentFiletaxonomyElement);
+Tax taxonomy = new Tax();
+currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
 }
 }
 break;
@@ -519,7 +523,7 @@ public class XML_processing {
 // "word" node value
 if (inWord) {
 String word = characters.getData();
-sentence.add(new Word(word, lemma, msd));
+sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
 inWord = false;
 }
 break;
@@ -570,6 +574,7 @@ public class XML_processing {
 // fallback
 else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
+// join corpus and stats
 fj(corpus, stats);
 corpus.clear();
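Note: readXMLGigafida now builds a second list, currentFiletaxonomyLong, holding the long names of the taxonomy codes found in the file header, and hands that list to every Word it creates. Below is a minimal, self-contained sketch of that flow; the SAMPLE_TAXONOMY map, the short codes, and the printed word are illustrative stand-ins, not the project's actual Tax tables or Word class.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class TaxonomySketch {
    // Hypothetical short -> long taxonomy pairs, standing in for Tax's GIGAFIDA_TAXONOMY.
    private static final Map<String, String> SAMPLE_TAXONOMY = new HashMap<>();
    static {
        SAMPLE_TAXONOMY.put("tisk-knj", "tisk-knjizno");
        SAMPLE_TAXONOMY.put("tisk-per", "tisk-periodicno");
    }

    public static void main(String[] args) {
        // short codes as read from the catRef attributes of one corpus file
        List<String> shortCodes = Arrays.asList("tisk-knj", "tisk-per");

        // resolve them once per file, as readXMLGigafida now does
        List<String> longNames = new ArrayList<>();
        for (String code : shortCodes) {
            String longName = SAMPLE_TAXONOMY.get(code);   // Tax.getLongTaxonomyName(code) in the project
            if (longName != null) {
                longNames.add(longName);
            }
        }

        // every Word built from this file would receive the same longNames list
        System.out.println("taxonomy attached to each word: " + longNames);
    }
}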

View File

@@ -45,6 +45,8 @@ public class Ngrams {
 continue;
 }
+// UPDATE TAXONOMY HERE!!!
+stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate);
 stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
 }
 }
@@ -60,7 +62,8 @@ public class Ngrams {
 }
 for (int i = 0; i < regex.size(); i++) {
-if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
+//if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
+if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern() + ".*")) {
 return false;
 }
 }
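Note: the MSD filter now appends ".*" to each pattern because String.matches anchors the regex to the entire string, so a pattern covering only the first characters of a tag would never match a full tag. A tiny sketch of the difference; the tag and pattern values are illustrative:

public class MsdPrefixMatch {
    public static void main(String[] args) {
        String msd = "Sozei";        // a full morphosyntactic tag (illustrative)
        String userPattern = "So";   // a filter that covers only the leading positions

        // String.matches requires the whole string to match, so the old check failed here:
        System.out.println(msd.matches(userPattern));          // false
        // appending ".*" turns the same pattern into a prefix match, as the new code does:
        System.out.println(msd.matches(userPattern + ".*"));   // true
    }
}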

View File

@@ -32,6 +32,7 @@ public class StatisticsNew {
 private String resultTitle;
 private Map<String, AtomicLong> result;
+private Map<String, Map<String, AtomicLong>> taxonomyResult;
 private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
 private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedSuffix;
 private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedPrefix;
@@ -43,6 +44,20 @@ public class StatisticsNew {
 public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
 this.corpus = corpus;
 this.filter = filter;
+this.taxonomyResult = new ConcurrentHashMap<>();
+// create table for counting word occurances per taxonomies
+if (this.filter.getTaxonomy().isEmpty()) {
+for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
+this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());
+}
+} else {
+for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
+Tax taxonomy = new Tax();
+this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>());
+}
+}
 if (useDB) {
 this.useDB = true;
@@ -189,7 +204,7 @@ public class StatisticsNew {
 }
 stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
-Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock());
+Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult);
 return true;
 }
@@ -260,6 +275,28 @@ public class StatisticsNew {
 return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
 }
+public void updateTaxonomyResults(String o, List<Word> ngramCandidate) {
+for (String key : taxonomyResult.keySet()) {
+// first word should have the same taxonomy as others
+if (ngramCandidate.get(0).getTaxonomy().contains(key)) {
+// if taxonomy not in map and in this word
+AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));
+if (r != null)
+taxonomyResult.get(key).get(o).incrementAndGet();
+} else {
+// if taxonomy not in map and not in this word
+AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(0));
+}
+}
+// if not in map
+// else
+}
 public void updateResults(String o) {
 // if not in map
 AtomicLong r = result.putIfAbsent(o, new AtomicLong(1));
@@ -377,22 +414,22 @@ public class StatisticsNew {
 }
 // taksonomija
-if (!isEmpty(filter.getTaxonomy())) {
-info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
-}
+// if (!isEmpty(filter.getTaxonomy())) {
+// info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
+// }
 }
-// if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
-// ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
-//
-// info.put("Taksonomija: ", "");
-// String sep = "";
-// for (String s : tax) {
-// info.put(sep = sep + " ", s);
-// }
-// }
+if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
+
+info.put("Taksonomija: ", "");
+String sep = "";
+for (String s : tax) {
+info.put(sep = sep + " ", s);
+}
+}
 if (corpus.getCorpusType() == CorpusType.SOLAR) {
 HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
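Note: updateTaxonomyResults keeps one counter map per taxonomy branch and counts an n-gram under a branch only when that branch's long name appears on the n-gram's first word; the zero-valued putIfAbsent in the else branch ensures every branch still gets an entry for the n-gram, which the CSV export later reads without a null check. Below is a minimal sketch of the putIfAbsent/incrementAndGet counting idiom used here, independent of the project classes; names and sample keys are illustrative.

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong;

public class ConcurrentCountSketch {
    // one counter map per taxonomy branch, keyed by the n-gram's string form
    private final ConcurrentMap<String, AtomicLong> counts = new ConcurrentHashMap<>();

    // Count one occurrence; safe to call from several worker threads at once.
    public void increment(String key) {
        AtomicLong previous = counts.putIfAbsent(key, new AtomicLong(1));
        if (previous != null) {
            previous.incrementAndGet();   // key already existed, bump the counter
        }
    }

    // Register the key with a zero count so it still gets a row in the export.
    public void registerZero(String key) {
        counts.putIfAbsent(key, new AtomicLong(0));
    }

    public static void main(String[] args) {
        ConcurrentCountSketch sketch = new ConcurrentCountSketch();
        sketch.increment("beseda");
        sketch.increment("beseda");
        sketch.registerZero("druga beseda");
        System.out.println(sketch.counts);   // e.g. {beseda=2, druga beseda=0}
    }
}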

View File

@@ -172,4 +172,13 @@ public class Tax {
 return result;
 }
+public static String getLongTaxonomyName(String shortName){
+if (GIGAFIDA_TAXONOMY.containsKey(shortName))
+return GIGAFIDA_TAXONOMY.get(shortName);
+else if(GOS_TAXONOMY.containsKey(shortName))
+return GOS_TAXONOMY.get(shortName);
+else
+return null;
+}
 }

View File

@@ -3,6 +3,7 @@ package data;
 import java.io.Serializable;
 import java.util.Arrays;
 import java.util.HashSet;
+import java.util.List;
 import org.apache.commons.lang3.StringUtils;
@@ -15,6 +16,7 @@ public class Word implements Serializable {
 private String word;
 private String lemma;
 private String msd;
+private List<String> taxonomy;
 private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));
 /**
@@ -50,6 +52,22 @@ public class Word implements Serializable {
 }
 }
+//private char besedna_vrsta;
+public Word(String word, String lemma, String msd, List<String> taxonomy) {
+this.lemma = lemma;
+this.msd = normalizeMsd(msd);
+this.taxonomy = taxonomy;
+// veliko zacetnico ohranimo samo za lastna imena
+if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S'
+&& this.msd.length() >= 2
+&& this.msd.charAt(1) == 'l')) {
+this.word = word.toLowerCase();
+} else {
+this.word = word;
+}
+}
 public Word() {
 }
@@ -99,6 +117,10 @@ public class Word implements Serializable {
 this.word = word;
 }
+public List<String> getTaxonomy() {
+return taxonomy;
+}
 public String getLemma() {
 return taxonomy;
 }
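Note: the new four-argument Word constructor stores the per-file taxonomy list and otherwise mirrors the existing constructor: the surface form is lower-cased unless the MSD tag marks a proper noun (the Slovene comment reads "keep the capital letter only for proper nouns"). A standalone sketch of that check, with illustrative MSD values and a hypothetical helper name:

public class ProperNounCheck {
    // Mirrors the condition in the new Word constructor: keep original capitalisation
    // only for proper nouns (MSD starting with "Sl") or when no MSD tag is present.
    static String normalise(String word, String msd) {
        boolean hasMsd = msd != null && !msd.isEmpty();
        boolean properNoun = hasMsd && msd.length() >= 2
                && msd.charAt(0) == 'S' && msd.charAt(1) == 'l';
        return (hasMsd && !properNoun) ? word.toLowerCase() : word;
    }

    public static void main(String[] args) {
        System.out.println(normalise("Ljubljana", "Slmei"));  // Ljubljana (proper noun, kept)
        System.out.println(normalise("Hiša", "Sozei"));       // hiša (common noun, lower-cased)
    }
}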

View File

@@ -67,6 +67,9 @@ public class CharacterAnalysisTab {
 @FXML
 private Button computeNgramsB;
+@FXML
+private Button cancel;
 @FXML
 public ProgressBar ngramProgressBar;
 @FXML
@@ -192,6 +195,8 @@ public class CharacterAnalysisTab {
 });
 helpH.setOnAction(e -> openHelpWebsite());
+cancel.setVisible(false);
 }
 /**
@@ -399,6 +404,10 @@ public class CharacterAnalysisTab {
 for (File f : corpusFiles) {
 readXML(f.toString(), statistic);
 i++;
+if (isCancelled()) {
+updateMessage(CANCELING_NOTIFICATION);
+break;
+}
 this.updateProgress(i, corpusFiles.size());
 this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
 }
@@ -427,6 +436,7 @@ public class CharacterAnalysisTab {
 ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
 progressLabel.textProperty().unbind();
 progressLabel.setText("");
+cancel.setVisible(false);
 });
 task.setOnFailed(e -> {
@@ -437,8 +447,27 @@ public class CharacterAnalysisTab {
 ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
 progressLabel.textProperty().unbind();
 progressLabel.setText("");
+cancel.setVisible(false);
 });
+task.setOnCancelled(e -> {
+showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
+ngramProgressBar.progressProperty().unbind();
+ngramProgressBar.setProgress(0.0);
+ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
+progressLabel.textProperty().unbind();
+progressLabel.setText("");
+cancel.setVisible(false);
+});
+// When cancel button is pressed cancel analysis
+cancel.setOnAction(e -> {
+task.cancel();
+logger.info("cancel button");
+});
+cancel.setVisible(true);
 final Thread thread = new Thread(task, "task");
 thread.setDaemon(true);
 thread.start();
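Note: the cancel support added here (and repeated in OneWordAnalysisTab and StringAnalysisTabNew2 below) follows the standard javafx.concurrent.Task protocol: the button calls task.cancel(), the background loop polls isCancelled() between files, and setOnCancelled resets the progress UI. A stripped-down sketch of the pattern, with the project-specific calls reduced to comments:

import javafx.concurrent.Task;

public class CancellableAnalysisSketch {
    // The worker checks isCancelled() between files; the UI reacts in setOnCancelled.
    public static Task<Void> buildTask(int fileCount) {
        return new Task<Void>() {
            @Override
            protected Void call() {
                for (int i = 0; i < fileCount; i++) {
                    if (isCancelled()) {             // becomes true after cancel() is called from the UI
                        updateMessage("cancelling"); // CANCELING_NOTIFICATION in the project
                        break;
                    }
                    // readXML(...) would run here
                    updateProgress(i + 1, fileCount);
                }
                return null;
            }
        };
    }

    // Wiring, as done in the tabs (cancel is the new @FXML button):
    //   cancel.setOnAction(e -> task.cancel());
    //   task.setOnCancelled(e -> { /* reset progress bar, hide cancel button */ });
    //   cancel.setVisible(true);
    //   Thread t = new Thread(task, "task"); t.setDaemon(true); t.start();
}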

View File

@@ -83,7 +83,7 @@ public class CorpusTab {
 private OneWordAnalysisTab oneWordTabController;
 private CharacterAnalysisTab catController;
 private FiltersForSolar ffsController;
-//private WordFormationTab wfController;
+private WordFormationTab wfController;
 private WordLevelTab wlController;
 private HostServices hostService;
@@ -383,7 +383,7 @@ public class CorpusTab {
 characterLevelTab.setDisable(false);
 catController.setCorpus(corpus);
 catController.init();
-wordFormationTab.setDisable(false);
+//wordFormationTab.setDisable(false);
 wordLevelTab.setDisable(false);
 //wfController.setCorpus(corpus);
 //wfController.init();

View File

@@ -164,6 +164,8 @@ public class OneWordAnalysisTab {
 logger.info("compute button");
 });
 helpH.setOnAction(e -> openHelpWebsite());
+cancel.setVisible(false);
 }
 /**
@@ -384,6 +386,7 @@ public class OneWordAnalysisTab {
 ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
 progressLabel.textProperty().unbind();
 progressLabel.setText("");
+cancel.setVisible(false);
 });
 task.setOnFailed(e -> {
@@ -394,6 +397,7 @@ public class OneWordAnalysisTab {
 ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
 progressLabel.textProperty().unbind();
 progressLabel.setText("");
+cancel.setVisible(false);
 });
 task.setOnCancelled(e -> {
@@ -403,6 +407,7 @@ public class OneWordAnalysisTab {
 ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
 progressLabel.textProperty().unbind();
 progressLabel.setText("");
+cancel.setVisible(false);
 });
 // When cancel button is pressed cancel analysis
@@ -411,6 +416,7 @@ public class OneWordAnalysisTab {
 logger.info("cancel button");
 });
+cancel.setVisible(true);
 final Thread thread = new Thread(task, "task");
 thread.setDaemon(true);
 thread.start();

View File

@@ -71,6 +71,9 @@ public class StringAnalysisTabNew2 {
 @FXML
 private Button computeNgramsB;
+@FXML
+private Button cancel;
 @FXML
 public ProgressBar ngramProgressBar;
 @FXML
@@ -231,6 +234,8 @@ public class StringAnalysisTabNew2 {
 });
 helpH.setOnAction(e -> openHelpWebsite());
+cancel.setVisible(false);
 }
 /**
@@ -457,6 +462,10 @@ public class StringAnalysisTabNew2 {
 for (File f : corpusFiles) {
 readXML(f.toString(), statistic);
 i++;
+if (isCancelled()) {
+updateMessage(CANCELING_NOTIFICATION);
+break;
+}
 this.updateProgress(i, corpusFiles.size());
 this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
 }
@@ -485,6 +494,7 @@ public class StringAnalysisTabNew2 {
 ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
 progressLabel.textProperty().unbind();
 progressLabel.setText("");
+cancel.setVisible(false);
 });
 task.setOnFailed(e -> {
@@ -495,8 +505,27 @@ public class StringAnalysisTabNew2 {
 ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
 progressLabel.textProperty().unbind();
 progressLabel.setText("");
+cancel.setVisible(false);
 });
+task.setOnCancelled(e -> {
+showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
+ngramProgressBar.progressProperty().unbind();
+ngramProgressBar.setProgress(0.0);
+ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
+progressLabel.textProperty().unbind();
+progressLabel.setText("");
+cancel.setVisible(false);
+});
+// When cancel button is pressed cancel analysis
+cancel.setOnAction(e -> {
+task.cancel();
+logger.info("cancel button");
+});
+cancel.setVisible(true);
 final Thread thread = new Thread(task, "task");
 thread.setDaemon(true);
 thread.start();

View File

@@ -5,7 +5,11 @@ import static util.Util.*;
 import java.io.*;
 import java.nio.charset.StandardCharsets;
 import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.atomic.AtomicLong;
+import data.Filter;
 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVPrinter;
 import org.apache.commons.lang3.tuple.Pair;
@@ -52,17 +56,29 @@ public class Export {
 }
 }
-public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
+public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
+Map<String, Map<String, AtomicLong>> taxonomyResults) {
 //Delimiter used in CSV file
 String NEW_LINE_SEPARATOR = "\n";
+List<Object> FILE_HEADER_AL = new ArrayList<Object>();
 Object[] FILE_HEADER;
 //Count frequencies
-int num_frequencies = 0;
+long num_frequencies = 0;
 for (Pair<String, Map<String, Long>> p : set) {
 Map<String, Long> map = p.getRight();
-for (Map.Entry<String, Long> e : map.entrySet()) {
-num_frequencies += e.getValue();
+if (map.isEmpty())
+continue;
+num_frequencies = Util.mapSumFrequencies(map);
+}
+Map<String, Long> num_taxonomy_frequencies = new ConcurrentHashMap<>();
+for (String taxonomyKey : taxonomyResults.keySet()) {
+num_taxonomy_frequencies.put(taxonomyKey, (long) 0);
+for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){
+long val = num_taxonomy_frequencies.get(taxonomyKey);
+val += value.get();
+num_taxonomy_frequencies.put(taxonomyKey, val);
 }
 }
@@ -71,19 +87,36 @@ public class Export {
 if (headerInfoBlock.containsKey("Analiza") && headerInfoBlock.get("Analiza").equals("Besede")) {
 if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
 headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
-FILE_HEADER = new Object[]{"Različnica", "Skupna absolutna pogostost", "Delež glede na vse različnice"};
+FILE_HEADER_AL.add("Različnica");
+FILE_HEADER_AL.add("Skupna absolutna pogostost");
+FILE_HEADER_AL.add("Delež glede na vse različnice");
 } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
 headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
-FILE_HEADER = new Object[]{"Lema", "Skupna absolutna pogostost", "Delež glede na vse leme"};
+FILE_HEADER_AL.add("Lema");
+FILE_HEADER_AL.add("Skupna absolutna pogostost");
+FILE_HEADER_AL.add("Delež glede na vse leme");
 } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
 headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
-FILE_HEADER = new Object[]{"Oblikoskladenjska oznaka", "Skupna absolutna pogostost", "Delež glede na vse oblikoskladenjske oznake"};
+FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
+FILE_HEADER_AL.add("Skupna absolutna pogostost");
+FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
 } else {
 headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
-FILE_HEADER = new Object[]{"Lema", "Skupna pogostost", "Delež glede na leme"};
+FILE_HEADER_AL.add("Lema");
+FILE_HEADER_AL.add("Skupna absolutna pogostost");
+FILE_HEADER_AL.add("Delež glede na vse leme");
 }
-} else
+FILE_HEADER_AL.add("Skupna relativna pogostost");
+for (String key : taxonomyResults.keySet()) {
+FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
+FILE_HEADER_AL.add("Delež [" + key + "]");
+FILE_HEADER_AL.add("Relativna pogostost [" + key + "]");
+}
+FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
+FILE_HEADER_AL.toArray(FILE_HEADER);
+} else {
 FILE_HEADER = new Object[]{"word", "frequency", "percent"};
+}
 String fileName = "";
@@ -99,7 +132,7 @@
 if (map.isEmpty())
 continue;
-long total = Util.mapSumFrequencies(map);
+// long total = Util.mapSumFrequencies(map);
 OutputStreamWriter fileWriter = null;
 CSVPrinter csvFilePrinter = null;
@@ -124,7 +157,16 @@
 List dataEntry = new ArrayList<>();
 dataEntry.add(e.getKey());
 dataEntry.add(e.getValue().toString());
-dataEntry.add(formatNumberAsPercent((double) e.getValue() / total));
+dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
+dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
+for (String key : taxonomyResults.keySet()){
+AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
+dataEntry.add(frequency.toString());
+dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
+dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key)));
+}
 csvFilePrinter.printRecord(dataEntry);
 }
 } catch (Exception e) {
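Note: each result row now carries three columns per scope: the absolute frequency, its share of the total (formatNumberAsPercent of value/total), and a relative frequency computed as value * 10000 / total, which the factor suggests is meant as occurrences per 10,000 units (an inference, not stated in the code). The same triple is repeated for every taxonomy branch using that branch's own total from num_taxonomy_frequencies. A small arithmetic sketch with illustrative numbers:

public class FrequencyColumns {
    public static void main(String[] args) {
        long occurrences = 42;     // absolute frequency of one entry (illustrative)
        long total = 120_000;      // total frequency over the whole corpus or one taxonomy branch

        double share = (double) occurrences / total;                    // "Delež" column, printed as a percentage
        double perTenThousand = (double) occurrences * 10000 / total;   // "Relativna pogostost" column

        System.out.printf("share = %.4f%%, relative frequency = %.2f per 10,000%n",
                share * 100, perTenThousand);
    }
}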