Added collocability functionality - implemented Dice method
This commit is contained in:
parent
1d9e9b7ed6
commit
f9ce74d7b8
51
src/main/java/data/Collocability.java
Executable file
51
src/main/java/data/Collocability.java
Executable file
|
@ -0,0 +1,51 @@
|
|||
package data;
|
||||
|
||||
/**
 * Collocability (collocation-strength) measures supported by the n-gram analysis.
 * Currently only the Dice coefficient is implemented.
 *
 * <p>Each constant carries a display name used both in the GUI (via {@link #toString()})
 * and when parsing the user's selection back into an enum (via {@link #factory(String)}).
 */
public enum Collocability {
    DICE("Dice");

    /** Human-readable display name shown in the UI and matched by {@link #factory(String)}. */
    private final String name;

    Collocability(String name) {
        this.name = name;
    }

    @Override
    public String toString() {
        return this.name;
    }

    /**
     * Resolves a display name back to its enum constant.
     *
     * @param cf the display name as shown in the UI (e.g. {@code "Dice"}); may be {@code null}
     * @return the matching constant, or {@code null} if {@code cf} is {@code null} or unknown
     */
    public static Collocability factory(String cf) {
        if (cf != null) {
            // Iterate all constants so that adding a new measure does not require
            // touching this method (the original hard-coded a check against DICE only).
            for (Collocability c : values()) {
                if (c.toString().equals(cf)) {
                    return c;
                }
            }
        }
        return null;
    }

    /**
     * Label used in the metadata header block of exported CSV results
     * (Slovene UI text; note the trailing colon).
     *
     * @return the metadata label, or {@code null} for constants without one
     */
    public String toMetadataString() {
        switch (this) {
            case DICE:
                return "Kolokabilnost - Dice:";
            default:
                return null;
        }
    }

    /**
     * Column header used for this measure in exported CSV results (Slovene UI text).
     *
     * @return the column header, or {@code null} for constants without one
     */
    public String toHeaderString() {
        switch (this) {
            case DICE:
                return "Kolokabilnost - Dice";
            default:
                return null;
        }
    }
}
|
|
@ -8,7 +8,7 @@ import java.util.regex.Pattern;
|
|||
import gui.ValidationUtil;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public class Filter {
|
||||
public class Filter implements Cloneable {
|
||||
private HashMap<filterName, Object> filter;
|
||||
|
||||
public enum filterName {
|
||||
|
@ -28,7 +28,8 @@ public class Filter {
|
|||
MULTIPLE_KEYS,
|
||||
NOTE_PUNCTUATIONS,
|
||||
MINIMAL_OCCURRENCES,
|
||||
MINIMAL_TAXONOMY
|
||||
MINIMAL_TAXONOMY,
|
||||
COLLOCABILITY
|
||||
}
|
||||
|
||||
public Filter() {
|
||||
|
@ -186,6 +187,23 @@ public class Filter {
|
|||
filter.put(MULTIPLE_KEYS, newKeys);
|
||||
}
|
||||
|
||||
public void setCollocability(ArrayList<Collocability> keys) {
|
||||
ArrayList<Collocability> newKeys = new ArrayList<>();
|
||||
if (keys != null) {
|
||||
newKeys.addAll(keys);
|
||||
}
|
||||
|
||||
filter.put(COLLOCABILITY, newKeys);
|
||||
}
|
||||
|
||||
public ArrayList<Collocability> getCollocability() {
|
||||
if (filter.containsKey(COLLOCABILITY) && filter.get(COLLOCABILITY) != null) {
|
||||
return (ArrayList<Collocability>) filter.get(COLLOCABILITY);
|
||||
} else {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<CalculateFor> getMultipleKeys() {
|
||||
if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) {
|
||||
return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS);
|
||||
|
@ -254,4 +272,18 @@ public class Filter {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
public Object clone() throws CloneNotSupportedException{
|
||||
Filter f = null;
|
||||
try {
|
||||
f = (Filter) super.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
f = new Filter();
|
||||
}
|
||||
f.filter = (HashMap<filterName, Object>) f.filter.clone();
|
||||
|
||||
return f;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,14 +40,16 @@ public class StatisticsNew {
|
|||
private RDB db;
|
||||
private boolean analysisProducedResults;
|
||||
private LocalDateTime time;
|
||||
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
|
||||
|
||||
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
||||
this.corpus = corpus;
|
||||
this.filter = filter;
|
||||
this.taxonomyResult = new ConcurrentHashMap<>();
|
||||
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
|
||||
this.collocability = new ConcurrentHashMap<>();
|
||||
|
||||
// create table for counting word occurances per taxonomies
|
||||
// create table for counting word occurrences per taxonomies
|
||||
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
|
||||
if (this.filter.getTaxonomy().isEmpty()) {
|
||||
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
|
||||
|
@ -213,7 +215,7 @@ public class StatisticsNew {
|
|||
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
|
||||
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
|
||||
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
|
||||
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult, filter);
|
||||
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -442,11 +444,6 @@ public class StatisticsNew {
|
|||
info.put("n-gram nivo:", String.valueOf(ngramLevel));
|
||||
}
|
||||
|
||||
// else if (ngramLevel == 1){
|
||||
// info.put("n-gram nivo:", "nivo besed");
|
||||
// } else {
|
||||
// info.put("n-gram nivo:", "nivo črk");
|
||||
// }
|
||||
// skip
|
||||
if (ngramLevel > 1)
|
||||
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
|
||||
|
@ -464,11 +461,6 @@ public class StatisticsNew {
|
|||
info.put("MSD:", msdPattern.toString());
|
||||
}
|
||||
|
||||
// taksonomija
|
||||
// if (!isEmpty(filter.getTaxonomy())) {
|
||||
// info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
|
||||
// }
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@ -496,4 +488,28 @@ public class StatisticsNew {
|
|||
|
||||
return info;
|
||||
}
|
||||
|
||||
public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
|
||||
Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
|
||||
|
||||
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
|
||||
|
||||
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
|
||||
String[] splitedString = hmKey.getK1().split("\\s+");
|
||||
|
||||
long sum_fwi =0L;
|
||||
for(String s : splitedString){
|
||||
MultipleHMKeys search = new MultipleHMKeys1(s);
|
||||
sum_fwi += oneWordTaxonomyResult.get("Total").get(search).longValue();
|
||||
}
|
||||
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
|
||||
collocabilityMap.put(hmKey, dice_value);
|
||||
}
|
||||
|
||||
collocability.put(filter.getCollocability().get(0), collocabilityMap);
|
||||
}
|
||||
|
||||
public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability(){
|
||||
return this.collocability;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,10 +7,13 @@ import static gui.Messages.*;
|
|||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.*;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javafx.application.HostServices;
|
||||
import javafx.collections.transformation.SortedList;
|
||||
import org.apache.commons.lang3.SerializationUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
@ -59,6 +62,10 @@ public class StringAnalysisTabNew2 {
|
|||
private ComboBox<String> calculateForCB;
|
||||
private CalculateFor calculateFor;
|
||||
|
||||
@FXML
|
||||
private CheckComboBox<String> collocabilityCCB;
|
||||
private ArrayList<Collocability> collocability;
|
||||
|
||||
@FXML
|
||||
private ComboBox<String> ngramValueCB;
|
||||
private Integer ngramValue;
|
||||
|
@ -126,6 +133,7 @@ public class StringAnalysisTabNew2 {
|
|||
private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
|
||||
private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
|
||||
private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
|
||||
private static final ObservableList<String> COLLOCABILITY_ITEMS = FXCollections.observableArrayList("Dice");
|
||||
private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||
|
||||
|
||||
|
@ -219,6 +227,21 @@ public class StringAnalysisTabNew2 {
|
|||
|
||||
calculateForCB.getSelectionModel().select(0);
|
||||
|
||||
// collocabilityCCB
|
||||
collocabilityCCB.getItems().removeAll();
|
||||
|
||||
collocabilityCCB.getItems().setAll(FXCollections.observableArrayList(COLLOCABILITY_ITEMS));
|
||||
collocabilityCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||
collocability = new ArrayList<>();
|
||||
ObservableList<Collocability> checkedItems = FXCollections.observableArrayList();
|
||||
for (String el : collocabilityCCB.getCheckModel().getCheckedItems()) {
|
||||
checkedItems.add(Collocability.factory(el));
|
||||
}
|
||||
collocability.addAll(checkedItems);
|
||||
logger.info(String.format("Selected collocabilities: %s", StringUtils.join(collocabilityCCB.getCheckModel().getCheckedItems(), ",")));
|
||||
});
|
||||
collocabilityCCB.getCheckModel().clearChecks();
|
||||
|
||||
// msd
|
||||
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
if (!newValue) {
|
||||
|
@ -535,6 +558,7 @@ public class StringAnalysisTabNew2 {
|
|||
filter.setMsd(msd);
|
||||
filter.setMinimalOccurrences(minimalOccurrences);
|
||||
filter.setMinimalTaxonomy(minimalTaxonomy);
|
||||
filter.setCollocability(collocability);
|
||||
|
||||
if (ngramValue != null && ngramValue == 0) {
|
||||
filter.setStringLength(stringLength);
|
||||
|
@ -545,6 +569,7 @@ public class StringAnalysisTabNew2 {
|
|||
// no errors
|
||||
logger.info("Executing: ", filter.toString());
|
||||
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||
// ADD THINGS HERE!!!
|
||||
execute(statistic);
|
||||
} else {
|
||||
logAlert(message);
|
||||
|
@ -585,8 +610,109 @@ public class StringAnalysisTabNew2 {
|
|||
}
|
||||
}
|
||||
|
||||
// public void calculate_collocabilities(StatisticsNew statistics, StatisticsNew oneWordStatistics) {
|
||||
// statistics.updateCalculateCollocabilities(oneWordStatistics);
|
||||
//
|
||||
// }
|
||||
|
||||
private final Task<Void> prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
|
||||
Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();
|
||||
|
||||
final Task<Void> task = new Task<Void>() {
|
||||
@SuppressWarnings("Duplicates")
|
||||
@Override
|
||||
protected Void call() throws Exception {
|
||||
long i = 0;
|
||||
for (File f : corpusFiles) {
|
||||
readXML(f.toString(), statisticsOneGrams);
|
||||
// i++;
|
||||
// this.updateProgress(i, corpusFiles.size());
|
||||
// this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
// progressLabel.textProperty().bind(task.messageProperty());
|
||||
|
||||
task.setOnSucceeded(e -> {
|
||||
System.out.print("test");
|
||||
try {
|
||||
System.out.print(statistic);
|
||||
// calculate_collocabilities(statistic, statisticsOneGrams);
|
||||
statistic.updateCalculateCollocabilities(statisticsOneGrams);
|
||||
boolean successullySaved = statistic.saveResultToDisk();
|
||||
if (successullySaved) {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||
} else {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||
}
|
||||
} catch (UnsupportedEncodingException e1) {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||
logger.error("Error while saving", e1);
|
||||
} catch (OutOfMemoryError e1) {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
||||
logger.error("Out of memory error", e1);
|
||||
}
|
||||
// try {
|
||||
// boolean successullySaved = statistic.saveResultToDisk();
|
||||
// if (successullySaved) {
|
||||
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||
// } else {
|
||||
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||
// }
|
||||
// } catch (UnsupportedEncodingException e1) {
|
||||
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||
// logger.error("Error while saving", e1);
|
||||
// } catch (OutOfMemoryError e1){
|
||||
// showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
||||
// logger.error("Out of memory error", e1);
|
||||
// }
|
||||
//
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
});
|
||||
|
||||
task.setOnFailed(e -> {
|
||||
// showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
||||
// logger.error("Error while executing", e);
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
});
|
||||
|
||||
task.setOnCancelled(e -> {
|
||||
// showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_CANCLED);
|
||||
// ngramProgressBar.progressProperty().unbind();
|
||||
// ngramProgressBar.setProgress(0.0);
|
||||
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
// progressLabel.textProperty().unbind();
|
||||
// progressLabel.setText("");
|
||||
// cancel.setVisible(false);
|
||||
});
|
||||
|
||||
// When cancel button is pressed cancel analysis
|
||||
cancel.setOnAction(e -> {
|
||||
task.cancel();
|
||||
// logger.info("cancel button");
|
||||
});
|
||||
|
||||
// cancel.setVisible(true);
|
||||
return task;
|
||||
}
|
||||
|
||||
private void execute(StatisticsNew statistic) {
|
||||
logger.info("Started execution: ", statistic.getFilter());
|
||||
Filter f = statistic.getFilter();
|
||||
logger.info("Started execution: ", f);
|
||||
// Task<Void> task_collocability = null;
|
||||
|
||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||
boolean corpusIsSplit = corpusFiles.size() > 1;
|
||||
|
@ -613,9 +739,23 @@ public class StringAnalysisTabNew2 {
|
|||
|
||||
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
progressLabel.textProperty().bind(task.messageProperty());
|
||||
|
||||
task.setOnSucceeded(e -> {
|
||||
if (f.getCollocability().size() > 0) {
|
||||
try{
|
||||
Filter f2 = (Filter) f.clone();
|
||||
f2.setNgramValue(1);
|
||||
StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, useDb);
|
||||
final Task<Void> taskCollocability = prepareTaskForCollocability(statistic, statisticsOneGrams);
|
||||
final Thread thread_collocability = new Thread(taskCollocability, "task_collocability");
|
||||
thread_collocability.setDaemon(true);
|
||||
thread_collocability.start();
|
||||
}catch(CloneNotSupportedException c){}
|
||||
|
||||
|
||||
|
||||
} else {
|
||||
try {
|
||||
// System.out.print(statistics);
|
||||
boolean successullySaved = statistic.saveResultToDisk();
|
||||
if (successullySaved) {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||
|
@ -625,10 +765,11 @@ public class StringAnalysisTabNew2 {
|
|||
} catch (UnsupportedEncodingException e1) {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||
logger.error("Error while saving", e1);
|
||||
} catch (OutOfMemoryError e1){
|
||||
} catch (OutOfMemoryError e1) {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_NOT_ENOUGH_MEMORY);
|
||||
logger.error("Out of memory error", e1);
|
||||
}
|
||||
}
|
||||
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
|
|
|
@ -9,9 +9,7 @@ import java.util.concurrent.ConcurrentHashMap;
|
|||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import data.CalculateFor;
|
||||
import data.Filter;
|
||||
import data.MultipleHMKeys;
|
||||
import data.*;
|
||||
import gui.ValidationUtil;
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVPrinter;
|
||||
|
@ -61,7 +59,9 @@ public class Export {
|
|||
}
|
||||
|
||||
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults, Filter filter) {
|
||||
StatisticsNew statistics, Filter filter) {
|
||||
Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResults = statistics.getTaxonomyResult();
|
||||
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
||||
|
@ -96,77 +96,22 @@ public class Export {
|
|||
FILE_HEADER_AL.add("Lema male črke");
|
||||
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
|
||||
|
||||
// if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
|
||||
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
||||
// headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||
// if (headerInfoBlock.get("Analiza").equals("Besede")){
|
||||
// FILE_HEADER_AL.add("Različnica");
|
||||
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
|
||||
// FILE_HEADER_AL.add("Različnice");
|
||||
// }
|
||||
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
||||
// headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
|
||||
// if (headerInfoBlock.get("Analiza").equals("Besede")){
|
||||
// FILE_HEADER_AL.add("Lema");
|
||||
// FILE_HEADER_AL.add("Lema male črke");
|
||||
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
|
||||
// FILE_HEADER_AL.add("Leme");
|
||||
// FILE_HEADER_AL.add("Leme male črke");
|
||||
// }
|
||||
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
||||
// headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
|
||||
// if (headerInfoBlock.get("Analiza").equals("Besede")){
|
||||
// FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
||||
// } else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
|
||||
// FILE_HEADER_AL.add("Oblikoskladenjska oznake");
|
||||
// }
|
||||
// } else {
|
||||
// headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
|
||||
// FILE_HEADER_AL.add("Lema");
|
||||
// FILE_HEADER_AL.add("Lema male črke");
|
||||
// }
|
||||
|
||||
|
||||
// for (Map<MultipleHMKeys, AtomicLong> value : taxonomyResults.values()) {
|
||||
for (CalculateFor otherKey : filter.getMultipleKeys()) {
|
||||
FILE_HEADER_AL.add(otherKey.toHeaderString());
|
||||
if (otherKey.equals(CalculateFor.LEMMA))
|
||||
FILE_HEADER_AL.add("Lema male črke");
|
||||
}
|
||||
|
||||
// if(otherKey.equals(CalculateFor.LEMMA)){
|
||||
// FILE_HEADER_AL.add("Lema");
|
||||
// FILE_HEADER_AL.add("Lema male črke");
|
||||
// }
|
||||
// if(otherKey.equals(CalculateFor.WORD_TYPE)){
|
||||
// FILE_HEADER_AL.add("Besedna vrsta");
|
||||
// }
|
||||
// if(otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
// FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
|
||||
// }
|
||||
// if(otherKey.equals(CalculateFor.NORMALIZED_WORD)){
|
||||
// FILE_HEADER_AL.add("Normalizirana različnica");
|
||||
// }
|
||||
// }
|
||||
|
||||
// break;
|
||||
// }
|
||||
|
||||
|
||||
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString());
|
||||
|
||||
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
|
||||
// FILE_HEADER_AL.add("Delež glede na vse različnice");
|
||||
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
|
||||
// FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||
// } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
|
||||
// FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake");
|
||||
// } else {
|
||||
// FILE_HEADER_AL.add("Delež glede na vse leme");
|
||||
// }
|
||||
FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
|
||||
|
||||
if (filter.getCollocability().size() > 0){
|
||||
FILE_HEADER_AL.add(filter.getCollocability().get(0).toHeaderString());
|
||||
}
|
||||
|
||||
for (String key : taxonomyResults.keySet()) {
|
||||
if(!key.equals("Total") && num_taxonomy_frequencies.get(key) > 0) {
|
||||
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
|
||||
|
@ -176,9 +121,6 @@ public class Export {
|
|||
}
|
||||
FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
|
||||
FILE_HEADER_AL.toArray(FILE_HEADER);
|
||||
// } else {
|
||||
// FILE_HEADER = new Object[]{"word", "frequency", "percent"};
|
||||
// }
|
||||
|
||||
String fileName = "";
|
||||
|
||||
|
@ -250,16 +192,7 @@ public class Export {
|
|||
i++;
|
||||
}
|
||||
|
||||
// if(!e.getKey().getLemma().equals("")){
|
||||
// dataEntry.add(e.getKey().getLemma());
|
||||
// dataEntry.add(e.getKey().getLemma().toLowerCase());
|
||||
// }
|
||||
// if(!e.getKey().getWordType().equals("")){
|
||||
// dataEntry.add(e.getKey().getWordType());
|
||||
// }
|
||||
// if(!e.getKey().getMsd().equals("")){
|
||||
// dataEntry.add(e.getKey().getMsd());
|
||||
// }
|
||||
|
||||
dataEntry.add(e.getValue().toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
|
||||
|
@ -270,6 +203,11 @@ public class Export {
|
|||
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
|
||||
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (filter.getCollocability().size() > 0){
|
||||
dataEntry.add(String.format("%.4f", statistics.getCollocability().get(filter.getCollocability().get(0)).get(e.getKey())));
|
||||
}
|
||||
|
||||
// Write msd separated per letters at the end of each line in csv
|
||||
|
|
|
@ -105,8 +105,12 @@
|
|||
<Pane layoutX="400.0" prefHeight="480.0" prefWidth="380.0">
|
||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Min. št. taksonomij" />
|
||||
<TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="60.0" prefWidth="180.0" />
|
||||
<Label fx:id="solarFilters" layoutX="10.0" layoutY="100.0" text="Izbrani filtri:" />
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="300.0" prefWidth="275.0" text=" " wrapText="true" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Kolokabilnost" />
|
||||
<CheckComboBox fx:id="collocabilityCCB" layoutX="185.0" layoutY="100.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
|
||||
<Label fx:id="solarFilters" layoutX="10.0" layoutY="140.0" text="Izbrani filtri:" />
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="10.0" layoutY="140.0" prefHeight="260.0" prefWidth="275.0" text=" " wrapText="true" />
|
||||
<!-- samoglasniki/soglasniki -->
|
||||
<Pane fx:id="paneLetters">
|
||||
<children>
|
||||
|
|
Loading…
Reference in New Issue
Block a user