2018-06-19 07:15:37 +00:00
|
|
|
package data;
|
|
|
|
|
|
|
|
import static gui.ValidationUtil.*;
|
|
|
|
|
|
|
|
import java.io.UnsupportedEncodingException;
|
|
|
|
import java.time.LocalDateTime;
|
|
|
|
import java.time.format.DateTimeFormatter;
|
|
|
|
import java.util.*;
|
|
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
|
|
|
import org.apache.commons.lang3.tuple.Pair;
|
|
|
|
import org.apache.logging.log4j.LogManager;
|
|
|
|
import org.apache.logging.log4j.Logger;
|
|
|
|
|
|
|
|
import alg.inflectedJOS.WordFormation;
|
|
|
|
import data.Enums.WordLevelType;
|
|
|
|
import javafx.collections.ObservableList;
|
|
|
|
import util.Export;
|
|
|
|
import util.Util;
|
|
|
|
import util.db.RDB;
|
|
|
|
|
|
|
|
@SuppressWarnings("Duplicates")
|
|
|
|
public class StatisticsNew {
|
|
|
|
// Shared log4j logger for this class.
public final static Logger logger = LogManager.getLogger(StatisticsNew.class);
|
|
|
|
|
|
|
|
// Corpus and filter this statistics run was created for (both set once in the constructor).
private Corpus corpus;
|
|
|
|
private Filter filter;
|
|
|
|
|
|
|
|
// Title produced by generateResultTitle(); passed to the CSV export calls.
private String resultTitle;
|
|
|
|
// Flat counter map; created in the constructor only when the analysis level is not WORD_LEVEL,
// and replaced by db.getDump() in the save methods when useDB is set.
private Map<String, AtomicLong> result;
|
2018-07-16 08:14:21 +00:00
|
|
|
// Counters per column name; the constructor always creates the "Total" column and,
// when taxonomy display is enabled, one column per taxonomy.
private Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult;
|
2018-06-19 07:15:37 +00:00
|
|
|
private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
|
2018-07-16 08:14:21 +00:00
|
|
|
// Nested counters for suffixes / prefixes; created in the constructor only for WORD_LEVEL analysis.
private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedSuffix;
|
|
|
|
private Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> resultNestedPrefix;
|
2018-06-19 07:15:37 +00:00
|
|
|
// True when intermediate results are written to `db`; `db` is only instantiated in that case.
private boolean useDB;
|
|
|
|
private RDB db;
|
|
|
|
// Set by the save methods: true when there was something to export.
private boolean analysisProducedResults;
|
|
|
|
// Timestamp used in the result title and header; assigned on the first generateResultTitle() call.
private LocalDateTime time;
|
2018-10-24 08:36:07 +00:00
|
|
|
// Collocability scores per measure; filled by updateCalculateCollocabilities().
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
|
2018-06-19 07:15:37 +00:00
|
|
|
|
|
|
|
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
|
|
|
this.corpus = corpus;
|
|
|
|
this.filter = filter;
|
2018-06-29 10:53:29 +00:00
|
|
|
this.taxonomyResult = new ConcurrentHashMap<>();
|
2018-07-05 07:37:35 +00:00
|
|
|
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
|
2018-10-24 08:36:07 +00:00
|
|
|
this.collocability = new ConcurrentHashMap<>();
|
2018-06-29 10:53:29 +00:00
|
|
|
|
2018-10-24 08:36:07 +00:00
|
|
|
// create table for counting word occurrences per taxonomies
|
2018-08-22 07:11:14 +00:00
|
|
|
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
|
2018-07-31 06:58:17 +00:00
|
|
|
if (this.filter.getTaxonomy().isEmpty()) {
|
|
|
|
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
|
|
|
|
this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
|
|
|
|
Tax taxonomy = new Tax();
|
|
|
|
this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>());
|
|
|
|
}
|
2018-06-29 10:53:29 +00:00
|
|
|
}
|
|
|
|
}
|
2018-06-19 07:15:37 +00:00
|
|
|
|
|
|
|
if (useDB) {
|
|
|
|
this.useDB = true;
|
|
|
|
db = new RDB();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (filter.getAl() == AnalysisLevel.WORD_LEVEL) {
|
|
|
|
resultNestedSuffix = new ConcurrentHashMap<>();
|
|
|
|
resultNestedPrefix = new ConcurrentHashMap<>();
|
|
|
|
} else {
|
|
|
|
result = new ConcurrentHashMap<>();
|
|
|
|
}
|
|
|
|
|
|
|
|
resultTitle = generateResultTitle();
|
|
|
|
|
|
|
|
logger.debug(toString());
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Result's title consists of:
|
|
|
|
* <ul>
|
|
|
|
* <li>Corpus type</li>
|
|
|
|
* <li>Analysis level</li>
|
|
|
|
* <li>Calculate for</li>
|
|
|
|
* <li></li>
|
|
|
|
* <li></li>
|
|
|
|
* <li></li>
|
|
|
|
* <li></li>
|
|
|
|
* </ul>
|
|
|
|
*
|
|
|
|
* @return
|
|
|
|
*/
|
|
|
|
private String generateResultTitle() {
|
|
|
|
String separator = "_";
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
|
|
|
|
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
|
|
|
Integer ngramLevel = filter.getNgramValue();
|
|
|
|
if(ngramLevel == 0) {
|
2018-07-05 07:37:35 +00:00
|
|
|
sb.append(corpus.getCorpusType().toString())
|
|
|
|
.append(separator)
|
|
|
|
.append("crke")
|
|
|
|
.append(separator)
|
|
|
|
.append(filter.getCalculateFor())
|
2018-06-19 07:15:37 +00:00
|
|
|
.append(separator);
|
|
|
|
} else if(ngramLevel == 1) {
|
2018-07-05 07:37:35 +00:00
|
|
|
sb.append(corpus.getCorpusType().toString())
|
|
|
|
.append(separator)
|
|
|
|
.append("besede")
|
|
|
|
.append(separator)
|
|
|
|
.append(filter.getCalculateFor())
|
2018-06-19 07:15:37 +00:00
|
|
|
.append(separator);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
sb.append(filter.getAl().toString())
|
|
|
|
.append(separator)
|
|
|
|
.append(corpus.getCorpusType().toString())
|
|
|
|
.append(separator);
|
|
|
|
sb.append(filter.getCalculateFor().toString())
|
|
|
|
.append(separator);
|
|
|
|
// ngram value
|
|
|
|
sb.append(filter.getNgramValue()).append("-gram")
|
|
|
|
.append(separator);
|
|
|
|
sb.append(filter.getSkipValue()).append("-preskok")
|
|
|
|
.append(separator);
|
|
|
|
}
|
|
|
|
// TODO: assure skip is not null but zero
|
|
|
|
|
|
|
|
} else {
|
|
|
|
sb.append(filter.getAl().toString()) // analysis level
|
|
|
|
.append(separator)
|
|
|
|
.append(corpus.getCorpusType().toString())
|
|
|
|
.append(separator);
|
|
|
|
}
|
|
|
|
// skip value
|
|
|
|
// msd ?
|
|
|
|
// if taxonomy -> taxonomy
|
|
|
|
// if cvv -> cvv + dolžina
|
|
|
|
|
|
|
|
this.time = this.time != null ? this.time : LocalDateTime.now();
|
|
|
|
|
|
|
|
sb.append(time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss")));
|
|
|
|
return sb.toString();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean isAnalysisProducedResults() {
|
|
|
|
return analysisProducedResults;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setAnalysisProducedResults(boolean analysisProducedResults) {
|
|
|
|
this.analysisProducedResults = analysisProducedResults;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String toString() {
|
|
|
|
String newLine = "\n\t- ";
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
sb.append(newLine).append("Statistic properties:");
|
|
|
|
sb.append(newLine).append(corpus.getCorpusType().toString()).append(String.format(" (%d files)", corpus.getDetectedCorpusFiles().size()));
|
|
|
|
sb.append(newLine).append(useDB ? "use DB" : "run in memory");
|
|
|
|
sb.append(newLine).append(filter.toString());
|
|
|
|
|
|
|
|
return sb.toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
public String getResultTitle() {
|
|
|
|
return resultTitle;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ****************************************
|
|
|
|
// ***************** util *****************
|
|
|
|
// ****************************************
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Stores results from this batch to a database and clears results map
|
|
|
|
*/
|
|
|
|
public void storeTmpResultsToDB() {
|
|
|
|
try {
|
|
|
|
db.writeBatch(result);
|
|
|
|
result = new ConcurrentHashMap<>();
|
|
|
|
} catch (UnsupportedEncodingException e) {
|
|
|
|
logger.error("Store tmp results to DB", e);
|
|
|
|
// e.printStackTrace();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public Filter getFilter() {
|
|
|
|
return filter;
|
|
|
|
}
|
|
|
|
|
|
|
|
public Corpus getCorpus() {
|
|
|
|
return corpus;
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException {
|
2018-07-16 08:14:21 +00:00
|
|
|
Set<Pair<String, Map<MultipleHMKeys, Long>>> stats = new HashSet<>();
|
2018-06-19 07:15:37 +00:00
|
|
|
|
|
|
|
if (useDB) {
|
|
|
|
result = db.getDump();
|
|
|
|
db.delete();
|
|
|
|
}
|
|
|
|
|
|
|
|
// if no results and nothing to save, return false
|
2018-07-05 07:37:35 +00:00
|
|
|
if (!(taxonomyResult.get("Total").size() > 0)) {
|
2018-06-19 07:15:37 +00:00
|
|
|
analysisProducedResults = false;
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
analysisProducedResults = true;
|
|
|
|
}
|
|
|
|
|
2018-07-31 06:58:17 +00:00
|
|
|
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
|
|
|
|
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
|
2018-07-05 07:37:35 +00:00
|
|
|
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
|
2018-10-24 08:36:07 +00:00
|
|
|
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
|
2018-06-19 07:15:37 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-07-31 06:58:17 +00:00
|
|
|
/**
|
|
|
|
* Removes lines, where number of different taxonomies is lower than specified number (minimalTaxonomy)
|
|
|
|
*/
|
|
|
|
private void removeMinimalTaxonomy(Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult, Integer minimalTaxonomy) {
|
|
|
|
if (minimalTaxonomy == 1)
|
|
|
|
return;
|
|
|
|
int occurances;
|
|
|
|
for (MultipleHMKeys key : taxonomyResult.get("Total").keySet()){
|
|
|
|
occurances = 0;
|
|
|
|
for (String columnNameKey : taxonomyResult.keySet()){
|
|
|
|
if(!columnNameKey.equals("Total") && taxonomyResult.get(columnNameKey).get(key).intValue() >= 1)
|
|
|
|
occurances++;
|
|
|
|
}
|
|
|
|
if(occurances < minimalTaxonomy){
|
|
|
|
taxonomyResult.get("Total").remove(key);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Removes lines where total number of occurrences is lower than specified number (minimalOccurrences)
|
|
|
|
*/
|
|
|
|
private void removeMinimalOccurrences(Map<MultipleHMKeys, AtomicLong> taxonomyResultTotal, Integer minimalOccurrences) {
|
|
|
|
if (minimalOccurrences == 0)
|
|
|
|
return;
|
|
|
|
for (MultipleHMKeys key : taxonomyResultTotal.keySet()){
|
|
|
|
if(taxonomyResultTotal.get(key).intValue() < minimalOccurrences){
|
|
|
|
taxonomyResultTotal.remove(key);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-19 07:15:37 +00:00
|
|
|
public boolean saveResultNestedToDisk(int... limit) throws UnsupportedEncodingException {
|
|
|
|
resultTitle = generateResultTitle();
|
|
|
|
|
|
|
|
if (useDB) {
|
|
|
|
result = db.getDump();
|
|
|
|
db.delete();
|
|
|
|
}
|
|
|
|
Map<WordLevelType, Map<String, Map<String, Long>>> results = new HashMap<>();
|
|
|
|
|
2018-07-16 08:14:21 +00:00
|
|
|
// UNCOMMENT!!!!!!
|
|
|
|
// if (!isEmpty(resultNestedSuffix)) {
|
|
|
|
// results.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit)));
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// if (!isEmpty(resultNestedPrefix)) {
|
|
|
|
// results.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit)));
|
|
|
|
// }
|
2018-06-19 07:15:37 +00:00
|
|
|
|
|
|
|
// if no results and nothing to save, return false
|
|
|
|
if (!(results.size() > 0)) {
|
|
|
|
analysisProducedResults = false;
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
analysisProducedResults = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
Export.nestedMapToCSV(resultTitle, results, corpus.getChosenResultsLocation(), headerInfoBlock());
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
|
|
|
|
filter.setAl(AnalysisLevel.WORD_FORMATION);
|
|
|
|
resultTitle = generateResultTitle();
|
|
|
|
|
|
|
|
if (useDB) {
|
|
|
|
result = db.getDump();
|
|
|
|
db.delete();
|
|
|
|
}
|
|
|
|
|
|
|
|
// if no results and nothing to save, return false
|
|
|
|
if (!(result.size() > 0)) {
|
|
|
|
analysisProducedResults = false;
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
analysisProducedResults = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
WordFormation.calculateStatistics(this);
|
|
|
|
|
|
|
|
Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-07-16 08:14:21 +00:00
|
|
|
private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) {
|
|
|
|
Map<String, Map<MultipleHMKeys, Long>> sorted = new HashMap<>();
|
2018-06-19 07:15:37 +00:00
|
|
|
|
|
|
|
for (String s : nestedMap.keySet()) {
|
|
|
|
sorted.put(s, getSortedResult(nestedMap.get(s), Util.getValidInt(limit)));
|
|
|
|
}
|
|
|
|
|
|
|
|
return sorted;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-07-16 08:14:21 +00:00
|
|
|
private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
|
2018-06-19 07:15:37 +00:00
|
|
|
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
|
|
|
}
|
|
|
|
|
2018-07-16 08:14:21 +00:00
|
|
|
public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
|
2018-06-29 10:53:29 +00:00
|
|
|
for (String key : taxonomyResult.keySet()) {
|
|
|
|
// first word should have the same taxonomy as others
|
2018-07-31 06:58:17 +00:00
|
|
|
if (key.equals("Total") || taxonomy.contains(key)) {
|
|
|
|
// if (key.equals("Total") || taxonomy != null && taxonomy.contains(key)) {
|
2018-06-29 10:53:29 +00:00
|
|
|
// if taxonomy not in map and in this word
|
|
|
|
AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));
|
|
|
|
|
|
|
|
if (r != null)
|
|
|
|
taxonomyResult.get(key).get(o).incrementAndGet();
|
|
|
|
} else {
|
|
|
|
// if taxonomy not in map and not in this word
|
|
|
|
AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(0));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// if not in map
|
|
|
|
|
|
|
|
|
|
|
|
// else
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2018-07-17 14:04:26 +00:00
|
|
|
public Map<String, Map<MultipleHMKeys, AtomicLong>> getTaxonomyResult() {
|
|
|
|
return taxonomyResult;
|
|
|
|
}
|
|
|
|
|
2018-06-19 07:15:37 +00:00
|
|
|
public void updateResults(String o) {
|
|
|
|
// if not in map
|
|
|
|
AtomicLong r = result.putIfAbsent(o, new AtomicLong(1));
|
|
|
|
|
|
|
|
// else
|
|
|
|
if (r != null)
|
|
|
|
result.get(o).incrementAndGet();
|
|
|
|
}
|
|
|
|
|
|
|
|
public Map<String, AtomicLong> getResult() {
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
public Object[][] getResultCustom() {
|
|
|
|
return resultCustom;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setResultCustom(Object[][] resultCustom) {
|
|
|
|
this.resultCustom = resultCustom;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void updateResultsNested(WordLevelType type, String key, String stringValue) {
|
|
|
|
ConcurrentHashMap<String, ConcurrentHashMap<String, AtomicLong>> resultsMap;
|
|
|
|
|
|
|
|
if (type == WordLevelType.SUFFIX) {
|
|
|
|
updateResultsNestedSuffix(key, stringValue);
|
|
|
|
} else if (type == WordLevelType.PREFIX) {
|
|
|
|
updateResultsNestedPrefix(key, stringValue);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public void updateResultsNestedSuffix(String key, String stringValue) {
|
2018-08-09 07:21:06 +00:00
|
|
|
MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);
|
2018-07-16 08:14:21 +00:00
|
|
|
|
2018-06-19 07:15:37 +00:00
|
|
|
if (resultNestedSuffix.containsKey(key)) {
|
|
|
|
// if not in map
|
2018-07-16 08:14:21 +00:00
|
|
|
AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
|
2018-06-19 07:15:37 +00:00
|
|
|
|
|
|
|
// else
|
|
|
|
if (r != null) {
|
|
|
|
resultNestedSuffix.get(key).get(stringValue).incrementAndGet();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
resultNestedSuffix.putIfAbsent(key, new ConcurrentHashMap<>());
|
2018-07-16 08:14:21 +00:00
|
|
|
AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
|
2018-06-19 07:15:37 +00:00
|
|
|
|
|
|
|
if (r != null) {
|
|
|
|
resultNestedSuffix.get(key).get(stringValue).incrementAndGet();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public void updateResultsNestedPrefix(String key, String stringValue) {
|
2018-08-09 07:21:06 +00:00
|
|
|
MultipleHMKeys mkStringValue = new MultipleHMKeys1(stringValue);
|
2018-07-16 08:14:21 +00:00
|
|
|
|
2018-06-19 07:15:37 +00:00
|
|
|
if (resultNestedPrefix.containsKey(key)) {
|
|
|
|
// if not in map
|
2018-07-16 08:14:21 +00:00
|
|
|
AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
|
2018-06-19 07:15:37 +00:00
|
|
|
|
|
|
|
// else
|
|
|
|
if (r != null) {
|
|
|
|
resultNestedPrefix.get(key).get(stringValue).incrementAndGet();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
resultNestedPrefix.putIfAbsent(key, new ConcurrentHashMap<>());
|
2018-07-16 08:14:21 +00:00
|
|
|
AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(mkStringValue, new AtomicLong(1));
|
2018-06-19 07:15:37 +00:00
|
|
|
|
|
|
|
if (r != null) {
|
|
|
|
resultNestedPrefix.get(key).get(stringValue).incrementAndGet();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private LinkedHashMap<String, String> headerInfoBlock() {
|
|
|
|
LinkedHashMap<String, String> info = new LinkedHashMap<>();
|
|
|
|
|
|
|
|
info.put("Korpus:", corpus.getCorpusType().toString());
|
|
|
|
info.put("Datum:", time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
|
|
|
|
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
|
|
|
Integer ngramLevel = filter.getNgramValue();
|
|
|
|
if (ngramLevel == 0)
|
2018-07-31 06:58:17 +00:00
|
|
|
info.put("Analiza", "Črke");
|
2018-06-19 07:15:37 +00:00
|
|
|
else if (ngramLevel == 1)
|
|
|
|
info.put("Analiza", "Besede");
|
|
|
|
else
|
2018-07-31 06:58:17 +00:00
|
|
|
info.put("Analiza", filter.getAl().toString());
|
2018-06-19 07:15:37 +00:00
|
|
|
} else {
|
2018-07-31 06:58:17 +00:00
|
|
|
info.put("Analiza", filter.getAl().toString());
|
2018-06-19 07:15:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
|
|
|
Integer ngramLevel = filter.getNgramValue();
|
|
|
|
|
|
|
|
// n.gram nivo
|
|
|
|
if (ngramLevel > 1) {
|
|
|
|
info.put("n-gram nivo:", String.valueOf(ngramLevel));
|
|
|
|
}
|
2018-06-27 08:14:40 +00:00
|
|
|
|
2018-06-19 07:15:37 +00:00
|
|
|
// skip
|
|
|
|
if (ngramLevel > 1)
|
|
|
|
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
|
|
|
|
|
|
|
|
// izračunaj za
|
|
|
|
info.put("Izračunaj za:", filter.getCalculateFor().toString());
|
|
|
|
|
|
|
|
// msd
|
|
|
|
if (!isEmpty(filter.getMsd())) {
|
|
|
|
StringBuilder msdPattern = new StringBuilder();
|
|
|
|
for (Pattern pattern : filter.getMsd()) {
|
|
|
|
msdPattern.append(pattern.toString()).append(" ");
|
|
|
|
}
|
|
|
|
|
|
|
|
info.put("MSD:", msdPattern.toString());
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2018-06-29 10:53:29 +00:00
|
|
|
if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
|
|
|
ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
|
|
|
|
|
|
|
|
info.put("Taksonomija: ", "");
|
|
|
|
String sep = "";
|
|
|
|
for (String s : tax) {
|
|
|
|
info.put(sep = sep + " ", s);
|
|
|
|
}
|
|
|
|
}
|
2018-06-19 07:15:37 +00:00
|
|
|
|
|
|
|
if (corpus.getCorpusType() == CorpusType.SOLAR) {
|
|
|
|
HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
|
|
|
|
|
|
|
|
if (!isEmpty(filters)) {
|
|
|
|
info.put("Dodatni filtri: ", "");
|
|
|
|
|
|
|
|
for (Map.Entry<String, ObservableList<String>> f : filters.entrySet()) {
|
|
|
|
info.put(f.getKey(), StringUtils.join(f.getValue(), ", "));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return info;
|
|
|
|
}
|
2018-10-24 08:36:07 +00:00
|
|
|
|
|
|
|
public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
|
|
|
|
Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
|
|
|
|
|
|
|
|
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
|
|
|
|
|
|
|
|
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
|
2018-11-05 09:30:41 +00:00
|
|
|
// String[] splitedString = hmKey.getK1().split("\\s+");
|
2018-10-24 08:36:07 +00:00
|
|
|
|
|
|
|
long sum_fwi =0L;
|
2018-11-05 09:30:41 +00:00
|
|
|
|
|
|
|
for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){
|
|
|
|
System.out.println(smallHmKey.getK1());
|
|
|
|
sum_fwi += oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue();
|
2018-10-24 08:36:07 +00:00
|
|
|
}
|
|
|
|
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
|
|
|
|
collocabilityMap.put(hmKey, dice_value);
|
|
|
|
}
|
|
|
|
|
|
|
|
collocability.put(filter.getCollocability().get(0), collocabilityMap);
|
|
|
|
}
|
|
|
|
|
|
|
|
public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability(){
|
|
|
|
return this.collocability;
|
|
|
|
}
|
2018-06-19 07:15:37 +00:00
|
|
|
}
|