Added collocability functionality - implemented Dice method

This commit is contained in:
2018-10-24 10:36:07 +02:00
parent 1d9e9b7ed6
commit f9ce74d7b8
6 changed files with 290 additions and 108 deletions

View File

@@ -0,0 +1,51 @@
package data;
/**
 * Collocability (association) measures that can be computed for n-grams.
 * Currently only the Dice coefficient is supported.
 */
public enum Collocability {
    DICE("Dice");

    // Human-readable display name; also the lookup key used by factory().
    private final String name;

    Collocability(String name) {
        this.name = name;
    }

    @Override
    public String toString() {
        return this.name;
    }

    /**
     * Resolves a display name back to its enum constant.
     *
     * @param cf display name to resolve, e.g. "Dice"; may be null
     * @return the matching constant, or null if {@code cf} is null or unknown
     */
    public static Collocability factory(String cf) {
        // Iterating values() keeps this correct when new measures are added.
        for (Collocability c : values()) {
            if (c.toString().equals(cf)) {
                return c;
            }
        }
        return null;
    }

    /** Label used in the exported metadata block (Slovene, trailing colon). */
    public String toMetadataString() {
        switch (this) {
            case DICE:
                return "Kolokabilnost - Dice:";
            default:
                return null;
        }
    }

    /** Column header used in the exported results (Slovene, no colon). */
    public String toHeaderString() {
        switch (this) {
            case DICE:
                return "Kolokabilnost - Dice";
            default:
                return null;
        }
    }
}

View File

@@ -8,7 +8,7 @@ import java.util.regex.Pattern;
import gui.ValidationUtil;
@SuppressWarnings("unchecked")
public class Filter {
public class Filter implements Cloneable {
private HashMap<filterName, Object> filter;
public enum filterName {
@@ -28,7 +28,8 @@ public class Filter {
MULTIPLE_KEYS,
NOTE_PUNCTUATIONS,
MINIMAL_OCCURRENCES,
MINIMAL_TAXONOMY
MINIMAL_TAXONOMY,
COLLOCABILITY
}
public Filter() {
@@ -186,6 +187,23 @@ public class Filter {
filter.put(MULTIPLE_KEYS, newKeys);
}
/**
 * Stores the selected collocability measures under the COLLOCABILITY key.
 * A null argument is treated as an empty selection; a defensive copy is
 * taken so later mutation of the caller's list cannot affect the filter.
 *
 * @param keys measures to apply, or null for none
 */
public void setCollocability(ArrayList<Collocability> keys) {
    ArrayList<Collocability> copy = (keys == null) ? new ArrayList<>() : new ArrayList<>(keys);
    filter.put(COLLOCABILITY, copy);
}
/**
 * Returns the stored collocability measures, or a fresh empty list when
 * none have been set (never null). When present, the internal list itself
 * is returned — callers share it with the filter.
 *
 * @return the selected measures, possibly empty
 */
public ArrayList<Collocability> getCollocability() {
    // A single lookup covers both the missing-key and null-value cases
    // that the map could contain.
    Object stored = filter.get(COLLOCABILITY);
    return stored == null ? new ArrayList<>() : (ArrayList<Collocability>) stored;
}
public ArrayList<CalculateFor> getMultipleKeys() {
if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) {
return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS);
@@ -254,4 +272,18 @@ public class Filter {
}
}
/**
 * Creates a copy of this filter whose backing map is an independent
 * HashMap, so adding or removing entries on the copy does not affect the
 * original. The copy is shallow: the values stored in the map are shared
 * between the two instances.
 *
 * @return a cloned Filter with its own filter map
 * @throws CloneNotSupportedException never in practice (declared for
 *         interface compatibility with existing callers)
 */
@Override
public Object clone() throws CloneNotSupportedException {
    Filter f;
    try {
        f = (Filter) super.clone();
    } catch (CloneNotSupportedException e) {
        // Filter implements Cloneable, so super.clone() cannot throw here.
        // Failing loudly is safer than the old behavior of silently
        // returning an empty Filter and losing every configured setting.
        throw new AssertionError("Filter implements Cloneable", e);
    }
    // Detach the map so mutations on the clone do not leak back.
    f.filter = (HashMap<filterName, Object>) f.filter.clone();
    return f;
}
}

View File

@@ -40,14 +40,16 @@ public class StatisticsNew {
private RDB db;
private boolean analysisProducedResults;
private LocalDateTime time;
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
this.corpus = corpus;
this.filter = filter;
this.taxonomyResult = new ConcurrentHashMap<>();
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
this.collocability = new ConcurrentHashMap<>();
// create table for counting word occurances per taxonomies
// create table for counting word occurrences per taxonomies
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
@@ -213,7 +215,7 @@ public class StatisticsNew {
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult, filter);
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
return true;
}
@@ -442,11 +444,6 @@ public class StatisticsNew {
info.put("n-gram nivo:", String.valueOf(ngramLevel));
}
// else if (ngramLevel == 1){
// info.put("n-gram nivo:", "nivo besed");
// } else {
// info.put("n-gram nivo:", "nivo črk");
// }
// skip
if (ngramLevel > 1)
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
@@ -464,11 +461,6 @@ public class StatisticsNew {
info.put("MSD:", msdPattern.toString());
}
// taksonomija
// if (!isEmpty(filter.getTaxonomy())) {
// info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
// }
}
@@ -496,4 +488,28 @@ public class StatisticsNew {
return info;
}
/**
 * Computes the Dice collocability score for every n-gram in the "Total"
 * taxonomy result and stores the scores under the first selected measure.
 *
 * Dice = n * f(ngram) / sum(f(word_i)), where n is the n-gram length and
 * f(word_i) are single-word frequencies taken from oneWordStatistics
 * (assumed to be a run over the same corpus at n-gram level 1 —
 * TODO confirm with the caller).
 *
 * N-grams containing a word that is missing from the one-word result, or
 * whose word frequencies sum to zero, are skipped instead of producing a
 * NullPointerException or an infinite score as before.
 *
 * @param oneWordStatistics statistics supplying per-word occurrence counts
 */
public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
    Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
    Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
    Map<MultipleHMKeys, AtomicLong> oneWordTotals = oneWordTaxonomyResult.get("Total");
    for (MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
        String[] words = hmKey.getK1().split("\\s+");
        long sumFwi = 0L;
        boolean allWordsKnown = true;
        for (String word : words) {
            AtomicLong count = oneWordTotals.get(new MultipleHMKeys1(word));
            if (count == null) {
                // Word absent from the one-word run; previously this was a
                // guaranteed NullPointerException.
                allWordsKnown = false;
                break;
            }
            sumFwi += count.longValue();
        }
        if (!allWordsKnown || sumFwi == 0L) {
            continue; // no meaningful Dice score can be computed
        }
        double diceValue = (double) filter.getNgramValue()
                * (double) taxonomyResult.get("Total").get(hmKey).longValue() / sumFwi;
        collocabilityMap.put(hmKey, diceValue);
    }
    // NOTE(review): assumes at least one measure was selected in the filter;
    // getCollocability().get(0) would otherwise throw — confirm upstream.
    collocability.put(filter.getCollocability().get(0), collocabilityMap);
}
/**
 * Returns the per-measure collocability scores computed by
 * updateCalculateCollocabilities. The internal map is exposed directly
 * (no defensive copy).
 */
public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability() {
    return collocability;
}
}