Added collocability functionality - implemented Dice method
This commit is contained in:
51
src/main/java/data/Collocability.java
Executable file
51
src/main/java/data/Collocability.java
Executable file
@@ -0,0 +1,51 @@
|
||||
package data;
|
||||
|
||||
public enum Collocability {
|
||||
DICE("Dice");
|
||||
|
||||
private final String name;
|
||||
|
||||
Collocability(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return this.name;
|
||||
}
|
||||
|
||||
public static Collocability factory(String cf) {
|
||||
if (cf != null) {
|
||||
if (DICE.toString().equals(cf)) {
|
||||
return DICE;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public String toMetadataString() {
|
||||
switch(this){
|
||||
case DICE:
|
||||
return "Kolokabilnost - Dice:";
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public String toHeaderString() {
|
||||
switch(this){
|
||||
case DICE:
|
||||
return "Kolokabilnost - Dice";
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// public String toPercentString() {
|
||||
// switch(this){
|
||||
// case DICE:
|
||||
// return "Delež glede na vse različnice";
|
||||
// default:
|
||||
// return null;
|
||||
// }
|
||||
// }
|
||||
}
|
||||
@@ -8,7 +8,7 @@ import java.util.regex.Pattern;
|
||||
import gui.ValidationUtil;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public class Filter {
|
||||
public class Filter implements Cloneable {
|
||||
private HashMap<filterName, Object> filter;
|
||||
|
||||
public enum filterName {
|
||||
@@ -28,7 +28,8 @@ public class Filter {
|
||||
MULTIPLE_KEYS,
|
||||
NOTE_PUNCTUATIONS,
|
||||
MINIMAL_OCCURRENCES,
|
||||
MINIMAL_TAXONOMY
|
||||
MINIMAL_TAXONOMY,
|
||||
COLLOCABILITY
|
||||
}
|
||||
|
||||
public Filter() {
|
||||
@@ -186,6 +187,23 @@ public class Filter {
|
||||
filter.put(MULTIPLE_KEYS, newKeys);
|
||||
}
|
||||
|
||||
public void setCollocability(ArrayList<Collocability> keys) {
|
||||
ArrayList<Collocability> newKeys = new ArrayList<>();
|
||||
if (keys != null) {
|
||||
newKeys.addAll(keys);
|
||||
}
|
||||
|
||||
filter.put(COLLOCABILITY, newKeys);
|
||||
}
|
||||
|
||||
public ArrayList<Collocability> getCollocability() {
|
||||
if (filter.containsKey(COLLOCABILITY) && filter.get(COLLOCABILITY) != null) {
|
||||
return (ArrayList<Collocability>) filter.get(COLLOCABILITY);
|
||||
} else {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<CalculateFor> getMultipleKeys() {
|
||||
if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) {
|
||||
return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS);
|
||||
@@ -254,4 +272,18 @@ public class Filter {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
public Object clone() throws CloneNotSupportedException{
|
||||
Filter f = null;
|
||||
try {
|
||||
f = (Filter) super.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
f = new Filter();
|
||||
}
|
||||
f.filter = (HashMap<filterName, Object>) f.filter.clone();
|
||||
|
||||
return f;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,14 +40,16 @@ public class StatisticsNew {
|
||||
private RDB db;
|
||||
private boolean analysisProducedResults;
|
||||
private LocalDateTime time;
|
||||
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
|
||||
|
||||
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
||||
this.corpus = corpus;
|
||||
this.filter = filter;
|
||||
this.taxonomyResult = new ConcurrentHashMap<>();
|
||||
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
|
||||
this.collocability = new ConcurrentHashMap<>();
|
||||
|
||||
// create table for counting word occurances per taxonomies
|
||||
// create table for counting word occurrences per taxonomies
|
||||
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
|
||||
if (this.filter.getTaxonomy().isEmpty()) {
|
||||
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
|
||||
@@ -213,7 +215,7 @@ public class StatisticsNew {
|
||||
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
|
||||
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
|
||||
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
|
||||
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult, filter);
|
||||
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), this, filter);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -442,11 +444,6 @@ public class StatisticsNew {
|
||||
info.put("n-gram nivo:", String.valueOf(ngramLevel));
|
||||
}
|
||||
|
||||
// else if (ngramLevel == 1){
|
||||
// info.put("n-gram nivo:", "nivo besed");
|
||||
// } else {
|
||||
// info.put("n-gram nivo:", "nivo črk");
|
||||
// }
|
||||
// skip
|
||||
if (ngramLevel > 1)
|
||||
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
|
||||
@@ -464,11 +461,6 @@ public class StatisticsNew {
|
||||
info.put("MSD:", msdPattern.toString());
|
||||
}
|
||||
|
||||
// taksonomija
|
||||
// if (!isEmpty(filter.getTaxonomy())) {
|
||||
// info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
|
||||
// }
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -496,4 +488,28 @@ public class StatisticsNew {
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
|
||||
Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
|
||||
|
||||
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
|
||||
|
||||
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
|
||||
String[] splitedString = hmKey.getK1().split("\\s+");
|
||||
|
||||
long sum_fwi =0L;
|
||||
for(String s : splitedString){
|
||||
MultipleHMKeys search = new MultipleHMKeys1(s);
|
||||
sum_fwi += oneWordTaxonomyResult.get("Total").get(search).longValue();
|
||||
}
|
||||
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
|
||||
collocabilityMap.put(hmKey, dice_value);
|
||||
}
|
||||
|
||||
collocability.put(filter.getCollocability().get(0), collocabilityMap);
|
||||
}
|
||||
|
||||
public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability(){
|
||||
return this.collocability;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user