|
|
|
@ -428,9 +428,14 @@ public class StatisticsNew {
|
|
|
|
|
Integer ngramLevel = filter.getNgramValue();
|
|
|
|
|
if (ngramLevel == 0)
|
|
|
|
|
info.put("Analiza", "Črke");
|
|
|
|
|
else if (ngramLevel == 1)
|
|
|
|
|
info.put("Analiza", "Besede");
|
|
|
|
|
else
|
|
|
|
|
else if (ngramLevel == 1) {
|
|
|
|
|
// if suffixes or prefixes are not null print word parts
|
|
|
|
|
if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
|
|
|
|
|
info.put("Analiza", "Besedni deli");
|
|
|
|
|
} else {
|
|
|
|
|
info.put("Analiza", "Besede");
|
|
|
|
|
}
|
|
|
|
|
} else
|
|
|
|
|
info.put("Analiza", filter.getAl().toString());
|
|
|
|
|
} else {
|
|
|
|
|
info.put("Analiza", filter.getAl().toString());
|
|
|
|
@ -492,22 +497,68 @@ public class StatisticsNew {
|
|
|
|
|
public void updateCalculateCollocabilities(StatisticsNew oneWordStatistics) {
|
|
|
|
|
Map<String, Map<MultipleHMKeys, AtomicLong>> oneWordTaxonomyResult = oneWordStatistics.getTaxonomyResult();
|
|
|
|
|
|
|
|
|
|
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
|
|
|
|
|
Map<Collocability, Map<MultipleHMKeys, Double>> collocabilityMap = new ConcurrentHashMap<>();
|
|
|
|
|
|
|
|
|
|
for(Collocability c : filter.getCollocability()){
|
|
|
|
|
collocabilityMap.put(c, new ConcurrentHashMap<>());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// count number of all words
|
|
|
|
|
long N = 0;
|
|
|
|
|
for(AtomicLong a : oneWordTaxonomyResult.get("Total").values()){
|
|
|
|
|
N += a.longValue();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
|
|
|
|
|
// String[] splitedString = hmKey.getK1().split("\\s+");
|
|
|
|
|
|
|
|
|
|
long sum_fwi =0L;
|
|
|
|
|
long mul_fwi =1L;
|
|
|
|
|
|
|
|
|
|
for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){
|
|
|
|
|
System.out.println(smallHmKey.getK1());
|
|
|
|
|
// System.out.println(smallHmKey.getK1());
|
|
|
|
|
sum_fwi += oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue();
|
|
|
|
|
mul_fwi *= oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue();
|
|
|
|
|
}
|
|
|
|
|
// String t = hmKey.getK1();
|
|
|
|
|
// if(hmKey.getK1().equals("v Slovenija")){
|
|
|
|
|
// System.out.println("TEST");
|
|
|
|
|
//
|
|
|
|
|
// }
|
|
|
|
|
double O = (double)taxonomyResult.get("Total").get(hmKey).longValue();
|
|
|
|
|
double n = (double)filter.getNgramValue();
|
|
|
|
|
double E = (double)mul_fwi / Math.pow(N, n - 1);
|
|
|
|
|
if (collocabilityMap.keySet().contains(Collocability.DICE)){
|
|
|
|
|
double dice_value = n * O / sum_fwi;
|
|
|
|
|
collocabilityMap.get(Collocability.DICE).put(hmKey, dice_value);
|
|
|
|
|
}
|
|
|
|
|
if (collocabilityMap.keySet().contains(Collocability.TSCORE)){
|
|
|
|
|
double t_score = (O - E) / Math.sqrt(O);
|
|
|
|
|
collocabilityMap.get(Collocability.TSCORE).put(hmKey, t_score);
|
|
|
|
|
}
|
|
|
|
|
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
|
|
|
|
|
collocabilityMap.put(hmKey, dice_value);
|
|
|
|
|
if (collocabilityMap.keySet().contains(Collocability.MI)){
|
|
|
|
|
double MI = Math.log(O / E) / Math.log(2);
|
|
|
|
|
collocabilityMap.get(Collocability.MI).put(hmKey, MI);
|
|
|
|
|
}
|
|
|
|
|
if (collocabilityMap.keySet().contains(Collocability.MI3)){
|
|
|
|
|
double MI3 = Math.log(Math.pow(O, 3.0) / E) / Math.log(2);
|
|
|
|
|
collocabilityMap.get(Collocability.MI3).put(hmKey, MI3);
|
|
|
|
|
}
|
|
|
|
|
if (collocabilityMap.keySet().contains(Collocability.LOGDICE)){
|
|
|
|
|
double dice_value = n * O / sum_fwi;
|
|
|
|
|
double log_dice = 14 + Math.log(dice_value) / Math.log(2);
|
|
|
|
|
collocabilityMap.get(Collocability.LOGDICE).put(hmKey, log_dice);
|
|
|
|
|
}
|
|
|
|
|
if (collocabilityMap.keySet().contains(Collocability.SIMPLELL)){
|
|
|
|
|
double simple_ll = 2 * (O * Math.log10(O / E) - (O - E));
|
|
|
|
|
collocabilityMap.get(Collocability.SIMPLELL).put(hmKey, simple_ll);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(Collocability c : collocabilityMap.keySet()){
|
|
|
|
|
collocability.put(c, collocabilityMap.get(c));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
collocability.put(filter.getCollocability().get(0), collocabilityMap);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public Map<Collocability, Map<MultipleHMKeys, Double>> getCollocability(){
|
|
|
|
|