Added initial functionality for word parts implementation

This commit is contained in:
2018-11-05 10:30:41 +01:00
parent f9ce74d7b8
commit abc15360d3
12 changed files with 795 additions and 152 deletions

View File

@@ -29,7 +29,11 @@ public class Filter implements Cloneable {
NOTE_PUNCTUATIONS,
MINIMAL_OCCURRENCES,
MINIMAL_TAXONOMY,
COLLOCABILITY
COLLOCABILITY,
PREFIX_LENGTH,
SUFFIX_LENGTH,
PREFIX_LIST,
SUFFIX_LIST
}
public Filter() {
@@ -245,6 +249,43 @@ public class Filter implements Cloneable {
return (Integer) filter.get(MINIMAL_TAXONOMY);
}
// Accessors for the word-part filter entries:
// PREFIX_LENGTH, SUFFIX_LENGTH, PREFIX_LIST, SUFFIX_LIST
/**
 * Stores the given value in {@code filter} under the {@code PREFIX_LENGTH} key.
 *
 * @param v value to store; handed to {@code filter.put} unchanged (no validation here)
 */
public void setPrefixLength(Integer v) {
filter.put(PREFIX_LENGTH, v);
}
/**
 * Reads the entry stored in {@code filter} under the {@code PREFIX_LENGTH} key.
 *
 * @return whatever {@code filter.get(PREFIX_LENGTH)} yields, cast to {@code Integer}
 */
public Integer getPrefixLength() {
    Object stored = filter.get(PREFIX_LENGTH);
    return (Integer) stored;
}
/**
 * Stores the given value in {@code filter} under the {@code SUFFIX_LENGTH} key.
 *
 * @param v value to store; handed to {@code filter.put} unchanged (no validation here)
 */
public void setSuffixLength(Integer v) {
filter.put(SUFFIX_LENGTH, v);
}
/**
 * Reads the entry stored in {@code filter} under the {@code SUFFIX_LENGTH} key.
 *
 * @return whatever {@code filter.get(SUFFIX_LENGTH)} yields, cast to {@code Integer}
 */
public Integer getSuffixLength() {
    Object stored = filter.get(SUFFIX_LENGTH);
    return (Integer) stored;
}
/**
 * Stores the given list in {@code filter} under the {@code PREFIX_LIST} key.
 *
 * @param v list to store; the reference itself is handed to {@code filter.put}
 *          (no copy is made here)
 */
public void setPrefixList(ArrayList<String> v) {
filter.put(PREFIX_LIST, v);
}
/**
 * Reads the entry stored in {@code filter} under the {@code PREFIX_LIST} key.
 *
 * @return whatever {@code filter.get(PREFIX_LIST)} yields, cast (unchecked) to
 *         {@code ArrayList<String>}
 */
public ArrayList<String> getPrefixList() {
    Object stored = filter.get(PREFIX_LIST);
    return (ArrayList<String>) stored;
}
/**
 * Stores the given list in {@code filter} under the {@code SUFFIX_LIST} key.
 *
 * @param v list to store; the reference itself is handed to {@code filter.put}
 *          (no copy is made here)
 */
public void setSuffixList(ArrayList<String> v) {
filter.put(SUFFIX_LIST, v);
}
/**
 * Reads the entry stored in {@code filter} under the {@code SUFFIX_LIST} key.
 *
 * @return whatever {@code filter.get(SUFFIX_LIST)} yields, cast (unchecked) to
 *         {@code ArrayList<String>}
 */
public ArrayList<String> getSuffixList() {
    Object stored = filter.get(SUFFIX_LIST);
    return (ArrayList<String>) stored;
}
private void addWordPart(CalculateFor wp){
ArrayList<CalculateFor> oldWp = ((ArrayList<CalculateFor>) filter.get(WORD_PARTS));

View File

@@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@@ -12,6 +13,8 @@ public interface MultipleHMKeys {
default String getK4(){ return null; }
default String getK5(){ return null; }
default ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){ return null; }
@Override
int hashCode();

View File

@@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@@ -16,6 +17,16 @@ public final class MultipleHMKeys1 implements MultipleHMKeys {
return k1;
}
/**
 * Breaks this key into one single-token key per whitespace-separated piece of {@code k1}.
 *
 * @return a new list holding one {@code MultipleHMKeys1} per token produced by
 *         {@code k1.split("\\s+")}, in token order
 */
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
    ArrayList<MultipleHMKeys> parts = new ArrayList<>();
    for (String token : k1.split("\\s+")) {
        parts.add(new MultipleHMKeys1(token));
    }
    return parts;
}
@Override
public int hashCode() {
return k1.hashCode();

View File

@@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@@ -21,6 +22,17 @@ public final class MultipleHMKeys2 implements MultipleHMKeys {
return k2;
}
/**
 * Breaks this two-component key into per-token keys by splitting {@code k1} and
 * {@code k2} on whitespace and pairing the pieces position by position.
 *
 * <p>Only the token count of {@code k1} bounds the loop: surplus tokens in
 * {@code k2} are ignored, and a shorter {@code k2} makes the indexed access
 * throw {@code ArrayIndexOutOfBoundsException}.
 *
 * @return a new list with one {@code MultipleHMKeys2} per token of {@code k1}, in token order
 */
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
    ArrayList<MultipleHMKeys> parts = new ArrayList<>();
    String[] firstTokens = k1.split("\\s+");
    String[] secondTokens = k2.split("\\s+");
    final int count = firstTokens.length;
    for (int idx = 0; idx < count; idx++) {
        parts.add(new MultipleHMKeys2(firstTokens[idx], secondTokens[idx]));
    }
    return parts;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2);

View File

@@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@@ -26,6 +27,18 @@ public final class MultipleHMKeys3 implements MultipleHMKeys {
return k3;
}
/**
 * Breaks this three-component key into per-token keys by splitting {@code k1},
 * {@code k2} and {@code k3} on whitespace and combining the pieces position by position.
 *
 * <p>Only the token count of {@code k1} bounds the loop: surplus tokens in the
 * other components are ignored, and a shorter component makes the indexed access
 * throw {@code ArrayIndexOutOfBoundsException}.
 *
 * @return a new list with one {@code MultipleHMKeys3} per token of {@code k1}, in token order
 */
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
    ArrayList<MultipleHMKeys> parts = new ArrayList<>();
    String[] firstTokens = k1.split("\\s+");
    String[] secondTokens = k2.split("\\s+");
    String[] thirdTokens = k3.split("\\s+");
    final int count = firstTokens.length;
    for (int idx = 0; idx < count; idx++) {
        parts.add(new MultipleHMKeys3(firstTokens[idx], secondTokens[idx], thirdTokens[idx]));
    }
    return parts;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2, k3);

View File

@@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@@ -31,6 +32,20 @@ public final class MultipleHMKeys4 implements MultipleHMKeys {
return k4;
}
/**
 * Breaks this four-component key into per-token keys by splitting {@code k1}
 * through {@code k4} on whitespace and combining the pieces position by position.
 *
 * <p>Only the token count of {@code k1} bounds the loop: surplus tokens in the
 * other components are ignored, and a shorter component makes the indexed access
 * throw {@code ArrayIndexOutOfBoundsException}.
 *
 * @return a new list with one {@code MultipleHMKeys4} per token of {@code k1}, in token order
 */
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
    ArrayList<MultipleHMKeys> parts = new ArrayList<>();
    String[] firstTokens = k1.split("\\s+");
    String[] secondTokens = k2.split("\\s+");
    String[] thirdTokens = k3.split("\\s+");
    String[] fourthTokens = k4.split("\\s+");
    final int count = firstTokens.length;
    for (int idx = 0; idx < count; idx++) {
        parts.add(new MultipleHMKeys4(
                firstTokens[idx], secondTokens[idx], thirdTokens[idx], fourthTokens[idx]));
    }
    return parts;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2, k3, k4);

View File

@@ -1,5 +1,6 @@
package data;
import java.util.ArrayList;
import java.util.Objects;
/*
@@ -36,6 +37,22 @@ public final class MultipleHMKeys5 implements MultipleHMKeys {
return k5;
}
/**
 * Breaks this five-component key into per-token keys by splitting {@code k1}
 * through {@code k5} on whitespace and combining the pieces position by position.
 *
 * <p>Only the token count of {@code k1} bounds the loop: surplus tokens in the
 * other components are ignored, and a shorter component makes the indexed access
 * throw {@code ArrayIndexOutOfBoundsException}.
 *
 * @return a new list with one {@code MultipleHMKeys5} per token of {@code k1}, in token order
 */
public ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){
    ArrayList<MultipleHMKeys> parts = new ArrayList<>();
    String[] firstTokens = k1.split("\\s+");
    String[] secondTokens = k2.split("\\s+");
    String[] thirdTokens = k3.split("\\s+");
    String[] fourthTokens = k4.split("\\s+");
    String[] fifthTokens = k5.split("\\s+");
    final int count = firstTokens.length;
    for (int idx = 0; idx < count; idx++) {
        parts.add(new MultipleHMKeys5(
                firstTokens[idx], secondTokens[idx], thirdTokens[idx],
                fourthTokens[idx], fifthTokens[idx]));
    }
    return parts;
}
@Override
public int hashCode() {
return Objects.hash(k1, k2, k3, k4, k5);

View File

@@ -495,12 +495,13 @@ public class StatisticsNew {
Map<MultipleHMKeys, Double> collocabilityMap = new ConcurrentHashMap<>();
for(MultipleHMKeys hmKey : taxonomyResult.get("Total").keySet()) {
String[] splitedString = hmKey.getK1().split("\\s+");
// String[] splitedString = hmKey.getK1().split("\\s+");
long sum_fwi =0L;
for(String s : splitedString){
MultipleHMKeys search = new MultipleHMKeys1(s);
sum_fwi += oneWordTaxonomyResult.get("Total").get(search).longValue();
for(MultipleHMKeys smallHmKey : hmKey.getSplittedMultipleHMKeys()){
System.out.println(smallHmKey.getK1());
sum_fwi += oneWordTaxonomyResult.get("Total").get(smallHmKey).longValue();
}
double dice_value = (double) filter.getNgramValue() * (double)taxonomyResult.get("Total").get(hmKey).longValue() / sum_fwi;
collocabilityMap.put(hmKey, dice_value);