Added the rest of collocabilities

This commit is contained in:
2018-11-08 11:37:16 +01:00
parent abc15360d3
commit a4df732678
7 changed files with 202 additions and 19 deletions

View File

@@ -52,10 +52,53 @@ public class Ngrams {
// generate proper MultipleHMKeys depending on filter data
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts());
if(key.length() < stats.getFilter().getPrefixLength() + stats.getFilter().getSuffixLength()){
if(stats.getFilter().getPrefixLength() != null && stats.getFilter().getSuffixLength() != null &&
key.length() < stats.getFilter().getPrefixLength() + stats.getFilter().getSuffixLength()){
continue;
}
if(stats.getFilter().getPrefixList() != null && stats.getFilter().getSuffixList() != null &&
(stats.getFilter().getPrefixList().size() > 0 || stats.getFilter().getSuffixList().size() > 0)){
String correctPrefix = "";
// go over all prefixes in PrefixList and look for them in words
for(String pf : stats.getFilter().getPrefixList()){
if (pf.length() <= key.length() && pf.equals(key.substring(0, pf.length()))){
correctPrefix = pf;
break;
}
}
String correctSuffix = "";
// go over all suffixes in SuffixList and look for them in words
for(String sf : stats.getFilter().getSuffixList()){
if (sf.length() <= key.length() && sf.equals(key.substring(key.length() - sf.length()))){
correctSuffix = sf;
break;
}
}
// boolean a = (correctPrefix.equals("") && !correctSuffix.equals(""));
// boolean b = (!correctPrefix.equals("") && correctSuffix.equals(""));
// boolean c = (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length());
// boolean d = !((correctPrefix.equals("") && !correctSuffix.equals("")) ||
// (!correctPrefix.equals("") && correctSuffix.equals("")) ||
// (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()));
if(!((stats.getFilter().getPrefixList().size() == 0 && !correctSuffix.equals("")) ||
(!correctPrefix.equals("") && stats.getFilter().getSuffixList().size() == 0) ||
(!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){
continue;
}
// if(!((correctPrefix.equals("") && !correctSuffix.equals("")) ||
// (!correctPrefix.equals("") && correctSuffix.equals("")) ||
// (!correctPrefix.equals("") && !correctSuffix.equals("") && correctPrefix.length() + correctSuffix.length() <= key.length()))){
// continue;
// }
}
// if last letter is ',' erase it
// if (key.equals("")){