|
|
|
@ -56,8 +56,8 @@ public class Ngrams {
|
|
|
|
|
// String test = key;
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
if (stats.getFilter().getNotePunctuations())
|
|
|
|
|
key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
|
|
|
|
// if (stats.getFilter().getNotePunctuations())
|
|
|
|
|
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
|
|
|
|
|
|
|
|
|
MultipleHMKeys multipleKeys;
|
|
|
|
|
|
|
|
|
@ -68,28 +68,28 @@ public class Ngrams {
|
|
|
|
|
break;
|
|
|
|
|
case 1:
|
|
|
|
|
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
|
|
|
|
if (stats.getFilter().getNotePunctuations())
|
|
|
|
|
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
|
|
|
|
// if (stats.getFilter().getNotePunctuations())
|
|
|
|
|
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
|
|
|
|
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
|
|
|
|
break;
|
|
|
|
|
case 2:
|
|
|
|
|
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
|
|
|
|
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
|
|
|
|
if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
|
|
|
|
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
|
|
|
|
}
|
|
|
|
|
// if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
|
|
|
|
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
|
|
|
|
// }
|
|
|
|
|
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
|
|
|
|
break;
|
|
|
|
|
case 3:
|
|
|
|
|
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
|
|
|
|
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
|
|
|
|
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2));
|
|
|
|
|
if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
|
|
|
|
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
|
|
|
|
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
|
|
|
|
}
|
|
|
|
|
// if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
|
|
|
|
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
|
|
|
|
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
|
|
|
|
// }
|
|
|
|
|
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
|
|
|
|
break;
|
|
|
|
|
case 4:
|
|
|
|
@ -97,12 +97,12 @@ public class Ngrams {
|
|
|
|
|
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
|
|
|
|
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2));
|
|
|
|
|
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3));
|
|
|
|
|
if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
|
|
|
|
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
|
|
|
|
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
|
|
|
|
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
|
|
|
|
}
|
|
|
|
|
// if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
|
|
|
|
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
|
|
|
|
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
|
|
|
|
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
|
|
|
|
// }
|
|
|
|
|
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
@ -241,22 +241,22 @@ public class Ngrams {
|
|
|
|
|
*/
|
|
|
|
|
private static Word checkAndModifySkipgramPunctuation(List<Word> sentence, int i, int j, StatisticsNew stats){
|
|
|
|
|
// if punctuation checkbox selected and there words at indexes i and j are not next to each other
|
|
|
|
|
if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
|
|
|
|
|
boolean middleWordsHavePunctuation = false;
|
|
|
|
|
for (int n = i + 1; n < j; n++){
|
|
|
|
|
if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
|
|
|
|
|
middleWordsHavePunctuation = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (middleWordsHavePunctuation){
|
|
|
|
|
|
|
|
|
|
String punctuation = ",";
|
|
|
|
|
return new Word(sentence.get(i).getWord() + punctuation,
|
|
|
|
|
sentence.get(i).getLemma() + punctuation,
|
|
|
|
|
sentence.get(i).getMsd() + punctuation);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
|
|
|
|
|
// boolean middleWordsHavePunctuation = false;
|
|
|
|
|
// for (int n = i + 1; n < j; n++){
|
|
|
|
|
// if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
|
|
|
|
|
// middleWordsHavePunctuation = true;
|
|
|
|
|
// break;
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
// if (middleWordsHavePunctuation){
|
|
|
|
|
//
|
|
|
|
|
// String punctuation = ",";
|
|
|
|
|
// return new Word(sentence.get(i).getWord() + punctuation,
|
|
|
|
|
// sentence.get(i).getLemma() + punctuation,
|
|
|
|
|
// sentence.get(i).getMsd() + punctuation);
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
return sentence.get(i);
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
@ -348,8 +348,8 @@ public class Ngrams {
|
|
|
|
|
// String test = key;
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
if (stats.getFilter().getNotePunctuations())
|
|
|
|
|
key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
|
|
|
|
// if (stats.getFilter().getNotePunctuations())
|
|
|
|
|
// key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
|
|
|
|
|
|
|
|
|
MultipleHMKeys multipleKeys;
|
|
|
|
|
|
|
|
|
@ -360,28 +360,28 @@ public class Ngrams {
|
|
|
|
|
break;
|
|
|
|
|
case 1:
|
|
|
|
|
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
|
|
|
|
if (stats.getFilter().getNotePunctuations())
|
|
|
|
|
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
|
|
|
|
// if (stats.getFilter().getNotePunctuations())
|
|
|
|
|
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
|
|
|
|
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
|
|
|
|
break;
|
|
|
|
|
case 2:
|
|
|
|
|
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
|
|
|
|
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
|
|
|
|
if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
|
|
|
|
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
|
|
|
|
}
|
|
|
|
|
// if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
|
|
|
|
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
|
|
|
|
// }
|
|
|
|
|
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
|
|
|
|
break;
|
|
|
|
|
case 3:
|
|
|
|
|
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
|
|
|
|
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
|
|
|
|
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2));
|
|
|
|
|
if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
|
|
|
|
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
|
|
|
|
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
|
|
|
|
}
|
|
|
|
|
// if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
|
|
|
|
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
|
|
|
|
// k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
|
|
|
|
// }
|
|
|
|
|
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
|
|
|
|
break;
|
|
|
|
|
case 4:
|
|
|
|
@ -389,12 +389,12 @@ public class Ngrams {
|
|
|
|
|
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
|
|
|
|
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2));
|
|
|
|
|
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3));
|
|
|
|
|
if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
|
|
|
|
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
|
|
|
|
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
|
|
|
|
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
|
|
|
|
}
|
|
|
|
|
// if (stats.getFilter().getNotePunctuations()) {
|
|
|
|
|
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
|
|
|
|
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
|
|
|
|
// k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
|
|
|
|
// k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
|
|
|
|
// }
|
|
|
|
|
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|