Some functionality from OneWord copied to StringAnalysis and fixed
This commit is contained in:
@@ -67,22 +67,43 @@ public class Ngrams {
|
||||
multipleKeys = new MultipleHMKeys1(key);
|
||||
break;
|
||||
case 1:
|
||||
multipleKeys = new MultipleHMKeys2(key, wordToString(ngramCandidate, otherKeys.get(0)));
|
||||
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
if (stats.getFilter().getNotePunctuations())
|
||||
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
||||
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
||||
break;
|
||||
case 2:
|
||||
multipleKeys = new MultipleHMKeys3(key, wordToString(ngramCandidate, otherKeys.get(0)),
|
||||
wordToString(ngramCandidate, otherKeys.get(1)));
|
||||
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
||||
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
||||
}
|
||||
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
||||
break;
|
||||
case 3:
|
||||
multipleKeys = new MultipleHMKeys4(key, wordToString(ngramCandidate, otherKeys.get(0)),
|
||||
wordToString(ngramCandidate, otherKeys.get(1)),
|
||||
wordToString(ngramCandidate, otherKeys.get(2)));
|
||||
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
||||
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
||||
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
||||
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
||||
}
|
||||
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
||||
break;
|
||||
case 4:
|
||||
multipleKeys = new MultipleHMKeys5(key, wordToString(ngramCandidate, otherKeys.get(0)),
|
||||
wordToString(ngramCandidate, otherKeys.get(1)),
|
||||
wordToString(ngramCandidate, otherKeys.get(2)),
|
||||
wordToString(ngramCandidate, otherKeys.get(3)));
|
||||
String k4_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
||||
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2));
|
||||
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
||||
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
||||
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
||||
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
||||
}
|
||||
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
||||
break;
|
||||
default:
|
||||
multipleKeys = null;
|
||||
@@ -265,7 +286,7 @@ public class Ngrams {
|
||||
currentLoop.add(checkAndModifySkipgramPunctuation(sentence, i, j, stats));
|
||||
currentLoop.add(sentence.get(j));
|
||||
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats);
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
|
||||
} else {
|
||||
for (int k = j + 1; k <= j + 1 + skip; k++) { // 3gram
|
||||
if (ngram == 3 && k < sentence.size()) {
|
||||
@@ -274,7 +295,7 @@ public class Ngrams {
|
||||
currentLoop.add(checkAndModifySkipgramPunctuation(sentence, j, k, stats));
|
||||
currentLoop.add(sentence.get(k));
|
||||
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats);
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
|
||||
} else {
|
||||
for (int l = k + 1; l <= k + 1 + skip; l++) { // 4gram
|
||||
if (ngram == 4 && l < sentence.size()) {
|
||||
@@ -284,7 +305,7 @@ public class Ngrams {
|
||||
currentLoop.add(checkAndModifySkipgramPunctuation(sentence, k, l, stats));
|
||||
currentLoop.add(sentence.get(l));
|
||||
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats);
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
|
||||
} else {
|
||||
for (int m = l + 1; m <= l + 1 + skip; m++) { // 5gram
|
||||
if (ngram == 5 && m < sentence.size()) {
|
||||
@@ -295,7 +316,7 @@ public class Ngrams {
|
||||
currentLoop.add(checkAndModifySkipgramPunctuation(sentence, l, m, stats));
|
||||
currentLoop.add(sentence.get(m));
|
||||
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats);
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats, s.getTaxonomy());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -308,13 +329,80 @@ public class Ngrams {
|
||||
}
|
||||
}
|
||||
|
||||
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats) {
|
||||
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<String> taxonomy) {
|
||||
// count if no regex is set, or if one is set and the candidate passes it
|
||||
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) {
|
||||
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
|
||||
key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
stats.updateTaxonomyResults(new MultipleHMKeys1(key),
|
||||
stats.getCorpus().getTaxonomy());
|
||||
// String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
|
||||
// key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
// stats.updateTaxonomyResults(new MultipleHMKeys1(key),
|
||||
// stats.getCorpus().getTaxonomy());
|
||||
|
||||
|
||||
ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
|
||||
|
||||
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
|
||||
|
||||
// if the last character is ',', remove it
|
||||
|
||||
// if (key.equals("")){
|
||||
// String test = key;
|
||||
// }
|
||||
|
||||
if (stats.getFilter().getNotePunctuations())
|
||||
key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
|
||||
MultipleHMKeys multipleKeys;
|
||||
|
||||
// create the MultipleHMKeys variant that matches the number of other keys
|
||||
switch (otherKeys.size()) {
|
||||
case 0:
|
||||
multipleKeys = new MultipleHMKeys1(key);
|
||||
break;
|
||||
case 1:
|
||||
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
if (stats.getFilter().getNotePunctuations())
|
||||
k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
||||
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
||||
break;
|
||||
case 2:
|
||||
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
||||
k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
||||
}
|
||||
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
||||
break;
|
||||
case 3:
|
||||
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
||||
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
||||
k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
||||
k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
|
||||
}
|
||||
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
||||
break;
|
||||
case 4:
|
||||
String k4_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
||||
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2));
|
||||
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3));
|
||||
if (stats.getFilter().getNotePunctuations()) {
|
||||
k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
||||
k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
||||
k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
|
||||
k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
|
||||
}
|
||||
multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
|
||||
break;
|
||||
default:
|
||||
multipleKeys = null;
|
||||
|
||||
}
|
||||
stats.updateTaxonomyResults(multipleKeys, taxonomy);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user