Added N-gram implementation with punctuation

This commit is contained in:
2018-07-23 13:26:12 +02:00
parent bebc0abbb3
commit 681eb4f949
2 changed files with 74 additions and 22 deletions

View File

@@ -542,16 +542,36 @@ public class XML_processing {
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
inWord = false;
}
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
//// String punctuation = characters.getData();
// String punctuation = ",";
// sentence.get(sentence.size()-1).setWord(sentence.get(sentence.size()-1).getWord() + punctuation);
// sentence.get(sentence.size()-1).setLemma(sentence.get(sentence.size()-1).getLemma() + punctuation);
// sentence.get(sentence.size()-1).setMsd(sentence.get(sentence.size()-1).getMsd() + punctuation);
// inPunctuation = false;
// }
if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
// String punctuation = characters.getData();
String punctuation = ",";
sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
inPunctuation = false;
}
break;
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
// String actualPunctuation = characters.getData();
// if (actualPunctuation.equals(".") || actualPunctuation.equals("!") || actualPunctuation.equals("?") || actualPunctuation.equals("..."))
// break;
// String punctuation = ",";
// int skip_number = 0;
// if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue())){
// skip_number = stats.getFilter().getSkipValue();
// }
// for(int i = 1; i < skip_number + 2; i ++){
// if (i < sentence.size() && !sentence.get(sentence.size() - i).equals(punctuation)) {
// sentence.get(sentence.size() - i).setWord(sentence.get(sentence.size() - i).getWord() + punctuation);
// sentence.get(sentence.size() - i).setLemma(sentence.get(sentence.size() - i).getLemma() + punctuation);
// sentence.get(sentence.size() - i).setMsd(sentence.get(sentence.size() - i).getMsd() + punctuation);
// }
// }
// inPunctuation = false;
// }
case XMLStreamConstants.END_ELEMENT:
EndElement endElement = event.asEndElement();