Added N-gram implementation with punctuation
This commit is contained in:
@@ -542,16 +542,36 @@ public class XML_processing {
|
||||
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
|
||||
inWord = false;
|
||||
}
|
||||
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||
//// String punctuation = characters.getData();
|
||||
// String punctuation = ",";
|
||||
// sentence.get(sentence.size()-1).setWord(sentence.get(sentence.size()-1).getWord() + punctuation);
|
||||
// sentence.get(sentence.size()-1).setLemma(sentence.get(sentence.size()-1).getLemma() + punctuation);
|
||||
// sentence.get(sentence.size()-1).setMsd(sentence.get(sentence.size()-1).getMsd() + punctuation);
|
||||
// inPunctuation = false;
|
||||
// }
|
||||
if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||
// String punctuation = characters.getData();
|
||||
String punctuation = ",";
|
||||
|
||||
sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
|
||||
sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
|
||||
sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
|
||||
inPunctuation = false;
|
||||
}
|
||||
break;
|
||||
|
||||
// if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||
// String actualPunctuation = characters.getData();
|
||||
// if (actualPunctuation.equals(".") || actualPunctuation.equals("!") || actualPunctuation.equals("?") || actualPunctuation.equals("..."))
|
||||
// break;
|
||||
// String punctuation = ",";
|
||||
// int skip_number = 0;
|
||||
// if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue())){
|
||||
// skip_number = stats.getFilter().getSkipValue();
|
||||
// }
|
||||
// for(int i = 1; i < skip_number + 2; i ++){
|
||||
// if (i < sentence.size() && !sentence.get(sentence.size() - i).equals(punctuation)) {
|
||||
// sentence.get(sentence.size() - i).setWord(sentence.get(sentence.size() - i).getWord() + punctuation);
|
||||
// sentence.get(sentence.size() - i).setLemma(sentence.get(sentence.size() - i).getLemma() + punctuation);
|
||||
// sentence.get(sentence.size() - i).setMsd(sentence.get(sentence.size() - i).getMsd() + punctuation);
|
||||
// }
|
||||
// }
|
||||
// inPunctuation = false;
|
||||
// }
|
||||
|
||||
case XMLStreamConstants.END_ELEMENT:
|
||||
EndElement endElement = event.asEndElement();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user