Added taxonomy presentation in results
This commit is contained in:
@@ -473,6 +473,7 @@ public class XML_processing {
|
||||
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
|
||||
boolean inWord = false;
|
||||
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
||||
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||
String lemma = "";
|
||||
String msd = "";
|
||||
|
||||
@@ -508,7 +509,10 @@ public class XML_processing {
|
||||
|
||||
if (tax != null) {
|
||||
// keep only taxonomy properties
|
||||
currentFiletaxonomy.add(String.valueOf(tax.getValue()).replace("#", ""));
|
||||
String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", "");
|
||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||
Tax taxonomy = new Tax();
|
||||
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -519,7 +523,7 @@ public class XML_processing {
|
||||
// "word" node value
|
||||
if (inWord) {
|
||||
String word = characters.getData();
|
||||
sentence.add(new Word(word, lemma, msd));
|
||||
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
|
||||
inWord = false;
|
||||
}
|
||||
break;
|
||||
@@ -570,6 +574,7 @@ public class XML_processing {
|
||||
|
||||
// fallback
|
||||
else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
|
||||
// join corpus and stats
|
||||
fj(corpus, stats);
|
||||
corpus.clear();
|
||||
|
||||
|
||||
@@ -45,6 +45,8 @@ public class Ngrams {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Update the taxonomy results for this n-gram candidate, then the overall results below.
|
||||
stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate);
|
||||
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
||||
}
|
||||
}
|
||||
@@ -60,7 +62,8 @@ public class Ngrams {
|
||||
}
|
||||
|
||||
for (int i = 0; i < regex.size(); i++) {
|
||||
if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
|
||||
//if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
|
||||
if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern() + ".*")) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user