Added taxonomy presentation in results

This commit is contained in:
2018-06-29 12:53:29 +02:00
parent d5d06fd7c5
commit 8d7cce6c77
11 changed files with 212 additions and 30 deletions

View File

@@ -473,6 +473,7 @@ public class XML_processing {
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
boolean inWord = false;
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
String msd = "";
@@ -508,7 +509,10 @@ public class XML_processing {
if (tax != null) {
// keep only taxonomy properties
currentFiletaxonomy.add(String.valueOf(tax.getValue()).replace("#", ""));
String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", "");
currentFiletaxonomy.add(currentFiletaxonomyElement);
Tax taxonomy = new Tax();
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
}
}
break;
@@ -519,7 +523,7 @@ public class XML_processing {
// "word" node value
if (inWord) {
String word = characters.getData();
sentence.add(new Word(word, lemma, msd));
sentence.add(new Word(word, lemma, msd, currentFiletaxonomyLong));
inWord = false;
}
break;
@@ -570,6 +574,7 @@ public class XML_processing {
// fallback
else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
// join corpus and stats
fj(corpus, stats);
corpus.clear();

View File

@@ -45,6 +45,8 @@ public class Ngrams {
continue;
}
// also record this n-gram candidate in the taxonomy results
stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate);
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
}
}
@@ -60,7 +62,8 @@ public class Ngrams {
}
for (int i = 0; i < regex.size(); i++) {
if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
//if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern() + ".*")) {
return false;
}
}