Taxonomy refactored
This commit is contained in:
@@ -536,8 +536,8 @@ public class XML_processing {
|
||||
boolean inWord = false;
|
||||
boolean inPunctuation = false;
|
||||
boolean taxonomyMatch = true;
|
||||
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
||||
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||
// ArrayList<Taxonomy> currentFiletaxonomyLong = new ArrayList<>();
|
||||
String lemma = "";
|
||||
String msd = "";
|
||||
|
||||
@@ -578,10 +578,10 @@ public class XML_processing {
|
||||
|
||||
if (tax != null) {
|
||||
// keep only taxonomy properties
|
||||
String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", "");
|
||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""));
|
||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||
Tax taxonomy = new Tax();
|
||||
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -637,7 +637,7 @@ public class XML_processing {
|
||||
// parser reached end of the current sentence
|
||||
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
||||
// count all UniGramOccurrences in sentence for statistics
|
||||
stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomyLong);
|
||||
stats.updateUniGramOccurrences(sentence.size(), currentFiletaxonomy);
|
||||
|
||||
// add sentence to corpus if it passes filters
|
||||
sentence = runFilters(sentence, stats.getFilter());
|
||||
@@ -645,7 +645,7 @@ public class XML_processing {
|
||||
|
||||
|
||||
if (!ValidationUtil.isEmpty(sentence) && taxonomyMatch) {
|
||||
corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
|
||||
corpus.add(new Sentence(sentence, currentFiletaxonomy));
|
||||
}
|
||||
|
||||
// taxonomyMatch = true;
|
||||
@@ -713,8 +713,8 @@ public class XML_processing {
|
||||
public static boolean readXMLSSJ500K(String path, StatisticsNew stats) {
|
||||
boolean inWord = false;
|
||||
boolean inPunctuation = false;
|
||||
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
||||
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||
String lemma = "";
|
||||
String msd = "";
|
||||
|
||||
@@ -757,10 +757,10 @@ public class XML_processing {
|
||||
|
||||
if (tax != null) {
|
||||
// keep only taxonomy properties
|
||||
String currentFiletaxonomyElement = String.valueOf(tax.getValue()).replace("#", "");
|
||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()).replace("#", ""));
|
||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||
Tax taxonomy = new Tax();
|
||||
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -793,7 +793,7 @@ public class XML_processing {
|
||||
sentence = runFilters(sentence, stats.getFilter());
|
||||
|
||||
if (!ValidationUtil.isEmpty(sentence)) {
|
||||
corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
|
||||
corpus.add(new Sentence(sentence, currentFiletaxonomy));
|
||||
}
|
||||
|
||||
// and start a new one
|
||||
@@ -820,7 +820,7 @@ public class XML_processing {
|
||||
corpus.clear();
|
||||
|
||||
currentFiletaxonomy = new ArrayList<>();
|
||||
currentFiletaxonomyLong = new ArrayList<>();
|
||||
// currentFiletaxonomyLong = new ArrayList<>();
|
||||
}
|
||||
|
||||
break;
|
||||
@@ -848,8 +848,8 @@ public class XML_processing {
|
||||
boolean inOrthDiv = false;
|
||||
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
|
||||
boolean inSeparatedWord = false;
|
||||
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
||||
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||
ArrayList<Taxonomy> currentFiletaxonomy = new ArrayList<>();
|
||||
// ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
|
||||
String lemma = "";
|
||||
String msd = "";
|
||||
|
||||
@@ -923,10 +923,10 @@ public class XML_processing {
|
||||
|
||||
if (tax != null) {
|
||||
// keep only taxonomy properties
|
||||
String currentFiletaxonomyElement = String.valueOf(tax.getValue());
|
||||
Taxonomy currentFiletaxonomyElement = Taxonomy.factory(String.valueOf(tax.getValue()));
|
||||
currentFiletaxonomy.add(currentFiletaxonomyElement);
|
||||
Tax taxonomy = new Tax();
|
||||
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
// currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
|
||||
}
|
||||
} else if (qName.equalsIgnoreCase("div")) {
|
||||
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
|
||||
@@ -1010,7 +1010,7 @@ public class XML_processing {
|
||||
// add sentence to corpus if it passes filters
|
||||
if (includeFile && !ValidationUtil.isEmpty(sentence)) {
|
||||
sentence = runFilters(sentence, stats.getFilter());
|
||||
corpus.add(new Sentence(sentence, currentFiletaxonomyLong));
|
||||
corpus.add(new Sentence(sentence, currentFiletaxonomy));
|
||||
}
|
||||
|
||||
wordIndex = 0;
|
||||
@@ -1050,7 +1050,7 @@ public class XML_processing {
|
||||
corpus.clear();
|
||||
|
||||
currentFiletaxonomy = new ArrayList<>();
|
||||
currentFiletaxonomyLong = new ArrayList<>();
|
||||
// currentFiletaxonomyLong = new ArrayList<>();
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user