Fixed taxonomy processing for KOST + Added ignoring of certain files in KOST
This commit is contained in:
parent
6f09cf9bed
commit
eb72b380a5
|
@ -3,6 +3,7 @@
|
|||
```shell
|
||||
mvn package
|
||||
```
|
||||
- The build results are in the `shade` folder
|
||||
|
||||
# Build executable using Launch4j
|
||||
- Install Java on Windows
|
||||
|
|
|
@ -501,6 +501,10 @@ public class XML_processing {
|
|||
// this toggle is true when we're inside a header (next block of code executes)
|
||||
// and false when we're not (skip reading unnecessary attributes)
|
||||
insideHeader = true;
|
||||
} else if (corpusType == CorpusType.KOST && elementName.equals("standOff") ||
|
||||
corpusType == CorpusType.KOST && elementName.equals("TEI")
|
||||
) {
|
||||
return resultTaxonomy;
|
||||
}
|
||||
|
||||
if (insideHeader) {
|
||||
|
@ -849,6 +853,10 @@ public class XML_processing {
|
|||
|
||||
} else if (qName.equals("text")){
|
||||
taxonomyMatch = true;
|
||||
} else if (stats.getCorpus().getCorpusType() == CorpusType.KOST && qName.equals("standOff") ||
|
||||
stats.getCorpus().getCorpusType() == CorpusType.KOST && qName.equals("TEI")
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -911,7 +919,8 @@ public class XML_processing {
|
|||
}
|
||||
// fallback
|
||||
else if (endElement.getName().getLocalPart().equalsIgnoreCase("div") &&
|
||||
stats.getCorpus().getCorpusType() == CorpusType.SSJ500K) {
|
||||
(stats.getCorpus().getCorpusType() == CorpusType.SSJ500K ||
|
||||
stats.getCorpus().getCorpusType() == CorpusType.KOST)) {
|
||||
// join corpus and stats
|
||||
fj(corpus, stats);
|
||||
corpus.clear();
|
||||
|
|
Loading…
Reference in New Issue
Block a user