Fixed taxonomy processing for KOST + Added ignoring of certain files in KOST
This commit is contained in:
parent
6f09cf9bed
commit
eb72b380a5
|
@ -3,6 +3,7 @@
|
||||||
```shell
|
```shell
|
||||||
mvn package
|
mvn package
|
||||||
```
|
```
|
||||||
|
- Results are in the `shade` folder
|
||||||
|
|
||||||
# Build executable using Launch4j
|
# Build executable using Launch4j
|
||||||
- Install Java on Windows
|
- Install Java on Windows
|
||||||
|
|
|
@ -501,6 +501,10 @@ public class XML_processing {
|
||||||
// this toggle is true when we're inside a header (next block of code executes)
|
// this toggle is true when we're inside a header (next block of code executes)
|
||||||
// and false when we're not (skip reading unnecessary attributes)
|
// and false when we're not (skip reading unnecessary attributes)
|
||||||
insideHeader = true;
|
insideHeader = true;
|
||||||
|
} else if (corpusType == CorpusType.KOST && elementName.equals("standOff") ||
|
||||||
|
corpusType == CorpusType.KOST && elementName.equals("TEI")
|
||||||
|
) {
|
||||||
|
return resultTaxonomy;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (insideHeader) {
|
if (insideHeader) {
|
||||||
|
@ -849,6 +853,10 @@ public class XML_processing {
|
||||||
|
|
||||||
} else if (qName.equals("text")){
|
} else if (qName.equals("text")){
|
||||||
taxonomyMatch = true;
|
taxonomyMatch = true;
|
||||||
|
} else if (stats.getCorpus().getCorpusType() == CorpusType.KOST && qName.equals("standOff") ||
|
||||||
|
stats.getCorpus().getCorpusType() == CorpusType.KOST && qName.equals("TEI")
|
||||||
|
) {
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -911,7 +919,8 @@ public class XML_processing {
|
||||||
}
|
}
|
||||||
// fallback
|
// fallback
|
||||||
else if (endElement.getName().getLocalPart().equalsIgnoreCase("div") &&
|
else if (endElement.getName().getLocalPart().equalsIgnoreCase("div") &&
|
||||||
stats.getCorpus().getCorpusType() == CorpusType.SSJ500K) {
|
(stats.getCorpus().getCorpusType() == CorpusType.SSJ500K ||
|
||||||
|
stats.getCorpus().getCorpusType() == CorpusType.KOST)) {
|
||||||
// join corpus and stats
|
// join corpus and stats
|
||||||
fj(corpus, stats);
|
fj(corpus, stats);
|
||||||
corpus.clear();
|
corpus.clear();
|
||||||
|
|
Loading…
Reference in New Issue
Block a user