diff --git a/build_instructions.md b/build_instructions.md index 930ec08..405942e 100644 --- a/build_instructions.md +++ b/build_instructions.md @@ -3,6 +3,7 @@ ```shell mvn package ``` +- results are in shade folder # Build executable using Launch4j - Install Java on Windows diff --git a/src/main/java/alg/XML_processing.java b/src/main/java/alg/XML_processing.java index 26ed8e5..a6e9e8e 100755 --- a/src/main/java/alg/XML_processing.java +++ b/src/main/java/alg/XML_processing.java @@ -501,6 +501,10 @@ public class XML_processing { // this toggle is true when we're inside a header (next block of code executes) // and false when we're not (skip reading unnecessary attributes) insideHeader = true; + } else if (corpusType == CorpusType.KOST && elementName.equals("standOff") || + corpusType == CorpusType.KOST && elementName.equals("TEI") + ) { + return resultTaxonomy; } if (insideHeader) { @@ -849,6 +853,10 @@ public class XML_processing { } else if (qName.equals("text")){ taxonomyMatch = true; + } else if (stats.getCorpus().getCorpusType() == CorpusType.KOST && qName.equals("standOff") || + stats.getCorpus().getCorpusType() == CorpusType.KOST && qName.equals("TEI") + ) { + return true; } break; @@ -911,7 +919,8 @@ public class XML_processing { } // fallback else if (endElement.getName().getLocalPart().equalsIgnoreCase("div") && - stats.getCorpus().getCorpusType() == CorpusType.SSJ500K) { + (stats.getCorpus().getCorpusType() == CorpusType.SSJ500K || + stats.getCorpus().getCorpusType() == CorpusType.KOST)) { // join corpus and stats fj(corpus, stats); corpus.clear();