Added new ssj500k reading option. Fixed GOS taxonomy

This commit is contained in:
2018-09-03 13:31:41 +02:00
parent 426a9ccc46
commit 1d9e9b7ed6
9 changed files with 280 additions and 40 deletions

View File

@@ -244,7 +244,7 @@ public class CorpusTab {
logger.info("reading header data for ", corpusType.toString());
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES) {
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
boolean corpusIsSplit = corpusFiles.size() > 1;
final Task<HashSet<String>> task = new Task<HashSet<String>>() {
@@ -429,6 +429,7 @@ public class CorpusTab {
// read first file only, maybe later do all, if toll on resources is acceptable
File f = corpusFiles.iterator().next();
String title = XML_processing.readXMLHeaderTag(f.getAbsolutePath(), "title").toLowerCase();
String attrib = XML_processing.readXMLHeaderAttribute(f.getAbsolutePath(), "body", "base").toLowerCase();
String test = CCKRES.getNameLowerCase();
String debug = "";
@@ -442,6 +443,8 @@ public class CorpusTab {
corpusType = CCKRES;
} else if (title.contains(GOS.getNameLowerCase())) {
corpusType = GOS;
} else if (attrib.contains(SSJ500K.getNameLowerCase())) {
corpusType = SSJ500K;
}
if (corpusType == null) {

View File

@@ -415,7 +415,6 @@ public class OneWordAnalysisTab {
Filter filter = new Filter();
filter.setNgramValue(1);
filter.setCalculateFor(calculateFor);
filter.setMsd(msd);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL);
@@ -424,6 +423,9 @@ public class OneWordAnalysisTab {
filter.setSolarFilters(solarFiltersMap);
filter.setStringLength(1);
filter.setMultipleKeys(alsoVisualize);
// setMsd must be called after setMultipleKeys(alsoVisualize)
filter.setMsd(msd);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);

View File

@@ -522,7 +522,6 @@ public class StringAnalysisTabNew2 {
Filter filter = new Filter();
filter.setNgramValue(ngramValue);
filter.setCalculateFor(calculateFor);
filter.setMsd(msd);
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
filter.setDisplayTaxonomy(displayTaxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL);
@@ -531,6 +530,9 @@ public class StringAnalysisTabNew2 {
filter.setSolarFilters(solarFiltersMap);
filter.setNotePunctuations(notePunctuations);
filter.setMultipleKeys(alsoVisualize);
// setMsd must be called after setMultipleKeys(alsoVisualize)
filter.setMsd(msd);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);