Added new ssj500k reading option. Fixed GOS taxonomy
This commit is contained in:
@@ -244,7 +244,7 @@ public class CorpusTab {
|
||||
|
||||
// The "{}" placeholder is required for the parameterized logger to render the corpus type in the message.
logger.info("reading header data for {}", corpusType.toString());
|
||||
|
||||
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES) {
|
||||
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
|
||||
boolean corpusIsSplit = corpusFiles.size() > 1;
|
||||
|
||||
final Task<HashSet<String>> task = new Task<HashSet<String>>() {
|
||||
@@ -429,6 +429,7 @@ public class CorpusTab {
|
||||
// read first file only, maybe later do all, if toll on resources is acceptable
|
||||
File f = corpusFiles.iterator().next();
|
||||
String title = XML_processing.readXMLHeaderTag(f.getAbsolutePath(), "title").toLowerCase();
|
||||
String attrib = XML_processing.readXMLHeaderAttribute(f.getAbsolutePath(), "body", "base").toLowerCase();
|
||||
String test = CCKRES.getNameLowerCase();
|
||||
String debug = "";
|
||||
|
||||
@@ -442,6 +443,8 @@ public class CorpusTab {
|
||||
corpusType = CCKRES;
|
||||
} else if (title.contains(GOS.getNameLowerCase())) {
|
||||
corpusType = GOS;
|
||||
} else if (attrib.contains(SSJ500K.getNameLowerCase())) {
|
||||
corpusType = SSJ500K;
|
||||
}
|
||||
|
||||
if (corpusType == null) {
|
||||
|
||||
@@ -415,7 +415,6 @@ public class OneWordAnalysisTab {
|
||||
Filter filter = new Filter();
|
||||
filter.setNgramValue(1);
|
||||
filter.setCalculateFor(calculateFor);
|
||||
filter.setMsd(msd);
|
||||
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
|
||||
filter.setDisplayTaxonomy(displayTaxonomy);
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
@@ -424,6 +423,9 @@ public class OneWordAnalysisTab {
|
||||
filter.setSolarFilters(solarFiltersMap);
|
||||
filter.setStringLength(1);
|
||||
filter.setMultipleKeys(alsoVisualize);
|
||||
|
||||
// setMsd must be called after setMultipleKeys(alsoVisualize)
|
||||
filter.setMsd(msd);
|
||||
filter.setMinimalOccurrences(minimalOccurrences);
|
||||
filter.setMinimalTaxonomy(minimalTaxonomy);
|
||||
filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
|
||||
|
||||
@@ -522,7 +522,6 @@ public class StringAnalysisTabNew2 {
|
||||
Filter filter = new Filter();
|
||||
filter.setNgramValue(ngramValue);
|
||||
filter.setCalculateFor(calculateFor);
|
||||
filter.setMsd(msd);
|
||||
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
|
||||
filter.setDisplayTaxonomy(displayTaxonomy);
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
@@ -531,6 +530,9 @@ public class StringAnalysisTabNew2 {
|
||||
filter.setSolarFilters(solarFiltersMap);
|
||||
filter.setNotePunctuations(notePunctuations);
|
||||
filter.setMultipleKeys(alsoVisualize);
|
||||
|
||||
// setMsd must be called after setMultipleKeys(alsoVisualize)
|
||||
filter.setMsd(msd);
|
||||
filter.setMinimalOccurrences(minimalOccurrences);
|
||||
filter.setMinimalTaxonomy(minimalTaxonomy);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user