diff --git a/src/main/java/alg/XML_processing.java b/src/main/java/alg/XML_processing.java index 6031e85..a60fc3a 100755 --- a/src/main/java/alg/XML_processing.java +++ b/src/main/java/alg/XML_processing.java @@ -2,8 +2,7 @@ package alg; import static data.Enums.solar.SolarFilters.*; -import java.io.FileInputStream; -import java.io.FileNotFoundException; +import java.io.*; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ForkJoinPool; @@ -15,6 +14,8 @@ import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.*; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.LineIterator; import org.apache.logging.log4j.LogManager; import data.*; @@ -436,6 +437,62 @@ public class XML_processing { return true; } + + /** + * Parses XML headers for information about its taxonomy (if supported) or filters (solar) + * + * @param filepath + * @param corpusIsSplit is corpus split into multiple xml files, or are all entries grouped into one large xml file + * @param corpusType + */ + public static HashSet readVertHeaderTaxonomyAndFilters(String filepath, boolean corpusIsSplit, CorpusType corpusType) { +// boolean parseTaxonomy = Tax.getCorpusTypesWithTaxonomy().contains(corpusType); + // solar + Set headTags = null; + HashMap> resultFilters = new HashMap<>(); + // taxonomy corpora + HashSet resultTaxonomy = new HashSet<>(); + + LineIterator it = null; + try { + it = FileUtils.lineIterator(new File(filepath), "UTF-8"); + try { + boolean insideHeader = false; + + while (it.hasNext()) { + String line = it.nextLine(); + + if (line.length() > 4 && line.substring(1, 5).equals("text")) { + // split over "\" " + String[] split = line.split("\" "); +// String mediumId = ""; +// String typeId = ""; +// String proofreadId = ""; + for (String el : split) { + String[] attribute = el.split("=\""); + if (attribute[0].equals("medium_id")) { +// mediumId = attribute[1]; + resultTaxonomy.add(attribute[1]); + } else if (attribute[0].equals("type_id")) { +// typeId = attribute[1]; + resultTaxonomy.add(attribute[1]); + } else if (attribute[0].equals("proofread_id")) { +// proofreadId = attribute[1]; + resultTaxonomy.add(attribute[1]); + } + } + } + } + } finally { + LineIterator.closeQuietly(it); + } + } catch (IOException e) { + e.printStackTrace(); + } + resultTaxonomy.remove("-"); + return resultTaxonomy; + } + /** * Parses XML headers for information about its taxonomy (if supported) or filters (solar) * diff --git a/src/main/java/data/CorpusType.java b/src/main/java/data/CorpusType.java index e8b2db9..7cac659 100755 --- a/src/main/java/data/CorpusType.java +++ b/src/main/java/data/CorpusType.java @@ -5,7 +5,8 @@ public enum CorpusType { CCKRES("ccKres ", "cckres"), SOLAR("Šolar", "šolar"), GOS("GOS", "gos"), - SSJ500K("ssj500k", "ssj500k"); + SSJ500K("ssj500k", "ssj500k"), + VERT("vert", "vert"); private final String name; diff --git a/src/main/java/data/Tax.java b/src/main/java/data/Tax.java index 58191c8..305fcb7 100755 --- a/src/main/java/data/Tax.java +++ b/src/main/java/data/Tax.java @@ -10,7 +10,7 @@ import javafx.collections.ObservableList; public class Tax { private static LinkedHashMap GIGAFIDA_TAXONOMY; private static LinkedHashMap GOS_TAXONOMY; - private static final HashSet corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES, CorpusType.SSJ500K)); + private static final HashSet corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES, CorpusType.SSJ500K, CorpusType.VERT)); static { // GIGAFIDA ---------------------------- @@ -108,6 +108,11 @@ public class Tax { tax = GIGAFIDA_TAXONOMY; } else if (corpusType == CorpusType.GOS) { tax = GOS_TAXONOMY; + } else if (corpusType == CorpusType.VERT){ + // if VERT only order taxonomy by alphabet + ArrayList sortedFoundTaxonomy = new ArrayList<>(foundTax); + Collections.sort(sortedFoundTaxonomy); + return FXCollections.observableArrayList(sortedFoundTaxonomy); } ArrayList taxForCombo = new ArrayList<>(); diff --git a/src/main/java/data/Taxonomy.java b/src/main/java/data/Taxonomy.java index 86f0bd6..ce4ae7d 100755 --- a/src/main/java/data/Taxonomy.java +++ b/src/main/java/data/Taxonomy.java @@ -696,6 +696,7 @@ public enum Taxonomy { } public static ArrayList convertStringListToTaxonomyList(ObservableList stringList){ + System.out.println(stringList); ArrayList taxonomyList = new ArrayList<>(); // System.out.println("INTERESTING STUFF"); @@ -710,6 +711,9 @@ public enum Taxonomy { public static void modifyingTaxonomy(ArrayList taxonomy, ArrayList checkedItemsTaxonomy, Corpus corpus){ // get taxonomies that were selected/deselected by user +// System.out.println(taxonomy); +// System.out.println(checkedItemsTaxonomy); + Set disjointTaxonomies = new HashSet<>(checkedItemsTaxonomy); if (taxonomy != null) { disjointTaxonomies.addAll(taxonomy); diff --git a/src/main/java/gui/CharacterAnalysisTab.java b/src/main/java/gui/CharacterAnalysisTab.java index 7b3c181..e068884 100755 --- a/src/main/java/gui/CharacterAnalysisTab.java +++ b/src/main/java/gui/CharacterAnalysisTab.java @@ -241,7 +241,7 @@ public class CharacterAnalysisTab { msd = new ArrayList<>(); // taxonomy - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { if (taxonomyListener != null){ taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); } @@ -287,7 +287,7 @@ public class CharacterAnalysisTab { displayTaxonomy = false; displayTaxonomyChB.setSelected(false); // set - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { displayTaxonomyChB.setDisable(false); displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; diff --git a/src/main/java/gui/CorpusTab.java b/src/main/java/gui/CorpusTab.java index 68e6ad9..2750f4b 100755 --- a/src/main/java/gui/CorpusTab.java +++ b/src/main/java/gui/CorpusTab.java @@ -211,8 +211,49 @@ public class CorpusTab { // make sure there are corpus files in selected directory or notify the user about it if (corpusFiles.size() == 0) { - logger.info("alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND")); - showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null); + // try .vert + corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("vert", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE); + Collection corpusFilesRegi = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("regi", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE); + + if (corpusFiles.size() == 0){ + logger.info("alert: ", I18N.get("message.WARNING_CORPUS_NOT_FOUND")); + showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_CORPUS_NOT_FOUND"), null); + } else { + corpusType = VERT; + + corpus.setCorpusType(corpusType); + + Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType.toString()); + + StringBuilder sb = new StringBuilder(); + sb.append(corpusLocation) + .append("\n") + .append(String.format(I18N.get("message.NOTIFICATION_FOUND_X_FILES"), corpusFiles.size())) + .append("\n") + .append(String.format(I18N.get("message.NOTIFICATION_CORPUS"), corpusType.toString())); + + String result = sb.toString(); + + logger.debug(result); + + initNewCorpus(selectedDirectory, corpusFiles); + Messages.setChooseCorpusProperties(corpusLocation, corpusFilesSize, corpusType.toString()); + + corpus.setChosenCorpusLocation(selectedDirectory); + corpus.setDetectedCorpusFiles(corpusFiles); + + chooseCorpusLabelContent = result; + if (readHeaderInfo) { + logger.info("reading header info..."); + readHeaderInfo(); + } else { + setResults(); + + setCorpusForAnalysis(); + } + + } + } else { String chooseCorpusLabelContentTmp = detectCorpusType(corpusFiles); @@ -420,6 +461,70 @@ public class CorpusTab { task.setOnCancelled(e -> togglePiAndSetCorpusWrapper(false)); task.setOnFailed(e -> togglePiAndSetCorpusWrapper(false)); + final Thread thread = new Thread(task, "task"); + thread.setDaemon(true); + thread.start(); + } else if (corpusType == CorpusType.VERT) { + // many many fields + boolean corpusIsSplit = corpusFiles.size() > 1; + + final Task> task = new Task>() { + @Override + protected HashSet call() throws Exception { + HashSet values = new HashSet<>(); + long i = 0; + + if (!corpusIsSplit) { + updateProgress(-1.0f, -1.0f); + } + + for (File file : corpusFiles) { + HashSet tmpvalues = XML_processing.readVertHeaderTaxonomyAndFilters(file.getAbsolutePath(), corpusIsSplit, corpusType); + + // update final results + for (String entry : tmpvalues) { + if (!entry.equals("-")) { + values.add(entry); + } + } + + i++; + + if (corpusIsSplit) { + updateProgress(i, corpusFiles.size()); + } + } + + updateProgress(1.0f, 1.0f); + return values; + } + }; + + locationScanPI.progressProperty().bind(task.progressProperty()); + + task.setOnSucceeded(e -> { + ObservableList readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue()); + + if (ValidationUtil.isEmpty(readTaxonomy)) { + // if no taxonomy found alert the user and keep other tabs disabled + logger.info("No vert filters found in headers."); + GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_NO_SOLAR_FILTERS_FOUND")); + } else { + // set taxonomy, update label + corpus.setTaxonomy(readTaxonomy); + corpus.setHeaderRead(true); + Messages.setChooseCorpusL(chooseCorpusL, chooseCorpusLabelContent); + setResults(); + setCorpusForAnalysis(); + } + + togglePiAndSetCorpusWrapper(false); + + }); + + task.setOnCancelled(e -> togglePiAndSetCorpusWrapper(false)); + task.setOnFailed(e -> togglePiAndSetCorpusWrapper(false)); + final Thread thread = new Thread(task, "task"); thread.setDaemon(true); thread.start(); diff --git a/src/main/java/gui/OneWordAnalysisTab.java b/src/main/java/gui/OneWordAnalysisTab.java index 058442a..655f176 100755 --- a/src/main/java/gui/OneWordAnalysisTab.java +++ b/src/main/java/gui/OneWordAnalysisTab.java @@ -383,7 +383,7 @@ public class OneWordAnalysisTab { alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); // taxonomy - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { if (taxonomyListener != null){ taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); } @@ -430,7 +430,7 @@ public class OneWordAnalysisTab { displayTaxonomy = false; displayTaxonomyChB.setSelected(false); // set - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { displayTaxonomyChB.setDisable(false); displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; diff --git a/src/main/java/gui/StringAnalysisTabNew2.java b/src/main/java/gui/StringAnalysisTabNew2.java index 574ad98..dec4053 100755 --- a/src/main/java/gui/StringAnalysisTabNew2.java +++ b/src/main/java/gui/StringAnalysisTabNew2.java @@ -312,7 +312,7 @@ public class StringAnalysisTabNew2 { displayTaxonomyChB.setSelected(false); // set - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { displayTaxonomyChB.setDisable(false); displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; @@ -515,7 +515,7 @@ public class StringAnalysisTabNew2 { alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); // taxonomy - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { if (taxonomyListener != null){ taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); } diff --git a/src/main/java/gui/WordLevelTab.java b/src/main/java/gui/WordLevelTab.java index aa2cd40..9d83422 100755 --- a/src/main/java/gui/WordLevelTab.java +++ b/src/main/java/gui/WordLevelTab.java @@ -509,7 +509,7 @@ public class WordLevelTab { alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener(alsoVisualizeListener); // taxonomy - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { if (taxonomyListener != null){ taxonomyCCB.getCheckModel().getCheckedItems().removeListener(taxonomyListener); } @@ -556,7 +556,7 @@ public class WordLevelTab { displayTaxonomy = false; displayTaxonomyChB.setSelected(false); // set - if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { + if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType()) && corpus.getTaxonomy().size() > 0) { displayTaxonomyChB.setDisable(false); displayTaxonomyChB.selectedProperty().addListener((observable, oldValue, newValue) -> { displayTaxonomy = newValue; diff --git a/src/main/resources/gui/StringAnalysisTabNew2.fxml b/src/main/resources/gui/StringAnalysisTabNew2.fxml index 4210030..2a4296c 100755 --- a/src/main/resources/gui/StringAnalysisTabNew2.fxml +++ b/src/main/resources/gui/StringAnalysisTabNew2.fxml @@ -61,7 +61,7 @@