package data; import java.util.*; import java.util.stream.Collectors; import gui.ValidationUtil; import javafx.collections.FXCollections; import javafx.collections.ObservableList; public class Tax { private static LinkedHashMap GIGAFIDA_TAXONOMY; private static LinkedHashMap GOS_TAXONOMY; private static final HashSet corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES, CorpusType.SSJ500K, CorpusType.VERT)); static { // GIGAFIDA ---------------------------- GIGAFIDA_TAXONOMY = new LinkedHashMap<>(); GIGAFIDA_TAXONOMY.put("SSJ.T", "SSJ.T - tisk"); GIGAFIDA_TAXONOMY.put("SSJ.T.K", " SSJ.T.K - tisk-knjižno"); GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno"); GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", " SSJ.T.K.S - tisk-knjižno-strokovno"); GIGAFIDA_TAXONOMY.put("SSJ.T.P", " SSJ.T.P - tisk-periodično"); GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", " SSJ.T.P.C - tisk-periodično-časopis"); GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", " SSJ.T.P.R - tisk-periodično-revija"); GIGAFIDA_TAXONOMY.put("SSJ.T.D", " SSJ.T.D - tisk-drugo"); GIGAFIDA_TAXONOMY.put("SSJ.I", "SSJ.I - internet"); GIGAFIDA_TAXONOMY.put("Ft.P", "Ft.P - prenosnik"); GIGAFIDA_TAXONOMY.put("Ft.P.G", " Ft.P.G - prenosnik-govorni"); GIGAFIDA_TAXONOMY.put("Ft.P.E", " Ft.P.E - prenosnik-elektronski"); GIGAFIDA_TAXONOMY.put("Ft.P.P", " Ft.P.P - prenosnik-pisni"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O", " Ft.P.P.O - prenosnik-pisni-objavljeno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", " Ft.P.P.O.K - prenosnik-pisni-objavljeno-knjižno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", " Ft.P.P.O.P - prenosnik-pisni-objavljeno-periodično"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", " Ft.P.P.O.P.C - prenosnik-pisni-objavljeno-periodično-časopisno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", " Ft.P.P.O.P.C.D - prenosnik-pisni-objavljeno-periodično-časopisno-dnevno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", " Ft.P.P.O.P.C.V - prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", " Ft.P.P.O.P.C.T - prenosnik-pisni-objavljeno-periodično-časopisno-tedensko"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", " Ft.P.P.O.P.R - prenosnik-pisni-objavljeno-periodično-revialno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", " Ft.P.P.O.P.R.T - prenosnik-pisni-objavljeno-periodično-revialno-tedensko"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", " Ft.P.P.O.P.R.S - prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", " Ft.P.P.O.P.R.M - prenosnik-pisni-objavljeno-periodično-revialno-mesečno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", " Ft.P.P.O.P.R.D - prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec"); GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", " Ft.P.P.O.P.R.O - prenosnik-pisni-objavljeno-periodično-revialno-občasno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.N", " Ft.P.P.N - prenosnik-pisni-neobjavljeno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", " Ft.P.P.N.J - prenosnik-pisni-neobjavljeno-javno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", " Ft.P.P.N.I - prenosnik-pisni-neobjavljeno-interno"); GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", " Ft.P.P.N.Z - prenosnik-pisni-neobjavljeno-zasebno"); GIGAFIDA_TAXONOMY.put("Ft.Z", "Ft.Z - zvrst"); GIGAFIDA_TAXONOMY.put("Ft.Z.U", " Ft.Z.U - zvrst-umetnostna"); GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", " Ft.Z.U.P - zvrst-umetnostna-pesniška"); GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", " Ft.Z.U.R - zvrst-umetnostna-prozna"); GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", " Ft.Z.U.D - zvrst-umetnostna-dramska"); GIGAFIDA_TAXONOMY.put("Ft.Z.N", " Ft.Z.N - zvrst-neumetnostna"); GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", " Ft.Z.N.S - zvrst-neumetnostna-strokovna"); GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", " Ft.Z.N.S.H - zvrst-neumetnostna-strokovna-humanistična in družboslovna"); GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", " Ft.Z.N.S.N - zvrst-neumetnostna-strokovna-naravoslovna in tehnična"); GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", " Ft.Z.N.N - zvrst-neumetnostna-nestrokovna"); GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", " Ft.Z.N.P - zvrst-neumetnostna-pravna"); GIGAFIDA_TAXONOMY.put("Ft.L", "Ft.L - zvrst-lektorirano"); GIGAFIDA_TAXONOMY.put("Ft.L.D", " Ft.L.D - zvrst-lektorirano-da"); GIGAFIDA_TAXONOMY.put("Ft.L.N", " Ft.L.N - zvrst-lektorirano-ne"); // GOS ---------------------------------- GOS_TAXONOMY = new LinkedHashMap<>(); GOS_TAXONOMY.put("gos.T", "gos.T - diskurz"); GOS_TAXONOMY.put("gos.T.J", " gos.T.J - diskurz-javni"); GOS_TAXONOMY.put("gos.T.J.I", " gos.T.J.I - diskurz-javni-informativno-izobraževalni"); GOS_TAXONOMY.put("gos.T.J.R", " gos.T.J.R - diskurz-javni-razvedrilni"); GOS_TAXONOMY.put("gos.T.N", " gos.T.N - diskurz-nejavni"); GOS_TAXONOMY.put("gos.T.N.N", " gos.T.N.N - diskurz-nejavni-nezasebni"); GOS_TAXONOMY.put("gos.T.N.Z", " gos.T.N.Z - diskurz-nejavni-zasebni"); GOS_TAXONOMY.put("gos.S", "gos.S - situacija"); GOS_TAXONOMY.put("gos.S.R", " gos.S.R - situacija-radio"); GOS_TAXONOMY.put("gos.S.T", " gos.S.T - situacija-televizija"); GOS_TAXONOMY.put("gos.K", "gos.K - kanal"); GOS_TAXONOMY.put("gos.K.O", " gos.K.O - kanal-osebni stik"); GOS_TAXONOMY.put("gos.K.P", " gos.K.P - kanal-telefon"); GOS_TAXONOMY.put("gos.K.R", " gos.K.R - kanal-radio"); GOS_TAXONOMY.put("gos.K.T", " gos.K.T - kanal-televizija"); } /** * Returns the whole default taxonomy for the specified corpus type */ public static ObservableList getTaxonomyForComboBox(CorpusType corpusType) { if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) { return FXCollections.observableArrayList(GIGAFIDA_TAXONOMY.values()); } else if (corpusType == CorpusType.GOS) { return FXCollections.observableArrayList(GOS_TAXONOMY.values()); } return FXCollections.observableArrayList(new ArrayList<>()); } /** * Returns taxonomy names only for items found in headers */ public static ObservableList getTaxonomyForComboBox(CorpusType corpusType, HashSet foundTax) { LinkedHashMap tax = new LinkedHashMap<>(); if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) { tax = GIGAFIDA_TAXONOMY; } else if (corpusType == CorpusType.GOS) { tax = GOS_TAXONOMY; } else if (corpusType == CorpusType.VERT){ // if VERT only order taxonomy by alphabet ArrayList sortedFoundTaxonomy = new ArrayList<>(foundTax); Collections.sort(sortedFoundTaxonomy); return FXCollections.observableArrayList(sortedFoundTaxonomy); } ArrayList taxForCombo = new ArrayList<>(); // adds parents taxonomy as well HashSet genFoundTax = new HashSet<>(); for(String e : foundTax){ String[] elList = e.split("\\."); for(int i = 1; i < elList.length - 1; i++){ String candidate = String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i)); genFoundTax.add(candidate); } } foundTax.addAll(genFoundTax); // assures same relative order for (String t : tax.keySet()) { if (foundTax.contains(t)) { taxForCombo.add(tax.get(t)); } } return FXCollections.observableArrayList(taxForCombo); } /** * Returns taxonomy names only for items found in headers */ public static ArrayList getTaxonomyForTaxonomyResult(CorpusType corpusType, Set foundTax) { LinkedHashMap tax = new LinkedHashMap<>(); Set foundTaxHS= new HashSet<>(foundTax); if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) { tax = GIGAFIDA_TAXONOMY; } else if (corpusType == CorpusType.GOS) { tax = GOS_TAXONOMY; } ArrayList taxForCombo = new ArrayList<>(); // adds parents taxonomy as well Set genFoundTax = new HashSet<>(); for(Taxonomy e : foundTaxHS){ String[] elList = e.toString().split("\\."); for(int i = 1; i < elList.length - 1; i++){ Taxonomy candidate = Taxonomy.factory(String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i))); genFoundTax.add(candidate); } } // ArrayList taxonomyString = new ArrayList<>(); // for (Taxonomy t : taxonomyResult.keySet()){ // taxonomyString.add(t.toString()); // } // ObservableList taxonomyObservableString = Tax.getTaxonomyForComboBox(corpus.getCorpusType(), new HashSet<>(taxonomyString)); // ArrayList sortedTaxonomyString = new ArrayList<>(); // for (String t : taxonomyObservableString){ // sortedTaxonomyString.add(t); // } foundTaxHS.addAll(genFoundTax); // assures same relative order for (String t : tax.keySet()) { if (foundTaxHS.contains(Taxonomy.factory(t))) { taxForCombo.add(tax.get(t)); } } return taxForCombo; } public static HashSet getCorpusTypesWithTaxonomy() { return corpusTypesWithTaxonomy; } public static ArrayList getTaxonomyCodes(ArrayList taxonomyNames, CorpusType corpusType) { ArrayList result = new ArrayList<>(); if (ValidationUtil.isEmpty(taxonomyNames)) { return result; } LinkedHashMap tax = new LinkedHashMap<>(); if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) { tax = GIGAFIDA_TAXONOMY; } else if (corpusType == CorpusType.GOS) { tax = GOS_TAXONOMY; } // for easier lookup Map taxInversed = tax.entrySet() .stream() .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); for (Taxonomy taxonomyName : taxonomyNames) { result.add(taxInversed.get(taxonomyName.toString())); } return result; } // public static ArrayList getTaxonomyFormatted(ArrayList taxonomyNames, CorpusType corpusType) { // ArrayList result = new ArrayList<>(); // // if (ValidationUtil.isEmpty(taxonomyNames)) { // return result; // } // // LinkedHashMap tax = new LinkedHashMap<>(); // // if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) { // tax = GIGAFIDA_TAXONOMY; // } else if (corpusType == CorpusType.GOS) { // tax = GOS_TAXONOMY; // } // // // for easier lookup // Map taxInversed = tax.entrySet() // .stream() // .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); // // for (String taxonomyName : taxonomyNames) { // result.add(taxInversed.get(taxonomyName) + " - " + taxonomyName); // } // // return result; // } /** * Returns a list of proper names for codes * * @param corpusType * @param taxonomy * * @return */ public static ArrayList getTaxonomyForInfo(CorpusType corpusType, ArrayList taxonomy) { LinkedHashMap tax = new LinkedHashMap<>(); if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) { tax = GIGAFIDA_TAXONOMY; } else if (corpusType == CorpusType.GOS) { tax = GOS_TAXONOMY; } ArrayList result = new ArrayList<>(); for (Taxonomy t : taxonomy) { result.add(tax.get(t.toString())); } return result; } public static String getLongTaxonomyName(String shortName){ if (GIGAFIDA_TAXONOMY.containsKey(shortName)) return GIGAFIDA_TAXONOMY.get(shortName); else if(GOS_TAXONOMY.containsKey(shortName)) return GOS_TAXONOMY.get(shortName); else return null; } }