|
|
|
package data;
|
|
|
|
|
|
|
|
import java.util.*;
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
|
|
import gui.ValidationUtil;
|
|
|
|
import javafx.collections.FXCollections;
|
|
|
|
import javafx.collections.ObservableList;
|
|
|
|
|
|
|
|
public class Tax {
|
|
|
|
private static LinkedHashMap<String, String> GIGAFIDA_TAXONOMY;
|
|
|
|
private static LinkedHashMap<String, String> GOS_TAXONOMY;
|
|
|
|
private static final HashSet<CorpusType> corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES, CorpusType.SSJ500K, CorpusType.VERT));
|
|
|
|
|
|
|
|
static {
|
|
|
|
// GIGAFIDA ----------------------------
|
|
|
|
GIGAFIDA_TAXONOMY = new LinkedHashMap<>();
|
|
|
|
|
|
|
|
GIGAFIDA_TAXONOMY.put("SSJ.T", "SSJ.T - tisk");
|
|
|
|
GIGAFIDA_TAXONOMY.put("SSJ.T.K", " SSJ.T.K - tisk-knjižno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", " SSJ.T.K.L - tisk-knjižno-leposlovno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", " SSJ.T.K.S - tisk-knjižno-strokovno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("SSJ.T.P", " SSJ.T.P - tisk-periodično");
|
|
|
|
GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", " SSJ.T.P.C - tisk-periodično-časopis");
|
|
|
|
GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", " SSJ.T.P.R - tisk-periodično-revija");
|
|
|
|
GIGAFIDA_TAXONOMY.put("SSJ.T.D", " SSJ.T.D - tisk-drugo");
|
|
|
|
GIGAFIDA_TAXONOMY.put("SSJ.I", "SSJ.I - internet");
|
|
|
|
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P", "Ft.P - prenosnik");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.G", " Ft.P.G - prenosnik-govorni");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.E", " Ft.P.E - prenosnik-elektronski");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P", " Ft.P.P - prenosnik-pisni");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O", " Ft.P.P.O - prenosnik-pisni-objavljeno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", " Ft.P.P.O.K - prenosnik-pisni-objavljeno-knjižno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", " Ft.P.P.O.P - prenosnik-pisni-objavljeno-periodično");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", " Ft.P.P.O.P.C - prenosnik-pisni-objavljeno-periodično-časopisno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", " Ft.P.P.O.P.C.D - prenosnik-pisni-objavljeno-periodično-časopisno-dnevno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", " Ft.P.P.O.P.C.V - prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", " Ft.P.P.O.P.C.T - prenosnik-pisni-objavljeno-periodično-časopisno-tedensko");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", " Ft.P.P.O.P.R - prenosnik-pisni-objavljeno-periodično-revialno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", " Ft.P.P.O.P.R.T - prenosnik-pisni-objavljeno-periodično-revialno-tedensko");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", " Ft.P.P.O.P.R.S - prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", " Ft.P.P.O.P.R.M - prenosnik-pisni-objavljeno-periodično-revialno-mesečno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", " Ft.P.P.O.P.R.D - prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", " Ft.P.P.O.P.R.O - prenosnik-pisni-objavljeno-periodično-revialno-občasno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.N", " Ft.P.P.N - prenosnik-pisni-neobjavljeno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", " Ft.P.P.N.J - prenosnik-pisni-neobjavljeno-javno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", " Ft.P.P.N.I - prenosnik-pisni-neobjavljeno-interno");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", " Ft.P.P.N.Z - prenosnik-pisni-neobjavljeno-zasebno");
|
|
|
|
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z", "Ft.Z - zvrst");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z.U", " Ft.Z.U - zvrst-umetnostna");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", " Ft.Z.U.P - zvrst-umetnostna-pesniška");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", " Ft.Z.U.R - zvrst-umetnostna-prozna");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", " Ft.Z.U.D - zvrst-umetnostna-dramska");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z.N", " Ft.Z.N - zvrst-neumetnostna");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", " Ft.Z.N.S - zvrst-neumetnostna-strokovna");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", " Ft.Z.N.S.H - zvrst-neumetnostna-strokovna-humanistična in družboslovna");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", " Ft.Z.N.S.N - zvrst-neumetnostna-strokovna-naravoslovna in tehnična");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", " Ft.Z.N.N - zvrst-neumetnostna-nestrokovna");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", " Ft.Z.N.P - zvrst-neumetnostna-pravna");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.L", "Ft.L - zvrst-lektorirano");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.L.D", " Ft.L.D - zvrst-lektorirano-da");
|
|
|
|
GIGAFIDA_TAXONOMY.put("Ft.L.N", " Ft.L.N - zvrst-lektorirano-ne");
|
|
|
|
|
|
|
|
// GOS ----------------------------------
|
|
|
|
GOS_TAXONOMY = new LinkedHashMap<>();
|
|
|
|
|
|
|
|
GOS_TAXONOMY.put("gos.T", "gos.T - diskurz");
|
|
|
|
GOS_TAXONOMY.put("gos.T.J", " gos.T.J - diskurz-javni");
|
|
|
|
GOS_TAXONOMY.put("gos.T.J.I", " gos.T.J.I - diskurz-javni-informativno-izobraževalni");
|
|
|
|
GOS_TAXONOMY.put("gos.T.J.R", " gos.T.J.R - diskurz-javni-razvedrilni");
|
|
|
|
GOS_TAXONOMY.put("gos.T.N", " gos.T.N - diskurz-nejavni");
|
|
|
|
GOS_TAXONOMY.put("gos.T.N.N", " gos.T.N.N - diskurz-nejavni-nezasebni");
|
|
|
|
GOS_TAXONOMY.put("gos.T.N.Z", " gos.T.N.Z - diskurz-nejavni-zasebni");
|
|
|
|
|
|
|
|
GOS_TAXONOMY.put("gos.S", "gos.S - situacija");
|
|
|
|
GOS_TAXONOMY.put("gos.S.R", " gos.S.R - situacija-radio");
|
|
|
|
GOS_TAXONOMY.put("gos.S.T", " gos.S.T - situacija-televizija");
|
|
|
|
|
|
|
|
GOS_TAXONOMY.put("gos.K", "gos.K - kanal");
|
|
|
|
GOS_TAXONOMY.put("gos.K.O", " gos.K.O - kanal-osebni stik");
|
|
|
|
GOS_TAXONOMY.put("gos.K.P", " gos.K.P - kanal-telefon");
|
|
|
|
GOS_TAXONOMY.put("gos.K.R", " gos.K.R - kanal-radio");
|
|
|
|
GOS_TAXONOMY.put("gos.K.T", " gos.K.T - kanal-televizija");
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the whole default taxonomy for the specified corpus type
|
|
|
|
*/
|
|
|
|
public static ObservableList<String> getTaxonomyForComboBox(CorpusType corpusType) {
|
|
|
|
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
|
|
|
return FXCollections.observableArrayList(GIGAFIDA_TAXONOMY.values());
|
|
|
|
} else if (corpusType == CorpusType.GOS) {
|
|
|
|
return FXCollections.observableArrayList(GOS_TAXONOMY.values());
|
|
|
|
}
|
|
|
|
|
|
|
|
return FXCollections.observableArrayList(new ArrayList<>());
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns taxonomy names only for items found in headers
|
|
|
|
*/
|
|
|
|
public static ObservableList<String> getTaxonomyForComboBox(CorpusType corpusType, HashSet<String> foundTax) {
|
|
|
|
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
|
|
|
|
|
|
|
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
|
|
|
|
tax = GIGAFIDA_TAXONOMY;
|
|
|
|
} else if (corpusType == CorpusType.GOS) {
|
|
|
|
tax = GOS_TAXONOMY;
|
|
|
|
} else if (corpusType == CorpusType.VERT){
|
|
|
|
// if VERT only order taxonomy by alphabet
|
|
|
|
ArrayList<String> sortedFoundTaxonomy = new ArrayList<>(foundTax);
|
|
|
|
Collections.sort(sortedFoundTaxonomy);
|
|
|
|
return FXCollections.observableArrayList(sortedFoundTaxonomy);
|
|
|
|
}
|
|
|
|
|
|
|
|
ArrayList<String> taxForCombo = new ArrayList<>();
|
|
|
|
|
|
|
|
// adds parents taxonomy as well
|
|
|
|
HashSet<String> genFoundTax = new HashSet<>();
|
|
|
|
|
|
|
|
for(String e : foundTax){
|
|
|
|
String[] elList = e.split("\\.");
|
|
|
|
for(int i = 1; i < elList.length - 1; i++){
|
|
|
|
String candidate = String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i));
|
|
|
|
genFoundTax.add(candidate);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
foundTax.addAll(genFoundTax);
|
|
|
|
|
|
|
|
// assures same relative order
|
|
|
|
for (String t : tax.keySet()) {
|
|
|
|
if (foundTax.contains(t)) {
|
|
|
|
taxForCombo.add(tax.get(t));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return FXCollections.observableArrayList(taxForCombo);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns taxonomy names only for items found in headers
|
|
|
|
*/
|
|
|
|
public static ArrayList<String> getTaxonomyForTaxonomyResult(CorpusType corpusType, Set<Taxonomy> foundTax) {
|
|
|
|
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
|
|
|
Set<Taxonomy> foundTaxHS= new HashSet<>(foundTax);
|
|
|
|
|
|
|
|
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
|
|
|
|
tax = GIGAFIDA_TAXONOMY;
|
|
|
|
} else if (corpusType == CorpusType.GOS) {
|
|
|
|
tax = GOS_TAXONOMY;
|
|
|
|
}
|
|
|
|
|
|
|
|
ArrayList<String> taxForCombo = new ArrayList<>();
|
|
|
|
|
|
|
|
// adds parents taxonomy as well
|
|
|
|
Set<Taxonomy> genFoundTax = new HashSet<>();
|
|
|
|
|
|
|
|
for(Taxonomy e : foundTaxHS){
|
|
|
|
String[] elList = e.toString().split("\\.");
|
|
|
|
for(int i = 1; i < elList.length - 1; i++){
|
|
|
|
Taxonomy candidate = Taxonomy.factory(String.join(".", Arrays.copyOfRange(elList, 0, elList.length - i)));
|
|
|
|
genFoundTax.add(candidate);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// ArrayList<String> taxonomyString = new ArrayList<>();
|
|
|
|
// for (Taxonomy t : taxonomyResult.keySet()){
|
|
|
|
// taxonomyString.add(t.toString());
|
|
|
|
// }
|
|
|
|
// ObservableList<String> taxonomyObservableString = Tax.getTaxonomyForComboBox(corpus.getCorpusType(), new HashSet<>(taxonomyString));
|
|
|
|
// ArrayList<String> sortedTaxonomyString = new ArrayList<>();
|
|
|
|
// for (String t : taxonomyObservableString){
|
|
|
|
// sortedTaxonomyString.add(t);
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
foundTaxHS.addAll(genFoundTax);
|
|
|
|
|
|
|
|
// assures same relative order
|
|
|
|
for (String t : tax.keySet()) {
|
|
|
|
if (foundTaxHS.contains(Taxonomy.factory(t))) {
|
|
|
|
taxForCombo.add(tax.get(t));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return taxForCombo;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static HashSet<CorpusType> getCorpusTypesWithTaxonomy() {
|
|
|
|
return corpusTypesWithTaxonomy;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static ArrayList<String> getTaxonomyCodes(ArrayList<Taxonomy> taxonomyNames, CorpusType corpusType) {
|
|
|
|
ArrayList<String> result = new ArrayList<>();
|
|
|
|
|
|
|
|
if (ValidationUtil.isEmpty(taxonomyNames)) {
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
|
|
|
|
|
|
|
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
|
|
|
tax = GIGAFIDA_TAXONOMY;
|
|
|
|
} else if (corpusType == CorpusType.GOS) {
|
|
|
|
tax = GOS_TAXONOMY;
|
|
|
|
}
|
|
|
|
|
|
|
|
// for easier lookup
|
|
|
|
Map<String, String> taxInversed = tax.entrySet()
|
|
|
|
.stream()
|
|
|
|
.collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
|
|
|
|
|
|
|
|
for (Taxonomy taxonomyName : taxonomyNames) {
|
|
|
|
result.add(taxInversed.get(taxonomyName.toString()));
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
// public static ArrayList<String> getTaxonomyFormatted(ArrayList<String> taxonomyNames, CorpusType corpusType) {
|
|
|
|
// ArrayList<String> result = new ArrayList<>();
|
|
|
|
//
|
|
|
|
// if (ValidationUtil.isEmpty(taxonomyNames)) {
|
|
|
|
// return result;
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
|
|
|
//
|
|
|
|
// if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
|
|
|
// tax = GIGAFIDA_TAXONOMY;
|
|
|
|
// } else if (corpusType == CorpusType.GOS) {
|
|
|
|
// tax = GOS_TAXONOMY;
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// // for easier lookup
|
|
|
|
// Map<String, String> taxInversed = tax.entrySet()
|
|
|
|
// .stream()
|
|
|
|
// .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
|
|
|
|
//
|
|
|
|
// for (String taxonomyName : taxonomyNames) {
|
|
|
|
// result.add(taxInversed.get(taxonomyName) + " - " + taxonomyName);
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// return result;
|
|
|
|
// }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns a list of proper names for codes
|
|
|
|
*
|
|
|
|
* @param corpusType
|
|
|
|
* @param taxonomy
|
|
|
|
*
|
|
|
|
* @return
|
|
|
|
*/
|
|
|
|
public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<Taxonomy> taxonomy) {
|
|
|
|
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
|
|
|
|
|
|
|
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES || corpusType == CorpusType.SSJ500K) {
|
|
|
|
tax = GIGAFIDA_TAXONOMY;
|
|
|
|
} else if (corpusType == CorpusType.GOS) {
|
|
|
|
tax = GOS_TAXONOMY;
|
|
|
|
}
|
|
|
|
|
|
|
|
ArrayList<String> result = new ArrayList<>();
|
|
|
|
|
|
|
|
for (Taxonomy t : taxonomy) {
|
|
|
|
result.add(tax.get(t.toString()));
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static String getLongTaxonomyName(String shortName){
|
|
|
|
if (GIGAFIDA_TAXONOMY.containsKey(shortName))
|
|
|
|
return GIGAFIDA_TAXONOMY.get(shortName);
|
|
|
|
else if(GOS_TAXONOMY.containsKey(shortName))
|
|
|
|
return GOS_TAXONOMY.get(shortName);
|
|
|
|
else
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
}
|