Added taxonomy presentation in results

2018-06-29 12:53:29 +02:00
parent d5d06fd7c5
commit 8d7cce6c77
11 changed files with 212 additions and 30 deletions
--- a/src/main/java/data/StatisticsNew.java
+++ b/src/main/java/data/StatisticsNew.java
@@ -32,6 +32,7 @@ public class StatisticsNew {

 	private String resultTitle;
 	private Map<String, AtomicLong> result;
+	private Map<String, Map<String, AtomicLong>> taxonomyResult;
 	private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
 	private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedSuffix;
 	private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedPrefix;
@@ -43,6 +44,20 @@ public class StatisticsNew {
 	public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
 		this.corpus = corpus;
 		this.filter = filter;
+		this.taxonomyResult = new ConcurrentHashMap<>();
+
+		// create table for counting word occurrences per taxonomy
+
+		if (this.filter.getTaxonomy().isEmpty()) {
+			for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
+				this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());
+			}
+		} else {
+			for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
+				Tax taxonomy = new Tax();
+				this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>());
+			}
+		}

 		if (useDB) {
 			this.useDB = true;
@@ -189,7 +204,7 @@ public class StatisticsNew {
 		}

 		stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
-		Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock());
+		Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult);
 		return true;
 	}

@@ -260,6 +275,28 @@ public class StatisticsNew {
 		return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
 	}

+	/**
+	 * Adds one occurrence of {@code o} to every tracked taxonomy listed on the n-gram's first word.
+	 * Tracked taxonomies the word does not belong to get a zero entry for {@code o}, so after the
+	 * call every per-taxonomy map contains the key. Does nothing for a null or empty n-gram.
+	 * @param o              result key being counted
+	 * @param ngramCandidate words of the n-gram; only the first word's taxonomy is consulted
+	 */
+	public void updateTaxonomyResults(String o, List<Word> ngramCandidate) {
+		if (ngramCandidate == null || ngramCandidate.isEmpty()) {
+			return;
+		}
+		// first word should have the same taxonomy as the others; null when the word carries none
+		List<String> wordTaxonomy = ngramCandidate.get(0).getTaxonomy();
+		for (Map.Entry<String, Map<String, AtomicLong>> entry : taxonomyResult.entrySet()) {
+			// start from zero so taxonomies without this n-gram still get an entry
+			AtomicLong count = entry.getValue().computeIfAbsent(o, k -> new AtomicLong(0));
+			if (wordTaxonomy != null && wordTaxonomy.contains(entry.getKey())) {
+				count.incrementAndGet();
+			}
+		}
+	}
+
 	public void updateResults(String o) {
 		// if not in map
 		AtomicLong r = result.putIfAbsent(o, new AtomicLong(1));
@@ -377,22 +414,22 @@ public class StatisticsNew {
 			}

 			// taksonomija
-			if (!isEmpty(filter.getTaxonomy())) {
-				info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
-			}
+//			if (!isEmpty(filter.getTaxonomy())) {
+//				info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
+//			}


 		}

-//		if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
-//			ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
-//
-//			info.put("Taksonomija: ", "");
-//			String sep = "";
-//			for (String s : tax) {
-//				info.put(sep = sep + " ", s);
-//			}
-//		}
+		if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
+			ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
+
+			info.put("Taksonomija: ", "");
+			String sep = "";
+			for (String s : tax) {
+				info.put(sep = sep + " ", s);
+			}
+		}

 		if (corpus.getCorpusType() == CorpusType.SOLAR) {
 			HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
--- a/src/main/java/data/Tax.java
+++ b/src/main/java/data/Tax.java
@@ -172,4 +172,13 @@ public class Tax {

 		return result;
 	}
+
+	/** Looks {@code shortName} up in GIGAFIDA_TAXONOMY, then GOS_TAXONOMY; {@code null} if in neither. */
+	public static String getLongTaxonomyName(String shortName) {
+		if (GIGAFIDA_TAXONOMY.containsKey(shortName)) {
+			return GIGAFIDA_TAXONOMY.get(shortName);
+		}
+		// fall back to the GOS table; keys absent from both tables yield null
+		return GOS_TAXONOMY.containsKey(shortName) ? GOS_TAXONOMY.get(shortName) : null;
+	}
 }
--- a/src/main/java/data/Word.java
+++ b/src/main/java/data/Word.java
@@ -3,6 +3,7 @@ package data;
 import java.io.Serializable;
 import java.util.Arrays;
 import java.util.HashSet;
+import java.util.List;

 import org.apache.commons.lang3.StringUtils;

@@ -15,6 +16,7 @@ public class Word implements Serializable {
 	private String word;
 	private String lemma;
 	private String msd;
+	private List<String> taxonomy;
 	private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));

 	/**
@@ -50,6 +52,22 @@ public class Word implements Serializable {
 		}
 	}

+	//private char besedna_vrsta;
+	/**
+	 * Builds a word with lemma, normalized MSD and the given taxonomy list (stored as-is, not copied).
+	 * The word is lower-cased unless the normalized MSD is empty or starts with "Sl"; per the
+	 * original note, the capital letter is kept only for proper nouns.
+	 */
+	public Word(String word, String lemma, String msd, List<String> taxonomy) {
+		this.lemma = lemma;
+		this.taxonomy = taxonomy;
+		this.msd = normalizeMsd(msd);
+
+		boolean keepOriginalCase = ValidationUtil.isEmpty(this.msd)
+				|| (this.msd.charAt(0) == 'S' && this.msd.length() >= 2 && this.msd.charAt(1) == 'l');
+		this.word = keepOriginalCase ? word : word.toLowerCase();
+	}
+
 	public Word() {
 	}

@@ -99,6 +117,10 @@ public class Word implements Serializable {
 		this.word = word;
 	}

+	public List<String> getTaxonomy() {
+		return taxonomy; // the stored list itself, not a copy; null if no taxonomy was ever assigned
+	}
+
 	public String getLemma() {
 		return lemma;
 	}