Refactored results - moved to taxonomyResults

Author: Luka
Date:   2018-07-05 09:37:35 +02:00
Parent: 8d7cce6c77
Commit: e2ce656fc5
8 changed files with 87 additions and 25 deletions

.gitignore (vendored)

@@ -1,6 +1,7 @@
 # Created by .ignore support plugin (hsz.mobi)
 ### Maven template
 target/
+corpus_analyzer_jar/
 pom.xml.tag
 pom.xml.releaseBackup
 pom.xml.versionsBackup

Ngrams.java

@@ -46,7 +46,7 @@ public class Ngrams {
             }
             // UPDATE TAXONOMY HERE!!!
-            stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate);
+            stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate.get(0).getTaxonomy());
             stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
         }
     }
@@ -114,6 +114,7 @@ public class Ngrams {
     private static void generateNgramLetterCandidates(List<Sentence> corpus, StatisticsNew stats) {
         for (Sentence s : corpus) {
             for (Word w : s.getWords()) {
+                List<String> taxonomy = w.getTaxonomy();
                 String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv());
                 // skip this iteration if:
@@ -128,6 +129,10 @@ public class Ngrams {
                 for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
+                    // TODO: locila?
+                    stats.updateTaxonomyResults(word.substring(i, i + stats.getFilter().getStringLength()), taxonomy);
+                    // stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
                     stats.updateResults(word.substring(i, i + stats.getFilter().getStringLength()));
                 }
             }
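For context, the letter-level branch in generateNgramLetterCandidates slides a fixed-length window over each word with String.substring and feeds every slice into both the per-taxonomy and the overall counts. A minimal standalone sketch of just that windowing step (class and method names are illustrative, not taken from the repository):

import java.util.ArrayList;
import java.util.List;

// Illustrative helper: extracts all letter n-grams ("slices") of a given
// length from a word, mirroring the substring loop above.
public class LetterNgramSketch {
    static List<String> letterNgrams(String word, int stringLength) {
        List<String> slices = new ArrayList<>();
        // same bound as the loop above: the last start index is word.length() - stringLength
        for (int i = 0; i < word.length() - stringLength + 1; i++) {
            slices.add(word.substring(i, i + stringLength));
        }
        return slices;
    }

    public static void main(String[] args) {
        // "besede" with length 3 -> [bes, ese, sed, ede]
        System.out.println(letterNgrams("besede", 3));
    }
}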

MultipleHMKeys.java (new file)

@@ -0,0 +1,49 @@
+package data;
+
+/*
+Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
+ */
+final class MultipleHMKeys {
+    private final String key1, key2, key3;
+
+    public MultipleHMKeys(String key1) {
+        this.key1 = key1;
+        this.key2 = null;
+        this.key3 = null;
+    }
+
+    public MultipleHMKeys(String key1, String key2) {
+        this.key1 = key1;
+        this.key2 = key2;
+        this.key3 = null;
+    }
+
+    public MultipleHMKeys(String key1, String key2, String key3) {
+        this.key1 = key1;
+        this.key2 = key2;
+        this.key3 = key3;
+    }
+
+    public String getKey1() {
+        return key1;
+    }
+
+    public String getKey2() {
+        return key2;
+    }
+
+    public String getKey3() {
+        return key3;
+    }
+
+    @Override
+    public int hashCode() {
+        return key1.hashCode() ^ key2.hashCode() ^ key3.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        return (obj instanceof MultipleHMKeys) && ((MultipleHMKeys) obj).key1.equals(key1)
+                && ((MultipleHMKeys) obj).key2.equals(key2)
+                && ((MultipleHMKeys) obj).key3.equals(key3);
+    }
+}
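A usage sketch for the composite key added above: instances are meant to act as a single HashMap key built from several fields (e.g. lemma plus msd). Note that with the one- and two-argument constructors key2/key3 remain null, so the hashCode()/equals() shown above would throw a NullPointerException for such instances; the sketch below is a null-safe variant via java.util.Objects, offered only as an illustration of the idea, not as what the commit contains.

import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

// Illustrative only: a null-safe take on the same composite-key idea,
// usable even when only some of the three keys are set.
final class MultipleHMKeysSketch {
    private final String key1, key2, key3;

    MultipleHMKeysSketch(String key1, String key2, String key3) {
        this.key1 = key1;
        this.key2 = key2;
        this.key3 = key3;
    }

    @Override
    public int hashCode() {
        // Objects.hash tolerates nulls, unlike key2.hashCode() on a null key2
        return Objects.hash(key1, key2, key3);
    }

    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof MultipleHMKeysSketch)) return false;
        MultipleHMKeysSketch other = (MultipleHMKeysSketch) obj;
        return Objects.equals(key1, other.key1)
                && Objects.equals(key2, other.key2)
                && Objects.equals(key3, other.key3);
    }

    public static void main(String[] args) {
        Map<MultipleHMKeysSketch, Long> counts = new HashMap<>();
        // lemma + msd as one combined map key, third key unused (placeholder values)
        counts.put(new MultipleHMKeysSketch("lemma", "msd", null), 1L);
        System.out.println(counts.get(new MultipleHMKeysSketch("lemma", "msd", null))); // 1
    }
}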

StatisticsNew.java

@@ -45,6 +45,7 @@ public class StatisticsNew {
         this.corpus = corpus;
         this.filter = filter;
         this.taxonomyResult = new ConcurrentHashMap<>();
+        this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
         // create table for counting word occurances per taxonomies
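The added line pre-seeds a "Total" bucket so that overall counts and per-taxonomy counts share one structure. Judging from the later call taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1)), the field is presumably a nested concurrent map along the lines of the sketch below; the exact declaration and the way taxonomy names are seeded are assumptions, not copied from the repository.

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

// Assumed shape of taxonomyResult: outer key = taxonomy name (plus the special
// "Total" bucket), inner key = word / lemma / letter n-gram, value = count.
class TaxonomyResultSketch {
    final ConcurrentHashMap<String, ConcurrentHashMap<String, AtomicLong>> taxonomyResult =
            new ConcurrentHashMap<>();

    TaxonomyResultSketch(List<String> taxonomyNames) {
        taxonomyResult.put("Total", new ConcurrentHashMap<>());
        for (String name : taxonomyNames) {
            taxonomyResult.put(name, new ConcurrentHashMap<>());
        }
    }

    public static void main(String[] args) {
        // placeholder taxonomy names, for illustration only
        TaxonomyResultSketch s = new TaxonomyResultSketch(Arrays.asList("taxonomyA", "taxonomyB"));
        System.out.println(s.taxonomyResult.keySet()); // e.g. [Total, taxonomyA, taxonomyB] (order not guaranteed)
    }
}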
@@ -97,13 +98,18 @@ public class StatisticsNew {
         if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
             Integer ngramLevel = filter.getNgramValue();
             if(ngramLevel == 0) {
-                sb.append("Crke").
-                        append(separator)
-                        .append(corpus.getCorpusType().toString())
+                sb.append(corpus.getCorpusType().toString())
+                        .append(separator)
+                        .append("crke")
+                        .append(separator)
+                        .append(filter.getCalculateFor())
                         .append(separator);
             } else if(ngramLevel == 1) {
-                sb.append("Besede").append(separator)
-                        .append(corpus.getCorpusType().toString())
+                sb.append(corpus.getCorpusType().toString())
+                        .append(separator)
+                        .append("besede")
+                        .append(separator)
+                        .append(filter.getCalculateFor())
                         .append(separator);
             }
             else {
@@ -196,14 +202,14 @@ public class StatisticsNew {
         }
 
         // if no results and nothing to save, return false
-        if (!(result.size() > 0)) {
+        if (!(taxonomyResult.get("Total").size() > 0)) {
            analysisProducedResults = false;
            return false;
         } else {
            analysisProducedResults = true;
         }
 
-        stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
+        stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
         Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult);
         return true;
     }
@@ -275,10 +281,10 @@ public class StatisticsNew {
         return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
     }
 
-    public void updateTaxonomyResults(String o, List<Word> ngramCandidate) {
+    public void updateTaxonomyResults(String o, List<String> taxonomy) {
         for (String key : taxonomyResult.keySet()) {
             // first word should have the same taxonomy as others
-            if (ngramCandidate.get(0).getTaxonomy().contains(key)) {
+            if (taxonomy.contains(key) || key.equals("Total")) {
                 // if taxonomy not in map and in this word
                 AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));
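The hunk is truncated by the diff view right after the putIfAbsent call. The usual completion of this lock-free counting idiom, and presumably what follows in the file, is to bump the existing counter when putIfAbsent reports that the key was already present. A self-contained sketch of the idiom (names are illustrative):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

// Illustration of the putIfAbsent + incrementAndGet counting idiom used above.
public class ConcurrentCountSketch {
    private final ConcurrentHashMap<String, AtomicLong> counts = new ConcurrentHashMap<>();

    void count(String key) {
        // start at 1 if the key is new; otherwise bump the existing counter
        AtomicLong existing = counts.putIfAbsent(key, new AtomicLong(1));
        if (existing != null) {
            existing.incrementAndGet();
        }
    }

    public static void main(String[] args) {
        ConcurrentCountSketch c = new ConcurrentCountSketch();
        c.count("beseda");
        c.count("beseda");
        System.out.println(c.counts.get("beseda")); // 2
    }
}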

Word.java

@@ -55,7 +55,8 @@ public class Word implements Serializable {
     //private char besedna_vrsta;
     public Word(String word, String lemma, String msd, List<String> taxonomy) {
         this.lemma = lemma;
-        this.msd = normalizeMsd(msd);
+        // this.msd = normalizeMsd(msd);
+        this.msd = msd;
         this.taxonomy = taxonomy;
 
         // veliko zacetnico ohranimo samo za lastna imena

MANIFEST.MF (deleted)

@@ -1,3 +0,0 @@
-Manifest-Version: 1.0
-Main-Class: gui.GUIController
-

Export.java

@@ -108,10 +108,12 @@ public class Export {
             }
             FILE_HEADER_AL.add("Skupna relativna pogostost");
             for (String key : taxonomyResults.keySet()) {
+                if(!key.equals("Total")) {
                     FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
                     FILE_HEADER_AL.add("Delež [" + key + "]");
                     FILE_HEADER_AL.add("Relativna pogostost [" + key + "]");
                 }
+            }
             FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
             FILE_HEADER_AL.toArray(FILE_HEADER);
         } else {
@@ -160,11 +162,12 @@ public class Export {
                 dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
                 dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
                 for (String key : taxonomyResults.keySet()){
+                    if(!key.equals("Total")) {
                         AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
                         dataEntry.add(frequency.toString());
                         dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
                         dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key)));
+                    }
                 }
 
                 csvFilePrinter.printRecord(dataEntry);
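For reference, the "Relativna pogostost" columns written above are count * 10000 / total, i.e. occurrences per 10,000 units formatted to two decimals, while the share column is plain count / total passed through formatNumberAsPercent. A tiny sketch of the same arithmetic (helper names are illustrative):

// Illustrative helpers mirroring the arithmetic in the CSV export above.
public class FrequencySketch {
    // share of all occurrences, rendered elsewhere as a percentage
    static double share(long count, long total) {
        return (double) count / total;
    }

    // relative frequency per 10,000 units, as written to the "Relativna pogostost" column
    static String per10k(long count, long total) {
        return String.format("%.2f", ((double) count * 10000) / total);
    }

    public static void main(String[] args) {
        System.out.println(share(37, 12000));   // 0.00308333...
        System.out.println(per10k(37, 12000));  // ~30.83 (decimal separator depends on the default locale)
    }
}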