Refactored results - moved to taxonomyResults
This commit is contained in:
parent
8d7cce6c77
commit
e2ce656fc5
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,6 +1,7 @@
|
||||||
# Created by .ignore support plugin (hsz.mobi)
|
# Created by .ignore support plugin (hsz.mobi)
|
||||||
### Maven template
|
### Maven template
|
||||||
target/
|
target/
|
||||||
|
corpus_analyzer_jar/
|
||||||
pom.xml.tag
|
pom.xml.tag
|
||||||
pom.xml.releaseBackup
|
pom.xml.releaseBackup
|
||||||
pom.xml.versionsBackup
|
pom.xml.versionsBackup
|
||||||
|
|
|
@ -46,7 +46,7 @@ public class Ngrams {
|
||||||
}
|
}
|
||||||
|
|
||||||
// UPDATE TAXONOMY HERE!!!
|
// UPDATE TAXONOMY HERE!!!
|
||||||
stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate);
|
stats.updateTaxonomyResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()), ngramCandidate.get(0).getTaxonomy());
|
||||||
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -114,6 +114,7 @@ public class Ngrams {
|
||||||
private static void generateNgramLetterCandidates(List<Sentence> corpus, StatisticsNew stats) {
|
private static void generateNgramLetterCandidates(List<Sentence> corpus, StatisticsNew stats) {
|
||||||
for (Sentence s : corpus) {
|
for (Sentence s : corpus) {
|
||||||
for (Word w : s.getWords()) {
|
for (Word w : s.getWords()) {
|
||||||
|
List<String> taxonomy = w.getTaxonomy();
|
||||||
String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv());
|
String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv());
|
||||||
|
|
||||||
// skip this iteration if:
|
// skip this iteration if:
|
||||||
|
@ -128,6 +129,10 @@ public class Ngrams {
|
||||||
|
|
||||||
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
|
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
|
||||||
// TODO: locila?
|
// TODO: locila?
|
||||||
|
stats.updateTaxonomyResults(word.substring(i, i + stats.getFilter().getStringLength()), taxonomy);
|
||||||
|
// stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
||||||
|
|
||||||
|
|
||||||
stats.updateResults(word.substring(i, i + stats.getFilter().getStringLength()));
|
stats.updateResults(word.substring(i, i + stats.getFilter().getStringLength()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
49
src/main/java/data/MultipleHMKeys.java
Normal file
49
src/main/java/data/MultipleHMKeys.java
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
package data;
|
||||||
|
/*
|
||||||
|
Created for when words are sorted by multiple keys, i.e. not just lemmas but lemmas and msd simultaneously.
|
||||||
|
*/
|
||||||
|
/**
 * Immutable composite key made of up to three string components, intended for
 * use as a key in hash-based collections.
 *
 * <p>Components that are not supplied are stored as {@code null}. Two keys are
 * equal when all three components are pairwise equal, where {@code null} only
 * equals {@code null}.
 */
final class MultipleHMKeys {
    private final String key1;
    private final String key2;
    private final String key3;

    /**
     * Creates a key with a single component; the second and third are {@code null}.
     *
     * @param key1 first component
     */
    public MultipleHMKeys(String key1) {
        this(key1, null, null);
    }

    /**
     * Creates a key with two components; the third is {@code null}.
     *
     * @param key1 first component
     * @param key2 second component
     */
    public MultipleHMKeys(String key1, String key2) {
        this(key1, key2, null);
    }

    /**
     * Creates a key with three components.
     *
     * @param key1 first component
     * @param key2 second component
     * @param key3 third component
     */
    public MultipleHMKeys(String key1, String key2, String key3) {
        this.key1 = key1;
        this.key2 = key2;
        this.key3 = key3;
    }

    /** @return the first component, possibly {@code null} */
    public String getKey1() {
        return key1;
    }

    /** @return the second component, or {@code null} if it was not supplied */
    public String getKey2() {
        return key2;
    }

    /** @return the third component, or {@code null} if it was not supplied */
    public String getKey3() {
        return key3;
    }

    /**
     * Null-safe, order-sensitive hash of the three components.
     *
     * <p>The shorter constructors leave components {@code null}, so calling
     * {@code hashCode()} on a component directly would throw. A plain XOR would
     * also make keys with permuted components collide.
     */
    @Override
    public int hashCode() {
        return java.util.Objects.hash(key1, key2, key3);
    }

    /**
     * Compares all three components null-safely.
     *
     * @param obj object to compare with
     * @return {@code true} if {@code obj} is a {@code MultipleHMKeys} whose
     *         components are pairwise equal to this key's components
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof MultipleHMKeys)) {
            return false;
        }
        MultipleHMKeys other = (MultipleHMKeys) obj;
        return java.util.Objects.equals(key1, other.key1)
                && java.util.Objects.equals(key2, other.key2)
                && java.util.Objects.equals(key3, other.key3);
    }
}
|
|
@ -45,6 +45,7 @@ public class StatisticsNew {
|
||||||
this.corpus = corpus;
|
this.corpus = corpus;
|
||||||
this.filter = filter;
|
this.filter = filter;
|
||||||
this.taxonomyResult = new ConcurrentHashMap<>();
|
this.taxonomyResult = new ConcurrentHashMap<>();
|
||||||
|
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
|
||||||
|
|
||||||
// create table for counting word occurrences per taxonomy
|
// create table for counting word occurrences per taxonomy
|
||||||
|
|
||||||
|
@ -97,13 +98,18 @@ public class StatisticsNew {
|
||||||
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
||||||
Integer ngramLevel = filter.getNgramValue();
|
Integer ngramLevel = filter.getNgramValue();
|
||||||
if(ngramLevel == 0) {
|
if(ngramLevel == 0) {
|
||||||
sb.append("Crke").
|
sb.append(corpus.getCorpusType().toString())
|
||||||
append(separator)
|
.append(separator)
|
||||||
.append(corpus.getCorpusType().toString())
|
.append("crke")
|
||||||
|
.append(separator)
|
||||||
|
.append(filter.getCalculateFor())
|
||||||
.append(separator);
|
.append(separator);
|
||||||
} else if(ngramLevel == 1) {
|
} else if(ngramLevel == 1) {
|
||||||
sb.append("Besede").append(separator)
|
sb.append(corpus.getCorpusType().toString())
|
||||||
.append(corpus.getCorpusType().toString())
|
.append(separator)
|
||||||
|
.append("besede")
|
||||||
|
.append(separator)
|
||||||
|
.append(filter.getCalculateFor())
|
||||||
.append(separator);
|
.append(separator);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -196,14 +202,14 @@ public class StatisticsNew {
|
||||||
}
|
}
|
||||||
|
|
||||||
// if no results and nothing to save, return false
|
// if no results and nothing to save, return false
|
||||||
if (!(result.size() > 0)) {
|
if (!(taxonomyResult.get("Total").size() > 0)) {
|
||||||
analysisProducedResults = false;
|
analysisProducedResults = false;
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
analysisProducedResults = true;
|
analysisProducedResults = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
|
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
|
||||||
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult);
|
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -275,10 +281,10 @@ public class StatisticsNew {
|
||||||
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void updateTaxonomyResults(String o, List<Word> ngramCandidate) {
|
public void updateTaxonomyResults(String o, List<String> taxonomy) {
|
||||||
for (String key : taxonomyResult.keySet()) {
|
for (String key : taxonomyResult.keySet()) {
|
||||||
// first word should have the same taxonomy as others
|
// first word should have the same taxonomy as others
|
||||||
if (ngramCandidate.get(0).getTaxonomy().contains(key)) {
|
if (taxonomy.contains(key) || key.equals("Total")) {
|
||||||
// if taxonomy not in map and in this word
|
// if taxonomy not in map and in this word
|
||||||
AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));
|
AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,8 @@ public class Word implements Serializable {
|
||||||
//private char besedna_vrsta;
|
//private char besedna_vrsta;
|
||||||
public Word(String word, String lemma, String msd, List<String> taxonomy) {
|
public Word(String word, String lemma, String msd, List<String> taxonomy) {
|
||||||
this.lemma = lemma;
|
this.lemma = lemma;
|
||||||
this.msd = normalizeMsd(msd);
|
// this.msd = normalizeMsd(msd);
|
||||||
|
this.msd = msd;
|
||||||
this.taxonomy = taxonomy;
|
this.taxonomy = taxonomy;
|
||||||
|
|
||||||
// veliko zacetnico ohranimo samo za lastna imena
|
// veliko zacetnico ohranimo samo za lastna imena
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
Manifest-Version: 1.0
|
|
||||||
Main-Class: gui.GUIController
|
|
||||||
|
|
|
@ -108,9 +108,11 @@ public class Export {
|
||||||
}
|
}
|
||||||
FILE_HEADER_AL.add("Skupna relativna pogostost");
|
FILE_HEADER_AL.add("Skupna relativna pogostost");
|
||||||
for (String key : taxonomyResults.keySet()) {
|
for (String key : taxonomyResults.keySet()) {
|
||||||
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
|
if(!key.equals("Total")) {
|
||||||
FILE_HEADER_AL.add("Delež [" + key + "]");
|
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
|
||||||
FILE_HEADER_AL.add("Relativna pogostost [" + key + "]");
|
FILE_HEADER_AL.add("Delež [" + key + "]");
|
||||||
|
FILE_HEADER_AL.add("Relativna pogostost [" + key + "]");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
|
FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
|
||||||
FILE_HEADER_AL.toArray(FILE_HEADER);
|
FILE_HEADER_AL.toArray(FILE_HEADER);
|
||||||
|
@ -160,11 +162,12 @@ public class Export {
|
||||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
||||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
|
||||||
for (String key : taxonomyResults.keySet()){
|
for (String key : taxonomyResults.keySet()){
|
||||||
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
|
if(!key.equals("Total")) {
|
||||||
dataEntry.add(frequency.toString());
|
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
|
||||||
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
|
dataEntry.add(frequency.toString());
|
||||||
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key)));
|
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
|
||||||
|
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
csvFilePrinter.printRecord(dataEntry);
|
csvFilePrinter.printRecord(dataEntry);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user