|
|
@ -5,6 +5,7 @@ import static gui.ValidationUtil.*;
|
|
|
|
import java.io.UnsupportedEncodingException;
|
|
|
|
import java.io.UnsupportedEncodingException;
|
|
|
|
import java.time.LocalDateTime;
|
|
|
|
import java.time.LocalDateTime;
|
|
|
|
import java.time.format.DateTimeFormatter;
|
|
|
|
import java.time.format.DateTimeFormatter;
|
|
|
|
|
|
|
|
import java.time.temporal.ChronoUnit;
|
|
|
|
import java.util.*;
|
|
|
|
import java.util.*;
|
|
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
@ -39,8 +40,10 @@ public class StatisticsNew {
|
|
|
|
private boolean useDB;
|
|
|
|
private boolean useDB;
|
|
|
|
private RDB db;
|
|
|
|
private RDB db;
|
|
|
|
private boolean analysisProducedResults;
|
|
|
|
private boolean analysisProducedResults;
|
|
|
|
private LocalDateTime time;
|
|
|
|
private LocalDateTime timeBeginning;
|
|
|
|
|
|
|
|
private LocalDateTime timeEnding;
|
|
|
|
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
|
|
|
|
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
|
|
|
|
|
|
|
|
private AtomicLong uniGramOccurrences;
|
|
|
|
|
|
|
|
|
|
|
|
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
|
|
|
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
|
|
|
this.corpus = corpus;
|
|
|
|
this.corpus = corpus;
|
|
|
@ -48,6 +51,8 @@ public class StatisticsNew {
|
|
|
|
this.taxonomyResult = new ConcurrentHashMap<>();
|
|
|
|
this.taxonomyResult = new ConcurrentHashMap<>();
|
|
|
|
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
|
|
|
|
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
|
|
|
|
this.collocability = new ConcurrentHashMap<>();
|
|
|
|
this.collocability = new ConcurrentHashMap<>();
|
|
|
|
|
|
|
|
this.uniGramOccurrences = new AtomicLong(0L);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// create table for counting word occurrences per taxonomies
|
|
|
|
// create table for counting word occurrences per taxonomies
|
|
|
|
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
|
|
|
|
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
|
|
|
@ -75,7 +80,9 @@ public class StatisticsNew {
|
|
|
|
result = new ConcurrentHashMap<>();
|
|
|
|
result = new ConcurrentHashMap<>();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
resultTitle = generateResultTitle();
|
|
|
|
this.timeBeginning = LocalDateTime.now();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// resultTitle = generateResultTitle();
|
|
|
|
|
|
|
|
|
|
|
|
logger.debug(toString());
|
|
|
|
logger.debug(toString());
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -94,7 +101,7 @@ public class StatisticsNew {
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* @return
|
|
|
|
* @return
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
private String generateResultTitle() {
|
|
|
|
public String generateResultTitle() {
|
|
|
|
String separator = "_";
|
|
|
|
String separator = "_";
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
|
|
|
|
|
|
|
@ -108,6 +115,14 @@ public class StatisticsNew {
|
|
|
|
.append(filter.getCalculateFor())
|
|
|
|
.append(filter.getCalculateFor())
|
|
|
|
.append(separator);
|
|
|
|
.append(separator);
|
|
|
|
} else if(ngramLevel == 1) {
|
|
|
|
} else if(ngramLevel == 1) {
|
|
|
|
|
|
|
|
if (filter.getSuffixLength() != null && filter.getSuffixList() != null && filter.getPrefixLength() != null && filter.getPrefixList() != null) {
|
|
|
|
|
|
|
|
sb.append(corpus.getCorpusType().toString())
|
|
|
|
|
|
|
|
.append(separator)
|
|
|
|
|
|
|
|
.append("besedni-deli")
|
|
|
|
|
|
|
|
.append(separator)
|
|
|
|
|
|
|
|
.append(filter.getCalculateFor())
|
|
|
|
|
|
|
|
.append(separator);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
sb.append(corpus.getCorpusType().toString())
|
|
|
|
sb.append(corpus.getCorpusType().toString())
|
|
|
|
.append(separator)
|
|
|
|
.append(separator)
|
|
|
|
.append("besede")
|
|
|
|
.append("besede")
|
|
|
@ -115,6 +130,7 @@ public class StatisticsNew {
|
|
|
|
.append(filter.getCalculateFor())
|
|
|
|
.append(filter.getCalculateFor())
|
|
|
|
.append(separator);
|
|
|
|
.append(separator);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
else {
|
|
|
|
sb.append(filter.getAl().toString())
|
|
|
|
sb.append(filter.getAl().toString())
|
|
|
|
.append(separator)
|
|
|
|
.append(separator)
|
|
|
@ -141,13 +157,20 @@ public class StatisticsNew {
|
|
|
|
// if taxonomy -> taxonomy
|
|
|
|
// if taxonomy -> taxonomy
|
|
|
|
// if cvv -> cvv + dolžina
|
|
|
|
// if cvv -> cvv + dolžina
|
|
|
|
|
|
|
|
|
|
|
|
this.time = this.time != null ? this.time : LocalDateTime.now();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sb.append(time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss")));
|
|
|
|
sb.append(getTimeEnding());
|
|
|
|
return sb.toString();
|
|
|
|
return sb.toString();
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public void setTimeEnding(){
|
|
|
|
|
|
|
|
this.timeEnding = LocalDateTime.now();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public String getTimeEnding(){
|
|
|
|
|
|
|
|
return timeEnding.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss"));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public boolean isAnalysisProducedResults() {
|
|
|
|
public boolean isAnalysisProducedResults() {
|
|
|
|
return analysisProducedResults;
|
|
|
|
return analysisProducedResults;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -319,6 +342,14 @@ public class StatisticsNew {
|
|
|
|
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
|
|
|
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public void updateUniGramOccurrences(int amount){
|
|
|
|
|
|
|
|
uniGramOccurrences.set(uniGramOccurrences.get() + amount);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public long getUniGramOccurrences(){
|
|
|
|
|
|
|
|
return uniGramOccurrences.longValue();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
|
|
|
|
public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
|
|
|
|
for (String key : taxonomyResult.keySet()) {
|
|
|
|
for (String key : taxonomyResult.keySet()) {
|
|
|
|
// first word should have the same taxonomy as others
|
|
|
|
// first word should have the same taxonomy as others
|
|
|
@ -423,22 +454,23 @@ public class StatisticsNew {
|
|
|
|
LinkedHashMap<String, String> info = new LinkedHashMap<>();
|
|
|
|
LinkedHashMap<String, String> info = new LinkedHashMap<>();
|
|
|
|
|
|
|
|
|
|
|
|
info.put("Korpus:", corpus.getCorpusType().toString());
|
|
|
|
info.put("Korpus:", corpus.getCorpusType().toString());
|
|
|
|
info.put("Datum:", time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
|
|
|
|
setTimeEnding();
|
|
|
|
|
|
|
|
info.put("Datum:", timeEnding.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
|
|
|
|
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
|
|
|
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
|
|
|
Integer ngramLevel = filter.getNgramValue();
|
|
|
|
Integer ngramLevel = filter.getNgramValue();
|
|
|
|
if (ngramLevel == 0)
|
|
|
|
if (ngramLevel == 0)
|
|
|
|
info.put("Analiza", "Črke");
|
|
|
|
info.put("Analiza:", "Črke");
|
|
|
|
else if (ngramLevel == 1) {
|
|
|
|
else if (ngramLevel == 1) {
|
|
|
|
// if suffixes or prefixes are not null print word parts
|
|
|
|
// if suffixes or prefixes are not null print word parts
|
|
|
|
if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
|
|
|
|
if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
|
|
|
|
info.put("Analiza", "Besedni deli");
|
|
|
|
info.put("Analiza:", "Besedni deli");
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
info.put("Analiza", "Besede");
|
|
|
|
info.put("Analiza:", "Besede");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
} else
|
|
|
|
info.put("Analiza", filter.getAl().toString());
|
|
|
|
info.put("Analiza:", filter.getAl().toString());
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
info.put("Analiza", filter.getAl().toString());
|
|
|
|
info.put("Analiza:", filter.getAl().toString());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
|
|
|
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
|
|
@ -453,9 +485,68 @@ public class StatisticsNew {
|
|
|
|
if (ngramLevel > 1)
|
|
|
|
if (ngramLevel > 1)
|
|
|
|
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
|
|
|
|
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
|
|
|
|
|
|
|
|
|
|
|
|
// izračunaj za
|
|
|
|
// calculate for
|
|
|
|
info.put("Izračunaj za:", filter.getCalculateFor().toString());
|
|
|
|
info.put("Izračunaj za:", filter.getCalculateFor().toString());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// also write
|
|
|
|
|
|
|
|
if (filter.getMultipleKeys().size() > 0){
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
StringBuilder mk = new StringBuilder();
|
|
|
|
|
|
|
|
for (CalculateFor s : filter.getMultipleKeys()) {
|
|
|
|
|
|
|
|
mk.append(s.toString()).append("; ");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
info.put("Izpiši tudi: ", String.join("; ", mk.substring(0, mk.length() - 2)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// time elapsed
|
|
|
|
|
|
|
|
// setTimeEnding();
|
|
|
|
|
|
|
|
long seconds = ChronoUnit.MILLIS.between(timeBeginning, timeEnding) / 1000;
|
|
|
|
|
|
|
|
info.put("Čas izvajanja:", String.valueOf(seconds) + " s");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// data limitations
|
|
|
|
|
|
|
|
if (filter.getDisplayTaxonomy()){
|
|
|
|
|
|
|
|
info.put("Izpiši taksonomije: ", "Da");
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
info.put("Izpiši taksonomije: ", "Ne");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// note punctuations - ngram > 1
|
|
|
|
|
|
|
|
if(ngramLevel > 1) {
|
|
|
|
|
|
|
|
if (filter.getNotePunctuations()) {
|
|
|
|
|
|
|
|
info.put("Upoštevaj ločila: ", "Da");
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
info.put("Upoštevaj ločila: ", "Ne");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// also write - n - gram > 1
|
|
|
|
|
|
|
|
if (ngramLevel > 1 && filter.getCollocability().size() > 0){
|
|
|
|
|
|
|
|
StringBuilder mk = new StringBuilder();
|
|
|
|
|
|
|
|
for (Collocability s : filter.getCollocability()) {
|
|
|
|
|
|
|
|
mk.append(s.toString()).append("; ");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
info.put("Kolokabilnost: ", String.join("; ", mk.substring(0, mk.length() - 2)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// fragmented MSD - n-gram = 1
|
|
|
|
|
|
|
|
if (info.get("Analiza:").equals("Besede")){
|
|
|
|
|
|
|
|
if (filter.getWriteMsdAtTheEnd()){
|
|
|
|
|
|
|
|
info.put("Izpiši razbit MSD: ", "Da");
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
info.put("Izpiši razbit MSD: ", "Ne");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
|
|
|
|
|
|
|
|
if (filter.getPrefixLength() > 0 || filter.getSuffixLength() > 0) {
|
|
|
|
|
|
|
|
info.put("Dolžina predpone: ", String.valueOf(filter.getPrefixLength()));
|
|
|
|
|
|
|
|
info.put("Dolžina pripone: ", String.valueOf(filter.getSuffixLength()));
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
info.put("Seznam predpon: ", String.join("; ", filter.getPrefixList()));
|
|
|
|
|
|
|
|
info.put("Seznam pripon: ", String.join("; ", filter.getSuffixList()));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// msd
|
|
|
|
// msd
|
|
|
|
if (!isEmpty(filter.getMsd())) {
|
|
|
|
if (!isEmpty(filter.getMsd())) {
|
|
|
|
StringBuilder msdPattern = new StringBuilder();
|
|
|
|
StringBuilder msdPattern = new StringBuilder();
|
|
|
@ -479,6 +570,9 @@ public class StatisticsNew {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
info.put("Min. št. pojavitev: ", String.valueOf(filter.getMinimalOccurrences()));
|
|
|
|
|
|
|
|
info.put("Min. št. taksonomij: ", String.valueOf(filter.getMinimalTaxonomy()));
|
|
|
|
|
|
|
|
|
|
|
|
if (corpus.getCorpusType() == CorpusType.SOLAR) {
|
|
|
|
if (corpus.getCorpusType() == CorpusType.SOLAR) {
|
|
|
|
HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
|
|
|
|
HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
|
|
|
|
|
|
|
|
|
|
|
|