Added filter parameters to CSV + created names of columns for MSDs + [partly] fixed number of words parameter
This commit is contained in:
@@ -15,6 +15,42 @@ public interface MultipleHMKeys {
|
||||
|
||||
/**
 * Default implementation that provides no split keys.
 *
 * @return always {@code null} in this default implementation
 */
default ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys() {
    return null;
}
|
||||
|
||||
default String getMsd(Filter filter) {
|
||||
String msd = "";
|
||||
if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
||||
msd = getK1();
|
||||
} else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
|
||||
int i = 0;
|
||||
for (CalculateFor otherKey : filter.getMultipleKeys()) {
|
||||
switch (i) {
|
||||
case 0:
|
||||
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
|
||||
msd = getK2();
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
|
||||
msd = getK3();
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
|
||||
msd = getK4();
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
|
||||
msd = getK5();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return msd;
|
||||
}
|
||||
|
||||
@Override
|
||||
int hashCode();
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import static gui.ValidationUtil.*;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
@@ -39,8 +40,10 @@ public class StatisticsNew {
|
||||
private boolean useDB;
|
||||
private RDB db;
|
||||
private boolean analysisProducedResults;
|
||||
private LocalDateTime time;
|
||||
private LocalDateTime timeBeginning;
|
||||
private LocalDateTime timeEnding;
|
||||
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
|
||||
private AtomicLong uniGramOccurrences;
|
||||
|
||||
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
||||
this.corpus = corpus;
|
||||
@@ -48,8 +51,10 @@ public class StatisticsNew {
|
||||
this.taxonomyResult = new ConcurrentHashMap<>();
|
||||
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
|
||||
this.collocability = new ConcurrentHashMap<>();
|
||||
this.uniGramOccurrences = new AtomicLong(0L);
|
||||
|
||||
// create table for counting word occurrences per taxonomies
|
||||
|
||||
// create table for counting word occurrences per taxonomies
|
||||
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
|
||||
if (this.filter.getTaxonomy().isEmpty()) {
|
||||
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
|
||||
@@ -75,7 +80,9 @@ public class StatisticsNew {
|
||||
result = new ConcurrentHashMap<>();
|
||||
}
|
||||
|
||||
resultTitle = generateResultTitle();
|
||||
this.timeBeginning = LocalDateTime.now();
|
||||
|
||||
// resultTitle = generateResultTitle();
|
||||
|
||||
logger.debug(toString());
|
||||
}
|
||||
@@ -94,7 +101,7 @@ public class StatisticsNew {
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private String generateResultTitle() {
|
||||
public String generateResultTitle() {
|
||||
String separator = "_";
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
@@ -108,12 +115,21 @@ public class StatisticsNew {
|
||||
.append(filter.getCalculateFor())
|
||||
.append(separator);
|
||||
} else if(ngramLevel == 1) {
|
||||
sb.append(corpus.getCorpusType().toString())
|
||||
.append(separator)
|
||||
.append("besede")
|
||||
.append(separator)
|
||||
.append(filter.getCalculateFor())
|
||||
.append(separator);
|
||||
if (filter.getSuffixLength() != null && filter.getSuffixList() != null && filter.getPrefixLength() != null && filter.getPrefixList() != null) {
|
||||
sb.append(corpus.getCorpusType().toString())
|
||||
.append(separator)
|
||||
.append("besedni-deli")
|
||||
.append(separator)
|
||||
.append(filter.getCalculateFor())
|
||||
.append(separator);
|
||||
} else {
|
||||
sb.append(corpus.getCorpusType().toString())
|
||||
.append(separator)
|
||||
.append("besede")
|
||||
.append(separator)
|
||||
.append(filter.getCalculateFor())
|
||||
.append(separator);
|
||||
}
|
||||
}
|
||||
else {
|
||||
sb.append(filter.getAl().toString())
|
||||
@@ -141,13 +157,20 @@ public class StatisticsNew {
|
||||
// if taxonomy -> taxonomy
|
||||
// if cvv -> cvv + dolžina
|
||||
|
||||
this.time = this.time != null ? this.time : LocalDateTime.now();
|
||||
|
||||
sb.append(time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss")));
|
||||
sb.append(getTimeEnding());
|
||||
return sb.toString();
|
||||
|
||||
}
|
||||
|
||||
public void setTimeEnding(){
|
||||
this.timeEnding = LocalDateTime.now();
|
||||
}
|
||||
|
||||
public String getTimeEnding(){
|
||||
return timeEnding.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss"));
|
||||
}
|
||||
|
||||
public boolean isAnalysisProducedResults() {
|
||||
return analysisProducedResults;
|
||||
}
|
||||
@@ -319,6 +342,14 @@ public class StatisticsNew {
|
||||
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
||||
}
|
||||
|
||||
public void updateUniGramOccurrences(int amount){
|
||||
uniGramOccurrences.set(uniGramOccurrences.get() + amount);
|
||||
}
|
||||
|
||||
public long getUniGramOccurrences(){
|
||||
return uniGramOccurrences.longValue();
|
||||
}
|
||||
|
||||
public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
|
||||
for (String key : taxonomyResult.keySet()) {
|
||||
// first word should have the same taxonomy as others
|
||||
@@ -423,22 +454,23 @@ public class StatisticsNew {
|
||||
LinkedHashMap<String, String> info = new LinkedHashMap<>();
|
||||
|
||||
info.put("Korpus:", corpus.getCorpusType().toString());
|
||||
info.put("Datum:", time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
|
||||
setTimeEnding();
|
||||
info.put("Datum:", timeEnding.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
|
||||
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
||||
Integer ngramLevel = filter.getNgramValue();
|
||||
if (ngramLevel == 0)
|
||||
info.put("Analiza", "Črke");
|
||||
info.put("Analiza:", "Črke");
|
||||
else if (ngramLevel == 1) {
|
||||
// if suffixes or prefixes are not null print word parts
|
||||
if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
|
||||
info.put("Analiza", "Besedni deli");
|
||||
info.put("Analiza:", "Besedni deli");
|
||||
} else {
|
||||
info.put("Analiza", "Besede");
|
||||
info.put("Analiza:", "Besede");
|
||||
}
|
||||
} else
|
||||
info.put("Analiza", filter.getAl().toString());
|
||||
info.put("Analiza:", filter.getAl().toString());
|
||||
} else {
|
||||
info.put("Analiza", filter.getAl().toString());
|
||||
info.put("Analiza:", filter.getAl().toString());
|
||||
}
|
||||
|
||||
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
||||
@@ -453,9 +485,68 @@ public class StatisticsNew {
|
||||
if (ngramLevel > 1)
|
||||
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
|
||||
|
||||
// izračunaj za
|
||||
// calculate for
|
||||
info.put("Izračunaj za:", filter.getCalculateFor().toString());
|
||||
|
||||
// also write
|
||||
if (filter.getMultipleKeys().size() > 0){
|
||||
|
||||
StringBuilder mk = new StringBuilder();
|
||||
for (CalculateFor s : filter.getMultipleKeys()) {
|
||||
mk.append(s.toString()).append("; ");
|
||||
}
|
||||
info.put("Izpiši tudi: ", String.join("; ", mk.substring(0, mk.length() - 2)));
|
||||
}
|
||||
|
||||
// time elapsed
|
||||
// setTimeEnding();
|
||||
long seconds = ChronoUnit.MILLIS.between(timeBeginning, timeEnding) / 1000;
|
||||
info.put("Čas izvajanja:", String.valueOf(seconds) + " s");
|
||||
|
||||
// data limitations
|
||||
if (filter.getDisplayTaxonomy()){
|
||||
info.put("Izpiši taksonomije: ", "Da");
|
||||
} else {
|
||||
info.put("Izpiši taksonomije: ", "Ne");
|
||||
}
|
||||
|
||||
// note punctuations - ngram > 1
|
||||
if(ngramLevel > 1) {
|
||||
if (filter.getNotePunctuations()) {
|
||||
info.put("Upoštevaj ločila: ", "Da");
|
||||
} else {
|
||||
info.put("Upoštevaj ločila: ", "Ne");
|
||||
}
|
||||
}
|
||||
|
||||
// also write - n - gram > 1
|
||||
if (ngramLevel > 1 && filter.getCollocability().size() > 0){
|
||||
StringBuilder mk = new StringBuilder();
|
||||
for (Collocability s : filter.getCollocability()) {
|
||||
mk.append(s.toString()).append("; ");
|
||||
}
|
||||
info.put("Kolokabilnost: ", String.join("; ", mk.substring(0, mk.length() - 2)));
|
||||
}
|
||||
|
||||
// fragmented MSD - n-gram = 1
|
||||
if (info.get("Analiza:").equals("Besede")){
|
||||
if (filter.getWriteMsdAtTheEnd()){
|
||||
info.put("Izpiši razbit MSD: ", "Da");
|
||||
} else {
|
||||
info.put("Izpiši razbit MSD: ", "Ne");
|
||||
}
|
||||
}
|
||||
|
||||
if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
|
||||
if (filter.getPrefixLength() > 0 || filter.getSuffixLength() > 0) {
|
||||
info.put("Dolžina predpone: ", String.valueOf(filter.getPrefixLength()));
|
||||
info.put("Dolžina pripone: ", String.valueOf(filter.getSuffixLength()));
|
||||
} else {
|
||||
info.put("Seznam predpon: ", String.join("; ", filter.getPrefixList()));
|
||||
info.put("Seznam pripon: ", String.join("; ", filter.getSuffixList()));
|
||||
}
|
||||
}
|
||||
|
||||
// msd
|
||||
if (!isEmpty(filter.getMsd())) {
|
||||
StringBuilder msdPattern = new StringBuilder();
|
||||
@@ -479,6 +570,9 @@ public class StatisticsNew {
|
||||
}
|
||||
}
|
||||
|
||||
info.put("Min. št. pojavitev: ", String.valueOf(filter.getMinimalOccurrences()));
|
||||
info.put("Min. št. taksonomij: ", String.valueOf(filter.getMinimalTaxonomy()));
|
||||
|
||||
if (corpus.getCorpusType() == CorpusType.SOLAR) {
|
||||
HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user