Added filter parameters to CSV + created names of columns for MSDs + [partly] fixed number of words parameter

This commit is contained in:
2018-11-13 13:57:49 +01:00
parent a4df732678
commit cbfe3e6025
9 changed files with 502 additions and 219 deletions

View File

@@ -15,6 +15,42 @@ public interface MultipleHMKeys {
/**
 * Default implementation that provides no split keys.
 *
 * @return {@code null} unless an implementing class overrides this method
 */
default ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys() {
    return null;
}
/**
 * Picks the key slot that corresponds to {@code CalculateFor.MORPHOSYNTACTIC_SPECS}.
 *
 * <p>When the filter's primary "calculate for" value is MORPHOSYNTACTIC_SPECS the result is
 * {@code getK1()}. Otherwise the filter's multiple keys are scanned in iteration order and a
 * match at position 0, 1, 2 or 3 selects {@code getK2()}, {@code getK3()}, {@code getK4()} or
 * {@code getK5()} respectively. If several positions match, the last one wins; matches at
 * positions beyond 3 are ignored.
 *
 * @param filter filter whose calculate-for value and multiple keys are inspected
 * @return the selected key value, or an empty string when nothing matches
 */
default String getMsd(Filter filter) {
    if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
        return getK1();
    }
    if (!filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
        return "";
    }

    String found = "";
    int position = 0;
    for (CalculateFor extraKey : filter.getMultipleKeys()) {
        if (extraKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
            // Extra keys are stored one slot after the primary key: position n -> K(n + 2).
            if (position == 0) {
                found = getK2();
            } else if (position == 1) {
                found = getK3();
            } else if (position == 2) {
                found = getK4();
            } else if (position == 3) {
                found = getK5();
            }
        }
        position++;
    }
    return found;
}
// Redeclared here so the interface explicitly lists hashCode() among its members.
// NOTE(review): presumably because implementations are used as hash-map keys
// (e.g. Map<MultipleHMKeys, Double>) — confirm that equals() is kept consistent.
@Override
int hashCode();

View File

@@ -5,6 +5,7 @@ import static gui.ValidationUtil.*;
import java.io.UnsupportedEncodingException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
@@ -39,8 +40,10 @@ public class StatisticsNew {
private boolean useDB;
private RDB db;
private boolean analysisProducedResults;
private LocalDateTime time;
private LocalDateTime timeBeginning;
private LocalDateTime timeEnding;
private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
private AtomicLong uniGramOccurrences;
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
this.corpus = corpus;
@@ -48,8 +51,10 @@ public class StatisticsNew {
this.taxonomyResult = new ConcurrentHashMap<>();
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
this.collocability = new ConcurrentHashMap<>();
this.uniGramOccurrences = new AtomicLong(0L);
// create table for counting word occurrences per taxonomies
// create table for counting word occurrences per taxonomies
if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
@@ -75,7 +80,9 @@ public class StatisticsNew {
result = new ConcurrentHashMap<>();
}
resultTitle = generateResultTitle();
this.timeBeginning = LocalDateTime.now();
// resultTitle = generateResultTitle();
logger.debug(toString());
}
@@ -94,7 +101,7 @@ public class StatisticsNew {
*
* @return
*/
private String generateResultTitle() {
public String generateResultTitle() {
String separator = "_";
StringBuilder sb = new StringBuilder();
@@ -108,12 +115,21 @@ public class StatisticsNew {
.append(filter.getCalculateFor())
.append(separator);
} else if(ngramLevel == 1) {
sb.append(corpus.getCorpusType().toString())
.append(separator)
.append("besede")
.append(separator)
.append(filter.getCalculateFor())
.append(separator);
if (filter.getSuffixLength() != null && filter.getSuffixList() != null && filter.getPrefixLength() != null && filter.getPrefixList() != null) {
sb.append(corpus.getCorpusType().toString())
.append(separator)
.append("besedni-deli")
.append(separator)
.append(filter.getCalculateFor())
.append(separator);
} else {
sb.append(corpus.getCorpusType().toString())
.append(separator)
.append("besede")
.append(separator)
.append(filter.getCalculateFor())
.append(separator);
}
}
else {
sb.append(filter.getAl().toString())
@@ -141,13 +157,20 @@ public class StatisticsNew {
// if taxonomy -> taxonomy
// if cvv -> cvv + dolžina
this.time = this.time != null ? this.time : LocalDateTime.now();
sb.append(time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss")));
sb.append(getTimeEnding());
return sb.toString();
}
/**
 * Stores the current local date-time in {@code timeEnding}.
 */
public void setTimeEnding() {
    final LocalDateTime now = LocalDateTime.now();
    timeEnding = now;
}
/**
 * Formats the recorded ending time as a timestamp string.
 *
 * <p>The hour is written with the 24-hour field {@code HH}. The earlier 12-hour field
 * {@code hh} carried no AM/PM marker, so a morning and an afternoon run twelve hours apart
 * produced the same text. When no ending time has been recorded yet, the current time is
 * formatted instead of failing with a {@link NullPointerException}; the stored field is not
 * modified in that case.
 *
 * @return the timestamp in the form {@code dd.MM.yyyy_HH.mm.ss}
 */
public String getTimeEnding() {
    LocalDateTime end = timeEnding != null ? timeEnding : LocalDateTime.now();
    return end.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_HH.mm.ss"));
}
/**
 * @return the current value of the {@code analysisProducedResults} flag
 */
public boolean isAnalysisProducedResults() {
    return this.analysisProducedResults;
}
@@ -319,6 +342,14 @@ public class StatisticsNew {
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
}
/**
 * Adds {@code amount} to the unigram occurrence counter.
 *
 * <p>The addition is done as a single atomic operation. A separate {@code get()} followed by
 * {@code set()} could lose increments when two threads update the counter at the same time.
 *
 * @param amount number of occurrences to add (may be negative)
 */
public void updateUniGramOccurrences(int amount) {
    uniGramOccurrences.addAndGet(amount);
}
/**
 * @return the current value of the unigram occurrence counter
 */
public long getUniGramOccurrences() {
    final long current = uniGramOccurrences.get();
    return current;
}
public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
for (String key : taxonomyResult.keySet()) {
// first word should have the same taxonomy as others
@@ -423,22 +454,23 @@ public class StatisticsNew {
LinkedHashMap<String, String> info = new LinkedHashMap<>();
info.put("Korpus:", corpus.getCorpusType().toString());
info.put("Datum:", time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
setTimeEnding();
info.put("Datum:", timeEnding.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
Integer ngramLevel = filter.getNgramValue();
if (ngramLevel == 0)
info.put("Analiza", "Črke");
info.put("Analiza:", "Črke");
else if (ngramLevel == 1) {
// if suffixes or prefixes are not null print word parts
if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
info.put("Analiza", "Besedni deli");
info.put("Analiza:", "Besedni deli");
} else {
info.put("Analiza", "Besede");
info.put("Analiza:", "Besede");
}
} else
info.put("Analiza", filter.getAl().toString());
info.put("Analiza:", filter.getAl().toString());
} else {
info.put("Analiza", filter.getAl().toString());
info.put("Analiza:", filter.getAl().toString());
}
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
@@ -453,9 +485,68 @@ public class StatisticsNew {
if (ngramLevel > 1)
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
// izračunaj za
// calculate for
info.put("Izračunaj za:", filter.getCalculateFor().toString());
// also write
if (filter.getMultipleKeys().size() > 0){
StringBuilder mk = new StringBuilder();
for (CalculateFor s : filter.getMultipleKeys()) {
mk.append(s.toString()).append("; ");
}
info.put("Izpiši tudi: ", String.join("; ", mk.substring(0, mk.length() - 2)));
}
// time elapsed
// setTimeEnding();
long seconds = ChronoUnit.MILLIS.between(timeBeginning, timeEnding) / 1000;
info.put("Čas izvajanja:", String.valueOf(seconds) + " s");
// data limitations
if (filter.getDisplayTaxonomy()){
info.put("Izpiši taksonomije: ", "Da");
} else {
info.put("Izpiši taksonomije: ", "Ne");
}
// note punctuations - ngram > 1
if(ngramLevel > 1) {
if (filter.getNotePunctuations()) {
info.put("Upoštevaj ločila: ", "Da");
} else {
info.put("Upoštevaj ločila: ", "Ne");
}
}
// also write - n - gram > 1
if (ngramLevel > 1 && filter.getCollocability().size() > 0){
StringBuilder mk = new StringBuilder();
for (Collocability s : filter.getCollocability()) {
mk.append(s.toString()).append("; ");
}
info.put("Kolokabilnost: ", String.join("; ", mk.substring(0, mk.length() - 2)));
}
// fragmented MSD - n-gram = 1
if (info.get("Analiza:").equals("Besede")){
if (filter.getWriteMsdAtTheEnd()){
info.put("Izpiši razbit MSD: ", "Da");
} else {
info.put("Izpiši razbit MSD: ", "Ne");
}
}
if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
if (filter.getPrefixLength() > 0 || filter.getSuffixLength() > 0) {
info.put("Dolžina predpone: ", String.valueOf(filter.getPrefixLength()));
info.put("Dolžina pripone: ", String.valueOf(filter.getSuffixLength()));
} else {
info.put("Seznam predpon: ", String.join("; ", filter.getPrefixList()));
info.put("Seznam pripon: ", String.join("; ", filter.getSuffixList()));
}
}
// msd
if (!isEmpty(filter.getMsd())) {
StringBuilder msdPattern = new StringBuilder();
@@ -479,6 +570,9 @@ public class StatisticsNew {
}
}
info.put("Min. št. pojavitev: ", String.valueOf(filter.getMinimalOccurrences()));
info.put("Min. št. taksonomij: ", String.valueOf(filter.getMinimalTaxonomy()));
if (corpus.getCorpusType() == CorpusType.SOLAR) {
HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();