You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
300 lines
7.4 KiB
300 lines
7.4 KiB
package data;
|
|
|
|
import java.io.UnsupportedEncodingException;
|
|
import java.time.LocalDateTime;
|
|
import java.time.format.DateTimeFormatter;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
import java.util.regex.Pattern;
|
|
|
|
import util.Util;
|
|
import util.db.RDB;
|
|
|
|
public class Statistics {
|
|
private CorpusType corpusType;
|
|
private AnalysisLevel analysisLevel;
|
|
private boolean useDB;
|
|
private RDB db;
|
|
|
|
private boolean analysisProducedResults;
|
|
|
|
private String taxonomy;
|
|
private boolean taxonomyIsSet;
|
|
|
|
private char JOSType;
|
|
private boolean JOSTypeIsSet;
|
|
|
|
private String resultTitle;
|
|
public Map<String, AtomicLong> result = new ConcurrentHashMap<>();
|
|
|
|
// nGrams
|
|
private int nGramLevel;
|
|
private Integer skip;
|
|
private CalculateFor cf;
|
|
private List<Pattern> morphosyntacticFilter;
|
|
|
|
// distributions
|
|
private String distributionTaxonomy;
|
|
private char distributionJosWordType;
|
|
private boolean vcc;
|
|
private Integer substringLength;
|
|
|
|
// inflected JOS
|
|
private String inflectedJosTaxonomy;
|
|
|
|
// GOS
|
|
boolean gosOrthMode;
|
|
|
|
// šolar
|
|
Map<String, Object> solarHeadBlockFilter;
|
|
|
|
|
|
// for ngrams
|
|
public Statistics(AnalysisLevel al, int nGramLevel, Integer skip, CalculateFor cf) {
|
|
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
|
this.cf = cf;
|
|
this.analysisLevel = al;
|
|
this.nGramLevel = nGramLevel;
|
|
this.skip = skip == null || skip == 0 ? null : skip;
|
|
|
|
this.resultTitle = String.format("%s%d-gram_%s_%s",
|
|
this.skip != null ? String.format("%d-%s-", skip, "skip") : "",
|
|
nGramLevel,
|
|
cf.toString(),
|
|
dateTime);
|
|
}
|
|
|
|
// for words distributions
|
|
public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
|
|
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
|
|
|
this.resultTitle = String.format("%s_%s_%s",
|
|
distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
|
|
distributionJosWordType != null ? distributionJosWordType.toString() : "",
|
|
dateTime);
|
|
|
|
this.analysisLevel = al;
|
|
this.cf = cf;
|
|
this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
|
|
this.taxonomyIsSet = distributionTaxonomy != null;
|
|
|
|
this.JOSTypeIsSet = distributionJosWordType != null;
|
|
this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
|
|
}
|
|
|
|
public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) {
|
|
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
|
|
|
this.resultTitle = String.format("%s_%d_%s",
|
|
"Distribucija zaporedij samoglasnikov in soglasnikov",
|
|
substringLength,
|
|
dateTime);
|
|
|
|
this.analysisLevel = al;
|
|
this.cf = cf;
|
|
this.substringLength = substringLength;
|
|
this.vcc = true;
|
|
}
|
|
|
|
public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
|
|
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
|
|
|
this.resultTitle = String.format("InflectedJOS_%s_%s",
|
|
distributionTaxonomy != null ? distributionTaxonomy : "",
|
|
dateTime);
|
|
|
|
this.analysisLevel = al;
|
|
this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
|
|
this.taxonomyIsSet = inflectedJosTaxonomy != null;
|
|
}
|
|
|
|
public Integer getSkip() {
|
|
return skip;
|
|
}
|
|
|
|
public Integer getSubstringLength() {
|
|
return substringLength;
|
|
}
|
|
|
|
public String getInflectedJosTaxonomy() {
|
|
return inflectedJosTaxonomy;
|
|
}
|
|
|
|
public void setSubstringLength(Integer substringLength) {
|
|
this.substringLength = substringLength;
|
|
}
|
|
|
|
public boolean isVcc() {
|
|
return vcc;
|
|
}
|
|
|
|
public void setVcc(boolean vcc) {
|
|
this.vcc = vcc;
|
|
}
|
|
|
|
public String getDistributionTaxonomy() {
|
|
return distributionTaxonomy;
|
|
}
|
|
|
|
public void setDistributionTaxonomy(String distributionTaxonomy) {
|
|
this.distributionTaxonomy = distributionTaxonomy;
|
|
}
|
|
|
|
public char getDistributionJosWordType() {
|
|
return distributionJosWordType;
|
|
}
|
|
|
|
public void setDistributionJosWordType(char distributionJosWordType) {
|
|
this.distributionJosWordType = distributionJosWordType;
|
|
}
|
|
|
|
public void setMorphosyntacticFilter(List<String> morphosyntacticFilter) {
|
|
// change filter strings to regex patterns
|
|
this.morphosyntacticFilter = new ArrayList<>();
|
|
for (String s : morphosyntacticFilter) {
|
|
this.morphosyntacticFilter.add(Pattern.compile(s.replaceAll("\\*", ".")));
|
|
}
|
|
}
|
|
|
|
public List<Pattern> getMsd() {
|
|
return morphosyntacticFilter;
|
|
}
|
|
|
|
public Map<String, AtomicLong> getResult() {
|
|
return result;
|
|
}
|
|
|
|
public void setTaxonomy(String taxonomy) {
|
|
this.taxonomy = taxonomy;
|
|
}
|
|
|
|
public void setTaxonomyIsSet(boolean taxonomyIsSet) {
|
|
this.taxonomyIsSet = taxonomyIsSet;
|
|
}
|
|
|
|
public char getJOSType() {
|
|
return JOSType;
|
|
}
|
|
|
|
public void setJOSType(char JOSType) {
|
|
this.JOSType = JOSType;
|
|
}
|
|
|
|
public boolean isJOSTypeSet() {
|
|
return JOSTypeIsSet;
|
|
}
|
|
|
|
public void setJOSType(boolean JOSTypeIsSet) {
|
|
this.JOSTypeIsSet = JOSTypeIsSet;
|
|
}
|
|
|
|
public void saveResultToDisk(int... limit) throws UnsupportedEncodingException {
|
|
// Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
|
|
//
|
|
// if (useDB) {
|
|
// result = db.getDump();
|
|
// db.delete();
|
|
// }
|
|
//
|
|
// // if no results and nothing to save, return false
|
|
// if (!(result.size() > 0)) {
|
|
// analysisProducedResults = false;
|
|
// return;
|
|
// } else {
|
|
// analysisProducedResults = true;
|
|
// }
|
|
//
|
|
// stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
|
|
// Export.SetToCSV(stats);
|
|
}
|
|
|
|
// private Map<String, Integer> getSortedResultInflected(Map map) {
|
|
// // first convert to <String, Integer>
|
|
// Map<String, Integer> m = Util.sortByValue(Util.atomicInt2StringAndInt(map), 0);
|
|
//
|
|
// Map<String, Integer> sortedM = new TreeMap<>();
|
|
//
|
|
// sortedM.putAll(m);
|
|
//
|
|
// return sortedM;
|
|
// }
|
|
|
|
private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
|
|
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
|
}
|
|
|
|
public String getTaxonomy() {
|
|
return taxonomy;
|
|
}
|
|
|
|
public boolean isTaxonomySet() {
|
|
return taxonomyIsSet;
|
|
}
|
|
|
|
public int getnGramLevel() {
|
|
return nGramLevel;
|
|
}
|
|
|
|
public CalculateFor getCf() {
|
|
return cf;
|
|
}
|
|
|
|
public AnalysisLevel getAnalysisLevel() {
|
|
return analysisLevel;
|
|
}
|
|
|
|
public CorpusType getCorpusType() {
|
|
return corpusType;
|
|
}
|
|
|
|
public void setCorpusType(CorpusType corpusType) {
|
|
this.corpusType = corpusType;
|
|
}
|
|
|
|
public boolean isGosOrthMode() {
|
|
return gosOrthMode;
|
|
}
|
|
|
|
public void setGosOrthMode(boolean gosOrthMode) {
|
|
this.gosOrthMode = gosOrthMode;
|
|
}
|
|
|
|
public Map<String, Object> getSolarHeadBlockFilter() {
|
|
return solarHeadBlockFilter;
|
|
}
|
|
|
|
public void setSolarHeadBlockFilter(Map<String, Object> solarHeadBlockFilter) {
|
|
this.solarHeadBlockFilter = solarHeadBlockFilter;
|
|
}
|
|
|
|
public boolean isUseDB() {
|
|
return useDB;
|
|
}
|
|
|
|
public void setUseDB(boolean useDB) {
|
|
if (useDB && db == null) {
|
|
db = new RDB();
|
|
}
|
|
this.useDB = useDB;
|
|
}
|
|
|
|
/**
|
|
* Stores results from this batch to a database and clears results map
|
|
*/
|
|
public void storeTmpResultsToDB() {
|
|
try {
|
|
db.writeBatch(result);
|
|
result = new ConcurrentHashMap<>();
|
|
} catch (UnsupportedEncodingException e) {
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
|
|
public boolean isAnalysisProducedResults() {
|
|
return analysisProducedResults;
|
|
}
|
|
}
|