You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

300 lines
7.4 KiB

package data;
import java.io.UnsupportedEncodingException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import util.Util;
import util.db.RDB;
public class Statistics {
private CorpusType corpusType;
private AnalysisLevel analysisLevel;
private boolean useDB;
private RDB db;
private boolean analysisProducedResults;
private String taxonomy;
private boolean taxonomyIsSet;
private char JOSType;
private boolean JOSTypeIsSet;
private String resultTitle;
public Map<String, AtomicLong> result = new ConcurrentHashMap<>();
// nGrams
private int nGramLevel;
private Integer skip;
private CalculateFor cf;
private List<Pattern> morphosyntacticFilter;
// distributions
private String distributionTaxonomy;
private char distributionJosWordType;
private boolean vcc;
private Integer substringLength;
// inflected JOS
private String inflectedJosTaxonomy;
// GOS
boolean gosOrthMode;
// šolar
Map<String, Object> solarHeadBlockFilter;
// for ngrams
public Statistics(AnalysisLevel al, int nGramLevel, Integer skip, CalculateFor cf) {
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
this.cf = cf;
this.analysisLevel = al;
this.nGramLevel = nGramLevel;
this.skip = skip == null || skip == 0 ? null : skip;
this.resultTitle = String.format("%s%d-gram_%s_%s",
this.skip != null ? String.format("%d-%s-", skip, "skip") : "",
nGramLevel,
cf.toString(),
dateTime);
}
// for words distributions
// public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
//
// this.resultTitle = String.format("%s_%s_%s",
// distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
// distributionJosWordType != null ? distributionJosWordType.toString() : "",
// dateTime);
//
// this.analysisLevel = al;
// this.cf = cf;
// this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
// this.taxonomyIsSet = distributionTaxonomy != null;
//
// this.JOSTypeIsSet = distributionJosWordType != null;
// this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
// }
public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) {
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
this.resultTitle = String.format("%s_%d_%s",
"Distribucija zaporedij samoglasnikov in soglasnikov",
substringLength,
dateTime);
this.analysisLevel = al;
this.cf = cf;
this.substringLength = substringLength;
this.vcc = true;
}
// public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
// String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
//
// this.resultTitle = String.format("InflectedJOS_%s_%s",
// distributionTaxonomy != null ? distributionTaxonomy : "",
// dateTime);
//
// this.analysisLevel = al;
// this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
// this.taxonomyIsSet = inflectedJosTaxonomy != null;
// }
public Integer getSkip() {
return skip;
}
public Integer getSubstringLength() {
return substringLength;
}
public String getInflectedJosTaxonomy() {
return inflectedJosTaxonomy;
}
public void setSubstringLength(Integer substringLength) {
this.substringLength = substringLength;
}
public boolean isVcc() {
return vcc;
}
public void setVcc(boolean vcc) {
this.vcc = vcc;
}
public String getDistributionTaxonomy() {
return distributionTaxonomy;
}
public void setDistributionTaxonomy(String distributionTaxonomy) {
this.distributionTaxonomy = distributionTaxonomy;
}
public char getDistributionJosWordType() {
return distributionJosWordType;
}
public void setDistributionJosWordType(char distributionJosWordType) {
this.distributionJosWordType = distributionJosWordType;
}
public void setMorphosyntacticFilter(List<String> morphosyntacticFilter) {
// change filter strings to regex patterns
this.morphosyntacticFilter = new ArrayList<>();
for (String s : morphosyntacticFilter) {
this.morphosyntacticFilter.add(Pattern.compile(s.replaceAll("\\*", ".")));
}
}
public List<Pattern> getMsd() {
return morphosyntacticFilter;
}
public Map<String, AtomicLong> getResult() {
return result;
}
public void setTaxonomy(String taxonomy) {
this.taxonomy = taxonomy;
}
public void setTaxonomyIsSet(boolean taxonomyIsSet) {
this.taxonomyIsSet = taxonomyIsSet;
}
public char getJOSType() {
return JOSType;
}
public void setJOSType(char JOSType) {
this.JOSType = JOSType;
}
public boolean isJOSTypeSet() {
return JOSTypeIsSet;
}
public void setJOSType(boolean JOSTypeIsSet) {
this.JOSTypeIsSet = JOSTypeIsSet;
}
public void saveResultToDisk(int... limit) throws UnsupportedEncodingException {
// Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
//
// if (useDB) {
// result = db.getDump();
// db.delete();
// }
//
// // if no results and nothing to save, return false
// if (!(result.size() > 0)) {
// analysisProducedResults = false;
// return;
// } else {
// analysisProducedResults = true;
// }
//
// stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
// Export.SetToCSV(stats);
}
// private Map<String, Integer> getSortedResultInflected(Map map) {
// // first convert to <String, Integer>
// Map<String, Integer> m = Util.sortByValue(Util.atomicInt2StringAndInt(map), 0);
//
// Map<String, Integer> sortedM = new TreeMap<>();
//
// sortedM.putAll(m);
//
// return sortedM;
// }
private Map<MultipleHMKeys, Long> getSortedResult(Map<MultipleHMKeys, AtomicLong> map, int limit) {
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
}
public String getTaxonomy() {
return taxonomy;
}
public boolean isTaxonomySet() {
return taxonomyIsSet;
}
public int getnGramLevel() {
return nGramLevel;
}
public CalculateFor getCf() {
return cf;
}
public AnalysisLevel getAnalysisLevel() {
return analysisLevel;
}
public CorpusType getCorpusType() {
return corpusType;
}
public void setCorpusType(CorpusType corpusType) {
this.corpusType = corpusType;
}
public boolean isGosOrthMode() {
return gosOrthMode;
}
public void setGosOrthMode(boolean gosOrthMode) {
this.gosOrthMode = gosOrthMode;
}
public Map<String, Object> getSolarHeadBlockFilter() {
return solarHeadBlockFilter;
}
public void setSolarHeadBlockFilter(Map<String, Object> solarHeadBlockFilter) {
this.solarHeadBlockFilter = solarHeadBlockFilter;
}
public boolean isUseDB() {
return useDB;
}
public void setUseDB(boolean useDB) {
if (useDB && db == null) {
db = new RDB();
}
this.useDB = useDB;
}
/**
* Stores results from this batch to a database and clears results map
*/
public void storeTmpResultsToDB() {
try {
db.writeBatch(result);
result = new ConcurrentHashMap<>();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
}
public boolean isAnalysisProducedResults() {
return analysisProducedResults;
}
}