package data; import java.io.UnsupportedEncodingException; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Pattern; import util.Util; import util.db.RDB; public class Statistics { private CorpusType corpusType; private AnalysisLevel analysisLevel; private boolean useDB; private RDB db; private boolean analysisProducedResults; private String taxonomy; private boolean taxonomyIsSet; private char JOSType; private boolean JOSTypeIsSet; private String resultTitle; public Map result = new ConcurrentHashMap<>(); // nGrams private int nGramLevel; private Integer skip; private CalculateFor cf; private List morphosyntacticFilter; // distributions private String distributionTaxonomy; private char distributionJosWordType; private boolean vcc; private Integer substringLength; // inflected JOS private String inflectedJosTaxonomy; // GOS boolean gosOrthMode; // šolar Map solarHeadBlockFilter; // for ngrams public Statistics(AnalysisLevel al, int nGramLevel, Integer skip, CalculateFor cf) { String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); this.cf = cf; this.analysisLevel = al; this.nGramLevel = nGramLevel; this.skip = skip == null || skip == 0 ? null : skip; this.resultTitle = String.format("%s%d-gram_%s_%s", this.skip != null ? String.format("%d-%s-", skip, "skip") : "", nGramLevel, cf.toString(), dateTime); } // for words distributions public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) { String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); this.resultTitle = String.format("%s_%s_%s", distributionTaxonomy != null ? distributionTaxonomy.toString() : "", distributionJosWordType != null ? distributionJosWordType.toString() : "", dateTime); this.analysisLevel = al; this.cf = cf; this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null; this.taxonomyIsSet = distributionTaxonomy != null; this.JOSTypeIsSet = distributionJosWordType != null; this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' '; } public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) { String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); this.resultTitle = String.format("%s_%d_%s", "Distribucija zaporedij samoglasnikov in soglasnikov", substringLength, dateTime); this.analysisLevel = al; this.cf = cf; this.substringLength = substringLength; this.vcc = true; } public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) { String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm")); this.resultTitle = String.format("InflectedJOS_%s_%s", distributionTaxonomy != null ? distributionTaxonomy : "", dateTime); this.analysisLevel = al; this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null; this.taxonomyIsSet = inflectedJosTaxonomy != null; } public Integer getSkip() { return skip; } public Integer getSubstringLength() { return substringLength; } public String getInflectedJosTaxonomy() { return inflectedJosTaxonomy; } public void setSubstringLength(Integer substringLength) { this.substringLength = substringLength; } public boolean isVcc() { return vcc; } public void setVcc(boolean vcc) { this.vcc = vcc; } public String getDistributionTaxonomy() { return distributionTaxonomy; } public void setDistributionTaxonomy(String distributionTaxonomy) { this.distributionTaxonomy = distributionTaxonomy; } public char getDistributionJosWordType() { return distributionJosWordType; } public void setDistributionJosWordType(char distributionJosWordType) { this.distributionJosWordType = distributionJosWordType; } public void setMorphosyntacticFilter(List morphosyntacticFilter) { // change filter strings to regex patterns this.morphosyntacticFilter = new ArrayList<>(); for (String s : morphosyntacticFilter) { this.morphosyntacticFilter.add(Pattern.compile(s.replaceAll("\\*", "."))); } } public List getMsd() { return morphosyntacticFilter; } public Map getResult() { return result; } public void setTaxonomy(String taxonomy) { this.taxonomy = taxonomy; } public void setTaxonomyIsSet(boolean taxonomyIsSet) { this.taxonomyIsSet = taxonomyIsSet; } public char getJOSType() { return JOSType; } public void setJOSType(char JOSType) { this.JOSType = JOSType; } public boolean isJOSTypeSet() { return JOSTypeIsSet; } public void setJOSType(boolean JOSTypeIsSet) { this.JOSTypeIsSet = JOSTypeIsSet; } public void saveResultToDisk(int... limit) throws UnsupportedEncodingException { // Set>> stats = new HashSet<>(); // // if (useDB) { // result = db.getDump(); // db.delete(); // } // // // if no results and nothing to save, return false // if (!(result.size() > 0)) { // analysisProducedResults = false; // return; // } else { // analysisProducedResults = true; // } // // stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit)))); // Export.SetToCSV(stats); } // private Map getSortedResultInflected(Map map) { // // first convert to // Map m = Util.sortByValue(Util.atomicInt2StringAndInt(map), 0); // // Map sortedM = new TreeMap<>(); // // sortedM.putAll(m); // // return sortedM; // } private Map getSortedResult(Map map, int limit) { return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit); } public String getTaxonomy() { return taxonomy; } public boolean isTaxonomySet() { return taxonomyIsSet; } public int getnGramLevel() { return nGramLevel; } public CalculateFor getCf() { return cf; } public AnalysisLevel getAnalysisLevel() { return analysisLevel; } public CorpusType getCorpusType() { return corpusType; } public void setCorpusType(CorpusType corpusType) { this.corpusType = corpusType; } public boolean isGosOrthMode() { return gosOrthMode; } public void setGosOrthMode(boolean gosOrthMode) { this.gosOrthMode = gosOrthMode; } public Map getSolarHeadBlockFilter() { return solarHeadBlockFilter; } public void setSolarHeadBlockFilter(Map solarHeadBlockFilter) { this.solarHeadBlockFilter = solarHeadBlockFilter; } public boolean isUseDB() { return useDB; } public void setUseDB(boolean useDB) { if (useDB && db == null) { db = new RDB(); } this.useDB = useDB; } /** * Stores results from this batch to a database and clears results map */ public void storeTmpResultsToDB() { try { db.writeBatch(result); result = new ConcurrentHashMap<>(); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } } public boolean isAnalysisProducedResults() { return analysisProducedResults; } }