From ad0f9b09568a019abb32743da70db466fcdfc6c4 Mon Sep 17 00:00:00 2001
From: Ozbolt Menegatti
Date: Tue, 11 Jun 2019 09:22:25 +0200
Subject: [PATCH] Fixing logdice all stat (and mini refactoring)

---
 wani.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/wani.py b/wani.py
index 76def42..dfdcd5e 100644
--- a/wani.py
+++ b/wani.py
@@ -1133,7 +1133,6 @@ class StatsFormatter(Formatter):
         self.corew = tuple(corew)
 
     def new_match(self, match):
-        jppb_forms = set()
         self.stats = {"freq": {}}
 
         for cid in self.corew:
@@ -1145,17 +1144,16 @@ class StatsFormatter(Formatter):
 
             self.stats["freq"][cid] = freq
 
-        fx = self.stats['freq'][self.jppb[0]]
-        fy = self.stats['freq'][self.jppb[1]]
+        fx = self.stats["freq"][self.jppb[0]]
+        fy = self.stats["freq"][self.jppb[1]]
         freq = len(match)
         N = self.word_renderer.num_all_words()
 
-        self.stats['delta_12'] = freq / fx - (fy - freq) / (N - fx)
-        self.stats['delta_21'] = freq / fy - (fx - freq) / (N - fy)
+        self.stats['d12'] = freq / fx - (fy - freq) / (N - fx)
+        self.stats['d21'] = freq / fy - (fx - freq) / (N - fy)
 
         self.stats['df'] = match.distinct_forms()
         self.stats['freq_all'] = freq
-        self.stats['n'] = len(jppb_forms)
 
     def header_repeat(self):
         return ["Distribution"]
@@ -1174,18 +1172,17 @@ class StatsFormatter(Formatter):
         return [self.stat_str(distribution)]
 
     def content_right(self, freq):
-        fx = self.stats['freq'][self.jppb[0]]
-        fy = self.stats['freq'][self.jppb[1]]
+        fx = self.stats["freq"][self.jppb[0]]
+        fy = self.stats["freq"][self.jppb[1]]
         freq = self.stats['freq_all']
         logdice_core = 14 + log2(2 * freq / (fx + fy))
 
-        sum_fi = sum(self.stats['freq'][idx] for idx in self.corew)
-        logdice_all = 14 + log2(len(self.corew) * freq / sum_fi)
+        fi = [self.stats["freq"][idx] for idx in self.corew]
+        fi = [f for f in fi if f > 0]
+        logdice_all = 14 + log2(len(fi) * freq / sum(fi))
 
-        dp12 = self.stats["delta_12"]
-        dp21 = self.stats["delta_21"]
         return [self.stat_str(x) for x in (
-            dp12, dp21, logdice_core, logdice_all, self.stats['df']
+            self.stats["d12"], self.stats["d21"], logdice_core, logdice_all, self.stats['df']
         )]
 
     def group(self):