Fix the logdice_all statistic (and minor refactoring)

This commit is contained in:
Ozbolt Menegatti 2019-06-11 09:22:25 +02:00
parent d30f8c1980
commit ad0f9b0956

23
wani.py
View File

@ -1133,7 +1133,6 @@ class StatsFormatter(Formatter):
self.corew = tuple(corew)
def new_match(self, match):
jppb_forms = set()
self.stats = {"freq": {}}
for cid in self.corew:
@ -1145,17 +1144,16 @@ class StatsFormatter(Formatter):
self.stats["freq"][cid] = freq
fx = self.stats['freq'][self.jppb[0]]
fy = self.stats['freq'][self.jppb[1]]
fx = self.stats["freq"][self.jppb[0]]
fy = self.stats["freq"][self.jppb[1]]
freq = len(match)
N = self.word_renderer.num_all_words()
self.stats['delta_12'] = freq / fx - (fy - freq) / (N - fx)
self.stats['delta_21'] = freq / fy - (fx - freq) / (N - fy)
self.stats['d12'] = freq / fx - (fy - freq) / (N - fx)
self.stats['d21'] = freq / fy - (fx - freq) / (N - fy)
self.stats['df'] = match.distinct_forms()
self.stats['freq_all'] = freq
self.stats['n'] = len(jppb_forms)
def header_repeat(self):
return ["Distribution"]
@ -1174,18 +1172,17 @@ class StatsFormatter(Formatter):
return [self.stat_str(distribution)]
def content_right(self, freq):
fx = self.stats['freq'][self.jppb[0]]
fy = self.stats['freq'][self.jppb[1]]
fx = self.stats["freq"][self.jppb[0]]
fy = self.stats["freq"][self.jppb[1]]
freq = self.stats['freq_all']
logdice_core = 14 + log2(2 * freq / (fx + fy))
sum_fi = sum(self.stats['freq'][idx] for idx in self.corew)
logdice_all = 14 + log2(len(self.corew) * freq / sum_fi)
fi = [self.stats["freq"][idx] for idx in self.corew]
fi = [f for f in fi if f > 0]
logdice_all = 14 + log2(len(fi) * freq / sum(fi))
dp12 = self.stats["delta_12"]
dp21 = self.stats["delta_21"]
return [self.stat_str(x) for x in (
dp12, dp21, logdice_core, logdice_all, self.stats['df']
self.stats["d12"], self.stats["d21"], logdice_core, logdice_all, self.stats['df']
)]
def group(self):