From cb53a9c7b3f1ee4aa22ece37c61ba37c60e05aab Mon Sep 17 00:00:00 2001
From: Ozbolt Menegatti
Date: Mon, 10 Jun 2019 10:25:42 +0200
Subject: [PATCH] moving delta_p12/21 to the end of stats formatter

---
 wani.py | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/wani.py b/wani.py
index d3a5d2e..445e7bc 100644
--- a/wani.py
+++ b/wani.py
@@ -1136,16 +1136,23 @@ class StatsFormatter(Formatter):
 
             self.stats["freq"][cid] = freq
 
+        fx = self.stats['freq'][self.jppb[0]]
+        fy = self.stats['freq'][self.jppb[1]]
+        freq = len(match)
+        N = self.word_renderer.num_all_words()
+
+        self.stats['delta_12'] = freq / fx - (fy - freq) / (N - fx)
+        self.stats['delta_21'] = freq / fy - (fx - freq) / (N - fy)
+
         self.stats['df'] = match.distinct_forms()
-        self.stats['fcxy'] = len(match)
+        self.stats['freq_all'] = freq
         self.stats['n'] = len(jppb_forms)
-        self.stats['N'] = self.word_renderer.num_all_words()
 
     def header_repeat(self):
-        return ["Distribution", "Delta"]
+        return ["Distribution"]
 
     def header_right(self):
-        return ["LogDice_core", "LogDice_all", "Distinct_forms"]
+        return ["Delta_p12", "Delta_p21", "LogDice_core", "LogDice_all", "Distinct_forms"]
 
     def content_repeat(self, words, representations, idx, sidx):
         # not a core word
@@ -1155,29 +1162,20 @@ class StatsFormatter(Formatter):
         word = words[idx]
         key = (sidx, idx, word.lemma)
         distribution = self.colocation_ids.dispersions[key]
-
-        delta = ""
-        if idx in self.jppb:
-            idx2 = self.jppb[0] if self.jppb[0] != idx else self.jppb[1]
-            fx = self.stats['freq'][idx]
-            fy = self.stats['freq'][idx2]
-            fxy = self.stats['fcxy']
-            N = self.stats['N']
-            delta = fxy / fx - (fy - fxy) / (N - fx)
-
-        return [str(distribution), str(delta)]
+        return [str(distribution)]
 
     def content_right(self, freq):
         fx = self.stats['freq'][self.jppb[0]]
         fy = self.stats['freq'][self.jppb[1]]
-        fxy = self.stats['fcxy']
-        logdice_core = 14 + log2(2 * fxy / (fx + fy))
+        freq = self.stats['freq_all']
+        logdice_core = 14 + log2(2 * freq / (fx + fy))
 
         sum_fi = sum(self.stats['freq'][idx] for idx in self.corew)
-        fc = fxy
-        logdice_all = 14 + log2(len(self.corew) * fc / sum_fi)
+        logdice_all = 14 + log2(len(self.corew) * freq / sum_fi)
 
-        return [str(logdice_core), str(logdice_all), str(self.stats['df'])]
+        dp12 = str(self.stats["delta_12"])
+        dp21 = str(self.stats["delta_21"])
+        return [dp12, dp21, str(logdice_core), str(logdice_all), str(self.stats['df'])]
 
     def group(self):
         return True