moving delta_p12/21 to the end of stats formatter

This commit is contained in:
Ozbolt Menegatti 2019-06-10 10:25:42 +02:00
parent 9ccbd02603
commit cb53a9c7b3

38
wani.py
View File

@ -1136,16 +1136,23 @@ class StatsFormatter(Formatter):
self.stats["freq"][cid] = freq
fx = self.stats['freq'][self.jppb[0]]
fy = self.stats['freq'][self.jppb[1]]
freq = len(match)
N = self.word_renderer.num_all_words()
self.stats['delta_12'] = freq / fx - (fy - freq) / (N - fx)
self.stats['delta_21'] = freq / fy - (fx - freq) / (N - fy)
self.stats['df'] = match.distinct_forms()
self.stats['fcxy'] = len(match)
self.stats['freq_all'] = freq
self.stats['n'] = len(jppb_forms)
self.stats['N'] = self.word_renderer.num_all_words()
def header_repeat(self):
# Returns a list of header label strings.
# NOTE(review): this text is a rendered diff whose +/- markers and indentation
# were stripped, so the two returns below are presumably the removed line
# followed by its replacement (the second one no longer lists "Delta").
# Confirm against the repository which one is live.
return ["Distribution", "Delta"]
return ["Distribution"]
def header_right(self):
# Returns a list of header label strings.
# NOTE(review): this text is a rendered diff whose +/- markers and indentation
# were stripped, so the two returns below are presumably the removed line
# followed by its replacement (the second one additionally starts with
# "Delta_p12" and "Delta_p21"). Confirm against the repository which one is live.
return ["LogDice_core", "LogDice_all", "Distinct_forms"]
return ["Delta_p12", "Delta_p21", "LogDice_core", "LogDice_all", "Distinct_forms"]
def content_repeat(self, words, representations, idx, sidx):
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were stripped. The "@ ..." line below is a hunk header, so some
# lines of this method are not shown here, and removed and added lines appear
# back to back. Confirm against the repository before relying on any of it.
# not a core word
@ -1155,29 +1162,20 @@ class StatsFormatter(Formatter):
word = words[idx]
key = (sidx, idx, word.lemma)
# Value looked up in the dispersions mapping under (sidx, idx, lemma).
distribution = self.colocation_ids.dispersions[key]
# delta stays an empty string unless idx is found in self.jppb.
delta = ""
if idx in self.jppb:
# idx2 is self.jppb[0] unless that equals idx, in which case self.jppb[1].
idx2 = self.jppb[0] if self.jppb[0] != idx else self.jppb[1]
fx = self.stats['freq'][idx]
fy = self.stats['freq'][idx2]
fxy = self.stats['fcxy']
N = self.stats['N']
# fxy / fx - (fy - fxy) / (N - fx); presumably the Delta P association
# measure — TODO confirm. The divisors fx and (N - fx) are not checked for zero here.
delta = fxy / fx - (fy - fxy) / (N - fx)
# Presumably the removed return (with the delta value) followed by its replacement.
return [str(distribution), str(delta)]
return [str(distribution)]
def content_right(self, freq):
# Returns a list of stringified statistics read from / derived from self.stats.
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were stripped, so removed and added lines appear back to back
# (two logdice_core assignments, two logdice_all assignments, two returns).
# Confirm against the repository which lines are live.
fx = self.stats['freq'][self.jppb[0]]
fy = self.stats['freq'][self.jppb[1]]
fxy = self.stats['fcxy']
# 14 + log2(2 * f / (fx + fy)): first with f = fxy, then with f = stats['freq_all'].
logdice_core = 14 + log2(2 * fxy / (fx + fy))
# Rebinds the `freq` parameter to the value stored in self.stats.
freq = self.stats['freq_all']
logdice_core = 14 + log2(2 * freq / (fx + fy))
# Sum of the stored frequencies over every index in self.corew.
sum_fi = sum(self.stats['freq'][idx] for idx in self.corew)
fc = fxy
# 14 + log2(len(corew) * f / sum_fi): first with f = fc, then with f = freq.
logdice_all = 14 + log2(len(self.corew) * fc / sum_fi)
logdice_all = 14 + log2(len(self.corew) * freq / sum_fi)
return [str(logdice_core), str(logdice_all), str(self.stats['df'])]
# The second return variant puts the two stored delta values in front of the others.
dp12 = str(self.stats["delta_12"])
dp21 = str(self.stats["delta_21"])
return [dp12, dp21, str(logdice_core), str(logdice_all), str(self.stats['df'])]
def group(self):
    """Return ``True`` unconditionally."""
    return True