num_words is now a proper dict

This commit is contained in:
Ozbolt Menegatti 2019-06-10 00:24:47 +02:00
parent 865351b3f6
commit ca0d6f0f55

View File

@@ -869,7 +869,7 @@ class WordMsdRenderer:
self.all_words = [] self.all_words = []
self.rendered_words = {} self.rendered_words = {}
self.frequent_words = {} self.frequent_words = {}
self.num_words = defaultdict(int) self.num_words = {}
self.lemma_msd = {} self.lemma_msd = {}
self.lemma_features = lemma_features self.lemma_features = lemma_features
self.memoized_msd_merges = {} self.memoized_msd_merges = {}
@@ -881,6 +881,7 @@ class WordMsdRenderer:
return len(self.all_words) return len(self.all_words)
def generate_renders(self): def generate_renders(self):
num_words = defaultdict(int)
data = defaultdict(lambda: defaultdict(list)) data = defaultdict(lambda: defaultdict(list))
for w in self.all_words: for w in self.all_words:
data[w.lemma][w.msd].append(w.text) data[w.lemma][w.msd].append(w.text)
@@ -892,7 +893,7 @@ class WordMsdRenderer:
for msd, texts in ld.items(): for msd, texts in ld.items():
# TODO: this should be out of generate_renders... # TODO: this should be out of generate_renders...
self.num_words[(lemma, msd[0])] += len(texts) num_words[(lemma, msd[0])] += len(texts)
rep = max(set(texts), key=texts.count) rep = max(set(texts), key=texts.count)
self.rendered_words[lemma][msd] = (rep, len(texts)) self.rendered_words[lemma][msd] = (rep, len(texts))
@@ -915,6 +916,8 @@ class WordMsdRenderer:
self.lemma_msd[lemma] = "".join( self.lemma_msd[lemma] = "".join(
l1 if l1 != "-" else l2 for l1, l2 in zip(lf[cmsd[0]], cmsd) l1 if l1 != "-" else l2 for l1, l2 in zip(lf[cmsd[0]], cmsd)
) )
self.num_words = dict(num_words)
def merge_msd(self, common_msd, new_msd): def merge_msd(self, common_msd, new_msd):
key = (common_msd, new_msd) key = (common_msd, new_msd)