Getters for number of lemmas and number of all words

pull/1/head
Ozbolt Menegatti 5 years ago
parent 2167e4b6fe
commit 2773a8b9e9

@ -825,6 +825,7 @@ class WordMsdRenderer:
self.all_words = [] self.all_words = []
self.rendered_words = {} self.rendered_words = {}
self.frequent_words = {} self.frequent_words = {}
self.num_words = defaultdict(int)
self.lemma_msd = {} self.lemma_msd = {}
self.lemma_features = lemma_features self.lemma_features = lemma_features
self.memoized_msd_merges = {} self.memoized_msd_merges = {}
@ -832,6 +833,9 @@ class WordMsdRenderer:
def add_words(self, words): def add_words(self, words):
self.all_words.extend(words) self.all_words.extend(words)
def num_all_words(self):
    """Return the total number of words collected so far.

    The count is the length of ``self.all_words``, the list that
    ``add_words`` extends, so every added word is counted individually
    (repeated words are not collapsed).
    """
    return len(self.all_words)
def generate_renders(self): def generate_renders(self):
data = defaultdict(lambda: defaultdict(list)) data = defaultdict(lambda: defaultdict(list))
for w in self.all_words: for w in self.all_words:
@ -843,6 +847,8 @@ class WordMsdRenderer:
common_msd = "*" * 10 common_msd = "*" * 10
for msd, texts in ld.items(): for msd, texts in ld.items():
self.num_words[(lemma, msd[0])] += len(texts)
rep = max(set(texts), key=texts.count) rep = max(set(texts), key=texts.count)
self.rendered_words[lemma][msd] = (rep, len(texts)) self.rendered_words[lemma][msd] = (rep, len(texts))

Loading…
Cancel
Save