From 2773a8b9e979572a594c8bc38264f1b356514bc6 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Sat, 8 Jun 2019 11:25:00 +0200 Subject: [PATCH] Getters for number of lemmas and number of all words --- wani.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/wani.py b/wani.py index 9cfedd6..a8ead9b 100644 --- a/wani.py +++ b/wani.py @@ -825,6 +825,7 @@ class WordMsdRenderer: self.all_words = [] self.rendered_words = {} self.frequent_words = {} + self.num_words = defaultdict(int) self.lemma_msd = {} self.lemma_features = lemma_features self.memoized_msd_merges = {} @@ -832,6 +833,9 @@ class WordMsdRenderer: def add_words(self, words): self.all_words.extend(words) + def num_all_words(self): + return len(self.all_words) + def generate_renders(self): data = defaultdict(lambda: defaultdict(list)) for w in self.all_words: @@ -843,6 +847,8 @@ class WordMsdRenderer: common_msd = "*" * 10 for msd, texts in ld.items(): + self.num_words[(lemma, msd[0])] += len(texts) + rep = max(set(texts), key=texts.count) self.rendered_words[lemma][msd] = (rep, len(texts))