class OutFormatter(Formatter):
    """Formatter emitting lemma / representative-form columns per word.

    While the per-word cells are produced, the chosen forms are accumulated
    in ``self.representation``; ``content_right`` then emits the joined form
    together with the frequency and clears the accumulator.
    """

    def additional_init(self):
        """Start with an empty joint-representation accumulator.

        NOTE(review): presumably called by ``Formatter`` during construction;
        confirm against the base class.
        """
        self.representation = ""

    def header_repeat(self):
        """Return the column names produced by ``content_repeat``."""
        return ["Lemma", "Representative_form", "RF_scenario"]

    def header_right(self):
        """Return the column names produced by ``content_right``."""
        return ["Joint_representative_form", "Frequency"]

    def content_repeat(self, words, representations, idx):
        """Return ``[lemma, representative_form, scenario]`` for ``words[idx]``.

        * ``idx`` missing from ``representations``: both trailing cells are
          empty and nothing is accumulated.
        * ``representations[idx]`` is ``None``: the lemma is used as the
          form and the scenario is ``"lemma_fallback"``.
        * otherwise: the stored form is used and the scenario is ``"ok"``.
        """
        token = words[idx]

        if idx in representations:
            form = representations[idx]
            scenario = "ok"
            if form is None:
                # No representative form available: fall back to the lemma.
                form = token.lemma
                scenario = "lemma_fallback"

            # Remember the form so content_right can build the joint form.
            self.representation += " " + form
            return [token.lemma, form, scenario]

        return [token.lemma, "", ""]

    def content_right(self, freq):
        """Return ``[joint_representative_form, str(freq)]`` and reset state.

        The accumulated text is stripped and runs of spaces are collapsed
        to a single space before it is emitted.
        """
        joined = re.sub(' +', ' ', self.representation.strip())
        row = [joined, str(freq)]
        # Clear the accumulator so the next row starts from scratch.
        self.representation = ""
        return row

    def group(self):
        """Always return ``True``."""
        return True