defining formatter for --output

This commit is contained in:
Ozbolt Menegatti 2019-06-09 13:33:03 +02:00
parent 2a437b1703
commit 111b088c6c

32
wani.py
View File

@ -1048,6 +1048,38 @@ class Formatter:
return len(self.header_repeat())
class OutFormatter(Formatter):
    """Formatter for the ``--output`` table.

    For each component word it emits the lemma, its representative form and
    a scenario tag, while accumulating the representative forms into one
    space-separated "joint" form that is emitted together with the frequency.

    NOTE(review): when and how often each hook is called is decided by the
    ``Formatter`` base class, which is not visible here -- confirm there.
    """

    def additional_init(self):
        """Initialise the buffer that accumulates the joint representative form."""
        self.representation = ""

    def header_repeat(self):
        """Return the column names matching the cells of ``content_repeat``."""
        return ["Lemma", "Representative_form", "RF_scenario"]

    def header_right(self):
        """Return the column names matching the cells of ``content_right``."""
        return ["Joint_representative_form", "Frequency"]

    def content_repeat(self, words, representations, idx):
        """Return ``[lemma, representative_form, scenario]`` for ``words[idx]``.

        * ``idx`` missing from ``representations``: both the form and the
          scenario are empty strings and nothing is accumulated.
        * ``representations[idx]`` is ``None``: the lemma stands in for the
          form and the scenario is ``"lemma_fallback"``.
        * otherwise: the stored form is used and the scenario is ``"ok"``.

        In the last two cases the chosen form is appended (preceded by a
        space) to ``self.representation``.
        """
        token = words[idx]
        if idx not in representations:
            return [token.lemma, "", ""]

        lemma = token.lemma
        form = representations[idx]
        scenario = "ok"
        if form is None:
            # No representative form available: fall back to the lemma.
            form = lemma
            scenario = "lemma_fallback"

        self.representation += " " + form
        return [lemma, form, scenario]

    def content_right(self, freq):
        """Return ``[joint_form, str(freq)]`` and clear the accumulated form.

        The joint form is the accumulated text with surrounding whitespace
        stripped and every run of spaces collapsed to a single space.
        """
        joint_form = re.sub(' +', ' ', self.representation.strip())
        frequency = str(freq)
        # Reset so the next structure starts from an empty buffer.
        self.representation = ""
        return [joint_form, frequency]

    def group(self):
        """Always return ``True``.

        NOTE(review): presumably tells the caller to group matches for this
        formatter -- confirm against ``Formatter.group``.
        """
        return True
class AllFormatter(Formatter):
def header_repeat(self):
return ["Token_ID", "Word_form", "Lemma", "Msd"]