defining formatter for --output
This commit is contained in:
parent
2a437b1703
commit
111b088c6c
32
wani.py
32
wani.py
|
@ -1048,6 +1048,38 @@ class Formatter:
|
||||||
return len(self.header_repeat())
|
return len(self.header_repeat())
|
||||||
|
|
||||||
|
|
||||||
|
class OutFormatter(Formatter):
    """Formatter defined for the ``--output`` option.

    Each row carries three repeated columns per word (lemma, representative
    form, scenario tag) followed by two right-hand columns: the joint
    representative form assembled from the row's words, and a frequency.

    NOTE(review): the hook methods below are presumably invoked by the
    ``Formatter`` base class; confirm the call order against it.
    """

    def additional_init(self):
        """Start with an empty joint-representation accumulator."""
        # Grows in content_repeat(); emitted and cleared in content_right().
        self.representation = ""

    def header_repeat(self):
        """Return the column names for the per-word (repeated) columns."""
        return ["Lemma", "Representative_form", "RF_scenario"]

    def header_right(self):
        """Return the column names for the trailing right-hand columns."""
        return ["Joint_representative_form", "Frequency"]

    def content_repeat(self, words, representations, idx):
        """Return the three repeated cells for the word at position ``idx``.

        ``words`` is indexed by ``idx`` and each item must expose ``.lemma``;
        ``representations`` is tested with ``in`` and indexed by ``idx``.

        * ``idx`` absent from ``representations``: both form cells are empty
          and nothing is added to the joint representation.
        * entry is ``None``: the lemma stands in for the form and the row is
          tagged ``"lemma_fallback"``.
        * otherwise: the stored form is used and the row is tagged ``"ok"``.
        """
        token = words[idx]
        if idx not in representations:
            return [token.lemma, "", ""]

        form = representations[idx]
        scenario = "ok"
        if form is None:
            form = token.lemma
            scenario = "lemma_fallback"

        # Pieces are space-prefixed here; content_right() tidies the spacing.
        self.representation += " " + form
        return [token.lemma, form, scenario]

    def content_right(self, freq):
        """Return ``[joint_form, str(freq)]`` and reset the accumulator.

        The joint form is the accumulated text with surrounding whitespace
        stripped and every run of spaces collapsed to a single space.
        """
        trimmed = self.representation.strip()
        row = [re.sub(' +', ' ', trimmed), str(freq)]
        # Clear the accumulator so the next row starts from scratch.
        self.representation = ""
        return row

    def group(self):
        """Return ``True`` unconditionally."""
        return True
|
||||||
|
|
||||||
class AllFormatter(Formatter):
|
class AllFormatter(Formatter):
|
||||||
def header_repeat(self):
|
def header_repeat(self):
|
||||||
return ["Token_ID", "Word_form", "Lemma", "Msd"]
|
return ["Token_ID", "Word_form", "Lemma", "Msd"]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user