Implement support for multiple agreements with a single component id (cid).

This commit is contained in:
Ozbolt Menegatti 2019-06-01 10:36:28 +02:00
parent 0249ef1523
commit ed83b2b9c4

68
wani.py
View File

@ -17,7 +17,6 @@ from tqdm import tqdm
MAX_NUM_COMPONENTS = 5
CODES = {
"Noun": "N",
"Verb": "V",
@ -166,10 +165,10 @@ class ComponentRepresentation:
self.words = []
self.rendition_text = None
self.agreement = None
self.agreement = []
def get_agreement(self):
    """Return the component ids this representation has to agree with.

    The base representation agrees with nothing, so the result is an
    empty list.  A list (never ``None``) is returned because callers
    iterate over the result directly.
    """
    return []
def add_word(self, word):
self.words.append(word)
@ -210,12 +209,17 @@ class WordFormAnyCR(ComponentRepresentation):
sorted_words = sorted(set(words_counter), key=lambda x: -words_counter.count(x))
for word_msd, word_lemma in sorted_words:
if self.agreement is not None:
if self.agreement.match(word_msd):
if word_lemma is None:
return None
else:
return text_forms[(word_msd, word_lemma)]
for agr in self.agreement:
if not agr.match(word_msd):
break
else:
for agr in self.agreement:
agr.confirm_match()
if word_lemma is None:
return None
else:
return text_forms[(word_msd, word_lemma)]
class WordFormMsdCR(WordFormAnyCR):
def __init__(self, *args):
@ -253,6 +257,7 @@ class WordFormAgreementCR(ComponentRepresentation):
def __init__(self, data, word_renderer):
    """Initialise the base representation, then split ``self.data``.

    After the base initialiser has run, ``self.data`` is unpacked into
    exactly two items: the agreement target(s), stored as
    ``self.agree_with``, and the remaining payload, which replaces
    ``self.data``.
    NOTE(review): the precise shape of both items is not visible here.
    """
    super().__init__(data, word_renderer)
    agree_with, payload = self.data
    self.agree_with = agree_with
    self.data = payload
    # Text recorded by a successful agreement check; it is copied to
    # ``rendition_text`` by ``confirm_match``.
    self.rendition_candidate = None
def get_agreement(self):
    """Return the agreement target(s) stored on this representation."""
    return self.agree_with
@ -269,10 +274,13 @@ class WordFormAgreementCR(ComponentRepresentation):
continue
if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, agreements):
self.rendition_text = candidate_text
self.rendition_candidate = candidate_text
return True
return False
def confirm_match(self):
    """Copy the pending rendition candidate into ``rendition_text``."""
    candidate = self.rendition_candidate
    self.rendition_text = candidate
@staticmethod
def check_agreement(msd1, msd2, agreements):
@ -361,20 +369,17 @@ class ComponentRendition:
for cid, reps in representations.items():
for rep in reps:
agr = rep.get_agreement()
if agr is None:
continue
for agr in rep.get_agreement():
if len(representations[agr]) != 1:
n = len(representations[agr])
raise NotImplementedError(
"Structure {}: ".format(structure.id) +
"component {} has agreement".format(cid) +
" with component {}".format(agr) +
", however there are {} (!= 1) representations".format(n) +
" of component {}!".format(agr))
if len(representations[agr]) != 1:
n = len(representations[agr])
raise NotImplementedError(
"Structure {}: ".format(structure.id) +
"component {} has agreement".format(cid) +
" with component {}".format(agr) +
", however there are {} (!= 1) representations".format(n) +
" of component {}!".format(agr))
representations[agr][0].agreement = rep
representations[agr][0].agreement.append(rep)
# representations = {
# c.idx: [[], None] if c.representation.isit(Rendition.WordForm) else [True, ""]
@ -494,6 +499,7 @@ class ComponentRendition:
# if add:
# representations[w_id][0].append(w)
for cid, reps in representations.items():
for rep in reps:
rep.render()
@ -1178,16 +1184,17 @@ class Writer:
def length(self):
    """Return how many cells are written per word: 4 when ``self.all`` is truthy, else 3."""
    if self.all:
        return 4
    return 3
def from_word(self, word, representation, rep_exists=True):
    """Build the output cells for one component of a match.

    Args:
        word: The matched word, or ``None`` when the component has no
            word in this match.
        representation: Rendered text for the component, or ``None``
            when no text was produced.
        rep_exists: Whether a representation entry exists for the
            component at all.  Defaults to ``True`` so two-argument
            callers keep the earlier behaviour (lemma fallback when
            ``representation`` is ``None``).

    Returns:
        A list of cells: ``self.length()`` empty strings when there is
        no word; ``[id, text, lemma, msd]`` in ``all`` mode; otherwise
        ``[lemma, text, status]``.
    """
    if word is None:
        return [""] * self.length()
    if self.all:
        return [word.id, word.text, word.lemma, word.msd]
    if not rep_exists:
        # No representation entry for this component: emit the lemma
        # with empty text and status cells instead of a fallback.
        return [word.lemma, "", ""]
    if representation is None:
        return [word.lemma, word.lemma, "lemma_fallback"]
    return [word.lemma, representation, "ok"]
def sorted_rows(self, rows):
if self.sort_by < 0 or len(rows) < 2:
@ -1218,8 +1225,9 @@ class Writer:
for idx, _comp in enumerate(components):
idx = str(idx + 1)
word = m[idx] if idx in m else None
rep = rprsnt[idx] if idx in rprsnt else None
to_write.extend(self.from_word(word, rep))
rep_exists = idx in rprsnt
rep = rprsnt[idx] if rep_exists else None
to_write.extend(self.from_word(word, rep, rep_exists))
representation += " " + to_write[-2]
# make them equal size