Implement support for multiple agreements on one cid.
This commit is contained in:
parent
0249ef1523
commit
ed83b2b9c4
40
wani.py
40
wani.py
|
@ -17,7 +17,6 @@ from tqdm import tqdm
|
|||
|
||||
MAX_NUM_COMPONENTS = 5
|
||||
|
||||
|
||||
CODES = {
|
||||
"Noun": "N",
|
||||
"Verb": "V",
|
||||
|
@ -166,10 +165,10 @@ class ComponentRepresentation:
|
|||
|
||||
self.words = []
|
||||
self.rendition_text = None
|
||||
self.agreement = None
|
||||
self.agreement = []
|
||||
|
||||
def get_agreement(self):
|
||||
return None
|
||||
return []
|
||||
|
||||
def add_word(self, word):
|
||||
self.words.append(word)
|
||||
|
@ -210,8 +209,13 @@ class WordFormAnyCR(ComponentRepresentation):
|
|||
sorted_words = sorted(set(words_counter), key=lambda x: -words_counter.count(x))
|
||||
|
||||
for word_msd, word_lemma in sorted_words:
|
||||
if self.agreement is not None:
|
||||
if self.agreement.match(word_msd):
|
||||
for agr in self.agreement:
|
||||
if not agr.match(word_msd):
|
||||
break
|
||||
else:
|
||||
for agr in self.agreement:
|
||||
agr.confirm_match()
|
||||
|
||||
if word_lemma is None:
|
||||
return None
|
||||
else:
|
||||
|
@ -253,6 +257,7 @@ class WordFormAgreementCR(ComponentRepresentation):
|
|||
def __init__(self, data, word_renderer):
|
||||
super().__init__(data, word_renderer)
|
||||
self.agree_with, self.data = self.data
|
||||
self.rendition_candidate = None
|
||||
|
||||
def get_agreement(self):
|
||||
return self.agree_with
|
||||
|
@ -269,11 +274,14 @@ class WordFormAgreementCR(ComponentRepresentation):
|
|||
continue
|
||||
|
||||
if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, agreements):
|
||||
self.rendition_text = candidate_text
|
||||
self.rendition_candidate = candidate_text
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def confirm_match(self):
|
||||
self.rendition_text = self.rendition_candidate
|
||||
|
||||
@staticmethod
|
||||
def check_agreement(msd1, msd2, agreements):
|
||||
for agr_case in agreements:
|
||||
|
@ -361,10 +369,7 @@ class ComponentRendition:
|
|||
|
||||
for cid, reps in representations.items():
|
||||
for rep in reps:
|
||||
agr = rep.get_agreement()
|
||||
if agr is None:
|
||||
continue
|
||||
|
||||
for agr in rep.get_agreement():
|
||||
if len(representations[agr]) != 1:
|
||||
n = len(representations[agr])
|
||||
raise NotImplementedError(
|
||||
|
@ -374,7 +379,7 @@ class ComponentRendition:
|
|||
", however there are {} (!= 1) representations".format(n) +
|
||||
" of component {}!".format(agr))
|
||||
|
||||
representations[agr][0].agreement = rep
|
||||
representations[agr][0].agreement.append(rep)
|
||||
|
||||
# representations = {
|
||||
# c.idx: [[], None] if c.representation.isit(Rendition.WordForm) else [True, ""]
|
||||
|
@ -494,6 +499,7 @@ class ComponentRendition:
|
|||
# if add:
|
||||
# representations[w_id][0].append(w)
|
||||
|
||||
|
||||
for cid, reps in representations.items():
|
||||
for rep in reps:
|
||||
rep.render()
|
||||
|
@ -1178,13 +1184,14 @@ class Writer:
|
|||
def length(self):
|
||||
return 4 if self.all else 3
|
||||
|
||||
def from_word(self, word, representation):
|
||||
def from_word(self, word, representation, rep_exists):
|
||||
if word is None:
|
||||
return [""] * self.length()
|
||||
elif self.all:
|
||||
return [word.id, word.text, word.lemma, word.msd]
|
||||
else:
|
||||
if representation is None:
|
||||
elif not rep_exists:
|
||||
return [word.lemma, "", ""]
|
||||
elif representation is None:
|
||||
return [word.lemma, word.lemma, "lemma_fallback"]
|
||||
else:
|
||||
return [word.lemma, representation, "ok"]
|
||||
|
@ -1218,8 +1225,9 @@ class Writer:
|
|||
for idx, _comp in enumerate(components):
|
||||
idx = str(idx + 1)
|
||||
word = m[idx] if idx in m else None
|
||||
rep = rprsnt[idx] if idx in rprsnt else None
|
||||
to_write.extend(self.from_word(word, rep))
|
||||
rep_exists = idx in rprsnt
|
||||
rep = rprsnt[idx] if rep_exists else None
|
||||
to_write.extend(self.from_word(word, rep, rep_exists))
|
||||
representation += " " + to_write[-2]
|
||||
|
||||
# make them equal size
|
||||
|
|
Loading…
Reference in New Issue
Block a user