Implement multiple agreements for one cid.
This commit is contained in:
parent
0249ef1523
commit
ed83b2b9c4
40
wani.py
40
wani.py
|
@ -17,7 +17,6 @@ from tqdm import tqdm
|
||||||
|
|
||||||
MAX_NUM_COMPONENTS = 5
|
MAX_NUM_COMPONENTS = 5
|
||||||
|
|
||||||
|
|
||||||
CODES = {
|
CODES = {
|
||||||
"Noun": "N",
|
"Noun": "N",
|
||||||
"Verb": "V",
|
"Verb": "V",
|
||||||
|
@ -166,10 +165,10 @@ class ComponentRepresentation:
|
||||||
|
|
||||||
self.words = []
|
self.words = []
|
||||||
self.rendition_text = None
|
self.rendition_text = None
|
||||||
self.agreement = None
|
self.agreement = []
|
||||||
|
|
||||||
def get_agreement(self):
|
def get_agreement(self):
|
||||||
return None
|
return []
|
||||||
|
|
||||||
def add_word(self, word):
|
def add_word(self, word):
|
||||||
self.words.append(word)
|
self.words.append(word)
|
||||||
|
@ -210,8 +209,13 @@ class WordFormAnyCR(ComponentRepresentation):
|
||||||
sorted_words = sorted(set(words_counter), key=lambda x: -words_counter.count(x))
|
sorted_words = sorted(set(words_counter), key=lambda x: -words_counter.count(x))
|
||||||
|
|
||||||
for word_msd, word_lemma in sorted_words:
|
for word_msd, word_lemma in sorted_words:
|
||||||
if self.agreement is not None:
|
for agr in self.agreement:
|
||||||
if self.agreement.match(word_msd):
|
if not agr.match(word_msd):
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
for agr in self.agreement:
|
||||||
|
agr.confirm_match()
|
||||||
|
|
||||||
if word_lemma is None:
|
if word_lemma is None:
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
|
@ -253,6 +257,7 @@ class WordFormAgreementCR(ComponentRepresentation):
|
||||||
def __init__(self, data, word_renderer):
|
def __init__(self, data, word_renderer):
|
||||||
super().__init__(data, word_renderer)
|
super().__init__(data, word_renderer)
|
||||||
self.agree_with, self.data = self.data
|
self.agree_with, self.data = self.data
|
||||||
|
self.rendition_candidate = None
|
||||||
|
|
||||||
def get_agreement(self):
|
def get_agreement(self):
|
||||||
return self.agree_with
|
return self.agree_with
|
||||||
|
@ -269,11 +274,14 @@ class WordFormAgreementCR(ComponentRepresentation):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, agreements):
|
if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, agreements):
|
||||||
self.rendition_text = candidate_text
|
self.rendition_candidate = candidate_text
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def confirm_match(self):
|
||||||
|
self.rendition_text = self.rendition_candidate
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def check_agreement(msd1, msd2, agreements):
|
def check_agreement(msd1, msd2, agreements):
|
||||||
for agr_case in agreements:
|
for agr_case in agreements:
|
||||||
|
@ -361,10 +369,7 @@ class ComponentRendition:
|
||||||
|
|
||||||
for cid, reps in representations.items():
|
for cid, reps in representations.items():
|
||||||
for rep in reps:
|
for rep in reps:
|
||||||
agr = rep.get_agreement()
|
for agr in rep.get_agreement():
|
||||||
if agr is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if len(representations[agr]) != 1:
|
if len(representations[agr]) != 1:
|
||||||
n = len(representations[agr])
|
n = len(representations[agr])
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
|
@ -374,7 +379,7 @@ class ComponentRendition:
|
||||||
", however there are {} (!= 1) representations".format(n) +
|
", however there are {} (!= 1) representations".format(n) +
|
||||||
" of component {}!".format(agr))
|
" of component {}!".format(agr))
|
||||||
|
|
||||||
representations[agr][0].agreement = rep
|
representations[agr][0].agreement.append(rep)
|
||||||
|
|
||||||
# representations = {
|
# representations = {
|
||||||
# c.idx: [[], None] if c.representation.isit(Rendition.WordForm) else [True, ""]
|
# c.idx: [[], None] if c.representation.isit(Rendition.WordForm) else [True, ""]
|
||||||
|
@ -494,6 +499,7 @@ class ComponentRendition:
|
||||||
# if add:
|
# if add:
|
||||||
# representations[w_id][0].append(w)
|
# representations[w_id][0].append(w)
|
||||||
|
|
||||||
|
|
||||||
for cid, reps in representations.items():
|
for cid, reps in representations.items():
|
||||||
for rep in reps:
|
for rep in reps:
|
||||||
rep.render()
|
rep.render()
|
||||||
|
@ -1178,13 +1184,14 @@ class Writer:
|
||||||
def length(self):
|
def length(self):
|
||||||
return 4 if self.all else 3
|
return 4 if self.all else 3
|
||||||
|
|
||||||
def from_word(self, word, representation):
|
def from_word(self, word, representation, rep_exists):
|
||||||
if word is None:
|
if word is None:
|
||||||
return [""] * self.length()
|
return [""] * self.length()
|
||||||
elif self.all:
|
elif self.all:
|
||||||
return [word.id, word.text, word.lemma, word.msd]
|
return [word.id, word.text, word.lemma, word.msd]
|
||||||
else:
|
elif not rep_exists:
|
||||||
if representation is None:
|
return [word.lemma, "", ""]
|
||||||
|
elif representation is None:
|
||||||
return [word.lemma, word.lemma, "lemma_fallback"]
|
return [word.lemma, word.lemma, "lemma_fallback"]
|
||||||
else:
|
else:
|
||||||
return [word.lemma, representation, "ok"]
|
return [word.lemma, representation, "ok"]
|
||||||
|
@ -1218,8 +1225,9 @@ class Writer:
|
||||||
for idx, _comp in enumerate(components):
|
for idx, _comp in enumerate(components):
|
||||||
idx = str(idx + 1)
|
idx = str(idx + 1)
|
||||||
word = m[idx] if idx in m else None
|
word = m[idx] if idx in m else None
|
||||||
rep = rprsnt[idx] if idx in rprsnt else None
|
rep_exists = idx in rprsnt
|
||||||
to_write.extend(self.from_word(word, rep))
|
rep = rprsnt[idx] if rep_exists else None
|
||||||
|
to_write.extend(self.from_word(word, rep, rep_exists))
|
||||||
representation += " " + to_write[-2]
|
representation += " " + to_write[-2]
|
||||||
|
|
||||||
# make them equal size
|
# make them equal size
|
||||||
|
|
Loading…
Reference in New Issue
Block a user