fixing matching of agreements with msd
This commit is contained in:
parent
5b9859af3e
commit
5f226d0cd4
48
wani.py
48
wani.py
|
@@ -186,7 +186,7 @@ class LemmaCR(ComponentRepresentation):
|
|||
|
||||
class LexisCR(ComponentRepresentation):
|
||||
def _render(self):
|
||||
return self.data
|
||||
return self.data['lexis']
|
||||
|
||||
class WordFormAllCR(ComponentRepresentation):
|
||||
def _render(self):
|
||||
|
@@ -224,10 +224,13 @@ class WordFormAnyCR(ComponentRepresentation):
|
|||
class WordFormMsdCR(WordFormAnyCR):
|
||||
def __init__(self, *args):
|
||||
super().__init__(*args)
|
||||
self.backup_word = None
|
||||
self.lemma = None
|
||||
self.msd = None
|
||||
|
||||
def check_msd(self, word):
|
||||
selectors = self.data
|
||||
if 'msd' not in self.data:
|
||||
return True
|
||||
selectors = self.data['msd']
|
||||
for key, value in selectors.items():
|
||||
t = word.msd[0]
|
||||
v = TAGSET[t].index(key.lower())
|
||||
|
@@ -241,39 +244,37 @@ class WordFormMsdCR(WordFormAnyCR):
|
|||
pass
|
||||
|
||||
def add_word(self, word):
|
||||
if self.backup_word is None:
|
||||
msd = self.word_renderer.get_lemma_msd(word.lemma, word.msd)
|
||||
WordLemma = namedtuple('WordLemmaOnly', 'msd most_frequent_text lemma text')
|
||||
self.backup_word = WordLemma(msd=msd, most_frequent_text=lambda *x: None, lemma=None, text=None)
|
||||
if self.lemma is None:
|
||||
self.lemma = word.lemma
|
||||
self.msd = word.msd
|
||||
|
||||
if self.check_msd(word):
|
||||
super().add_word(word)
|
||||
|
||||
def _render(self):
|
||||
self.words.append(self.backup_word)
|
||||
msd = self.word_renderer.get_lemma_msd(self.lemma, self.msd)
|
||||
WordLemma = namedtuple('WordLemmaOnly', 'msd most_frequent_text lemma text')
|
||||
backup_word = WordLemma(msd=msd, most_frequent_text=lambda *x: None, lemma=None, text=None)
|
||||
self.words.append(backup_word)
|
||||
return super()._render()
|
||||
|
||||
class WordFormAgreementCR(ComponentRepresentation):
|
||||
class WordFormAgreementCR(WordFormMsdCR):
|
||||
def __init__(self, data, word_renderer):
|
||||
super().__init__(data, word_renderer)
|
||||
self.agree_with, self.data = self.data
|
||||
self.rendition_candidate = None
|
||||
|
||||
def get_agreement(self):
|
||||
return self.agree_with
|
||||
return self.data['other']
|
||||
|
||||
def match(self, word_msd):
|
||||
word_category = self.words[0].msd[0]
|
||||
word_lemma = self.words[0].lemma
|
||||
agreements = self.data
|
||||
|
||||
existing = [(w.msd, w.text) for w in self.words]
|
||||
|
||||
for candidate_msd, candidate_text in self.word_renderer.available_words(word_lemma, existing):
|
||||
if word_category != candidate_msd[0]:
|
||||
for candidate_msd, candidate_text in self.word_renderer.available_words(self.lemma, existing):
|
||||
if self.msd[0] != candidate_msd[0]:
|
||||
continue
|
||||
|
||||
if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, agreements):
|
||||
if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, self.data['agreement']):
|
||||
self.rendition_candidate = candidate_text
|
||||
return True
|
||||
|
||||
|
@@ -320,11 +321,9 @@ class WordFormAgreementCR(ComponentRepresentation):
|
|||
|
||||
class ComponentRendition:
|
||||
def __init__(self):
|
||||
self.more = None
|
||||
self.more = {}
|
||||
self.representation_factory = ComponentRepresentation
|
||||
|
||||
def _set_more(self, m):
|
||||
self.more = m
|
||||
|
||||
def add_feature(self, feature):
|
||||
if 'rendition' in feature:
|
||||
|
@@ -335,21 +334,24 @@ class ComponentRendition:
|
|||
self.representation_factory = WordFormAnyCR
|
||||
elif feature['rendition'] == "lexis":
|
||||
self.representation_factory = LexisCR
|
||||
self.more = feature['string']
|
||||
self.more['lexis'] = feature['string']
|
||||
else:
|
||||
raise NotImplementedError("Representation rendition: {}".format(feature))
|
||||
|
||||
elif 'selection' in feature:
|
||||
if feature['selection'] == "msd":
|
||||
# could already be agreement
|
||||
if self.representation_factory != WordFormAgreementCR:
|
||||
self.representation_factory = WordFormMsdCR
|
||||
self.more = {k: v for k, v in feature.items() if k != 'selection'}
|
||||
self.more['msd'] = {k: v for k, v in feature.items() if k != 'selection'}
|
||||
elif feature['selection'] == "all":
|
||||
self.representation_factory = WordFormAllCR
|
||||
elif feature['selection'] == 'agreement':
|
||||
assert(feature['head'][:4] == 'cid_')
|
||||
assert(feature['msd'] is not None)
|
||||
self.representation_factory = WordFormAgreementCR
|
||||
self.more = (feature['head'][4:], feature['msd'].split('+'))
|
||||
self.more['agreement'] = feature['msd'].split('+')
|
||||
self.more['other'] = feature['head'][4:]
|
||||
else:
|
||||
raise NotImplementedError("Representation selection: {}".format(feature))
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user