fixing matching of agreements with msd

This commit is contained in:
Ozbolt Menegatti 2019-06-02 12:53:16 +02:00
parent 5b9859af3e
commit 5f226d0cd4

48
wani.py
View File

@ -186,7 +186,7 @@ class LemmaCR(ComponentRepresentation):
class LexisCR(ComponentRepresentation): class LexisCR(ComponentRepresentation):
def _render(self): def _render(self):
return self.data return self.data['lexis']
class WordFormAllCR(ComponentRepresentation): class WordFormAllCR(ComponentRepresentation):
def _render(self): def _render(self):
@ -224,10 +224,13 @@ class WordFormAnyCR(ComponentRepresentation):
class WordFormMsdCR(WordFormAnyCR): class WordFormMsdCR(WordFormAnyCR):
def __init__(self, *args): def __init__(self, *args):
super().__init__(*args) super().__init__(*args)
self.backup_word = None self.lemma = None
self.msd = None
def check_msd(self, word): def check_msd(self, word):
selectors = self.data if 'msd' not in self.data:
return True
selectors = self.data['msd']
for key, value in selectors.items(): for key, value in selectors.items():
t = word.msd[0] t = word.msd[0]
v = TAGSET[t].index(key.lower()) v = TAGSET[t].index(key.lower())
@ -241,39 +244,37 @@ class WordFormMsdCR(WordFormAnyCR):
pass pass
def add_word(self, word): def add_word(self, word):
if self.backup_word is None: if self.lemma is None:
msd = self.word_renderer.get_lemma_msd(word.lemma, word.msd) self.lemma = word.lemma
WordLemma = namedtuple('WordLemmaOnly', 'msd most_frequent_text lemma text') self.msd = word.msd
self.backup_word = WordLemma(msd=msd, most_frequent_text=lambda *x: None, lemma=None, text=None)
if self.check_msd(word): if self.check_msd(word):
super().add_word(word) super().add_word(word)
def _render(self): def _render(self):
self.words.append(self.backup_word) msd = self.word_renderer.get_lemma_msd(self.lemma, self.msd)
WordLemma = namedtuple('WordLemmaOnly', 'msd most_frequent_text lemma text')
backup_word = WordLemma(msd=msd, most_frequent_text=lambda *x: None, lemma=None, text=None)
self.words.append(backup_word)
return super()._render() return super()._render()
class WordFormAgreementCR(ComponentRepresentation): class WordFormAgreementCR(WordFormMsdCR):
def __init__(self, data, word_renderer): def __init__(self, data, word_renderer):
super().__init__(data, word_renderer) super().__init__(data, word_renderer)
self.agree_with, self.data = self.data
self.rendition_candidate = None self.rendition_candidate = None
def get_agreement(self): def get_agreement(self):
return self.agree_with return self.data['other']
def match(self, word_msd): def match(self, word_msd):
word_category = self.words[0].msd[0]
word_lemma = self.words[0].lemma
agreements = self.data
existing = [(w.msd, w.text) for w in self.words] existing = [(w.msd, w.text) for w in self.words]
for candidate_msd, candidate_text in self.word_renderer.available_words(word_lemma, existing): for candidate_msd, candidate_text in self.word_renderer.available_words(self.lemma, existing):
if word_category != candidate_msd[0]: if self.msd[0] != candidate_msd[0]:
continue continue
if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, agreements): if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, self.data['agreement']):
self.rendition_candidate = candidate_text self.rendition_candidate = candidate_text
return True return True
@ -320,11 +321,9 @@ class WordFormAgreementCR(ComponentRepresentation):
class ComponentRendition: class ComponentRendition:
def __init__(self): def __init__(self):
self.more = None self.more = {}
self.representation_factory = ComponentRepresentation self.representation_factory = ComponentRepresentation
def _set_more(self, m):
self.more = m
def add_feature(self, feature): def add_feature(self, feature):
if 'rendition' in feature: if 'rendition' in feature:
@ -335,21 +334,24 @@ class ComponentRendition:
self.representation_factory = WordFormAnyCR self.representation_factory = WordFormAnyCR
elif feature['rendition'] == "lexis": elif feature['rendition'] == "lexis":
self.representation_factory = LexisCR self.representation_factory = LexisCR
self.more = feature['string'] self.more['lexis'] = feature['string']
else: else:
raise NotImplementedError("Representation rendition: {}".format(feature)) raise NotImplementedError("Representation rendition: {}".format(feature))
elif 'selection' in feature: elif 'selection' in feature:
if feature['selection'] == "msd": if feature['selection'] == "msd":
# could already be agreement
if self.representation_factory != WordFormAgreementCR:
self.representation_factory = WordFormMsdCR self.representation_factory = WordFormMsdCR
self.more = {k: v for k, v in feature.items() if k != 'selection'} self.more['msd'] = {k: v for k, v in feature.items() if k != 'selection'}
elif feature['selection'] == "all": elif feature['selection'] == "all":
self.representation_factory = WordFormAllCR self.representation_factory = WordFormAllCR
elif feature['selection'] == 'agreement': elif feature['selection'] == 'agreement':
assert(feature['head'][:4] == 'cid_') assert(feature['head'][:4] == 'cid_')
assert(feature['msd'] is not None) assert(feature['msd'] is not None)
self.representation_factory = WordFormAgreementCR self.representation_factory = WordFormAgreementCR
self.more = (feature['head'][4:], feature['msd'].split('+')) self.more['agreement'] = feature['msd'].split('+')
self.more['other'] = feature['head'][4:]
else: else:
raise NotImplementedError("Representation selection: {}".format(feature)) raise NotImplementedError("Representation selection: {}".format(feature))