From 5f226d0cd4049babcb0242bdbd614d870a6269de Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Sun, 2 Jun 2019 12:53:16 +0200 Subject: [PATCH] fixing matching of agreements with msd --- wani.py | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/wani.py b/wani.py index 036e05d..4043669 100644 --- a/wani.py +++ b/wani.py @@ -186,7 +186,7 @@ class LemmaCR(ComponentRepresentation): class LexisCR(ComponentRepresentation): def _render(self): - return self.data + return self.data['lexis'] class WordFormAllCR(ComponentRepresentation): def _render(self): @@ -224,10 +224,13 @@ class WordFormAnyCR(ComponentRepresentation): class WordFormMsdCR(WordFormAnyCR): def __init__(self, *args): super().__init__(*args) - self.backup_word = None + self.lemma = None + self.msd = None def check_msd(self, word): - selectors = self.data + if 'msd' not in self.data: + return True + selectors = self.data['msd'] for key, value in selectors.items(): t = word.msd[0] v = TAGSET[t].index(key.lower()) @@ -241,39 +244,37 @@ class WordFormMsdCR(WordFormAnyCR): pass def add_word(self, word): - if self.backup_word is None: - msd = self.word_renderer.get_lemma_msd(word.lemma, word.msd) - WordLemma = namedtuple('WordLemmaOnly', 'msd most_frequent_text lemma text') - self.backup_word = WordLemma(msd=msd, most_frequent_text=lambda *x: None, lemma=None, text=None) + if self.lemma is None: + self.lemma = word.lemma + self.msd = word.msd if self.check_msd(word): super().add_word(word) def _render(self): - self.words.append(self.backup_word) + msd = self.word_renderer.get_lemma_msd(self.lemma, self.msd) + WordLemma = namedtuple('WordLemmaOnly', 'msd most_frequent_text lemma text') + backup_word = WordLemma(msd=msd, most_frequent_text=lambda *x: None, lemma=None, text=None) + self.words.append(backup_word) return super()._render() -class WordFormAgreementCR(ComponentRepresentation): +class WordFormAgreementCR(WordFormMsdCR): def __init__(self, data, word_renderer): super().__init__(data, word_renderer) - self.agree_with, self.data = self.data self.rendition_candidate = None def get_agreement(self): - return self.agree_with + return self.data['other'] def match(self, word_msd): - word_category = self.words[0].msd[0] - word_lemma = self.words[0].lemma - agreements = self.data existing = [(w.msd, w.text) for w in self.words] - for candidate_msd, candidate_text in self.word_renderer.available_words(word_lemma, existing): - if word_category != candidate_msd[0]: + for candidate_msd, candidate_text in self.word_renderer.available_words(self.lemma, existing): + if self.msd[0] != candidate_msd[0]: continue - if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, agreements): + if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, self.data['agreement']): self.rendition_candidate = candidate_text return True @@ -320,11 +321,9 @@ class WordFormAgreementCR(ComponentRepresentation): class ComponentRendition: def __init__(self): - self.more = None + self.more = {} self.representation_factory = ComponentRepresentation - def _set_more(self, m): - self.more = m def add_feature(self, feature): if 'rendition' in feature: @@ -335,21 +334,24 @@ class ComponentRendition: self.representation_factory = WordFormAnyCR elif feature['rendition'] == "lexis": self.representation_factory = LexisCR - self.more = feature['string'] + self.more['lexis'] = feature['string'] else: raise NotImplementedError("Representation rendition: {}".format(feature)) elif 'selection' in feature: if feature['selection'] == "msd": - self.representation_factory = WordFormMsdCR - self.more = {k: v for k, v in feature.items() if k != 'selection'} + # could already be agreement + if self.representation_factory != WordFormAgreementCR: + self.representation_factory = WordFormMsdCR + self.more['msd'] = {k: v for k, v in feature.items() if k != 'selection'} elif feature['selection'] == "all": self.representation_factory = WordFormAllCR elif feature['selection'] == 'agreement': assert(feature['head'][:4] == 'cid_') assert(feature['msd'] is not None) self.representation_factory = WordFormAgreementCR - self.more = (feature['head'][4:], feature['msd'].split('+')) + self.more['agreement'] = feature['msd'].split('+') + self.more['other'] = feature['head'][4:] else: raise NotImplementedError("Representation selection: {}".format(feature))