fixing matching of agreements with msd
This commit is contained in:
parent
5b9859af3e
commit
5f226d0cd4
50
wani.py
50
wani.py
|
@ -186,7 +186,7 @@ class LemmaCR(ComponentRepresentation):
|
||||||
|
|
||||||
class LexisCR(ComponentRepresentation):
|
class LexisCR(ComponentRepresentation):
|
||||||
def _render(self):
|
def _render(self):
|
||||||
return self.data
|
return self.data['lexis']
|
||||||
|
|
||||||
class WordFormAllCR(ComponentRepresentation):
|
class WordFormAllCR(ComponentRepresentation):
|
||||||
def _render(self):
|
def _render(self):
|
||||||
|
@ -224,10 +224,13 @@ class WordFormAnyCR(ComponentRepresentation):
|
||||||
class WordFormMsdCR(WordFormAnyCR):
|
class WordFormMsdCR(WordFormAnyCR):
|
||||||
def __init__(self, *args):
|
def __init__(self, *args):
|
||||||
super().__init__(*args)
|
super().__init__(*args)
|
||||||
self.backup_word = None
|
self.lemma = None
|
||||||
|
self.msd = None
|
||||||
|
|
||||||
def check_msd(self, word):
|
def check_msd(self, word):
|
||||||
selectors = self.data
|
if 'msd' not in self.data:
|
||||||
|
return True
|
||||||
|
selectors = self.data['msd']
|
||||||
for key, value in selectors.items():
|
for key, value in selectors.items():
|
||||||
t = word.msd[0]
|
t = word.msd[0]
|
||||||
v = TAGSET[t].index(key.lower())
|
v = TAGSET[t].index(key.lower())
|
||||||
|
@ -241,39 +244,37 @@ class WordFormMsdCR(WordFormAnyCR):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_word(self, word):
|
def add_word(self, word):
|
||||||
if self.backup_word is None:
|
if self.lemma is None:
|
||||||
msd = self.word_renderer.get_lemma_msd(word.lemma, word.msd)
|
self.lemma = word.lemma
|
||||||
WordLemma = namedtuple('WordLemmaOnly', 'msd most_frequent_text lemma text')
|
self.msd = word.msd
|
||||||
self.backup_word = WordLemma(msd=msd, most_frequent_text=lambda *x: None, lemma=None, text=None)
|
|
||||||
|
|
||||||
if self.check_msd(word):
|
if self.check_msd(word):
|
||||||
super().add_word(word)
|
super().add_word(word)
|
||||||
|
|
||||||
def _render(self):
|
def _render(self):
|
||||||
self.words.append(self.backup_word)
|
msd = self.word_renderer.get_lemma_msd(self.lemma, self.msd)
|
||||||
|
WordLemma = namedtuple('WordLemmaOnly', 'msd most_frequent_text lemma text')
|
||||||
|
backup_word = WordLemma(msd=msd, most_frequent_text=lambda *x: None, lemma=None, text=None)
|
||||||
|
self.words.append(backup_word)
|
||||||
return super()._render()
|
return super()._render()
|
||||||
|
|
||||||
class WordFormAgreementCR(ComponentRepresentation):
|
class WordFormAgreementCR(WordFormMsdCR):
|
||||||
def __init__(self, data, word_renderer):
|
def __init__(self, data, word_renderer):
|
||||||
super().__init__(data, word_renderer)
|
super().__init__(data, word_renderer)
|
||||||
self.agree_with, self.data = self.data
|
|
||||||
self.rendition_candidate = None
|
self.rendition_candidate = None
|
||||||
|
|
||||||
def get_agreement(self):
|
def get_agreement(self):
|
||||||
return self.agree_with
|
return self.data['other']
|
||||||
|
|
||||||
def match(self, word_msd):
|
def match(self, word_msd):
|
||||||
word_category = self.words[0].msd[0]
|
|
||||||
word_lemma = self.words[0].lemma
|
|
||||||
agreements = self.data
|
|
||||||
|
|
||||||
existing = [(w.msd, w.text) for w in self.words]
|
existing = [(w.msd, w.text) for w in self.words]
|
||||||
|
|
||||||
for candidate_msd, candidate_text in self.word_renderer.available_words(word_lemma, existing):
|
for candidate_msd, candidate_text in self.word_renderer.available_words(self.lemma, existing):
|
||||||
if word_category != candidate_msd[0]:
|
if self.msd[0] != candidate_msd[0]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, agreements):
|
if WordFormAgreementCR.check_agreement(word_msd, candidate_msd, self.data['agreement']):
|
||||||
self.rendition_candidate = candidate_text
|
self.rendition_candidate = candidate_text
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -320,11 +321,9 @@ class WordFormAgreementCR(ComponentRepresentation):
|
||||||
|
|
||||||
class ComponentRendition:
|
class ComponentRendition:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.more = None
|
self.more = {}
|
||||||
self.representation_factory = ComponentRepresentation
|
self.representation_factory = ComponentRepresentation
|
||||||
|
|
||||||
def _set_more(self, m):
|
|
||||||
self.more = m
|
|
||||||
|
|
||||||
def add_feature(self, feature):
|
def add_feature(self, feature):
|
||||||
if 'rendition' in feature:
|
if 'rendition' in feature:
|
||||||
|
@ -335,21 +334,24 @@ class ComponentRendition:
|
||||||
self.representation_factory = WordFormAnyCR
|
self.representation_factory = WordFormAnyCR
|
||||||
elif feature['rendition'] == "lexis":
|
elif feature['rendition'] == "lexis":
|
||||||
self.representation_factory = LexisCR
|
self.representation_factory = LexisCR
|
||||||
self.more = feature['string']
|
self.more['lexis'] = feature['string']
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("Representation rendition: {}".format(feature))
|
raise NotImplementedError("Representation rendition: {}".format(feature))
|
||||||
|
|
||||||
elif 'selection' in feature:
|
elif 'selection' in feature:
|
||||||
if feature['selection'] == "msd":
|
if feature['selection'] == "msd":
|
||||||
self.representation_factory = WordFormMsdCR
|
# could already be agreement
|
||||||
self.more = {k: v for k, v in feature.items() if k != 'selection'}
|
if self.representation_factory != WordFormAgreementCR:
|
||||||
|
self.representation_factory = WordFormMsdCR
|
||||||
|
self.more['msd'] = {k: v for k, v in feature.items() if k != 'selection'}
|
||||||
elif feature['selection'] == "all":
|
elif feature['selection'] == "all":
|
||||||
self.representation_factory = WordFormAllCR
|
self.representation_factory = WordFormAllCR
|
||||||
elif feature['selection'] == 'agreement':
|
elif feature['selection'] == 'agreement':
|
||||||
assert(feature['head'][:4] == 'cid_')
|
assert(feature['head'][:4] == 'cid_')
|
||||||
assert(feature['msd'] is not None)
|
assert(feature['msd'] is not None)
|
||||||
self.representation_factory = WordFormAgreementCR
|
self.representation_factory = WordFormAgreementCR
|
||||||
self.more = (feature['head'][4:], feature['msd'].split('+'))
|
self.more['agreement'] = feature['msd'].split('+')
|
||||||
|
self.more['other'] = feature['head'][4:]
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("Representation selection: {}".format(feature))
|
raise NotImplementedError("Representation selection: {}".format(feature))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user