83 lines
3.7 KiB
Python
83 lines
3.7 KiB
Python
from representation import ComponentRepresentation, LemmaCR, LexisCR, WordFormAgreementCR, WordFormAnyCR, WordFormMsdCR, WordFormAllCR
|
|
|
|
class RepresentationAssigner:
    """Choose a representation class for a component from feature dicts.

    Features are supplied one at a time through :meth:`add_feature`; each one
    may switch ``representation_factory`` and/or store extra settings in
    ``more``.  :meth:`cr_instance` then instantiates the selected class.
    """

    def __init__(self):
        # Extra settings collected from features; handed to the factory as-is.
        self.more = {}
        # Callable used by cr_instance(); stays generic until a feature
        # selects something more specific.
        self.representation_factory = ComponentRepresentation

    def add_feature(self, feature):
        """Update the selected representation according to one feature.

        A feature containing ``'rendition'`` is handled as a rendition feature
        (``"lemma"``, ``"word_form"`` or ``"lexis"``); otherwise a feature
        containing ``'selection'`` is handled as a selection feature
        (``"msd"``, ``"all"`` or ``"agreement"``).  A feature with neither key
        is ignored.

        Args:
            feature: mapping describing the feature.  ``"lexis"`` additionally
                reads ``feature['string']``; ``"agreement"`` reads
                ``feature['head']`` (must start with ``'cid_'``) and
                ``feature['msd']`` (``'+'``-separated string).

        Raises:
            NotImplementedError: unknown ``rendition`` or ``selection`` value.
            AssertionError: ``agreement`` feature whose ``head`` lacks the
                ``'cid_'`` prefix or whose ``msd`` is ``None``.
            KeyError: a key required by the chosen branch is missing.
        """
        if 'rendition' in feature:
            rendition = feature['rendition']
            if rendition == "lemma":
                self.representation_factory = LemmaCR
            elif rendition == "word_form":
                # Only a default; a later selection feature may replace it.
                self.representation_factory = WordFormAnyCR
            elif rendition == "lexis":
                self.representation_factory = LexisCR
                self.more['lexis'] = feature['string']
            else:
                raise NotImplementedError("Representation rendition: {}".format(feature))

        elif 'selection' in feature:
            selection = feature['selection']
            if selection == "msd":
                # The factory could already be agreement; keep it in that case
                # and only record the msd constraints.
                if self.representation_factory != WordFormAgreementCR:
                    self.representation_factory = WordFormMsdCR
                self.more['msd'] = {k: v for k, v in feature.items() if k != 'selection'}
            elif selection == "all":
                self.representation_factory = WordFormAllCR
            elif selection == 'agreement':
                # Explicit raises instead of ``assert`` so the checks are not
                # stripped when Python runs with -O.  The exception type is
                # kept as AssertionError for backward compatibility.
                head = feature['head']
                if not head.startswith('cid_'):
                    raise AssertionError(
                        "Agreement head must start with 'cid_': {!r}".format(head))
                msd = feature['msd']
                if msd is None:
                    raise AssertionError("Agreement feature has no msd: {}".format(feature))

                self.representation_factory = WordFormAgreementCR
                self.more['agreement'] = msd.split('+')
                # Whatever follows the 'cid_' prefix identifies the other component.
                self.more['other'] = head[4:]
            else:
                raise NotImplementedError("Representation selection: {}".format(feature))

    def cr_instance(self, word_renderer):
        """Instantiate the selected representation class.

        Args:
            word_renderer: passed through unchanged as the second argument.

        Returns:
            ``self.representation_factory(self.more, word_renderer)``.
        """
        return self.representation_factory(self.more, word_renderer)

    @staticmethod
    def set_representations(match, word_renderer):
        """Build, fill and render the representations of every component.

        The result is written into ``match.representations`` keyed by
        component ``idx``: ``None`` when every representation of the component
        rendered to ``None``, otherwise the rendered texts joined by a single
        space (with ``None`` entries contributing an empty string).
        Components without any representation get no entry.

        Args:
            match: object exposing ``structure`` (with ``id``, ``components``
                and ``get_component``), ``matches`` (iterable of mappings from
                word id to word) and a writable ``representations`` mapping.
            word_renderer: forwarded to each ``cr_instance`` call.

        Raises:
            NotImplementedError: a representation agrees with a component
                that does not have exactly one representation.
        """
        structure = match.structure

        # One list of representation objects per component, keyed by idx.
        representations = {
            c.idx: [rep.cr_instance(word_renderer) for rep in c.representation]
            for c in structure.components
        }

        # Register every agreeing representation with the single
        # representation of the component it agrees with.
        for cid, reps in representations.items():
            for rep in reps:
                for agr in rep.get_agreement():
                    targets = representations[agr]
                    if len(targets) != 1:
                        raise NotImplementedError(
                            "Structure {}: component {} has agreement"
                            " with component {}, however there are {} (!= 1)"
                            " representations of component {}!".format(
                                structure.id, cid, agr, len(targets), agr))

                    targets[0].agreement.append(rep)

        # Feed each matched word to all representations of its component.
        for words in match.matches:
            for w_id, w in words.items():
                component = structure.get_component(w_id)
                for representation in representations[component.idx]:
                    representation.add_word(w)

        for reps in representations.values():
            for rep in reps:
                rep.render()

        for cid, reps in representations.items():
            texts = [rep.rendition_text for rep in reps]
            if not texts:
                # No representation at all: leave match.representations alone.
                continue
            if all(text is None for text in texts):
                match.representations[cid] = None
            else:
                match.representations[cid] = " ".join(
                    "" if text is None else text for text in texts)