luscenje_struktur/src/lemma_features.py

26 lines
754 B
Python

from restriction import MorphologyRegex
def _lemma_feature_char(position):
    """Translate one regex position into the single character it stands for.

    A "." position becomes a space placeholder, a one-character position is
    used as is, and a three-character bracketed position such as "[x]"
    yields the character between the brackets.

    Raises:
        RuntimeError: If the position has any other form.
    """
    if position == ".":
        return " "
    if len(position) == 1:
        return position
    if len(position) == 3 and position[0] == "[" and position[2] == "]":
        return position[1]
    raise RuntimeError("Strange rgx for lemma_feature...")


def get_lemma_features(et):
    """Build the lemma-feature patterns listed under ``lemma_features``.

    Args:
        et: Object with an ElementTree-style ``find`` method. Its
            'lemma_features' child, when present, is searched for 'POS'
            elements via ``iter``.

    Returns:
        A dict with one entry per 'POS' element. The key is the first
        character of the pattern built from ``MorphologyRegex(pos).rgx``;
        the value is that pattern with surrounding placeholder spaces
        stripped and the remaining ones replaced by '-'. A later 'POS'
        element with the same first character overwrites an earlier one.
        Empty when there is no 'lemma_features' child.

    Raises:
        RuntimeError: If a regex position has an unsupported form, or if a
            pattern is empty or does not start with an uppercase character.
    """
    lf = et.find('lemma_features')
    if lf is None:
        return {}

    result = {}
    for pos in lf.iter('POS'):
        rgx_str = "".join(
            _lemma_feature_char(position)
            for position in MorphologyRegex(pos).rgx
        )

        # Raised explicitly rather than asserted: an assert disappears under
        # "python -O", and slicing also rejects an empty pattern instead of
        # failing with an IndexError.
        if not rgx_str[:1].isupper():
            raise RuntimeError(
                "lemma_feature rgx must start with an uppercase "
                "character, got {!r}".format(rgx_str)
            )

        result[rgx_str[0]] = rgx_str.strip().replace(' ', '-')

    return result