You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
luscenje_struktur/luscenje_struktur/lemma_features.py

26 lines
776 B

from luscenje_struktur.restriction import MorphologyRegex
def get_lemma_features(et):
    """Build a mapping of lemma-feature patterns from an XML element.

    Looks up the ``lemma_features`` child of *et* and, for every ``POS``
    element found by iterating it, flattens the first entry of
    ``MorphologyRegex(pos).rgxs`` into a single string, one character per
    position:

    * ``"."`` becomes a space (later turned into ``"-"``),
    * a one-character position is copied as-is,
    * a three-character position of the form ``"[x]"`` contributes ``x``.

    NOTE(review): the exact shape of ``MorphologyRegex.rgxs`` is defined
    elsewhere; this function only assumes that ``rgxs[0]`` is an iterable of
    strings -- confirm against ``luscenje_struktur.restriction``.

    Args:
        et: An element exposing ``find`` (ElementTree-style API).

    Returns:
        A dict keyed by the first character of each flattened string. Each
        value is that string with surrounding spaces stripped and remaining
        spaces replaced by ``"-"``. Empty when *et* has no
        ``lemma_features`` child or it has no ``POS`` elements.

    Raises:
        RuntimeError: If a position has an unsupported form, or if the
            flattened string is empty or does not start with an uppercase
            character.
    """
    lf = et.find('lemma_features')
    if lf is None:
        return {}

    result = {}
    for pos in lf.iter('POS'):
        chars = []
        for position in MorphologyRegex(pos).rgxs[0]:
            if position == ".":
                # Wildcard position: placeholder, rendered as '-' below.
                chars.append(" ")
            elif len(position) == 1:
                chars.append(position)
            elif len(position) == 3 and position[0] == "[" and position[2] == "]":
                # Single-character class such as "[a]": keep the character.
                chars.append(position[1])
            else:
                raise RuntimeError("Strange rgx for lemma_feature...")

        rgx_str = "".join(chars)

        # Explicit check instead of `assert`, so it still runs under
        # `python -O`; the slice also covers the empty-string case, which
        # would otherwise fail with a bare IndexError.
        if not rgx_str[:1].isupper():
            raise RuntimeError(
                "Lemma feature must start with an uppercase character, "
                "got {!r}".format(rgx_str)
            )

        # The key is taken before stripping, so the first position itself
        # must be the uppercase character.
        result[rgx_str[0]] = rgx_str.strip().replace(' ', '-')

    return result