26 lines
754 B
Python
26 lines
754 B
Python
|
from restriction import MorphologyRegex
|
||
|
|
||
|
|
||
|
def get_lemma_features(et):
    """Collect the lemma features declared below ``et``.

    Looks up the ``lemma_features`` child of ``et`` and, for every ``POS``
    element found beneath it, flattens the position list exposed as
    ``MorphologyRegex(pos).rgx`` into one compact string:

    * ``"."`` contributes a blank,
    * a single character is kept as is,
    * a three-character ``"[x]"`` item contributes ``x``.

    Surrounding blanks are stripped from the flattened string and the
    remaining blanks are replaced by ``"-"``.

    Args:
        et: An element offering ``find``; the result of
            ``find('lemma_features')`` must offer ``iter``
            (e.g. ``xml.etree.ElementTree.Element``).

    Returns:
        A dict mapping the first character of each flattened string to
        the normalised string. Empty when ``et`` has no
        ``lemma_features`` child or that child holds no ``POS`` elements.
        If several ``POS`` elements share a first character, the last one
        wins.

    Raises:
        RuntimeError: If a position item has an unsupported shape, or if
            the flattened string is empty or does not start with an
            uppercase character.
    """
    lf = et.find('lemma_features')
    if lf is None:
        return {}

    result = {}
    for pos in lf.iter('POS'):
        chars = []
        for position in MorphologyRegex(pos).rgx:
            if position == ".":
                # Unconstrained position: keep a placeholder blank.
                chars.append(" ")
            elif len(position) == 1:
                chars.append(position)
            elif len(position) == 3 and position[0] == "[" and position[2] == "]":
                # Single-character class such as "[N]": keep the character.
                chars.append(position[1])
            else:
                raise RuntimeError("Strange rgx for lemma_feature...")

        rgx_str = "".join(chars)
        # Explicit check rather than ``assert``: asserts vanish under
        # ``python -O``, and slicing keeps an empty string from raising a
        # bare IndexError. The leading character is presumably the
        # part-of-speech letter - TODO confirm against MorphologyRegex.
        if not rgx_str[:1].isupper():
            raise RuntimeError(
                "Lemma feature rgx must start with an uppercase character, "
                "got %r" % rgx_str
            )
        result[rgx_str[0]] = rgx_str.strip().replace(' ', '-')

    return result
|