Renaming src to luscenje struktur
This commit is contained in:
26
luscenje_struktur/lemma_features.py
Normal file
26
luscenje_struktur/lemma_features.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from restriction import MorphologyRegex
|
||||
|
||||
|
||||
def get_lemma_features(et):
    """Collect lemma feature strings, one per ``POS`` entry, from ``et``.

    Looks up the ``lemma_features`` child of ``et``. For every ``POS``
    element found beneath it, the first position list of
    ``MorphologyRegex(pos).rgxs`` is flattened into a plain string:
    a ``"."`` position becomes a space, a single-character position is
    taken as is, and a three-character ``"[x]"`` position contributes
    ``x``.

    Args:
        et: Object exposing ``find`` whose result exposes ``iter``
            (presumably an ``xml.etree.ElementTree.Element`` -- confirm
            against callers).

    Returns:
        A dict keyed by the first character of each flattened string.
        Each value is that string with surrounding whitespace stripped
        and the remaining spaces replaced by ``-``. The dict is empty
        when ``et`` has no ``lemma_features`` child.

    Raises:
        RuntimeError: If a position has an unsupported form, or if the
            flattened string does not start with an upper-case
            character (this includes the empty string).
    """
    lf = et.find('lemma_features')
    if lf is None:
        return {}

    result = {}
    for pos in lf.iter('POS'):
        rgx_list = MorphologyRegex(pos).rgxs[0]

        chars = []
        for position in rgx_list:
            if position == ".":
                chars.append(" ")
            elif len(position) == 1:
                chars.append(position)
            elif len(position) == 3 and position[0] == "[" and position[2] == "]":
                chars.append(position[1])
            else:
                raise RuntimeError("Strange rgx for lemma_feature...")
        rgx_str = "".join(chars)

        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O`` and an empty string would otherwise surface as a
        # bare IndexError. Slicing keeps the empty case safe.
        if not rgx_str[:1].isupper():
            raise RuntimeError(
                "Lemma feature must start with an upper-case character, "
                "got {!r}".format(rgx_str)
            )

        result[rgx_str[0]] = rgx_str.strip().replace(' ', '-')

    return result
|
||||
Reference in New Issue
Block a user