HUGE refactor, creating lots of modules, no code changes though!

This commit is contained in:
2019-06-15 18:55:35 +02:00
parent 43c6c9151b
commit 90dbbca5d5
18 changed files with 1708 additions and 1544 deletions

26
src/lemma_features.py Normal file
View File

@@ -0,0 +1,26 @@
from restriction import MorphologyRegex
def get_lemma_features(et):
    """Build the lemma-feature lookup declared under ``et``.

    Looks for a ``lemma_features`` child of ``et`` and, for every ``POS``
    element found beneath it, flattens ``MorphologyRegex(pos).rgx`` into a
    single string: a ``"."`` entry becomes a placeholder, a one-character
    entry is taken as is, and a three-character ``"[x]"`` entry contributes
    its middle character ``x``.

    NOTE(review): ``rgx`` is presumably a sequence of per-position regex
    fragments -- confirm against ``restriction.MorphologyRegex``.

    Args:
        et: Element-like object exposing ``find``; the node it returns must
            expose ``iter``.

    Returns:
        A dict keyed by the first character of each flattened string. The
        value is that string with surrounding placeholders stripped and the
        remaining placeholders written as ``'-'``. Empty when ``et`` has no
        ``lemma_features`` child or that child holds no ``POS`` elements.
        A later ``POS`` with the same first character overwrites an
        earlier one.

    Raises:
        RuntimeError: If an entry of ``rgx`` has none of the three accepted
            shapes, or if the flattened string is empty or does not start
            with an uppercase character.
    """
    lf = et.find('lemma_features')
    if lf is None:
        return {}

    result = {}
    for pos in lf.iter('POS'):
        pieces = []
        for position in MorphologyRegex(pos).rgx:
            if position == ".":
                # Placeholder for an unconstrained position; rewritten to
                # '-' (or stripped at the edges) once the string is built.
                pieces.append(" ")
            elif len(position) == 1:
                pieces.append(position)
            elif len(position) == 3 and position[0] == "[" and position[2] == "]":
                pieces.append(position[1])
            else:
                raise RuntimeError("Strange rgx for lemma_feature...")
        rgx_str = "".join(pieces)

        # Explicit check instead of ``assert``: asserts vanish under
        # ``python -O``, and slicing keeps an empty string from surfacing
        # as a bare IndexError.
        if not rgx_str[:1].isupper():
            raise RuntimeError(
                "lemma_feature rgx must start with an uppercase character, "
                "got {!r}".format(rgx_str))
        result[rgx_str[0]] = rgx_str.strip().replace(' ', '-')

    return result