Simplifying and also improving the speed (fewer regex comparisons!)
This commit is contained in:
parent
09bdd0fe3f
commit
43c6c9151b
28
wani.py
28
wani.py
|
@ -109,21 +109,6 @@ TAGSET = {
|
|||
"X": ['type']
|
||||
}
|
||||
|
||||
CATEGORY_BASES = {
|
||||
"N": ['.'] * 5,
|
||||
"V": ['.'] * 7,
|
||||
"A": ['.'] * 6,
|
||||
"R": ['.'] * 2,
|
||||
"P": ['.'] * 6,
|
||||
"M": ['.'] * 6,
|
||||
"S": ['.'] * 1,
|
||||
"C": ['.'] * 1,
|
||||
"Q": [],
|
||||
"I": [],
|
||||
"Y": [],
|
||||
"X": ['.'] * 1
|
||||
}
|
||||
|
||||
class ComponentType(Enum):
|
||||
Other = 0
|
||||
Core = 2
|
||||
|
@ -431,7 +416,9 @@ def determine_ppb(rgx):
|
|||
if rgx[0] in ("A", "N", "R"):
|
||||
return 0
|
||||
elif rgx[0] == "V":
|
||||
if 'a' in rgx[1]:
|
||||
if len(rgx) == 1:
|
||||
return 2
|
||||
elif 'a' in rgx[1]:
|
||||
return 3
|
||||
elif 'm' in rgx[1]:
|
||||
return 1
|
||||
|
@ -461,7 +448,7 @@ class MorphologyRegex:
|
|||
assert 'POS' in restr_dict
|
||||
category = restr_dict['POS'][0].capitalize()
|
||||
cat_code = CODES[category]
|
||||
rgx = [cat_code] + CATEGORY_BASES[cat_code]
|
||||
rgx = [cat_code] + ['.'] * 10
|
||||
|
||||
del restr_dict['POS']
|
||||
|
||||
|
@ -480,6 +467,13 @@ class MorphologyRegex:
|
|||
if typ:
|
||||
self.min_msd_length = max(index + 1, self.min_msd_length)
|
||||
|
||||
# strip rgx
|
||||
for i in reversed(range(len(rgx))):
|
||||
if rgx[i] == '.':
|
||||
rgx = rgx[:-1]
|
||||
else:
|
||||
break
|
||||
|
||||
self.re_objects = [re.compile(r) for r in rgx]
|
||||
self.rgx = rgx
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user