Simplifying and also improving the speed (fewer regex comparisons!)
This commit is contained in:
parent
09bdd0fe3f
commit
43c6c9151b
28
wani.py
28
wani.py
|
@ -109,21 +109,6 @@ TAGSET = {
|
||||||
"X": ['type']
|
"X": ['type']
|
||||||
}
|
}
|
||||||
|
|
||||||
CATEGORY_BASES = {
|
|
||||||
"N": ['.'] * 5,
|
|
||||||
"V": ['.'] * 7,
|
|
||||||
"A": ['.'] * 6,
|
|
||||||
"R": ['.'] * 2,
|
|
||||||
"P": ['.'] * 6,
|
|
||||||
"M": ['.'] * 6,
|
|
||||||
"S": ['.'] * 1,
|
|
||||||
"C": ['.'] * 1,
|
|
||||||
"Q": [],
|
|
||||||
"I": [],
|
|
||||||
"Y": [],
|
|
||||||
"X": ['.'] * 1
|
|
||||||
}
|
|
||||||
|
|
||||||
class ComponentType(Enum):
|
class ComponentType(Enum):
|
||||||
Other = 0
|
Other = 0
|
||||||
Core = 2
|
Core = 2
|
||||||
|
@ -431,7 +416,9 @@ def determine_ppb(rgx):
|
||||||
if rgx[0] in ("A", "N", "R"):
|
if rgx[0] in ("A", "N", "R"):
|
||||||
return 0
|
return 0
|
||||||
elif rgx[0] == "V":
|
elif rgx[0] == "V":
|
||||||
if 'a' in rgx[1]:
|
if len(rgx) == 1:
|
||||||
|
return 2
|
||||||
|
elif 'a' in rgx[1]:
|
||||||
return 3
|
return 3
|
||||||
elif 'm' in rgx[1]:
|
elif 'm' in rgx[1]:
|
||||||
return 1
|
return 1
|
||||||
|
@ -461,7 +448,7 @@ class MorphologyRegex:
|
||||||
assert 'POS' in restr_dict
|
assert 'POS' in restr_dict
|
||||||
category = restr_dict['POS'][0].capitalize()
|
category = restr_dict['POS'][0].capitalize()
|
||||||
cat_code = CODES[category]
|
cat_code = CODES[category]
|
||||||
rgx = [cat_code] + CATEGORY_BASES[cat_code]
|
rgx = [cat_code] + ['.'] * 10
|
||||||
|
|
||||||
del restr_dict['POS']
|
del restr_dict['POS']
|
||||||
|
|
||||||
|
@ -480,6 +467,13 @@ class MorphologyRegex:
|
||||||
if typ:
|
if typ:
|
||||||
self.min_msd_length = max(index + 1, self.min_msd_length)
|
self.min_msd_length = max(index + 1, self.min_msd_length)
|
||||||
|
|
||||||
|
# strip rgx
|
||||||
|
for i in reversed(range(len(rgx))):
|
||||||
|
if rgx[i] == '.':
|
||||||
|
rgx = rgx[:-1]
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
self.re_objects = [re.compile(r) for r in rgx]
|
self.re_objects = [re.compile(r) for r in rgx]
|
||||||
self.rgx = rgx
|
self.rgx = rgx
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user