From 43c6c9151b94c0efaf0776bf1962232d4393ad9d Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Sat, 15 Jun 2019 13:10:23 +0200 Subject: [PATCH] Simplifying and also improving the speed (less regex comparisons!) --- wani.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/wani.py b/wani.py index 223b310..50690cb 100644 --- a/wani.py +++ b/wani.py @@ -109,21 +109,6 @@ TAGSET = { "X": ['type'] } -CATEGORY_BASES = { - "N": ['.'] * 5, - "V": ['.'] * 7, - "A": ['.'] * 6, - "R": ['.'] * 2, - "P": ['.'] * 6, - "M": ['.'] * 6, - "S": ['.'] * 1, - "C": ['.'] * 1, - "Q": [], - "I": [], - "Y": [], - "X": ['.'] * 1 -} - class ComponentType(Enum): Other = 0 Core = 2 @@ -431,7 +416,9 @@ def determine_ppb(rgx): if rgx[0] in ("A", "N", "R"): return 0 elif rgx[0] == "V": - if 'a' in rgx[1]: + if len(rgx) == 1: + return 2 + elif 'a' in rgx[1]: return 3 elif 'm' in rgx[1]: return 1 @@ -461,7 +448,7 @@ class MorphologyRegex: assert 'POS' in restr_dict category = restr_dict['POS'][0].capitalize() cat_code = CODES[category] - rgx = [cat_code] + CATEGORY_BASES[cat_code] + rgx = [cat_code] + ['.'] * 10 del restr_dict['POS'] @@ -480,6 +467,13 @@ class MorphologyRegex: if typ: self.min_msd_length = max(index + 1, self.min_msd_length) + # strip rgx + for i in reversed(range(len(rgx))): + if rgx[i] == '.': + rgx = rgx[:-1] + else: + break + self.re_objects = [re.compile(r) for r in rgx] self.rgx = rgx