Simplifying and also improving the speed (fewer regex comparisons!)
This commit is contained in:
		
							parent
							
								
									09bdd0fe3f
								
							
						
					
					
						commit
						43c6c9151b
					
				
							
								
								
									
										28
									
								
								wani.py
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								wani.py
									
									
									
									
									
								
							| @ -109,21 +109,6 @@ TAGSET = { | ||||
|     "X": ['type'] | ||||
| } | ||||
| 
 | ||||
| CATEGORY_BASES = { | ||||
|     "N": ['.'] * 5, | ||||
|     "V": ['.'] * 7, | ||||
|     "A": ['.'] * 6, | ||||
|     "R": ['.'] * 2, | ||||
|     "P": ['.'] * 6, | ||||
|     "M": ['.'] * 6, | ||||
|     "S": ['.'] * 1, | ||||
|     "C": ['.'] * 1, | ||||
|     "Q": [], | ||||
|     "I": [], | ||||
|     "Y": [], | ||||
|     "X": ['.'] * 1 | ||||
| } | ||||
| 
 | ||||
| class ComponentType(Enum): | ||||
|     Other = 0 | ||||
|     Core = 2 | ||||
| @ -431,7 +416,9 @@ def determine_ppb(rgx): | ||||
|     if rgx[0] in ("A", "N", "R"): | ||||
|         return 0 | ||||
|     elif rgx[0] == "V": | ||||
|         if 'a' in rgx[1]: | ||||
|         if len(rgx) == 1: | ||||
|             return 2 | ||||
|         elif 'a' in rgx[1]: | ||||
|             return 3 | ||||
|         elif 'm' in rgx[1]: | ||||
|             return 1 | ||||
| @ -461,7 +448,7 @@ class MorphologyRegex: | ||||
|         assert 'POS' in restr_dict | ||||
|         category = restr_dict['POS'][0].capitalize() | ||||
|         cat_code = CODES[category] | ||||
|         rgx = [cat_code] + CATEGORY_BASES[cat_code] | ||||
|         rgx = [cat_code] + ['.'] * 10 | ||||
| 
 | ||||
|         del restr_dict['POS'] | ||||
| 
 | ||||
| @ -480,6 +467,13 @@ class MorphologyRegex: | ||||
|             if typ: | ||||
|                 self.min_msd_length = max(index + 1, self.min_msd_length) | ||||
| 
 | ||||
|         # strip rgx | ||||
|         for i in reversed(range(len(rgx))): | ||||
|             if rgx[i] == '.': | ||||
|                 rgx = rgx[:-1] | ||||
|             else: | ||||
|                 break | ||||
| 
 | ||||
|         self.re_objects = [re.compile(r) for r in rgx] | ||||
|         self.rgx = rgx | ||||
|      | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user