Added root filtering.
This commit is contained in:
		
							parent
							
								
									05cbfd03ff
								
							
						
					
					
						commit
						a13559941e
					
				
							
								
								
									
										58
									
								
								Tree.py
									
									
									
									
									
								
							
							
						
						
									
										58
									
								
								Tree.py
									
									
									
									
									
								
							| @ -7,9 +7,9 @@ from Value import Value | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Tree(object): | class Tree(object): | ||||||
|     def __init__(self, form, lemma, upos, xpos, deprel, feats, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_complete_dict, head): |     def __init__(self, form, lemma, upos, xpos, deprel, feats, feats_detailed, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_detailed_dict, head): | ||||||
|         # if not hasattr(self, 'feats'): |         if not hasattr(self, 'feats'): | ||||||
|         #     self.feats = {} |             self.feats_detailed = {} | ||||||
| 
 | 
 | ||||||
|         # form_unicode = str(form).encode("utf-8") |         # form_unicode = str(form).encode("utf-8") | ||||||
|         if form not in form_dict: |         if form not in form_dict: | ||||||
| @ -27,15 +27,15 @@ class Tree(object): | |||||||
|         if deprel not in deprel_dict: |         if deprel not in deprel_dict: | ||||||
|             deprel_dict[deprel] = Value(deprel) |             deprel_dict[deprel] = Value(deprel) | ||||||
|         self.deprel = deprel_dict[deprel] |         self.deprel = deprel_dict[deprel] | ||||||
|         if feats not in feats_complete_dict: |         if feats not in feats_dict: | ||||||
|             feats_complete_dict[feats] = Value(feats) |             feats_dict[feats] = Value(feats) | ||||||
|         self.feats_complete = feats_complete_dict[feats] |         self.feats = feats_dict[feats] | ||||||
|         # for feat in feats.keys(): |         for feat in feats_detailed.keys(): | ||||||
|         #     if next(iter(feats[feat])) not in feats_dict[feat]: |             if next(iter(feats_detailed[feat])) not in feats_detailed_dict[feat]: | ||||||
|         #         feats_dict[feat][next(iter(feats[feat]))] = Value(next(iter(feats[feat]))) |                 feats_detailed_dict[feat][next(iter(feats_detailed[feat]))] = Value(next(iter(feats_detailed[feat]))) | ||||||
|         #     if not feat in self.feats: |             if not feat in self.feats_detailed: | ||||||
|         #         self.feats[feat] = {} |                 self.feats_detailed[feat] = {} | ||||||
|         #     self.feats[feat][next(iter(feats[feat]))] = feats_dict[feat][next(iter(feats[feat]))] |             self.feats_detailed[feat][next(iter(feats_detailed[feat]))] = feats_detailed_dict[feat][next(iter(feats_detailed[feat]))] | ||||||
|         # self.position = position |         # self.position = position | ||||||
| 
 | 
 | ||||||
|         self.parent = head |         self.parent = head | ||||||
| @ -62,8 +62,34 @@ class Tree(object): | |||||||
|     #     return True |     #     return True | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|     def fits_temporary_requirements(self, filters): |     def fits_permanent_requirements(self, filters): | ||||||
|  |         main_attributes = ['deprel', 'feats', 'form', 'lemma', 'upos'] | ||||||
| 
 | 
 | ||||||
|  |         if not filters['root_whitelist']: | ||||||
|  |             return True | ||||||
|  | 
 | ||||||
|  |         for option in filters['root_whitelist']: | ||||||
|  |             filter_passed = True | ||||||
|  | 
 | ||||||
|  |             # check if attributes are valid | ||||||
|  |             for key in option.keys(): | ||||||
|  |                 if key not in main_attributes: | ||||||
|  |                     if key not in self.feats_detailed or option[key] != list(self.feats_detailed[key].items())[0][1].get_value(): | ||||||
|  |                         filter_passed = False | ||||||
|  | 
 | ||||||
|  |             filter_passed = filter_passed and \ | ||||||
|  |                             ('deprel' not in option or option['deprel'] == self.deprel.get_value()) and \ | ||||||
|  |                             ('feats' not in option or option['feats'] == self.feats.get_value()) and \ | ||||||
|  |                             ('form' not in option or option['form'] == self.form.get_value()) and \ | ||||||
|  |                             ('lemma' not in option or option['lemma'] == self.lemma.get_value()) and \ | ||||||
|  |                             ('upos' not in option or option['upos'] == self.upos.get_value()) | ||||||
|  | 
 | ||||||
|  |             if filter_passed: | ||||||
|  |                 return True | ||||||
|  | 
 | ||||||
|  |         return False | ||||||
|  | 
 | ||||||
|  |     def fits_temporary_requirements(self, filters): | ||||||
|         return not filters['label_whitelist'] or self.deprel.get_value() in filters['label_whitelist'] |         return not filters['label_whitelist'] or self.deprel.get_value() in filters['label_whitelist'] | ||||||
| 
 | 
 | ||||||
|     def fits_static_requirements(self, query_tree, filters): |     def fits_static_requirements(self, query_tree, filters): | ||||||
| @ -72,7 +98,7 @@ class Tree(object): | |||||||
|                ('upos' not in query_tree or query_tree['upos'] == self.upos.get_value()) and \ |                ('upos' not in query_tree or query_tree['upos'] == self.upos.get_value()) and \ | ||||||
|                ('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \ |                ('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \ | ||||||
|                ('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \ |                ('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \ | ||||||
|                ('feats' not in query_tree or query_tree['feats'] == self.feats_complete.get_value()) and \ |                ('feats' not in query_tree or query_tree['feats'] == self.feats.get_value()) and \ | ||||||
|                (not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children']))) |                (not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children']))) | ||||||
|                # self.fits_static_requirements_feats(query_tree) |                # self.fits_static_requirements_feats(query_tree) | ||||||
| 
 | 
 | ||||||
| @ -302,7 +328,7 @@ class Tree(object): | |||||||
| 
 | 
 | ||||||
|         active_permanent_query_trees = [] |         active_permanent_query_trees = [] | ||||||
|         for permanent_query_tree in permanent_query_trees: |         for permanent_query_tree in permanent_query_trees: | ||||||
|             if self.fits_static_requirements(permanent_query_tree, filters): |             if self.fits_static_requirements(permanent_query_tree, filters) and self.fits_permanent_requirements(filters): | ||||||
|                 active_permanent_query_trees.append(permanent_query_tree) |                 active_permanent_query_trees.append(permanent_query_tree) | ||||||
|                 if 'children' in permanent_query_tree: |                 if 'children' in permanent_query_tree: | ||||||
|                     all_query_indices.append((permanent_query_tree['children'], True)) |                     all_query_indices.append((permanent_query_tree['children'], True)) | ||||||
| @ -617,4 +643,4 @@ def create_output_string_xpos(tree): | |||||||
|     return tree.xpos.get_value() |     return tree.xpos.get_value() | ||||||
| 
 | 
 | ||||||
| def create_output_string_feats(tree): | def create_output_string_feats(tree): | ||||||
|     return tree.feats_complete.get_value() |     return tree.feats.get_value() | ||||||
|  | |||||||
| @ -11,21 +11,21 @@ import pyconll | |||||||
| from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos, create_output_string_feats | from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos, create_output_string_feats | ||||||
| 
 | 
 | ||||||
| # for separate searches of feats | # for separate searches of feats | ||||||
| # feats_list = [ | feats_detailed_list = [ | ||||||
| #     # lexical features |     # lexical features | ||||||
| #     'PronType', 'NumType', 'Poss', 'Reflex', 'Foreign', 'Abbr', |     'PronType', 'NumType', 'Poss', 'Reflex', 'Foreign', 'Abbr', | ||||||
| # | 
 | ||||||
| #     # Inflectional features (nominal) |     # Inflectional features (nominal) | ||||||
| #     'Gender', 'Animacy', 'NounClass', 'Number', 'Case', 'Definite', 'Degree', |     'Gender', 'Animacy', 'NounClass', 'Number', 'Case', 'Definite', 'Degree', | ||||||
| # | 
 | ||||||
| #     # Inflectional features (verbal) |     # Inflectional features (verbal) | ||||||
| #     'VerbForm', 'Mood', 'Tense', 'Aspect', 'Voice', 'Evident', 'Polarity', 'Person', 'Polite', 'Clusivity', |     'VerbForm', 'Mood', 'Tense', 'Aspect', 'Voice', 'Evident', 'Polarity', 'Person', 'Polite', 'Clusivity', | ||||||
| # | 
 | ||||||
| #     # Other |     # Other | ||||||
| #     'Variant', 'Number[psor]', 'Gender[psor]', 'NumForm' |     'Variant', 'Number[psor]', 'Gender[psor]', 'NumForm' | ||||||
| # ] | ] | ||||||
| # | 
 | ||||||
| # feats_dict = {key: {} for key in feats_list} | feats_detailed_dict = {key: {} for key in feats_detailed_list} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def decode_query(orig_query, dependency_type): | def decode_query(orig_query, dependency_type): | ||||||
| @ -65,10 +65,10 @@ def decode_query(orig_query, dependency_type): | |||||||
|                 elif orig_query_split[0] == 'feats': |                 elif orig_query_split[0] == 'feats': | ||||||
|                     decoded_query['feats'] = orig_query_split[1] |                     decoded_query['feats'] = orig_query_split[1] | ||||||
|                     # return decoded_query |                     # return decoded_query | ||||||
|                 # elif orig_query_split[0] in feats_list: |                 elif orig_query_split[0] in feats_detailed_list: | ||||||
|                 #     decoded_query['feats'] = {} |                     decoded_query['feats_detailed'] = {} | ||||||
|                 #     decoded_query['feats'][orig_query_split[0]] = orig_query_split[1] |                     decoded_query['feats_detailed'][orig_query_split[0]] = orig_query_split[1] | ||||||
|                 #     return decoded_query |                     return decoded_query | ||||||
|                 elif not new_query: |                 elif not new_query: | ||||||
|                     raise Exception('Not supported yet!') |                     raise Exception('Not supported yet!') | ||||||
|                 else: |                 else: | ||||||
| @ -122,7 +122,7 @@ def create_trees(config): | |||||||
| 
 | 
 | ||||||
|         train = pyconll.load_from_file(input_path) |         train = pyconll.load_from_file(input_path) | ||||||
| 
 | 
 | ||||||
|         form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_complete_dict = {}, {}, {}, {}, {}, {} |         form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict = {}, {}, {}, {}, {}, {} | ||||||
| 
 | 
 | ||||||
|         all_trees = [] |         all_trees = [] | ||||||
| 
 | 
 | ||||||
| @ -136,9 +136,9 @@ def create_trees(config): | |||||||
|                 #     token_feats += k + next(iter(v)) + '|' |                 #     token_feats += k + next(iter(v)) + '|' | ||||||
|                 # token_feats = token_feats[:-1] |                 # token_feats = token_feats[:-1] | ||||||
|                 # TODO check if 5th place is always there for feats |                 # TODO check if 5th place is always there for feats | ||||||
|                 token_feats = token._fields[5] |                 feats = token._fields[5] | ||||||
|                 node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, token_feats, form_dict, |                 node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, feats, token.feats, form_dict, | ||||||
|                             lemma_dict, upos_dict, xpos_dict, deprel_dict, None, feats_complete_dict, token.head) |                             lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_detailed_dict, token.head) | ||||||
|                 token_nodes.append(node) |                 token_nodes.append(node) | ||||||
|                 if token.deprel == 'root': |                 if token.deprel == 'root': | ||||||
|                     root = node |                     root = node | ||||||
| @ -276,6 +276,22 @@ def main(): | |||||||
|     else: |     else: | ||||||
|         filters['label_whitelist'] = [] |         filters['label_whitelist'] = [] | ||||||
| 
 | 
 | ||||||
|  |     if config.has_option('settings', 'root_whitelist'): | ||||||
|  |         # test | ||||||
|  |         filters['root_whitelist'] = [] | ||||||
|  | 
 | ||||||
|  |         for option in config.get('settings', 'root_whitelist'). split('|'): | ||||||
|  |             attribute_dict = {} | ||||||
|  |             for attribute in option.split('&'): | ||||||
|  |                 value = attribute.split('=') | ||||||
|  |                 # assert value[0] in ['deprel', 'lemma', 'upos', 'xpos', 'form', | ||||||
|  |                 #                     'feats'], '"root_whitelist" is not set up correctly' | ||||||
|  |                 attribute_dict[value[0]] = value[1] | ||||||
|  |             filters['root_whitelist'].append(attribute_dict) | ||||||
|  |         # filters['root_whitelist'] = [{'upos': 'NOUN', 'Case': 'Nom'}, {'upos': 'ADJ', 'Degree': 'Sup'}] | ||||||
|  |     else: | ||||||
|  |         filters['root_whitelist'] = [] | ||||||
|  | 
 | ||||||
|     filters['complete_tree_type'] = config.get('settings', 'tree_type') == 'complete' |     filters['complete_tree_type'] = config.get('settings', 'tree_type') == 'complete' | ||||||
| 
 | 
 | ||||||
|     for tree in all_trees[2:]: |     for tree in all_trees[2:]: | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user