Added analyze_type filter
This commit is contained in:
		
							parent
							
								
									ba9bdeb8b9
								
							
						
					
					
						commit
						b444df2d54
					
				
							
								
								
									
										48
									
								
								Tree.py
									
									
									
									
									
								
							
							
						
						
									
										48
									
								
								Tree.py
									
									
									
									
									
								
							| @ -110,7 +110,7 @@ class Tree(object): | |||||||
|                 if not is_permanent: |                 if not is_permanent: | ||||||
|                     partial_subtrees[result_index].append([]) |                     partial_subtrees[result_index].append([]) | ||||||
| 
 | 
 | ||||||
|     def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children): |     def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string): | ||||||
|         partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)] |         partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)] | ||||||
|         completed_subtrees = [[] for i in range(completed_subtrees_size)] |         completed_subtrees = [[] for i in range(completed_subtrees_size)] | ||||||
| 
 | 
 | ||||||
| @ -123,7 +123,7 @@ class Tree(object): | |||||||
|         child, child_queries, child_queries_metadata = next(children_queries_generator) |         child, child_queries, child_queries_metadata = next(children_queries_generator) | ||||||
|         while child: |         while child: | ||||||
|             # obtain children results |             # obtain children results | ||||||
|             new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries) |             new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string) | ||||||
| 
 | 
 | ||||||
|             self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices, |             self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices, | ||||||
|                                partial_results_dict, partial_subtrees) |                                partial_results_dict, partial_subtrees) | ||||||
| @ -135,7 +135,7 @@ class Tree(object): | |||||||
| 
 | 
 | ||||||
|         return partial_subtrees, completed_subtrees |         return partial_subtrees, completed_subtrees | ||||||
| 
 | 
 | ||||||
|     def get_subtrees(self, permanent_query_trees, temporary_query_trees): |     def get_subtrees(self, permanent_query_trees, temporary_query_trees, create_output_string): | ||||||
|         """ |         """ | ||||||
| 
 | 
 | ||||||
|         :param permanent_query_trees: |         :param permanent_query_trees: | ||||||
| @ -165,8 +165,8 @@ class Tree(object): | |||||||
|                 if 'r_children' in temporary_query_tree: |                 if 'r_children' in temporary_query_tree: | ||||||
|                     r_all_query_indices.append((temporary_query_tree['r_children'], False)) |                     r_all_query_indices.append((temporary_query_tree['r_children'], False)) | ||||||
| 
 | 
 | ||||||
|         l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices, self.l_children) |         l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices, self.l_children, create_output_string) | ||||||
|         r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, r_all_query_indices, self.r_children) |         r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, r_all_query_indices, self.r_children, create_output_string) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -180,43 +180,43 @@ class Tree(object): | |||||||
|             if i < len(active_permanent_query_trees): |             if i < len(active_permanent_query_trees): | ||||||
|                 if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]): |                 if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]): | ||||||
|                     merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], |                     merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], | ||||||
|                                                                 [[self.create_output_string()]]) |                                                                 [[create_output_string(self)]]) | ||||||
|                     merged_partial_subtrees.append( |                     merged_partial_subtrees.append( | ||||||
|                         self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right])) |                         self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right])) | ||||||
|                     # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]])) |                     # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]])) | ||||||
|                     i_left += 1 |                     i_left += 1 | ||||||
|                     i_right += 1 |                     i_right += 1 | ||||||
| 
 | 
 | ||||||
|                 elif 'l_children' in active_permanent_query_trees[i]: |                 elif 'l_children' in active_permanent_query_trees[i]: | ||||||
|                     merged_partial_subtrees.append( |                     merged_partial_subtrees.append( | ||||||
|                         self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]])) |                         self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]])) | ||||||
|                     i_left += 1 |                     i_left += 1 | ||||||
| 
 | 
 | ||||||
|                 elif 'r_children' in active_permanent_query_trees[i]: |                 elif 'r_children' in active_permanent_query_trees[i]: | ||||||
|                     merged_partial_subtrees.append( |                     merged_partial_subtrees.append( | ||||||
|                         self.merge_results([[self.create_output_string()]], r_partial_subtrees[i_right])) |                         self.merge_results([[create_output_string(self)]], r_partial_subtrees[i_right])) | ||||||
|                     i_right += 1 |                     i_right += 1 | ||||||
|                 else: |                 else: | ||||||
|                     merged_partial_subtrees.append([[self.create_output_string()]]) |                     merged_partial_subtrees.append([[create_output_string(self)]]) | ||||||
|             else: |             else: | ||||||
|                 if ('l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)] and 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]): |                 if ('l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)] and 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]): | ||||||
|                     merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]]) |                     merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]]) | ||||||
|                     merged_partial_subtrees.append(self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right])) |                     merged_partial_subtrees.append(self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right])) | ||||||
|                     # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]])) |                     # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]])) | ||||||
|                     i_left += 1 |                     i_left += 1 | ||||||
|                     i_right += 1 |                     i_right += 1 | ||||||
| 
 | 
 | ||||||
|                 elif 'l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]: |                 elif 'l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]: | ||||||
|                     merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]])) |                     merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]])) | ||||||
|                     i_left += 1 |                     i_left += 1 | ||||||
| 
 | 
 | ||||||
|                 elif 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]: |                 elif 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]: | ||||||
|                     merged_partial_subtrees.append(self.merge_results([[self.create_output_string()]], r_partial_subtrees[i_right])) |                     merged_partial_subtrees.append(self.merge_results([[create_output_string(self)]], r_partial_subtrees[i_right])) | ||||||
|                     i_right += 1 |                     i_right += 1 | ||||||
|                 else: |                 else: | ||||||
|                     merged_partial_subtrees.append([[self.create_output_string()]]) |                     merged_partial_subtrees.append([[create_output_string(self)]]) | ||||||
|             # if r_partial_subtrees[i]: |             # if r_partial_subtrees[i]: | ||||||
|             #     merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]])) |             #     merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]])) | ||||||
|             i += 1 |             i += 1 | ||||||
| 
 | 
 | ||||||
|         completed_subtrees = l_completed_subtrees |         completed_subtrees = l_completed_subtrees | ||||||
| @ -233,5 +233,17 @@ class Tree(object): | |||||||
|                 merged_results.append(old_result + new_result) |                 merged_results.append(old_result + new_result) | ||||||
|         return merged_results |         return merged_results | ||||||
| 
 | 
 | ||||||
|     def create_output_string(self): | def create_output_string_form(tree): | ||||||
|         return self.form.get_value() |     return tree.form.get_value() | ||||||
|  | 
 | ||||||
|  | def create_output_string_deprel(tree): | ||||||
|  |     return tree.deprel.get_value() | ||||||
|  | 
 | ||||||
|  | def create_output_string_lemma(tree): | ||||||
|  |     return tree.lemma.get_value() | ||||||
|  | 
 | ||||||
|  | def create_output_string_upos(tree): | ||||||
|  |     return tree.upos.get_value() | ||||||
|  | 
 | ||||||
|  | def create_output_string_xpos(tree): | ||||||
|  |     return tree.xpos.get_value() | ||||||
|  | |||||||
							
								
								
									
										12
									
								
								config.ini
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								config.ini
									
									
									
									
									
								
							| @ -1,12 +0,0 @@ | |||||||
| [settings] |  | ||||||
| input = data/sl_ssj-ud_v2.4.conllu |  | ||||||
| output = results/out.tsv |  | ||||||
| internal_saves = ./internal_saves |  | ||||||
| ngrams = 0 |  | ||||||
| ; ngrams = 2 |  | ||||||
| ; analyze_type options: 'lemma', 'word' |  | ||||||
| ; query = _ > _ |  | ||||||
| ; query = _ > (_ < _) > _ |  | ||||||
| ; query = _ < (_ > _) < _ > _ |  | ||||||
| ; query =  _ < _ > _ |  | ||||||
| query =  _ < _ |  | ||||||
| @ -7,7 +7,7 @@ import re | |||||||
| 
 | 
 | ||||||
| import pyconll | import pyconll | ||||||
| 
 | 
 | ||||||
| from Tree import Tree | from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def decode_query(orig_query): | def decode_query(orig_query): | ||||||
| @ -116,6 +116,7 @@ def main(): | |||||||
|     config = configparser.ConfigParser() |     config = configparser.ConfigParser() | ||||||
|     config.read('config.ini') |     config.read('config.ini') | ||||||
| 
 | 
 | ||||||
|  |     # create queries | ||||||
|     ngrams = 0 |     ngrams = 0 | ||||||
|     if config.getint('settings', 'ngrams') == 2: |     if config.getint('settings', 'ngrams') == 2: | ||||||
|         ngrams = 2 |         ngrams = 2 | ||||||
| @ -126,6 +127,18 @@ def main(): | |||||||
|     (all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict) = create_trees(config) |     (all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict) = create_trees(config) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |     # set filters | ||||||
|  |     assert config.get('settings', 'analyze_type') in ['deprel', 'lemma', 'upos', 'upos', 'xpos'], '"analyze_type" is not set up correctly' | ||||||
|  |     if config.get('settings', 'analyze_type') == 'deprel': | ||||||
|  |         create_output_string_funct = create_output_string_deprel | ||||||
|  |     elif config.get('settings', 'analyze_type') == 'lemma': | ||||||
|  |         create_output_string_funct = create_output_string_lemma | ||||||
|  |     elif config.get('settings', 'analyze_type') == 'upos': | ||||||
|  |         create_output_string_funct = create_output_string_upos | ||||||
|  |     elif config.get('settings', 'analyze_type') == 'xpos': | ||||||
|  |         create_output_string_funct = create_output_string_xpos | ||||||
|  |     else: | ||||||
|  |         create_output_string_funct = create_output_string_form | ||||||
| 
 | 
 | ||||||
|     result_dict = {} |     result_dict = {} | ||||||
| 
 | 
 | ||||||
| @ -134,7 +147,7 @@ def main(): | |||||||
|         # original |         # original | ||||||
|         # r_children = tree.r_children[:1] + tree.r_children[3:4] |         # r_children = tree.r_children[:1] + tree.r_children[3:4] | ||||||
|         # tree.r_children = tree.r_children[:1] + tree.r_children[2:4] |         # tree.r_children = tree.r_children[:1] + tree.r_children[2:4] | ||||||
|         _, subtrees = tree.get_subtrees(query_tree, []) |         _, subtrees = tree.get_subtrees(query_tree, [], create_output_string_funct) | ||||||
|         for query_results in subtrees: |         for query_results in subtrees: | ||||||
|             for result in query_results: |             for result in query_results: | ||||||
|                 if ngrams: |                 if ngrams: | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user