diff --git a/Tree.py b/Tree.py index a975772..f2fe718 100644 --- a/Tree.py +++ b/Tree.py @@ -110,7 +110,7 @@ class Tree(object): if not is_permanent: partial_subtrees[result_index].append([]) - def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children): + def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string): partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)] completed_subtrees = [[] for i in range(completed_subtrees_size)] @@ -123,7 +123,7 @@ class Tree(object): child, child_queries, child_queries_metadata = next(children_queries_generator) while child: # obtain children results - new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries) + new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string) self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices, partial_results_dict, partial_subtrees) @@ -135,7 +135,7 @@ class Tree(object): return partial_subtrees, completed_subtrees - def get_subtrees(self, permanent_query_trees, temporary_query_trees): + def get_subtrees(self, permanent_query_trees, temporary_query_trees, create_output_string): """ :param permanent_query_trees: @@ -165,8 +165,8 @@ class Tree(object): if 'r_children' in temporary_query_tree: r_all_query_indices.append((temporary_query_tree['r_children'], False)) - l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices, self.l_children) - r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, r_all_query_indices, self.r_children) + l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices, self.l_children, create_output_string) + r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, r_all_query_indices, self.r_children, create_output_string) @@ -180,43 +180,43 @@ class Tree(object): if i < len(active_permanent_query_trees): if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]): merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], - [[self.create_output_string()]]) + [[create_output_string(self)]]) merged_partial_subtrees.append( self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right])) - # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]])) + # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]])) i_left += 1 i_right += 1 elif 'l_children' in active_permanent_query_trees[i]: merged_partial_subtrees.append( - self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]])) + self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]])) i_left += 1 elif 'r_children' in active_permanent_query_trees[i]: merged_partial_subtrees.append( - self.merge_results([[self.create_output_string()]], r_partial_subtrees[i_right])) + self.merge_results([[create_output_string(self)]], r_partial_subtrees[i_right])) i_right += 1 else: - merged_partial_subtrees.append([[self.create_output_string()]]) + merged_partial_subtrees.append([[create_output_string(self)]]) else: if ('l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)] and 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]): - merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]]) + merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]]) merged_partial_subtrees.append(self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right])) - # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]])) + # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]])) i_left += 1 i_right += 1 elif 'l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]: - merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]])) + merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]])) i_left += 1 elif 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]: - merged_partial_subtrees.append(self.merge_results([[self.create_output_string()]], r_partial_subtrees[i_right])) + merged_partial_subtrees.append(self.merge_results([[create_output_string(self)]], r_partial_subtrees[i_right])) i_right += 1 else: - merged_partial_subtrees.append([[self.create_output_string()]]) + merged_partial_subtrees.append([[create_output_string(self)]]) # if r_partial_subtrees[i]: - # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]])) + # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]])) i += 1 completed_subtrees = l_completed_subtrees @@ -233,5 +233,17 @@ class Tree(object): merged_results.append(old_result + new_result) return merged_results - def create_output_string(self): - return self.form.get_value() +def create_output_string_form(tree): + return tree.form.get_value() + +def create_output_string_deprel(tree): + return tree.deprel.get_value() + +def create_output_string_lemma(tree): + return tree.lemma.get_value() + +def create_output_string_upos(tree): + return tree.upos.get_value() + +def create_output_string_xpos(tree): + return tree.xpos.get_value() diff --git a/config.ini b/config.ini deleted file mode 100644 index 3ee1c06..0000000 --- a/config.ini +++ /dev/null @@ -1,12 +0,0 @@ -[settings] -input = data/sl_ssj-ud_v2.4.conllu -output = results/out.tsv -internal_saves = ./internal_saves -ngrams = 0 -; ngrams = 2 -; analyze_type options: 'lemma', 'word' -; query = _ > _ -; query = _ > (_ < _) > _ -; query = _ < (_ > _) < _ > _ -; query = _ < _ > _ -query = _ < _ diff --git a/dependency-parsetree.py b/dependency-parsetree.py index cb372ae..9b24311 100644 --- a/dependency-parsetree.py +++ b/dependency-parsetree.py @@ -7,7 +7,7 @@ import re import pyconll -from Tree import Tree +from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos def decode_query(orig_query): @@ -116,6 +116,7 @@ def main(): config = configparser.ConfigParser() config.read('config.ini') + # create queries ngrams = 0 if config.getint('settings', 'ngrams') == 2: ngrams = 2 @@ -126,6 +127,18 @@ def main(): (all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict) = create_trees(config) + # set filters + assert config.get('settings', 'analyze_type') in ['deprel', 'lemma', 'upos', 'upos', 'xpos'], '"analyze_type" is not set up correctly' + if config.get('settings', 'analyze_type') == 'deprel': + create_output_string_funct = create_output_string_deprel + elif config.get('settings', 'analyze_type') == 'lemma': + create_output_string_funct = create_output_string_lemma + elif config.get('settings', 'analyze_type') == 'upos': + create_output_string_funct = create_output_string_upos + elif config.get('settings', 'analyze_type') == 'xpos': + create_output_string_funct = create_output_string_xpos + else: + create_output_string_funct = create_output_string_form result_dict = {} @@ -134,7 +147,7 @@ def main(): # original # r_children = tree.r_children[:1] + tree.r_children[3:4] # tree.r_children = tree.r_children[:1] + tree.r_children[2:4] - _, subtrees = tree.get_subtrees(query_tree, []) + _, subtrees = tree.get_subtrees(query_tree, [], create_output_string_funct) for query_results in subtrees: for result in query_results: if ngrams: