diff --git a/ResultTree.py b/ResultTree.py index 2d21184..4eff77d 100644 --- a/ResultTree.py +++ b/ResultTree.py @@ -7,29 +7,13 @@ from generic import create_output_string_form, create_output_string_deprel, crea class ResultTree(object): def __init__(self, node, children, filters): - # self.array = [[create_output_string(node) for create_output_string in create_output_strings]] - # if create_output_string_lemma in create_output_strings: - # key_array = [[create_output_string(node) if create_output_string != create_output_string_lemma else 'L=' + create_output_string(node) for create_output_string in create_output_strings]] - # else: - # key_array = self.array - # if len(self.array[0]) > 1: - # self.key = '&'.join(key_array[0]) - # else: - # # output_string = create_output_strings[0](node) - # self.key = key_array[0][0] - self.node = node - - # order with original numbers in sentences - # self.order = str([architecture_order]) - # order with numbers from 0 to n of n-gram - # self.root = '' - # self.final_order = '' - # self.separators = separators self.children = children self.filters = filters self.key = None self.order_key = None + self.order = None + self.array = None def __repr__(self): return self.get_key() @@ -37,18 +21,21 @@ class ResultTree(object): def set_children(self, children): self.children = children - def get_key(self, get_free=False): - # if self.key: - # return self.key + def reset_params(self): + self.key = None + self.order_key = None + self.order = None + self.array = None + + def get_key(self): + if self.key: + return self.key key = '' write_self_node_to_result = False if self.children: - if get_free: - children = sorted(self.children, key=lambda x: x.node.name) - else: - children = self.children + children = self.children for child in children: - if self.filters['node_order'] and child.node.location < self.node.location and not get_free: + if self.filters['node_order'] and child.node.location < self.node.location: if self.filters['dependency_type']: # separator = ' <' + deprel[i_child][i_answer] + ' ' separator = ' <' + child.node.deprel + ' ' @@ -72,9 +59,31 @@ class ResultTree(object): self.key = self.node.name return self.key + def get_key_sorted(self): + key = '' + write_self_node_to_result = False + if self.children: + children = sorted(self.children, key=lambda x: x.node.name) + for child in children: + if not write_self_node_to_result: + write_self_node_to_result = True + key += self.node.name + if self.filters['dependency_type']: + separator = ' >' + child.node.deprel + ' ' + else: + separator = ' > ' + key += separator + child.get_key_sorted() + + if not write_self_node_to_result: + key += self.node.name + key = '(' + key + ')' + else: + key = self.node.name + return key + def get_order_key(self): - # if self.order_key: - # return self.order_key + if self.order_key: + return self.order_key order_key = '' write_self_node_to_result = False if self.children: @@ -103,8 +112,8 @@ class ResultTree(object): return self.order_key def get_order(self): - # if self.order_key: - # return self.order_key + if self.order: + return self.order order = [] write_self_node_to_result = False if self.children: @@ -125,8 +134,8 @@ class ResultTree(object): return self.order def get_array(self): - # if self.order_key: - # return self.order_key + if self.array: + return self.array array = [] write_self_node_to_result = False if self.children: @@ -164,73 +173,74 @@ class ResultTree(object): # self.key += ' ' + separator + ' ' + string # self.order_key += ' ' + separator + ' ' + architecture_order - def add_separator(self, separator, left=True): - self_copy = copy.copy(self) - if left: - self_copy.separators += [separator] - self_copy.key += separator - self_copy.order_key += separator - else: - self_copy.separators = [separator] + self_copy.separators - self_copy.key = separator + self_copy.key - self_copy.order_key = separator + self_copy.order_key - return self_copy + # def add_separator(self, separator, left=True): + # self_copy = copy.copy(self) + # if left: + # self_copy.separators += [separator] + # self_copy.key += separator + # self_copy.order_key += separator + # else: + # self_copy.separators = [separator] + self_copy.separators + # self_copy.key = separator + self_copy.key + # self_copy.order_key = separator + self_copy.order_key + # return self_copy # def merge_results2(self): - def merge_results(self, right_t, separator, left=True): - left_tree = copy.copy(self) - right_tree = copy.copy(right_t) + # def merge_results(self, right_t, separator, left=True): + # left_tree = copy.copy(self) + # right_tree = copy.copy(right_t) + # + # if separator: + # if left: + # # merged_results.append(left_part + right_part + separator) + # left_tree.key = left_tree.key + right_tree.key + separator + # left_tree.order_key = left_tree.order_key + right_tree.order_key + separator + # left_tree.array = left_tree.array + right_tree.array + # left_tree.order = left_tree.order + right_tree.order + # # left_tree.order = str([architecture_order]) + # left_tree.separators = left_tree.separators + right_tree.separators + [separator] + # else: + # # merged_results.append(left_part + separator + right_part) + # left_tree.key = left_tree.key + separator + right_tree.key + # left_tree.order_key = left_tree.order_key + separator + right_tree.order_key + # left_tree.array = left_tree.array + right_tree.array + # left_tree.order = left_tree.order + right_tree.order + # # left_tree.order = str([architecture_order]) + # left_tree.separators = left_tree.separators + [separator] + right_tree.separators + # else: + # # merged_results.append(left_part + right_part) + # left_tree.key = left_tree.key + right_tree.key + # left_tree.order_key = left_tree.order_key + right_tree.order_key + # left_tree.array = left_tree.array + right_tree.array + # left_tree.order = left_tree.order + right_tree.order + # # left_tree.order = str([architecture_order]) + # left_tree.separators = left_tree.separators + right_tree.separators + # + # return left_tree - if separator: - if left: - # merged_results.append(left_part + right_part + separator) - left_tree.key = left_tree.key + right_tree.key + separator - left_tree.order_key = left_tree.order_key + right_tree.order_key + separator - left_tree.array = left_tree.array + right_tree.array - left_tree.order = left_tree.order + right_tree.order - # left_tree.order = str([architecture_order]) - left_tree.separators = left_tree.separators + right_tree.separators + [separator] - else: - # merged_results.append(left_part + separator + right_part) - left_tree.key = left_tree.key + separator + right_tree.key - left_tree.order_key = left_tree.order_key + separator + right_tree.order_key - left_tree.array = left_tree.array + right_tree.array - left_tree.order = left_tree.order + right_tree.order - # left_tree.order = str([architecture_order]) - left_tree.separators = left_tree.separators + [separator] + right_tree.separators - else: - # merged_results.append(left_part + right_part) - left_tree.key = left_tree.key + right_tree.key - left_tree.order_key = left_tree.order_key + right_tree.order_key - left_tree.array = left_tree.array + right_tree.array - left_tree.order = left_tree.order + right_tree.order - # left_tree.order = str([architecture_order]) - left_tree.separators = left_tree.separators + right_tree.separators + # def extend_answer(self, other_answer, separator): + # self.array.extend(other_answer.array) + # self.order.extend(other_answer.order) + # self.key += separator + other_answer.key + # self.order_key += separator + other_answer.order_key + # self.separators.extend(separator) - return left_tree - - def extend_answer(self, other_answer, separator): - self.array.extend(other_answer.array) - self.order.extend(other_answer.order) - self.key += separator + other_answer.key - self.order_key += separator + other_answer.order_key - self.separators.extend(separator) - - def put_in_bracelets(self, inplace=False): - if inplace: - self.key = ('(' + self.key + ')') - self.order_key = ('(' + self.order_key + ')') - return - result = copy.copy(self) - result.key = ('(' + result.key + ')') - result.order_key = ('(' + result.order_key + ')') - return result + # def put_in_bracelets(self, inplace=False): + # if inplace: + # self.key = ('(' + self.key + ')') + # self.order_key = ('(' + self.order_key + ')') + # return + # result = copy.copy(self) + # result.key = ('(' + result.key + ')') + # result.order_key = ('(' + result.order_key + ')') + # return result def finalize_result(self): result = copy.copy(self) - result.key = result.get_key() + result.reset_params() + # result.key = result.get_key() # result.set_root() # create order letters diff --git a/Tree.py b/Tree.py index afa305d..daefe31 100644 --- a/Tree.py +++ b/Tree.py @@ -146,61 +146,61 @@ class Tree(object): old_subtree.extend(new_subtree) - def get_results(self, partial_results_dict, result_index, result_part, outcome, last_result_part): - # save results for later usage + # def get_results(self, partial_results_dict, result_index, result_part, outcome, last_result_part): + # # save results for later usage + # + # # if result index already in and element 0 exists (otherwise error) + # if result_index in partial_results_dict and 0 in partial_results_dict[result_index]: + # if result_part - 1 in partial_results_dict[result_index]: + # if result_part in partial_results_dict[result_index]: + # partial_results_dict[result_index][result_part].extend(self.merge_results(partial_results_dict[result_index][result_part - 1], outcome)) + # else: + # partial_results_dict[result_index][result_part] = self.merge_results(partial_results_dict[result_index][result_part - 1], outcome) + # + # # extend one word layer with output + # else: + # partial_results_dict[result_index][0].extend(outcome) + # else: + # partial_results_dict[result_index] = {0: outcome} + # + # if last_result_part - 1 in partial_results_dict[result_index]: + # return partial_results_dict[result_index].pop(last_result_part - 1) + # return [] - # if result index already in and element 0 exists (otherwise error) - if result_index in partial_results_dict and 0 in partial_results_dict[result_index]: - if result_part - 1 in partial_results_dict[result_index]: - if result_part in partial_results_dict[result_index]: - partial_results_dict[result_index][result_part].extend(self.merge_results(partial_results_dict[result_index][result_part - 1], outcome)) - else: - partial_results_dict[result_index][result_part] = self.merge_results(partial_results_dict[result_index][result_part - 1], outcome) + # def group_results(self, new_partial_subtrees, child_queries_metadata, all_query_indices, partial_results_dict, partial_subtrees): + # for outcome, (result_part, result_index, is_permanent) in zip(new_partial_subtrees, child_queries_metadata): + # if outcome: + # new_results = self.get_results(partial_results_dict, result_index, result_part, outcome, len(all_query_indices[result_index][0])) + # if new_results: + # self.add_subtrees(partial_subtrees[result_index], new_results) + # else: + # if not is_permanent: + # partial_subtrees[result_index].append([]) - # extend one word layer with output - else: - partial_results_dict[result_index][0].extend(outcome) - else: - partial_results_dict[result_index] = {0: outcome} - - if last_result_part - 1 in partial_results_dict[result_index]: - return partial_results_dict[result_index].pop(last_result_part - 1) - return [] - - def group_results(self, new_partial_subtrees, child_queries_metadata, all_query_indices, partial_results_dict, partial_subtrees): - for outcome, (result_part, result_index, is_permanent) in zip(new_partial_subtrees, child_queries_metadata): - if outcome: - new_results = self.get_results(partial_results_dict, result_index, result_part, outcome, len(all_query_indices[result_index][0])) - if new_results: - self.add_subtrees(partial_subtrees[result_index], new_results) - else: - if not is_permanent: - partial_subtrees[result_index].append([]) - - def get_all_query_indices_old(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string): - partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)] - completed_subtrees = [[] for i in range(completed_subtrees_size)] - - # list of pairs (index of query in group, group of query) - partial_results_dict = {} - - children_queries_generator = self.generate_children_queries(l_all_query_indices, children) - - child_index = 0 - child, child_queries, child_queries_metadata = next(children_queries_generator) - while child: - # obtain children results - new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string) - - self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices, - partial_results_dict, partial_subtrees) - - for i in range(len(new_completed_subtrees)): - completed_subtrees[i].extend(new_completed_subtrees[i]) - child, child_queries, child_queries_metadata = children_queries_generator.send(partial_results_dict) - child_index += 1 - - return partial_subtrees, completed_subtrees + # def get_all_query_indices_old(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string): + # partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)] + # completed_subtrees = [[] for i in range(completed_subtrees_size)] + # + # # list of pairs (index of query in group, group of query) + # partial_results_dict = {} + # + # children_queries_generator = self.generate_children_queries(l_all_query_indices, children) + # + # child_index = 0 + # child, child_queries, child_queries_metadata = next(children_queries_generator) + # while child: + # # obtain children results + # new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string) + # + # self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices, + # partial_results_dict, partial_subtrees) + # + # for i in range(len(new_completed_subtrees)): + # completed_subtrees[i].extend(new_completed_subtrees[i]) + # child, child_queries, child_queries_metadata = children_queries_generator.send(partial_results_dict) + # child_index += 1 + # + # return partial_subtrees, completed_subtrees def get_all_query_indices(self, temporary_query_nb, permanent_query_nb, permanent_query_trees, all_query_indices, children, create_output_string, filters): # l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), @@ -432,37 +432,37 @@ class Tree(object): return partial_answers, complete_answers # return merged_partial_subtrees_architecture[len(active_permanent_query_trees):], merged_partial_subtrees[len(active_permanent_query_trees):], completed_subtrees - @staticmethod - def merge_results(left_parts, right_parts, separator, left=True, right_part_free=False): - if not left_parts: - # return all right_parts - return [r_p.add_separator(separator, left) for r_p in right_parts] - # if left: - # return [r_p + separator for r_p in right_parts] - # # return [r_p.add_separator(separator, left) for r_p in right_parts] - # else: - # return [separator + r_p for r_p in right_parts] - - if not right_parts: - return [l_p.add_separator(separator, False) for l_p in left_parts] - # return [separator + l_p for l_p in left_parts] - merged_results = [] - for left_part in left_parts: - if right_part_free: - for right_part in right_parts[1]: - merged_results.append((right_parts[0], left_part.merge_results(right_part, separator, left))) - else: - for right_part in right_parts: - merged_results.append(left_part.merge_results(right_part, separator, left)) - # merged_results.append(left_part.merge_results(right_part, separator)) - # if separator: - # if left: - # merged_results.append(left_part + right_part + separator) - # else: - # merged_results.append(left_part + separator + right_part) - # else: - # merged_results.append(left_part + right_part) - return merged_results + # @staticmethod + # def merge_results(left_parts, right_parts, separator, left=True, right_part_free=False): + # if not left_parts: + # # return all right_parts + # return [r_p.add_separator(separator, left) for r_p in right_parts] + # # if left: + # # return [r_p + separator for r_p in right_parts] + # # # return [r_p.add_separator(separator, left) for r_p in right_parts] + # # else: + # # return [separator + r_p for r_p in right_parts] + # + # if not right_parts: + # return [l_p.add_separator(separator, False) for l_p in left_parts] + # # return [separator + l_p for l_p in left_parts] + # merged_results = [] + # for left_part in left_parts: + # if right_part_free: + # for right_part in right_parts[1]: + # merged_results.append((right_parts[0], left_part.merge_results(right_part, separator, left))) + # else: + # for right_part in right_parts: + # merged_results.append(left_part.merge_results(right_part, separator, left)) + # # merged_results.append(left_part.merge_results(right_part, separator)) + # # if separator: + # # if left: + # # merged_results.append(left_part + right_part + separator) + # # else: + # # merged_results.append(left_part + separator + right_part) + # # else: + # # merged_results.append(left_part + right_part) + # return merged_results @staticmethod def create_children_groups(left_parts, right_parts): @@ -482,6 +482,7 @@ class Tree(object): for left_part in left_parts: for right_part in right_parts: new_part = copy(left_part) + # new_part.reset_params() new_part.extend(right_part) all_children_group_possibilities.append(new_part) # merged_results.append(left_part.merge_results(right_part, separator)) @@ -511,87 +512,87 @@ class Tree(object): merged_indices.append(new_indices) return merged_results, merged_indices - def merge_results2(self, child, new_results, filters): - if create_output_string_form(self) == 'začelo': - print('HERE!@@!') - if create_output_string_form(self) == 'Dogodek': - print('HERE!@@!') - if create_output_string_form(self) == 'utišal': - print('HERE!@@!') - if create_output_string_form(self) == 'prijel': - print('HERE!@@!') - if filters['node_order']: - new_child = child - # new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key) - else: - new_child = sorted(child, key=lambda x: x[0].key) + # def merge_results2(self, child, new_results, filters): + # if create_output_string_form(self) == 'začelo': + # print('HERE!@@!') + # if create_output_string_form(self) == 'Dogodek': + # print('HERE!@@!') + # if create_output_string_form(self) == 'utišal': + # print('HERE!@@!') + # if create_output_string_form(self) == 'prijel': + # print('HERE!@@!') + # if filters['node_order']: + # new_child = child + # # new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key) + # else: + # new_child = sorted(child, key=lambda x: x[0].key) + # + # l_res = [] + # r_res = [] + # results = [] + # for i_answer, answer in enumerate(new_child): + # if filters['node_order'] and answer[0].order[0] < self.index: + # # if filters['node_order'] and indices[i_child][i_answer] < self.children_split: + # if filters['dependency_type']: + # # separator = ' <' + deprel[i_child][i_answer] + ' ' + # separator = ' <' + answer[0].deprel + ' ' + # else: + # separator = ' < ' + # l_res = self.merge_results(l_res, answer, separator, left=True) + # # l_res += answer + separator + # else: + # if filters['dependency_type']: + # separator = ' >' + answer[0].deprel + ' ' + # else: + # separator = ' > ' + # r_res = self.merge_results(r_res, answer, separator, left=False) + # # r_res += separator + answer + # + # # if filters['node_order']: + # # r_res_sorted = [] + # # for i_answer, answer in new_child_sorted: + # # if filters['dependency_type']: + # # separator = ' >' + answer[0].deprel + ' ' + # # else: + # # separator = ' > ' + # # r_res_sorted = (i_answer, self.merge_results(r_res_sorted, answer, separator, left=False)) + # # + # # + # # r_res_sorted_combined = self.merge_results(new_results, r_res_sorted, None, right_part_free=True) + # # # print('here') + # + # if l_res: + # l_res_combined = self.merge_results(l_res, new_results, None) + # if r_res: + # r_res_combined = self.merge_results(l_res_combined, r_res, None) + # # merged_results.extend(['(' + el + ')' for el in r_res_combined]) + # result = r_res_combined + # # results.extend([el.put_in_bracelets() for el in r_res_combined]) + # else: + # result = l_res_combined + # # results.extend([el.put_in_bracelets() for el in l_res_combined]) + # elif r_res: + # r_res_combined = self.merge_results(new_results, r_res, None) + # result = r_res_combined + # # results.extend([el.put_in_bracelets() for el in r_res_combined]) + # else: + # result = [] + # + # + # results.extend([el.put_in_bracelets() for el in result]) + # + # return results - l_res = [] - r_res = [] - results = [] - for i_answer, answer in enumerate(new_child): - if filters['node_order'] and answer[0].order[0] < self.index: - # if filters['node_order'] and indices[i_child][i_answer] < self.children_split: - if filters['dependency_type']: - # separator = ' <' + deprel[i_child][i_answer] + ' ' - separator = ' <' + answer[0].deprel + ' ' - else: - separator = ' < ' - l_res = self.merge_results(l_res, answer, separator, left=True) - # l_res += answer + separator - else: - if filters['dependency_type']: - separator = ' >' + answer[0].deprel + ' ' - else: - separator = ' > ' - r_res = self.merge_results(r_res, answer, separator, left=False) - # r_res += separator + answer - - # if filters['node_order']: - # r_res_sorted = [] - # for i_answer, answer in new_child_sorted: - # if filters['dependency_type']: - # separator = ' >' + answer[0].deprel + ' ' - # else: - # separator = ' > ' - # r_res_sorted = (i_answer, self.merge_results(r_res_sorted, answer, separator, left=False)) - # - # - # r_res_sorted_combined = self.merge_results(new_results, r_res_sorted, None, right_part_free=True) - # # print('here') - - if l_res: - l_res_combined = self.merge_results(l_res, new_results, None) - if r_res: - r_res_combined = self.merge_results(l_res_combined, r_res, None) - # merged_results.extend(['(' + el + ')' for el in r_res_combined]) - result = r_res_combined - # results.extend([el.put_in_bracelets() for el in r_res_combined]) - else: - result = l_res_combined - # results.extend([el.put_in_bracelets() for el in l_res_combined]) - elif r_res: - r_res_combined = self.merge_results(new_results, r_res, None) - result = r_res_combined - # results.extend([el.put_in_bracelets() for el in r_res_combined]) - else: - result = [] - - - results.extend([el.put_in_bracelets() for el in result]) - - return results - - def create_merged_results(self, answers, separators, separator_switch): - new_answers = [] - for answer_i, answer in enumerate(answers): - new_answer = copy(answer[0]) - print(create_output_string_form(self)) - for answer_part_i, answer_part in enumerate(answer[1:]): - new_answer.extend_answer(answer_part, separators[answer_part_i]) - new_answer.put_in_bracelets(inplace=True) - new_answers.append(new_answer) - return new_answers + # def create_merged_results(self, answers, separators, separator_switch): + # new_answers = [] + # for answer_i, answer in enumerate(answers): + # new_answer = copy(answer[0]) + # print(create_output_string_form(self)) + # for answer_part_i, answer_part in enumerate(answer[1:]): + # new_answer.extend_answer(answer_part, separators[answer_part_i]) + # new_answer.put_in_bracelets(inplace=True) + # new_answers.append(new_answer) + # return new_answers # def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters): def merge_results3(self, child, new_results, filters): @@ -635,8 +636,11 @@ class Tree(object): for result in new_results: for children in children_groups: new_result = copy(result) + # if result.key is not None or result.order is not None or result.array is not None or result.order_key is not None: + # print('here') + # new_result.reset_params() new_result.set_children(children) - order = tuple(sorted(new_result.get_order())) + # order = tuple(sorted(new_result.get_order())) results.append(new_result) return results diff --git a/dependency-parsetree.py b/dependency-parsetree.py index 9831f2c..1de6d18 100644 --- a/dependency-parsetree.py +++ b/dependency-parsetree.py @@ -483,9 +483,9 @@ def main(): # print('HERE') # print(tree_i) if filters['node_order']: - key = r.key + r.order + key = r.get_key() + r.order else: - key = r.key + key = r.get_key() # if r == '(" < , < je < velik) < tem': # print(tree_i) # if r in result_dict: @@ -520,9 +520,9 @@ def main(): for query_results in subtrees: for r in query_results: if filters['node_order']: - key = r.key + r.order + key = r.get_key() + r.order else: - key = r.key + key = r.get_key() # if r == '(" < , < je < velik) < tem': # print(tree_i) if key in result_dict: @@ -586,11 +586,11 @@ def main(): break words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))] # words_only = printable_answers(k) - row = [v['object'].key[1:-1]] + words_only + [str(v['number'])] + row = [v['object'].get_key()[1:-1]] + words_only + [str(v['number'])] row += ['%.4f' % absolute_frequency] if filters['node_order']: row += [v['object'].order] - row += [v['object'].get_key(get_free=True)[1:-1]] + row += [v['object'].get_key_sorted()[1:-1]] if filters['nodes_number']: row += ['%d' % len(v['object'].array)] if filters['print_root']: