diff --git a/Result.py b/Result.py index 4c410be..2a8d4b1 100644 --- a/Result.py +++ b/Result.py @@ -1,4 +1,5 @@ import copy +import string class Result(object): @@ -11,10 +12,11 @@ class Result(object): self.key = self.array[0][0] # self.array = [[output_string]] self.order_key = str([architecture_order]) - + self.order = [architecture_order] # order with original numbers in sentences # self.order = str([architecture_order]) # order with numbers from 0 to n of n-gram + self.root = '' self.final_order = '' self.separators = [] @@ -24,6 +26,7 @@ class Result(object): def add(self, string, architecture_order, separator, is_left): if is_left: self.array = [string] + self.array + self.order = [architecture_order] + self.order # self.order = [architecture_order] + self.order self.separators = [separator] + self.separators self.key = string + ' ' + separator + ' ' + self.key @@ -31,6 +34,7 @@ class Result(object): else: self.array += [string] + self.order += [architecture_order] # self.order += [architecture_order] self.separators += [separator] @@ -49,6 +53,9 @@ class Result(object): self_copy.order_key = separator + self_copy.order_key return self_copy + # def merge_results2(self): + + def merge_results(self, right_t, separator, left=True): left_tree = copy.copy(self) right_tree = copy.copy(right_t) @@ -59,6 +66,7 @@ class Result(object): left_tree.key = left_tree.key + right_tree.key + separator left_tree.order_key = left_tree.order_key + right_tree.order_key + separator left_tree.array = left_tree.array + right_tree.array + left_tree.order = left_tree.order + right_tree.order # left_tree.order = str([architecture_order]) left_tree.separators = left_tree.separators + right_tree.separators + [separator] else: @@ -66,6 +74,7 @@ class Result(object): left_tree.key = left_tree.key + separator + right_tree.key left_tree.order_key = left_tree.order_key + separator + right_tree.order_key left_tree.array = left_tree.array + right_tree.array + left_tree.order = left_tree.order + right_tree.order # left_tree.order = str([architecture_order]) left_tree.separators = left_tree.separators + [separator] + right_tree.separators else: @@ -73,12 +82,24 @@ class Result(object): left_tree.key = left_tree.key + right_tree.key left_tree.order_key = left_tree.order_key + right_tree.order_key left_tree.array = left_tree.array + right_tree.array + left_tree.order = left_tree.order + right_tree.order # left_tree.order = str([architecture_order]) left_tree.separators = left_tree.separators + right_tree.separators return left_tree - def put_in_bracelets(self): + def extend_answer(self, other_answer, separator): + self.array.extend(other_answer.array) + self.order.extend(other_answer.order) + self.key += separator + other_answer.key + self.order_key += separator + other_answer.order_key + self.separators.extend(separator) + + def put_in_bracelets(self, inplace=False): + if inplace: + self.key = ('(' + self.key + ')') + self.order_key = ('(' + self.order_key + ')') + return result = copy.copy(self) result.key = ('(' + result.key + ')') result.order_key = ('(' + result.order_key + ')') @@ -87,6 +108,22 @@ class Result(object): def finalize_result(self): result = copy.copy(self) result.key = result.key[1:-1] + result.set_root() + + # create order letters + order_letters = [''] * len(result.order) + for i in range(len(result.order)): + ind = result.order.index(min(result.order)) + result.order[ind] = 10000 + order_letters[ind] = string.ascii_uppercase[i] + result.order = ''.join(order_letters) # result.order_key = result.order_key[1:-1] # TODO When tree is finalized create relative word order (alphabet)! return result + + def set_root(self): + if len(self.array[0]) > 1: + self.root = '{' + ','.join(self.array[0]) + '}' + else: + # output_string = create_output_strings[0](node) + self.root = self.array[0][0] \ No newline at end of file diff --git a/Tree.py b/Tree.py index 4788679..6f08682 100644 --- a/Tree.py +++ b/Tree.py @@ -438,7 +438,8 @@ class Tree(object): merged_results = [] for left_part in left_parts: for right_part in right_parts: - merged_results.append(left_part.merge_results(right_part, separator)) + merged_results.append(left_part.merge_results(right_part, separator, left)) + # merged_results.append(left_part.merge_results(right_part, separator)) # if separator: # if left: # merged_results.append(left_part + right_part + separator) @@ -465,9 +466,114 @@ class Tree(object): merged_indices.append(new_indices) return merged_results, merged_indices + def merge_results2(self, new_child, new_results, i_child, indices, deprel, filters): + l_res = [] + r_res = [] + results = [] + for i_answer, answer in enumerate(new_child): + if filters['node_order'] and indices[i_child][i_answer] < self.children_split: + if filters['dependency_type']: + separator = ' <' + deprel[i_child][i_answer] + ' ' + else: + separator = ' < ' + l_res = self.merge_results(l_res, answer, separator, left=True) + # l_res += answer + separator + else: + if filters['dependency_type']: + separator = ' >' + deprel[i_child][i_answer] + ' ' + else: + separator = ' > ' + r_res = self.merge_results(r_res, answer, separator, left=False) + # r_res += separator + answer + if l_res: + l_res_combined = self.merge_results(l_res, new_results, None) + if r_res: + r_res_combined = self.merge_results(l_res_combined, r_res, None) + # merged_results.extend(['(' + el + ')' for el in r_res_combined]) + results.extend([el.put_in_bracelets() for el in r_res_combined]) + else: + results.extend([el.put_in_bracelets() for el in l_res_combined]) + elif r_res: + r_res_combined = self.merge_results(new_results, r_res, None) + results.extend([el.put_in_bracelets() for el in r_res_combined]) + + return results + + def create_merged_results(self, answers, separators, separator_switch): + new_answers = [] + for answer_i, answer in enumerate(answers): + new_answer = copy(answer[0]) + print(create_output_string_form(self)) + for answer_part_i, answer_part in enumerate(answer[1:]): + new_answer.extend_answer(answer_part, separators[answer_part_i]) + new_answer.put_in_bracelets(inplace=True) + new_answers.append(new_answer) + return new_answers + # def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters): + + def merge_results3(self, new_child, new_answers, i_child, indices, deprel, filters): + # l_res = [] + # r_res = [] + # results = [] + separators = [] + l_answers = [] + r_answers = [] + separator_switch = len(new_child) - 1 + for i_answer, answer in enumerate(new_child): + if filters['node_order'] and indices[i_child][i_answer] < self.children_split: + if filters['dependency_type']: + separators.append(' <' + deprel[i_child][i_answer] + ' ') + else: + separators.append(' < ') + l_answers.append(answer) + # l_res = res + # return merged_results + # l_res += answer + separator + else: + if i_answer < separator_switch: + separator_switch = i_answer + if filters['dependency_type']: + separators.append(' >' + deprel[i_child][i_answer] + ' ') + else: + separators.append(' > ') + r_answers.append(answer) + # r_res += separator + answer + + answers = [] + if l_answers and r_answers: + answers = l_answers + [new_answers] + r_answers + # for l_answer in l_answers: + # for r_answer in r_answers: + # answers.append(l_answer + new_answers + r_answer) + elif l_answers: + answers = l_answers + [new_answers] + # for l_answer in l_answers: + # answers.append(l_answer + new_answers) + elif r_answers: + answers = [new_answers] + r_answers + # for r_answer in r_answers: + # answers.append(new_answers + r_answer) + else: + answers = [new_answers] + + results = self.create_merged_results(answers, separators, separator_switch) + + # if l_res: + # l_res_combined = self.merge_results(l_res, new_answers, None) + # if r_res: + # r_res_combined = self.merge_results(l_res_combined, r_res, None) + # # merged_results.extend(['(' + el + ')' for el in r_res_combined]) + # results.extend([el.put_in_bracelets() for el in r_res_combined]) + # else: + # results.extend([el.put_in_bracelets() for el in l_res_combined]) + # elif r_res: + # r_res_combined = self.merge_results(new_answers, r_res, None) + # results.extend([el.put_in_bracelets() for el in r_res_combined]) + + return results def create_output_children(self, children, new_results, filters, indices, deprel): - # if create_output_string_form(self) == 'prijel': + # if create_output_string_form(self) == 'Dogodek': # print('HERE!@@!') # if create_output_string_form(self) == 'utišal': # print('HERE!@@!') @@ -475,45 +581,12 @@ class Tree(object): # print('HERE') merged_results = [] for i_child, child in enumerate(children): - l_res = [] - r_res = [] if filters['node_order']: new_child = child else: - # a = [['tistega', 'dne'], ['sem', 'bil']] - # b = sorted(a) - # TODO CHECK IF THIS WORKS FOR CERTIAN new_child = sorted(child, key=lambda x: x[0].key) - for i_answer, answer in enumerate(new_child): - # res += '(' + el + ') < ' - if not filters['node_order'] or indices[i_child][i_answer] < self.children_split: - if filters['dependency_type']: - separator = ' <' + deprel[i_child][i_answer] + ' ' - else: - separator = ' < ' - l_res = self.merge_results(l_res, answer, separator, left=True) - # l_res += answer + separator - else: - if filters['dependency_type']: - separator = ' >' + deprel[i_child][i_answer] + ' ' - else: - separator = ' > ' - r_res = self.merge_results(r_res, answer, separator, left=False) - # r_res += separator + answer - if l_res: - l_res_combined = self.merge_results(l_res, new_results, None) - if r_res: - r_res_combined = self.merge_results(l_res_combined, r_res, None) - # merged_results.extend(['(' + el + ')' for el in r_res_combined]) - merged_results.extend([el.put_in_bracelets() for el in r_res_combined]) - else: - merged_results.extend([el.put_in_bracelets() for el in l_res_combined]) - elif r_res: - r_res_combined = self.merge_results(new_results, r_res, None) - merged_results.extend([el.put_in_bracelets() for el in r_res_combined]) - - - # merged_results.append('(' + l_res + new_result + r_res + ')') + ################# + merged_results.extend(self.merge_results2(new_child, new_results, i_child, indices, deprel, filters)) return merged_results @staticmethod diff --git a/dependency-parsetree.py b/dependency-parsetree.py index 09c878f..97b5efa 100644 --- a/dependency-parsetree.py +++ b/dependency-parsetree.py @@ -457,33 +457,42 @@ def main(): for tree_i, subtrees in enumerate(all_subtrees): for query_results in subtrees: for r in query_results: + if filters['node_order']: + key = r.key + r.order + else: + key = r.key # if r == '(" < , < je < velik) < tem': # print(tree_i) # if r in result_dict: # result_dict[r] += 1 # else: # result_dict[r] = 1 - if r.key in result_dict: - result_dict[r.key]['number'] += 1 + if key in result_dict: + result_dict[key]['number'] += 1 else: - result_dict[r.key] = {'object': r, 'number': 1} + result_dict[key] = {'object': r, 'number': 1} # 3.65 s (1 core) else: # for tree_i, tree in enumerate(all_trees[-5:]): - for tree_i, tree in enumerate(all_trees): + # for tree_i, tree in enumerate(all_trees): + for tree_i, tree in enumerate(all_trees[1:]): # text = Če pa ostane odrasel otrok doma, se starši le težko sprijaznijo s tem, da je "velik", otrok pa ima ves čas občutek, da se njegovi starši po nepotrebnem vtikajo v njegovo življenje. # for tree_i, tree in enumerate(all_trees[5170:]): # for tree in all_trees: subtrees = tree_calculations((tree, query_tree, create_output_string_functs, filters)) for query_results in subtrees: for r in query_results: + if filters['node_order']: + key = r.key + r.order + else: + key = r.key # if r == '(" < , < je < velik) < tem': # print(tree_i) - if r.key in result_dict: - result_dict[r.key]['number'] += 1 + if key in result_dict: + result_dict[key]['number'] += 1 else: - result_dict[r.key] = {'object': r, 'number': 1} + result_dict[key] = {'object': r, 'number': 1} print("Execution time:") print("--- %s seconds ---" % (time.time() - start_exe_time)) @@ -516,23 +525,32 @@ def main(): len_words = tree_size_range[-1] else: len_words = int(len(config.get('settings', 'query').split(" "))/2 + 1) - header = ["Structure"] + ["Word #" + str(i) + "-" + node_type for i in range(1, len_words + 1) for node_type in node_types] + ['Number of occurences'] - if config.get('settings', 'relative_number'): - header += ['Relative frequency'] - if config.get('settings', 'nodes_number'): - header += ['Nodes number'] - # header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Number of occurences'] + header = ["Structure"] + ["Node #" + str(i) + "-" + node_type for i in range(1, len_words + 1) for node_type in node_types] + ['Absolute frequency'] + header += ['Relative frequency'] + if filters['node_order']: + header += ['Order'] + if config.getboolean('settings', 'nodes_number'): + header += ['Number of nodes'] + if config.getboolean('settings', 'print_root'): + header += ['Root node'] + # header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Absolute frequency'] writer.writerow(header) + if config.getint('settings', 'lines_threshold'): + sorted_list = sorted_list[:config.getint('settings', 'lines_threshold')] + # body for k, v in sorted_list: words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))] # words_only = printable_answers(k) - row = [k] + words_only + [str(v['number'])] - if config.get('settings', 'relative_number'): - row += ['%.4f' % (v['number'] * 1000000.0 / corpus_size)] + row = [v['object'].key] + words_only + [str(v['number'])] + row += ['%.4f' % (v['number'] * 1000000.0 / corpus_size)] + if filters['node_order']: + row += [v['object'].order] if config.get('settings', 'nodes_number'): row += ['%d' % len(v['object'].array)] + if config.get('settings', 'print_root'): + row += [v['object'].root] writer.writerow(row)