Cleaning repo

4 years ago · 421f12cac6
parent 36223d3a98
commit 421f12cac6
5 changed files with 8 additions and 606 deletions
--- a/ResultNode.py
+++ b/ResultNode.py
@ -18,17 +18,8 @@ from generic import generate_key, generate_name
 class ResultNode(object):
    """Lightweight record describing a single tree node inside a result.

    Keeps the node's generated name (as separate parts and as one value),
    the location it was given and the value of its dependency relation.
    """

    def __init__(self, node, architecture_order, create_output_strings):
        """Build the result record for ``node``.

        :param node: tree node; must expose ``deprel.get_value()``.
        :param architecture_order: stored unchanged as ``self.location``.
        :param create_output_strings: forwarded to ``generate_name``.
        """
        # generate_name hands back a (name_parts, name) pair for this node.
        name_parts, name = generate_name(node, create_output_strings)
        self.name_parts = name_parts
        self.name = name
        self.location = architecture_order
        self.deprel = node.deprel.get_value()

    def __repr__(self):
        """Return the generated name as the object's representation."""
        return self.name
--- a/ResultTree.py
+++ b/ResultTree.py
@ -15,9 +15,6 @@
 import copy
 import string
 from generic import create_output_string_form, create_output_string_deprel, create_output_string_lemma, \
    create_output_string_upos, create_output_string_xpos, create_output_string_feats, generate_key
 class ResultTree(object):
    def __init__(self, node, children, filters):
@ -51,7 +48,6 @@ class ResultTree(object):
            for child in children:
                if self.filters['node_order'] and child.node.location < self.node.location:
                    if self.filters['dependency_type']:
                        # separator = ' <' + deprel[i_child][i_answer] + ' '
                        separator = ' <' + child.node.deprel + ' '
                    else:
                        separator = ' < '
@ -104,7 +100,6 @@ class ResultTree(object):
            for child in self.children:
                if self.filters['node_order'] and child.node.location < self.node.location:
                    if self.filters['dependency_type']:
                        # separator = ' <' + deprel[i_child][i_answer] + ' '
                        separator = ' <' + child.node.deprel + ' '
                    else:
                        separator = ' < '
@ -169,93 +164,9 @@ class ResultTree(object):
            self.array = [self.node.name_parts]
        return self.array
    # def add(self, string, architecture_order, separator, is_left):
    #     if is_left:
    #         self.array = [string] + self.array
    #         self.order = [architecture_order] + self.order
    #         # self.order = [architecture_order] + self.order
    #         self.separators = [separator] + self.separators
    #         self.key = string + ' ' + separator + ' ' + self.key
    #         self.order_key = architecture_order + ' ' + separator + ' ' + self.order_key
    #
    #     else:
    #         self.array += [string]
    #         self.order += [architecture_order]
    #         # self.order += [architecture_order]
    #         self.separators += [separator]
    #
    #         self.key += ' ' + separator + ' ' + string
    #         self.order_key += ' ' + separator + ' ' + architecture_order
    # def add_separator(self, separator, left=True):
    #     self_copy = copy.copy(self)
    #     if left:
    #         self_copy.separators += [separator]
    #         self_copy.key += separator
    #         self_copy.order_key += separator
    #     else:
    #         self_copy.separators = [separator] + self_copy.separators
    #         self_copy.key = separator + self_copy.key
    #         self_copy.order_key = separator + self_copy.order_key
    #     return self_copy
    # def merge_results2(self):
    # def merge_results(self, right_t, separator, left=True):
    #     left_tree = copy.copy(self)
    #     right_tree = copy.copy(right_t)
    #
    #     if separator:
    #         if left:
    #             # merged_results.append(left_part + right_part + separator)
    #             left_tree.key = left_tree.key + right_tree.key + separator
    #             left_tree.order_key = left_tree.order_key + right_tree.order_key + separator
    #             left_tree.array = left_tree.array + right_tree.array
    #             left_tree.order = left_tree.order + right_tree.order
    #             # left_tree.order = str([architecture_order])
    #             left_tree.separators = left_tree.separators + right_tree.separators + [separator]
    #         else:
    #             # merged_results.append(left_part + separator + right_part)
    #             left_tree.key = left_tree.key + separator + right_tree.key
    #             left_tree.order_key = left_tree.order_key + separator + right_tree.order_key
    #             left_tree.array = left_tree.array + right_tree.array
    #             left_tree.order = left_tree.order + right_tree.order
    #             # left_tree.order = str([architecture_order])
    #             left_tree.separators = left_tree.separators + [separator] + right_tree.separators
    #     else:
    #         # merged_results.append(left_part + right_part)
    #         left_tree.key = left_tree.key + right_tree.key
    #         left_tree.order_key = left_tree.order_key + right_tree.order_key
    #         left_tree.array = left_tree.array + right_tree.array
    #         left_tree.order = left_tree.order + right_tree.order
    #         # left_tree.order = str([architecture_order])
    #         left_tree.separators = left_tree.separators + right_tree.separators
    #
    #     return left_tree
    # def extend_answer(self, other_answer, separator):
    #     self.array.extend(other_answer.array)
    #     self.order.extend(other_answer.order)
    #     self.key += separator + other_answer.key
    #     self.order_key += separator + other_answer.order_key
    #     self.separators.extend(separator)
    # def put_in_bracelets(self, inplace=False):
    #     if inplace:
    #         self.key = ('(' + self.key + ')')
    #         self.order_key = ('(' + self.order_key + ')')
    #         return
    #     result = copy.copy(self)
    #     result.key = ('(' + result.key + ')')
    #     result.order_key = ('(' + result.order_key + ')')
    #     return result
    def finalize_result(self):
        result = copy.copy(self)
        result.reset_params()
        # result.key = result.get_key()
        # result.set_root()
        # create order letters
        order = result.get_order()
@ -265,13 +176,5 @@ class ResultTree(object):
            order[ind] = 10000
            order_letters[ind] = string.ascii_uppercase[i]
        result.order = ''.join(order_letters)
        # result.order_key = result.order_key[1:-1]
        # TODO When tree is finalized create relative word order (alphabet)!
        return result
    # def set_root(self):
    #     if len(self.array[0]) > 1:
    #         self.root = '&'.join(self.array[0])
    #     else:
    #         # output_string = create_output_strings[0](node)
    #         self.root = self.array[0][0]
--- a/Tree.py
+++ b/Tree.py
@ -4,8 +4,7 @@ from copy import copy
 from ResultNode import ResultNode
 from ResultTree import ResultTree
 from Value import Value
-from generic import create_output_string_form, create_output_string_deprel, create_output_string_lemma, \
+from generic import generate_key
    create_output_string_upos, create_output_string_xpos, create_output_string_feats, generate_key
 class Tree(object):
@ -13,7 +12,6 @@ class Tree(object):
        if not hasattr(self, 'feats'):
            self.feats_detailed = {}
        # form_unicode = str(form).encode("utf-8")
        if form not in form_dict:
            form_dict[form] = Value(form)
        self.form = form_dict[form]
@ -40,7 +38,6 @@ class Tree(object):
            if not feat in self.feats_detailed:
                self.feats_detailed[feat] = {}
            self.feats_detailed[feat][next(iter(feats_detailed[feat]))] = feats_detailed_dict[feat][next(iter(feats_detailed[feat]))]
        # self.position = position
        self.parent = head
        self.children = []
@ -52,7 +49,6 @@ class Tree(object):
        self.cache = {}
    def add_child(self, child):
        # child.index = len(self.children)
        self.children.append(child)
    def set_parent(self, parent):
@ -68,7 +64,6 @@ class Tree(object):
        return True
    def fits_permanent_requirements(self, filters):
        main_attributes = ['deprel', 'feats', 'form', 'lemma', 'upos']
@ -121,8 +116,6 @@ class Tree(object):
                if result_index in partial_results and result_part_index in partial_results[result_index] and len(partial_results[result_index][result_part_index]) > 0:
                    if len(all_query_indices[result_index][0]) > result_part_index + 1:
                        new_queries.append((result_part_index + 1, result_index, is_permanent))
                    # else:
                    #     completed_subtrees.append((child, result_index))
            child_queries_metadata = new_queries
@ -142,76 +135,11 @@ class Tree(object):
    def add_subtrees(self, old_subtree, new_subtree):
        """Extend ``old_subtree`` in place with the items of ``new_subtree``.

        :param old_subtree: container that is mutated; must provide ``extend``.
        :param new_subtree: iterable whose items are appended, in order.
        """
        old_subtree.extend(new_subtree)
    # def get_results(self, partial_results_dict, result_index, result_part, outcome, last_result_part):
    #     # save results for later usage
    #
    #     # if result index already in and element 0 exists (otherwise error)
    #     if result_index in partial_results_dict and 0 in partial_results_dict[result_index]:
    #         if result_part - 1 in partial_results_dict[result_index]:
    #             if result_part in partial_results_dict[result_index]:
    #                 partial_results_dict[result_index][result_part].extend(self.merge_results(partial_results_dict[result_index][result_part - 1], outcome))
    #             else:
    #                 partial_results_dict[result_index][result_part] = self.merge_results(partial_results_dict[result_index][result_part - 1], outcome)
    #
    #         # extend one word layer with output
    #         else:
    #             partial_results_dict[result_index][0].extend(outcome)
    #     else:
    #         partial_results_dict[result_index] = {0: outcome}
    #
    #     if last_result_part - 1 in partial_results_dict[result_index]:
    #         return partial_results_dict[result_index].pop(last_result_part - 1)
    #     return []
    # def group_results(self, new_partial_subtrees, child_queries_metadata, all_query_indices, partial_results_dict, partial_subtrees):
    #     for outcome, (result_part, result_index, is_permanent) in zip(new_partial_subtrees, child_queries_metadata):
    #         if outcome:
    #             new_results = self.get_results(partial_results_dict, result_index, result_part, outcome, len(all_query_indices[result_index][0]))
    #             if new_results:
    #                 self.add_subtrees(partial_subtrees[result_index], new_results)
    #         else:
    #             if not is_permanent:
    #                 partial_subtrees[result_index].append([])
    # def get_all_query_indices_old(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string):
    #     partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)]
    #     completed_subtrees = [[] for i in range(completed_subtrees_size)]
    #
    #     # list of pairs (index of query in group, group of query)
    #     partial_results_dict = {}
    #
    #     children_queries_generator = self.generate_children_queries(l_all_query_indices, children)
    #
    #     child_index = 0
    #     child, child_queries, child_queries_metadata = next(children_queries_generator)
    #     while child:
    #         # obtain children results
    #         new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string)
    #
    #         self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices,
    #                            partial_results_dict, partial_subtrees)
    #
    #         for i in range(len(new_completed_subtrees)):
    #             completed_subtrees[i].extend(new_completed_subtrees[i])
    #         child, child_queries, child_queries_metadata = children_queries_generator.send(partial_results_dict)
    #         child_index += 1
    #
    #     return partial_subtrees, completed_subtrees
    def get_all_query_indices(self, temporary_query_nb, permanent_query_nb, permanent_query_trees, all_query_indices, children, create_output_string, filters):
        # l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees),
        #                                                                       len(permanent_query_trees),
        #                                                                       permanent_query_trees,
        #                                                                       l_all_query_indices, self.l_children,
        #                                                                       create_output_string)
        partial_answers = [[] for i in range(permanent_query_nb + temporary_query_nb)]
        partial_answers_index = [[] for i in range(permanent_query_nb + temporary_query_nb)]
        complete_answers = [[] for i in range(permanent_query_nb)]
        # list of pairs (index of query in group, group of query)
        partial_results_dict = {}
        # TODO try to erase!!!
        child_queries = [all_query_indice[0] for all_query_indice in all_query_indices]
@ -221,8 +149,6 @@ class Tree(object):
        all_new_partial_answers = [[] for query_part in child_queries_flatten]
        # if filters['caching']:
        # erase duplicate queries
        child_queries_flatten_dedup = []
        child_queries_flatten_dedup_indices = []
        for query_part in child_queries_flatten:
@ -237,7 +163,6 @@ class Tree(object):
        # ask children all queries/partial queries
        for child in children:
            # obtain children results
            # if filters['caching']:
            new_partial_answers_dedup, new_complete_answers = child.get_subtrees(permanent_query_trees, child_queries_flatten_dedup,
                                                                              create_output_string, filters)
@ -247,32 +172,10 @@ class Tree(object):
            for i, flattened_index in enumerate(child_queries_flatten_dedup_indices):
                all_new_partial_answers[i].append(new_partial_answers_dedup[flattened_index])
            # else:
            #     new_partial_answers_architecture, new_partial_answers, new_complete_answers = child.get_subtrees(
            #         permanent_query_trees, child_queries_flatten,
            #         create_output_string, filters)
            #
            #     assert len(new_partial_answers) == len(child_queries_flatten)
            #
            #     for i, new_partial_subtree in enumerate(new_partial_answers):
            #         all_new_partial_answers[i].append(new_partial_subtree)
            #         all_new_partial_answers_architecture[i].append(new_partial_answers_architecture[i])
            #         # if len(new_partial_answers_architecture[i]) > 1:
            #         #     print('HERE!!!')
            #         all_new_partial_answers_deprel[i].append(create_output_string_deprel(child))
            # add 6 queries from 3 split up
            # self.group_results(new_partial_subtrees, child_queries_metadata, all_query_indices,
            #                    partial_results_dict, partial_subtrees)
            for i in range(len(new_complete_answers)):
                # TODO add order rearrangement (TO KEY)
                complete_answers[i].extend(new_complete_answers[i])
        # if create_output_string_lemma(self) == 'drama':
        #     print('HERE!@@!')
        # if create_output_string_form(self) == 'vpiti':
        #     print('HERE!@@!')
        # merge answers in appropriate way
        i = 0
        # iterate over all answers per queries
@ -280,60 +183,14 @@ class Tree(object):
            # iterate over answers of query
            # TODO ERROR IN HERE!
            partial_answers[answer_i] = self.create_answers(all_new_partial_answers[i:i + answer_length], answer_length, filters)
            # while i < answers_length:
            #     self.create_grouped_answers()
            #     i += 1
            i += answer_length
            # merged_results = []
            # for old_result in old_results:
            #     for new_result in new_results:
            #         merged_results.append(old_result + new_result)
            # return merged_results
        # children_queries_generator = self.generate_children_queries(all_query_indices, children)
        #
        # child_index = 0
        # child, child_queries, child_queries_metadata = next(children_queries_generator)
        # while child:
        #     # obtain children results
        #     new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string)
        #
        #     self.group_results(new_partial_subtrees, child_queries_metadata, all_query_indices,
        #                        partial_results_dict, partial_subtrees)
        #
        #     for i in range(len(new_completed_subtrees)):
        #         completed_subtrees[i].extend(new_completed_subtrees[i])
        #     child, child_queries, child_queries_metadata = children_queries_generator.send(partial_results_dict)
        #     child_index += 1
        return partial_answers, complete_answers
    def order_dependent_queries(self, active_permanent_query_trees, active_temporary_query_trees, partial_subtrees,
                                create_output_string, merged_partial_subtrees, i_query, i_answer, filters):
        # string_output = ''
        # if create_output_string_form(self) == 'vožnji':
        #     print('HERE!@@!')
        # if create_output_string_form(self) == 'začelo':
        #     print('HERE!@@!')
        node = ResultNode(self, self.index, create_output_string)
        # TEST = ResultTree(node, [], filters)
        # a = TEST.create_key()
        # if i_query < len(active_permanent_query_trees):
        #     if 'children' in active_permanent_query_trees[i_query]:
        #         merged_partial_subtrees.append(
        #             self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters))
        #         i_answer += 1
        #     else:
        #         merged_partial_subtrees.append([Result(self, self.index, create_output_string)])
        # else:
        #     if 'children' in active_temporary_query_trees[i_query - len(active_permanent_query_trees)]:
        #         merged_partial_subtrees.append(
        #             self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters))
        #         i_answer += 1
        #     else:
        #         merged_partial_subtrees.append([Result(self, self.index, create_output_string)])
        if i_query < len(active_permanent_query_trees):
            if 'children' in active_permanent_query_trees[i_query]:
                merged_partial_subtrees.append(
@ -366,9 +223,6 @@ class Tree(object):
        :param temporary_query_trees:
        """
        # if create_output_string_form(self) == 'vožnji':
        #     print('HERE!@@!')
        # list of all children queries grouped by parent queries
        all_query_indices = []
@ -384,7 +238,6 @@ class Tree(object):
        successful_temporary_queries = []
        for i, temporary_query_tree in enumerate(temporary_query_trees):
            if self.fits_static_requirements(temporary_query_tree, filters) and self.fits_temporary_requirements(filters):
                # if 'l_children' in temporary_query_tree and 'r_children' in temporary_query_tree:
                active_temporary_query_trees.append(temporary_query_tree)
                successful_temporary_queries.append(i)
                if 'children' in temporary_query_tree:
@ -397,7 +250,6 @@ class Tree(object):
                                                                                                      create_output_string, filters)
        merged_partial_answers = []
        # merged_partial_answers_architecture = []
        i_question = 0
        # i_child is necessary, because some queries may be answered at the beginning and were not passed to children.
        # i_child is used to point where we are inside answers
@ -414,82 +266,30 @@ class Tree(object):
            # TODO FINALIZE RESULT
            # erase first and last brackets when adding new query result
            add_subtree = [subtree.finalize_result() for subtree in merged_partial_answers[i]]
            # if 0 < len(active_permanent_query_trees):
            complete_answers[i].extend(add_subtree)
            # completed_subtrees[i].extend(merged_partial_subtrees[i])
        # answers to valid queries
        partial_answers = [[] for i in range(len(temporary_query_trees))]
        for inside_i, outside_i in enumerate(successful_temporary_queries):
            # partial_answers_architecture[outside_i] = merged_partial_answers_architecture[len(active_permanent_query_trees) + inside_i]
            partial_answers[outside_i] = merged_partial_answers[
                len(active_permanent_query_trees) + inside_i]
        # return subtrees_architecture, subtrees, completed_subtrees
        return partial_answers, complete_answers
        # return merged_partial_subtrees_architecture[len(active_permanent_query_trees):], merged_partial_subtrees[len(active_permanent_query_trees):], completed_subtrees
    # @staticmethod
    # def merge_results(left_parts, right_parts, separator, left=True, right_part_free=False):
    #     if not left_parts:
    #         # return all right_parts
    #         return [r_p.add_separator(separator, left) for r_p in right_parts]
    #         # if left:
    #         #     return [r_p + separator for r_p in right_parts]
    #         #     # return [r_p.add_separator(separator, left) for r_p in right_parts]
    #         # else:
    #         #     return [separator + r_p for r_p in right_parts]
    #
    #     if not right_parts:
    #         return [l_p.add_separator(separator, False) for l_p in left_parts]
    #         # return [separator + l_p for l_p in left_parts]
    #     merged_results = []
    #     for left_part in left_parts:
    #         if right_part_free:
    #             for right_part in right_parts[1]:
    #                 merged_results.append((right_parts[0], left_part.merge_results(right_part, separator, left)))
    #         else:
    #             for right_part in right_parts:
    #                 merged_results.append(left_part.merge_results(right_part, separator, left))
    #             # merged_results.append(left_part.merge_results(right_part, separator))
    #             # if separator:
    #             #     if left:
    #             #         merged_results.append(left_part + right_part + separator)
    #             #     else:
    #             #         merged_results.append(left_part + separator + right_part)
    #             # else:
    #             #     merged_results.append(left_part + right_part)
    #     return merged_results
    @staticmethod
    def create_children_groups(left_parts, right_parts):
        if not left_parts:
            # return all right_parts
            return right_parts
            # if left:
            #     return [r_p + separator for r_p in right_parts]
            #     # return [r_p.add_separator(separator, left) for r_p in right_parts]
            # else:
            #     return [separator + r_p for r_p in right_parts]
        if not right_parts:
            return left_parts
-            # return [separator + l_p for l_p in left_parts]
+
        all_children_group_possibilities = []
        for left_part in left_parts:
            for right_part in right_parts:
                new_part = copy(left_part)
                # new_part.reset_params()
                new_part.extend(right_part)
                all_children_group_possibilities.append(new_part)
                # merged_results.append(left_part.merge_results(right_part, separator))
                # if separator:
                #     if left:
                #         merged_results.append(left_part + right_part + separator)
                #     else:
                #         merged_results.append(left_part + separator + right_part)
                # else:
                #     merged_results.append(left_part + right_part)
        return all_children_group_possibilities
    @staticmethod
@ -500,112 +300,15 @@ class Tree(object):
            for answer2p_i, new_result in enumerate(answer2):
                if answer1p_i != answer2p_i:
                    new_indices = [answer1p_i] + [answer2p_i]
-                    sorted_indices = sorted(new_indices)
+                    # TODO add comparison answers with different indices if equal than ignore
                    if sorted_indices in merged_indices:
                        test = merged_indices.index(sorted(new_indices))
                        # TODO compare answers that have different indices; if they are equal, ignore the duplicate
                    merged_results.append(old_result + new_result)
                    merged_indices.append(new_indices)
        return merged_results, merged_indices
    # def merge_results2(self, child, new_results, filters):
    #     if create_output_string_form(self) == 'začelo':
    #         print('HERE!@@!')
    #     if create_output_string_form(self) == 'Dogodek':
    #         print('HERE!@@!')
    #     if create_output_string_form(self) == 'utišal':
    #         print('HERE!@@!')
    #     if create_output_string_form(self) == 'prijel':
    #         print('HERE!@@!')
    #     if filters['node_order']:
    #         new_child = child
    #         # new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
    #     else:
    #         new_child = sorted(child, key=lambda x: x[0].key)
    #
    #     l_res = []
    #     r_res = []
    #     results = []
    #     for i_answer, answer in enumerate(new_child):
    #         if filters['node_order'] and answer[0].order[0] < self.index:
    #         # if filters['node_order'] and indices[i_child][i_answer] < self.children_split:
    #             if filters['dependency_type']:
    #                 # separator = ' <' + deprel[i_child][i_answer] + ' '
    #                 separator = ' <' + answer[0].deprel + ' '
    #             else:
    #                 separator = ' < '
    #             l_res = self.merge_results(l_res, answer, separator, left=True)
    #             # l_res += answer + separator
    #         else:
    #             if filters['dependency_type']:
    #                 separator = ' >' + answer[0].deprel + ' '
    #             else:
    #                 separator = ' > '
    #             r_res = self.merge_results(r_res, answer, separator, left=False)
    #             # r_res += separator + answer
    #
    #     # if filters['node_order']:
    #     #     r_res_sorted = []
    #     #     for i_answer, answer in new_child_sorted:
    #     #         if filters['dependency_type']:
    #     #             separator = ' >' + answer[0].deprel + ' '
    #     #         else:
    #     #             separator = ' > '
    #     #         r_res_sorted = (i_answer, self.merge_results(r_res_sorted, answer, separator, left=False))
    #     #
    #     #
    #     #     r_res_sorted_combined = self.merge_results(new_results, r_res_sorted, None, right_part_free=True)
    #     #     # print('here')
    #
    #     if l_res:
    #         l_res_combined = self.merge_results(l_res, new_results, None)
    #         if r_res:
    #             r_res_combined = self.merge_results(l_res_combined, r_res, None)
    #             # merged_results.extend(['(' + el + ')' for el in r_res_combined])
    #             result = r_res_combined
    #             # results.extend([el.put_in_bracelets() for el in r_res_combined])
    #         else:
    #             result = l_res_combined
    #             # results.extend([el.put_in_bracelets() for el in l_res_combined])
    #     elif r_res:
    #         r_res_combined = self.merge_results(new_results, r_res, None)
    #         result = r_res_combined
    #         # results.extend([el.put_in_bracelets() for el in r_res_combined])
    #     else:
    #         result = []
    #
    #
    #     results.extend([el.put_in_bracelets() for el in result])
    #
    #     return results
    # def create_merged_results(self, answers, separators, separator_switch):
    #     new_answers = []
    #     for answer_i, answer in enumerate(answers):
    #         new_answer = copy(answer[0])
    #         print(create_output_string_form(self))
    #         for answer_part_i, answer_part in enumerate(answer[1:]):
    #             new_answer.extend_answer(answer_part, separators[answer_part_i])
    #         new_answer.put_in_bracelets(inplace=True)
    #         new_answers.append(new_answer)
    #     return new_answers
    # def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters):
    def merge_results3(self, child, new_results, filters):
        # if create_output_string_form(self) == 'Dogodek':
        #     print('HERE!@@!')
        # if create_output_string_form(self) == 'začelo':
        #     print('HERE!@@!')
        # if create_output_string_form(self) == 'utišal':
        #     print('HERE!@@!')
        # if create_output_string_form(self) == 'prijel':
        #     print('HERE!@@!')
        if filters['node_order']:
            new_child = child
-            # new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
+
            # new_child_sorted = sorted(child, key=lambda x: x[0].get_key())
        else:
            new_child = sorted(child, key=lambda x: x[0].get_key())
@ -613,58 +316,28 @@ class Tree(object):
        for i_answer, answer in enumerate(new_child):
            children_groups = self.create_children_groups(children_groups, [[answer_part] for answer_part in answer])
                # r_res += separator + answer
        # children_groups_sorted = []
        # for i_answer, answer in enumerate(new_child_sorted):
        #     children_groups_sorted = self.create_children_groups(children_groups_sorted, [[answer_part] for answer_part in answer])
        #
        #
        # results_sorted = {}
        # for result in new_results:
        #     for children in children_groups_sorted:
        #         new_result = copy(result)
        #         new_result.set_children(children)
        #         order = tuple(sorted(new_result.get_order()))
        #         results_sorted[order] = new_result
        results = []
        for result in new_results:
            for children in children_groups:
                new_result = copy(result)
                # if result.key is not None or result.order is not None or result.array is not None or result.order_key is not None:
                #     print('here')
                # new_result.reset_params()
                new_result.set_children(children)
                # order = tuple(sorted(new_result.get_order()))
                results.append(new_result)
        return results
    def create_output_children(self, children, new_results, filters):
        """Merge every group of child answers with the given results.

        :param children: iterable of child answer groups; each group is passed
            to ``merge_results3`` in turn.
        :param new_results: results handed to ``merge_results3`` for each group.
        :param filters: filter settings, forwarded unchanged.
        :return: flat list holding the output of all ``merge_results3`` calls,
            in the order of ``children``.
        """
        return [
            merged
            for child in children
            for merged in self.merge_results3(child, new_results, filters)
        ]
    # @staticmethod
    def create_answers(self, separated_answers, answer_length, filters):
        partly_built_trees = [[None] * answer_length]
        partly_built_trees_architecture_indices = [[None] * answer_length]
        built_trees = []
        built_trees_architecture_indices = []
        # if create_output_string_form(self) == 'Dogodek':
        #     print('HERE!@@!')
        # iterate over children first, so that new partly built trees are added only after all results of specific
        # child are added
        for child_i in range(len(separated_answers[0])):
@ -712,10 +385,7 @@ class Tree(object):
                for unique_tree in unique_trees_architecture:
                    already_in = True
                    for part_i in range(len(unique_tree)):
                        # test = unique_tree[part_i][0].get_order_key()
                        if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].get_order_key() != new_tree[part_i][i_unique_part].get_order_key() for i_unique_part in range(len(unique_tree[part_i]))):
                        # if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].order_key != new_tree[part_i][i_unique_part].order_key for i_unique_part in range(len(unique_tree[part_i]))):
                        # if unique_tree[part_i].order_key != new_tree[part_i].order_key:
                            already_in = False
                            break
                    if already_in:
@ -724,20 +394,5 @@ class Tree(object):
                if is_unique:
                    unique_trees_architecture.append(new_tree)
                    # if not filters['node_order']:
                    # l_ordered_built_trees_architecture.append(new_tree_architecture)
                    l_ordered_built_trees.append(new_tree)
                    # TODO NODE ORDER = FALSE
                    # else:
                    #
                    #     ordered_built_trees_architecture.append(tree_architecture)
                    #     ordered_built_trees.append(tree)
                # print("test")
        # for answer1_i, answer1 in enumerate(separated_answers):
        #     for answer2_i, answer2 in enumerate(separated_answers):
        #         if answer1_i != answer2_i:
        #             res, res_i = self.merge_answer(answer1, answer2, answer1_i, answer2_i)
        #             print('aaa')
        #
        # pass
        return l_ordered_built_trees
--- a/dependency-parsetree.py
+++ b/dependency-parsetree.py
@ -23,11 +23,13 @@ import pickle
 import re
 import string
 import time
 import timeit
 from multiprocessing import Pool
 from pathlib import Path
 import gzip
 import sys
 import pyconll
 from Tree import Tree
 from generic import get_collocabilities, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos, create_output_string_feats
 sys.setrecursionlimit(25000)
 def save_zipped_pickle(obj, filename, protocol=-1):
@ -39,31 +41,6 @@ def load_zipped_pickle(filename):
        loaded_object = pickle.load(f)
        return loaded_object
 import pyconll
 from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos, create_output_string_feats
 # for separate searches of feats
 # feats_detailed_list = [
 #     # lexical features
 #     'PronType', 'NumType', 'Poss', 'Reflex', 'Foreign', 'Abbr',
 #
 #     # Inflectional features (nominal)
 #     'Gender', 'Animacy', 'NounClass', 'Number', 'Case', 'Definite', 'Degree',
 #
 #     # Inflectional features (verbal)
 #     'VerbForm', 'Mood', 'Tense', 'Aspect', 'Voice', 'Evident', 'Polarity', 'Person', 'Polite', 'Clusivity',
 #
 #     # Other
 #     'Variant', 'Number[psor]', 'Gender[psor]', 'NumForm'
 # ]
 # feats_detailed_list = []
 # feats_detailed_dict = {key: {} for key in feats_detailed_list}
 from generic import get_collocabilities
 def decode_query(orig_query, dependency_type, feats_detailed_list):
    new_query = False
@ -72,7 +49,6 @@ def decode_query(orig_query, dependency_type, feats_detailed_list):
        new_query = True
        orig_query = orig_query[1:-1]
    # if orig_query is '_' return {}
    if dependency_type != '':
        decoded_query = {'deprel': dependency_type}
    else:
@ -88,19 +64,14 @@ def decode_query(orig_query, dependency_type, feats_detailed_list):
            if len(orig_query_split) > 1:
                if orig_query_split[0] == 'L':
                    decoded_query['lemma'] = orig_query_split[1]
                    # return decoded_query
                elif orig_query_split[0] == 'upos':
                    decoded_query['upos'] = orig_query_split[1]
                    # return decoded_query
                elif orig_query_split[0] == 'xpos':
                    decoded_query['xpos'] = orig_query_split[1]
                    # return decoded_query
                elif orig_query_split[0] == 'form':
                    decoded_query['form'] = orig_query_split[1]
                    # return decoded_query
                elif orig_query_split[0] == 'feats':
                    decoded_query['feats'] = orig_query_split[1]
                    # return decoded_query
                elif orig_query_split[0] in feats_detailed_list:
                    decoded_query['feats_detailed'] = {}
                    decoded_query['feats_detailed'][orig_query_split[0]] = orig_query_split[1]
@ -111,18 +82,11 @@ def decode_query(orig_query, dependency_type, feats_detailed_list):
                    print('???')
            elif not new_query:
                decoded_query['form'] = orig_query_split_part
                # return decoded_query
        return decoded_query
    # split on whitespace, except inside parentheses
    # PATTERN = re.compile(r'''((?:[^ ()]|\([^.]*\))+)''')
    # all_orders = PATTERN.split(orig_query)
    # PATTERN = re.compile(r"(?:[^ ()]|\([^.]*\))+")
    # all_orders = re.findall(r"(?:[^ ()]|\([^]*\))+", orig_query)
    all_orders = re.split(r"\s+(?=[^()]*(?:\(|$))", orig_query)
    # all_orders = orig_query.split()
    node_actions = all_orders[::2]
    priority_actions = all_orders[1::2]
    priority_actions_beginnings = [a[0] for a in priority_actions]
@ -148,8 +112,6 @@ def decode_query(orig_query, dependency_type, feats_detailed_list):
 def create_trees(input_path, internal_saves, feats_detailed_dict={}, save=True):
    # internal_saves = filters['internal_saves']
    # input_path = filters['input']
    hash_object = hashlib.sha1(input_path.encode('utf-8'))
    hex_dig = hash_object.hexdigest()
    trees_read_outputfile = os.path.join(internal_saves, hex_dig)
@ -165,13 +127,8 @@ def create_trees(input_path, internal_saves, feats_detailed_dict={}, save=True):
        for sentence in train:
            root = None
            root_id = None
            token_nodes = []
            for token in sentence:
                # token_feats = ''
                # for k, v in token.feats.items():
                #     token_feats += k + next(iter(v)) + '|'
                # token_feats = token_feats[:-1]
                if not token.id.isdigit():
                    continue
@ -194,12 +151,6 @@ def create_trees(input_path, internal_saves, feats_detailed_dict={}, save=True):
                    token.set_parent(None)
                else:
                    parent_id = int(token.parent) - 1
                    # if token_id < parent_id:
                    #     token_nodes[parent_id].add_l_child(token)
                    # elif token_id > parent_id:
                    #     token_nodes[parent_id].add_r_child(token)
                    # else:
                    #     raise Exception('Root element should not be here!')
                    if token_nodes[parent_id].children_split == -1 and token_id > parent_id:
                        token_nodes[parent_id].children_split = len(token_nodes[parent_id].children)
                    token_nodes[parent_id].add_child(token)
@ -210,35 +161,19 @@ def create_trees(input_path, internal_saves, feats_detailed_dict={}, save=True):
                    token.children_split = len(token.children)
            if root == None:
                # print(input_path)
                print('No root: ' + sentence.id)
                continue
                # raise Exception('No root element in sentence!')
            all_trees.append(root)
        if save:
            save_zipped_pickle((all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict), trees_read_outputfile, protocol=2)
        # with open(trees_read_outputfile, 'wb') as output:
        #
        #     pickle.dump((all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict), output)
    else:
        print('Reading trees:')
        print('Completed')
        all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict = load_zipped_pickle(trees_read_outputfile)
        # with open(trees_read_outputfile, 'rb') as pkl_file:
        #     (all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict) = pickle.load(pkl_file)
    return all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict
 # def order_independent_queries(query_tree):
 #     all_children = query_tree['l_children'] + query_tree['r_children']
 #     if all_children > 0:
 #
 #     else:
 #         return query_tree
 #     pass
 def printable_answers(query):
    # all_orders = re.findall(r"(?:[^ ()]|\([^]*\))+", query)
    all_orders = re.split(r"\s+(?=[^()]*(?:\(|$))", query)
@ -293,11 +228,6 @@ def tree_calculations_chunks(input_data):
    return result_dict
 def chunkify(a, n):
    """Lazily split the sequence ``a`` into ``n`` consecutive slices.

    Slice lengths differ by at most one: when ``len(a)`` is not divisible by
    ``n``, the leading ``len(a) % n`` slices each carry one extra element.

    :param a: sliceable sequence supporting ``len``
    :param n: number of slices to produce (``0`` raises ``ZeroDivisionError``
        immediately, because the sizes are computed before the generator
        is returned)
    :return: generator yielding ``n`` slices of ``a`` in order
    """
    base, extra = divmod(len(a), n)

    def start_of(idx):
        # Offset where chunk ``idx`` begins; the first ``extra`` chunks are
        # one element longer, which shifts every later chunk accordingly.
        return idx * base + min(idx, extra)

    return (a[start_of(idx):start_of(idx + 1)] for idx in range(n))
 def add_node(tree):
    if 'children' in tree:
        tree['children'].append({})
@ -362,30 +292,11 @@ def create_ngrams_query_trees(n, trees):
                    new_trees.append(new_tree)
        trees = new_trees
        # delete_duplicates(trees)
        # print('here')
    # tree_grow(tree)
    # tree_grow(tree)
    # tree['children'] = [{}]
    return trees
 def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, filters, unigrams_dict, result_dict):
    with Pool(cpu_cores) as p:
        # 1.25 s (16 cores)
        # chunked_trees = list(chunkify(all_trees, cpu_cores))
        # if cpu_cores > 1:
        #     part_results = p.map(tree_calculations_chunks,
        #                          [(tree, query_tree, create_output_string_funct, filters) for tree in chunked_trees])
        #
        #     for part_result in part_results:
        #         for r_k, r_v in part_result.items():
        #             if r_k in result_dict:
        #                 result_dict[r_k] += r_v
        #             else:
        #                 result_dict[r_k] = r_v
        # 1.02 s (16 cores)
        if cpu_cores > 1:
            # input_data = (tree, query_tree, create_output_string_functs, filters)
            all_unigrams = p.map(get_unigrams, [(tree, query_tree, create_output_string_functs, filters) for tree in all_trees])
            for unigrams in all_unigrams:
                for unigram in unigrams:
@ -396,24 +307,14 @@ def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, f
            all_subtrees = p.map(tree_calculations, [(tree, query_tree, create_output_string_functs, filters) for tree in all_trees])
            # for subtrees in all_subtrees:
            for tree_i, subtrees in enumerate(all_subtrees):
                for query_results in subtrees:
                    for r in query_results:
                        # if r.key == '(ne <advmod more >xcomp (se <expl izogniti) >punct .)':
                        #     print('HERE')
                        #     print(tree_i)
                        if filters['node_order']:
                            key = r.get_key() + r.order
                        else:
                            key = r.get_key()
                        # if r == '(" < , < je < velik) < tem':
                        #     print(tree_i)
                        # if r in result_dict:
                        #     result_dict[r] += 1
                        # else:
                        #     result_dict[r] = 1
                        if key in result_dict:
                            result_dict[key]['number'] += 1
                        else:
@ -421,11 +322,7 @@ def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, f
        # 3.65 s (1 core)
        else:
            # for tree_i, tree in enumerate(all_trees[-5:]):
            for tree_i, tree in enumerate(all_trees):
            # for tree_i, tree in enumerate(all_trees[852:]):
            # for tree_i, tree in enumerate(all_trees[1689:]):
            # for tree_i, tree in enumerate(all_trees[1:3]):
                input_data = (tree, query_tree, create_output_string_functs, filters)
                if filters['association_measures']:
                    unigrams = get_unigrams(input_data)
@ -434,10 +331,7 @@ def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, f
                            unigrams_dict[unigram] += 1
                        else:
                            unigrams_dict[unigram] = 1
-            # for tree_i, tree in enumerate(all_trees[1:]):
+
            # text = Če pa ostane odrasel otrok doma, se starši le težko sprijaznijo s tem, da je "velik", otrok pa ima ves čas občutek, da se njegovi starši po nepotrebnem vtikajo v njegovo življenje.
            # for tree_i, tree in enumerate(all_trees[5170:]):
            # for tree in all_trees:
                subtrees = tree_calculations(input_data)
                for query_results in subtrees:
                    for r in query_results:
@ -445,8 +339,6 @@ def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, f
                            key = r.get_key() + r.order
                        else:
                            key = r.get_key()
                        # if r == '(" < , < je < velik) < tem':
                        #     print(tree_i)
                        if key in result_dict:
                            result_dict[key]['number'] += 1
                        else:
@ -465,7 +357,6 @@ def read_filters(config, feats_detailed_list):
                query_tree.extend(create_ngrams_query_trees(i, [{}]))
    else:
        query_tree = [decode_query('(' + config.get('settings', 'query') + ')', '', feats_detailed_list)]
        # order_independent_queries(query_tree)
    # set filters
    node_types = config.get('settings', 'node_type').split('+')
@ -506,11 +397,8 @@ def read_filters(config, feats_detailed_list):
            attribute_dict = {}
            for attribute in option.split('&'):
                value = attribute.split('=')
                # assert value[0] in ['deprel', 'lemma', 'upos', 'xpos', 'form',
                #                     'feats'], '"root_whitelist" is not set up correctly'
                attribute_dict[value[0]] = value[1]
            filters['root_whitelist'].append(attribute_dict)
        # filters['root_whitelist'] = [{'upos': 'NOUN', 'Case': 'Nom'}, {'upos': 'ADJ', 'Degree': 'Sup'}]
    else:
        filters['root_whitelist'] = []
@ -540,12 +428,6 @@ def main():
    internal_saves = config.get('settings', 'internal_saves')
    input_path = config.get('settings', 'input')
    # a = args.config_file
    # config.read('config.ini')
    # create queries
    if os.path.isdir(input_path):
        checkpoint_path = Path(internal_saves, 'checkpoint.pkl')
@ -572,9 +454,6 @@ def main():
            for path in sorted(pathlist):
                # path is a Path object, not a string, so convert it
                path_str = str(path)
                # if Path(path_str).name == 'GF0003946-dedup.conllu':
                #     break
                # print(path_in_str)
                (all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, sub_corpus_size,
                 feats_detailed_list) = create_trees(path_str, internal_saves, feats_detailed_dict=feats_detailed_list, save=False)
@ -593,7 +472,6 @@ def main():
            # 15.26
            print("Execution time:")
            print("--- %s seconds ---" % (time.time() - start_exe_time))
            # print(1 + 'asd')
            save_zipped_pickle(
                (already_processed, result_dict, unigrams_dict, corpus_size, feats_detailed_list),
                checkpoint_path, protocol=2)
@ -620,26 +498,6 @@ def main():
        print("Execution time:")
        print("--- %s seconds ---" % (time.time() - start_exe_time))
            # test 1 layer queries
            # # tree.r_children = []
            # # tree.children[1].children = []
            # # query = [{'children': [{}]}, {'children': [{}]}]
            # # query = [{"children": [{}, {}]}, {"children": [{}]}, {"children": [{}, {}, {}]}]
            # query = [{"children": [{'form': 'je'}, {}]}, {"children": [{'form': 'je'}]}, {"children": [{'form': 'je'}, {}, {}]}]
            # # query = [{'q1':'', "children": [{'a1':''}, {'a2':''}]}, {'q2':'', "children": [{'b1':''}]}, {'q3':'', "children": [{'c1':''}, {'c2':''}, {'c3':''}]}]
            # _, _, subtrees = tree.get_subtrees(query, [], create_output_string_funct)
            # # _, subtrees = tree.get_subtrees([{'q1':'', "children": [{'a1':''}, {'a2':''}], "children": []}, {'q2':'', "children": [{'b1':''}], "children": []}, {'q3':'', "children": [{'c1':''}, {'c2':''}, {'c3':''}], "children": []}], [])
            # print('HERE!')
            # test 2 layer queries
            # tree.r_children = [Tree('je', '', '', '', '', form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, None)]
            # tree.l_children[1].l_children = []
            # new_tree = Tree('bil', '', '', '', '', form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, None)
            # new_tree.l_children = [tree]
            # _, subtrees = new_tree.get_subtrees(
            #     [{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}]}]}], [])
            # # _, subtrees = new_tree.get_subtrees(
            # #     [{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}], "r_children": []}],  "r_children": []}], [])
    sorted_list = sorted(result_dict.items(), key=lambda x: x[1]['number'], reverse=True)
    with open(config.get('settings', 'output'), "w", newline="") as f:
@ -660,7 +518,6 @@ def main():
            header += ['Root node']
        if filters['association_measures']:
            header += ['MI', 'MI3', 'Dice', 'logDice', 't-score', 'simple-LL']
        # header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Absolute frequency']
        writer.writerow(header)
        if filters['lines_threshold']:
@ -673,7 +530,6 @@ def main():
            if filters['frequency_threshold'] and filters['frequency_threshold'] > v['number']:
                break
            words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))]
            # words_only = printable_answers(k)
            row = [v['object'].get_key()[1:-1]] + words_only + [str(v['number'])]
            row += ['%.4f' % relative_frequency]
            if filters['node_order']:
--- a/generic.py
+++ b/generic.py
@ -45,7 +45,6 @@ def generate_key(node, create_output_strings, print_lemma=True):
    if len(array[0]) > 1:
        key = '&'.join(key_array[0])
    else:
        # output_string = create_output_strings[0](node)
        key = key_array[0][0]
    return array, key
@ -61,7 +60,6 @@ def generate_name(node, create_output_strings, print_lemma=True):
    if len(array) > 1:
        name = '&'.join(name_array)
    else:
        # output_string = create_output_strings[0](node)
        name = name_array[0]
    return array, name
@ -74,7 +72,6 @@ def get_collocabilities(ngram, unigrams_dict, corpus_size):
        if len(key_array) > 1:
            key = '&'.join(key_array)
        else:
            # output_string = create_output_strings[0](node)
            key = key_array[0]
        sum_fwi += unigrams_dict[key]
        mul_fwi *= unigrams_dict[key]