Speed improvements

This commit is contained in:
Luka 2019-12-15 10:21:20 +01:00
parent 41f87fc8bd
commit c6eaf12753
3 changed files with 275 additions and 261 deletions

View File

@ -7,29 +7,13 @@ from generic import create_output_string_form, create_output_string_deprel, crea
class ResultTree(object):
def __init__(self, node, children, filters):
# self.array = [[create_output_string(node) for create_output_string in create_output_strings]]
# if create_output_string_lemma in create_output_strings:
# key_array = [[create_output_string(node) if create_output_string != create_output_string_lemma else 'L=' + create_output_string(node) for create_output_string in create_output_strings]]
# else:
# key_array = self.array
# if len(self.array[0]) > 1:
# self.key = '&'.join(key_array[0])
# else:
# # output_string = create_output_strings[0](node)
# self.key = key_array[0][0]
self.node = node
# order with original numbers in sentences
# self.order = str([architecture_order])
# order with numbers from 0 to n of n-gram
# self.root = ''
# self.final_order = ''
# self.separators = separators
self.children = children
self.filters = filters
self.key = None
self.order_key = None
self.order = None
self.array = None
def __repr__(self):
    """Return the result's key string (delegates to ``get_key``)."""
    return self.get_key()
@ -37,18 +21,21 @@ class ResultTree(object):
def set_children(self, children):
    """Replace the stored children with ``children``.

    Only the ``children`` attribute is rebound here; ``key``, ``order_key``,
    ``order`` and ``array`` are left as they are.
    """
    self.children = children
def get_key(self, get_free=False):
# if self.key:
# return self.key
def reset_params(self):
self.key = None
self.order_key = None
self.order = None
self.array = None
def get_key(self):
if self.key:
return self.key
key = ''
write_self_node_to_result = False
if self.children:
if get_free:
children = sorted(self.children, key=lambda x: x.node.name)
else:
children = self.children
children = self.children
for child in children:
if self.filters['node_order'] and child.node.location < self.node.location and not get_free:
if self.filters['node_order'] and child.node.location < self.node.location:
if self.filters['dependency_type']:
# separator = ' <' + deprel[i_child][i_answer] + ' '
separator = ' <' + child.node.deprel + ' '
@ -72,9 +59,31 @@ class ResultTree(object):
self.key = self.node.name
return self.key
def get_key_sorted(self):
key = ''
write_self_node_to_result = False
if self.children:
children = sorted(self.children, key=lambda x: x.node.name)
for child in children:
if not write_self_node_to_result:
write_self_node_to_result = True
key += self.node.name
if self.filters['dependency_type']:
separator = ' >' + child.node.deprel + ' '
else:
separator = ' > '
key += separator + child.get_key_sorted()
if not write_self_node_to_result:
key += self.node.name
key = '(' + key + ')'
else:
key = self.node.name
return key
def get_order_key(self):
# if self.order_key:
# return self.order_key
if self.order_key:
return self.order_key
order_key = ''
write_self_node_to_result = False
if self.children:
@ -103,8 +112,8 @@ class ResultTree(object):
return self.order_key
def get_order(self):
# if self.order_key:
# return self.order_key
if self.order:
return self.order
order = []
write_self_node_to_result = False
if self.children:
@ -125,8 +134,8 @@ class ResultTree(object):
return self.order
def get_array(self):
# if self.order_key:
# return self.order_key
if self.array:
return self.array
array = []
write_self_node_to_result = False
if self.children:
@ -164,73 +173,74 @@ class ResultTree(object):
# self.key += ' ' + separator + ' ' + string
# self.order_key += ' ' + separator + ' ' + architecture_order
def add_separator(self, separator, left=True):
self_copy = copy.copy(self)
if left:
self_copy.separators += [separator]
self_copy.key += separator
self_copy.order_key += separator
else:
self_copy.separators = [separator] + self_copy.separators
self_copy.key = separator + self_copy.key
self_copy.order_key = separator + self_copy.order_key
return self_copy
# def add_separator(self, separator, left=True):
# self_copy = copy.copy(self)
# if left:
# self_copy.separators += [separator]
# self_copy.key += separator
# self_copy.order_key += separator
# else:
# self_copy.separators = [separator] + self_copy.separators
# self_copy.key = separator + self_copy.key
# self_copy.order_key = separator + self_copy.order_key
# return self_copy
# def merge_results2(self):
def merge_results(self, right_t, separator, left=True):
left_tree = copy.copy(self)
right_tree = copy.copy(right_t)
# def merge_results(self, right_t, separator, left=True):
# left_tree = copy.copy(self)
# right_tree = copy.copy(right_t)
#
# if separator:
# if left:
# # merged_results.append(left_part + right_part + separator)
# left_tree.key = left_tree.key + right_tree.key + separator
# left_tree.order_key = left_tree.order_key + right_tree.order_key + separator
# left_tree.array = left_tree.array + right_tree.array
# left_tree.order = left_tree.order + right_tree.order
# # left_tree.order = str([architecture_order])
# left_tree.separators = left_tree.separators + right_tree.separators + [separator]
# else:
# # merged_results.append(left_part + separator + right_part)
# left_tree.key = left_tree.key + separator + right_tree.key
# left_tree.order_key = left_tree.order_key + separator + right_tree.order_key
# left_tree.array = left_tree.array + right_tree.array
# left_tree.order = left_tree.order + right_tree.order
# # left_tree.order = str([architecture_order])
# left_tree.separators = left_tree.separators + [separator] + right_tree.separators
# else:
# # merged_results.append(left_part + right_part)
# left_tree.key = left_tree.key + right_tree.key
# left_tree.order_key = left_tree.order_key + right_tree.order_key
# left_tree.array = left_tree.array + right_tree.array
# left_tree.order = left_tree.order + right_tree.order
# # left_tree.order = str([architecture_order])
# left_tree.separators = left_tree.separators + right_tree.separators
#
# return left_tree
if separator:
if left:
# merged_results.append(left_part + right_part + separator)
left_tree.key = left_tree.key + right_tree.key + separator
left_tree.order_key = left_tree.order_key + right_tree.order_key + separator
left_tree.array = left_tree.array + right_tree.array
left_tree.order = left_tree.order + right_tree.order
# left_tree.order = str([architecture_order])
left_tree.separators = left_tree.separators + right_tree.separators + [separator]
else:
# merged_results.append(left_part + separator + right_part)
left_tree.key = left_tree.key + separator + right_tree.key
left_tree.order_key = left_tree.order_key + separator + right_tree.order_key
left_tree.array = left_tree.array + right_tree.array
left_tree.order = left_tree.order + right_tree.order
# left_tree.order = str([architecture_order])
left_tree.separators = left_tree.separators + [separator] + right_tree.separators
else:
# merged_results.append(left_part + right_part)
left_tree.key = left_tree.key + right_tree.key
left_tree.order_key = left_tree.order_key + right_tree.order_key
left_tree.array = left_tree.array + right_tree.array
left_tree.order = left_tree.order + right_tree.order
# left_tree.order = str([architecture_order])
left_tree.separators = left_tree.separators + right_tree.separators
# def extend_answer(self, other_answer, separator):
# self.array.extend(other_answer.array)
# self.order.extend(other_answer.order)
# self.key += separator + other_answer.key
# self.order_key += separator + other_answer.order_key
# self.separators.extend(separator)
return left_tree
def extend_answer(self, other_answer, separator):
self.array.extend(other_answer.array)
self.order.extend(other_answer.order)
self.key += separator + other_answer.key
self.order_key += separator + other_answer.order_key
self.separators.extend(separator)
def put_in_bracelets(self, inplace=False):
if inplace:
self.key = ('(' + self.key + ')')
self.order_key = ('(' + self.order_key + ')')
return
result = copy.copy(self)
result.key = ('(' + result.key + ')')
result.order_key = ('(' + result.order_key + ')')
return result
# def put_in_bracelets(self, inplace=False):
# if inplace:
# self.key = ('(' + self.key + ')')
# self.order_key = ('(' + self.order_key + ')')
# return
# result = copy.copy(self)
# result.key = ('(' + result.key + ')')
# result.order_key = ('(' + result.order_key + ')')
# return result
def finalize_result(self):
result = copy.copy(self)
result.key = result.get_key()
result.reset_params()
# result.key = result.get_key()
# result.set_root()
# create order letters

334
Tree.py
View File

@ -146,61 +146,61 @@ class Tree(object):
old_subtree.extend(new_subtree)
def get_results(self, partial_results_dict, result_index, result_part, outcome, last_result_part):
# save results for later usage
# def get_results(self, partial_results_dict, result_index, result_part, outcome, last_result_part):
# # save results for later usage
#
# # if result index already in and element 0 exists (otherwise error)
# if result_index in partial_results_dict and 0 in partial_results_dict[result_index]:
# if result_part - 1 in partial_results_dict[result_index]:
# if result_part in partial_results_dict[result_index]:
# partial_results_dict[result_index][result_part].extend(self.merge_results(partial_results_dict[result_index][result_part - 1], outcome))
# else:
# partial_results_dict[result_index][result_part] = self.merge_results(partial_results_dict[result_index][result_part - 1], outcome)
#
# # extend one word layer with output
# else:
# partial_results_dict[result_index][0].extend(outcome)
# else:
# partial_results_dict[result_index] = {0: outcome}
#
# if last_result_part - 1 in partial_results_dict[result_index]:
# return partial_results_dict[result_index].pop(last_result_part - 1)
# return []
# if result index already in and element 0 exists (otherwise error)
if result_index in partial_results_dict and 0 in partial_results_dict[result_index]:
if result_part - 1 in partial_results_dict[result_index]:
if result_part in partial_results_dict[result_index]:
partial_results_dict[result_index][result_part].extend(self.merge_results(partial_results_dict[result_index][result_part - 1], outcome))
else:
partial_results_dict[result_index][result_part] = self.merge_results(partial_results_dict[result_index][result_part - 1], outcome)
# def group_results(self, new_partial_subtrees, child_queries_metadata, all_query_indices, partial_results_dict, partial_subtrees):
# for outcome, (result_part, result_index, is_permanent) in zip(new_partial_subtrees, child_queries_metadata):
# if outcome:
# new_results = self.get_results(partial_results_dict, result_index, result_part, outcome, len(all_query_indices[result_index][0]))
# if new_results:
# self.add_subtrees(partial_subtrees[result_index], new_results)
# else:
# if not is_permanent:
# partial_subtrees[result_index].append([])
# extend one word layer with output
else:
partial_results_dict[result_index][0].extend(outcome)
else:
partial_results_dict[result_index] = {0: outcome}
if last_result_part - 1 in partial_results_dict[result_index]:
return partial_results_dict[result_index].pop(last_result_part - 1)
return []
def group_results(self, new_partial_subtrees, child_queries_metadata, all_query_indices, partial_results_dict, partial_subtrees):
for outcome, (result_part, result_index, is_permanent) in zip(new_partial_subtrees, child_queries_metadata):
if outcome:
new_results = self.get_results(partial_results_dict, result_index, result_part, outcome, len(all_query_indices[result_index][0]))
if new_results:
self.add_subtrees(partial_subtrees[result_index], new_results)
else:
if not is_permanent:
partial_subtrees[result_index].append([])
def get_all_query_indices_old(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string):
    """Collect partial and completed subtrees from the children.

    Drives the generator returned by ``generate_children_queries``: each step
    yields ``(child, child_queries, child_queries_metadata)``, the child's
    ``get_subtrees`` results are grouped via ``group_results``, and the
    current ``partial_results_dict`` is sent back into the generator. The loop
    ends when the generator yields a falsy ``child``.

    Returns:
        ``(partial_subtrees, completed_subtrees)`` - lists of lists with
        ``completed_subtrees_size + temporary_query_trees_size`` and
        ``completed_subtrees_size`` slots respectively.
    """
    # one result slot per query
    partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)]
    completed_subtrees = [[] for i in range(completed_subtrees_size)]
    # list of pairs (index of query in group, group of query)
    partial_results_dict = {}
    children_queries_generator = self.generate_children_queries(l_all_query_indices, children)
    # counts processed children; not read anywhere in this method
    child_index = 0
    child, child_queries, child_queries_metadata = next(children_queries_generator)
    while child:
        # obtain children results
        new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string)
        self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices,
                           partial_results_dict, partial_subtrees)
        # completed subtrees are accumulated slot by slot
        for i in range(len(new_completed_subtrees)):
            completed_subtrees[i].extend(new_completed_subtrees[i])
        # hand the accumulated partial results back to the generator and fetch the next child
        child, child_queries, child_queries_metadata = children_queries_generator.send(partial_results_dict)
        child_index += 1
    return partial_subtrees, completed_subtrees
# def get_all_query_indices_old(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string):
# partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)]
# completed_subtrees = [[] for i in range(completed_subtrees_size)]
#
# # list of pairs (index of query in group, group of query)
# partial_results_dict = {}
#
# children_queries_generator = self.generate_children_queries(l_all_query_indices, children)
#
# child_index = 0
# child, child_queries, child_queries_metadata = next(children_queries_generator)
# while child:
# # obtain children results
# new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string)
#
# self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices,
# partial_results_dict, partial_subtrees)
#
# for i in range(len(new_completed_subtrees)):
# completed_subtrees[i].extend(new_completed_subtrees[i])
# child, child_queries, child_queries_metadata = children_queries_generator.send(partial_results_dict)
# child_index += 1
#
# return partial_subtrees, completed_subtrees
def get_all_query_indices(self, temporary_query_nb, permanent_query_nb, permanent_query_trees, all_query_indices, children, create_output_string, filters):
# l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees),
@ -432,37 +432,37 @@ class Tree(object):
return partial_answers, complete_answers
# return merged_partial_subtrees_architecture[len(active_permanent_query_trees):], merged_partial_subtrees[len(active_permanent_query_trees):], completed_subtrees
@staticmethod
def merge_results(left_parts, right_parts, separator, left=True, right_part_free=False):
if not left_parts:
# return all right_parts
return [r_p.add_separator(separator, left) for r_p in right_parts]
# if left:
# return [r_p + separator for r_p in right_parts]
# # return [r_p.add_separator(separator, left) for r_p in right_parts]
# else:
# return [separator + r_p for r_p in right_parts]
if not right_parts:
return [l_p.add_separator(separator, False) for l_p in left_parts]
# return [separator + l_p for l_p in left_parts]
merged_results = []
for left_part in left_parts:
if right_part_free:
for right_part in right_parts[1]:
merged_results.append((right_parts[0], left_part.merge_results(right_part, separator, left)))
else:
for right_part in right_parts:
merged_results.append(left_part.merge_results(right_part, separator, left))
# merged_results.append(left_part.merge_results(right_part, separator))
# if separator:
# if left:
# merged_results.append(left_part + right_part + separator)
# else:
# merged_results.append(left_part + separator + right_part)
# else:
# merged_results.append(left_part + right_part)
return merged_results
# @staticmethod
# def merge_results(left_parts, right_parts, separator, left=True, right_part_free=False):
# if not left_parts:
# # return all right_parts
# return [r_p.add_separator(separator, left) for r_p in right_parts]
# # if left:
# # return [r_p + separator for r_p in right_parts]
# # # return [r_p.add_separator(separator, left) for r_p in right_parts]
# # else:
# # return [separator + r_p for r_p in right_parts]
#
# if not right_parts:
# return [l_p.add_separator(separator, False) for l_p in left_parts]
# # return [separator + l_p for l_p in left_parts]
# merged_results = []
# for left_part in left_parts:
# if right_part_free:
# for right_part in right_parts[1]:
# merged_results.append((right_parts[0], left_part.merge_results(right_part, separator, left)))
# else:
# for right_part in right_parts:
# merged_results.append(left_part.merge_results(right_part, separator, left))
# # merged_results.append(left_part.merge_results(right_part, separator))
# # if separator:
# # if left:
# # merged_results.append(left_part + right_part + separator)
# # else:
# # merged_results.append(left_part + separator + right_part)
# # else:
# # merged_results.append(left_part + right_part)
# return merged_results
@staticmethod
def create_children_groups(left_parts, right_parts):
@ -482,6 +482,7 @@ class Tree(object):
for left_part in left_parts:
for right_part in right_parts:
new_part = copy(left_part)
# new_part.reset_params()
new_part.extend(right_part)
all_children_group_possibilities.append(new_part)
# merged_results.append(left_part.merge_results(right_part, separator))
@ -511,87 +512,87 @@ class Tree(object):
merged_indices.append(new_indices)
return merged_results, merged_indices
def merge_results2(self, child, new_results, filters):
if create_output_string_form(self) == 'začelo':
print('HERE!@@!')
if create_output_string_form(self) == 'Dogodek':
print('HERE!@@!')
if create_output_string_form(self) == 'utišal':
print('HERE!@@!')
if create_output_string_form(self) == 'prijel':
print('HERE!@@!')
if filters['node_order']:
new_child = child
# new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
else:
new_child = sorted(child, key=lambda x: x[0].key)
# def merge_results2(self, child, new_results, filters):
# if create_output_string_form(self) == 'začelo':
# print('HERE!@@!')
# if create_output_string_form(self) == 'Dogodek':
# print('HERE!@@!')
# if create_output_string_form(self) == 'utišal':
# print('HERE!@@!')
# if create_output_string_form(self) == 'prijel':
# print('HERE!@@!')
# if filters['node_order']:
# new_child = child
# # new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
# else:
# new_child = sorted(child, key=lambda x: x[0].key)
#
# l_res = []
# r_res = []
# results = []
# for i_answer, answer in enumerate(new_child):
# if filters['node_order'] and answer[0].order[0] < self.index:
# # if filters['node_order'] and indices[i_child][i_answer] < self.children_split:
# if filters['dependency_type']:
# # separator = ' <' + deprel[i_child][i_answer] + ' '
# separator = ' <' + answer[0].deprel + ' '
# else:
# separator = ' < '
# l_res = self.merge_results(l_res, answer, separator, left=True)
# # l_res += answer + separator
# else:
# if filters['dependency_type']:
# separator = ' >' + answer[0].deprel + ' '
# else:
# separator = ' > '
# r_res = self.merge_results(r_res, answer, separator, left=False)
# # r_res += separator + answer
#
# # if filters['node_order']:
# # r_res_sorted = []
# # for i_answer, answer in new_child_sorted:
# # if filters['dependency_type']:
# # separator = ' >' + answer[0].deprel + ' '
# # else:
# # separator = ' > '
# # r_res_sorted = (i_answer, self.merge_results(r_res_sorted, answer, separator, left=False))
# #
# #
# # r_res_sorted_combined = self.merge_results(new_results, r_res_sorted, None, right_part_free=True)
# # # print('here')
#
# if l_res:
# l_res_combined = self.merge_results(l_res, new_results, None)
# if r_res:
# r_res_combined = self.merge_results(l_res_combined, r_res, None)
# # merged_results.extend(['(' + el + ')' for el in r_res_combined])
# result = r_res_combined
# # results.extend([el.put_in_bracelets() for el in r_res_combined])
# else:
# result = l_res_combined
# # results.extend([el.put_in_bracelets() for el in l_res_combined])
# elif r_res:
# r_res_combined = self.merge_results(new_results, r_res, None)
# result = r_res_combined
# # results.extend([el.put_in_bracelets() for el in r_res_combined])
# else:
# result = []
#
#
# results.extend([el.put_in_bracelets() for el in result])
#
# return results
l_res = []
r_res = []
results = []
for i_answer, answer in enumerate(new_child):
if filters['node_order'] and answer[0].order[0] < self.index:
# if filters['node_order'] and indices[i_child][i_answer] < self.children_split:
if filters['dependency_type']:
# separator = ' <' + deprel[i_child][i_answer] + ' '
separator = ' <' + answer[0].deprel + ' '
else:
separator = ' < '
l_res = self.merge_results(l_res, answer, separator, left=True)
# l_res += answer + separator
else:
if filters['dependency_type']:
separator = ' >' + answer[0].deprel + ' '
else:
separator = ' > '
r_res = self.merge_results(r_res, answer, separator, left=False)
# r_res += separator + answer
# if filters['node_order']:
# r_res_sorted = []
# for i_answer, answer in new_child_sorted:
# if filters['dependency_type']:
# separator = ' >' + answer[0].deprel + ' '
# else:
# separator = ' > '
# r_res_sorted = (i_answer, self.merge_results(r_res_sorted, answer, separator, left=False))
#
#
# r_res_sorted_combined = self.merge_results(new_results, r_res_sorted, None, right_part_free=True)
# # print('here')
if l_res:
l_res_combined = self.merge_results(l_res, new_results, None)
if r_res:
r_res_combined = self.merge_results(l_res_combined, r_res, None)
# merged_results.extend(['(' + el + ')' for el in r_res_combined])
result = r_res_combined
# results.extend([el.put_in_bracelets() for el in r_res_combined])
else:
result = l_res_combined
# results.extend([el.put_in_bracelets() for el in l_res_combined])
elif r_res:
r_res_combined = self.merge_results(new_results, r_res, None)
result = r_res_combined
# results.extend([el.put_in_bracelets() for el in r_res_combined])
else:
result = []
results.extend([el.put_in_bracelets() for el in result])
return results
def create_merged_results(self, answers, separators, separator_switch):
new_answers = []
for answer_i, answer in enumerate(answers):
new_answer = copy(answer[0])
print(create_output_string_form(self))
for answer_part_i, answer_part in enumerate(answer[1:]):
new_answer.extend_answer(answer_part, separators[answer_part_i])
new_answer.put_in_bracelets(inplace=True)
new_answers.append(new_answer)
return new_answers
# def create_merged_results(self, answers, separators, separator_switch):
# new_answers = []
# for answer_i, answer in enumerate(answers):
# new_answer = copy(answer[0])
# print(create_output_string_form(self))
# for answer_part_i, answer_part in enumerate(answer[1:]):
# new_answer.extend_answer(answer_part, separators[answer_part_i])
# new_answer.put_in_bracelets(inplace=True)
# new_answers.append(new_answer)
# return new_answers
# def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters):
def merge_results3(self, child, new_results, filters):
@ -635,8 +636,11 @@ class Tree(object):
for result in new_results:
for children in children_groups:
new_result = copy(result)
# if result.key is not None or result.order is not None or result.array is not None or result.order_key is not None:
# print('here')
# new_result.reset_params()
new_result.set_children(children)
order = tuple(sorted(new_result.get_order()))
# order = tuple(sorted(new_result.get_order()))
results.append(new_result)
return results

View File

@ -483,9 +483,9 @@ def main():
# print('HERE')
# print(tree_i)
if filters['node_order']:
key = r.key + r.order
key = r.get_key() + r.order
else:
key = r.key
key = r.get_key()
# if r == '(" < , < je < velik) < tem':
# print(tree_i)
# if r in result_dict:
@ -520,9 +520,9 @@ def main():
for query_results in subtrees:
for r in query_results:
if filters['node_order']:
key = r.key + r.order
key = r.get_key() + r.order
else:
key = r.key
key = r.get_key()
# if r == '(" < , < je < velik) < tem':
# print(tree_i)
if key in result_dict:
@ -586,11 +586,11 @@ def main():
break
words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))]
# words_only = printable_answers(k)
row = [v['object'].key[1:-1]] + words_only + [str(v['number'])]
row = [v['object'].get_key()[1:-1]] + words_only + [str(v['number'])]
row += ['%.4f' % absolute_frequency]
if filters['node_order']:
row += [v['object'].order]
row += [v['object'].get_key(get_free=True)[1:-1]]
row += [v['object'].get_key_sorted()[1:-1]]
if filters['nodes_number']:
row += ['%d' % len(v['object'].array)]
if filters['print_root']: