Major refactor of Results

This commit is contained in:
Luka 2019-12-14 22:44:18 +01:00
parent 9fe395baf3
commit 53edc3b796
5 changed files with 407 additions and 83 deletions

20
ResultNode.py Normal file
View File

@ -0,0 +1,20 @@
from generic import generate_key, generate_name
class ResultNode(object):
def __init__(self, node, architecture_order, create_output_strings):
self.name_parts, self.name = generate_name(node, create_output_strings)
# self.key_free = self.key
# self.array = [[output_string]]
# self.order_key = str(architecture_order)
self.location = architecture_order
self.deprel = node.deprel.get_value()
# order with original numbers in sentences
# self.order = str([architecture_order])
# order with numbers from 0 to n of n-gram
# self.root = ''
# self.final_order = ''
# self.separators = []
def __repr__(self):
return self.name

249
ResultTree.py Normal file
View File

@ -0,0 +1,249 @@
import copy
import string
from generic import create_output_string_form, create_output_string_deprel, create_output_string_lemma, \
create_output_string_upos, create_output_string_xpos, create_output_string_feats, generate_key
class ResultTree(object):
def __init__(self, node, children, filters):
# self.array = [[create_output_string(node) for create_output_string in create_output_strings]]
# if create_output_string_lemma in create_output_strings:
# key_array = [[create_output_string(node) if create_output_string != create_output_string_lemma else 'L=' + create_output_string(node) for create_output_string in create_output_strings]]
# else:
# key_array = self.array
# if len(self.array[0]) > 1:
# self.key = '&'.join(key_array[0])
# else:
# # output_string = create_output_strings[0](node)
# self.key = key_array[0][0]
self.node = node
# order with original numbers in sentences
# self.order = str([architecture_order])
# order with numbers from 0 to n of n-gram
# self.root = ''
# self.final_order = ''
# self.separators = separators
self.children = children
self.filters = filters
self.key = None
self.order_key = None
def __repr__(self):
return self.get_key()
def set_children(self, children):
self.children = children
def get_key(self):
# if self.key:
# return self.key
key = ''
write_self_node_to_result = False
if self.children:
for child in self.children:
if child.node.location < self.node.location:
if self.filters['dependency_type']:
# separator = ' <' + deprel[i_child][i_answer] + ' '
separator = ' <' + child.node.deprel + ' '
else:
separator = ' < '
key += child.get_key() + separator
else:
if not write_self_node_to_result:
write_self_node_to_result = True
key += self.node.name
if self.filters['dependency_type']:
separator = ' >' + child.node.deprel + ' '
else:
separator = ' > '
key += separator + child.get_key()
if not write_self_node_to_result:
key += self.node.name
self.key = '(' + key + ')'
else:
self.key = self.node.name
return self.key
def get_order_key(self):
# if self.order_key:
# return self.order_key
order_key = ''
write_self_node_to_result = False
if self.children:
for child in self.children:
if child.node.location < self.node.location:
if self.filters['dependency_type']:
# separator = ' <' + deprel[i_child][i_answer] + ' '
separator = ' <' + child.node.deprel + ' '
else:
separator = ' < '
order_key += child.get_order_key() + separator
else:
if not write_self_node_to_result:
write_self_node_to_result = True
order_key += str(self.node.location)
if self.filters['dependency_type']:
separator = ' >' + child.node.deprel + ' '
else:
separator = ' > '
order_key += separator + child.get_order_key()
if not write_self_node_to_result:
order_key += str(self.node.location)
self.order_key = '(' + order_key + ')'
else:
self.order_key = str(self.node.location)
return self.order_key
def get_order(self):
# if self.order_key:
# return self.order_key
order = []
write_self_node_to_result = False
if self.children:
for child in self.children:
if child.node.location < self.node.location:
order += child.get_order()
else:
if not write_self_node_to_result:
write_self_node_to_result = True
order += [self.node.location]
order += child.get_order()
if not write_self_node_to_result:
order += [self.node.location]
self.order = order
else:
self.order = [self.node.location]
return self.order
def get_array(self):
# if self.order_key:
# return self.order_key
array = []
write_self_node_to_result = False
if self.children:
for child in self.children:
if child.node.location < self.node.location:
array += child.get_array()
else:
if not write_self_node_to_result:
write_self_node_to_result = True
array += [self.node.name_parts]
array += child.get_array()
if not write_self_node_to_result:
array += [self.node.name_parts]
self.array = array
else:
self.array = [self.node.name_parts]
return self.array
# def add(self, string, architecture_order, separator, is_left):
# if is_left:
# self.array = [string] + self.array
# self.order = [architecture_order] + self.order
# # self.order = [architecture_order] + self.order
# self.separators = [separator] + self.separators
# self.key = string + ' ' + separator + ' ' + self.key
# self.order_key = architecture_order + ' ' + separator + ' ' + self.order_key
#
# else:
# self.array += [string]
# self.order += [architecture_order]
# # self.order += [architecture_order]
# self.separators += [separator]
#
# self.key += ' ' + separator + ' ' + string
# self.order_key += ' ' + separator + ' ' + architecture_order
def add_separator(self, separator, left=True):
self_copy = copy.copy(self)
if left:
self_copy.separators += [separator]
self_copy.key += separator
self_copy.order_key += separator
else:
self_copy.separators = [separator] + self_copy.separators
self_copy.key = separator + self_copy.key
self_copy.order_key = separator + self_copy.order_key
return self_copy
# def merge_results2(self):
def merge_results(self, right_t, separator, left=True):
left_tree = copy.copy(self)
right_tree = copy.copy(right_t)
if separator:
if left:
# merged_results.append(left_part + right_part + separator)
left_tree.key = left_tree.key + right_tree.key + separator
left_tree.order_key = left_tree.order_key + right_tree.order_key + separator
left_tree.array = left_tree.array + right_tree.array
left_tree.order = left_tree.order + right_tree.order
# left_tree.order = str([architecture_order])
left_tree.separators = left_tree.separators + right_tree.separators + [separator]
else:
# merged_results.append(left_part + separator + right_part)
left_tree.key = left_tree.key + separator + right_tree.key
left_tree.order_key = left_tree.order_key + separator + right_tree.order_key
left_tree.array = left_tree.array + right_tree.array
left_tree.order = left_tree.order + right_tree.order
# left_tree.order = str([architecture_order])
left_tree.separators = left_tree.separators + [separator] + right_tree.separators
else:
# merged_results.append(left_part + right_part)
left_tree.key = left_tree.key + right_tree.key
left_tree.order_key = left_tree.order_key + right_tree.order_key
left_tree.array = left_tree.array + right_tree.array
left_tree.order = left_tree.order + right_tree.order
# left_tree.order = str([architecture_order])
left_tree.separators = left_tree.separators + right_tree.separators
return left_tree
def extend_answer(self, other_answer, separator):
self.array.extend(other_answer.array)
self.order.extend(other_answer.order)
self.key += separator + other_answer.key
self.order_key += separator + other_answer.order_key
self.separators.extend(separator)
def put_in_bracelets(self, inplace=False):
if inplace:
self.key = ('(' + self.key + ')')
self.order_key = ('(' + self.order_key + ')')
return
result = copy.copy(self)
result.key = ('(' + result.key + ')')
result.order_key = ('(' + result.order_key + ')')
return result
def finalize_result(self):
result = copy.copy(self)
result.key = result.get_key()
# result.set_root()
# create order letters
order = result.get_order()
order_letters = [''] * len(result.order)
for i in range(len(order)):
ind = order.index(min(order))
order[ind] = 10000
order_letters[ind] = string.ascii_uppercase[i]
result.order = ''.join(order_letters)
# result.order_key = result.order_key[1:-1]
# TODO When tree is finalized create relative word order (alphabet)!
return result
# def set_root(self):
# if len(self.array[0]) > 1:
# self.root = '&'.join(self.array[0])
# else:
# # output_string = create_output_strings[0](node)
# self.root = self.array[0][0]

190
Tree.py
View File

@ -4,6 +4,8 @@ from copy import copy
from pyconll.unit import Token
from Result import Result
from ResultNode import ResultNode
from ResultTree import ResultTree
from Value import Value
from generic import create_output_string_form, create_output_string_deprel, create_output_string_lemma, \
create_output_string_upos, create_output_string_xpos, create_output_string_feats, generate_key
@ -314,39 +316,43 @@ class Tree(object):
# string_output = ''
# if create_output_string_form(self) == 'vožnji':
# print('HERE!@@!')
# if create_output_string_form(self) == 'začelo':
# print('HERE!@@!')
node = ResultNode(self, self.index, create_output_string)
# TEST = ResultTree(node, [], filters)
# a = TEST.create_key()
# if i_query < len(active_permanent_query_trees):
# if 'children' in active_permanent_query_trees[i_query]:
# merged_partial_subtrees.append(
# self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters))
# i_answer += 1
# else:
# merged_partial_subtrees.append([Result(self, self.index, create_output_string)])
# else:
# if 'children' in active_temporary_query_trees[i_query - len(active_permanent_query_trees)]:
# merged_partial_subtrees.append(
# self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters))
# i_answer += 1
# else:
# merged_partial_subtrees.append([Result(self, self.index, create_output_string)])
if i_query < len(active_permanent_query_trees):
if 'children' in active_permanent_query_trees[i_query]:
# if not filters['node_order'] or i_child < self.children_split:
# merged_partial_subtrees.append(
# self.create_output_children(partial_subtrees[i_answer], [create_output_string(self)], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
# merged_partial_subtrees_architecture.append(
# self.create_output_children(partial_subtrees_architecture[i_answer], [str([self.index])], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
merged_partial_subtrees.append(
self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters))
self.create_output_children(partial_subtrees[i_answer], [ResultTree(node, [], filters)], filters))
i_answer += 1
else:
merged_partial_subtrees.append([Result(self, self.index, create_output_string)])
# merged_partial_subtrees.append([create_output_string(self)])
# merged_partial_subtrees_architecture.append([str([self.index])])
merged_partial_subtrees.append([ResultTree(node, [], filters)])
else:
if 'children' in active_temporary_query_trees[i_query - len(active_permanent_query_trees)]:
# if not filters['node_order'] or i_child < self.children_split:
# merged_partial_subtrees.append(
# self.create_output_children(partial_subtrees[i_answer], [create_output_string(self)], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
# merged_partial_subtrees_architecture.append(
# self.create_output_children(partial_subtrees_architecture[i_answer], [str([self.index])], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
merged_partial_subtrees.append(
self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters))
self.create_output_children(partial_subtrees[i_answer], [ResultTree(node, [], filters)], filters))
i_answer += 1
else:
merged_partial_subtrees.append([Result(self, self.index, create_output_string)])
# merged_partial_subtrees.append([create_output_string(self)])
# merged_partial_subtrees_architecture.append([str([self.index])])
merged_partial_subtrees.append([ResultTree(node, [], filters)])
return i_answer
@ -458,6 +464,36 @@ class Tree(object):
# merged_results.append(left_part + right_part)
return merged_results
@staticmethod
def create_children_groups(left_parts, right_parts):
if not left_parts:
# return all right_parts
return right_parts
# if left:
# return [r_p + separator for r_p in right_parts]
# # return [r_p.add_separator(separator, left) for r_p in right_parts]
# else:
# return [separator + r_p for r_p in right_parts]
if not right_parts:
return left_parts
# return [separator + l_p for l_p in left_parts]
all_children_group_possibilities = []
for left_part in left_parts:
for right_part in right_parts:
new_part = copy(left_part)
new_part.extend(right_part)
all_children_group_possibilities.append(new_part)
# merged_results.append(left_part.merge_results(right_part, separator))
# if separator:
# if left:
# merged_results.append(left_part + right_part + separator)
# else:
# merged_results.append(left_part + separator + right_part)
# else:
# merged_results.append(left_part + right_part)
return all_children_group_possibilities
@staticmethod
def merge_answer(answer1, answer2, base_answer_i, answer_j):
merged_results = []
@ -476,6 +512,14 @@ class Tree(object):
return merged_results, merged_indices
def merge_results2(self, child, new_results, filters):
if create_output_string_form(self) == 'začelo':
print('HERE!@@!')
if create_output_string_form(self) == 'Dogodek':
print('HERE!@@!')
if create_output_string_form(self) == 'utišal':
print('HERE!@@!')
if create_output_string_form(self) == 'prijel':
print('HERE!@@!')
if filters['node_order']:
new_child = child
# new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
@ -550,64 +594,50 @@ class Tree(object):
return new_answers
# def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters):
def merge_results3(self, new_child, new_answers, i_child, indices, deprel, filters):
# l_res = []
# r_res = []
# results = []
separators = []
l_answers = []
r_answers = []
separator_switch = len(new_child) - 1
def merge_results3(self, child, new_results, filters):
if create_output_string_form(self) == 'Dogodek':
print('HERE!@@!')
# if create_output_string_form(self) == 'začelo':
# print('HERE!@@!')
# if create_output_string_form(self) == 'utišal':
# print('HERE!@@!')
# if create_output_string_form(self) == 'prijel':
# print('HERE!@@!')
if filters['node_order']:
new_child = child
# new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
# new_child_sorted = sorted(child, key=lambda x: x[0].get_key())
else:
new_child = sorted(child, key=lambda x: x[0].get_key())
children_groups = []
for i_answer, answer in enumerate(new_child):
if filters['node_order'] and indices[i_child][i_answer] < self.children_split:
if filters['dependency_type']:
separators.append(' <' + deprel[i_child][i_answer] + ' ')
else:
separators.append(' < ')
l_answers.append(answer)
# l_res = res
# return merged_results
# l_res += answer + separator
else:
if i_answer < separator_switch:
separator_switch = i_answer
if filters['dependency_type']:
separators.append(' >' + deprel[i_child][i_answer] + ' ')
else:
separators.append(' > ')
r_answers.append(answer)
children_groups = self.create_children_groups(children_groups, [[answer_part] for answer_part in answer])
# r_res += separator + answer
answers = []
if l_answers and r_answers:
answers = l_answers + [new_answers] + r_answers
# for l_answer in l_answers:
# for r_answer in r_answers:
# answers.append(l_answer + new_answers + r_answer)
elif l_answers:
answers = l_answers + [new_answers]
# for l_answer in l_answers:
# answers.append(l_answer + new_answers)
elif r_answers:
answers = [new_answers] + r_answers
# for r_answer in r_answers:
# answers.append(new_answers + r_answer)
else:
answers = [new_answers]
# children_groups_sorted = []
# for i_answer, answer in enumerate(new_child_sorted):
# children_groups_sorted = self.create_children_groups(children_groups_sorted, [[answer_part] for answer_part in answer])
#
#
# results_sorted = {}
# for result in new_results:
# for children in children_groups_sorted:
# new_result = copy(result)
# new_result.set_children(children)
# order = sorted(new_result.get_order())
# results_sorted.append(new_result)
results = self.create_merged_results(answers, separators, separator_switch)
# if l_res:
# l_res_combined = self.merge_results(l_res, new_answers, None)
# if r_res:
# r_res_combined = self.merge_results(l_res_combined, r_res, None)
# # merged_results.extend(['(' + el + ')' for el in r_res_combined])
# results.extend([el.put_in_bracelets() for el in r_res_combined])
# else:
# results.extend([el.put_in_bracelets() for el in l_res_combined])
# elif r_res:
# r_res_combined = self.merge_results(new_answers, r_res, None)
# results.extend([el.put_in_bracelets() for el in r_res_combined])
results = []
for result in new_results:
for children in children_groups:
new_result = copy(result)
new_result.set_children(children)
order = new_result.get_order()
results.append(new_result)
return results
@ -620,7 +650,8 @@ class Tree(object):
# print('HERE')
merged_results = []
for i_child, child in enumerate(children):
merged_results.extend(self.merge_results2(child, new_results, filters))
# merged_results.extend(self.merge_results2(child, new_results, filters))
merged_results.extend(self.merge_results3(child, new_results, filters))
return merged_results
# @staticmethod
@ -680,8 +711,9 @@ class Tree(object):
for unique_tree in unique_trees_architecture:
already_in = True
for part_i in range(len(unique_tree)):
test = unique_tree[part_i][0].order_key
if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].order_key != new_tree[part_i][i_unique_part].order_key for i_unique_part in range(len(unique_tree[part_i]))):
# test = unique_tree[part_i][0].get_order_key()
if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].get_order_key() != new_tree[part_i][i_unique_part].get_order_key() for i_unique_part in range(len(unique_tree[part_i]))):
# if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].order_key != new_tree[part_i][i_unique_part].order_key for i_unique_part in range(len(unique_tree[part_i]))):
# if unique_tree[part_i].order_key != new_tree[part_i].order_key:
already_in = False
break

View File

@ -476,8 +476,12 @@ def main():
# for subtrees in all_subtrees:
for tree_i, subtrees in enumerate(all_subtrees):
for query_results in subtrees:
for r in query_results:
# if r.key == '(ne <advmod more >xcomp (se <expl izogniti) >punct .)':
# print('HERE')
# print(tree_i)
if filters['node_order']:
key = r.key + r.order
else:
@ -496,8 +500,10 @@ def main():
# 3.65 s (1 core)
else:
# for tree_i, tree in enumerate(all_trees[-5:]):
# for tree_i, tree in enumerate(all_trees):
for tree_i, tree in enumerate(all_trees[1:]):
for tree_i, tree in enumerate(all_trees):
# for tree_i, tree in enumerate(all_trees[852:]):
# for tree_i, tree in enumerate(all_trees[1689:]):
# for tree_i, tree in enumerate(all_trees[2:]):
input_data = (tree, query_tree, create_output_string_functs, filters)
if filters['association_measures']:
unigrams = get_unigrams(input_data)
@ -573,19 +579,20 @@ def main():
# body
for k, v in sorted_list:
v['object'].get_array()
absolute_frequency = v['number'] * 1000000.0 / corpus_size
if filters['frequency_threshold'] and filters['frequency_threshold'] > absolute_frequency:
break
words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))]
# words_only = printable_answers(k)
row = [v['object'].key] + words_only + [str(v['number'])]
row = [v['object'].key[1:-1]] + words_only + [str(v['number'])]
row += ['%.4f' % absolute_frequency]
if filters['node_order']:
row += [v['object'].order]
if filters['nodes_number']:
row += ['%d' % len(v['object'].array)]
if filters['print_root']:
row += [v['object'].root]
row += [v['object'].node.name]
if filters['association_measures']:
row += get_collocabilities(v, unigrams_dict, corpus_size)
writer.writerow(row)

View File

@ -36,6 +36,22 @@ def generate_key(node, create_output_strings, print_lemma=True):
return array, key
def generate_name(node, create_output_strings, print_lemma=True):
array = [create_output_string(node) for create_output_string in create_output_strings]
if create_output_string_lemma in create_output_strings and print_lemma:
name_array = [create_output_string(
node) if create_output_string != create_output_string_lemma else 'L=' + create_output_string(node) for
create_output_string in create_output_strings]
else:
name_array = array
if len(array) > 1:
name = '&'.join(name_array)
else:
# output_string = create_output_strings[0](node)
name = name_array[0]
return array, name
def get_collocabilities(ngram, unigrams_dict, corpus_size):
sum_fwi = 0.0
mul_fwi = 1.0