Major refactor of Results

This commit is contained in:
Luka 2019-12-14 22:44:18 +01:00
parent 9fe395baf3
commit 53edc3b796
5 changed files with 407 additions and 83 deletions

20
ResultNode.py Normal file
View File

@ -0,0 +1,20 @@
from generic import generate_key, generate_name
class ResultNode(object):
    """Payload of one node inside a result tree.

    Stores the node's display name together with the parts it was built
    from, the position it was given and its dependency relation.
    """

    def __init__(self, node, architecture_order, create_output_strings):
        """Capture naming, position and relation data for ``node``.

        :param node: source node; must expose ``deprel.get_value()``.
        :param architecture_order: stored unchanged as ``location``.
        :param create_output_strings: forwarded to ``generate_name``.
        """
        name_parts, name = generate_name(node, create_output_strings)
        self.name_parts = name_parts
        self.name = name
        self.location = architecture_order
        self.deprel = node.deprel.get_value()

    def __repr__(self):
        """Represent the node by its display name."""
        return self.name

249
ResultTree.py Normal file
View File

@ -0,0 +1,249 @@
import copy
import string
from generic import create_output_string_form, create_output_string_deprel, create_output_string_lemma, \
create_output_string_upos, create_output_string_xpos, create_output_string_feats, generate_key
class ResultTree(object):
    """Tree of result nodes that can be rendered as keys, orders and arrays.

    Every tree wraps one ``node`` (an object exposing ``name``,
    ``name_parts``, ``location`` and ``deprel``) plus a list of child
    ``ResultTree`` objects.  The ``get_*`` methods walk the tree and cache
    their result on the instance (``key``, ``order_key``, ``order``,
    ``array``).

    Children whose ``node.location`` is smaller than the parent's are emitted
    before the parent; the parent itself is emitted right before the first
    child that is not smaller (or last, when there is no such child).
    """

    def __init__(self, node, children, filters):
        """Store the node, its children and the active filters.

        :param node: payload of this tree position.
        :param children: list of child ``ResultTree`` objects (may be empty).
        :param filters: mapping; ``filters['dependency_type']`` decides
            whether separators carry the child's ``deprel``.
        """
        self.node = node
        self.children = children
        self.filters = filters
        # Caches populated by get_key / get_order_key / get_order / get_array.
        self.key = None
        self.order_key = None
        self.order = None
        self.array = None
        # Used only by the merge helpers (add_separator, merge_results, ...).
        self.separators = []

    def __repr__(self):
        """Represent the tree by its key (recomputes and caches the key)."""
        return self.get_key()

    def set_children(self, children):
        """Replace the list of child trees."""
        self.children = children

    def _separator(self, child, arrow):
        """Return the separator placed between this node and ``child``.

        ``arrow`` is ``'<'`` or ``'>'``; the child's ``deprel`` is appended
        to it only when the ``dependency_type`` filter is on.
        """
        label = child.node.deprel if self.filters['dependency_type'] else ''
        return ' ' + arrow + label + ' '

    def _render(self, own, method_name):
        """Build the bracketed string form shared by both key getters.

        :param own: text standing for this node.
        :param method_name: name of the getter to call on every child.
        """
        if not self.children:
            return own
        pieces = []
        own_written = False
        for child in self.children:
            rendered = getattr(child, method_name)()
            if child.node.location < self.node.location:
                pieces.append(rendered + self._separator(child, '<'))
            else:
                if not own_written:
                    own_written = True
                    pieces.append(own)
                pieces.append(self._separator(child, '>') + rendered)
        if not own_written:
            pieces.append(own)
        return '(' + ''.join(pieces) + ')'

    def _collect(self, own, method_name):
        """Build the flat list form shared by ``get_order`` and ``get_array``.

        :param own: element standing for this node.
        :param method_name: name of the getter to call on every child.
        :return: a new list; child results are copied into it.
        """
        if not self.children:
            return [own]
        items = []
        own_written = False
        for child in self.children:
            is_left = child.node.location < self.node.location
            if not is_left and not own_written:
                own_written = True
                items.append(own)
            items.extend(getattr(child, method_name)())
        if not own_written:
            items.append(own)
        return items

    def get_key(self):
        """Compute, cache and return the key built from node names."""
        self.key = self._render(self.node.name, 'get_key')
        return self.key

    def get_order_key(self):
        """Compute, cache and return the key built from node locations."""
        self.order_key = self._render(str(self.node.location), 'get_order_key')
        return self.order_key

    def get_order(self):
        """Compute, cache and return the node locations in output order."""
        self.order = self._collect(self.node.location, 'get_order')
        return self.order

    def get_array(self):
        """Compute, cache and return the ``name_parts`` in output order."""
        self.array = self._collect(self.node.name_parts, 'get_array')
        return self.array

    def add_separator(self, separator, left=True):
        """Return a shallow copy with ``separator`` attached to its keys.

        With ``left`` true the separator is appended, otherwise prepended.
        ``key`` and ``order_key`` must already hold strings (``__init__``
        leaves them as ``None`` until the getters run).
        """
        self_copy = copy.copy(self)
        if left:
            # Build a new list: the shallow copy shares the list object with
            # ``self``, so an in-place ``+=`` would also change ``self``.
            self_copy.separators = self_copy.separators + [separator]
            self_copy.key += separator
            self_copy.order_key += separator
        else:
            self_copy.separators = [separator] + self_copy.separators
            self_copy.key = separator + self_copy.key
            self_copy.order_key = separator + self_copy.order_key
        return self_copy

    def merge_results(self, right_t, separator, left=True):
        """Return a shallow copy of ``self`` merged with ``right_t``.

        ``array`` and ``order`` are concatenated.  A truthy ``separator`` is
        placed after both parts when ``left`` is true and between them
        otherwise; a falsy one is left out.  The cached ``key``,
        ``order_key``, ``array`` and ``order`` of both trees must already be
        populated.
        """
        left_tree = copy.copy(self)
        right_tree = copy.copy(right_t)
        if not separator:
            key = left_tree.key + right_tree.key
            order_key = left_tree.order_key + right_tree.order_key
            separators = left_tree.separators + right_tree.separators
        elif left:
            key = left_tree.key + right_tree.key + separator
            order_key = left_tree.order_key + right_tree.order_key + separator
            separators = left_tree.separators + right_tree.separators + [separator]
        else:
            key = left_tree.key + separator + right_tree.key
            order_key = left_tree.order_key + separator + right_tree.order_key
            separators = left_tree.separators + [separator] + right_tree.separators
        left_tree.key = key
        left_tree.order_key = order_key
        left_tree.array = left_tree.array + right_tree.array
        left_tree.order = left_tree.order + right_tree.order
        left_tree.separators = separators
        return left_tree

    def extend_answer(self, other_answer, separator):
        """Append ``other_answer`` to this tree in place.

        The cached ``key``, ``order_key``, ``array`` and ``order`` of both
        objects must already be populated.
        """
        self.array.extend(other_answer.array)
        self.order.extend(other_answer.order)
        self.key += separator + other_answer.key
        self.order_key += separator + other_answer.order_key
        # ``separator`` is a string; ``extend`` would split it into characters.
        self.separators.append(separator)

    def put_in_bracelets(self, inplace=False):
        """Wrap the cached ``key`` and ``order_key`` in parentheses.

        :param inplace: when true modify ``self`` and return ``None``;
            otherwise return a modified shallow copy.
        """
        target = self if inplace else copy.copy(self)
        target.key = '(' + target.key + ')'
        target.order_key = '(' + target.order_key + ')'
        if inplace:
            return
        return target

    def finalize_result(self):
        """Return a shallow copy with a fresh key and a lettered word order.

        ``order`` of the copy becomes a string with one uppercase letter per
        node: the node with the smallest location gets ``'A'``, the next one
        ``'B'`` and so on, written in output order.  Equal locations keep
        their output order.  More than 26 nodes raise ``IndexError``.
        """
        result = copy.copy(self)
        result.key = result.get_key()
        order = result.get_order()
        letters = [''] * len(order)
        # A stable sort of the positions by location yields the rank of each
        # position without needing a sentinel value.
        ranked_positions = sorted(range(len(order)), key=order.__getitem__)
        for rank, position in enumerate(ranked_positions):
            letters[position] = string.ascii_uppercase[rank]
        result.order = ''.join(letters)
        return result

190
Tree.py
View File

@ -4,6 +4,8 @@ from copy import copy
from pyconll.unit import Token from pyconll.unit import Token
from Result import Result from Result import Result
from ResultNode import ResultNode
from ResultTree import ResultTree
from Value import Value from Value import Value
from generic import create_output_string_form, create_output_string_deprel, create_output_string_lemma, \ from generic import create_output_string_form, create_output_string_deprel, create_output_string_lemma, \
create_output_string_upos, create_output_string_xpos, create_output_string_feats, generate_key create_output_string_upos, create_output_string_xpos, create_output_string_feats, generate_key
@ -314,39 +316,43 @@ class Tree(object):
# string_output = '' # string_output = ''
# if create_output_string_form(self) == 'vožnji': # if create_output_string_form(self) == 'vožnji':
# print('HERE!@@!') # print('HERE!@@!')
# if create_output_string_form(self) == 'začelo':
# print('HERE!@@!')
node = ResultNode(self, self.index, create_output_string)
# TEST = ResultTree(node, [], filters)
# a = TEST.create_key()
# if i_query < len(active_permanent_query_trees):
# if 'children' in active_permanent_query_trees[i_query]:
# merged_partial_subtrees.append(
# self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters))
# i_answer += 1
# else:
# merged_partial_subtrees.append([Result(self, self.index, create_output_string)])
# else:
# if 'children' in active_temporary_query_trees[i_query - len(active_permanent_query_trees)]:
# merged_partial_subtrees.append(
# self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters))
# i_answer += 1
# else:
# merged_partial_subtrees.append([Result(self, self.index, create_output_string)])
if i_query < len(active_permanent_query_trees): if i_query < len(active_permanent_query_trees):
if 'children' in active_permanent_query_trees[i_query]: if 'children' in active_permanent_query_trees[i_query]:
# if not filters['node_order'] or i_child < self.children_split:
# merged_partial_subtrees.append(
# self.create_output_children(partial_subtrees[i_answer], [create_output_string(self)], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
# merged_partial_subtrees_architecture.append(
# self.create_output_children(partial_subtrees_architecture[i_answer], [str([self.index])], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
merged_partial_subtrees.append( merged_partial_subtrees.append(
self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters)) self.create_output_children(partial_subtrees[i_answer], [ResultTree(node, [], filters)], filters))
i_answer += 1 i_answer += 1
else: else:
merged_partial_subtrees.append([Result(self, self.index, create_output_string)]) merged_partial_subtrees.append([ResultTree(node, [], filters)])
# merged_partial_subtrees.append([create_output_string(self)])
# merged_partial_subtrees_architecture.append([str([self.index])])
else: else:
if 'children' in active_temporary_query_trees[i_query - len(active_permanent_query_trees)]: if 'children' in active_temporary_query_trees[i_query - len(active_permanent_query_trees)]:
# if not filters['node_order'] or i_child < self.children_split:
# merged_partial_subtrees.append(
# self.create_output_children(partial_subtrees[i_answer], [create_output_string(self)], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
# merged_partial_subtrees_architecture.append(
# self.create_output_children(partial_subtrees_architecture[i_answer], [str([self.index])], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
merged_partial_subtrees.append( merged_partial_subtrees.append(
self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters)) self.create_output_children(partial_subtrees[i_answer], [ResultTree(node, [], filters)], filters))
i_answer += 1 i_answer += 1
else: else:
merged_partial_subtrees.append([Result(self, self.index, create_output_string)]) merged_partial_subtrees.append([ResultTree(node, [], filters)])
# merged_partial_subtrees.append([create_output_string(self)])
# merged_partial_subtrees_architecture.append([str([self.index])])
return i_answer return i_answer
@ -458,6 +464,36 @@ class Tree(object):
# merged_results.append(left_part + right_part) # merged_results.append(left_part + right_part)
return merged_results return merged_results
@staticmethod
def create_children_groups(left_parts, right_parts):
if not left_parts:
# return all right_parts
return right_parts
# if left:
# return [r_p + separator for r_p in right_parts]
# # return [r_p.add_separator(separator, left) for r_p in right_parts]
# else:
# return [separator + r_p for r_p in right_parts]
if not right_parts:
return left_parts
# return [separator + l_p for l_p in left_parts]
all_children_group_possibilities = []
for left_part in left_parts:
for right_part in right_parts:
new_part = copy(left_part)
new_part.extend(right_part)
all_children_group_possibilities.append(new_part)
# merged_results.append(left_part.merge_results(right_part, separator))
# if separator:
# if left:
# merged_results.append(left_part + right_part + separator)
# else:
# merged_results.append(left_part + separator + right_part)
# else:
# merged_results.append(left_part + right_part)
return all_children_group_possibilities
@staticmethod @staticmethod
def merge_answer(answer1, answer2, base_answer_i, answer_j): def merge_answer(answer1, answer2, base_answer_i, answer_j):
merged_results = [] merged_results = []
@ -476,6 +512,14 @@ class Tree(object):
return merged_results, merged_indices return merged_results, merged_indices
def merge_results2(self, child, new_results, filters): def merge_results2(self, child, new_results, filters):
if create_output_string_form(self) == 'začelo':
print('HERE!@@!')
if create_output_string_form(self) == 'Dogodek':
print('HERE!@@!')
if create_output_string_form(self) == 'utišal':
print('HERE!@@!')
if create_output_string_form(self) == 'prijel':
print('HERE!@@!')
if filters['node_order']: if filters['node_order']:
new_child = child new_child = child
# new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key) # new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
@ -550,64 +594,50 @@ class Tree(object):
return new_answers return new_answers
# def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters): # def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters):
def merge_results3(self, new_child, new_answers, i_child, indices, deprel, filters): def merge_results3(self, child, new_results, filters):
# l_res = [] if create_output_string_form(self) == 'Dogodek':
# r_res = [] print('HERE!@@!')
# results = [] # if create_output_string_form(self) == 'začelo':
separators = [] # print('HERE!@@!')
l_answers = [] # if create_output_string_form(self) == 'utišal':
r_answers = [] # print('HERE!@@!')
separator_switch = len(new_child) - 1 # if create_output_string_form(self) == 'prijel':
# print('HERE!@@!')
if filters['node_order']:
new_child = child
# new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
# new_child_sorted = sorted(child, key=lambda x: x[0].get_key())
else:
new_child = sorted(child, key=lambda x: x[0].get_key())
children_groups = []
for i_answer, answer in enumerate(new_child): for i_answer, answer in enumerate(new_child):
if filters['node_order'] and indices[i_child][i_answer] < self.children_split: children_groups = self.create_children_groups(children_groups, [[answer_part] for answer_part in answer])
if filters['dependency_type']:
separators.append(' <' + deprel[i_child][i_answer] + ' ')
else:
separators.append(' < ')
l_answers.append(answer)
# l_res = res
# return merged_results
# l_res += answer + separator
else:
if i_answer < separator_switch:
separator_switch = i_answer
if filters['dependency_type']:
separators.append(' >' + deprel[i_child][i_answer] + ' ')
else:
separators.append(' > ')
r_answers.append(answer)
# r_res += separator + answer # r_res += separator + answer
answers = [] # children_groups_sorted = []
if l_answers and r_answers: # for i_answer, answer in enumerate(new_child_sorted):
answers = l_answers + [new_answers] + r_answers # children_groups_sorted = self.create_children_groups(children_groups_sorted, [[answer_part] for answer_part in answer])
# for l_answer in l_answers: #
# for r_answer in r_answers: #
# answers.append(l_answer + new_answers + r_answer) # results_sorted = {}
elif l_answers: # for result in new_results:
answers = l_answers + [new_answers] # for children in children_groups_sorted:
# for l_answer in l_answers: # new_result = copy(result)
# answers.append(l_answer + new_answers) # new_result.set_children(children)
elif r_answers: # order = sorted(new_result.get_order())
answers = [new_answers] + r_answers # results_sorted.append(new_result)
# for r_answer in r_answers:
# answers.append(new_answers + r_answer)
else:
answers = [new_answers]
results = self.create_merged_results(answers, separators, separator_switch)
# if l_res: results = []
# l_res_combined = self.merge_results(l_res, new_answers, None) for result in new_results:
# if r_res: for children in children_groups:
# r_res_combined = self.merge_results(l_res_combined, r_res, None) new_result = copy(result)
# # merged_results.extend(['(' + el + ')' for el in r_res_combined]) new_result.set_children(children)
# results.extend([el.put_in_bracelets() for el in r_res_combined]) order = new_result.get_order()
# else: results.append(new_result)
# results.extend([el.put_in_bracelets() for el in l_res_combined])
# elif r_res:
# r_res_combined = self.merge_results(new_answers, r_res, None)
# results.extend([el.put_in_bracelets() for el in r_res_combined])
return results return results
@ -620,7 +650,8 @@ class Tree(object):
# print('HERE') # print('HERE')
merged_results = [] merged_results = []
for i_child, child in enumerate(children): for i_child, child in enumerate(children):
merged_results.extend(self.merge_results2(child, new_results, filters)) # merged_results.extend(self.merge_results2(child, new_results, filters))
merged_results.extend(self.merge_results3(child, new_results, filters))
return merged_results return merged_results
# @staticmethod # @staticmethod
@ -680,8 +711,9 @@ class Tree(object):
for unique_tree in unique_trees_architecture: for unique_tree in unique_trees_architecture:
already_in = True already_in = True
for part_i in range(len(unique_tree)): for part_i in range(len(unique_tree)):
test = unique_tree[part_i][0].order_key # test = unique_tree[part_i][0].get_order_key()
if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].order_key != new_tree[part_i][i_unique_part].order_key for i_unique_part in range(len(unique_tree[part_i]))): if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].get_order_key() != new_tree[part_i][i_unique_part].get_order_key() for i_unique_part in range(len(unique_tree[part_i]))):
# if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].order_key != new_tree[part_i][i_unique_part].order_key for i_unique_part in range(len(unique_tree[part_i]))):
# if unique_tree[part_i].order_key != new_tree[part_i].order_key: # if unique_tree[part_i].order_key != new_tree[part_i].order_key:
already_in = False already_in = False
break break

View File

@ -476,8 +476,12 @@ def main():
# for subtrees in all_subtrees: # for subtrees in all_subtrees:
for tree_i, subtrees in enumerate(all_subtrees): for tree_i, subtrees in enumerate(all_subtrees):
for query_results in subtrees: for query_results in subtrees:
for r in query_results: for r in query_results:
# if r.key == '(ne <advmod more >xcomp (se <expl izogniti) >punct .)':
# print('HERE')
# print(tree_i)
if filters['node_order']: if filters['node_order']:
key = r.key + r.order key = r.key + r.order
else: else:
@ -496,8 +500,10 @@ def main():
# 3.65 s (1 core) # 3.65 s (1 core)
else: else:
# for tree_i, tree in enumerate(all_trees[-5:]): # for tree_i, tree in enumerate(all_trees[-5:]):
# for tree_i, tree in enumerate(all_trees): for tree_i, tree in enumerate(all_trees):
for tree_i, tree in enumerate(all_trees[1:]): # for tree_i, tree in enumerate(all_trees[852:]):
# for tree_i, tree in enumerate(all_trees[1689:]):
# for tree_i, tree in enumerate(all_trees[2:]):
input_data = (tree, query_tree, create_output_string_functs, filters) input_data = (tree, query_tree, create_output_string_functs, filters)
if filters['association_measures']: if filters['association_measures']:
unigrams = get_unigrams(input_data) unigrams = get_unigrams(input_data)
@ -573,19 +579,20 @@ def main():
# body # body
for k, v in sorted_list: for k, v in sorted_list:
v['object'].get_array()
absolute_frequency = v['number'] * 1000000.0 / corpus_size absolute_frequency = v['number'] * 1000000.0 / corpus_size
if filters['frequency_threshold'] and filters['frequency_threshold'] > absolute_frequency: if filters['frequency_threshold'] and filters['frequency_threshold'] > absolute_frequency:
break break
words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))] words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))]
# words_only = printable_answers(k) # words_only = printable_answers(k)
row = [v['object'].key] + words_only + [str(v['number'])] row = [v['object'].key[1:-1]] + words_only + [str(v['number'])]
row += ['%.4f' % absolute_frequency] row += ['%.4f' % absolute_frequency]
if filters['node_order']: if filters['node_order']:
row += [v['object'].order] row += [v['object'].order]
if filters['nodes_number']: if filters['nodes_number']:
row += ['%d' % len(v['object'].array)] row += ['%d' % len(v['object'].array)]
if filters['print_root']: if filters['print_root']:
row += [v['object'].root] row += [v['object'].node.name]
if filters['association_measures']: if filters['association_measures']:
row += get_collocabilities(v, unigrams_dict, corpus_size) row += get_collocabilities(v, unigrams_dict, corpus_size)
writer.writerow(row) writer.writerow(row)

View File

@ -36,6 +36,22 @@ def generate_key(node, create_output_strings, print_lemma=True):
return array, key return array, key
def generate_name(node, create_output_strings, print_lemma=True):
    """Return the per-formatter values of ``node`` and its display name.

    :param node: object handed to every formatter.
    :param create_output_strings: sequence of callables, each taking ``node``.
    :param print_lemma: when true, the value produced by
        ``create_output_string_lemma`` is prefixed with ``'L='`` in the name
        (the returned array is never prefixed).
    :return: ``(array, name)`` where ``array`` lists the formatter results in
        order and ``name`` joins them with ``'&'``.  A single result is used
        as the name unchanged; no formatters give ``([], '')``.
    """
    array = [create_output_string(node) for create_output_string in create_output_strings]
    if print_lemma and create_output_string_lemma in create_output_strings:
        # Reuse the values computed above instead of calling every formatter
        # a second time.
        name_array = [
            'L=' + value if create_output_string == create_output_string_lemma else value
            for create_output_string, value in zip(create_output_strings, array)
        ]
    else:
        name_array = array
    if len(name_array) == 1:
        name = name_array[0]
    else:
        name = '&'.join(name_array)
    return array, name
def get_collocabilities(ngram, unigrams_dict, corpus_size): def get_collocabilities(ngram, unigrams_dict, corpus_size):
sum_fwi = 0.0 sum_fwi = 0.0
mul_fwi = 1.0 mul_fwi = 1.0