Adding results class to code
This commit is contained in:
parent
074aed2b56
commit
d34f429d05
83
Result.py
83
Result.py
|
@ -1,9 +1,86 @@
|
||||||
|
import copy
|
||||||
|
|
||||||
|
|
||||||
class Result(object):
    """A (partial) query match rendered as text, plus its bookkeeping data.

    Attributes:
        key: Printable representation of the match (words and separators).
        order_key: Same layout as ``key`` but built from the order values
            passed in (formatted as ``str([order])``) instead of the words.
        array: The word strings that make up the match, left to right.
        separators: The separator strings added so far.
        final_order: Placeholder, currently always the empty string.

    Instances compare equal and hash by ``key`` so they can be used
    directly as dictionary keys when counting identical matches.
    Because the hash depends on ``key``, do not call :meth:`add` on an
    instance after it has been stored in a dict or set.
    """

    def __init__(self, string, architecture_order):
        self.key = string
        # order with original numbers in sentences
        self.order_key = self._order_token(architecture_order)
        self.array = [string]
        # order with numbers from 0 to n of n-gram
        self.final_order = ''
        self.separators = []

    @staticmethod
    def _order_token(architecture_order):
        """Format an order value the same way the constructor does.

        Strings are passed through untouched (they were concatenated
        as-is before); anything else is rendered as ``str([value])``.
        """
        if isinstance(architecture_order, str):
            return architecture_order
        return str([architecture_order])

    def __repr__(self):
        return self.key

    def __eq__(self, other):
        if not isinstance(other, Result):
            return NotImplemented
        return self.key == other.key

    def __hash__(self):
        return hash(self.key)

    def add(self, string, architecture_order, separator, is_left):
        """Attach one more word to this result in place.

        Args:
            string: Word to add.
            architecture_order: Order value of the word; non-string values
                are formatted like in the constructor.
            separator: Separator placed (space padded) between the new
                word and the existing key.
            is_left: If true the word is prepended, otherwise appended.
        """
        order = self._order_token(architecture_order)
        # Lists are rebound instead of extended in place: shallow copies
        # made by the other methods share list objects with this instance.
        if is_left:
            self.array = [string] + self.array
            self.separators = [separator] + self.separators
            self.key = string + ' ' + separator + ' ' + self.key
            self.order_key = order + ' ' + separator + ' ' + self.order_key
        else:
            self.array = self.array + [string]
            self.separators = self.separators + [separator]
            self.key = self.key + ' ' + separator + ' ' + string
            self.order_key = self.order_key + ' ' + separator + ' ' + order

    def add_separator(self, separator, left=True):
        """Return a copy with ``separator`` attached to ``key``/``order_key``.

        With ``left=True`` the separator is appended after the existing
        text, otherwise it is put in front of it. ``self`` is not modified.
        """
        self_copy = copy.copy(self)
        if left:
            # A new list is built on purpose; ``+=`` would also change
            # ``self.separators`` because copy.copy() is shallow.
            self_copy.separators = self.separators + [separator]
            self_copy.key = self.key + separator
            self_copy.order_key = self.order_key + separator
        else:
            self_copy.separators = [separator] + self.separators
            self_copy.key = separator + self.key
            self_copy.order_key = separator + self.order_key
        return self_copy

    def merge_results(self, right_t, separator, left=True):
        """Return a new result combining ``self`` with ``right_t``.

        Args:
            right_t: Result whose content goes to the right of ``self``.
            separator: Separator string; when falsy the two results are
                simply concatenated.
            left: With a separator, true puts it after both parts,
                false puts it between them.

        Neither ``self`` nor ``right_t`` is modified.
        """
        merged = copy.copy(self)
        if not separator:
            merged.key = self.key + right_t.key
            merged.order_key = self.order_key + right_t.order_key
            merged.separators = self.separators + right_t.separators
        elif left:
            merged.key = self.key + right_t.key + separator
            merged.order_key = self.order_key + right_t.order_key + separator
            merged.separators = self.separators + right_t.separators + [separator]
        else:
            merged.key = self.key + separator + right_t.key
            merged.order_key = self.order_key + separator + right_t.order_key
            merged.separators = self.separators + [separator] + right_t.separators
        merged.array = self.array + right_t.array
        return merged

    def put_in_bracelets(self):
        """Return a copy whose ``key`` and ``order_key`` are wrapped in ``( )``."""
        result = copy.copy(self)
        result.key = '(' + self.key + ')'
        result.order_key = '(' + self.order_key + ')'
        return result

    def finalize_result(self):
        """Return a copy whose ``key`` has its first and last character removed.

        ``order_key`` is left unchanged.
        """
        result = copy.copy(self)
        result.key = self.key[1:-1]
        # TODO When tree is finalized create relative word order (alphabet)!
        return result
|
||||||
|
|
187
Tree.py
187
Tree.py
|
@ -3,11 +3,12 @@ from copy import copy
|
||||||
|
|
||||||
from pyconll.unit import Token
|
from pyconll.unit import Token
|
||||||
|
|
||||||
|
from Result import Result
|
||||||
from Value import Value
|
from Value import Value
|
||||||
|
|
||||||
|
|
||||||
class Tree(object):
|
class Tree(object):
|
||||||
def __init__(self, form, lemma, upos, xpos, deprel, feats, feats_detailed, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_detailed_dict, head):
|
def __init__(self, index, form, lemma, upos, xpos, deprel, feats, feats_detailed, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_detailed_dict, head):
|
||||||
if not hasattr(self, 'feats'):
|
if not hasattr(self, 'feats'):
|
||||||
self.feats_detailed = {}
|
self.feats_detailed = {}
|
||||||
|
|
||||||
|
@ -42,13 +43,13 @@ class Tree(object):
|
||||||
self.children = []
|
self.children = []
|
||||||
self.children_split = -1
|
self.children_split = -1
|
||||||
|
|
||||||
self.index = 0
|
self.index = index
|
||||||
|
|
||||||
# for caching answers to questions
|
# for caching answers to questions
|
||||||
self.cache = {}
|
self.cache = {}
|
||||||
|
|
||||||
def add_child(self, child):
|
def add_child(self, child):
|
||||||
child.index = len(self.children)
|
# child.index = len(self.children)
|
||||||
self.children.append(child)
|
self.children.append(child)
|
||||||
|
|
||||||
def set_parent(self, parent):
|
def set_parent(self, parent):
|
||||||
|
@ -268,10 +269,11 @@ class Tree(object):
|
||||||
# partial_results_dict, partial_subtrees)
|
# partial_results_dict, partial_subtrees)
|
||||||
|
|
||||||
for i in range(len(new_complete_answers)):
|
for i in range(len(new_complete_answers)):
|
||||||
|
# TODO add order rearrangement (TO KEY)
|
||||||
complete_answers[i].extend(new_complete_answers[i])
|
complete_answers[i].extend(new_complete_answers[i])
|
||||||
|
|
||||||
# if create_output_string_form(self) == 'vožnji':
|
if create_output_string_form(self) == 'Dogodek':
|
||||||
# print('HERE!@@!')
|
print('HERE!@@!')
|
||||||
# if create_output_string_form(self) == 'vpiti':
|
# if create_output_string_form(self) == 'vpiti':
|
||||||
# print('HERE!@@!')
|
# print('HERE!@@!')
|
||||||
# merge answers in appropriate way
|
# merge answers in appropriate way
|
||||||
|
@ -317,29 +319,36 @@ class Tree(object):
|
||||||
if i_query < len(active_permanent_query_trees):
|
if i_query < len(active_permanent_query_trees):
|
||||||
if 'children' in active_permanent_query_trees[i_query]:
|
if 'children' in active_permanent_query_trees[i_query]:
|
||||||
# if not filters['node_order'] or i_child < self.children_split:
|
# if not filters['node_order'] or i_child < self.children_split:
|
||||||
|
# merged_partial_subtrees.append(
|
||||||
|
# self.create_output_children(partial_subtrees[i_answer], [create_output_string(self)], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
|
||||||
|
# merged_partial_subtrees_architecture.append(
|
||||||
|
# self.create_output_children(partial_subtrees_architecture[i_answer], [str([self.index])], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
|
||||||
|
|
||||||
merged_partial_subtrees.append(
|
merged_partial_subtrees.append(
|
||||||
self.create_output_children(partial_subtrees[i_answer], [create_output_string(self)], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
|
self.create_output_children(partial_subtrees[i_answer], [Result(create_output_string(self), self.index)], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
|
||||||
merged_partial_subtrees_architecture.append(
|
|
||||||
self.create_output_children(partial_subtrees_architecture[i_answer], [str([self.index])], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
|
|
||||||
|
|
||||||
i_answer += 1
|
i_answer += 1
|
||||||
else:
|
else:
|
||||||
merged_partial_subtrees.append([create_output_string(self)])
|
merged_partial_subtrees.append([Result(create_output_string(self), self.index)])
|
||||||
merged_partial_subtrees_architecture.append([str([self.index])])
|
# merged_partial_subtrees.append([create_output_string(self)])
|
||||||
# merged_partial_subtrees.append([[create_output_string(self)]])
|
# merged_partial_subtrees_architecture.append([str([self.index])])
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if 'children' in active_temporary_query_trees[i_query - len(active_permanent_query_trees)]:
|
if 'children' in active_temporary_query_trees[i_query - len(active_permanent_query_trees)]:
|
||||||
# if not filters['node_order'] or i_child < self.children_split:
|
# if not filters['node_order'] or i_child < self.children_split:
|
||||||
|
# merged_partial_subtrees.append(
|
||||||
|
# self.create_output_children(partial_subtrees[i_answer], [create_output_string(self)], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
|
||||||
|
# merged_partial_subtrees_architecture.append(
|
||||||
|
# self.create_output_children(partial_subtrees_architecture[i_answer], [str([self.index])], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
|
||||||
|
|
||||||
merged_partial_subtrees.append(
|
merged_partial_subtrees.append(
|
||||||
self.create_output_children(partial_subtrees[i_answer], [create_output_string(self)], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
|
self.create_output_children(partial_subtrees[i_answer], [Result(create_output_string(self), self.index)], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
|
||||||
merged_partial_subtrees_architecture.append(
|
|
||||||
self.create_output_children(partial_subtrees_architecture[i_answer], [str([self.index])], filters, partial_subtrees_index[i_answer], partial_subtrees_deprel[i_answer]))
|
|
||||||
|
|
||||||
i_answer += 1
|
i_answer += 1
|
||||||
else:
|
else:
|
||||||
merged_partial_subtrees.append([create_output_string(self)])
|
merged_partial_subtrees.append([Result(create_output_string(self), self.index)])
|
||||||
merged_partial_subtrees_architecture.append([str([self.index])])
|
# merged_partial_subtrees.append([create_output_string(self)])
|
||||||
# merged_partial_subtrees.append([[create_output_string(self)]])
|
# merged_partial_subtrees_architecture.append([str([self.index])])
|
||||||
|
|
||||||
return i_answer
|
return i_answer
|
||||||
|
|
||||||
|
@ -395,8 +404,9 @@ class Tree(object):
|
||||||
i_question += 1
|
i_question += 1
|
||||||
|
|
||||||
for i in range(len(active_permanent_query_trees)):
|
for i in range(len(active_permanent_query_trees)):
|
||||||
|
# TODO FINALIZE RESULT
|
||||||
# erase first and last braclets when adding new query result
|
# erase first and last braclets when adding new query result
|
||||||
add_subtree = [subtree[1:-1] for subtree in merged_partial_answers[i]]
|
add_subtree = [subtree.finalize_result() for subtree in merged_partial_answers[i]]
|
||||||
# if 0 < len(active_permanent_query_trees):
|
# if 0 < len(active_permanent_query_trees):
|
||||||
complete_answers[i].extend(add_subtree)
|
complete_answers[i].extend(add_subtree)
|
||||||
# completed_subtrees[i].extend(merged_partial_subtrees[i])
|
# completed_subtrees[i].extend(merged_partial_subtrees[i])
|
||||||
|
@ -405,7 +415,7 @@ class Tree(object):
|
||||||
partial_answers_architecture = [[] for i in range(len(temporary_query_trees))]
|
partial_answers_architecture = [[] for i in range(len(temporary_query_trees))]
|
||||||
partial_answers = [[] for i in range(len(temporary_query_trees))]
|
partial_answers = [[] for i in range(len(temporary_query_trees))]
|
||||||
for inside_i, outside_i in enumerate(successful_temporary_queries):
|
for inside_i, outside_i in enumerate(successful_temporary_queries):
|
||||||
partial_answers_architecture[outside_i] = merged_partial_answers_architecture[len(active_permanent_query_trees) + inside_i]
|
# partial_answers_architecture[outside_i] = merged_partial_answers_architecture[len(active_permanent_query_trees) + inside_i]
|
||||||
partial_answers[outside_i] = merged_partial_answers[
|
partial_answers[outside_i] = merged_partial_answers[
|
||||||
len(active_permanent_query_trees) + inside_i]
|
len(active_permanent_query_trees) + inside_i]
|
||||||
|
|
||||||
|
@ -416,23 +426,28 @@ class Tree(object):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def merge_results(left_parts, right_parts, separator, left=True):
    """Combine every left result with every right result.

    Args:
        left_parts: Sequence of result objects placed on the left.
        right_parts: Sequence of result objects placed on the right.
        separator: Separator string handed to the result objects.
        left: Placement flag forwarded to the result objects'
            ``add_separator`` / ``merge_results`` methods.

    Returns:
        A list of new result objects. If one side is empty, the other
        side's items are returned with the separator attached; otherwise
        the list holds one merged result per (left, right) pair.
    """
    if not left_parts:
        # nothing on the left: return all right_parts with the separator
        return [r_p.add_separator(separator, left) for r_p in right_parts]

    if not right_parts:
        return [l_p.add_separator(separator, False) for l_p in left_parts]

    # ``left`` has to be forwarded, otherwise the separator is always put
    # after both parts, even when the caller asked for left=False.
    return [
        left_part.merge_results(right_part, separator, left)
        for left_part in left_parts
        for right_part in right_parts
    ]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -458,50 +473,46 @@ class Tree(object):
|
||||||
# print('HERE!@@!')
|
# print('HERE!@@!')
|
||||||
# if create_output_string_form(self) == 'utišal':
|
# if create_output_string_form(self) == 'utišal':
|
||||||
# print('HERE!@@!')
|
# print('HERE!@@!')
|
||||||
|
# if len(new_results) > 1:
|
||||||
|
# print('HERE')
|
||||||
merged_results = []
|
merged_results = []
|
||||||
for i_child, child in enumerate(children):
|
for i_child, child in enumerate(children):
|
||||||
for i_new_result, new_result in enumerate(new_results):
|
l_res = []
|
||||||
l_res = []
|
r_res = []
|
||||||
r_res = []
|
if filters['node_order']:
|
||||||
if type(child) == str:
|
new_child = child
|
||||||
# res += '(' + child + ') < '
|
else:
|
||||||
print('ERROR!?!?!')
|
# a = [['tistega', 'dne'], ['sem', 'bil']]
|
||||||
if not filters['node_order'] or indices[i_child][i_new_result] < self.children_split:
|
# b = sorted(a)
|
||||||
l_res += child + ' < '
|
# TODO CHECK IF THIS WORKS FOR CERTAIN
|
||||||
|
new_child = sorted(child, key=lambda x: x[0].key)
|
||||||
|
for i_answer, answer in enumerate(new_child):
|
||||||
|
# res += '(' + el + ') < '
|
||||||
|
if not filters['node_order'] or indices[i_child][i_answer] < self.children_split:
|
||||||
|
if filters['dependency_type']:
|
||||||
|
separator = ' <' + deprel[i_child][i_answer] + ' '
|
||||||
else:
|
else:
|
||||||
r_res += ' > ' + child
|
separator = ' < '
|
||||||
|
l_res = self.merge_results(l_res, answer, separator, left=True)
|
||||||
|
# l_res += answer + separator
|
||||||
else:
|
else:
|
||||||
if filters['node_order']:
|
if filters['dependency_type']:
|
||||||
new_child = child
|
separator = ' >' + deprel[i_child][i_answer] + ' '
|
||||||
else:
|
else:
|
||||||
new_child = sorted(child)
|
separator = ' > '
|
||||||
for i_answer, answer in enumerate(new_child):
|
r_res = self.merge_results(r_res, answer, separator, left=False)
|
||||||
# res += '(' + el + ') < '
|
# r_res += separator + answer
|
||||||
if not filters['node_order'] or indices[i_child][i_answer] < self.children_split:
|
if l_res:
|
||||||
if filters['dependency_type']:
|
l_res_combined = self.merge_results(l_res, new_results, None)
|
||||||
separator = ' <' + deprel[i_child][i_answer] + ' '
|
if r_res:
|
||||||
else:
|
r_res_combined = self.merge_results(l_res_combined, r_res, None)
|
||||||
separator = ' < '
|
# merged_results.extend(['(' + el + ')' for el in r_res_combined])
|
||||||
l_res = self.merge_results(l_res, answer, separator, left=True)
|
merged_results.extend([el.put_in_bracelets() for el in r_res_combined])
|
||||||
# l_res += answer + separator
|
else:
|
||||||
else:
|
merged_results.extend([el.put_in_bracelets() for el in l_res_combined])
|
||||||
if filters['dependency_type']:
|
elif r_res:
|
||||||
separator = ' >' + deprel[i_child][i_answer] + ' '
|
r_res_combined = self.merge_results(new_results, r_res, None)
|
||||||
else:
|
merged_results.extend(['(' + el + ')' for el in r_res_combined])
|
||||||
separator = ' > '
|
|
||||||
r_res = self.merge_results(r_res, answer, separator, left=False)
|
|
||||||
# r_res += separator + answer
|
|
||||||
if l_res:
|
|
||||||
l_res_combined = self.merge_results(l_res, new_results, None)
|
|
||||||
if r_res:
|
|
||||||
r_res_combined = self.merge_results(l_res_combined, r_res, None)
|
|
||||||
merged_results.extend(['(' + el + ')' for el in r_res_combined])
|
|
||||||
else:
|
|
||||||
merged_results.extend(['(' + el + ')' for el in l_res_combined])
|
|
||||||
elif r_res:
|
|
||||||
r_res_combined = self.merge_results(new_results, r_res, None)
|
|
||||||
merged_results.extend(['(' + el + ')' for el in r_res_combined])
|
|
||||||
|
|
||||||
|
|
||||||
# merged_results.append('(' + l_res + new_result + r_res + ')')
|
# merged_results.append('(' + l_res + new_result + r_res + ')')
|
||||||
|
@ -553,11 +564,11 @@ class Tree(object):
|
||||||
# TODO
|
# TODO
|
||||||
# node_order = False
|
# node_order = False
|
||||||
partly_built_trees = [[None] * answer_length]
|
partly_built_trees = [[None] * answer_length]
|
||||||
partly_built_trees_architecture = [[None] * answer_length]
|
# partly_built_trees_architecture = [[None] * answer_length]
|
||||||
partly_built_trees_architecture_indices = [[None] * answer_length]
|
partly_built_trees_architecture_indices = [[None] * answer_length]
|
||||||
partly_built_trees_deprel = [[None] * answer_length]
|
partly_built_trees_deprel = [[None] * answer_length]
|
||||||
built_trees = []
|
built_trees = []
|
||||||
built_trees_architecture = []
|
# built_trees_architecture = []
|
||||||
built_trees_architecture_indices = []
|
built_trees_architecture_indices = []
|
||||||
built_trees_deprel = []
|
built_trees_deprel = []
|
||||||
|
|
||||||
|
@ -568,7 +579,7 @@ class Tree(object):
|
||||||
# child are added
|
# child are added
|
||||||
for child_i in range(len(separated_answers[0])):
|
for child_i in range(len(separated_answers[0])):
|
||||||
new_partly_built_trees = []
|
new_partly_built_trees = []
|
||||||
new_partly_built_trees_architecture = []
|
# new_partly_built_trees_architecture = []
|
||||||
new_partly_built_trees_architecture_indices = []
|
new_partly_built_trees_architecture_indices = []
|
||||||
new_partly_built_trees_deprel = []
|
new_partly_built_trees_deprel = []
|
||||||
# iterate over answers parts
|
# iterate over answers parts
|
||||||
|
@ -580,11 +591,11 @@ class Tree(object):
|
||||||
# if tree_part[answer_part_i] equals None add new element in its place
|
# if tree_part[answer_part_i] equals None add new element in its place
|
||||||
if not tree_part[answer_part_i]:
|
if not tree_part[answer_part_i]:
|
||||||
new_tree_part = copy(tree_part)
|
new_tree_part = copy(tree_part)
|
||||||
new_tree_part_architecture = copy(partly_built_trees_architecture[tree_part_i])
|
# new_tree_part_architecture = copy(partly_built_trees_architecture[tree_part_i])
|
||||||
new_tree_part_architecture_indices = copy(partly_built_trees_architecture_indices[tree_part_i])
|
new_tree_part_architecture_indices = copy(partly_built_trees_architecture_indices[tree_part_i])
|
||||||
new_tree_part_deprel = copy(partly_built_trees_deprel[tree_part_i])
|
new_tree_part_deprel = copy(partly_built_trees_deprel[tree_part_i])
|
||||||
new_tree_part[answer_part_i] = separated_answers[answer_part_i][child_i]
|
new_tree_part[answer_part_i] = separated_answers[answer_part_i][child_i]
|
||||||
new_tree_part_architecture[answer_part_i] = separated_answers_architecture[answer_part_i][child_i]
|
# new_tree_part_architecture[answer_part_i] = separated_answers_architecture[answer_part_i][child_i]
|
||||||
new_tree_part_architecture_indices[answer_part_i] = child_i
|
new_tree_part_architecture_indices[answer_part_i] = child_i
|
||||||
new_tree_part_deprel[answer_part_i] = separated_answers_deprel[answer_part_i][child_i]
|
new_tree_part_deprel[answer_part_i] = separated_answers_deprel[answer_part_i][child_i]
|
||||||
completed_tree_part = True
|
completed_tree_part = True
|
||||||
|
@ -593,12 +604,12 @@ class Tree(object):
|
||||||
completed_tree_part = False
|
completed_tree_part = False
|
||||||
if completed_tree_part:
|
if completed_tree_part:
|
||||||
built_trees.append(new_tree_part)
|
built_trees.append(new_tree_part)
|
||||||
built_trees_architecture.append(new_tree_part_architecture)
|
# built_trees_architecture.append(new_tree_part_architecture)
|
||||||
built_trees_architecture_indices.append(new_tree_part_architecture_indices)
|
built_trees_architecture_indices.append(new_tree_part_architecture_indices)
|
||||||
built_trees_deprel.append(new_tree_part_deprel)
|
built_trees_deprel.append(new_tree_part_deprel)
|
||||||
else:
|
else:
|
||||||
new_partly_built_trees.append(new_tree_part)
|
new_partly_built_trees.append(new_tree_part)
|
||||||
new_partly_built_trees_architecture.append(new_tree_part_architecture)
|
# new_partly_built_trees_architecture.append(new_tree_part_architecture)
|
||||||
new_partly_built_trees_architecture_indices.append(new_tree_part_architecture_indices)
|
new_partly_built_trees_architecture_indices.append(new_tree_part_architecture_indices)
|
||||||
new_partly_built_trees_deprel.append(new_tree_part_deprel)
|
new_partly_built_trees_deprel.append(new_tree_part_deprel)
|
||||||
else:
|
else:
|
||||||
|
@ -607,7 +618,7 @@ class Tree(object):
|
||||||
# print('HERE!!!')
|
# print('HERE!!!')
|
||||||
|
|
||||||
partly_built_trees.extend(new_partly_built_trees)
|
partly_built_trees.extend(new_partly_built_trees)
|
||||||
partly_built_trees_architecture.extend(new_partly_built_trees_architecture)
|
# partly_built_trees_architecture.extend(new_partly_built_trees_architecture)
|
||||||
partly_built_trees_architecture_indices.extend(new_partly_built_trees_architecture_indices)
|
partly_built_trees_architecture_indices.extend(new_partly_built_trees_architecture_indices)
|
||||||
partly_built_trees_deprel.extend(new_partly_built_trees_deprel)
|
partly_built_trees_deprel.extend(new_partly_built_trees_deprel)
|
||||||
|
|
||||||
|
@ -615,19 +626,23 @@ class Tree(object):
|
||||||
|
|
||||||
if built_trees:
|
if built_trees:
|
||||||
# sort 3 arrays by architecture indices
|
# sort 3 arrays by architecture indices
|
||||||
temp_trees_index, temp_trees, temp_trees_architectures, temp_trees_deprel = (list(t) for t in zip(
|
# temp_trees_index, temp_trees, temp_trees_architectures, temp_trees_deprel = (list(t) for t in zip(
|
||||||
*sorted(zip(built_trees_architecture_indices, built_trees, built_trees_architecture, built_trees_deprel))))
|
# *sorted(zip(built_trees_architecture_indices, built_trees, built_trees_architecture, built_trees_deprel))))
|
||||||
|
temp_trees_index, temp_trees, temp_trees_deprel = (list(t) for t in zip(
|
||||||
|
*sorted(zip(built_trees_architecture_indices, built_trees, built_trees_deprel))))
|
||||||
|
|
||||||
# order outputs and erase duplicates
|
# order outputs and erase duplicates
|
||||||
# for tree, tree_architecture, tree_architecture_indice in zip(built_trees, built_trees_architecture, built_trees_architecture_indices):
|
# for tree, tree_architecture, tree_architecture_indice in zip(built_trees, built_trees_architecture, built_trees_architecture_indices):
|
||||||
for tree, tree_architecture, tree_index, tree_deprel in zip(temp_trees, temp_trees_architectures, temp_trees_index, temp_trees_deprel):
|
# for tree, tree_architecture, tree_index, tree_deprel in zip(temp_trees, temp_trees_architectures, temp_trees_index, temp_trees_deprel):
|
||||||
new_tree_index, new_tree, new_tree_architecture, new_tree_deprel = (list(t) for t in zip(*sorted(zip(tree_index, tree, tree_architecture, tree_deprel))))
|
for tree, tree_index, tree_deprel in zip(temp_trees, temp_trees_index, temp_trees_deprel):
|
||||||
|
# new_tree_index, new_tree, new_tree_architecture, new_tree_deprel = (list(t) for t in zip(*sorted(zip(tree_index, tree, tree_architecture, tree_deprel))))
|
||||||
|
new_tree_index, new_tree, new_tree_deprel = (list(t) for t in zip(*sorted(zip(tree_index, tree, tree_deprel))))
|
||||||
# TODO check if inside new_tree_architecture in ordered_built_trees_architecture and if not append!
|
# TODO check if inside new_tree_architecture in ordered_built_trees_architecture and if not append!
|
||||||
is_unique = True
|
is_unique = True
|
||||||
for unique_tree in unique_trees_architecture:
|
for unique_tree in unique_trees_architecture:
|
||||||
already_in = True
|
already_in = True
|
||||||
for part_i in range(len(unique_tree)):
|
for part_i in range(len(unique_tree)):
|
||||||
if unique_tree[part_i] != new_tree_architecture[part_i]:
|
if unique_tree[part_i].order_key != new_tree[part_i].order_key:
|
||||||
already_in = False
|
already_in = False
|
||||||
break
|
break
|
||||||
if already_in:
|
if already_in:
|
||||||
|
@ -635,9 +650,9 @@ class Tree(object):
|
||||||
break
|
break
|
||||||
|
|
||||||
if is_unique:
|
if is_unique:
|
||||||
unique_trees_architecture.append(new_tree_architecture)
|
unique_trees_architecture.append(new_tree)
|
||||||
# if not filters['node_order']:
|
# if not filters['node_order']:
|
||||||
l_ordered_built_trees_architecture.append(new_tree_architecture)
|
# l_ordered_built_trees_architecture.append(new_tree_architecture)
|
||||||
l_ordered_built_trees.append(new_tree)
|
l_ordered_built_trees.append(new_tree)
|
||||||
l_ordered_built_trees_index.append(new_tree_index)
|
l_ordered_built_trees_index.append(new_tree_index)
|
||||||
l_ordered_built_trees_deprel.append(new_tree_deprel)
|
l_ordered_built_trees_deprel.append(new_tree_deprel)
|
||||||
|
|
|
@ -139,9 +139,12 @@ def create_trees(config):
|
||||||
# for k, v in token.feats.items():
|
# for k, v in token.feats.items():
|
||||||
# token_feats += k + next(iter(v)) + '|'
|
# token_feats += k + next(iter(v)) + '|'
|
||||||
# token_feats = token_feats[:-1]
|
# token_feats = token_feats[:-1]
|
||||||
|
if not token.id.isdigit():
|
||||||
|
continue
|
||||||
|
|
||||||
# TODO check if 5th place is always there for feats
|
# TODO check if 5th place is always there for feats
|
||||||
feats = token._fields[5]
|
feats = token._fields[5]
|
||||||
node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, feats, token.feats, form_dict,
|
node = Tree(int(token.id), token.form, token.lemma, token.upos, token.xpos, token.deprel, feats, token.feats, form_dict,
|
||||||
lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_detailed_dict, token.head)
|
lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_detailed_dict, token.head)
|
||||||
token_nodes.append(node)
|
token_nodes.append(node)
|
||||||
if token.deprel == 'root':
|
if token.deprel == 'root':
|
||||||
|
@ -438,26 +441,39 @@ def main():
|
||||||
|
|
||||||
# 1.02 s (16 cores)
|
# 1.02 s (16 cores)
|
||||||
if cpu_cores > 1:
|
if cpu_cores > 1:
|
||||||
all_subtrees = p.map(tree_calculations, [(tree, query_tree, create_output_string_funct, filters) for tree in all_trees])
|
all_subtrees = p.map(tree_calculations, [(tree, query_tree, create_output_string_funct, filters) for tree in all_trees[5170:]])
|
||||||
|
|
||||||
for subtrees in all_subtrees:
|
# for subtrees in all_subtrees:
|
||||||
|
for tree_i, subtrees in enumerate(all_subtrees):
|
||||||
for query_results in subtrees:
|
for query_results in subtrees:
|
||||||
for r in query_results:
|
for r in query_results:
|
||||||
|
# if r == '(" < , < je < velik) < tem':
|
||||||
|
# print(tree_i)
|
||||||
|
# if r in result_dict:
|
||||||
|
# result_dict[r] += 1
|
||||||
|
# else:
|
||||||
|
# result_dict[r] = 1
|
||||||
if r in result_dict:
|
if r in result_dict:
|
||||||
result_dict[r] += 1
|
result_dict[r]['number'] += 1
|
||||||
else:
|
else:
|
||||||
result_dict[r] = 1
|
result_dict[r] = {'object': r, 'number': 1}
|
||||||
|
|
||||||
# 3.65 s (1 core)
|
# 3.65 s (1 core)
|
||||||
else:
|
else:
|
||||||
for tree in all_trees:
|
# for tree_i, tree in enumerate(all_trees[-5:]):
|
||||||
|
for tree_i, tree in enumerate(all_trees[1:]):
|
||||||
|
# text = Če pa ostane odrasel otrok doma, se starši le težko sprijaznijo s tem, da je "velik", otrok pa ima ves čas občutek, da se njegovi starši po nepotrebnem vtikajo v njegovo življenje.
|
||||||
|
# for tree_i, tree in enumerate(all_trees[5170:]):
|
||||||
|
# for tree in all_trees:
|
||||||
subtrees = tree_calculations((tree, query_tree, create_output_string_funct, filters))
|
subtrees = tree_calculations((tree, query_tree, create_output_string_funct, filters))
|
||||||
for query_results in subtrees:
|
for query_results in subtrees:
|
||||||
for r in query_results:
|
for r in query_results:
|
||||||
|
# if r == '(" < , < je < velik) < tem':
|
||||||
|
# print(tree_i)
|
||||||
if r in result_dict:
|
if r in result_dict:
|
||||||
result_dict[r] += 1
|
result_dict[r]['number'] += 1
|
||||||
else:
|
else:
|
||||||
result_dict[r] = 1
|
result_dict[r] = {'object': r, 'number': 1}
|
||||||
|
|
||||||
print("Execution time:")
|
print("Execution time:")
|
||||||
print("--- %s seconds ---" % (time.time() - start_exe_time))
|
print("--- %s seconds ---" % (time.time() - start_exe_time))
|
||||||
|
@ -481,7 +497,7 @@ def main():
|
||||||
# [{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}]}]}], [])
|
# [{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}]}]}], [])
|
||||||
# # _, subtrees = new_tree.get_subtrees(
|
# # _, subtrees = new_tree.get_subtrees(
|
||||||
# # [{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}], "r_children": []}], "r_children": []}], [])
|
# # [{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}], "r_children": []}], "r_children": []}], [])
|
||||||
sorted_list = sorted(result_dict.items(), key=lambda x: x[1], reverse=True)
|
sorted_list = sorted(result_dict.items(), key=lambda x: x[1]['number'], reverse=True)
|
||||||
|
|
||||||
with open(config.get('settings', 'output'), "w", newline="") as f:
|
with open(config.get('settings', 'output'), "w", newline="") as f:
|
||||||
# header - use every second space as a split
|
# header - use every second space as a split
|
||||||
|
@ -496,8 +512,8 @@ def main():
|
||||||
|
|
||||||
# body
|
# body
|
||||||
for k, v in sorted_list:
|
for k, v in sorted_list:
|
||||||
words_only = printable_answers(k)
|
words_only = printable_answers(k.key)
|
||||||
writer.writerow([k] + words_only + [str(v)])
|
writer.writerow([k.key] + words_only + [str(v['number'])])
|
||||||
|
|
||||||
return "Done"
|
return "Done"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user