2019-08-23 09:14:31 +00:00
import sys
2019-10-15 13:52:39 +00:00
from copy import copy
2019-08-23 09:14:31 +00:00
from pyconll . unit import Token
2019-11-29 09:37:48 +00:00
from Result import Result
2019-12-14 21:44:18 +00:00
from ResultNode import ResultNode
from ResultTree import ResultTree
2019-08-23 09:14:31 +00:00
from Value import Value
2019-12-14 08:36:29 +00:00
from generic import create_output_string_form , create_output_string_deprel , create_output_string_lemma , \
create_output_string_upos , create_output_string_xpos , create_output_string_feats , generate_key
2019-08-23 09:14:31 +00:00
class Tree ( object ) :
2019-11-29 09:37:48 +00:00
def __init__(self, index, form, lemma, upos, xpos, deprel, feats, feats_detailed, form_dict, lemma_dict, upos_dict,
             xpos_dict, deprel_dict, feats_dict, feats_detailed_dict, head):
    """
    Create a tree node whose attribute values are shared through the given dictionaries.

    Each raw attribute is wrapped in a ``Value`` the first time it is seen and stored in the matching
    ``*_dict``; later nodes with an equal raw attribute reuse the same ``Value`` object.

    :param index: stored unchanged as ``self.index``
    :param form: raw form, interned through ``form_dict``
    :param lemma: raw lemma, interned through ``lemma_dict``
    :param upos: raw upos, interned through ``upos_dict``
    :param xpos: raw xpos, interned through ``xpos_dict``
    :param deprel: raw deprel, interned through ``deprel_dict``
    :param feats: raw feats, interned through ``feats_dict`` (must be hashable, it is used as a key)
    :param feats_detailed: mapping feature name -> iterable of values; only the first value produced by
        iterating each entry is used
    :param feats_detailed_dict: two-level shared mapping feature name -> {value: Value}
    :param head: stored as ``self.parent``
    """
    # NOTE(review): the guard looks at 'feats' although it initialises ``feats_detailed``; this is
    # probably a typo for 'feats_detailed', but the check is kept as-is to preserve behaviour.
    if not hasattr(self, 'feats'):
        self.feats_detailed = {}

    def intern(pool, raw):
        # Return the shared Value for ``raw``, creating it on first use.
        if raw not in pool:
            pool[raw] = Value(raw)
        return pool[raw]

    self.form = intern(form_dict, form)
    self.lemma = intern(lemma_dict, lemma)
    self.upos = intern(upos_dict, upos)
    self.xpos = intern(xpos_dict, xpos)
    self.deprel = intern(deprel_dict, deprel)
    self.feats = intern(feats_dict, feats)

    for feat in feats_detailed.keys():
        shared_values = feats_detailed_dict.setdefault(feat, {})
        # Only the first value of the feature is considered.
        first_value = next(iter(feats_detailed[feat]))
        self.feats_detailed.setdefault(feat, {})[first_value] = intern(shared_values, first_value)

    self.parent = head
    self.children = []
    # Initialised to -1; not read anywhere in the visible part of the class.
    self.children_split = -1

    self.index = index

    # for caching answers to questions
    self.cache = {}
2019-11-04 12:53:15 +00:00
def add_child(self, child):
    """
    Append ``child`` to the end of this node's ``children`` list.

    :param child: node to attach; it is not modified here
    """
    siblings = self.children
    siblings.append(child)
2019-08-23 09:14:31 +00:00
def set_parent(self, parent):
    """
    Replace this node's ``parent`` reference.

    :param parent: new value for ``self.parent``
    """
    self.parent = parent
2019-11-04 12:53:15 +00:00
# def fits_static_requirements_feats(self, query_tree):
# if 'feats' not in query_tree:
# return True
#
# for feat in query_tree['feats'].keys():
# if feat not in self.feats or query_tree['feats'][feat] != next(iter(self.feats[feat].values())).get_value():
# return False
#
# return True
2019-10-15 19:18:00 +00:00
2019-11-11 13:52:35 +00:00
def fits_permanent_requirements(self, filters):
    """
    Check this node against ``filters['root_whitelist']``.

    The whitelist is a sequence of options (dicts). The node passes when the whitelist is empty or when
    at least one option matches completely. Inside an option the keys 'deprel', 'feats', 'form', 'lemma'
    and 'upos' are compared with the node attribute of the same name; every other key is treated as a
    detailed feature name and compared with the first value stored under that name in
    ``self.feats_detailed``.

    :param filters: dict holding the 'root_whitelist' entry
    :return: True if the node is allowed, False otherwise
    """
    whitelist = filters['root_whitelist']
    if not whitelist:
        return True

    # NOTE(review): 'xpos' is not listed here, so an 'xpos' key is looked up as a detailed feature.
    main_attributes = ('deprel', 'feats', 'form', 'lemma', 'upos')

    def matches(key, wanted):
        # Compare one option entry with the corresponding node value.
        if key in main_attributes:
            return wanted == getattr(self, key).get_value()
        if key not in self.feats_detailed:
            return False
        first = next(iter(self.feats_detailed[key].values()), None)
        # A feature without any stored value cannot match.
        return first is not None and wanted == first.get_value()

    for option in whitelist:
        if all(matches(key, wanted) for key, wanted in option.items()):
            return True
    return False
def fits_temporary_requirements(self, filters):
    """
    Check this node's deprel against ``filters['label_whitelist']``.

    :param filters: dict holding the 'label_whitelist' entry
    :return: True when the whitelist is empty or contains the node's deprel value, False otherwise
    """
    allowed_labels = filters['label_whitelist']
    if not allowed_labels:
        return True
    return self.deprel.get_value() in allowed_labels
2019-11-06 17:02:53 +00:00
def fits_static_requirements(self, query_tree, filters):
    """
    Check whether this node satisfies the node-level constraints of ``query_tree``.

    Every one of 'form', 'lemma', 'upos', 'xpos', 'deprel' and 'feats' that is present in the query
    must equal the node's value for that attribute. When ``filters['complete_tree_type']`` is set, the
    number of children must match as well: a query without 'children' only fits a childless node, and a
    query with 'children' only fits a node with exactly that many children.

    :param query_tree: dict describing one query node
    :param filters: dict holding the 'complete_tree_type' entry
    :return: True if all constraints hold, False otherwise
    """
    for attribute in ('form', 'lemma', 'upos', 'xpos', 'deprel', 'feats'):
        if attribute in query_tree and query_tree[attribute] != getattr(self, attribute).get_value():
            return False

    if not filters['complete_tree_type']:
        return True

    if 'children' in query_tree:
        return len(self.children) == len(query_tree['children'])
    return len(self.children) == 0
2019-11-04 12:53:15 +00:00
# self.fits_static_requirements_feats(query_tree)
2019-08-23 09:14:31 +00:00
2019-08-28 12:05:26 +00:00
def generate_children_queries(self, all_query_indices, children):
    """
    Generator that hands out, child by child, the query parts that child has to answer.

    For every child it yields ``(child, child_queries, child_queries_metadata)`` where the metadata is a
    list of triples ``(index of the part inside its group, index of the group, is_permanent)`` and
    ``child_queries`` holds the corresponding query parts. The caller is expected to ``send()`` back a
    mapping ``{group index: {part index: results}}``; a part that produced results lets the next part of
    the same group be asked of the following child. After the last child a final
    ``(None, None, None)`` is yielded.

    :param all_query_indices: sequence of ``(group, is_permanent)`` pairs, ``group`` being a sequence of
        query parts
    :param children: sequence of child nodes, in order
    """
    partial_results = {}
    # list of triples (index of part in group, index of group, is permanent)
    child_queries_metadata = []
    for child_index, child in enumerate(children):
        new_queries = []
        # add continuation queries to children
        for result_part_index, result_index, is_permanent in child_queries_metadata:
            answered = (result_index in partial_results
                        and result_part_index in partial_results[result_index]
                        and len(partial_results[result_index][result_part_index]) > 0)
            # continue only if the group still has a part left to ask
            if answered and len(all_query_indices[result_index][0]) > result_part_index + 1:
                new_queries.append((result_part_index + 1, result_index, is_permanent))
        child_queries_metadata = new_queries

        # add new queries to children
        for result_index, (group, is_permanent) in enumerate(all_query_indices):
            # check if node has enough children for query to be possible
            if len(children) - len(group) >= child_index:
                child_queries_metadata.append((0, result_index, is_permanent))

        child_queries = [all_query_indices[result_index][0][result_part_index]
                         for result_part_index, result_index, _ in child_queries_metadata]

        # A plain next() delivers None; treat that as "no results" instead of failing on the next round.
        partial_results = (yield child, child_queries, child_queries_metadata) or {}

    yield None, None, None
def add_subtrees(self, old_subtree, new_subtree):
    """
    Extend ``old_subtree`` in place with the items of ``new_subtree``.

    :param old_subtree: collection that receives the items (mutated)
    :param new_subtree: iterable of items to add
    """
    old_subtree.extend(new_subtree)
2019-12-15 09:21:20 +00:00
# def get_results(self, partial_results_dict, result_index, result_part, outcome, last_result_part):
# # save results for later usage
#
# # if result index already in and element 0 exists (otherwise error)
# if result_index in partial_results_dict and 0 in partial_results_dict[result_index]:
# if result_part - 1 in partial_results_dict[result_index]:
# if result_part in partial_results_dict[result_index]:
# partial_results_dict[result_index][result_part].extend(self.merge_results(partial_results_dict[result_index][result_part - 1], outcome))
# else:
# partial_results_dict[result_index][result_part] = self.merge_results(partial_results_dict[result_index][result_part - 1], outcome)
#
# # extend one word layer with output
# else:
# partial_results_dict[result_index][0].extend(outcome)
# else:
# partial_results_dict[result_index] = {0: outcome}
#
# if last_result_part - 1 in partial_results_dict[result_index]:
# return partial_results_dict[result_index].pop(last_result_part - 1)
# return []
# def group_results(self, new_partial_subtrees, child_queries_metadata, all_query_indices, partial_results_dict, partial_subtrees):
# for outcome, (result_part, result_index, is_permanent) in zip(new_partial_subtrees, child_queries_metadata):
# if outcome:
# new_results = self.get_results(partial_results_dict, result_index, result_part, outcome, len(all_query_indices[result_index][0]))
# if new_results:
# self.add_subtrees(partial_subtrees[result_index], new_results)
# else:
# if not is_permanent:
# partial_subtrees[result_index].append([])
# def get_all_query_indices_old(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string):
# partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)]
# completed_subtrees = [[] for i in range(completed_subtrees_size)]
#
# # list of pairs (index of query in group, group of query)
# partial_results_dict = {}
#
# children_queries_generator = self.generate_children_queries(l_all_query_indices, children)
#
# child_index = 0
# child, child_queries, child_queries_metadata = next(children_queries_generator)
# while child:
# # obtain children results
# new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string)
#
# self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices,
# partial_results_dict, partial_subtrees)
#
# for i in range(len(new_completed_subtrees)):
# completed_subtrees[i].extend(new_completed_subtrees[i])
# child, child_queries, child_queries_metadata = children_queries_generator.send(partial_results_dict)
# child_index += 1
#
# return partial_subtrees, completed_subtrees
2019-08-23 09:14:31 +00:00
2019-11-06 15:27:32 +00:00
def get_all_query_indices(self, temporary_query_nb, permanent_query_nb, permanent_query_trees, all_query_indices,
                          children, create_output_string, filters):
    """
    Ask every child all child-level query parts and regroup the answers per query.

    The query groups found in ``all_query_indices`` are flattened, duplicates are removed so each
    distinct part is asked only once per child, and every child is queried through ``get_subtrees``.
    The per-part answers are then put back in their original positions and, group by group, handed to
    ``self.create_answers``.

    :param temporary_query_nb: number of temporary queries (used to size the partial answers list)
    :param permanent_query_nb: number of permanent queries (used to size both result lists)
    :param permanent_query_trees: permanent queries, forwarded unchanged to every child
    :param all_query_indices: sequence of ``(group, is_permanent)`` pairs; only ``group`` is used here
    :param children: child nodes to query
    :param create_output_string: forwarded unchanged to the children
    :param filters: forwarded to the children and to ``create_answers``
    :return: ``(partial_answers, complete_answers)``; ``partial_answers[k]`` is the ``create_answers``
        result for the k-th group (slots without a group stay empty lists), ``complete_answers[k]``
        collects what the children reported for the k-th permanent query
    """
    partial_answers = [[] for _ in range(permanent_query_nb + temporary_query_nb)]
    complete_answers = [[] for _ in range(permanent_query_nb)]

    # TODO try to erase!!!
    child_queries = [query_entry[0] for query_entry in all_query_indices]
    answers_lengths = [len(query) for query in child_queries]
    child_queries_flatten = [query_part for query in child_queries for query_part in query]
    all_new_partial_answers = [[] for _ in child_queries_flatten]

    # erase duplicate queries; parts are compared by equality (they appear to be dict-like, hence no set)
    child_queries_flatten_dedup = []
    child_queries_flatten_dedup_indices = []
    for query_part in child_queries_flatten:
        try:
            index = child_queries_flatten_dedup.index(query_part)
        except ValueError:
            index = len(child_queries_flatten_dedup)
            child_queries_flatten_dedup.append(query_part)
        child_queries_flatten_dedup_indices.append(index)

    # ask children all queries/partial queries
    for child in children:
        # obtain children results
        new_partial_answers_dedup, new_complete_answers = child.get_subtrees(
            permanent_query_trees, child_queries_flatten_dedup, create_output_string, filters)

        assert len(new_partial_answers_dedup) == len(child_queries_flatten_dedup)

        # duplicate results again on correct places
        for i, flattened_index in enumerate(child_queries_flatten_dedup_indices):
            all_new_partial_answers[i].append(new_partial_answers_dedup[flattened_index])

        # TODO add order rearagement (TO KEY)
        for i, child_complete_answers in enumerate(new_complete_answers):
            complete_answers[i].extend(child_complete_answers)

    # merge answers in appropriate way: each group owns a consecutive slice of the flattened answers
    offset = 0
    for answer_i, answer_length in enumerate(answers_lengths):
        # TODO (original author's note): ERROR IN HERE!
        partial_answers[answer_i] = self.create_answers(
            all_new_partial_answers[offset:offset + answer_length], answer_length, filters)
        offset += answer_length

    return partial_answers, complete_answers
2019-10-15 13:52:39 +00:00
2019-12-14 13:26:23 +00:00
def order_dependent_queries(self, active_permanent_query_trees, active_temporary_query_trees, partial_subtrees,
                            create_output_string, merged_partial_subtrees, i_query, i_answer, filters):
    """
    Build this node's result for one active query and append it to ``merged_partial_subtrees``.

    Active queries are numbered with the permanent ones first, followed by the temporary ones. A query
    without 'children' yields a single ``ResultTree`` for this node; a query with 'children' combines
    that tree with the children answers at ``partial_subtrees[i_answer]`` and consumes that slot.

    :param active_permanent_query_trees: permanent queries that matched this node
    :param active_temporary_query_trees: temporary queries that matched this node
    :param partial_subtrees: children answers, one entry per active query that has 'children'
    :param create_output_string: passed on to ``ResultNode``
    :param merged_partial_subtrees: output list, extended by exactly one entry
    :param i_query: position of the query among all active queries
    :param i_answer: position of the next unused entry in ``partial_subtrees``
    :param filters: passed on to ``ResultTree`` and ``create_output_children``
    :return: ``i_answer``, advanced by one if the query had 'children'
    """
    permanent_count = len(active_permanent_query_trees)
    if i_query < permanent_count:
        query_tree = active_permanent_query_trees[i_query]
    else:
        query_tree = active_temporary_query_trees[i_query - permanent_count]

    node = ResultNode(self, self.index, create_output_string)
    own_result = [ResultTree(node, [], filters)]

    if 'children' in query_tree:
        merged_partial_subtrees.append(
            self.create_output_children(partial_subtrees[i_answer], own_result, filters))
        # not every query is passed to the children, so answers are counted separately from queries
        i_answer += 1
    else:
        merged_partial_subtrees.append(own_result)

    return i_answer
2019-10-15 13:52:39 +00:00
2019-12-14 08:36:29 +00:00
def get_unigrams(self, create_output_strings, filters):
    """
    Collect one key per node for this node and all of its descendants.

    The key is element ``[1]`` of what ``generate_key`` returns for the node (called with
    ``print_lemma=False``). This node's key comes first, followed by the keys of each child's subtree
    in child order.

    :param create_output_strings: passed on to ``generate_key``
    :param filters: only forwarded to the recursive calls
    :return: list of keys
    """
    unigrams = [generate_key(self, create_output_strings, print_lemma=False)[1]]
    for child in self.children:
        unigrams += child.get_unigrams(create_output_strings, filters)
    return unigrams
2019-11-04 12:53:15 +00:00
def get_subtrees(self, permanent_query_trees, temporary_query_trees, create_output_string, filters):
    """
    Match this node (and, recursively, its children) against the given queries.

    :param permanent_query_trees: queries tried on every node; their finished matches are collected
        in the second return value
    :param temporary_query_trees: queries asked by the parent; their matches rooted at this node are
        returned to the parent in the first return value
    :param create_output_string: forwarded to the helpers that build results
    :param filters: dict of filter settings, forwarded to all helpers
    :return: ``(partial_answers, complete_answers)``; ``partial_answers[k]`` holds the matches of
        ``temporary_query_trees[k]`` (empty list if the query does not fit this node) and
        ``complete_answers[k]`` holds the finalized matches of ``permanent_query_trees[k]`` found in
        this subtree
    """
    # list of all children queries grouped by parent queries
    all_query_indices = []

    active_permanent_query_trees = []
    # positions of the active permanent queries inside permanent_query_trees
    successful_permanent_queries = []
    for i, permanent_query_tree in enumerate(permanent_query_trees):
        if self.fits_static_requirements(permanent_query_tree, filters) and self.fits_permanent_requirements(filters):
            active_permanent_query_trees.append(permanent_query_tree)
            successful_permanent_queries.append(i)
            if 'children' in permanent_query_tree:
                all_query_indices.append((permanent_query_tree['children'], True))

    active_temporary_query_trees = []
    # positions of the active temporary queries inside temporary_query_trees
    successful_temporary_queries = []
    for i, temporary_query_tree in enumerate(temporary_query_trees):
        if self.fits_static_requirements(temporary_query_tree, filters) and self.fits_temporary_requirements(filters):
            active_temporary_query_trees.append(temporary_query_tree)
            successful_temporary_queries.append(i)
            if 'children' in temporary_query_tree:
                all_query_indices.append((temporary_query_tree['children'], False))

    partial_subtrees, complete_answers = self.get_all_query_indices(len(temporary_query_trees),
                                                                    len(permanent_query_trees),
                                                                    permanent_query_trees,
                                                                    all_query_indices, self.children,
                                                                    create_output_string, filters)

    merged_partial_answers = []
    # i_answer is necessary, because some queries have no children part and were not passed to children.
    # i_answer points to where we are inside the children answers
    i_answer = 0
    # go over all permanent and temporary query trees
    active_count = len(active_permanent_query_trees) + len(active_temporary_query_trees)
    for i_question in range(active_count):
        i_answer = self.order_dependent_queries(active_permanent_query_trees, active_temporary_query_trees,
                                                partial_subtrees, create_output_string, merged_partial_answers,
                                                i_question, i_answer, filters)

    # complete_answers is indexed by position in permanent_query_trees, so matches of the active
    # queries have to be mapped back to their original positions
    for inside_i, outside_i in enumerate(successful_permanent_queries):
        add_subtree = [subtree.finalize_result() for subtree in merged_partial_answers[inside_i]]
        complete_answers[outside_i].extend(add_subtree)

    # answers to valid queries
    partial_answers = [[] for _ in range(len(temporary_query_trees))]
    for inside_i, outside_i in enumerate(successful_temporary_queries):
        partial_answers[outside_i] = merged_partial_answers[len(active_permanent_query_trees) + inside_i]

    return partial_answers, complete_answers
2019-10-15 13:52:39 +00:00
# return merged_partial_subtrees_architecture[len(active_permanent_query_trees):], merged_partial_subtrees[len(active_permanent_query_trees):], completed_subtrees
2019-08-23 09:14:31 +00:00
2019-12-15 09:21:20 +00:00
# @staticmethod
# def merge_results(left_parts, right_parts, separator, left=True, right_part_free=False):
# if not left_parts:
# # return all right_parts
# return [r_p.add_separator(separator, left) for r_p in right_parts]
# # if left:
# # return [r_p + separator for r_p in right_parts]
# # # return [r_p.add_separator(separator, left) for r_p in right_parts]
# # else:
# # return [separator + r_p for r_p in right_parts]
#
# if not right_parts:
# return [l_p.add_separator(separator, False) for l_p in left_parts]
# # return [separator + l_p for l_p in left_parts]
# merged_results = []
# for left_part in left_parts:
# if right_part_free:
# for right_part in right_parts[1]:
# merged_results.append((right_parts[0], left_part.merge_results(right_part, separator, left)))
# else:
# for right_part in right_parts:
# merged_results.append(left_part.merge_results(right_part, separator, left))
# # merged_results.append(left_part.merge_results(right_part, separator))
# # if separator:
# # if left:
# # merged_results.append(left_part + right_part + separator)
# # else:
# # merged_results.append(left_part + separator + right_part)
# # else:
# # merged_results.append(left_part + right_part)
# return merged_results
2019-08-23 09:14:31 +00:00
2019-12-14 21:44:18 +00:00
@staticmethod
def create_children_groups ( left_parts , right_parts ) :
if not left_parts :
# return all right_parts
return right_parts
# if left:
# return [r_p + separator for r_p in right_parts]
# # return [r_p.add_separator(separator, left) for r_p in right_parts]
# else:
# return [separator + r_p for r_p in right_parts]
if not right_parts :
return left_parts
# return [separator + l_p for l_p in left_parts]
all_children_group_possibilities = [ ]
for left_part in left_parts :
for right_part in right_parts :
new_part = copy ( left_part )
2019-12-15 09:21:20 +00:00
# new_part.reset_params()
2019-12-14 21:44:18 +00:00
new_part . extend ( right_part )
all_children_group_possibilities . append ( new_part )
# merged_results.append(left_part.merge_results(right_part, separator))
# if separator:
# if left:
# merged_results.append(left_part + right_part + separator)
# else:
# merged_results.append(left_part + separator + right_part)
# else:
# merged_results.append(left_part + right_part)
return all_children_group_possibilities
2019-10-15 13:52:39 +00:00
@staticmethod
def merge_answer ( answer1 , answer2 , base_answer_i , answer_j ) :
merged_results = [ ]
merged_indices = [ ]
for answer1p_i , old_result in enumerate ( answer1 ) :
for answer2p_i , new_result in enumerate ( answer2 ) :
if answer1p_i != answer2p_i :
new_indices = [ answer1p_i ] + [ answer2p_i ]
sorted_indices = sorted ( new_indices )
if sorted_indices in merged_indices :
test = merged_indices . index ( sorted ( new_indices ) )
# TODO add comparison answers with different indices if equal than ignore
merged_results . append ( old_result + new_result )
merged_indices . append ( new_indices )
return merged_results , merged_indices
2019-12-15 09:21:20 +00:00
# def merge_results2(self, child, new_results, filters):
# if create_output_string_form(self) == 'začelo':
# print('HERE!@@!')
# if create_output_string_form(self) == 'Dogodek':
# print('HERE!@@!')
# if create_output_string_form(self) == 'utišal':
# print('HERE!@@!')
# if create_output_string_form(self) == 'prijel':
# print('HERE!@@!')
# if filters['node_order']:
# new_child = child
# # new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
# else:
# new_child = sorted(child, key=lambda x: x[0].key)
#
# l_res = []
# r_res = []
# results = []
# for i_answer, answer in enumerate(new_child):
# if filters['node_order'] and answer[0].order[0] < self.index:
# # if filters['node_order'] and indices[i_child][i_answer] < self.children_split:
# if filters['dependency_type']:
# # separator = ' <' + deprel[i_child][i_answer] + ' '
# separator = ' <' + answer[0].deprel + ' '
# else:
# separator = ' < '
# l_res = self.merge_results(l_res, answer, separator, left=True)
# # l_res += answer + separator
# else:
# if filters['dependency_type']:
# separator = ' >' + answer[0].deprel + ' '
# else:
# separator = ' > '
# r_res = self.merge_results(r_res, answer, separator, left=False)
# # r_res += separator + answer
#
# # if filters['node_order']:
# # r_res_sorted = []
# # for i_answer, answer in new_child_sorted:
# # if filters['dependency_type']:
# # separator = ' >' + answer[0].deprel + ' '
# # else:
# # separator = ' > '
# # r_res_sorted = (i_answer, self.merge_results(r_res_sorted, answer, separator, left=False))
# #
# #
# # r_res_sorted_combined = self.merge_results(new_results, r_res_sorted, None, right_part_free=True)
# # # print('here')
#
# if l_res:
# l_res_combined = self.merge_results(l_res, new_results, None)
# if r_res:
# r_res_combined = self.merge_results(l_res_combined, r_res, None)
# # merged_results.extend(['(' + el + ')' for el in r_res_combined])
# result = r_res_combined
# # results.extend([el.put_in_bracelets() for el in r_res_combined])
# else:
# result = l_res_combined
# # results.extend([el.put_in_bracelets() for el in l_res_combined])
# elif r_res:
# r_res_combined = self.merge_results(new_results, r_res, None)
# result = r_res_combined
# # results.extend([el.put_in_bracelets() for el in r_res_combined])
# else:
# result = []
#
#
# results.extend([el.put_in_bracelets() for el in result])
#
# return results
# def create_merged_results(self, answers, separators, separator_switch):
# new_answers = []
# for answer_i, answer in enumerate(answers):
# new_answer = copy(answer[0])
# print(create_output_string_form(self))
# for answer_part_i, answer_part in enumerate(answer[1:]):
# new_answer.extend_answer(answer_part, separators[answer_part_i])
# new_answer.put_in_bracelets(inplace=True)
# new_answers.append(new_answer)
# return new_answers
2019-12-11 14:50:14 +00:00
# def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters):
2019-12-14 21:44:18 +00:00
def merge_results3(self, child, new_results, filters):
    """
    Attach every possible combination of children answers to copies of ``new_results``.

    ``child`` holds one list of alternative answers per query part. One alternative is picked from each
    list (all combinations, built with ``create_children_groups``) and every combination is set as the
    children of a shallow copy of each result in ``new_results``.

    :param child: list of answers, each answer being a non-empty list of alternatives
    :param new_results: results for the current node; they are copied, not modified
    :param filters: when ``filters['node_order']`` is falsy, the answers are first sorted by the key of
        their first alternative; otherwise the given order is kept
    :return: list with one new result per (result, combination) pair
    """
    if filters['node_order']:
        new_child = child
    else:
        new_child = sorted(child, key=lambda x: x[0].get_key())

    children_groups = []
    for answer in new_child:
        children_groups = self.create_children_groups(children_groups, [[answer_part] for answer_part in answer])

    results = []
    for result in new_results:
        for children in children_groups:
            new_result = copy(result)
            new_result.set_children(children)
            results.append(new_result)

    return results
2019-11-05 07:58:16 +00:00
2019-12-14 13:26:23 +00:00
def create_output_children(self, children, new_results, filters):
    """
    Run ``merge_results3`` for every entry of ``children`` and concatenate what it returns.

    :param children: iterable of children answer sets, each passed as ``child`` to ``merge_results3``
    :param new_results: results for the current node, passed on unchanged
    :param filters: passed on unchanged
    :return: flat list of all merged results, in the order of ``children``
    """
    return [
        merged
        for child in children
        for merged in self.merge_results3(child, new_results, filters)
    ]
2019-11-06 15:27:32 +00:00
# @staticmethod
2019-12-14 08:36:29 +00:00
def create_answers(self, separated_answers, answer_length, filters):
    """Combine per-part child answers into complete, de-duplicated trees.

    ``separated_answers[part][child]`` is indexed first by answer part and
    then by child position. A falsy entry means that child supplied nothing
    for that part and is skipped. A tree is complete once each of its
    ``answer_length`` slots holds a truthy entry; one child fills at most
    one slot of any given tree.

    :param separated_answers: sequence with one item per answer part, each
        item indexable by child position. Truthy entries must be sized
        sequences whose elements provide ``get_order_key()`` (presumably
        lists of result objects -- verify against the caller).
    :param answer_length: number of slots every tree has
    :param filters: not used by this method; kept for interface compatibility
    :return: list of complete trees. Inside a tree the entries are ordered
        by the position of the child that supplied them, and trees whose
        entries have pairwise equal order keys are returned only once.
        An empty ``separated_answers`` yields ``[]``.
    """
    # Nothing to combine; this also protects the ``separated_answers[0]``
    # lookup below from raising IndexError.
    if not separated_answers:
        return []

    def same_order_keys(left_tree, right_tree):
        # Two trees are duplicates when every slot has the same number of
        # results and those results have equal order keys position by position.
        for left_part, right_part in zip(left_tree, right_tree):
            if len(left_part) != len(right_part):
                return False
            if any(left.get_order_key() != right.get_order_key()
                   for left, right in zip(left_part, right_part)):
                return False
        return True

    # ``*_indices`` lists run parallel to the tree lists and record which
    # child position filled each slot.
    partial_trees = [[None] * answer_length]
    partial_indices = [[None] * answer_length]
    built_trees = []
    built_indices = []

    # Iterate over children first, so that partial trees created from a child
    # only join the pool after that child is fully processed (a child can
    # therefore never occupy two slots of the same tree).
    for child_i in range(len(separated_answers[0])):
        fresh_trees = []
        fresh_indices = []
        for part_i, part_answers in enumerate(separated_answers):
            child_answer = part_answers[child_i]
            # Some parts do not pass the filters and have no answer here.
            if not child_answer:
                continue
            for tree, indices in zip(partial_trees, partial_indices):
                # Slot already taken by an earlier child: skip repetitions.
                if tree[part_i]:
                    continue
                new_tree = copy(tree)
                new_indices = copy(indices)
                new_tree[part_i] = child_answer
                new_indices[part_i] = child_i
                if all(new_tree):
                    built_trees.append(new_tree)
                    built_indices.append(new_indices)
                else:
                    fresh_trees.append(new_tree)
                    fresh_indices.append(new_indices)
        partial_trees.extend(fresh_trees)
        partial_indices.extend(fresh_indices)

    ordered_trees = []
    # Visit complete trees sorted by their child-index assignment.
    for indices, tree in sorted(zip(built_indices, built_trees)):
        # Reorder the slots of the tree by the child position that filled them.
        ordered_tree = [part for _, part in sorted(zip(indices, tree))]
        if not any(same_order_keys(known, ordered_tree) for known in ordered_trees):
            ordered_trees.append(ordered_tree)
    return ordered_trees