2019-08-23 09:14:31 +00:00
import sys
from pyconll . unit import Token
from Value import Value
class Tree(object):
    """Tree node whose attributes can be matched against query trees.

    A node stores five attribute values (``form``, ``lemma``, ``upos``,
    ``xpos``, ``deprel``) as shared ``Value`` objects, a reference to its
    parent, and two child lists (``l_children`` and ``r_children``).

    Query trees are dictionaries.  Attribute constraints are looked up under
    the keys used in ``fits_static_requirements``; child queries are looked up
    under the ``l_children`` / ``r_children`` keys used in ``get_subtrees``.
    """

    def __init__(self, form, lemma, upos, xpos, deprel, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict,
                 head):
        """Create a node and intern its attribute values.

        :param form: raw form value of the node
        :param lemma: raw lemma value of the node
        :param upos: raw upos value of the node
        :param xpos: raw xpos value of the node
        :param deprel: raw deprel value of the node
        :param form_dict: cache mapping raw form values to ``Value`` objects
        :param lemma_dict: cache mapping raw lemma values to ``Value`` objects
        :param upos_dict: cache mapping raw upos values to ``Value`` objects
        :param xpos_dict: cache mapping raw xpos values to ``Value`` objects
        :param deprel_dict: cache mapping raw deprel values to ``Value`` objects
        :param head: stored unchanged as ``self.parent``
        """
        self.form = self._intern(form, form_dict)
        self.lemma = self._intern(lemma, lemma_dict)
        self.upos = self._intern(upos, upos_dict)
        self.xpos = self._intern(xpos, xpos_dict)
        self.deprel = self._intern(deprel, deprel_dict)

        self.parent = head
        self.l_children = []
        self.r_children = []

    @staticmethod
    def _intern(raw, cache):
        """Return the ``Value`` cached for ``raw``, creating it on first use."""
        if raw not in cache:
            cache[raw] = Value(raw)
        return cache[raw]

    def add_l_child(self, child):
        """Append ``child`` to ``self.l_children``."""
        self.l_children.append(child)

    def add_r_child(self, child):
        """Append ``child`` to ``self.r_children``."""
        self.r_children.append(child)

    def set_parent(self, parent):
        """Replace ``self.parent`` with ``parent``."""
        self.parent = parent

    def fits_static_requirements(self, query_tree):
        """Check the node's own attributes against a query tree.

        Only attribute keys present in ``query_tree`` are checked; a missing
        key places no constraint on the node.

        :param query_tree: dictionary that may contain attribute constraints
        :return: ``True`` if every present constraint equals the node's value
        """
        constraints = (
            (' form ', self.form),
            (' lemma ', self.lemma),
            (' upos ', self.upos),
            (' xpos ', self.xpos),
            (' deprel ', self.deprel),
        )
        for key, value in constraints:
            # get_value must be *called*: comparing against the bound method
            # itself can never be equal to a query value.
            if key in query_tree and query_tree[key] != value.get_value():
                return False
        return True

    def generate_children_queries(self, l_all_query_indices):
        """Generate, child by child, the queries to run on ``self.l_children``.

        This is a coroutine-style generator.  For every child it yields
        ``(child, child_query_trees, queries)`` where ``queries`` is a list of
        ``(part_index, result_index, is_permanent)`` triples and
        ``child_query_trees`` holds the matching query tree for each triple.
        The caller must ``send`` back a list of outcomes parallel to
        ``queries``; a truthy outcome lets that query continue with its next
        part on the next child.  After the last child ``(None, None, None)``
        is yielded.

        :param l_all_query_indices: list of ``(group, is_permanent)`` pairs,
            ``group`` being the ordered child query trees of one parent query
        """
        subtree_outcomes = []
        # triples (index of query part in group, index of group, is_permanent)
        queries = []
        for child_index, child in enumerate(self.l_children):
            # queries that matched on the previous child continue with their
            # next part, provided the group has a next part
            queries = [
                (part_index + 1, result_index, is_permanent)
                for (part_index, result_index, is_permanent), outcome in zip(queries, subtree_outcomes)
                if outcome and len(l_all_query_indices[result_index][0]) > part_index + 1
            ]

            # start a fresh attempt of every group that can still fit into
            # the children remaining from this position on
            for result_index, (group, is_permanent) in enumerate(l_all_query_indices):
                if len(self.l_children) - len(group) >= child_index:
                    queries.append((0, result_index, is_permanent))

            child_query_trees = [
                l_all_query_indices[result_index][0][part_index]
                for part_index, result_index, _ in queries
            ]
            subtree_outcomes = yield child, child_query_trees, queries
        yield None, None, None

    def add_subtrees(self, old_subtree, new_subtree):
        """Extend the list ``old_subtree`` in place with ``new_subtree``."""
        old_subtree.extend(new_subtree)

    def group_results_old(self, subtree_outcomes, queries, l_all_query_indices, completed_subtrees,
                          query_creation_dict, child_index, partial_subtrees):
        """Legacy result grouping based on ``create_subtrees``.

        NOTE(review): nothing visible in this class calls this method; it
        looks superseded by ``group_results`` - confirm before removing.

        :param subtree_outcomes: outcomes returned by a child, parallel to ``queries``
        :param queries: ``(result_part_index, result_index, is_permanent)`` triples
        :param l_all_query_indices: list of ``(group, is_permanent)`` pairs
        :param completed_subtrees: list extended in place with permanent results
        :param query_creation_dict: ``{result_index: {part_index: {child_index: outcome}}}``
            store of outcomes for parts that are not the last of their group
        :param child_index: index of the child that produced the outcomes
        :param partial_subtrees: list extended in place with temporary results
        """
        for outcome, (result_part_index, result_index, is_permanent) in zip(subtree_outcomes, queries):
            if not outcome:
                if not is_permanent:
                    partial_subtrees.append(None)
                continue

            if result_part_index == len(l_all_query_indices[result_index][0]) - 1:
                # last part of the group matched: build the results
                new_results = self.create_subtrees(query_creation_dict, result_index, result_part_index,
                                                   child_index, outcome)
                if is_permanent:
                    self.add_subtrees(completed_subtrees, new_results)
                else:
                    self.add_subtrees(partial_subtrees, new_results)
            else:
                # save results for later usage
                by_part = query_creation_dict.setdefault(result_index, {})
                by_part.setdefault(result_part_index, {})[child_index] = outcome

    def get_results(self, partial_results_dict, result_index, stage, outcome, outcome_stage):
        """Record ``outcome`` for one query stage and return finished results.

        ``partial_results_dict[result_index]`` maps a stage number to the
        list of results collected for that stage.  Stage ``0`` collects plain
        outcomes; a later stage collects the combination (``merge_results``)
        of the previous stage with the new outcome.

        :param partial_results_dict: store that is updated in place
        :param result_index: index of the query group the outcome belongs to
        :param stage: index of the query part that produced ``outcome``
        :param outcome: list of results returned for that part
        :param outcome_stage: number of parts in the query group
        :return: the results stored for the final stage (removed from the
            store), or ``[]`` if the final stage has no results yet
        """
        stages = partial_results_dict.get(result_index)
        # stage 0 must exist, otherwise the store for this group is restarted
        if stages is not None and 0 in stages:
            if stage - 1 in stages:
                # previous stage exists: combine it with the new outcome
                merged = self.merge_results(stages[stage - 1], outcome)
                if stage in stages:
                    stages[stage].extend(merged)
                else:
                    stages[stage] = merged
            else:
                # extend the first layer with the new outcome
                stages[0].extend(outcome)
        else:
            stages = partial_results_dict[result_index] = {0: outcome}

        if outcome_stage - 1 in stages:
            return stages.pop(outcome_stage - 1)
        return []

    def group_results(self, subtree_outcomes, queries, l_all_query_indices, completed_subtrees,
                      partial_results_dict, child_index, partial_subtrees):
        """Sort the outcomes of one child into completed and partial results.

        :param subtree_outcomes: outcomes returned by the child, parallel to ``queries``
        :param queries: ``(stage, result_index, is_permanent)`` triples
        :param l_all_query_indices: list of ``(group, is_permanent)`` pairs
        :param completed_subtrees: list extended in place with results of
            finished permanent queries
        :param partial_results_dict: stage store passed on to ``get_results``
        :param child_index: index of the child (currently not used here)
        :param partial_subtrees: list extended in place with results of
            finished temporary queries; ``None`` is appended for a temporary
            query without outcome
        """
        # NOTE(review): the trailing ``else`` is taken to belong to
        # ``if outcome`` (as in group_results_old) - confirm against the
        # original indentation.
        for outcome, (stage, result_index, is_permanent) in zip(subtree_outcomes, queries):
            if outcome:
                new_results = self.get_results(partial_results_dict, result_index, stage, outcome,
                                               len(l_all_query_indices[result_index][0]))
                if new_results:
                    if is_permanent:
                        self.add_subtrees(completed_subtrees, new_results)
                    else:
                        self.add_subtrees(partial_subtrees, new_results)
            elif not is_permanent:
                partial_subtrees.append(None)

    def get_subtrees(self, permanent_query_trees, temporary_query_trees):
        """Match query trees against this node and, recursively, its children.

        :param permanent_query_trees: query trees handed unchanged to every
            descendant; each one that fits this node must provide both
            children keys
        :param temporary_query_trees: query trees asked of this node only
        :return: tuple ``(partial_subtrees, completed_subtrees)``.
            ``partial_subtrees`` starts with one slot per temporary query
            (``None``, or ``[[form]]`` for a fitting query without child
            queries) and is extended by ``group_results``;
            ``completed_subtrees`` collects the results of finished permanent
            queries from this node and all processed children.
        """
        # children queries grouped by parent query: (group, is_permanent)
        l_all_query_indices = []
        # TODO: right children are collected but not processed yet
        r_all_query_indices = []

        for permanent_query_tree in permanent_query_trees:
            if self.fits_static_requirements(permanent_query_tree):
                l_all_query_indices.append((permanent_query_tree[' l_children '], True))
                r_all_query_indices.append((permanent_query_tree[' r_children '], True))

        partial_subtrees = [None] * len(temporary_query_trees)
        for i, temporary_query_tree in enumerate(temporary_query_trees):
            if not self.fits_static_requirements(temporary_query_tree):
                # slot keeps its default None
                continue
            has_l_children = ' l_children ' in temporary_query_tree
            has_r_children = ' r_children ' in temporary_query_tree
            if has_l_children:
                l_all_query_indices.append((temporary_query_tree[' l_children '], False))
            if has_r_children:
                r_all_query_indices.append((temporary_query_tree[' r_children '], False))
            if not has_l_children and not has_r_children:
                # query without child queries is fully matched by this node
                partial_subtrees[i] = [[self.create_output_string()]]

        completed_subtrees = []
        partial_results_dict = {}
        children_queries_generator = self.generate_children_queries(l_all_query_indices)

        child_index = 0
        child, child_query, child_group_mapper = next(children_queries_generator)
        while child is not None:
            child_outcomes, child_completed = child.get_subtrees(permanent_query_trees, child_query)
            # accumulate instead of rebinding, so matches found in earlier
            # children are not lost
            completed_subtrees.extend(child_completed)
            self.group_results(child_outcomes, child_group_mapper, l_all_query_indices, completed_subtrees,
                               partial_results_dict, child_index, partial_subtrees)
            child, child_query, child_group_mapper = children_queries_generator.send(child_outcomes)
            child_index += 1

        return partial_subtrees, completed_subtrees

    @staticmethod
    def merge_results(old_results, new_results):
        """Concatenate every old result with every new result.

        :param old_results: list of results (each supporting ``+``)
        :param new_results: list of results to append to each old result
        :return: new list of ``old + new`` for all pairs, ordered by old
            result first; empty if either input is empty
        """
        return [old_result + new_result for old_result in old_results for new_result in new_results]

    def create_subtrees(self, query_creation_dict, result_index, result_part_index, child_index, outcome):
        """Legacy helper of ``group_results_old`` that builds results for a finished query.

        Collects, for every earlier part ``i`` of the query, the earlier child
        indices ``j`` for which an outcome was stored, then delegates to
        ``create_subtrees_from_result_connections``.

        NOTE(review): the delegate is started with an empty ``res_array`` and
        ``merge_results`` of an empty list is empty, so this appears to always
        return ``[]`` (or raise ``KeyError`` when a part has no stored
        outcome) - confirm intent.
        """
        # part index -> child indices that have a stored outcome for it
        result_connections = {}
        for i in range(result_part_index):
            for j in range(child_index):
                if result_index in query_creation_dict and i in query_creation_dict[result_index] \
                        and j in query_creation_dict[result_index][i]:
                    result_connections.setdefault(i, []).append(j)
        return self.create_subtrees_from_result_connections(0, 0, result_part_index, query_creation_dict,
                                                            result_connections, result_index, [], outcome)

    def create_subtrees_from_result_connections(self, child_index_i, result_part_index_i, result_part_index_final,
                                                query_creation_dict, result_connections, result_index, res_array,
                                                outcome):
        """Legacy recursive combination of stored outcomes, one query part per level.

        :param child_index_i: child index chosen on the previous level
        :param result_part_index_i: query part handled on this level
        :param result_part_index_final: part index at which recursion stops
        :param query_creation_dict: ``{result_index: {part_index: {child_index: outcome}}}``
        :param result_connections: part index -> child indices with stored outcomes
        :param result_index: index of the query group
        :param res_array: results accumulated so far
        :param outcome: outcome of the final query part
        :return: list of combined results
        """
        if result_part_index_i == result_part_index_final:
            return self.merge_results(res_array, outcome)

        results = []
        for child_index in result_connections[result_part_index_i]:
            if not (result_index in query_creation_dict
                    and result_part_index_i in query_creation_dict[result_index]
                    and child_index_i in query_creation_dict[result_index][result_part_index_i]):
                print(' HERE! ')
                return []
            # NOTE(review): pass_array is computed but res_array is what gets
            # passed down - looks unintended, confirm.
            pass_array = self.merge_results(res_array,
                                            query_creation_dict[result_index][result_part_index_i][child_index])
            results.extend(self.create_subtrees_from_result_connections(child_index, result_part_index_i + 1,
                                                                        result_part_index_final,
                                                                        query_creation_dict, result_connections,
                                                                        result_index, res_array, outcome))
        return results

    def create_output_string(self):
        """Return the node's form value as obtained from ``self.form.get_value()``."""
        return self.form.get_value()