Base algorithm working for simple queries, complex queries so far dont

This commit is contained in:
Luka 2019-08-28 14:05:26 +02:00
parent 81827bdf52
commit 31496a4267
3 changed files with 104 additions and 45 deletions

115
Tree.py
View File

@ -45,11 +45,11 @@ class Tree(object):
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value) and \
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value)
def generate_children_queries(self, all_query_indices):
def generate_children_queries(self, all_query_indices, children):
subtree_outcomes = []
# list of pairs (index of query in group, group of query, is permanent)
child_queries_metadata = []
for child_index, child in enumerate(self.l_children):
for child_index, child in enumerate(children):
new_queries = []
# add continuation queries to children
@ -65,7 +65,7 @@ class Tree(object):
# add new queries to children
for result_index, (group, is_permanent) in enumerate(all_query_indices):
# check if node has enough children for query to be possible
if len(self.l_children) - len(group) >= child_index:
if len(children) - len(group) >= child_index:
child_queries_metadata.append((0, result_index, is_permanent))
child_queries = []
@ -100,36 +100,36 @@ class Tree(object):
return partial_results_dict[result_index].pop(last_result_part - 1)
return []
def group_results(self, new_partial_subtrees, child_queries_metadata, all_query_indices, completed_subtrees, partial_results_dict, partial_subtrees):
def group_results(self, new_partial_subtrees, child_queries_metadata, all_query_indices, partial_results_dict, partial_subtrees):
for outcome, (result_part, result_index, is_permanent) in zip(new_partial_subtrees, child_queries_metadata):
if outcome:
new_results = self.get_results(partial_results_dict, result_index, result_part, outcome, len(all_query_indices[result_index][0]))
if new_results:
if is_permanent:
# if is_permanent:
# if result_index in completed_subtrees:
# self.add_subtrees(completed_subtrees[result_index], new_results)
# else:
# completed_subtrees[result_index] = new_results
# comment
self.add_subtrees(completed_subtrees[result_index], new_results)
else:
# self.add_subtrees(completed_subtrees[result_index], new_results)
# else:
# if result_index in completed_subtrees:
# self.add_subtrees(partial_subtrees[result_index], new_results)
# else:
# partial_subtrees[result_index] = new_results
self.add_subtrees(partial_subtrees[result_index - len(completed_subtrees)], new_results)
self.add_subtrees(partial_subtrees[result_index], new_results)
else:
if not is_permanent:
partial_subtrees[result_index - len(completed_subtrees)].append([])
partial_subtrees[result_index].append([])
def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices):
partial_subtrees = [[] for i in range(temporary_query_trees_size)]
def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children):
partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)]
completed_subtrees = [[] for i in range(completed_subtrees_size)]
# list of pairs (index of query in group, group of query)
partial_results_dict = {}
children_queries_generator = self.generate_children_queries(l_all_query_indices)
children_queries_generator = self.generate_children_queries(l_all_query_indices, children)
child_index = 0
child, child_queries, child_queries_metadata = next(children_queries_generator)
@ -137,9 +137,9 @@ class Tree(object):
# obtain children results
new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries)
self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices, new_completed_subtrees,
self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices,
partial_results_dict, partial_subtrees)
# TODO: Right children functionality
for i in range(len(new_completed_subtrees)):
completed_subtrees[i].extend(new_completed_subtrees[i])
child, child_queries, child_queries_metadata = children_queries_generator.send(new_partial_subtrees)
@ -158,39 +158,90 @@ class Tree(object):
l_all_query_indices = []
r_all_query_indices = []
active_permanent_querry_trees = []
active_permanent_query_trees = []
for permanent_query_tree in permanent_query_trees:
if self.fits_static_requirements(permanent_query_tree):
active_permanent_querry_trees.append(permanent_query_tree)
l_all_query_indices.append((permanent_query_tree['l_children'], True))
r_all_query_indices.append((permanent_query_tree['r_children'], True))
# active_temporary_query_tree = []
partial_subtrees = [[] for i in range(len(temporary_query_trees))]
active_permanent_query_trees.append(permanent_query_tree)
if 'l_children' in permanent_query_tree:
l_all_query_indices.append((permanent_query_tree['l_children'], True))
if 'r_children' in permanent_query_tree:
r_all_query_indices.append((permanent_query_tree['r_children'], True))
active_temporary_query_trees = []
# partial_subtrees = [[] for i in range(len(temporary_query_trees))]
for i, temporary_query_tree in enumerate(temporary_query_trees):
if self.fits_static_requirements(temporary_query_tree):
# active_temporary_query_tree.append(temporary_query_tree)
active_temporary_query_trees.append(temporary_query_tree)
# if 'l_children' in temporary_query_tree and 'r_children' in temporary_query_tree:
if 'l_children' in temporary_query_tree:
l_all_query_indices.append((temporary_query_tree['l_children'], False))
if 'r_children' in temporary_query_tree:
r_all_query_indices.append((temporary_query_tree['r_children'], False))
if 'l_children' not in temporary_query_tree and 'r_children' not in temporary_query_tree:
partial_subtrees[i] = [[self.create_output_string()]]
# if 'l_children' not in temporary_query_tree and 'r_children' not in temporary_query_tree:
# partial_subtrees[i] = [[self.create_output_string()]]
# elif 'l_children' not in temporary_query_tree and 'r_children' not in temporary_query_tree:
# partial_subtrees[i] = None
l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices)
l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices, self.l_children)
r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, r_all_query_indices, self.r_children)
merged_partial_subtrees = []
for i in range(len(l_partial_subtrees)):
if l_partial_subtrees[i]:
merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]]))
i = 0
i_left = 0
i_right = 0
# go over all permanent and temporary query trees
while i < len(active_permanent_query_trees) + len(active_temporary_query_trees):
# permanent query trees always have left and right child
if i < len(active_permanent_query_trees):
if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]):
merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left],
[[self.create_output_string()]])
merged_partial_subtrees.append(
self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right]))
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]]))
i_left += 1
i_right += 1
elif 'l_children' in active_permanent_query_trees[i]:
merged_partial_subtrees.append(
self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]]))
i_left += 1
elif 'r_children' in active_permanent_query_trees[i]:
merged_partial_subtrees.append(
self.merge_results([[self.create_output_string()]], r_partial_subtrees[i_right]))
i_right += 1
else:
merged_partial_subtrees.append([[self.create_output_string()]])
else:
merged_partial_subtrees.append(partial_subtrees[i])
return merged_partial_subtrees, l_completed_subtrees
if ('l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)] and 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]):
merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]])
merged_partial_subtrees.append(self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right]))
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]]))
i_left += 1
i_right += 1
elif 'l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]]))
i_left += 1
elif 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
merged_partial_subtrees.append(self.merge_results([[self.create_output_string()]], r_partial_subtrees[i_right]))
i_right += 1
else:
merged_partial_subtrees.append([[self.create_output_string()]])
# if r_partial_subtrees[i]:
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]]))
i += 1
completed_subtrees = l_completed_subtrees
for i in range(len(permanent_query_trees)):
completed_subtrees[i].extend(merged_partial_subtrees[i])
completed_subtrees[i].extend(r_completed_subtrees[i])
return merged_partial_subtrees[len(permanent_query_trees):], completed_subtrees
@staticmethod
def merge_results(old_results, new_results):

View File

@ -4,4 +4,5 @@ internal_saves = ./internal_saves
output = ./association_rules.tsv
; analyze_type options: 'lemma', 'word'
; query = _ > _
query = _ < (_ > _) < _ > _
query = _ > (_ < _) > (_ < _)
; query = _ < (_ > _) < _ > _

View File

@ -49,8 +49,10 @@ def decode_query(orig_query):
r_children.append(decode_query(node_action))
else:
root = decode_query(node_action)
root["l_children"] = l_children
root["r_children"] = r_children
if l_children:
root["l_children"] = l_children
if r_children:
root["r_children"] = r_children
return root
@ -82,7 +84,7 @@ def create_trees(config):
root_id = int(token.id)
for token_id, token in enumerate(token_nodes):
if token.parent == 0:
if int(token.parent) == 0:
token.set_parent(None)
else:
parent_id = int(token.parent) - 1
@ -118,22 +120,27 @@ def main():
query_tree = decode_query('(' + config.get('settings', 'query') + ')')
for tree in all_trees:
for tree in all_trees[1:]:
# original
# _, subtrees = tree.get_subtrees([query_tree], [])
# r_children = tree.r_children[:1] + tree.r_children[3:4]
tree.r_children = tree.r_children[:1] + tree.r_children[2:4]
_, subtrees = tree.get_subtrees([query_tree], [])
# test 1 layer queries
# tree.r_children = []
# tree.l_children[1].l_children = []
# _, subtrees = tree.get_subtrees([{"l_children": [{'a1':''}, {'a2':''}], "r_children": []}, {"l_children": [{'b1':''}], "r_children": []}, {"l_children": [{'c1':''}, {'c2':''}, {'c3':''}], "r_children": []}], [])
# _, subtrees = tree.get_subtrees([{'q1':'', "l_children": [{'a1':''}, {'a2':''}]}, {'q2':'', "l_children": [{'b1':''}]}, {'q3':'', "l_children": [{'c1':''}, {'c2':''}, {'c3':''}]}], [])
# # _, subtrees = tree.get_subtrees([{'q1':'', "l_children": [{'a1':''}, {'a2':''}], "r_children": []}, {'q2':'', "l_children": [{'b1':''}], "r_children": []}, {'q3':'', "l_children": [{'c1':''}, {'c2':''}, {'c3':''}], "r_children": []}], [])
# test 2 layer queries
tree.r_children = []
tree.l_children[1].l_children = []
new_tree = Tree('bil', '', '', '', '', form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, None)
new_tree.l_children = [tree]
_, subtrees = new_tree.get_subtrees(
[{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}], "r_children": []}], "r_children": []}], [])
# tree.r_children = [Tree('je', '', '', '', '', form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, None)]
# tree.l_children[1].l_children = []
# new_tree = Tree('bil', '', '', '', '', form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, None)
# new_tree.l_children = [tree]
# _, subtrees = new_tree.get_subtrees(
# [{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}]}]}], [])
# # _, subtrees = new_tree.get_subtrees(
# # [{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}], "r_children": []}], "r_children": []}], [])
return