Refactoring original code + partially added node_order=yes
This commit is contained in:
parent
a614da9766
commit
2b5478483a
243
Tree.py
243
Tree.py
|
@ -7,9 +7,9 @@ from Value import Value
|
||||||
|
|
||||||
|
|
||||||
class Tree(object):
|
class Tree(object):
|
||||||
def __init__(self, form, lemma, upos, xpos, deprel, feats, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, head):
|
def __init__(self, form, lemma, upos, xpos, deprel, feats, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_complete_dict, head):
|
||||||
if not hasattr(self, 'feats'):
|
# if not hasattr(self, 'feats'):
|
||||||
self.feats = {}
|
# self.feats = {}
|
||||||
|
|
||||||
# form_unicode = str(form).encode("utf-8")
|
# form_unicode = str(form).encode("utf-8")
|
||||||
if form not in form_dict:
|
if form not in form_dict:
|
||||||
|
@ -27,40 +27,39 @@ class Tree(object):
|
||||||
if deprel not in deprel_dict:
|
if deprel not in deprel_dict:
|
||||||
deprel_dict[deprel] = Value(deprel)
|
deprel_dict[deprel] = Value(deprel)
|
||||||
self.deprel = deprel_dict[deprel]
|
self.deprel = deprel_dict[deprel]
|
||||||
for feat in feats.keys():
|
if feats not in feats_complete_dict:
|
||||||
if next(iter(feats[feat])) not in feats_dict[feat]:
|
feats_complete_dict[feats] = Value(feats)
|
||||||
feats_dict[feat][next(iter(feats[feat]))] = Value(next(iter(feats[feat])))
|
self.feats_complete = feats_complete_dict[feats]
|
||||||
if not feat in self.feats:
|
# for feat in feats.keys():
|
||||||
self.feats[feat] = {}
|
# if next(iter(feats[feat])) not in feats_dict[feat]:
|
||||||
self.feats[feat][next(iter(feats[feat]))] = feats_dict[feat][next(iter(feats[feat]))]
|
# feats_dict[feat][next(iter(feats[feat]))] = Value(next(iter(feats[feat])))
|
||||||
|
# if not feat in self.feats:
|
||||||
|
# self.feats[feat] = {}
|
||||||
|
# self.feats[feat][next(iter(feats[feat]))] = feats_dict[feat][next(iter(feats[feat]))]
|
||||||
# self.position = position
|
# self.position = position
|
||||||
|
|
||||||
self.parent = head
|
self.parent = head
|
||||||
self.l_children = []
|
self.children = []
|
||||||
self.r_children = []
|
self.children_split = -1
|
||||||
|
|
||||||
self.index = 0
|
self.index = 0
|
||||||
|
|
||||||
def add_l_child(self, child):
|
def add_child(self, child):
|
||||||
child.index = len(self.l_children)
|
child.index = len(self.children)
|
||||||
self.l_children.append(child)
|
self.children.append(child)
|
||||||
|
|
||||||
def add_r_child(self, child):
|
|
||||||
child.index = len(self.l_children) + len(self.r_children)
|
|
||||||
self.r_children.append(child)
|
|
||||||
|
|
||||||
def set_parent(self, parent):
|
def set_parent(self, parent):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
|
|
||||||
def fits_static_requirements_feats(self, query_tree):
|
# def fits_static_requirements_feats(self, query_tree):
|
||||||
if 'feats' not in query_tree:
|
# if 'feats' not in query_tree:
|
||||||
return True
|
# return True
|
||||||
|
#
|
||||||
for feat in query_tree['feats'].keys():
|
# for feat in query_tree['feats'].keys():
|
||||||
if feat not in self.feats or query_tree['feats'][feat] != next(iter(self.feats[feat].values())).get_value():
|
# if feat not in self.feats or query_tree['feats'][feat] != next(iter(self.feats[feat].values())).get_value():
|
||||||
return False
|
# return False
|
||||||
|
#
|
||||||
return True
|
# return True
|
||||||
|
|
||||||
|
|
||||||
def fits_static_requirements(self, query_tree):
|
def fits_static_requirements(self, query_tree):
|
||||||
|
@ -69,7 +68,8 @@ class Tree(object):
|
||||||
('upos' not in query_tree or query_tree['upos'] == self.upos.get_value()) and \
|
('upos' not in query_tree or query_tree['upos'] == self.upos.get_value()) and \
|
||||||
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
|
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
|
||||||
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \
|
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \
|
||||||
self.fits_static_requirements_feats(query_tree)
|
('feats' not in query_tree or query_tree['feats'] == self.feats_complete.get_value())
|
||||||
|
# self.fits_static_requirements_feats(query_tree)
|
||||||
|
|
||||||
def generate_children_queries(self, all_query_indices, children):
|
def generate_children_queries(self, all_query_indices, children):
|
||||||
partial_results = {}
|
partial_results = {}
|
||||||
|
@ -161,7 +161,7 @@ class Tree(object):
|
||||||
|
|
||||||
return partial_subtrees, completed_subtrees
|
return partial_subtrees, completed_subtrees
|
||||||
|
|
||||||
def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, all_query_indices, children, create_output_string):
|
def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, all_query_indices, children, create_output_string, filters):
|
||||||
# l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees),
|
# l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees),
|
||||||
# len(permanent_query_trees),
|
# len(permanent_query_trees),
|
||||||
# permanent_query_trees,
|
# permanent_query_trees,
|
||||||
|
@ -188,7 +188,7 @@ class Tree(object):
|
||||||
for child in children:
|
for child in children:
|
||||||
# obtain children results
|
# obtain children results
|
||||||
new_partial_answers_architecture, new_partial_answers, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries_flatten,
|
new_partial_answers_architecture, new_partial_answers, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries_flatten,
|
||||||
create_output_string)
|
create_output_string, filters)
|
||||||
|
|
||||||
assert len(new_partial_answers) == len(child_queries_flatten)
|
assert len(new_partial_answers) == len(child_queries_flatten)
|
||||||
for i, new_partial_subtree in enumerate(new_partial_answers):
|
for i, new_partial_subtree in enumerate(new_partial_answers):
|
||||||
|
@ -207,7 +207,7 @@ class Tree(object):
|
||||||
# iterate over all answers per queries
|
# iterate over all answers per queries
|
||||||
for answer_i, answer_length in enumerate(answers_lengths):
|
for answer_i, answer_length in enumerate(answers_lengths):
|
||||||
# iterate over answers of query
|
# iterate over answers of query
|
||||||
partial_subtrees[answer_i], partial_subtrees_architectures[answer_i] = self.create_answers(all_new_partial_answers[i:i + answer_length], all_new_partial_answers_architecture[i:i + answer_length], answer_length, len(self.l_children))
|
partial_subtrees[answer_i], partial_subtrees_architectures[answer_i] = self.create_answers(all_new_partial_answers[i:i + answer_length], all_new_partial_answers_architecture[i:i + answer_length], answer_length, filters)
|
||||||
# while i < answers_length:
|
# while i < answers_length:
|
||||||
# self.create_grouped_answers()
|
# self.create_grouped_answers()
|
||||||
# i += 1
|
# i += 1
|
||||||
|
@ -236,88 +236,49 @@ class Tree(object):
|
||||||
|
|
||||||
return partial_subtrees_architectures, partial_subtrees, completed_subtrees
|
return partial_subtrees_architectures, partial_subtrees, completed_subtrees
|
||||||
|
|
||||||
def order_dependent_queries(self, active_permanent_query_trees, active_temporary_query_trees, l_partial_subtrees, l_partial_subtrees_architecture,
|
def order_dependent_queries(self, active_permanent_query_trees, active_temporary_query_trees, partial_subtrees, partial_subtrees_architecture,
|
||||||
r_partial_subtrees, r_partial_subtrees_architecture, create_output_string, merged_partial_subtrees, merged_partial_subtrees_architecture, i, i_left, i_right):
|
create_output_string, merged_partial_subtrees, merged_partial_subtrees_architecture, i, i_child, filters):
|
||||||
# string_output = ''
|
# string_output = ''
|
||||||
if i < len(active_permanent_query_trees):
|
if i < len(active_permanent_query_trees):
|
||||||
if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]):
|
if 'children' in active_permanent_query_trees[i]:
|
||||||
merged_partial_subtree = self.create_output_left_children(l_partial_subtrees[i_left], [create_output_string(self)])
|
if not filters['node_order'] or i_child < self.children_split:
|
||||||
merged_partial_subtrees.append(
|
merged_partial_subtrees.append(
|
||||||
self.create_output_right_children(merged_partial_subtree, r_partial_subtrees[i_right]))
|
self.create_output_left_children(partial_subtrees[i_child], [create_output_string(self)], filters))
|
||||||
|
|
||||||
merged_partial_subtree_architecture = self.create_output_left_children(l_partial_subtrees_architecture[i_left], [str([self.index])])
|
|
||||||
merged_partial_subtrees_architecture.append(
|
merged_partial_subtrees_architecture.append(
|
||||||
self.create_output_right_children(merged_partial_subtree_architecture, l_partial_subtrees_architecture[i_right]))
|
self.create_output_left_children(partial_subtrees_architecture[i_child], [str([self.index])], filters))
|
||||||
# merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left],
|
else:
|
||||||
# [[create_output_string(self)]])
|
merged_partial_subtrees.append(
|
||||||
# merged_partial_subtrees.append(
|
self.create_output_right_children([create_output_string(self)], partial_subtrees[i_child], filters))
|
||||||
# self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right]))
|
|
||||||
# # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]]))
|
|
||||||
i_left += 1
|
|
||||||
i_right += 1
|
|
||||||
|
|
||||||
elif 'l_children' in active_permanent_query_trees[i]:
|
|
||||||
merged_partial_subtrees.append(self.create_output_left_children(l_partial_subtrees[i_left], [create_output_string(self)]))
|
|
||||||
merged_partial_subtrees_architecture.append(self.create_output_left_children(l_partial_subtrees_architecture[i_left], [str([self.index])]))
|
|
||||||
# merged_partial_subtrees.append(
|
|
||||||
# self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]]))
|
|
||||||
i_left += 1
|
|
||||||
|
|
||||||
elif 'r_children' in active_permanent_query_trees[i]:
|
|
||||||
merged_partial_subtrees.append(self.create_output_right_children([create_output_string(self)], r_partial_subtrees[i_right]))
|
|
||||||
merged_partial_subtrees_architecture.append(
|
merged_partial_subtrees_architecture.append(
|
||||||
self.create_output_right_children(r_partial_subtrees_architecture[i_left], [str([self.index])]))
|
self.create_output_right_children([str([self.index])], partial_subtrees_architecture[i_child], filters))
|
||||||
# merged_partial_subtrees.append(
|
|
||||||
# self.merge_results([[create_output_string(self)]], r_partial_subtrees[i_right]))
|
i_child += 1
|
||||||
i_right += 1
|
|
||||||
else:
|
else:
|
||||||
merged_partial_subtrees.append([create_output_string(self)])
|
merged_partial_subtrees.append([create_output_string(self)])
|
||||||
merged_partial_subtrees_architecture.append([str([self.index])])
|
merged_partial_subtrees_architecture.append([str([self.index])])
|
||||||
# merged_partial_subtrees.append([[create_output_string(self)]])
|
# merged_partial_subtrees.append([[create_output_string(self)]])
|
||||||
else:
|
else:
|
||||||
if ('l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)] and 'r_children' in
|
if 'children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
|
||||||
active_temporary_query_trees[i - len(active_permanent_query_trees)]):
|
if not filters['node_order'] or i_child < self.children_split:
|
||||||
merged_partial_subtree = self.create_output_left_children(l_partial_subtrees[i_left],
|
|
||||||
[create_output_string(self)])
|
|
||||||
merged_partial_subtrees.append(
|
merged_partial_subtrees.append(
|
||||||
self.create_output_right_children(merged_partial_subtree, r_partial_subtrees[i_right]))
|
self.create_output_left_children(partial_subtrees[i_child], [create_output_string(self)], filters))
|
||||||
|
|
||||||
merged_partial_subtree_architecture = self.create_output_left_children(
|
|
||||||
l_partial_subtrees_architecture[i_left], [str([self.index])])
|
|
||||||
merged_partial_subtrees_architecture.append(
|
merged_partial_subtrees_architecture.append(
|
||||||
self.create_output_right_children(merged_partial_subtree_architecture,
|
self.create_output_left_children(partial_subtrees_architecture[i_child], [str([self.index])], filters))
|
||||||
l_partial_subtrees_architecture[i_right]))
|
else:
|
||||||
# merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]])
|
|
||||||
# merged_partial_subtrees.append(self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right]))
|
|
||||||
# # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]]))
|
|
||||||
i_left += 1
|
|
||||||
i_right += 1
|
|
||||||
|
|
||||||
elif 'l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
|
|
||||||
merged_partial_subtrees.append(
|
merged_partial_subtrees.append(
|
||||||
self.create_output_left_children(l_partial_subtrees[i_left], [create_output_string(self)]))
|
self.create_output_right_children([create_output_string(self)], partial_subtrees[i_child], filters))
|
||||||
merged_partial_subtrees_architecture.append(
|
merged_partial_subtrees_architecture.append(
|
||||||
self.create_output_left_children(l_partial_subtrees_architecture[i_left], [str([self.index])]))
|
self.create_output_right_children([str([self.index])], partial_subtrees_architecture[i_child], filters))
|
||||||
# merged_partial_subtrees.append(
|
|
||||||
# self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]]))
|
|
||||||
i_left += 1
|
|
||||||
|
|
||||||
elif 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
|
i_child += 1
|
||||||
merged_partial_subtrees.append(
|
|
||||||
self.create_output_right_children([create_output_string(self)], r_partial_subtrees[i_right]))
|
|
||||||
merged_partial_subtrees_architecture.append(
|
|
||||||
self.create_output_right_children(r_partial_subtrees_architecture[i_left], [str([self.index])]))
|
|
||||||
# merged_partial_subtrees.append(
|
|
||||||
# self.merge_results([[create_output_string(self)]], r_partial_subtrees[i_right]))
|
|
||||||
i_right += 1
|
|
||||||
else:
|
else:
|
||||||
merged_partial_subtrees.append([create_output_string(self)])
|
merged_partial_subtrees.append([create_output_string(self)])
|
||||||
merged_partial_subtrees_architecture.append([str([self.index])])
|
merged_partial_subtrees_architecture.append([str([self.index])])
|
||||||
# merged_partial_subtrees.append([[create_output_string(self)]])
|
# merged_partial_subtrees.append([[create_output_string(self)]])
|
||||||
|
|
||||||
return i_left, i_right
|
return i_child
|
||||||
|
|
||||||
def get_subtrees(self, permanent_query_trees, temporary_query_trees, create_output_string):
|
def get_subtrees(self, permanent_query_trees, temporary_query_trees, create_output_string, filters):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
:param permanent_query_trees:
|
:param permanent_query_trees:
|
||||||
|
@ -325,71 +286,41 @@ class Tree(object):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# list of all children queries grouped by parent queries
|
# list of all children queries grouped by parent queries
|
||||||
l_all_query_indices = []
|
all_query_indices = []
|
||||||
r_all_query_indices = []
|
|
||||||
|
|
||||||
active_permanent_query_trees = []
|
active_permanent_query_trees = []
|
||||||
for permanent_query_tree in permanent_query_trees:
|
for permanent_query_tree in permanent_query_trees:
|
||||||
if self.fits_static_requirements(permanent_query_tree):
|
if self.fits_static_requirements(permanent_query_tree):
|
||||||
if 'l_children' in permanent_query_tree and 'r_children' in permanent_query_tree:
|
|
||||||
permanent_query_tree['l_children'] += permanent_query_tree['r_children']
|
|
||||||
del(permanent_query_tree['r_children'])
|
|
||||||
elif 'r_children' in permanent_query_tree:
|
|
||||||
permanent_query_tree['l_children'] = permanent_query_tree['r_children']
|
|
||||||
del(permanent_query_tree['r_children'])
|
|
||||||
active_permanent_query_trees.append(permanent_query_tree)
|
active_permanent_query_trees.append(permanent_query_tree)
|
||||||
if 'l_children' in permanent_query_tree:
|
if 'children' in permanent_query_tree:
|
||||||
l_all_query_indices.append((permanent_query_tree['l_children'], True))
|
all_query_indices.append((permanent_query_tree['children'], True))
|
||||||
if 'r_children' in permanent_query_tree:
|
|
||||||
r_all_query_indices.append((permanent_query_tree['l_children'], True))
|
|
||||||
# r_all_query_indices.append((permanent_query_tree['r_children'], True))
|
# r_all_query_indices.append((permanent_query_tree['r_children'], True))
|
||||||
|
|
||||||
active_temporary_query_trees = []
|
active_temporary_query_trees = []
|
||||||
successful_temporary_queries = []
|
successful_temporary_queries = []
|
||||||
for i, temporary_query_tree in enumerate(temporary_query_trees):
|
for i, temporary_query_tree in enumerate(temporary_query_trees):
|
||||||
if self.fits_static_requirements(temporary_query_tree):
|
if self.fits_static_requirements(temporary_query_tree):
|
||||||
if 'l_children' in temporary_query_tree and 'r_children' in temporary_query_tree:
|
|
||||||
temporary_query_tree['l_children'] += temporary_query_tree['r_children']
|
|
||||||
del(temporary_query_tree['r_children'])
|
|
||||||
elif 'r_children' in temporary_query_tree:
|
|
||||||
temporary_query_tree['l_children'] = temporary_query_tree['r_children']
|
|
||||||
del(temporary_query_tree['r_children'])
|
|
||||||
active_temporary_query_trees.append(temporary_query_tree)
|
active_temporary_query_trees.append(temporary_query_tree)
|
||||||
successful_temporary_queries.append(i)
|
successful_temporary_queries.append(i)
|
||||||
# if 'l_children' in temporary_query_tree and 'r_children' in temporary_query_tree:
|
# if 'l_children' in temporary_query_tree and 'r_children' in temporary_query_tree:
|
||||||
if 'l_children' in temporary_query_tree:
|
if 'children' in temporary_query_tree:
|
||||||
l_all_query_indices.append((temporary_query_tree['l_children'], False))
|
all_query_indices.append((temporary_query_tree['children'], False))
|
||||||
if 'r_children' in temporary_query_tree:
|
|
||||||
# r_all_query_indices.append((temporary_query_tree['r_children'], False))
|
|
||||||
r_all_query_indices.append((temporary_query_tree['l_children'], False))
|
|
||||||
|
|
||||||
# l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices_old(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices, self.l_children, create_output_string)
|
partial_subtrees_architecture, partial_subtrees, completed_subtrees = self.get_all_query_indices(len(temporary_query_trees),
|
||||||
# r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices_old(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, r_all_query_indices, self.r_children, create_output_string)
|
|
||||||
|
|
||||||
all_query_indices = l_all_query_indices + r_all_query_indices
|
|
||||||
|
|
||||||
l_partial_subtrees_architecture, l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees),
|
|
||||||
len(permanent_query_trees),
|
len(permanent_query_trees),
|
||||||
permanent_query_trees,
|
permanent_query_trees,
|
||||||
all_query_indices, self.l_children + self.r_children,
|
all_query_indices, self.children,
|
||||||
create_output_string)
|
create_output_string, filters)
|
||||||
# r_partial_subtrees_architecture, r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees),
|
|
||||||
# len(permanent_query_trees),
|
|
||||||
# permanent_query_trees,
|
|
||||||
# r_all_query_indices, self.r_children,
|
|
||||||
# create_output_string)
|
|
||||||
r_partial_subtrees_architecture, r_partial_subtrees, r_completed_subtrees = [], [], []
|
|
||||||
|
|
||||||
merged_partial_subtrees = []
|
merged_partial_subtrees = []
|
||||||
merged_partial_subtrees_architecture = []
|
merged_partial_subtrees_architecture = []
|
||||||
i = 0
|
i = 0
|
||||||
i_left = 0
|
i_child = 0
|
||||||
i_right = 0
|
|
||||||
# go over all permanent and temporary query trees
|
# go over all permanent and temporary query trees
|
||||||
while i < len(active_permanent_query_trees) + len(active_temporary_query_trees):
|
while i < len(active_permanent_query_trees) + len(active_temporary_query_trees):
|
||||||
# permanent query trees always have left and right child
|
# permanent query trees always have left and right child
|
||||||
i_left, i_right = self.order_dependent_queries(active_permanent_query_trees, active_temporary_query_trees, l_partial_subtrees, l_partial_subtrees_architecture,
|
i_child = self.order_dependent_queries(active_permanent_query_trees, active_temporary_query_trees, partial_subtrees, partial_subtrees_architecture,
|
||||||
r_partial_subtrees, r_partial_subtrees_architecture, create_output_string, merged_partial_subtrees, merged_partial_subtrees_architecture, i, i_left, i_right)
|
create_output_string, merged_partial_subtrees, merged_partial_subtrees_architecture, i, i_child, filters)
|
||||||
# if i < len(active_permanent_query_trees):
|
# if i < len(active_permanent_query_trees):
|
||||||
# if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]):
|
# if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]):
|
||||||
# merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left],
|
# merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left],
|
||||||
|
@ -432,14 +363,14 @@ class Tree(object):
|
||||||
# # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]]))
|
# # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]]))
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
completed_subtrees = l_completed_subtrees
|
|
||||||
# for i in range(len(permanent_query_trees)):
|
# for i in range(len(permanent_query_trees)):
|
||||||
# for i in range(max(len(completed_subtrees), len(r_completed_subtrees), len(active_permanent_query_trees))):
|
# for i in range(max(len(completed_subtrees), len(r_completed_subtrees), len(active_permanent_query_trees))):
|
||||||
for i in range(len(active_permanent_query_trees)):
|
for i in range(len(active_permanent_query_trees)):
|
||||||
|
# erase first and last brackets when adding new query result
|
||||||
|
add_subtree = [subtree[1:-1] for subtree in merged_partial_subtrees[i]]
|
||||||
# if 0 < len(active_permanent_query_trees):
|
# if 0 < len(active_permanent_query_trees):
|
||||||
completed_subtrees[i].extend(merged_partial_subtrees[i])
|
completed_subtrees[i].extend(add_subtree)
|
||||||
for i in range(len(r_completed_subtrees)):
|
# completed_subtrees[i].extend(merged_partial_subtrees[i])
|
||||||
completed_subtrees[i].extend(r_completed_subtrees[i])
|
|
||||||
|
|
||||||
# answers to valid queries
|
# answers to valid queries
|
||||||
subtrees_architecture = [[] for i in range(len(temporary_query_trees))]
|
subtrees_architecture = [[] for i in range(len(temporary_query_trees))]
|
||||||
|
@ -480,38 +411,50 @@ class Tree(object):
|
||||||
return merged_results, merged_indices
|
return merged_results, merged_indices
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_output_left_children(left_children, new_results):
|
def create_output_left_children(left_children, new_results, filters):
|
||||||
merged_results = []
|
merged_results = []
|
||||||
for child in left_children:
|
for child in left_children:
|
||||||
for new_result in new_results:
|
for new_result in new_results:
|
||||||
res = ''
|
res = ''
|
||||||
if type(child) == str:
|
if type(child) == str:
|
||||||
res += '(' + child + ') < '
|
# res += '(' + child + ') < '
|
||||||
|
res += child + ' < '
|
||||||
else:
|
else:
|
||||||
for el in sorted(child):
|
if filters['node_order']:
|
||||||
res += '(' + el + ') < '
|
new_child = child
|
||||||
merged_results.append(res + new_result)
|
else:
|
||||||
|
new_child = sorted(child)
|
||||||
|
for el in new_child:
|
||||||
|
# res += '(' + el + ') < '
|
||||||
|
res += el + ' < '
|
||||||
|
merged_results.append('(' + res + new_result + ')')
|
||||||
return merged_results
|
return merged_results
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_output_right_children(new_results, right_children):
|
def create_output_right_children(new_results, right_children, filters):
|
||||||
merged_results = []
|
merged_results = []
|
||||||
for child in right_children:
|
for child in right_children:
|
||||||
for new_result in new_results:
|
for new_result in new_results:
|
||||||
res = ''
|
res = ''
|
||||||
if type(child) == str:
|
if type(child) == str:
|
||||||
res += ' > (' + child + ')'
|
res += ' > ' + child
|
||||||
|
# res += ' > (' + child + ')'
|
||||||
else:
|
else:
|
||||||
for el in sorted(child):
|
if filters['node_order']:
|
||||||
res += ' > (' + el + ')'
|
new_child = child
|
||||||
merged_results.append(new_result + res)
|
else:
|
||||||
|
new_child = sorted(child)
|
||||||
|
for el in new_child:
|
||||||
|
res += ' > ' + el
|
||||||
|
# res += ' > (' + el + ')'
|
||||||
|
merged_results.append('(' + new_result + res + ')')
|
||||||
# merged_results.append(new_result + ' > (' + child + ')')
|
# merged_results.append(new_result + ' > (' + child + ')')
|
||||||
return merged_results
|
return merged_results
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_answers(separated_answers, separated_answers_architecture, answer_length, l_children_len):
|
def create_answers(separated_answers, separated_answers_architecture, answer_length, filters):
|
||||||
# TODO
|
# TODO
|
||||||
node_order = False
|
# node_order = False
|
||||||
partly_built_trees = [[None] * answer_length]
|
partly_built_trees = [[None] * answer_length]
|
||||||
partly_built_trees_architecture = [[None] * answer_length]
|
partly_built_trees_architecture = [[None] * answer_length]
|
||||||
partly_built_trees_architecture_indices = [[None] * answer_length]
|
partly_built_trees_architecture_indices = [[None] * answer_length]
|
||||||
|
@ -580,7 +523,7 @@ class Tree(object):
|
||||||
|
|
||||||
if is_unique:
|
if is_unique:
|
||||||
unique_trees_architecture.append(new_tree_architecture)
|
unique_trees_architecture.append(new_tree_architecture)
|
||||||
if not node_order:
|
# if not filters['node_order']:
|
||||||
l_ordered_built_trees_architecture.append(new_tree_architecture)
|
l_ordered_built_trees_architecture.append(new_tree_architecture)
|
||||||
l_ordered_built_trees.append(new_tree)
|
l_ordered_built_trees.append(new_tree)
|
||||||
# TODO NODE ORDER = FALSE
|
# TODO NODE ORDER = FALSE
|
||||||
|
|
|
@ -10,21 +10,22 @@ import pyconll
|
||||||
|
|
||||||
from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos
|
from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos
|
||||||
|
|
||||||
feats_list = [
|
# for separate searches of feats
|
||||||
# lexical features
|
# feats_list = [
|
||||||
'PronType', 'NumType', 'Poss', 'Reflex', 'Foreign', 'Abbr',
|
# # lexical features
|
||||||
|
# 'PronType', 'NumType', 'Poss', 'Reflex', 'Foreign', 'Abbr',
|
||||||
# Inflectional features (nominal)
|
#
|
||||||
'Gender', 'Animacy', 'NounClass', 'Number', 'Case', 'Definite', 'Degree',
|
# # Inflectional features (nominal)
|
||||||
|
# 'Gender', 'Animacy', 'NounClass', 'Number', 'Case', 'Definite', 'Degree',
|
||||||
# Inflectional features (verbal)
|
#
|
||||||
'VerbForm', 'Mood', 'Tense', 'Aspect', 'Voice', 'Evident', 'Polarity', 'Person', 'Polite', 'Clusivity',
|
# # Inflectional features (verbal)
|
||||||
|
# 'VerbForm', 'Mood', 'Tense', 'Aspect', 'Voice', 'Evident', 'Polarity', 'Person', 'Polite', 'Clusivity',
|
||||||
# Other
|
#
|
||||||
'Variant', 'Number[psor]', 'Gender[psor]', 'NumForm'
|
# # Other
|
||||||
]
|
# 'Variant', 'Number[psor]', 'Gender[psor]', 'NumForm'
|
||||||
|
# ]
|
||||||
feats_dict = {key: {} for key in feats_list}
|
#
|
||||||
|
# feats_dict = {key: {} for key in feats_list}
|
||||||
|
|
||||||
|
|
||||||
def decode_query(orig_query, dependency_type):
|
def decode_query(orig_query, dependency_type):
|
||||||
|
@ -59,10 +60,13 @@ def decode_query(orig_query, dependency_type):
|
||||||
elif orig_query_split[0] == 'form':
|
elif orig_query_split[0] == 'form':
|
||||||
decoded_query['form'] = orig_query_split[1]
|
decoded_query['form'] = orig_query_split[1]
|
||||||
return decoded_query
|
return decoded_query
|
||||||
elif orig_query_split[0] in feats_list:
|
elif orig_query_split[0] == 'feats':
|
||||||
decoded_query['feats'] = {}
|
decoded_query['feats'] = orig_query_split[1]
|
||||||
decoded_query['feats'][orig_query_split[0]] = orig_query_split[1]
|
|
||||||
return decoded_query
|
return decoded_query
|
||||||
|
# elif orig_query_split[0] in feats_list:
|
||||||
|
# decoded_query['feats'] = {}
|
||||||
|
# decoded_query['feats'][orig_query_split[0]] = orig_query_split[1]
|
||||||
|
# return decoded_query
|
||||||
elif not new_query:
|
elif not new_query:
|
||||||
raise Exception('Not supported yet!')
|
raise Exception('Not supported yet!')
|
||||||
elif not new_query:
|
elif not new_query:
|
||||||
|
@ -88,20 +92,17 @@ def decode_query(orig_query, dependency_type):
|
||||||
except ValueError:
|
except ValueError:
|
||||||
root_index = len(priority_actions)
|
root_index = len(priority_actions)
|
||||||
|
|
||||||
l_children = []
|
children = []
|
||||||
r_children = []
|
|
||||||
root = None
|
root = None
|
||||||
for i, node_action in enumerate(node_actions):
|
for i, node_action in enumerate(node_actions):
|
||||||
if i < root_index:
|
if i < root_index:
|
||||||
l_children.append(decode_query(node_action, priority_actions[i][1:]))
|
children.append(decode_query(node_action, priority_actions[i][1:]))
|
||||||
elif i > root_index:
|
elif i > root_index:
|
||||||
r_children.append(decode_query(node_action, priority_actions[i - 1][1:]))
|
children.append(decode_query(node_action, priority_actions[i - 1][1:]))
|
||||||
else:
|
else:
|
||||||
root = decode_query(node_action, dependency_type)
|
root = decode_query(node_action, dependency_type)
|
||||||
if l_children:
|
if children:
|
||||||
root["l_children"] = l_children
|
root["children"] = children
|
||||||
if r_children:
|
|
||||||
root["r_children"] = r_children
|
|
||||||
return root
|
return root
|
||||||
|
|
||||||
|
|
||||||
|
@ -116,7 +117,7 @@ def create_trees(config):
|
||||||
|
|
||||||
train = pyconll.load_from_file(input_path)
|
train = pyconll.load_from_file(input_path)
|
||||||
|
|
||||||
form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict = {}, {}, {}, {}, {}
|
form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_complete_dict = {}, {}, {}, {}, {}, {}
|
||||||
|
|
||||||
all_trees = []
|
all_trees = []
|
||||||
|
|
||||||
|
@ -125,8 +126,14 @@ def create_trees(config):
|
||||||
root_id = None
|
root_id = None
|
||||||
token_nodes = []
|
token_nodes = []
|
||||||
for token in sentence:
|
for token in sentence:
|
||||||
node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, token.feats, form_dict,
|
# token_feats = ''
|
||||||
lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, token.head)
|
# for k, v in token.feats.items():
|
||||||
|
# token_feats += k + next(iter(v)) + '|'
|
||||||
|
# token_feats = token_feats[:-1]
|
||||||
|
# TODO check if 5th place is always there for feats
|
||||||
|
token_feats = token._fields[5]
|
||||||
|
node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, token_feats, form_dict,
|
||||||
|
lemma_dict, upos_dict, xpos_dict, deprel_dict, None, feats_complete_dict, token.head)
|
||||||
token_nodes.append(node)
|
token_nodes.append(node)
|
||||||
if token.deprel == 'root':
|
if token.deprel == 'root':
|
||||||
root = node
|
root = node
|
||||||
|
@ -137,14 +144,21 @@ def create_trees(config):
|
||||||
token.set_parent(None)
|
token.set_parent(None)
|
||||||
else:
|
else:
|
||||||
parent_id = int(token.parent) - 1
|
parent_id = int(token.parent) - 1
|
||||||
if token_id < parent_id:
|
# if token_id < parent_id:
|
||||||
token_nodes[parent_id].add_l_child(token)
|
# token_nodes[parent_id].add_l_child(token)
|
||||||
elif token_id > parent_id:
|
# elif token_id > parent_id:
|
||||||
token_nodes[parent_id].add_r_child(token)
|
# token_nodes[parent_id].add_r_child(token)
|
||||||
else:
|
# else:
|
||||||
raise Exception('Root element should not be here!')
|
# raise Exception('Root element should not be here!')
|
||||||
|
if token_nodes[parent_id].children_split == -1 and token_id > parent_id:
|
||||||
|
token_nodes[parent_id].children_split = len(token_nodes[parent_id].children)
|
||||||
|
token_nodes[parent_id].add_child(token)
|
||||||
token.set_parent(token_nodes[parent_id])
|
token.set_parent(token_nodes[parent_id])
|
||||||
|
|
||||||
|
for token in token_nodes:
|
||||||
|
if token.children_split == -1:
|
||||||
|
token.children_split = len(token.children)
|
||||||
|
|
||||||
if root == None:
|
if root == None:
|
||||||
raise Exception('No root element in sentence!')
|
raise Exception('No root element in sentence!')
|
||||||
all_trees.append(root)
|
all_trees.append(root)
|
||||||
|
@ -179,9 +193,11 @@ def printable_answers(query):
|
||||||
|
|
||||||
if len(node_actions) > 1:
|
if len(node_actions) > 1:
|
||||||
res = []
|
res = []
|
||||||
for node_action in node_actions[:-1]:
|
# for node_action in node_actions[:-1]:
|
||||||
res.extend(printable_answers(node_action[1:-1]))
|
# res.extend(printable_answers(node_action[1:-1]))
|
||||||
res.extend([node_actions[-1]])
|
# res.extend([node_actions[-1]])
|
||||||
|
for node_action in node_actions:
|
||||||
|
res.extend(printable_answers(node_action))
|
||||||
return res
|
return res
|
||||||
else:
|
else:
|
||||||
return [query]
|
return [query]
|
||||||
|
@ -205,18 +221,18 @@ def main():
|
||||||
ngrams = 0
|
ngrams = 0
|
||||||
if config.getint('settings', 'ngrams') == 2:
|
if config.getint('settings', 'ngrams') == 2:
|
||||||
ngrams = 2
|
ngrams = 2
|
||||||
query_tree = [{"l_children": [{}]}]
|
query_tree = [{"children": [{}]}]
|
||||||
elif config.getint('settings', 'ngrams') == 3:
|
elif config.getint('settings', 'ngrams') == 3:
|
||||||
ngrams = 3
|
ngrams = 3
|
||||||
query_tree = [{"l_children": [{}, {}]}, {"l_children": [{"l_children": [{}]}]}]
|
query_tree = [{"children": [{}, {}]}, {"children": [{"children": [{}]}]}]
|
||||||
elif config.getint('settings', 'ngrams') == 4:
|
elif config.getint('settings', 'ngrams') == 4:
|
||||||
ngrams = 4
|
ngrams = 4
|
||||||
query_tree = [{"l_children": [{}, {}, {}]}, {"l_children": [{"l_children": [{}, {}]}]}, {"l_children": [{"l_children": [{}]}, {}]}, {"l_children": [{"l_children": [{"l_children": [{}]}]}]}]
|
query_tree = [{"children": [{}, {}, {}]}, {"children": [{"children": [{}, {}]}]}, {"children": [{"children": [{}]}, {}]}, {"children": [{"children": [{"children": [{}]}]}]}]
|
||||||
elif config.getint('settings', 'ngrams') == 5:
|
elif config.getint('settings', 'ngrams') == 5:
|
||||||
ngrams = 5
|
ngrams = 5
|
||||||
query_tree = [{"l_children": [{}, {}, {}, {}]}, {"l_children": [{"l_children": [{}]}, {}, {}]}, {"l_children": [{"l_children": [{}, {}]}, {}]}, {"l_children": [{"l_children": [{}]}, {"l_children": [{}]}]},
|
query_tree = [{"children": [{}, {}, {}, {}]}, {"children": [{"children": [{}]}, {}, {}]}, {"children": [{"children": [{}, {}]}, {}]}, {"children": [{"children": [{}]}, {"children": [{}]}]},
|
||||||
{"l_children": [{"l_children": [{"l_children": [{}]}]}, {}]}, {"l_children": [{"l_children": [{"l_children": [{}]}, {}]}]}, {"l_children": [{"l_children": [{"l_children": [{}, {}]}]}]},
|
{"children": [{"children": [{"children": [{}]}]}, {}]}, {"children": [{"children": [{"children": [{}]}, {}]}]}, {"children": [{"children": [{"children": [{}, {}]}]}]},
|
||||||
{"l_children": [{"l_children": [{"l_children": [{"l_children": [{}]}]}]}]}]
|
{"children": [{"children": [{"children": [{"children": [{}]}]}]}]}]
|
||||||
else:
|
else:
|
||||||
query_tree = [decode_query('(' + config.get('settings', 'query') + ')', '')]
|
query_tree = [decode_query('(' + config.get('settings', 'query') + ')', '')]
|
||||||
# order_independent_queries(query_tree)
|
# order_independent_queries(query_tree)
|
||||||
|
@ -238,14 +254,16 @@ def main():
|
||||||
create_output_string_funct = create_output_string_form
|
create_output_string_funct = create_output_string_form
|
||||||
|
|
||||||
result_dict = {}
|
result_dict = {}
|
||||||
|
filters = {}
|
||||||
|
filters['node_order'] = config.getboolean('settings', 'node_order')
|
||||||
|
|
||||||
# for tree in all_trees[2:]:
|
for tree in all_trees[2:]:
|
||||||
# for tree in all_trees[1205:]:
|
# for tree in all_trees[1205:]:
|
||||||
for tree in all_trees:
|
# for tree in all_trees:
|
||||||
# original
|
# original
|
||||||
# r_children = tree.r_children[:1] + tree.r_children[3:4]
|
# r_children = tree.r_children[:1] + tree.r_children[3:4]
|
||||||
# tree.r_children = tree.r_children[:1] + tree.r_children[2:4]
|
# tree.r_children = tree.r_children[:1] + tree.r_children[2:4]
|
||||||
_, _, subtrees = tree.get_subtrees(query_tree, [], create_output_string_funct)
|
_, _, subtrees = tree.get_subtrees(query_tree, [], create_output_string_funct, filters)
|
||||||
for query_results in subtrees:
|
for query_results in subtrees:
|
||||||
for result in query_results:
|
for result in query_results:
|
||||||
# if ngrams:
|
# if ngrams:
|
||||||
|
@ -258,13 +276,13 @@ def main():
|
||||||
result_dict[r] = 1
|
result_dict[r] = 1
|
||||||
# test 1 layer queries
|
# test 1 layer queries
|
||||||
# # tree.r_children = []
|
# # tree.r_children = []
|
||||||
# # tree.l_children[1].l_children = []
|
# # tree.children[1].children = []
|
||||||
# # query = [{'l_children': [{}]}, {'r_children': [{}]}]
|
# # query = [{'children': [{}]}, {'children': [{}]}]
|
||||||
# # query = [{"l_children": [{}, {}]}, {"l_children": [{}]}, {"l_children": [{}, {}, {}]}]
|
# # query = [{"children": [{}, {}]}, {"children": [{}]}, {"children": [{}, {}, {}]}]
|
||||||
# query = [{"l_children": [{'form': 'je'}, {}]}, {"l_children": [{'form': 'je'}]}, {"l_children": [{'form': 'je'}, {}, {}]}]
|
# query = [{"children": [{'form': 'je'}, {}]}, {"children": [{'form': 'je'}]}, {"children": [{'form': 'je'}, {}, {}]}]
|
||||||
# # query = [{'q1':'', "l_children": [{'a1':''}, {'a2':''}]}, {'q2':'', "l_children": [{'b1':''}]}, {'q3':'', "l_children": [{'c1':''}, {'c2':''}, {'c3':''}]}]
|
# # query = [{'q1':'', "children": [{'a1':''}, {'a2':''}]}, {'q2':'', "children": [{'b1':''}]}, {'q3':'', "children": [{'c1':''}, {'c2':''}, {'c3':''}]}]
|
||||||
# _, _, subtrees = tree.get_subtrees(query, [], create_output_string_funct)
|
# _, _, subtrees = tree.get_subtrees(query, [], create_output_string_funct)
|
||||||
# # _, subtrees = tree.get_subtrees([{'q1':'', "l_children": [{'a1':''}, {'a2':''}], "r_children": []}, {'q2':'', "l_children": [{'b1':''}], "r_children": []}, {'q3':'', "l_children": [{'c1':''}, {'c2':''}, {'c3':''}], "r_children": []}], [])
|
# # _, subtrees = tree.get_subtrees([{'q1':'', "children": [{'a1':''}, {'a2':''}], "children": []}, {'q2':'', "children": [{'b1':''}], "children": []}, {'q3':'', "children": [{'c1':''}, {'c2':''}, {'c3':''}], "children": []}], [])
|
||||||
# print('HERE!')
|
# print('HERE!')
|
||||||
|
|
||||||
# test 2 layer queries
|
# test 2 layer queries
|
||||||
|
|
Loading…
Reference in New Issue
Block a user