parent
40aaffa632
commit
b7193f9126
@ -1,8 +0,0 @@
|
|||||||
.idea/
|
|
||||||
venv/
|
|
||||||
internal_saves/
|
|
||||||
__pycache__/
|
|
||||||
results/
|
|
||||||
data/
|
|
||||||
config2.ini
|
|
||||||
configs/
|
|
@ -1,25 +0,0 @@
|
|||||||
# Copyright 2019 CJVT
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
from generic import generate_key, generate_name
|
|
||||||
|
|
||||||
|
|
||||||
class ResultNode(object):
    """Lightweight, display-oriented wrapper for one matched tree node.

    Stores the rendered name (built by ``generate_name``), the node's position
    in the sentence (``location``) and its dependency relation string.
    """

    def __init__(self, node, architecture_order, create_output_strings):
        name_parts, name = generate_name(node, create_output_strings)
        self.name_parts = name_parts
        self.name = name
        # position of the node within the sentence / architecture
        self.location = architecture_order
        # plain string value of the dependency relation
        self.deprel = node.deprel.get_value()

    def __repr__(self):
        return self.name
|
|
@ -1,180 +0,0 @@
|
|||||||
# Copyright 2019 CJVT
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import copy
|
|
||||||
import string
|
|
||||||
|
|
||||||
|
|
||||||
class ResultTree(object):
    """One matched (sub)tree in the query results.

    Wraps a root ``ResultNode`` (``node``) together with its already-matched
    child ``ResultTree`` objects.  String keys, order keys, linear orders and
    name arrays are computed lazily and cached on the instance until
    ``reset_params()`` clears them.
    """

    def __init__(self, node, children, filters):
        # node: the ResultNode at the root of this (sub)tree
        # children: list of child ResultTree objects (may be empty)
        # filters: configuration dict; 'node_order' and 'dependency_type'
        #          are the keys read by the methods below
        self.node = node
        self.children = children
        self.filters = filters
        # lazily-computed caches (filled by the get_* methods below)
        self.key = None
        self.order_key = None
        self.order = None
        self.array = None

    def __repr__(self):
        return self.get_key()

    def set_children(self, children):
        """Replace the child list (caches are NOT invalidated here)."""
        self.children = children

    def reset_params(self):
        """Drop all cached derived values so they are recomputed on demand."""
        self.key = None
        self.order_key = None
        self.order = None
        self.array = None

    def get_key(self):
        """Return (and cache) the bracketed string key of this subtree.

        Children that precede the root in the sentence (when the
        'node_order' filter is active) are written to the left with a '<'
        separator; all others to the right with '>'.  When the
        'dependency_type' filter is on, the child's deprel is appended to
        the separator (e.g. ' <nsubj ').
        """
        if self.key:
            return self.key
        key = ''
        # becomes True once the root's own name has been emitted
        write_self_node_to_result = False
        if self.children:
            children = self.children
            for child in children:
                if self.filters['node_order'] and child.node.location < self.node.location:
                    # left-hand child (precedes the root in surface order)
                    if self.filters['dependency_type']:
                        separator = ' <' + child.node.deprel + ' '
                    else:
                        separator = ' < '
                    key += child.get_key() + separator
                else:
                    if not write_self_node_to_result:
                        # first right-hand child: emit the root name before it
                        write_self_node_to_result = True
                        key += self.node.name
                    if self.filters['dependency_type']:
                        separator = ' >' + child.node.deprel + ' '
                    else:
                        separator = ' > '
                    key += separator + child.get_key()

            if not write_self_node_to_result:
                # all children were on the left: root name goes last
                key += self.node.name
            self.key = '(' + key + ')'
        else:
            # leaf: key is just the node name, no brackets
            self.key = self.node.name
        return self.key

    def get_key_sorted(self):
        """Return a canonical key with children sorted by name.

        Unlike get_key(), this ignores surface order (all children are
        written to the right) and is not cached.
        """
        key = ''
        write_self_node_to_result = False
        if self.children:
            # canonical ordering: sort children alphabetically by name
            children = sorted(self.children, key=lambda x: x.node.name)
            for child in children:
                if not write_self_node_to_result:
                    write_self_node_to_result = True
                    key += self.node.name
                if self.filters['dependency_type']:
                    separator = ' >' + child.node.deprel + ' '
                else:
                    separator = ' > '
                key += separator + child.get_key_sorted()

            if not write_self_node_to_result:
                key += self.node.name
            key = '(' + key + ')'
        else:
            key = self.node.name
        return key

    def get_order_key(self):
        """Return (and cache) a key like get_key() but with sentence
        locations instead of node names — used for duplicate detection."""
        if self.order_key:
            return self.order_key
        order_key = ''
        write_self_node_to_result = False
        if self.children:
            for child in self.children:
                if self.filters['node_order'] and child.node.location < self.node.location:
                    if self.filters['dependency_type']:
                        separator = ' <' + child.node.deprel + ' '
                    else:
                        separator = ' < '
                    order_key += child.get_order_key() + separator
                else:
                    if not write_self_node_to_result:
                        write_self_node_to_result = True
                        order_key += str(self.node.location)
                    if self.filters['dependency_type']:
                        separator = ' >' + child.node.deprel + ' '
                    else:
                        separator = ' > '
                    order_key += separator + child.get_order_key()
            if not write_self_node_to_result:
                order_key += str(self.node.location)
            self.order_key = '(' + order_key + ')'
        else:
            self.order_key = str(self.node.location)
        return self.order_key

    def get_order(self):
        """Return (and cache) the flat list of node locations, left children
        first, in the same traversal order as get_key()."""
        if self.order:
            return self.order
        order = []
        write_self_node_to_result = False
        if self.children:
            for child in self.children:
                if self.filters['node_order'] and child.node.location < self.node.location:
                    order += child.get_order()
                else:
                    if not write_self_node_to_result:
                        write_self_node_to_result = True
                        order += [self.node.location]
                    order += child.get_order()

            if not write_self_node_to_result:
                order += [self.node.location]
            self.order = order
        else:
            self.order = [self.node.location]
        return self.order

    def get_array(self):
        """Return (and cache) the flat list of per-node name-part arrays,
        mirroring the traversal of get_order()."""
        if self.array:
            return self.array
        array = []
        write_self_node_to_result = False
        if self.children:
            for child in self.children:
                if self.filters['node_order'] and child.node.location < self.node.location:
                    array += child.get_array()
                else:
                    if not write_self_node_to_result:
                        write_self_node_to_result = True
                        array += [self.node.name_parts]
                    array += child.get_array()

            if not write_self_node_to_result:
                array += [self.node.name_parts]
            self.array = array
        else:
            self.array = [self.node.name_parts]
        return self.array

    def finalize_result(self):
        """Return a shallow copy with caches reset and ``order`` replaced by
        a relative word-order string of letters (A = leftmost word, ...).
        """
        result = copy.copy(self)
        result.reset_params()

        # create order letters
        order = result.get_order()
        order_letters = [''] * len(result.order)
        for i in range(len(order)):
            # repeatedly pick the smallest remaining location and assign
            # the next alphabet letter to its slot
            ind = order.index(min(order))
            # NOTE(review): 10000 is an "already consumed" sentinel — assumes
            # node locations stay below 10000; and ascii_uppercase caps trees
            # at 26 nodes.  Presumably guaranteed by the tree_size limit —
            # TODO confirm upstream.
            order[ind] = 10000
            order_letters[ind] = string.ascii_uppercase[i]
        result.order = ''.join(order_letters)
        # TODO When tree is finalized create relative word order (alphabet)!
        return result
|
|
@ -1,393 +0,0 @@
|
|||||||
import sys
|
|
||||||
from copy import copy
|
|
||||||
|
|
||||||
from ResultNode import ResultNode
|
|
||||||
from ResultTree import ResultTree
|
|
||||||
from Value import Value
|
|
||||||
from generic import generate_key
|
|
||||||
|
|
||||||
|
|
||||||
class Tree(object):
    """One token/node of a dependency tree, linked to its parent and children.

    Attribute values (form, lemma, upos, xpos, deprel, detailed feats) are
    interned through the shared ``*_dict`` caches passed to the constructor,
    so equal strings share a single ``Value`` instance across the corpus.
    The ``get_subtrees`` machinery recursively matches permanent/temporary
    query trees against this node and its descendants.
    """

    def __init__(self, index, form, lemma, upos, xpos, deprel, feats_detailed, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_detailed_dict, head):
        # guard so repeated __init__ calls do not wipe collected feats
        if not hasattr(self, 'feats'):
            self.feats_detailed = {}

        # intern each attribute through its shared cache dict
        if form not in form_dict:
            form_dict[form] = Value(form)
        self.form = form_dict[form]
        if lemma not in lemma_dict:
            lemma_dict[lemma] = Value(lemma)
        self.lemma = lemma_dict[lemma]
        if upos not in upos_dict:
            upos_dict[upos] = Value(upos)
        self.upos = upos_dict[upos]
        if xpos not in xpos_dict:
            xpos_dict[xpos] = Value(xpos)
        self.xpos = xpos_dict[xpos]
        if deprel not in deprel_dict:
            deprel_dict[deprel] = Value(deprel)
        self.deprel = deprel_dict[deprel]
        # feats_detailed maps feature name -> iterable whose first element is
        # the feature value; only that first value is interned and stored
        for feat in feats_detailed.keys():
            if feat not in feats_detailed_dict:
                feats_detailed_dict[feat] = {}
            if next(iter(feats_detailed[feat])) not in feats_detailed_dict[feat]:
                feats_detailed_dict[feat][next(iter(feats_detailed[feat]))] = Value(next(iter(feats_detailed[feat])))
            if not feat in self.feats_detailed:
                self.feats_detailed[feat] = {}
            self.feats_detailed[feat][next(iter(feats_detailed[feat]))] = feats_detailed_dict[feat][next(iter(feats_detailed[feat]))]

        self.parent = head
        self.children = []
        # NOTE(review): children_split looks like a left/right split marker;
        # it is initialized to -1 and not modified in this file — confirm use.
        self.children_split = -1

        # position of this token in the sentence
        self.index = index

        # for caching answers to questions
        self.cache = {}

    def add_child(self, child):
        """Append a child Tree node."""
        self.children.append(child)

    def set_parent(self, parent):
        """Set (or replace) the parent Tree node."""
        self.parent = parent

    def fits_static_requirements_feats(self, query_tree):
        """Return True when every detailed feature constraint in
        ``query_tree`` matches this node (vacuously True when absent)."""
        if 'feats_detailed' not in query_tree:
            return True

        for feat in query_tree['feats_detailed'].keys():
            if feat not in self.feats_detailed or query_tree['feats_detailed'][feat] != next(iter(self.feats_detailed[feat].values())).get_value():
                return False

        return True

    def fits_permanent_requirements(self, filters):
        """Check this node against the 'root_whitelist' filter.

        Each whitelist option is a dict of attribute constraints; the node
        passes when at least one option matches completely.  An empty
        whitelist accepts everything.
        """
        main_attributes = ['deprel', 'feats', 'form', 'lemma', 'upos']

        if not filters['root_whitelist']:
            return True

        for option in filters['root_whitelist']:
            filter_passed = True

            # check if attributes are valid
            for key in option.keys():
                if key not in main_attributes:
                    # non-main keys are treated as detailed-feature constraints
                    if key not in self.feats_detailed or option[key] != list(self.feats_detailed[key].items())[0][1].get_value():
                        filter_passed = False

            filter_passed = filter_passed and \
                            ('deprel' not in option or option['deprel'] == self.deprel.get_value()) and \
                            ('form' not in option or option['form'] == self.form.get_value()) and \
                            ('lemma' not in option or option['lemma'] == self.lemma.get_value()) and \
                            ('upos' not in option or option['upos'] == self.upos.get_value())

            if filter_passed:
                return True

        return False

    def fits_temporary_requirements(self, filters):
        """Pass when no 'label_whitelist' is set or this node's deprel is in it."""
        return not filters['label_whitelist'] or self.deprel.get_value() in filters['label_whitelist']

    def fits_static_requirements(self, query_tree, filters):
        """Match this node's attributes against a query-tree node.

        Also enforces the 'complete_tree_type' child-count constraint and
        the detailed-feature constraints.
        """
        return ('form' not in query_tree or query_tree['form'] == self.form.get_value()) and \
               ('lemma' not in query_tree or query_tree['lemma'] == self.lemma.get_value()) and \
               ('upos' not in query_tree or query_tree['upos'] == self.upos.get_value()) and \
               ('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
               ('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \
               (not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children']))) and \
               self.fits_static_requirements_feats(query_tree)

    def generate_children_queries(self, all_query_indices, children):
        """Generator that walks ``children`` and yields, for each child, the
        (child, queries, metadata) to evaluate next.

        The caller sends back partial results via ``send``, which drive the
        continuation queries for the following child.  A final
        (None, None, None) is yielded when all children are exhausted.
        """
        partial_results = {}
        # list of pairs (index of query in group, group of query, is permanent)
        child_queries_metadata = []
        for child_index, child in enumerate(children):
            new_queries = []

            # add continuation queries to children
            for result_part_index, result_index, is_permanent in child_queries_metadata:
                if result_index in partial_results and result_part_index in partial_results[result_index] and len(partial_results[result_index][result_part_index]) > 0:
                    if len(all_query_indices[result_index][0]) > result_part_index + 1:
                        new_queries.append((result_part_index + 1, result_index, is_permanent))

            child_queries_metadata = new_queries

            # add new queries to children
            for result_index, (group, is_permanent) in enumerate(all_query_indices):
                # check if node has enough children for query to be possible
                if len(children) - len(group) >= child_index:
                    child_queries_metadata.append((0, result_index, is_permanent))

            child_queries = []
            for result_part_index, result_index, _ in child_queries_metadata:
                child_queries.append(all_query_indices[result_index][0][result_part_index])

            partial_results = yield child, child_queries, child_queries_metadata
        yield None, None, None

    def add_subtrees(self, old_subtree, new_subtree):
        """Extend ``old_subtree`` in place with the entries of ``new_subtree``."""
        old_subtree.extend(new_subtree)

    def get_all_query_indices(self, temporary_query_nb, permanent_query_nb, permanent_query_trees, all_query_indices, children, create_output_string, filters):
        """Fan all (partial) child queries out to every child and merge the
        answers back per parent query.

        Returns (partial_answers, complete_answers): one slot per temporary+
        permanent query, and one slot per permanent query respectively.
        """
        partial_answers = [[] for i in range(permanent_query_nb + temporary_query_nb)]
        complete_answers = [[] for i in range(permanent_query_nb)]

        # list of pairs (index of query in group, group of query)
        # TODO try to erase!!!
        child_queries = [all_query_indice[0] for all_query_indice in all_query_indices]

        answers_lengths = [len(query) for query in child_queries]

        child_queries_flatten = [query_part for query in child_queries for query_part in query]

        all_new_partial_answers = [[] for query_part in child_queries_flatten]

        # deduplicate identical query parts so each is asked only once
        child_queries_flatten_dedup = []
        child_queries_flatten_dedup_indices = []
        for query_part in child_queries_flatten:
            try:
                index = child_queries_flatten_dedup.index(query_part)
            except ValueError:
                index = len(child_queries_flatten_dedup)
                child_queries_flatten_dedup.append(query_part)

            child_queries_flatten_dedup_indices.append(index)

        # ask children all queries/partial queries
        for child in children:
            # obtain children results
            new_partial_answers_dedup, new_complete_answers = child.get_subtrees(permanent_query_trees, child_queries_flatten_dedup,
                                                                                 create_output_string, filters)

            assert len(new_partial_answers_dedup) == len(child_queries_flatten_dedup)

            # duplicate results again on correct places
            for i, flattened_index in enumerate(child_queries_flatten_dedup_indices):
                all_new_partial_answers[i].append(new_partial_answers_dedup[flattened_index])

            for i in range(len(new_complete_answers)):
                # TODO add order rearagement (TO KEY)
                complete_answers[i].extend(new_complete_answers[i])

        # merge answers in appropriate way
        i = 0
        # iterate over all answers per queries
        for answer_i, answer_length in enumerate(answers_lengths):
            # iterate over answers of query
            # TODO ERROR IN HERE!
            partial_answers[answer_i] = self.create_answers(all_new_partial_answers[i:i + answer_length], answer_length, filters)
            i += answer_length

        return partial_answers, complete_answers

    def order_dependent_queries(self, active_permanent_query_trees, active_temporary_query_trees, partial_subtrees,
                                create_output_string, merged_partial_subtrees, i_query, i_answer, filters):
        """Assemble the result trees for query ``i_query`` and append them to
        ``merged_partial_subtrees``.

        ``i_answer`` indexes into ``partial_subtrees`` and is only advanced
        when the query actually has children; the updated value is returned.
        """
        node = ResultNode(self, self.index, create_output_string)

        if i_query < len(active_permanent_query_trees):
            if 'children' in active_permanent_query_trees[i_query]:
                merged_partial_subtrees.append(
                    self.create_output_children(partial_subtrees[i_answer], [ResultTree(node, [], filters)], filters))
                i_answer += 1
            else:
                # childless query: the node alone is the whole answer
                merged_partial_subtrees.append([ResultTree(node, [], filters)])
        else:
            if 'children' in active_temporary_query_trees[i_query - len(active_permanent_query_trees)]:
                merged_partial_subtrees.append(
                    self.create_output_children(partial_subtrees[i_answer], [ResultTree(node, [], filters)], filters))
                i_answer += 1
            else:
                merged_partial_subtrees.append([ResultTree(node, [], filters)])

        return i_answer

    def get_unigrams(self, create_output_strings, filters):
        """Return the unigram keys of this node and all descendants."""
        unigrams = [generate_key(self, create_output_strings, print_lemma=False)[1]]
        for child in self.children:
            unigrams += child.get_unigrams(create_output_strings, filters)
        return unigrams

    def get_subtrees(self, permanent_query_trees, temporary_query_trees, create_output_string, filters):
        """Recursively match query trees against this node.

        :param permanent_query_trees: queries active everywhere in the tree
        :param temporary_query_trees: queries active only at this level
        :return: (partial_answers, complete_answers) — answers to the
                 temporary queries, and finalized answers to the permanent ones.
        """

        # list of all children queries grouped by parent queries
        all_query_indices = []

        active_permanent_query_trees = []
        for permanent_query_tree in permanent_query_trees:
            if self.fits_static_requirements(permanent_query_tree, filters) and self.fits_permanent_requirements(filters):
                active_permanent_query_trees.append(permanent_query_tree)
                if 'children' in permanent_query_tree:
                    all_query_indices.append((permanent_query_tree['children'], True))
                    # r_all_query_indices.append((permanent_query_tree['r_children'], True))

        active_temporary_query_trees = []
        successful_temporary_queries = []
        for i, temporary_query_tree in enumerate(temporary_query_trees):
            if self.fits_static_requirements(temporary_query_tree, filters) and self.fits_temporary_requirements(filters):
                active_temporary_query_trees.append(temporary_query_tree)
                successful_temporary_queries.append(i)
                if 'children' in temporary_query_tree:
                    all_query_indices.append((temporary_query_tree['children'], False))

        partial_subtrees, complete_answers = self.get_all_query_indices(len(temporary_query_trees),
                                                                        len(permanent_query_trees),
                                                                        permanent_query_trees,
                                                                        all_query_indices, self.children,
                                                                        create_output_string, filters)

        merged_partial_answers = []
        i_question = 0
        # i_child is necessary, because some queries may be answered at the beginning and were not passed to children.
        # i_child is used to point where we are inside answers
        i_answer = 0
        # go over all permanent and temporary query trees
        while i_question < len(active_permanent_query_trees) + len(active_temporary_query_trees):
            # permanent query trees always have left and right child
            i_answer = self.order_dependent_queries(active_permanent_query_trees, active_temporary_query_trees, partial_subtrees,
                                                    create_output_string, merged_partial_answers, i_question, i_answer, filters)

            i_question += 1

        for i in range(len(active_permanent_query_trees)):
            # TODO FINALIZE RESULT
            # erase first and last braclets when adding new query result
            add_subtree = [subtree.finalize_result() for subtree in merged_partial_answers[i]]
            complete_answers[i].extend(add_subtree)

        # answers to valid queries
        partial_answers = [[] for i in range(len(temporary_query_trees))]
        for inside_i, outside_i in enumerate(successful_temporary_queries):
            partial_answers[outside_i] = merged_partial_answers[
                len(active_permanent_query_trees) + inside_i]

        return partial_answers, complete_answers

    @staticmethod
    def create_children_groups(left_parts, right_parts):
        """Cartesian-combine two lists of child groups into all concatenations."""
        if not left_parts:
            return right_parts

        if not right_parts:
            return left_parts

        all_children_group_possibilities = []
        for left_part in left_parts:
            for right_part in right_parts:
                new_part = copy(left_part)
                new_part.extend(right_part)
                all_children_group_possibilities.append(new_part)
        return all_children_group_possibilities

    @staticmethod
    def merge_answer(answer1, answer2, base_answer_i, answer_j):
        """Pairwise-merge two answer lists, skipping same-index pairs.

        Returns the merged results and the index pairs that produced them.
        """
        merged_results = []
        merged_indices = []
        for answer1p_i, old_result in enumerate(answer1):
            for answer2p_i, new_result in enumerate(answer2):
                if answer1p_i != answer2p_i:
                    new_indices = [answer1p_i] + [answer2p_i]
                    # TODO add comparison answers with different indices if equal than ignore
                    merged_results.append(old_result + new_result)
                    merged_indices.append(new_indices)
        return merged_results, merged_indices

    def merge_results3(self, child, new_results, filters):
        """Attach every combination of child answers to copies of
        ``new_results`` and return the expanded result list."""
        if filters['node_order']:
            new_child = child
        else:
            # order-insensitive mode: canonicalize child order by key
            new_child = sorted(child, key=lambda x: x[0].get_key())

        children_groups = []

        for i_answer, answer in enumerate(new_child):
            children_groups = self.create_children_groups(children_groups, [[answer_part] for answer_part in answer])

        results = []
        for result in new_results:
            for children in children_groups:
                new_result = copy(result)
                new_result.set_children(children)
                results.append(new_result)

        return results

    def create_output_children(self, children, new_results, filters):
        """Merge each child-answer set into ``new_results`` and concatenate."""
        merged_results = []
        for i_child, child in enumerate(children):
            merged_results.extend(self.merge_results3(child, new_results, filters))
        return merged_results

    def create_answers(self, separated_answers, answer_length, filters):
        """Combine per-child partial answers into complete, deduplicated trees.

        ``separated_answers[part][child]`` holds the answers of ``child`` to
        answer-part ``part``; a tree is complete when every part slot is
        filled (each by a different child).  Completed trees are sorted by
        the child indices that built them and deduplicated by order keys.
        """
        partly_built_trees = [[None] * answer_length]
        partly_built_trees_architecture_indices = [[None] * answer_length]
        built_trees = []
        built_trees_architecture_indices = []

        # iterate over children first, so that new partly built trees are added only after all results of specific
        # child are added
        for child_i in range(len(separated_answers[0])):
            new_partly_built_trees = []
            new_partly_built_trees_architecture_indices = []
            # iterate over answers parts
            for answer_part_i in range(len(separated_answers)):
                # necessary because some parts do not pass filters and are not added
                if separated_answers[answer_part_i][child_i]:
                    for tree_part_i, tree_part in enumerate(partly_built_trees):
                        if not tree_part[answer_part_i]:
                            # extend this partial tree with the child's answer
                            new_tree_part = copy(tree_part)
                            new_tree_part_architecture_indices = copy(partly_built_trees_architecture_indices[tree_part_i])
                            new_tree_part[answer_part_i] = separated_answers[answer_part_i][child_i]
                            new_tree_part_architecture_indices[answer_part_i] = child_i
                            completed_tree_part = True
                            for val_i, val in enumerate(new_tree_part):
                                if not val:
                                    completed_tree_part = False
                            if completed_tree_part:
                                built_trees.append(new_tree_part)
                                built_trees_architecture_indices.append(new_tree_part_architecture_indices)
                            else:
                                new_partly_built_trees.append(new_tree_part)
                                new_partly_built_trees_architecture_indices.append(new_tree_part_architecture_indices)
                        else:
                            # pass over repetitions of same words
                            pass

            partly_built_trees.extend(new_partly_built_trees)
            partly_built_trees_architecture_indices.extend(new_partly_built_trees_architecture_indices)

        l_ordered_built_trees, unique_trees_architecture = [], []

        if built_trees:
            # sort 3 arrays by architecture indices
            temp_trees_index, temp_trees = (list(t) for t in zip(
                *sorted(zip(built_trees_architecture_indices, built_trees))))

            # order outputs and erase duplicates
            for tree, tree_index in zip(temp_trees, temp_trees_index):
                new_tree_index, new_tree = (list(t) for t in zip(*sorted(zip(tree_index, tree))))
                # TODO check if inside new_tree_architecture in ordered_built_trees_architecture and if not append!
                is_unique = True
                for unique_tree in unique_trees_architecture:
                    already_in = True
                    for part_i in range(len(unique_tree)):
                        if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].get_order_key() != new_tree[part_i][i_unique_part].get_order_key() for i_unique_part in range(len(unique_tree[part_i]))):
                            already_in = False
                            break
                    if already_in:
                        is_unique = False
                        break

                if is_unique:
                    unique_trees_architecture.append(new_tree)
                    l_ordered_built_trees.append(new_tree)
        return l_ordered_built_trees
|
|
@ -1,20 +0,0 @@
|
|||||||
# Copyright 2019 CJVT
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
class Value(object):
    """Immutable holder for a single attribute value.

    Instances are interned in shared caches (see Tree.__init__) so that
    equal attribute strings share one object corpus-wide.
    """

    def __init__(self, value):
        # the wrapped raw value (typically a string or None)
        self.value = value

    def get_value(self):
        """Return the wrapped raw value."""
        return self.value
|
|
@ -1,28 +0,0 @@
|
|||||||
[settings]
|
|
||||||
|
|
||||||
;___GENERAL SETTINGS___
|
|
||||||
input = data/sl_ssj-ud_v2.4.conllu
|
|
||||||
output = results/out_official.tsv
|
|
||||||
internal_saves = ./internal_saves
|
|
||||||
cpu_cores = 12
|
|
||||||
|
|
||||||
;___TREE SPECIFICATIONS___
|
|
||||||
tree_size = 2-4
|
|
||||||
tree_type = complete
|
|
||||||
dependency_type = labeled
|
|
||||||
node_order = free
|
|
||||||
node_type = upos
|
|
||||||
|
|
||||||
;___TREE RESTRICTIONS___
|
|
||||||
;label_whitelist = nsubj|obj|obl
|
|
||||||
;root_whitelist = lemma=mati&Case=Acc|lemma=lep&Degree=Sup
|
|
||||||
|
|
||||||
;___SEARCH BY QUERY___
|
|
||||||
;query = _ > _
|
|
||||||
|
|
||||||
;___OUTPUT SETTINGS___
|
|
||||||
;lines_threshold = 10000
|
|
||||||
;frequency_threshold = 0
|
|
||||||
association_measures = no
|
|
||||||
print_root = yes
|
|
||||||
nodes_number = yes
|
|
@ -1,97 +0,0 @@
|
|||||||
# Copyright 2019 CJVT
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import math
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
def create_output_string_form(tree):
    """Render the node's surface form (FORM column) as a plain string."""
    form_value = tree.form
    return form_value.get_value()
|
|
||||||
|
|
||||||
def create_output_string_deprel(tree):
    """Render the node's dependency relation (DEPREL column) as a string."""
    deprel_value = tree.deprel
    return deprel_value.get_value()
|
|
||||||
|
|
||||||
def create_output_string_lemma(tree):
    """Render the node's lemma; a missing lemma becomes '_' (CoNLL-U blank)."""
    lemma = tree.lemma.get_value()
    return '_' if lemma is None else lemma
|
|
||||||
|
|
||||||
def create_output_string_upos(tree):
    """Render the node's universal POS tag (UPOS column) as a string."""
    upos_value = tree.upos
    return upos_value.get_value()
|
|
||||||
|
|
||||||
def create_output_string_xpos(tree):
    """Render the node's language-specific POS tag (XPOS column) as a string."""
    xpos_value = tree.xpos
    return xpos_value.get_value()
|
|
||||||
|
|
||||||
def create_output_string_feats(tree):
    """Render the node's morphological features (FEATS column) as a string.

    NOTE(review): Tree.__init__ only sets ``feats_detailed``, not ``feats`` —
    confirm that callers selecting this renderer provide a ``feats`` Value.
    """
    feats_value = tree.feats
    return feats_value.get_value()
|
|
||||||
|
|
||||||
def generate_key(node, create_output_strings, print_lemma=True):
    """Build the (array, key) pair for a node.

    ``array`` is a one-element list holding the raw rendered attribute
    values; ``key`` joins them with '&', prefixing the lemma renderer's
    output with 'L=' when *print_lemma* is true.
    """
    values = [render(node) for render in create_output_strings]
    array = [values]

    if print_lemma and create_output_string_lemma in create_output_strings:
        decorated = []
        for render in create_output_strings:
            rendered = render(node)
            if render is create_output_string_lemma:
                rendered = 'L=' + rendered
            decorated.append(rendered)
        key_array = [decorated]
    else:
        key_array = array

    # single attribute: no '&' joining needed
    key = '&'.join(key_array[0]) if len(array[0]) > 1 else key_array[0][0]

    return array, key
|
|
||||||
|
|
||||||
def generate_name(node, create_output_strings, print_lemma=True):
    """Build the (array, name) pair for a node.

    Like generate_key but flat: ``array`` is the list of raw rendered
    attribute values, and ``name`` joins them with '&', prefixing the lemma
    renderer's output with 'L=' when *print_lemma* is true.
    """
    array = [render(node) for render in create_output_strings]

    if print_lemma and create_output_string_lemma in create_output_strings:
        name_array = []
        for render in create_output_strings:
            rendered = render(node)
            if render is create_output_string_lemma:
                rendered = 'L=' + rendered
            name_array.append(rendered)
    else:
        name_array = array

    # single attribute: no '&' joining needed
    name = '&'.join(name_array) if len(array) > 1 else name_array[0]

    return array, name
|
|
||||||
|
|
||||||
def get_collocabilities(ngram, unigrams_dict, corpus_size):
    """Compute association measures for one n-gram.

    :param ngram: dict with 'object' (result tree whose ``array`` holds the
                  per-node key arrays) and 'number' (observed frequency O)
    :param unigrams_dict: unigram key -> corpus frequency
    :param corpus_size: total number of words N
    :return: ['MI', 'MI3', 'Dice', 'logDice', 't-score', 'simple-LL'] values,
             each formatted to 4 decimal places
    """
    sum_fwi = 0.0
    mul_fwi = 1.0
    for key_array in ngram['object'].array:
        # create key for unigrams
        if len(key_array) > 1:
            key = '&'.join(key_array)
        else:
            key = key_array[0]
        sum_fwi += unigrams_dict[key]
        mul_fwi *= unigrams_dict[key]

    # NOTE(review): floats don't wrap negative on overflow (they go to inf),
    # so this clamp looks like a leftover int-overflow guard — confirm intent.
    if mul_fwi < 0:
        mul_fwi = sys.maxsize

    # number of all words
    N = corpus_size

    # n of ngram
    n = len(ngram['object'].array)
    O = ngram['number']
    # expected frequency under independence: prod(f_wi) / N^(n-1)
    E = mul_fwi / pow(N, n-1)

    # ['MI', 'MI3', 'Dice', 'logDice', 't-score', 'simple-LL']
    mi = math.log(O / E, 2)
    mi3 = math.log(pow(O, 3) / E, 2)
    dice = n * O / sum_fwi
    logdice = 14 + math.log(dice, 2)
    tscore = (O - E) / math.sqrt(O)
    simplell = 2 * (O * math.log10(O / E) - (O - E))
    return ['%.4f' % mi, '%.4f' % mi3, '%.4f' % dice, '%.4f' % logdice, '%.4f' % tscore, '%.4f' % simplell]
|
|
@ -1 +0,0 @@
|
|||||||
:: Install the project's Python dependencies via the Windows Python launcher.
py -3 -m pip install -r requirements.txt &
|
|
Before Width: | Height: | Size: 76 KiB |
Before Width: | Height: | Size: 28 KiB |
Before Width: | Height: | Size: 44 KiB |
Before Width: | Height: | Size: 314 KiB |
Before Width: | Height: | Size: 128 KiB |
@ -1 +0,0 @@
|
|||||||
pyconll==3.1.0
|
|
@ -1,17 +0,0 @@
|
|||||||
# Batch-run STARK tree extraction over every treebank in a UD release.
#
# Walks each treebank directory under `input_path`, finds all *.conllu files,
# mirrors the directory layout under `output_path`, and invokes stark.py once
# per input file that does not already have an output — so an interrupted run
# can simply be restarted and will skip completed files.
import os
import subprocess
from pathlib import Path

input_path = '/home/lukakrsnik/STARK/data/ud-treebanks-v2.11/'
output_path = '/home/lukakrsnik/STARK/results/ud-treebanks-v2.11_B/'
config_path = '/home/lukakrsnik/STARK/data/B_test-all-treebanks_3_completed_unlabeled_fixed_form_root=NOUN_5.ini'

for treebank_dir in sorted(os.listdir(input_path)):
    # distinct loop variable: the original reused `path` for both loops,
    # shadowing the outer directory name inside the inner loop
    conllu_paths = Path(input_path, treebank_dir).glob('**/*.conllu')
    for conllu_path in sorted(conllu_paths):
        folder_name = os.path.join(output_path, conllu_path.parts[-2])
        file_name = os.path.join(folder_name, conllu_path.name)
        # exist_ok avoids the check-then-create race of the original
        os.makedirs(folder_name, exist_ok=True)
        if not os.path.exists(file_name):
            # argument list instead of a shell-concatenated os.system string:
            # safe with spaces/special characters in paths, no shell injection
            subprocess.run([
                "python", "/home/luka/Development/STARK/stark.py",
                "--config_file", config_path,
                "--input", str(conllu_path),
                "--output", file_name,
            ])
|
|
@ -1,2 +0,0 @@
|
|||||||
:: Run the dependency parse-tree extractor with the default config,
:: then wait for a keypress so the console window stays open.
py -3 dependency-parsetree.py --config_file=config.ini &
@pause
|
|
@ -1,3 +0,0 @@
|
|||||||
# Run the dependency parse-tree extractor inside the project's virtualenv.
# Usage: ./run.sh <config_file>
source venv/bin/activate
python3 dependency-parsetree.py --config_file="$1"
deactivate
|
|
Loading…
Reference in new issue