Cleaning repo

parent 36223d3a98
commit 421f12cac6
@@ -18,17 +18,8 @@ from generic import generate_key, generate_name
 class ResultNode(object):
 def __init__(self, node, architecture_order, create_output_strings):
 self.name_parts, self.name = generate_name(node, create_output_strings)
-# self.key_free = self.key
-# self.array = [[output_string]]
-# self.order_key = str(architecture_order)
 self.location = architecture_order
 self.deprel = node.deprel.get_value()
-# order with original numbers in sentences
-# self.order = str([architecture_order])
-# order with numbers from 0 to n of n-gram
-# self.root = ''
-# self.final_order = ''
-# self.separators = []

 def __repr__(self):
 return self.name

@@ -15,9 +15,6 @@
 import copy
 import string

-from generic import create_output_string_form, create_output_string_deprel, create_output_string_lemma, \
-create_output_string_upos, create_output_string_xpos, create_output_string_feats, generate_key


 class ResultTree(object):
 def __init__(self, node, children, filters):

@@ -51,7 +48,6 @@ class ResultTree(object):
 for child in children:
 if self.filters['node_order'] and child.node.location < self.node.location:
 if self.filters['dependency_type']:
-# separator = ' <' + deprel[i_child][i_answer] + ' '
 separator = ' <' + child.node.deprel + ' '
 else:
 separator = ' < '

@@ -104,7 +100,6 @@ class ResultTree(object):
 for child in self.children:
 if self.filters['node_order'] and child.node.location < self.node.location:
 if self.filters['dependency_type']:
-# separator = ' <' + deprel[i_child][i_answer] + ' '
 separator = ' <' + child.node.deprel + ' '
 else:
 separator = ' < '

@@ -169,93 +164,9 @@ class ResultTree(object):
 self.array = [self.node.name_parts]
 return self.array

-# def add(self, string, architecture_order, separator, is_left):
-# if is_left:
-# self.array = [string] + self.array
-# self.order = [architecture_order] + self.order
-# # self.order = [architecture_order] + self.order
-# self.separators = [separator] + self.separators
-# self.key = string + ' ' + separator + ' ' + self.key
-# self.order_key = architecture_order + ' ' + separator + ' ' + self.order_key
-#
-# else:
-# self.array += [string]
-# self.order += [architecture_order]
-# # self.order += [architecture_order]
-# self.separators += [separator]
-#
-# self.key += ' ' + separator + ' ' + string
-# self.order_key += ' ' + separator + ' ' + architecture_order

-# def add_separator(self, separator, left=True):
-# self_copy = copy.copy(self)
-# if left:
-# self_copy.separators += [separator]
-# self_copy.key += separator
-# self_copy.order_key += separator
-# else:
-# self_copy.separators = [separator] + self_copy.separators
-# self_copy.key = separator + self_copy.key
-# self_copy.order_key = separator + self_copy.order_key
-# return self_copy

-# def merge_results2(self):


-# def merge_results(self, right_t, separator, left=True):
-# left_tree = copy.copy(self)
-# right_tree = copy.copy(right_t)
-#
-# if separator:
-# if left:
-# # merged_results.append(left_part + right_part + separator)
-# left_tree.key = left_tree.key + right_tree.key + separator
-# left_tree.order_key = left_tree.order_key + right_tree.order_key + separator
-# left_tree.array = left_tree.array + right_tree.array
-# left_tree.order = left_tree.order + right_tree.order
-# # left_tree.order = str([architecture_order])
-# left_tree.separators = left_tree.separators + right_tree.separators + [separator]
-# else:
-# # merged_results.append(left_part + separator + right_part)
-# left_tree.key = left_tree.key + separator + right_tree.key
-# left_tree.order_key = left_tree.order_key + separator + right_tree.order_key
-# left_tree.array = left_tree.array + right_tree.array
-# left_tree.order = left_tree.order + right_tree.order
-# # left_tree.order = str([architecture_order])
-# left_tree.separators = left_tree.separators + [separator] + right_tree.separators
-# else:
-# # merged_results.append(left_part + right_part)
-# left_tree.key = left_tree.key + right_tree.key
-# left_tree.order_key = left_tree.order_key + right_tree.order_key
-# left_tree.array = left_tree.array + right_tree.array
-# left_tree.order = left_tree.order + right_tree.order
-# # left_tree.order = str([architecture_order])
-# left_tree.separators = left_tree.separators + right_tree.separators
-#
-# return left_tree

-# def extend_answer(self, other_answer, separator):
-# self.array.extend(other_answer.array)
-# self.order.extend(other_answer.order)
-# self.key += separator + other_answer.key
-# self.order_key += separator + other_answer.order_key
-# self.separators.extend(separator)

-# def put_in_bracelets(self, inplace=False):
-# if inplace:
-# self.key = ('(' + self.key + ')')
-# self.order_key = ('(' + self.order_key + ')')
-# return
-# result = copy.copy(self)
-# result.key = ('(' + result.key + ')')
-# result.order_key = ('(' + result.order_key + ')')
-# return result

 def finalize_result(self):
 result = copy.copy(self)
 result.reset_params()
-# result.key = result.get_key()
-# result.set_root()

 # create order letters
 order = result.get_order()

@@ -265,13 +176,5 @@ class ResultTree(object):
 order[ind] = 10000
 order_letters[ind] = string.ascii_uppercase[i]
 result.order = ''.join(order_letters)
-# result.order_key = result.order_key[1:-1]
 # TODO When tree is finalized create relative word order (alphabet)!
 return result

-# def set_root(self):
-# if len(self.array[0]) > 1:
-# self.root = '&'.join(self.array[0])
-# else:
-# # output_string = create_output_strings[0](node)
-# self.root = self.array[0][0]

Tree.py (351 changed lines)
@@ -4,8 +4,7 @@ from copy import copy
 from ResultNode import ResultNode
 from ResultTree import ResultTree
 from Value import Value
-from generic import create_output_string_form, create_output_string_deprel, create_output_string_lemma, \
-create_output_string_upos, create_output_string_xpos, create_output_string_feats, generate_key
+from generic import generate_key


 class Tree(object):

@@ -13,7 +12,6 @@ class Tree(object):
 if not hasattr(self, 'feats'):
 self.feats_detailed = {}

-# form_unicode = str(form).encode("utf-8")
 if form not in form_dict:
 form_dict[form] = Value(form)
 self.form = form_dict[form]

@@ -40,7 +38,6 @@ class Tree(object):
 if not feat in self.feats_detailed:
 self.feats_detailed[feat] = {}
 self.feats_detailed[feat][next(iter(feats_detailed[feat]))] = feats_detailed_dict[feat][next(iter(feats_detailed[feat]))]
-# self.position = position

 self.parent = head
 self.children = []

@@ -52,7 +49,6 @@ class Tree(object):
 self.cache = {}

 def add_child(self, child):
-# child.index = len(self.children)
 self.children.append(child)

 def set_parent(self, parent):

@@ -68,7 +64,6 @@ class Tree(object):

 return True


 def fits_permanent_requirements(self, filters):
 main_attributes = ['deprel', 'feats', 'form', 'lemma', 'upos']


@@ -121,8 +116,6 @@ class Tree(object):
 if result_index in partial_results and result_part_index in partial_results[result_index] and len(partial_results[result_index][result_part_index]) > 0:
 if len(all_query_indices[result_index][0]) > result_part_index + 1:
 new_queries.append((result_part_index + 1, result_index, is_permanent))
-# else:
-# completed_subtrees.append((child, result_index))

 child_queries_metadata = new_queries


@@ -142,76 +135,11 @@ class Tree(object):
 def add_subtrees(self, old_subtree, new_subtree):
 old_subtree.extend(new_subtree)


-# def get_results(self, partial_results_dict, result_index, result_part, outcome, last_result_part):
-# # save results for later usage
-#
-# # if result index already in and element 0 exists (otherwise error)
-# if result_index in partial_results_dict and 0 in partial_results_dict[result_index]:
-# if result_part - 1 in partial_results_dict[result_index]:
-# if result_part in partial_results_dict[result_index]:
-# partial_results_dict[result_index][result_part].extend(self.merge_results(partial_results_dict[result_index][result_part - 1], outcome))
-# else:
-# partial_results_dict[result_index][result_part] = self.merge_results(partial_results_dict[result_index][result_part - 1], outcome)
-#
-# # extend one word layer with output
-# else:
-# partial_results_dict[result_index][0].extend(outcome)
-# else:
-# partial_results_dict[result_index] = {0: outcome}
-#
-# if last_result_part - 1 in partial_results_dict[result_index]:
-# return partial_results_dict[result_index].pop(last_result_part - 1)
-# return []

-# def group_results(self, new_partial_subtrees, child_queries_metadata, all_query_indices, partial_results_dict, partial_subtrees):
-# for outcome, (result_part, result_index, is_permanent) in zip(new_partial_subtrees, child_queries_metadata):
-# if outcome:
-# new_results = self.get_results(partial_results_dict, result_index, result_part, outcome, len(all_query_indices[result_index][0]))
-# if new_results:
-# self.add_subtrees(partial_subtrees[result_index], new_results)
-# else:
-# if not is_permanent:
-# partial_subtrees[result_index].append([])

-# def get_all_query_indices_old(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string):
-# partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)]
-# completed_subtrees = [[] for i in range(completed_subtrees_size)]
-#
-# # list of pairs (index of query in group, group of query)
-# partial_results_dict = {}
-#
-# children_queries_generator = self.generate_children_queries(l_all_query_indices, children)
-#
-# child_index = 0
-# child, child_queries, child_queries_metadata = next(children_queries_generator)
-# while child:
-# # obtain children results
-# new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string)
-#
-# self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices,
-# partial_results_dict, partial_subtrees)
-#
-# for i in range(len(new_completed_subtrees)):
-# completed_subtrees[i].extend(new_completed_subtrees[i])
-# child, child_queries, child_queries_metadata = children_queries_generator.send(partial_results_dict)
-# child_index += 1
-#
-# return partial_subtrees, completed_subtrees

 def get_all_query_indices(self, temporary_query_nb, permanent_query_nb, permanent_query_trees, all_query_indices, children, create_output_string, filters):
-# l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees),
-# len(permanent_query_trees),
-# permanent_query_trees,
-# l_all_query_indices, self.l_children,
-# create_output_string)
 partial_answers = [[] for i in range(permanent_query_nb + temporary_query_nb)]
-partial_answers_index = [[] for i in range(permanent_query_nb + temporary_query_nb)]
 complete_answers = [[] for i in range(permanent_query_nb)]

 # list of pairs (index of query in group, group of query)
-partial_results_dict = {}

 # TODO try to erase!!!
 child_queries = [all_query_indice[0] for all_query_indice in all_query_indices]


@@ -221,8 +149,6 @@ class Tree(object):

 all_new_partial_answers = [[] for query_part in child_queries_flatten]

-# if filters['caching']:
-# erase duplicate queries
 child_queries_flatten_dedup = []
 child_queries_flatten_dedup_indices = []
 for query_part in child_queries_flatten:

@@ -237,7 +163,6 @@ class Tree(object):
 # ask children all queries/partial queries
 for child in children:
 # obtain children results
-# if filters['caching']:
 new_partial_answers_dedup, new_complete_answers = child.get_subtrees(permanent_query_trees, child_queries_flatten_dedup,
 create_output_string, filters)


@@ -247,32 +172,10 @@ class Tree(object):
 for i, flattened_index in enumerate(child_queries_flatten_dedup_indices):
 all_new_partial_answers[i].append(new_partial_answers_dedup[flattened_index])

-# else:
-# new_partial_answers_architecture, new_partial_answers, new_complete_answers = child.get_subtrees(
-# permanent_query_trees, child_queries_flatten,
-# create_output_string, filters)
-#
-# assert len(new_partial_answers) == len(child_queries_flatten)
-#
-# for i, new_partial_subtree in enumerate(new_partial_answers):
-# all_new_partial_answers[i].append(new_partial_subtree)
-# all_new_partial_answers_architecture[i].append(new_partial_answers_architecture[i])
-# # if len(new_partial_answers_architecture[i]) > 1:
-# # print('HERE!!!')
-# all_new_partial_answers_deprel[i].append(create_output_string_deprel(child))

-# add 6 queries from 3 split up
-# self.group_results(new_partial_subtrees, child_queries_metadata, all_query_indices,
-# partial_results_dict, partial_subtrees)

 for i in range(len(new_complete_answers)):
 # TODO add order rearagement (TO KEY)
 complete_answers[i].extend(new_complete_answers[i])

-# if create_output_string_lemma(self) == 'drama':
-# print('HERE!@@!')
-# if create_output_string_form(self) == 'vpiti':
-# print('HERE!@@!')
 # merge answers in appropriate way
 i = 0
 # iterate over all answers per queries

@@ -280,60 +183,14 @@ class Tree(object):
 # iterate over answers of query
 # TODO ERROR IN HERE!
 partial_answers[answer_i] = self.create_answers(all_new_partial_answers[i:i + answer_length], answer_length, filters)
-# while i < answers_length:
-# self.create_grouped_answers()
-# i += 1
 i += answer_length

-# merged_results = []
-# for old_result in old_results:
-# for new_result in new_results:
-# merged_results.append(old_result + new_result)
-# return merged_results
-# children_queries_generator = self.generate_children_queries(all_query_indices, children)
-#
-# child_index = 0
-# child, child_queries, child_queries_metadata = next(children_queries_generator)
-# while child:
-# # obtain children results
-# new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string)
-#
-# self.group_results(new_partial_subtrees, child_queries_metadata, all_query_indices,
-# partial_results_dict, partial_subtrees)
-#
-# for i in range(len(new_completed_subtrees)):
-# completed_subtrees[i].extend(new_completed_subtrees[i])
-# child, child_queries, child_queries_metadata = children_queries_generator.send(partial_results_dict)
-# child_index += 1

 return partial_answers, complete_answers

 def order_dependent_queries(self, active_permanent_query_trees, active_temporary_query_trees, partial_subtrees,
 create_output_string, merged_partial_subtrees, i_query, i_answer, filters):
-# string_output = ''
-# if create_output_string_form(self) == 'vožnji':
-# print('HERE!@@!')
-# if create_output_string_form(self) == 'začelo':
-# print('HERE!@@!')
 node = ResultNode(self, self.index, create_output_string)

-# TEST = ResultTree(node, [], filters)
-# a = TEST.create_key()
-# if i_query < len(active_permanent_query_trees):
-# if 'children' in active_permanent_query_trees[i_query]:
-# merged_partial_subtrees.append(
-# self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters))
-# i_answer += 1
-# else:
-# merged_partial_subtrees.append([Result(self, self.index, create_output_string)])
-# else:
-# if 'children' in active_temporary_query_trees[i_query - len(active_permanent_query_trees)]:
-# merged_partial_subtrees.append(
-# self.create_output_children(partial_subtrees[i_answer], [Result(self, self.index, create_output_string)], filters))
-# i_answer += 1
-# else:
-# merged_partial_subtrees.append([Result(self, self.index, create_output_string)])

 if i_query < len(active_permanent_query_trees):
 if 'children' in active_permanent_query_trees[i_query]:
 merged_partial_subtrees.append(

@@ -366,9 +223,6 @@ class Tree(object):
 :param temporary_query_trees:
 """

-# if create_output_string_form(self) == 'vožnji':
-# print('HERE!@@!')

 # list of all children queries grouped by parent queries
 all_query_indices = []


@@ -384,7 +238,6 @@ class Tree(object):
 successful_temporary_queries = []
 for i, temporary_query_tree in enumerate(temporary_query_trees):
 if self.fits_static_requirements(temporary_query_tree, filters) and self.fits_temporary_requirements(filters):
-# if 'l_children' in temporary_query_tree and 'r_children' in temporary_query_tree:
 active_temporary_query_trees.append(temporary_query_tree)
 successful_temporary_queries.append(i)
 if 'children' in temporary_query_tree:

@@ -397,7 +250,6 @@ class Tree(object):
 create_output_string, filters)

 merged_partial_answers = []
-# merged_partial_answers_architecture = []
 i_question = 0
 # i_child is necessary, because some queries may be answered at the beginning and were not passed to children.
 # i_child is used to point where we are inside answers

@@ -414,82 +266,30 @@ class Tree(object):
 # TODO FINALIZE RESULT
 # erase first and last braclets when adding new query result
 add_subtree = [subtree.finalize_result() for subtree in merged_partial_answers[i]]
-# if 0 < len(active_permanent_query_trees):
 complete_answers[i].extend(add_subtree)
-# completed_subtrees[i].extend(merged_partial_subtrees[i])

 # answers to valid queries
 partial_answers = [[] for i in range(len(temporary_query_trees))]
 for inside_i, outside_i in enumerate(successful_temporary_queries):
-# partial_answers_architecture[outside_i] = merged_partial_answers_architecture[len(active_permanent_query_trees) + inside_i]
 partial_answers[outside_i] = merged_partial_answers[
 len(active_permanent_query_trees) + inside_i]

-# return subtrees_architecture, subtrees, completed_subtrees
 return partial_answers, complete_answers
-# return merged_partial_subtrees_architecture[len(active_permanent_query_trees):], merged_partial_subtrees[len(active_permanent_query_trees):], completed_subtrees

-# @staticmethod
-# def merge_results(left_parts, right_parts, separator, left=True, right_part_free=False):
-# if not left_parts:
-# # return all right_parts
-# return [r_p.add_separator(separator, left) for r_p in right_parts]
-# # if left:
-# # return [r_p + separator for r_p in right_parts]
-# # # return [r_p.add_separator(separator, left) for r_p in right_parts]
-# # else:
-# # return [separator + r_p for r_p in right_parts]
-#
-# if not right_parts:
-# return [l_p.add_separator(separator, False) for l_p in left_parts]
-# # return [separator + l_p for l_p in left_parts]
-# merged_results = []
-# for left_part in left_parts:
-# if right_part_free:
-# for right_part in right_parts[1]:
-# merged_results.append((right_parts[0], left_part.merge_results(right_part, separator, left)))
-# else:
-# for right_part in right_parts:
-# merged_results.append(left_part.merge_results(right_part, separator, left))
-# # merged_results.append(left_part.merge_results(right_part, separator))
-# # if separator:
-# # if left:
-# # merged_results.append(left_part + right_part + separator)
-# # else:
-# # merged_results.append(left_part + separator + right_part)
-# # else:
-# # merged_results.append(left_part + right_part)
-# return merged_results

 @staticmethod
 def create_children_groups(left_parts, right_parts):
 if not left_parts:
-# return all right_parts
 return right_parts
-# if left:
-# return [r_p + separator for r_p in right_parts]
-# # return [r_p.add_separator(separator, left) for r_p in right_parts]
-# else:
-# return [separator + r_p for r_p in right_parts]

 if not right_parts:
 return left_parts
-# return [separator + l_p for l_p in left_parts]
 all_children_group_possibilities = []
 for left_part in left_parts:
 for right_part in right_parts:
 new_part = copy(left_part)
-# new_part.reset_params()
 new_part.extend(right_part)
 all_children_group_possibilities.append(new_part)
-# merged_results.append(left_part.merge_results(right_part, separator))
-# if separator:
-# if left:
-# merged_results.append(left_part + right_part + separator)
-# else:
-# merged_results.append(left_part + separator + right_part)
-# else:
-# merged_results.append(left_part + right_part)
 return all_children_group_possibilities

 @staticmethod

@@ -500,112 +300,15 @@ class Tree(object):
 for answer2p_i, new_result in enumerate(answer2):
 if answer1p_i != answer2p_i:
 new_indices = [answer1p_i] + [answer2p_i]
-sorted_indices = sorted(new_indices)

-if sorted_indices in merged_indices:
-test = merged_indices.index(sorted(new_indices))
 # TODO add comparison answers with different indices if equal than ignore
 merged_results.append(old_result + new_result)
 merged_indices.append(new_indices)
 return merged_results, merged_indices

-# def merge_results2(self, child, new_results, filters):
-# if create_output_string_form(self) == 'začelo':
-# print('HERE!@@!')
-# if create_output_string_form(self) == 'Dogodek':
-# print('HERE!@@!')
-# if create_output_string_form(self) == 'utišal':
-# print('HERE!@@!')
-# if create_output_string_form(self) == 'prijel':
-# print('HERE!@@!')
-# if filters['node_order']:
-# new_child = child
-# # new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
-# else:
-# new_child = sorted(child, key=lambda x: x[0].key)
-#
-# l_res = []
-# r_res = []
-# results = []
-# for i_answer, answer in enumerate(new_child):
-# if filters['node_order'] and answer[0].order[0] < self.index:
-# # if filters['node_order'] and indices[i_child][i_answer] < self.children_split:
-# if filters['dependency_type']:
-# # separator = ' <' + deprel[i_child][i_answer] + ' '
-# separator = ' <' + answer[0].deprel + ' '
-# else:
-# separator = ' < '
-# l_res = self.merge_results(l_res, answer, separator, left=True)
-# # l_res += answer + separator
-# else:
-# if filters['dependency_type']:
-# separator = ' >' + answer[0].deprel + ' '
-# else:
-# separator = ' > '
-# r_res = self.merge_results(r_res, answer, separator, left=False)
-# # r_res += separator + answer
-#
-# # if filters['node_order']:
-# # r_res_sorted = []
-# # for i_answer, answer in new_child_sorted:
-# # if filters['dependency_type']:
-# # separator = ' >' + answer[0].deprel + ' '
-# # else:
-# # separator = ' > '
-# # r_res_sorted = (i_answer, self.merge_results(r_res_sorted, answer, separator, left=False))
-# #
-# #
-# # r_res_sorted_combined = self.merge_results(new_results, r_res_sorted, None, right_part_free=True)
-# # # print('here')
-#
-# if l_res:
-# l_res_combined = self.merge_results(l_res, new_results, None)
-# if r_res:
-# r_res_combined = self.merge_results(l_res_combined, r_res, None)
-# # merged_results.extend(['(' + el + ')' for el in r_res_combined])
-# result = r_res_combined
-# # results.extend([el.put_in_bracelets() for el in r_res_combined])
-# else:
-# result = l_res_combined
-# # results.extend([el.put_in_bracelets() for el in l_res_combined])
-# elif r_res:
-# r_res_combined = self.merge_results(new_results, r_res, None)
-# result = r_res_combined
-# # results.extend([el.put_in_bracelets() for el in r_res_combined])
-# else:
-# result = []
-#
-#
-# results.extend([el.put_in_bracelets() for el in result])
-#
-# return results

-# def create_merged_results(self, answers, separators, separator_switch):
-# new_answers = []
-# for answer_i, answer in enumerate(answers):
-# new_answer = copy(answer[0])
-# print(create_output_string_form(self))
-# for answer_part_i, answer_part in enumerate(answer[1:]):
-# new_answer.extend_answer(answer_part, separators[answer_part_i])
-# new_answer.put_in_bracelets(inplace=True)
-# new_answers.append(new_answer)
-# return new_answers
-# def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters):

 def merge_results3(self, child, new_results, filters):
-# if create_output_string_form(self) == 'Dogodek':
-# print('HERE!@@!')
-# if create_output_string_form(self) == 'začelo':
-# print('HERE!@@!')
-# if create_output_string_form(self) == 'utišal':
-# print('HERE!@@!')
-# if create_output_string_form(self) == 'prijel':
-# print('HERE!@@!')

 if filters['node_order']:
 new_child = child
-# new_child_sorted = sorted(enumerate(child), key=lambda x: x[1][0].key)
-# new_child_sorted = sorted(child, key=lambda x: x[0].get_key())
 else:
 new_child = sorted(child, key=lambda x: x[0].get_key())


@@ -613,58 +316,28 @@ class Tree(object):

 for i_answer, answer in enumerate(new_child):
 children_groups = self.create_children_groups(children_groups, [[answer_part] for answer_part in answer])
-# r_res += separator + answer

-# children_groups_sorted = []
-# for i_answer, answer in enumerate(new_child_sorted):
-# children_groups_sorted = self.create_children_groups(children_groups_sorted, [[answer_part] for answer_part in answer])
-#
-#
-# results_sorted = {}
-# for result in new_results:
-# for children in children_groups_sorted:
-# new_result = copy(result)
-# new_result.set_children(children)
-# order = tuple(sorted(new_result.get_order()))
-# results_sorted[order] = new_result


 results = []
 for result in new_results:
 for children in children_groups:
 new_result = copy(result)
-# if result.key is not None or result.order is not None or result.array is not None or result.order_key is not None:
-# print('here')
-# new_result.reset_params()
 new_result.set_children(children)
-# order = tuple(sorted(new_result.get_order()))
 results.append(new_result)

 return results

 def create_output_children(self, children, new_results, filters):
-# if create_output_string_form(self) == 'Dogodek':
-# print('HERE!@@!')
-# if create_output_string_form(self) == 'utišal':
-# print('HERE!@@!')
-# if len(new_results) > 1:
-# print('HERE')
 merged_results = []
 for i_child, child in enumerate(children):
-# merged_results.extend(self.merge_results2(child, new_results, filters))
 merged_results.extend(self.merge_results3(child, new_results, filters))
 return merged_results

-# @staticmethod
 def create_answers(self, separated_answers, answer_length, filters):
 partly_built_trees = [[None] * answer_length]
 partly_built_trees_architecture_indices = [[None] * answer_length]
 built_trees = []
 built_trees_architecture_indices = []

-# if create_output_string_form(self) == 'Dogodek':
-# print('HERE!@@!')

 # iterate over children first, so that new partly built trees are added only after all results of specific
 # child are added
 for child_i in range(len(separated_answers[0])):

@@ -712,10 +385,7 @@ class Tree(object):
 for unique_tree in unique_trees_architecture:
 already_in = True
 for part_i in range(len(unique_tree)):
-# test = unique_tree[part_i][0].get_order_key()
 if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].get_order_key() != new_tree[part_i][i_unique_part].get_order_key() for i_unique_part in range(len(unique_tree[part_i]))):
-# if len(unique_tree[part_i]) != len(new_tree[part_i]) or any(unique_tree[part_i][i_unique_part].order_key != new_tree[part_i][i_unique_part].order_key for i_unique_part in range(len(unique_tree[part_i]))):
-# if unique_tree[part_i].order_key != new_tree[part_i].order_key:
 already_in = False
 break
 if already_in:

@@ -724,20 +394,5 @@ class Tree(object):

 if is_unique:
 unique_trees_architecture.append(new_tree)
-# if not filters['node_order']:
-# l_ordered_built_trees_architecture.append(new_tree_architecture)
 l_ordered_built_trees.append(new_tree)
-# TODO NODE ORDER = FALSE
-# else:
-#
-# ordered_built_trees_architecture.append(tree_architecture)
-# ordered_built_trees.append(tree)
-# print("test")
-# for answer1_i, answer1 in enumerate(separated_answers):
-# for answer2_i, answer2 in enumerate(separated_answers):
-# if answer1_i != answer2_i:
-# res, res_i = self.merge_answer(answer1, answer2, answer1_i, answer2_i)
-# print('aaa')
-#
-# pass
 return l_ordered_built_trees
@ -23,11 +23,13 @@ import pickle
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
import time
|
import time
|
||||||
import timeit
|
|
||||||
from multiprocessing import Pool
|
from multiprocessing import Pool
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import gzip
|
import gzip
|
||||||
import sys
|
import sys
|
||||||
|
import pyconll
|
||||||
|
from Tree import Tree
|
||||||
|
from generic import get_collocabilities, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos, create_output_string_feats
|
||||||
sys.setrecursionlimit(25000)
|
sys.setrecursionlimit(25000)
|
||||||
|
|
||||||
def save_zipped_pickle(obj, filename, protocol=-1):
|
def save_zipped_pickle(obj, filename, protocol=-1):
|
||||||
|
@ -39,31 +41,6 @@ def load_zipped_pickle(filename):
|
||||||
loaded_object = pickle.load(f)
|
loaded_object = pickle.load(f)
|
||||||
return loaded_object
|
return loaded_object
|
||||||
|
|
||||||
import pyconll
|
|
||||||
|
|
||||||
from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos, create_output_string_feats
|
|
||||||
|
|
||||||
# for separate searches of feats
|
|
||||||
# feats_detailed_list = [
|
|
||||||
# # lexical features
|
|
||||||
# 'PronType', 'NumType', 'Poss', 'Reflex', 'Foreign', 'Abbr',
|
|
||||||
#
|
|
||||||
# # Inflectional features (nominal)
|
|
||||||
# 'Gender', 'Animacy', 'NounClass', 'Number', 'Case', 'Definite', 'Degree',
|
|
||||||
#
|
|
||||||
# # Inflectional features (verbal)
|
|
||||||
# 'VerbForm', 'Mood', 'Tense', 'Aspect', 'Voice', 'Evident', 'Polarity', 'Person', 'Polite', 'Clusivity',
|
|
||||||
#
|
|
||||||
# # Other
|
|
||||||
# 'Variant', 'Number[psor]', 'Gender[psor]', 'NumForm'
|
|
||||||
# ]
|
|
||||||
|
|
||||||
# feats_detailed_list = []
|
|
||||||
|
|
||||||
# feats_detailed_dict = {key: {} for key in feats_detailed_list}
|
|
||||||
from generic import get_collocabilities
|
|
||||||
|
|
||||||
|
|
||||||
def decode_query(orig_query, dependency_type, feats_detailed_list):
|
def decode_query(orig_query, dependency_type, feats_detailed_list):
|
||||||
new_query = False
|
new_query = False
|
||||||
|
|
||||||
|
@ -72,7 +49,6 @@ def decode_query(orig_query, dependency_type, feats_detailed_list):
|
||||||
new_query = True
|
new_query = True
|
||||||
orig_query = orig_query[1:-1]
|
orig_query = orig_query[1:-1]
|
||||||
|
|
||||||
# if orig_query is '_' return {}
|
|
||||||
if dependency_type != '':
|
if dependency_type != '':
|
||||||
decoded_query = {'deprel': dependency_type}
|
decoded_query = {'deprel': dependency_type}
|
||||||
else:
|
else:
|
||||||
|
@ -88,19 +64,14 @@ def decode_query(orig_query, dependency_type, feats_detailed_list):
|
||||||
if len(orig_query_split) > 1:
|
if len(orig_query_split) > 1:
|
||||||
if orig_query_split[0] == 'L':
|
if orig_query_split[0] == 'L':
|
||||||
decoded_query['lemma'] = orig_query_split[1]
|
decoded_query['lemma'] = orig_query_split[1]
|
||||||
# return decoded_query
|
|
||||||
elif orig_query_split[0] == 'upos':
|
elif orig_query_split[0] == 'upos':
|
||||||
decoded_query['upos'] = orig_query_split[1]
|
decoded_query['upos'] = orig_query_split[1]
|
||||||
# return decoded_query
|
|
||||||
elif orig_query_split[0] == 'xpos':
|
elif orig_query_split[0] == 'xpos':
|
||||||
decoded_query['xpos'] = orig_query_split[1]
|
decoded_query['xpos'] = orig_query_split[1]
|
||||||
# return decoded_query
|
|
||||||
elif orig_query_split[0] == 'form':
|
elif orig_query_split[0] == 'form':
|
||||||
decoded_query['form'] = orig_query_split[1]
|
decoded_query['form'] = orig_query_split[1]
|
||||||
# return decoded_query
|
|
||||||
elif orig_query_split[0] == 'feats':
|
elif orig_query_split[0] == 'feats':
|
||||||
decoded_query['feats'] = orig_query_split[1]
|
decoded_query['feats'] = orig_query_split[1]
|
||||||
# return decoded_query
|
|
||||||
elif orig_query_split[0] in feats_detailed_list:
|
elif orig_query_split[0] in feats_detailed_list:
|
||||||
decoded_query['feats_detailed'] = {}
|
decoded_query['feats_detailed'] = {}
|
||||||
decoded_query['feats_detailed'][orig_query_split[0]] = orig_query_split[1]
|
decoded_query['feats_detailed'][orig_query_split[0]] = orig_query_split[1]
|
||||||
|
@ -111,18 +82,11 @@ def decode_query(orig_query, dependency_type, feats_detailed_list):
|
||||||
print('???')
|
print('???')
|
||||||
elif not new_query:
|
elif not new_query:
|
||||||
decoded_query['form'] = orig_query_split_part
|
decoded_query['form'] = orig_query_split_part
|
||||||
# return decoded_query
|
|
||||||
return decoded_query
|
return decoded_query
|
||||||
|
|
||||||
# split over spaces if not inside braces
|
# split over spaces if not inside braces
|
||||||
# PATTERN = re.compile(r'''((?:[^ ()]|\([^.]*\))+)''')
|
|
||||||
# all_orders = PATTERN.split(orig_query)
|
|
||||||
# PATTERN = re.compile(r"(?:[^ ()]|\([^.]*\))+")
|
|
||||||
# all_orders = re.findall(r"(?:[^ ()]|\([^]*\))+", orig_query)
|
|
||||||
all_orders = re.split(r"\s+(?=[^()]*(?:\(|$))", orig_query)
|
all_orders = re.split(r"\s+(?=[^()]*(?:\(|$))", orig_query)
|
||||||
|
|
||||||
|
|
||||||
# all_orders = orig_query.split()
|
|
||||||
node_actions = all_orders[::2]
|
node_actions = all_orders[::2]
|
||||||
priority_actions = all_orders[1::2]
|
priority_actions = all_orders[1::2]
|
||||||
priority_actions_beginnings = [a[0] for a in priority_actions]
|
priority_actions_beginnings = [a[0] for a in priority_actions]
|
||||||
|
@ -148,8 +112,6 @@ def decode_query(orig_query, dependency_type, feats_detailed_list):
|
||||||
|
|
||||||
|
|
||||||
def create_trees(input_path, internal_saves, feats_detailed_dict={}, save=True):
|
def create_trees(input_path, internal_saves, feats_detailed_dict={}, save=True):
|
||||||
# internal_saves = filters['internal_saves']
|
|
||||||
# input_path = filters['input']
|
|
||||||
hash_object = hashlib.sha1(input_path.encode('utf-8'))
|
hash_object = hashlib.sha1(input_path.encode('utf-8'))
|
||||||
hex_dig = hash_object.hexdigest()
|
hex_dig = hash_object.hexdigest()
|
||||||
trees_read_outputfile = os.path.join(internal_saves, hex_dig)
|
trees_read_outputfile = os.path.join(internal_saves, hex_dig)
|
||||||
|
@ -165,13 +127,8 @@ def create_trees(input_path, internal_saves, feats_detailed_dict={}, save=True):
|
||||||
|
|
||||||
for sentence in train:
|
for sentence in train:
|
||||||
root = None
|
root = None
|
||||||
root_id = None
|
|
||||||
token_nodes = []
|
token_nodes = []
|
||||||
for token in sentence:
|
for token in sentence:
|
||||||
# token_feats = ''
|
|
||||||
# for k, v in token.feats.items():
|
|
||||||
# token_feats += k + next(iter(v)) + '|'
|
|
||||||
# token_feats = token_feats[:-1]
|
|
||||||
if not token.id.isdigit():
|
if not token.id.isdigit():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -194,12 +151,6 @@ def create_trees(input_path, internal_saves, feats_detailed_dict={}, save=True):
|
||||||
token.set_parent(None)
|
token.set_parent(None)
|
||||||
else:
|
else:
|
||||||
parent_id = int(token.parent) - 1
|
parent_id = int(token.parent) - 1
|
||||||
# if token_id < parent_id:
|
|
||||||
# token_nodes[parent_id].add_l_child(token)
|
|
||||||
# elif token_id > parent_id:
|
|
||||||
# token_nodes[parent_id].add_r_child(token)
|
|
||||||
# else:
|
|
||||||
# raise Exception('Root element should not be here!')
|
|
||||||
if token_nodes[parent_id].children_split == -1 and token_id > parent_id:
|
if token_nodes[parent_id].children_split == -1 and token_id > parent_id:
|
||||||
token_nodes[parent_id].children_split = len(token_nodes[parent_id].children)
|
token_nodes[parent_id].children_split = len(token_nodes[parent_id].children)
|
||||||
token_nodes[parent_id].add_child(token)
|
token_nodes[parent_id].add_child(token)
|
||||||
|
@ -210,35 +161,19 @@ def create_trees(input_path, internal_saves, feats_detailed_dict={}, save=True):
|
||||||
token.children_split = len(token.children)
|
token.children_split = len(token.children)
|
||||||
|
|
||||||
if root == None:
|
if root == None:
|
||||||
# print(input_path)
|
|
||||||
print('No root: ' + sentence.id)
|
print('No root: ' + sentence.id)
|
||||||
continue
|
continue
|
||||||
# raise Exception('No root element in sentence!')
|
|
||||||
all_trees.append(root)
|
all_trees.append(root)
|
||||||
|
|
||||||
if save:
|
if save:
|
||||||
save_zipped_pickle((all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict), trees_read_outputfile, protocol=2)
|
save_zipped_pickle((all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict), trees_read_outputfile, protocol=2)
|
||||||
# with open(trees_read_outputfile, 'wb') as output:
|
|
||||||
#
|
|
||||||
# pickle.dump((all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict), output)
|
|
||||||
else:
|
else:
|
||||||
print('Reading trees:')
|
print('Reading trees:')
|
||||||
print('Completed')
|
print('Completed')
|
||||||
all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict = load_zipped_pickle(trees_read_outputfile)
|
all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict = load_zipped_pickle(trees_read_outputfile)
|
||||||
# with open(trees_read_outputfile, 'rb') as pkl_file:
|
|
||||||
# (all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict) = pickle.load(pkl_file)
|
|
||||||
|
|
||||||
return all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict
|
return all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, corpus_size, feats_detailed_dict
|
||||||
|
|
||||||
|
|
||||||
# def order_independent_queries(query_tree):
|
|
||||||
# all_children = query_tree['l_children'] + query_tree['r_children']
|
|
||||||
# if all_children > 0:
|
|
||||||
#
|
|
||||||
# else:
|
|
||||||
# return query_tree
|
|
||||||
# pass
|
|
||||||
|
|
||||||
def printable_answers(query):
|
def printable_answers(query):
|
||||||
# all_orders = re.findall(r"(?:[^ ()]|\([^]*\))+", query)
|
# all_orders = re.findall(r"(?:[^ ()]|\([^]*\))+", query)
|
||||||
all_orders = re.split(r"\s+(?=[^()]*(?:\(|$))", query)
|
all_orders = re.split(r"\s+(?=[^()]*(?:\(|$))", query)
|
||||||
|
@ -293,11 +228,6 @@ def tree_calculations_chunks(input_data):
|
||||||
return result_dict
|
return result_dict
|
||||||
|
|
||||||
|
|
||||||
def chunkify(a, n):
|
|
||||||
k, m = divmod(len(a), n)
|
|
||||||
return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))
|
|
||||||
|
|
||||||
|
|
||||||
def add_node(tree):
|
def add_node(tree):
|
||||||
if 'children' in tree:
|
if 'children' in tree:
|
||||||
tree['children'].append({})
|
tree['children'].append({})
|
||||||
|
@ -362,30 +292,11 @@ def create_ngrams_query_trees(n, trees):
|
||||||
new_trees.append(new_tree)
|
new_trees.append(new_tree)
|
||||||
|
|
||||||
trees = new_trees
|
trees = new_trees
|
||||||
# delete_duplicates(trees)
|
|
||||||
# print('here')
|
|
||||||
# tree_grow(tree)
|
|
||||||
# tree_grow(tree)
|
|
||||||
# tree['children'] = [{}]
|
|
||||||
return trees
|
return trees
|
||||||
|
|
||||||
def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, filters, unigrams_dict, result_dict):
|
def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, filters, unigrams_dict, result_dict):
|
||||||
with Pool(cpu_cores) as p:
|
with Pool(cpu_cores) as p:
|
||||||
# 1.25 s (16 cores)
|
|
||||||
# chunked_trees = list(chunkify(all_trees, cpu_cores))
|
|
||||||
# if cpu_cores > 1:
|
|
||||||
# part_results = p.map(tree_calculations_chunks,
|
|
||||||
# [(tree, query_tree, create_output_string_funct, filters) for tree in chunked_trees])
|
|
||||||
#
|
|
||||||
# for part_result in part_results:
|
|
||||||
# for r_k, r_v in part_result.items():
|
|
||||||
# if r_k in result_dict:
|
|
||||||
# result_dict[r_k] += r_v
|
|
||||||
# else:
|
|
||||||
# result_dict[r_k] = r_v
|
|
||||||
# 1.02 s (16 cores)
|
|
||||||
if cpu_cores > 1:
|
if cpu_cores > 1:
|
||||||
# input_data = (tree, query_tree, create_output_string_functs, filters)
|
|
||||||
all_unigrams = p.map(get_unigrams, [(tree, query_tree, create_output_string_functs, filters) for tree in all_trees])
|
all_unigrams = p.map(get_unigrams, [(tree, query_tree, create_output_string_functs, filters) for tree in all_trees])
|
||||||
for unigrams in all_unigrams:
|
for unigrams in all_unigrams:
|
||||||
for unigram in unigrams:
|
for unigram in unigrams:
|
||||||
|
@ -396,24 +307,14 @@ def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, f
|
||||||
|
|
||||||
all_subtrees = p.map(tree_calculations, [(tree, query_tree, create_output_string_functs, filters) for tree in all_trees])
|
all_subtrees = p.map(tree_calculations, [(tree, query_tree, create_output_string_functs, filters) for tree in all_trees])
|
||||||
|
|
||||||
# for subtrees in all_subtrees:
|
|
||||||
for tree_i, subtrees in enumerate(all_subtrees):
|
for tree_i, subtrees in enumerate(all_subtrees):
|
||||||
|
|
||||||
for query_results in subtrees:
|
for query_results in subtrees:
|
||||||
for r in query_results:
|
for r in query_results:
|
||||||
# if r.key == '(ne <advmod more >xcomp (se <expl izogniti) >punct .)':
|
|
||||||
# print('HERE')
|
|
||||||
# print(tree_i)
|
|
||||||
if filters['node_order']:
|
if filters['node_order']:
|
||||||
key = r.get_key() + r.order
|
key = r.get_key() + r.order
|
||||||
else:
|
else:
|
||||||
key = r.get_key()
|
key = r.get_key()
|
||||||
# if r == '(" < , < je < velik) < tem':
|
|
||||||
# print(tree_i)
|
|
||||||
# if r in result_dict:
|
|
||||||
# result_dict[r] += 1
|
|
||||||
# else:
|
|
||||||
# result_dict[r] = 1
|
|
||||||
if key in result_dict:
|
if key in result_dict:
|
||||||
result_dict[key]['number'] += 1
|
result_dict[key]['number'] += 1
|
||||||
else:
|
else:
|
||||||
|
@@ -421,11 +322,7 @@ def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, filters, unigrams_dict, result_dict):
 
 # 3.65 s (1 core)
 else:
-# for tree_i, tree in enumerate(all_trees[-5:]):
 for tree_i, tree in enumerate(all_trees):
-# for tree_i, tree in enumerate(all_trees[852:]):
-# for tree_i, tree in enumerate(all_trees[1689:]):
-# for tree_i, tree in enumerate(all_trees[1:3]):
 input_data = (tree, query_tree, create_output_string_functs, filters)
 if filters['association_measures']:
 unigrams = get_unigrams(input_data)
@@ -434,10 +331,7 @@ def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, filters, unigrams_dict, result_dict):
 unigrams_dict[unigram] += 1
 else:
 unigrams_dict[unigram] = 1
-# for tree_i, tree in enumerate(all_trees[1:]):
-# text = Če pa ostane odrasel otrok doma, se starši le težko sprijaznijo s tem, da je "velik", otrok pa ima ves čas občutek, da se njegovi starši po nepotrebnem vtikajo v njegovo življenje.
-# for tree_i, tree in enumerate(all_trees[5170:]):
-# for tree in all_trees:
 subtrees = tree_calculations(input_data)
 for query_results in subtrees:
 for r in query_results:
@@ -445,8 +339,6 @@ def count_trees(cpu_cores, all_trees, query_tree, create_output_string_functs, filters, unigrams_dict, result_dict):
 key = r.get_key() + r.order
 else:
 key = r.get_key()
-# if r == '(" < , < je < velik) < tem':
-# print(tree_i)
 if key in result_dict:
 result_dict[key]['number'] += 1
 else:
@@ -465,7 +357,6 @@ def read_filters(config, feats_detailed_list):
 query_tree.extend(create_ngrams_query_trees(i, [{}]))
 else:
 query_tree = [decode_query('(' + config.get('settings', 'query') + ')', '', feats_detailed_list)]
-# order_independent_queries(query_tree)
 
 # set filters
 node_types = config.get('settings', 'node_type').split('+')
@@ -506,11 +397,8 @@ def read_filters(config, feats_detailed_list):
 attribute_dict = {}
 for attribute in option.split('&'):
 value = attribute.split('=')
-# assert value[0] in ['deprel', 'lemma', 'upos', 'xpos', 'form',
-# 'feats'], '"root_whitelist" is not set up correctly'
 attribute_dict[value[0]] = value[1]
 filters['root_whitelist'].append(attribute_dict)
-# filters['root_whitelist'] = [{'upos': 'NOUN', 'Case': 'Nom'}, {'upos': 'ADJ', 'Degree': 'Sup'}]
 else:
 filters['root_whitelist'] = []
 
@@ -540,12 +428,6 @@ def main():
 internal_saves = config.get('settings', 'internal_saves')
 input_path = config.get('settings', 'input')
 
-# a = args.config_file
-# config.read('config.ini')
-# create queries
-
-
-
 if os.path.isdir(input_path):
 
 checkpoint_path = Path(internal_saves, 'checkpoint.pkl')
@@ -572,9 +454,6 @@ def main():
 for path in sorted(pathlist):
 # because path is object not string
 path_str = str(path)
-# if Path(path_str).name == 'GF0003946-dedup.conllu':
-# break
-# print(path_in_str)
 
 (all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, sub_corpus_size,
 feats_detailed_list) = create_trees(path_str, internal_saves, feats_detailed_dict=feats_detailed_list, save=False)
@@ -593,7 +472,6 @@ def main():
 # 15.26
 print("Execution time:")
 print("--- %s seconds ---" % (time.time() - start_exe_time))
-# print(1 + 'asd')
 save_zipped_pickle(
 (already_processed, result_dict, unigrams_dict, corpus_size, feats_detailed_list),
 checkpoint_path, protocol=2)
@@ -620,26 +498,6 @@ def main():
 
 print("Execution time:")
 print("--- %s seconds ---" % (time.time() - start_exe_time))
-# test 1 layer queries
-# # tree.r_children = []
-# # tree.children[1].children = []
-# # query = [{'children': [{}]}, {'children': [{}]}]
-# # query = [{"children": [{}, {}]}, {"children": [{}]}, {"children": [{}, {}, {}]}]
-# query = [{"children": [{'form': 'je'}, {}]}, {"children": [{'form': 'je'}]}, {"children": [{'form': 'je'}, {}, {}]}]
-# # query = [{'q1':'', "children": [{'a1':''}, {'a2':''}]}, {'q2':'', "children": [{'b1':''}]}, {'q3':'', "children": [{'c1':''}, {'c2':''}, {'c3':''}]}]
-# _, _, subtrees = tree.get_subtrees(query, [], create_output_string_funct)
-# # _, subtrees = tree.get_subtrees([{'q1':'', "children": [{'a1':''}, {'a2':''}], "children": []}, {'q2':'', "children": [{'b1':''}], "children": []}, {'q3':'', "children": [{'c1':''}, {'c2':''}, {'c3':''}], "children": []}], [])
-# print('HERE!')
-
-# test 2 layer queries
-# tree.r_children = [Tree('je', '', '', '', '', form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, None)]
-# tree.l_children[1].l_children = []
-# new_tree = Tree('bil', '', '', '', '', form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, None)
-# new_tree.l_children = [tree]
-# _, subtrees = new_tree.get_subtrees(
-# [{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}]}]}], [])
-# # _, subtrees = new_tree.get_subtrees(
-# # [{"l_children":[{"l_children": [{'a1': ''}, {'a2': ''}, {'a3': ''}, {'a4': ''}], "r_children": []}], "r_children": []}], [])
 sorted_list = sorted(result_dict.items(), key=lambda x: x[1]['number'], reverse=True)
 
 with open(config.get('settings', 'output'), "w", newline="") as f:
@@ -660,7 +518,6 @@ def main():
 header += ['Root node']
 if filters['association_measures']:
 header += ['MI', 'MI3', 'Dice', 'logDice', 't-score', 'simple-LL']
-# header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Absolute frequency']
 writer.writerow(header)
 
 if filters['lines_threshold']:
@@ -673,7 +530,6 @@ def main():
 if filters['frequency_threshold'] and filters['frequency_threshold'] > v['number']:
 break
 words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))]
-# words_only = printable_answers(k)
 row = [v['object'].get_key()[1:-1]] + words_only + [str(v['number'])]
 row += ['%.4f' % relative_frequency]
 if filters['node_order']:
@@ -45,7 +45,6 @@ def generate_key(node, create_output_strings, print_lemma=True):
 if len(array[0]) > 1:
 key = '&'.join(key_array[0])
 else:
-# output_string = create_output_strings[0](node)
 key = key_array[0][0]
 
 return array, key
@@ -61,7 +60,6 @@ def generate_name(node, create_output_strings, print_lemma=True):
 if len(array) > 1:
 name = '&'.join(name_array)
 else:
-# output_string = create_output_strings[0](node)
 name = name_array[0]
 
 return array, name
@@ -74,7 +72,6 @@ def get_collocabilities(ngram, unigrams_dict, corpus_size):
 if len(key_array) > 1:
 key = '&'.join(key_array)
 else:
-# output_string = create_output_strings[0](node)
 key = key_array[0]
 sum_fwi += unigrams_dict[key]
 mul_fwi *= unigrams_dict[key]