Modified names in config.ini + names in results + some other changes.
This commit is contained in:
parent
d0ebf1ba05
commit
da51882d8a
41
Result.py
41
Result.py
|
@ -1,4 +1,5 @@
|
||||||
import copy
|
import copy
|
||||||
|
import string
|
||||||
|
|
||||||
|
|
||||||
class Result(object):
|
class Result(object):
|
||||||
|
@ -11,10 +12,11 @@ class Result(object):
|
||||||
self.key = self.array[0][0]
|
self.key = self.array[0][0]
|
||||||
# self.array = [[output_string]]
|
# self.array = [[output_string]]
|
||||||
self.order_key = str([architecture_order])
|
self.order_key = str([architecture_order])
|
||||||
|
self.order = [architecture_order]
|
||||||
# order with original numbers in sentences
|
# order with original numbers in sentences
|
||||||
# self.order = str([architecture_order])
|
# self.order = str([architecture_order])
|
||||||
# order with numbers from 0 to n of n-gram
|
# order with numbers from 0 to n of n-gram
|
||||||
|
self.root = ''
|
||||||
self.final_order = ''
|
self.final_order = ''
|
||||||
self.separators = []
|
self.separators = []
|
||||||
|
|
||||||
|
@ -24,6 +26,7 @@ class Result(object):
|
||||||
def add(self, string, architecture_order, separator, is_left):
|
def add(self, string, architecture_order, separator, is_left):
|
||||||
if is_left:
|
if is_left:
|
||||||
self.array = [string] + self.array
|
self.array = [string] + self.array
|
||||||
|
self.order = [architecture_order] + self.order
|
||||||
# self.order = [architecture_order] + self.order
|
# self.order = [architecture_order] + self.order
|
||||||
self.separators = [separator] + self.separators
|
self.separators = [separator] + self.separators
|
||||||
self.key = string + ' ' + separator + ' ' + self.key
|
self.key = string + ' ' + separator + ' ' + self.key
|
||||||
|
@ -31,6 +34,7 @@ class Result(object):
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.array += [string]
|
self.array += [string]
|
||||||
|
self.order += [architecture_order]
|
||||||
# self.order += [architecture_order]
|
# self.order += [architecture_order]
|
||||||
self.separators += [separator]
|
self.separators += [separator]
|
||||||
|
|
||||||
|
@ -49,6 +53,9 @@ class Result(object):
|
||||||
self_copy.order_key = separator + self_copy.order_key
|
self_copy.order_key = separator + self_copy.order_key
|
||||||
return self_copy
|
return self_copy
|
||||||
|
|
||||||
|
# def merge_results2(self):
|
||||||
|
|
||||||
|
|
||||||
def merge_results(self, right_t, separator, left=True):
|
def merge_results(self, right_t, separator, left=True):
|
||||||
left_tree = copy.copy(self)
|
left_tree = copy.copy(self)
|
||||||
right_tree = copy.copy(right_t)
|
right_tree = copy.copy(right_t)
|
||||||
|
@ -59,6 +66,7 @@ class Result(object):
|
||||||
left_tree.key = left_tree.key + right_tree.key + separator
|
left_tree.key = left_tree.key + right_tree.key + separator
|
||||||
left_tree.order_key = left_tree.order_key + right_tree.order_key + separator
|
left_tree.order_key = left_tree.order_key + right_tree.order_key + separator
|
||||||
left_tree.array = left_tree.array + right_tree.array
|
left_tree.array = left_tree.array + right_tree.array
|
||||||
|
left_tree.order = left_tree.order + right_tree.order
|
||||||
# left_tree.order = str([architecture_order])
|
# left_tree.order = str([architecture_order])
|
||||||
left_tree.separators = left_tree.separators + right_tree.separators + [separator]
|
left_tree.separators = left_tree.separators + right_tree.separators + [separator]
|
||||||
else:
|
else:
|
||||||
|
@ -66,6 +74,7 @@ class Result(object):
|
||||||
left_tree.key = left_tree.key + separator + right_tree.key
|
left_tree.key = left_tree.key + separator + right_tree.key
|
||||||
left_tree.order_key = left_tree.order_key + separator + right_tree.order_key
|
left_tree.order_key = left_tree.order_key + separator + right_tree.order_key
|
||||||
left_tree.array = left_tree.array + right_tree.array
|
left_tree.array = left_tree.array + right_tree.array
|
||||||
|
left_tree.order = left_tree.order + right_tree.order
|
||||||
# left_tree.order = str([architecture_order])
|
# left_tree.order = str([architecture_order])
|
||||||
left_tree.separators = left_tree.separators + [separator] + right_tree.separators
|
left_tree.separators = left_tree.separators + [separator] + right_tree.separators
|
||||||
else:
|
else:
|
||||||
|
@ -73,12 +82,24 @@ class Result(object):
|
||||||
left_tree.key = left_tree.key + right_tree.key
|
left_tree.key = left_tree.key + right_tree.key
|
||||||
left_tree.order_key = left_tree.order_key + right_tree.order_key
|
left_tree.order_key = left_tree.order_key + right_tree.order_key
|
||||||
left_tree.array = left_tree.array + right_tree.array
|
left_tree.array = left_tree.array + right_tree.array
|
||||||
|
left_tree.order = left_tree.order + right_tree.order
|
||||||
# left_tree.order = str([architecture_order])
|
# left_tree.order = str([architecture_order])
|
||||||
left_tree.separators = left_tree.separators + right_tree.separators
|
left_tree.separators = left_tree.separators + right_tree.separators
|
||||||
|
|
||||||
return left_tree
|
return left_tree
|
||||||
|
|
||||||
def put_in_bracelets(self):
|
def extend_answer(self, other_answer, separator):
    """Append ``other_answer`` to this result, joined by ``separator``.

    The update is applied to ``self``; nothing is returned.

    Args:
        other_answer: object exposing ``array``, ``order``, ``key`` and
            ``order_key``; its contents are appended after this result's.
        separator: string placed between the two ``key`` values and the
            two ``order_key`` values, and recorded in ``self.separators``.
    """
    # Build new lists instead of calling ``.extend`` in place: instances of
    # this class are duplicated with shallow copies, so an in-place update
    # would also change every copy that still shares the same list objects.
    self.array = self.array + other_answer.array
    self.order = self.order + other_answer.order
    self.key += separator + other_answer.key
    self.order_key += separator + other_answer.order_key
    # ``separator`` is a single string. ``list.extend`` would iterate over it
    # and store one entry per character, so it is added as one list item.
    # NOTE(review): ``other_answer.separators`` is not carried over here,
    # unlike in ``merge_results`` -- confirm whether that is intended.
    self.separators = self.separators + [separator]
|
||||||
|
|
||||||
|
def put_in_bracelets(self, inplace=False):
|
||||||
|
if inplace:
|
||||||
|
self.key = ('(' + self.key + ')')
|
||||||
|
self.order_key = ('(' + self.order_key + ')')
|
||||||
|
return
|
||||||
result = copy.copy(self)
|
result = copy.copy(self)
|
||||||
result.key = ('(' + result.key + ')')
|
result.key = ('(' + result.key + ')')
|
||||||
result.order_key = ('(' + result.order_key + ')')
|
result.order_key = ('(' + result.order_key + ')')
|
||||||
|
@ -87,6 +108,22 @@ class Result(object):
|
||||||
def finalize_result(self):
    """Return a finalized shallow copy of this result.

    On the copy:
      * the first and last character of ``key`` are dropped (presumably the
        outermost brackets added by ``put_in_bracelets`` -- confirm);
      * ``set_root()`` is called;
      * ``order`` is replaced by a string with one uppercase letter per
        entry, where the smallest original value gets ``'A'``, the next
        ``'B'`` and so on. Equal values are lettered left to right.

    ``order_key`` is left as it is. ``self`` is not modified.

    Returns:
        The finalized copy.

    Raises:
        IndexError: if ``order`` has more than 26 entries, because the
            letters come from ``string.ascii_uppercase``.
    """
    result = copy.copy(self)
    result.key = result.key[1:-1]
    result.set_root()

    # ``copy.copy`` is shallow, so ``result.order`` is the very same list as
    # ``self.order``. Rank the positions without writing into that list.
    # ``sorted`` is stable, so ties keep their left-to-right order.
    original_order = result.order
    ranked_positions = sorted(range(len(original_order)),
                              key=original_order.__getitem__)
    order_letters = [''] * len(original_order)
    for rank, position in enumerate(ranked_positions):
        order_letters[position] = string.ascii_uppercase[rank]
    result.order = ''.join(order_letters)
    return result
|
||||||
|
|
||||||
|
def set_root(self):
    """Store a printable form of the first ``array`` entry in ``self.root``.

    An entry holding several values is rendered as ``{v1,v2,...}``; an
    entry holding a single value is stored as that value itself.
    """
    first_entry = self.array[0]
    if len(first_entry) <= 1:
        self.root = first_entry[0]
        return
    self.root = '{' + ','.join(first_entry) + '}'
|
147
Tree.py
147
Tree.py
|
@ -438,7 +438,8 @@ class Tree(object):
|
||||||
merged_results = []
|
merged_results = []
|
||||||
for left_part in left_parts:
|
for left_part in left_parts:
|
||||||
for right_part in right_parts:
|
for right_part in right_parts:
|
||||||
merged_results.append(left_part.merge_results(right_part, separator))
|
merged_results.append(left_part.merge_results(right_part, separator, left))
|
||||||
|
# merged_results.append(left_part.merge_results(right_part, separator))
|
||||||
# if separator:
|
# if separator:
|
||||||
# if left:
|
# if left:
|
||||||
# merged_results.append(left_part + right_part + separator)
|
# merged_results.append(left_part + right_part + separator)
|
||||||
|
@ -465,9 +466,114 @@ class Tree(object):
|
||||||
merged_indices.append(new_indices)
|
merged_indices.append(new_indices)
|
||||||
return merged_results, merged_indices
|
return merged_results, merged_indices
|
||||||
|
|
||||||
|
def merge_results2(self, new_child, new_results, i_child, indices, deprel, filters):
    """Combine one child's answers with ``new_results`` and bracket them.

    Each answer in ``new_child`` is folded, via ``self.merge_results``, into
    either a left or a right accumulator. An answer goes left when
    ``filters['node_order']`` is truthy and its entry in
    ``indices[i_child]`` is below ``self.children_split``; otherwise it goes
    right. Left separators look like ``' < '`` and right ones like
    ``' > '``; when ``filters['dependency_type']`` is truthy the matching
    ``deprel[i_child]`` entry is inserted right after the arrow.

    Returns:
        A list with ``put_in_bracelets()`` applied to every combined
        element, or an empty list when both accumulators end up empty.
    """
    left_acc = []
    right_acc = []
    for position, answer in enumerate(new_child):
        on_left = filters['node_order'] and indices[i_child][position] < self.children_split
        arrow = '<' if on_left else '>'
        relation = deprel[i_child][position] if filters['dependency_type'] else ''
        separator = ' ' + arrow + relation + ' '
        if on_left:
            left_acc = self.merge_results(left_acc, answer, separator, left=True)
        else:
            right_acc = self.merge_results(right_acc, answer, separator, left=False)

    # Order of combination: left part, then the node's own results, then
    # the right part. Whatever side is empty is left out.
    if left_acc:
        combined = self.merge_results(left_acc, new_results, None)
        if right_acc:
            combined = self.merge_results(combined, right_acc, None)
    elif right_acc:
        combined = self.merge_results(new_results, right_acc, None)
    else:
        return []

    return [element.put_in_bracelets() for element in combined]
|
||||||
|
|
||||||
|
def create_merged_results(self, answers, separators, separator_switch):
    """Join each group of partial answers into one bracketed answer.

    Args:
        answers: iterable of sequences. The first item of each sequence is
            copied; every following item is folded into that copy with
            ``extend_answer``.
        separators: ``separators[i]`` is the separator used when folding in
            the i-th following item of a sequence.
        separator_switch: not used by this method; kept in the signature so
            existing callers keep working.

    Returns:
        A list with one merged answer per entry of ``answers``, each already
        wrapped via ``put_in_bracelets(inplace=True)``.

    Raises:
        IndexError: if a sequence has more following items than there are
            separators.
    """
    new_answers = []
    for answer in answers:
        # ``copy`` is the callable the module has in scope -- presumably
        # ``copy.copy`` (a shallow copy); TODO confirm against the imports.
        new_answer = copy(answer[0])
        for part_index, answer_part in enumerate(answer[1:]):
            new_answer.extend_answer(answer_part, separators[part_index])
        new_answer.put_in_bracelets(inplace=True)
        new_answers.append(new_answer)
    return new_answers
|
||||||
|
# def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters):
|
||||||
|
|
||||||
|
def merge_results3(self, new_child, new_answers, i_child, indices, deprel, filters):
    """Split a child's answers into left/right and hand them on for merging.

    For every answer in ``new_child`` one separator is recorded. An answer
    counts as "left" when ``filters['node_order']`` is truthy and its entry
    in ``indices[i_child]`` is below ``self.children_split``; otherwise it
    counts as "right". Left separators look like ``' < '`` and right ones
    like ``' > '``; when ``filters['dependency_type']`` is truthy the
    matching ``deprel[i_child]`` entry is inserted right after the arrow.

    ``separator_switch`` starts at ``len(new_child) - 1`` and is lowered to
    the position of the first right-side answer when that is smaller.

    Returns:
        Whatever ``self.create_merged_results`` returns for the sequence
        ``left answers + [new_answers] + right answers``, the collected
        separators and ``separator_switch``.
    """
    separators = []
    left_side = []
    right_side = []
    separator_switch = len(new_child) - 1

    for position, answer in enumerate(new_child):
        goes_left = filters['node_order'] and indices[i_child][position] < self.children_split
        if goes_left:
            arrow, bucket = '<', left_side
        else:
            separator_switch = min(separator_switch, position)
            arrow, bucket = '>', right_side
        relation = deprel[i_child][position] if filters['dependency_type'] else ''
        separators.append(' ' + arrow + relation + ' ')
        bucket.append(answer)

    # An empty side contributes nothing, so a single concatenation covers
    # the left-only, right-only, both and neither cases alike.
    answers = left_side + [new_answers] + right_side
    return self.create_merged_results(answers, separators, separator_switch)
|
||||||
|
|
||||||
def create_output_children(self, children, new_results, filters, indices, deprel):
|
def create_output_children(self, children, new_results, filters, indices, deprel):
|
||||||
# if create_output_string_form(self) == 'prijel':
|
# if create_output_string_form(self) == 'Dogodek':
|
||||||
# print('HERE!@@!')
|
# print('HERE!@@!')
|
||||||
# if create_output_string_form(self) == 'utišal':
|
# if create_output_string_form(self) == 'utišal':
|
||||||
# print('HERE!@@!')
|
# print('HERE!@@!')
|
||||||
|
@ -475,45 +581,12 @@ class Tree(object):
|
||||||
# print('HERE')
|
# print('HERE')
|
||||||
merged_results = []
|
merged_results = []
|
||||||
for i_child, child in enumerate(children):
|
for i_child, child in enumerate(children):
|
||||||
l_res = []
|
|
||||||
r_res = []
|
|
||||||
if filters['node_order']:
|
if filters['node_order']:
|
||||||
new_child = child
|
new_child = child
|
||||||
else:
|
else:
|
||||||
# a = [['tistega', 'dne'], ['sem', 'bil']]
|
|
||||||
# b = sorted(a)
|
|
||||||
# TODO CHECK IF THIS WORKS FOR CERTIAN
|
|
||||||
new_child = sorted(child, key=lambda x: x[0].key)
|
new_child = sorted(child, key=lambda x: x[0].key)
|
||||||
for i_answer, answer in enumerate(new_child):
|
#################
|
||||||
# res += '(' + el + ') < '
|
merged_results.extend(self.merge_results2(new_child, new_results, i_child, indices, deprel, filters))
|
||||||
if not filters['node_order'] or indices[i_child][i_answer] < self.children_split:
|
|
||||||
if filters['dependency_type']:
|
|
||||||
separator = ' <' + deprel[i_child][i_answer] + ' '
|
|
||||||
else:
|
|
||||||
separator = ' < '
|
|
||||||
l_res = self.merge_results(l_res, answer, separator, left=True)
|
|
||||||
# l_res += answer + separator
|
|
||||||
else:
|
|
||||||
if filters['dependency_type']:
|
|
||||||
separator = ' >' + deprel[i_child][i_answer] + ' '
|
|
||||||
else:
|
|
||||||
separator = ' > '
|
|
||||||
r_res = self.merge_results(r_res, answer, separator, left=False)
|
|
||||||
# r_res += separator + answer
|
|
||||||
if l_res:
|
|
||||||
l_res_combined = self.merge_results(l_res, new_results, None)
|
|
||||||
if r_res:
|
|
||||||
r_res_combined = self.merge_results(l_res_combined, r_res, None)
|
|
||||||
# merged_results.extend(['(' + el + ')' for el in r_res_combined])
|
|
||||||
merged_results.extend([el.put_in_bracelets() for el in r_res_combined])
|
|
||||||
else:
|
|
||||||
merged_results.extend([el.put_in_bracelets() for el in l_res_combined])
|
|
||||||
elif r_res:
|
|
||||||
r_res_combined = self.merge_results(new_results, r_res, None)
|
|
||||||
merged_results.extend([el.put_in_bracelets() for el in r_res_combined])
|
|
||||||
|
|
||||||
|
|
||||||
# merged_results.append('(' + l_res + new_result + r_res + ')')
|
|
||||||
return merged_results
|
return merged_results
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
@ -457,33 +457,42 @@ def main():
|
||||||
for tree_i, subtrees in enumerate(all_subtrees):
|
for tree_i, subtrees in enumerate(all_subtrees):
|
||||||
for query_results in subtrees:
|
for query_results in subtrees:
|
||||||
for r in query_results:
|
for r in query_results:
|
||||||
|
if filters['node_order']:
|
||||||
|
key = r.key + r.order
|
||||||
|
else:
|
||||||
|
key = r.key
|
||||||
# if r == '(" < , < je < velik) < tem':
|
# if r == '(" < , < je < velik) < tem':
|
||||||
# print(tree_i)
|
# print(tree_i)
|
||||||
# if r in result_dict:
|
# if r in result_dict:
|
||||||
# result_dict[r] += 1
|
# result_dict[r] += 1
|
||||||
# else:
|
# else:
|
||||||
# result_dict[r] = 1
|
# result_dict[r] = 1
|
||||||
if r.key in result_dict:
|
if key in result_dict:
|
||||||
result_dict[r.key]['number'] += 1
|
result_dict[key]['number'] += 1
|
||||||
else:
|
else:
|
||||||
result_dict[r.key] = {'object': r, 'number': 1}
|
result_dict[key] = {'object': r, 'number': 1}
|
||||||
|
|
||||||
# 3.65 s (1 core)
|
# 3.65 s (1 core)
|
||||||
else:
|
else:
|
||||||
# for tree_i, tree in enumerate(all_trees[-5:]):
|
# for tree_i, tree in enumerate(all_trees[-5:]):
|
||||||
for tree_i, tree in enumerate(all_trees):
|
# for tree_i, tree in enumerate(all_trees):
|
||||||
|
for tree_i, tree in enumerate(all_trees[1:]):
|
||||||
# text = Če pa ostane odrasel otrok doma, se starši le težko sprijaznijo s tem, da je "velik", otrok pa ima ves čas občutek, da se njegovi starši po nepotrebnem vtikajo v njegovo življenje.
|
# text = Če pa ostane odrasel otrok doma, se starši le težko sprijaznijo s tem, da je "velik", otrok pa ima ves čas občutek, da se njegovi starši po nepotrebnem vtikajo v njegovo življenje.
|
||||||
# for tree_i, tree in enumerate(all_trees[5170:]):
|
# for tree_i, tree in enumerate(all_trees[5170:]):
|
||||||
# for tree in all_trees:
|
# for tree in all_trees:
|
||||||
subtrees = tree_calculations((tree, query_tree, create_output_string_functs, filters))
|
subtrees = tree_calculations((tree, query_tree, create_output_string_functs, filters))
|
||||||
for query_results in subtrees:
|
for query_results in subtrees:
|
||||||
for r in query_results:
|
for r in query_results:
|
||||||
|
if filters['node_order']:
|
||||||
|
key = r.key + r.order
|
||||||
|
else:
|
||||||
|
key = r.key
|
||||||
# if r == '(" < , < je < velik) < tem':
|
# if r == '(" < , < je < velik) < tem':
|
||||||
# print(tree_i)
|
# print(tree_i)
|
||||||
if r.key in result_dict:
|
if key in result_dict:
|
||||||
result_dict[r.key]['number'] += 1
|
result_dict[key]['number'] += 1
|
||||||
else:
|
else:
|
||||||
result_dict[r.key] = {'object': r, 'number': 1}
|
result_dict[key] = {'object': r, 'number': 1}
|
||||||
|
|
||||||
print("Execution time:")
|
print("Execution time:")
|
||||||
print("--- %s seconds ---" % (time.time() - start_exe_time))
|
print("--- %s seconds ---" % (time.time() - start_exe_time))
|
||||||
|
@ -516,23 +525,32 @@ def main():
|
||||||
len_words = tree_size_range[-1]
|
len_words = tree_size_range[-1]
|
||||||
else:
|
else:
|
||||||
len_words = int(len(config.get('settings', 'query').split(" "))/2 + 1)
|
len_words = int(len(config.get('settings', 'query').split(" "))/2 + 1)
|
||||||
header = ["Structure"] + ["Word #" + str(i) + "-" + node_type for i in range(1, len_words + 1) for node_type in node_types] + ['Number of occurences']
|
header = ["Structure"] + ["Node #" + str(i) + "-" + node_type for i in range(1, len_words + 1) for node_type in node_types] + ['Absolute frequency']
|
||||||
if config.get('settings', 'relative_number'):
|
header += ['Relative frequency']
|
||||||
header += ['Relative frequency']
|
if filters['node_order']:
|
||||||
if config.get('settings', 'nodes_number'):
|
header += ['Order']
|
||||||
header += ['Nodes number']
|
if config.getboolean('settings', 'nodes_number'):
|
||||||
# header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Number of occurences']
|
header += ['Number of nodes']
|
||||||
|
if config.getboolean('settings', 'print_root'):
|
||||||
|
header += ['Root node']
|
||||||
|
# header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Absolute frequency']
|
||||||
writer.writerow(header)
|
writer.writerow(header)
|
||||||
|
|
||||||
|
if config.getint('settings', 'lines_threshold'):
|
||||||
|
sorted_list = sorted_list[:config.getint('settings', 'lines_threshold')]
|
||||||
|
|
||||||
# body
|
# body
|
||||||
for k, v in sorted_list:
|
for k, v in sorted_list:
|
||||||
words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))]
|
words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))]
|
||||||
# words_only = printable_answers(k)
|
# words_only = printable_answers(k)
|
||||||
row = [k] + words_only + [str(v['number'])]
|
row = [v['object'].key] + words_only + [str(v['number'])]
|
||||||
if config.get('settings', 'relative_number'):
|
row += ['%.4f' % (v['number'] * 1000000.0 / corpus_size)]
|
||||||
row += ['%.4f' % (v['number'] * 1000000.0 / corpus_size)]
|
if filters['node_order']:
|
||||||
|
row += [v['object'].order]
|
||||||
if config.get('settings', 'nodes_number'):
|
if config.get('settings', 'nodes_number'):
|
||||||
row += ['%d' % len(v['object'].array)]
|
row += ['%d' % len(v['object'].array)]
|
||||||
|
if config.get('settings', 'print_root'):
|
||||||
|
row += [v['object'].root]
|
||||||
|
|
||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user