Modified names in config.ini + names in results + some other changes.

This commit is contained in:
Luka 2019-12-11 15:50:14 +01:00
parent d0ebf1ba05
commit da51882d8a
3 changed files with 183 additions and 55 deletions

View File

@ -1,4 +1,5 @@
import copy import copy
import string
class Result(object): class Result(object):
@ -11,10 +12,11 @@ class Result(object):
self.key = self.array[0][0] self.key = self.array[0][0]
# self.array = [[output_string]] # self.array = [[output_string]]
self.order_key = str([architecture_order]) self.order_key = str([architecture_order])
self.order = [architecture_order]
# order with original numbers in sentences # order with original numbers in sentences
# self.order = str([architecture_order]) # self.order = str([architecture_order])
# order with numbers from 0 to n of n-gram # order with numbers from 0 to n of n-gram
self.root = ''
self.final_order = '' self.final_order = ''
self.separators = [] self.separators = []
@ -24,6 +26,7 @@ class Result(object):
def add(self, string, architecture_order, separator, is_left): def add(self, string, architecture_order, separator, is_left):
if is_left: if is_left:
self.array = [string] + self.array self.array = [string] + self.array
self.order = [architecture_order] + self.order
# self.order = [architecture_order] + self.order # self.order = [architecture_order] + self.order
self.separators = [separator] + self.separators self.separators = [separator] + self.separators
self.key = string + ' ' + separator + ' ' + self.key self.key = string + ' ' + separator + ' ' + self.key
@ -31,6 +34,7 @@ class Result(object):
else: else:
self.array += [string] self.array += [string]
self.order += [architecture_order]
# self.order += [architecture_order] # self.order += [architecture_order]
self.separators += [separator] self.separators += [separator]
@ -49,6 +53,9 @@ class Result(object):
self_copy.order_key = separator + self_copy.order_key self_copy.order_key = separator + self_copy.order_key
return self_copy return self_copy
# def merge_results2(self):
def merge_results(self, right_t, separator, left=True): def merge_results(self, right_t, separator, left=True):
left_tree = copy.copy(self) left_tree = copy.copy(self)
right_tree = copy.copy(right_t) right_tree = copy.copy(right_t)
@ -59,6 +66,7 @@ class Result(object):
left_tree.key = left_tree.key + right_tree.key + separator left_tree.key = left_tree.key + right_tree.key + separator
left_tree.order_key = left_tree.order_key + right_tree.order_key + separator left_tree.order_key = left_tree.order_key + right_tree.order_key + separator
left_tree.array = left_tree.array + right_tree.array left_tree.array = left_tree.array + right_tree.array
left_tree.order = left_tree.order + right_tree.order
# left_tree.order = str([architecture_order]) # left_tree.order = str([architecture_order])
left_tree.separators = left_tree.separators + right_tree.separators + [separator] left_tree.separators = left_tree.separators + right_tree.separators + [separator]
else: else:
@ -66,6 +74,7 @@ class Result(object):
left_tree.key = left_tree.key + separator + right_tree.key left_tree.key = left_tree.key + separator + right_tree.key
left_tree.order_key = left_tree.order_key + separator + right_tree.order_key left_tree.order_key = left_tree.order_key + separator + right_tree.order_key
left_tree.array = left_tree.array + right_tree.array left_tree.array = left_tree.array + right_tree.array
left_tree.order = left_tree.order + right_tree.order
# left_tree.order = str([architecture_order]) # left_tree.order = str([architecture_order])
left_tree.separators = left_tree.separators + [separator] + right_tree.separators left_tree.separators = left_tree.separators + [separator] + right_tree.separators
else: else:
@ -73,12 +82,24 @@ class Result(object):
left_tree.key = left_tree.key + right_tree.key left_tree.key = left_tree.key + right_tree.key
left_tree.order_key = left_tree.order_key + right_tree.order_key left_tree.order_key = left_tree.order_key + right_tree.order_key
left_tree.array = left_tree.array + right_tree.array left_tree.array = left_tree.array + right_tree.array
left_tree.order = left_tree.order + right_tree.order
# left_tree.order = str([architecture_order]) # left_tree.order = str([architecture_order])
left_tree.separators = left_tree.separators + right_tree.separators left_tree.separators = left_tree.separators + right_tree.separators
return left_tree return left_tree
def extend_answer(self, other_answer, separator):
    """Append ``other_answer`` to this result, joined by ``separator``.

    Updates ``array``, ``order``, ``key``, ``order_key`` and ``separators``
    of ``self``; ``other_answer`` is left untouched.

    :param other_answer: object exposing ``array``, ``order``, ``key`` and
        ``order_key`` (the attributes read below).
    :param separator: separator string placed between the two keys.
    """
    # Build new lists instead of calling ``.extend`` in place: a shallow
    # copy (``copy.copy``) shares its list attributes with the object it was
    # copied from, so in-place mutation would silently alter that object too.
    self.array = self.array + other_answer.array
    self.order = self.order + other_answer.order
    self.key += separator + other_answer.key
    self.order_key += separator + other_answer.order_key
    # The separator is ONE entry; ``list.extend`` on a string would add each
    # character of it as a separate element.
    # NOTE(review): other_answer.separators are not carried over here,
    # unlike in merge_results -- confirm whether that is intended.
    self.separators = self.separators + [separator]
def put_in_bracelets(self, inplace=False):
if inplace:
self.key = ('(' + self.key + ')')
self.order_key = ('(' + self.order_key + ')')
return
result = copy.copy(self) result = copy.copy(self)
result.key = ('(' + result.key + ')') result.key = ('(' + result.key + ')')
result.order_key = ('(' + result.order_key + ')') result.order_key = ('(' + result.order_key + ')')
@ -87,6 +108,22 @@ class Result(object):
def finalize_result(self):
    """Return a finalized shallow copy of this result.

    On the copy: the first and last character of ``key`` are dropped,
    ``set_root()`` is called, and the numeric ``order`` list is replaced by
    a string of uppercase letters giving each element's rank ('A' for the
    smallest value, 'B' for the next, ...). Equal values are ranked by
    their index, lowest first.

    ``self`` is not modified.

    :return: the finalized copy.
    :raises IndexError: if ``order`` has more entries than there are
        letters in ``string.ascii_uppercase``.
    """
    result = copy.copy(self)
    result.key = result.key[1:-1]
    result.set_root()

    # ``copy.copy`` is shallow, so ``result.order`` is the very same list as
    # ``self.order``: rank it without writing into it.
    positions = result.order
    by_rank = sorted(range(len(positions)), key=positions.__getitem__)
    order_letters = [''] * len(positions)
    for rank, ind in enumerate(by_rank):
        order_letters[ind] = string.ascii_uppercase[rank]
    result.order = ''.join(order_letters)
    return result
def set_root(self):
    """Set ``self.root`` from the first entry of ``self.array``.

    A single-item entry is stored as that item; an entry with several
    items is stored as the items joined by commas inside curly braces.
    """
    first = self.array[0]
    self.root = first[0] if len(first) <= 1 else '{' + ','.join(first) + '}'

121
Tree.py
View File

@ -438,7 +438,8 @@ class Tree(object):
merged_results = [] merged_results = []
for left_part in left_parts: for left_part in left_parts:
for right_part in right_parts: for right_part in right_parts:
merged_results.append(left_part.merge_results(right_part, separator)) merged_results.append(left_part.merge_results(right_part, separator, left))
# merged_results.append(left_part.merge_results(right_part, separator))
# if separator: # if separator:
# if left: # if left:
# merged_results.append(left_part + right_part + separator) # merged_results.append(left_part + right_part + separator)
@ -465,28 +466,12 @@ class Tree(object):
merged_indices.append(new_indices) merged_indices.append(new_indices)
return merged_results, merged_indices return merged_results, merged_indices
def merge_results2(self, new_child, new_results, i_child, indices, deprel, filters):
def create_output_children(self, children, new_results, filters, indices, deprel):
# if create_output_string_form(self) == 'prijel':
# print('HERE!@@!')
# if create_output_string_form(self) == 'utišal':
# print('HERE!@@!')
# if len(new_results) > 1:
# print('HERE')
merged_results = []
for i_child, child in enumerate(children):
l_res = [] l_res = []
r_res = [] r_res = []
if filters['node_order']: results = []
new_child = child
else:
# a = [['tistega', 'dne'], ['sem', 'bil']]
# b = sorted(a)
# TODO CHECK IF THIS WORKS FOR CERTIAN
new_child = sorted(child, key=lambda x: x[0].key)
for i_answer, answer in enumerate(new_child): for i_answer, answer in enumerate(new_child):
# res += '(' + el + ') < ' if filters['node_order'] and indices[i_child][i_answer] < self.children_split:
if not filters['node_order'] or indices[i_child][i_answer] < self.children_split:
if filters['dependency_type']: if filters['dependency_type']:
separator = ' <' + deprel[i_child][i_answer] + ' ' separator = ' <' + deprel[i_child][i_answer] + ' '
else: else:
@ -505,15 +490,103 @@ class Tree(object):
if r_res: if r_res:
r_res_combined = self.merge_results(l_res_combined, r_res, None) r_res_combined = self.merge_results(l_res_combined, r_res, None)
# merged_results.extend(['(' + el + ')' for el in r_res_combined]) # merged_results.extend(['(' + el + ')' for el in r_res_combined])
merged_results.extend([el.put_in_bracelets() for el in r_res_combined]) results.extend([el.put_in_bracelets() for el in r_res_combined])
else: else:
merged_results.extend([el.put_in_bracelets() for el in l_res_combined]) results.extend([el.put_in_bracelets() for el in l_res_combined])
elif r_res: elif r_res:
r_res_combined = self.merge_results(new_results, r_res, None) r_res_combined = self.merge_results(new_results, r_res, None)
merged_results.extend([el.put_in_bracelets() for el in r_res_combined]) results.extend([el.put_in_bracelets() for el in r_res_combined])
return results
def create_merged_results(self, answers, separators, separator_switch):
    """Fold every answer's parts into a single bracketed result.

    For each item of ``answers`` a copy of its first element is made, the
    remaining elements are appended to it with ``extend_answer`` (the i-th
    remaining element uses ``separators[i]``), and the result is wrapped
    with ``put_in_bracelets(inplace=True)``.

    :param answers: iterable of indexable, sliceable sequences of results.
    :param separators: sequence indexed in parallel with ``answer[1:]``.
    :param separator_switch: accepted but not used by this method.
    :return: list with one merged result per item of ``answers``.
    """
    new_answers = []
    for answer in answers:
        # NOTE(review): relies on a module-level callable ``copy``
        # (e.g. ``from copy import copy``) -- import not visible here.
        # NOTE(review): a shallow copy shares list attributes with
        # ``answer[0]``; if ``extend_answer`` mutates them in place the
        # source answer changes as well -- confirm.
        new_answer = copy(answer[0])
        for answer_part_i, answer_part in enumerate(answer[1:]):
            new_answer.extend_answer(answer_part, separators[answer_part_i])
        new_answer.put_in_bracelets(inplace=True)
        new_answers.append(new_answer)
    return new_answers
# def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters):
def merge_results3(self, new_child, new_answers, i_child, indices, deprel, filters):
    """Arrange one child's answers around ``new_answers`` and merge them.

    Each answer of ``new_child`` goes to the left group when
    ``filters['node_order']`` is truthy and its entry in
    ``indices[i_child]`` is below ``self.children_split``; otherwise it goes
    to the right group. One separator per answer is collected in iteration
    order: ``<`` for left, ``>`` for right, with the matching
    ``deprel[i_child]`` entry attached when ``filters['dependency_type']``
    is truthy.

    :return: whatever ``self.create_merged_results`` returns for
        ``left + [new_answers] + right``, the separators, and the index of
        the first right-hand answer (or ``len(new_child) - 1`` if none
        precedes that position).
    """
    separators = []
    left_side = []
    right_side = []
    separator_switch = len(new_child) - 1

    for position, answer in enumerate(new_child):
        goes_left = filters['node_order'] and indices[i_child][position] < self.children_split
        if not goes_left:
            # remember the earliest answer placed on the right
            separator_switch = min(separator_switch, position)

        if filters['dependency_type']:
            relation = deprel[i_child][position]
            separators.append((' <' if goes_left else ' >') + relation + ' ')
        else:
            separators.append(' < ' if goes_left else ' > ')

        (left_side if goes_left else right_side).append(answer)

    # empty groups simply contribute nothing to the concatenation
    answers = left_side + [new_answers] + right_side
    return self.create_merged_results(answers, separators, separator_switch)
def create_output_children(self, children, new_results, filters, indices, deprel):
    """Merge ``new_results`` with every child and collect the outcomes.

    When ``filters['node_order']`` is falsy a child's answers are first
    sorted by the ``key`` of their first element; otherwise they are used
    as given. Each child is then handed to ``self.merge_results2`` and the
    returned items are gathered into one flat list.

    :return: list of all items returned by ``merge_results2``, in child order.
    """
    merged_results = []
    for i_child, child in enumerate(children):
        ordered_child = (
            child
            if filters['node_order']
            else sorted(child, key=lambda alternatives: alternatives[0].key)
        )
        merged = self.merge_results2(ordered_child, new_results, i_child, indices, deprel, filters)
        merged_results.extend(merged)
    return merged_results
@staticmethod @staticmethod

View File

@ -457,33 +457,42 @@ def main():
for tree_i, subtrees in enumerate(all_subtrees): for tree_i, subtrees in enumerate(all_subtrees):
for query_results in subtrees: for query_results in subtrees:
for r in query_results: for r in query_results:
if filters['node_order']:
key = r.key + r.order
else:
key = r.key
# if r == '(" < , < je < velik) < tem': # if r == '(" < , < je < velik) < tem':
# print(tree_i) # print(tree_i)
# if r in result_dict: # if r in result_dict:
# result_dict[r] += 1 # result_dict[r] += 1
# else: # else:
# result_dict[r] = 1 # result_dict[r] = 1
if r.key in result_dict: if key in result_dict:
result_dict[r.key]['number'] += 1 result_dict[key]['number'] += 1
else: else:
result_dict[r.key] = {'object': r, 'number': 1} result_dict[key] = {'object': r, 'number': 1}
# 3.65 s (1 core) # 3.65 s (1 core)
else: else:
# for tree_i, tree in enumerate(all_trees[-5:]): # for tree_i, tree in enumerate(all_trees[-5:]):
for tree_i, tree in enumerate(all_trees): # for tree_i, tree in enumerate(all_trees):
for tree_i, tree in enumerate(all_trees[1:]):
# text = Če pa ostane odrasel otrok doma, se starši le težko sprijaznijo s tem, da je "velik", otrok pa ima ves čas občutek, da se njegovi starši po nepotrebnem vtikajo v njegovo življenje. # text = Če pa ostane odrasel otrok doma, se starši le težko sprijaznijo s tem, da je "velik", otrok pa ima ves čas občutek, da se njegovi starši po nepotrebnem vtikajo v njegovo življenje.
# for tree_i, tree in enumerate(all_trees[5170:]): # for tree_i, tree in enumerate(all_trees[5170:]):
# for tree in all_trees: # for tree in all_trees:
subtrees = tree_calculations((tree, query_tree, create_output_string_functs, filters)) subtrees = tree_calculations((tree, query_tree, create_output_string_functs, filters))
for query_results in subtrees: for query_results in subtrees:
for r in query_results: for r in query_results:
if filters['node_order']:
key = r.key + r.order
else:
key = r.key
# if r == '(" < , < je < velik) < tem': # if r == '(" < , < je < velik) < tem':
# print(tree_i) # print(tree_i)
if r.key in result_dict: if key in result_dict:
result_dict[r.key]['number'] += 1 result_dict[key]['number'] += 1
else: else:
result_dict[r.key] = {'object': r, 'number': 1} result_dict[key] = {'object': r, 'number': 1}
print("Execution time:") print("Execution time:")
print("--- %s seconds ---" % (time.time() - start_exe_time)) print("--- %s seconds ---" % (time.time() - start_exe_time))
@ -516,23 +525,32 @@ def main():
len_words = tree_size_range[-1] len_words = tree_size_range[-1]
else: else:
len_words = int(len(config.get('settings', 'query').split(" "))/2 + 1) len_words = int(len(config.get('settings', 'query').split(" "))/2 + 1)
header = ["Structure"] + ["Word #" + str(i) + "-" + node_type for i in range(1, len_words + 1) for node_type in node_types] + ['Number of occurences'] header = ["Structure"] + ["Node #" + str(i) + "-" + node_type for i in range(1, len_words + 1) for node_type in node_types] + ['Absolute frequency']
if config.get('settings', 'relative_number'):
header += ['Relative frequency'] header += ['Relative frequency']
if config.get('settings', 'nodes_number'): if filters['node_order']:
header += ['Nodes number'] header += ['Order']
# header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Number of occurences'] if config.getboolean('settings', 'nodes_number'):
header += ['Number of nodes']
if config.getboolean('settings', 'print_root'):
header += ['Root node']
# header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Absolute frequency']
writer.writerow(header) writer.writerow(header)
if config.getint('settings', 'lines_threshold'):
sorted_list = sorted_list[:config.getint('settings', 'lines_threshold')]
# body # body
for k, v in sorted_list: for k, v in sorted_list:
words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))] words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))]
# words_only = printable_answers(k) # words_only = printable_answers(k)
row = [k] + words_only + [str(v['number'])] row = [v['object'].key] + words_only + [str(v['number'])]
if config.get('settings', 'relative_number'):
row += ['%.4f' % (v['number'] * 1000000.0 / corpus_size)] row += ['%.4f' % (v['number'] * 1000000.0 / corpus_size)]
if filters['node_order']:
row += [v['object'].order]
if config.get('settings', 'nodes_number'): if config.get('settings', 'nodes_number'):
row += ['%d' % len(v['object'].array)] row += ['%d' % len(v['object'].array)]
if config.get('settings', 'print_root'):
row += [v['object'].root]
writer.writerow(row) writer.writerow(row)