Modified names in config.ini + names in results + some other changes.
This commit is contained in:
parent
d0ebf1ba05
commit
da51882d8a
41
Result.py
41
Result.py
|
@ -1,4 +1,5 @@
|
|||
import copy
|
||||
import string
|
||||
|
||||
|
||||
class Result(object):
|
||||
|
@ -11,10 +12,11 @@ class Result(object):
|
|||
self.key = self.array[0][0]
|
||||
# self.array = [[output_string]]
|
||||
self.order_key = str([architecture_order])
|
||||
|
||||
self.order = [architecture_order]
|
||||
# order with original numbers in sentences
|
||||
# self.order = str([architecture_order])
|
||||
# order with numbers from 0 to n of n-gram
|
||||
self.root = ''
|
||||
self.final_order = ''
|
||||
self.separators = []
|
||||
|
||||
|
@ -24,6 +26,7 @@ class Result(object):
|
|||
def add(self, string, architecture_order, separator, is_left):
|
||||
if is_left:
|
||||
self.array = [string] + self.array
|
||||
self.order = [architecture_order] + self.order
|
||||
# self.order = [architecture_order] + self.order
|
||||
self.separators = [separator] + self.separators
|
||||
self.key = string + ' ' + separator + ' ' + self.key
|
||||
|
@ -31,6 +34,7 @@ class Result(object):
|
|||
|
||||
else:
|
||||
self.array += [string]
|
||||
self.order += [architecture_order]
|
||||
# self.order += [architecture_order]
|
||||
self.separators += [separator]
|
||||
|
||||
|
@ -49,6 +53,9 @@ class Result(object):
|
|||
self_copy.order_key = separator + self_copy.order_key
|
||||
return self_copy
|
||||
|
||||
# def merge_results2(self):
|
||||
|
||||
|
||||
def merge_results(self, right_t, separator, left=True):
|
||||
left_tree = copy.copy(self)
|
||||
right_tree = copy.copy(right_t)
|
||||
|
@ -59,6 +66,7 @@ class Result(object):
|
|||
left_tree.key = left_tree.key + right_tree.key + separator
|
||||
left_tree.order_key = left_tree.order_key + right_tree.order_key + separator
|
||||
left_tree.array = left_tree.array + right_tree.array
|
||||
left_tree.order = left_tree.order + right_tree.order
|
||||
# left_tree.order = str([architecture_order])
|
||||
left_tree.separators = left_tree.separators + right_tree.separators + [separator]
|
||||
else:
|
||||
|
@ -66,6 +74,7 @@ class Result(object):
|
|||
left_tree.key = left_tree.key + separator + right_tree.key
|
||||
left_tree.order_key = left_tree.order_key + separator + right_tree.order_key
|
||||
left_tree.array = left_tree.array + right_tree.array
|
||||
left_tree.order = left_tree.order + right_tree.order
|
||||
# left_tree.order = str([architecture_order])
|
||||
left_tree.separators = left_tree.separators + [separator] + right_tree.separators
|
||||
else:
|
||||
|
@ -73,12 +82,24 @@ class Result(object):
|
|||
left_tree.key = left_tree.key + right_tree.key
|
||||
left_tree.order_key = left_tree.order_key + right_tree.order_key
|
||||
left_tree.array = left_tree.array + right_tree.array
|
||||
left_tree.order = left_tree.order + right_tree.order
|
||||
# left_tree.order = str([architecture_order])
|
||||
left_tree.separators = left_tree.separators + right_tree.separators
|
||||
|
||||
return left_tree
|
||||
|
||||
def put_in_bracelets(self):
|
||||
def extend_answer(self, other_answer, separator):
    """Append ``other_answer`` to this result, joined by ``separator``.

    The node arrays and orders are concatenated, the textual ``key`` and
    ``order_key`` are joined with ``separator`` in between, and the separator
    itself is recorded as one entry of ``self.separators``.

    :param other_answer: object exposing ``array``, ``order``, ``key`` and
        ``order_key`` (another result).
    :param separator: string placed between the two keys.
    :return: None; ``self`` is updated.
    """
    # Rebind to fresh lists rather than calling ``list.extend``: results are
    # duplicated with shallow copies, so an in-place extend would also grow
    # the list still referenced by the object this one was copied from.
    self.array = self.array + other_answer.array
    self.order = self.order + other_answer.order
    self.key += separator + other_answer.key
    self.order_key += separator + other_answer.order_key
    # ``separator`` is one string; ``extend`` would have split it into
    # single characters, so add it as a single list element instead.
    self.separators = self.separators + [separator]
|
||||
|
||||
def put_in_bracelets(self, inplace=False):
|
||||
if inplace:
|
||||
self.key = ('(' + self.key + ')')
|
||||
self.order_key = ('(' + self.order_key + ')')
|
||||
return
|
||||
result = copy.copy(self)
|
||||
result.key = ('(' + result.key + ')')
|
||||
result.order_key = ('(' + result.order_key + ')')
|
||||
|
@ -87,6 +108,22 @@ class Result(object):
|
|||
def finalize_result(self):
    """Return a finalized shallow copy of this result.

    On the copy, the first and last characters of ``key`` are dropped
    (presumably the outer parentheses added by ``put_in_bracelets``),
    ``set_root()`` is called, and the list ``order`` is replaced by a string
    of uppercase letters: the element with the smallest order value gets
    ``'A'``, the next one ``'B'``, and so on, each letter sitting at the
    position of its element.

    ``self`` is left untouched; in particular ``self.order`` is not modified.

    :return: the finalized copy.
    :raises IndexError: if ``order`` holds more entries than there are
        letters in ``string.ascii_uppercase``.
    """
    result = copy.copy(self)
    result.key = result.key[1:-1]
    result.set_root()

    positions = result.order
    # Rank indices by their order value. ``sorted`` is stable, so equal
    # values keep their left-to-right sequence. Nothing is written back into
    # ``positions``, which the shallow copy still shares with ``self``.
    ranked = sorted(range(len(positions)), key=positions.__getitem__)
    order_letters = [''] * len(positions)
    for rank, ind in enumerate(ranked):
        order_letters[ind] = string.ascii_uppercase[rank]
    result.order = ''.join(order_letters)
    return result
|
||||
|
||||
def set_root(self):
    """Store a printable form of the first ``array`` entry in ``self.root``.

    An entry with more than one item is rendered as ``{item1,item2,...}``;
    otherwise the entry's first item is stored as is.
    """
    head = self.array[0]
    self.root = head[0] if len(head) <= 1 else '{' + ','.join(head) + '}'
|
147
Tree.py
147
Tree.py
|
@ -438,7 +438,8 @@ class Tree(object):
|
|||
merged_results = []
|
||||
for left_part in left_parts:
|
||||
for right_part in right_parts:
|
||||
merged_results.append(left_part.merge_results(right_part, separator))
|
||||
merged_results.append(left_part.merge_results(right_part, separator, left))
|
||||
# merged_results.append(left_part.merge_results(right_part, separator))
|
||||
# if separator:
|
||||
# if left:
|
||||
# merged_results.append(left_part + right_part + separator)
|
||||
|
@ -465,9 +466,114 @@ class Tree(object):
|
|||
merged_indices.append(new_indices)
|
||||
return merged_results, merged_indices
|
||||
|
||||
def merge_results2(self, new_child, new_results, i_child, indices, deprel, filters):
    """Combine one child's answers with ``new_results`` and bracket them.

    Every answer in ``new_child`` is folded into either a left or a right
    accumulator through ``self.merge_results``. An answer goes left only when
    ``filters['node_order']`` is truthy and its entry in ``indices[i_child]``
    is below ``self.children_split``. The separator is ``' < '`` on the left
    and ``' > '`` on the right; with ``filters['dependency_type']`` set, the
    matching ``deprel[i_child]`` entry is inserted right after the arrow.

    The accumulators are then merged around ``new_results`` (left side first,
    then right) with a ``None`` separator.

    :return: a list of ``put_in_bracelets()`` results, one per merged
        element; empty when nothing was accumulated on either side.
    """
    left_acc = []
    right_acc = []
    for pos, answer in enumerate(new_child):
        goes_left = bool(filters['node_order'] and indices[i_child][pos] < self.children_split)
        arrow = '<' if goes_left else '>'
        if filters['dependency_type']:
            separator = ' ' + arrow + deprel[i_child][pos] + ' '
        else:
            separator = ' ' + arrow + ' '
        if goes_left:
            left_acc = self.merge_results(left_acc, answer, separator, left=True)
        else:
            right_acc = self.merge_results(right_acc, answer, separator, left=False)

    if left_acc:
        combined = self.merge_results(left_acc, new_results, None)
        if right_acc:
            combined = self.merge_results(combined, right_acc, None)
    elif right_acc:
        combined = self.merge_results(new_results, right_acc, None)
    else:
        # Nothing accumulated on either side: there is nothing to bracket.
        return []

    return [el.put_in_bracelets() for el in combined]
|
||||
|
||||
def create_merged_results(self, answers, separators, separator_switch):
    """Build one bracketed result per entry of ``answers``.

    For every ``answer`` a copy of ``answer[0]`` is extended, in order, with
    each of ``answer[1:]`` via ``extend_answer``, using ``separators[i]`` for
    the i-th extension. The copy is then wrapped with
    ``put_in_bracelets(inplace=True)``.

    :param answers: sequence of sequences of result objects.
    :param separators: separator strings indexed by position within
        ``answer[1:]``.
    :param separator_switch: accepted for interface compatibility; not used
        by this method.
    :return: list of the newly built result objects.
    """
    new_answers = []
    for answer in answers:
        new_answer = copy(answer[0])
        # ``copy`` is shallow: give the duplicate its own containers so that
        # extending it cannot modify ``answer[0]``.
        new_answer.array = copy(new_answer.array)
        new_answer.order = copy(new_answer.order)
        new_answer.separators = copy(new_answer.separators)
        for answer_part_i, answer_part in enumerate(answer[1:]):
            new_answer.extend_answer(answer_part, separators[answer_part_i])
        new_answer.put_in_bracelets(inplace=True)
        new_answers.append(new_answer)
    return new_answers
|
||||
# def create_merged_results(self, new_child, new_answers, i_child, indices, deprel, filters):
|
||||
|
||||
def merge_results3(self, new_child, new_answers, i_child, indices, deprel, filters):
    """Split a child's answers into left/right groups and merge them.

    An answer is placed on the left when ``filters['node_order']`` is truthy
    and its entry in ``indices[i_child]`` is below ``self.children_split``;
    otherwise it goes right. One separator is recorded per answer, in
    iteration order: ``' < '`` for left and ``' > '`` for right, with the
    matching ``deprel[i_child]`` entry inserted after the arrow when
    ``filters['dependency_type']`` is set.

    ``separator_switch`` starts at ``len(new_child) - 1`` and is lowered to
    the index of any right-hand answer that is smaller than it.

    :return: whatever ``self.create_merged_results`` returns for the sequence
        ``left answers + [new_answers] + right answers``, the separators and
        ``separator_switch``.
    """
    separators = []
    left_side = []
    right_side = []
    separator_switch = len(new_child) - 1

    for pos, answer in enumerate(new_child):
        on_left = bool(filters['node_order'] and indices[i_child][pos] < self.children_split)
        if on_left:
            arrow, bucket = '<', left_side
        else:
            arrow, bucket = '>', right_side
            separator_switch = min(separator_switch, pos)

        if filters['dependency_type']:
            separators.append(' ' + arrow + deprel[i_child][pos] + ' ')
        else:
            separators.append(' ' + arrow + ' ')
        bucket.append(answer)

    # Either side may be empty; concatenation covers all four combinations.
    answers = left_side + [new_answers] + right_side
    return self.create_merged_results(answers, separators, separator_switch)
|
||||
|
||||
def create_output_children(self, children, new_results, filters, indices, deprel):
|
||||
# if create_output_string_form(self) == 'prijel':
|
||||
# if create_output_string_form(self) == 'Dogodek':
|
||||
# print('HERE!@@!')
|
||||
# if create_output_string_form(self) == 'utišal':
|
||||
# print('HERE!@@!')
|
||||
|
@ -475,45 +581,12 @@ class Tree(object):
|
|||
# print('HERE')
|
||||
merged_results = []
|
||||
for i_child, child in enumerate(children):
|
||||
l_res = []
|
||||
r_res = []
|
||||
if filters['node_order']:
|
||||
new_child = child
|
||||
else:
|
||||
# a = [['tistega', 'dne'], ['sem', 'bil']]
|
||||
# b = sorted(a)
|
||||
# TODO CHECK IF THIS WORKS FOR CERTAIN
|
||||
new_child = sorted(child, key=lambda x: x[0].key)
|
||||
for i_answer, answer in enumerate(new_child):
|
||||
# res += '(' + el + ') < '
|
||||
if not filters['node_order'] or indices[i_child][i_answer] < self.children_split:
|
||||
if filters['dependency_type']:
|
||||
separator = ' <' + deprel[i_child][i_answer] + ' '
|
||||
else:
|
||||
separator = ' < '
|
||||
l_res = self.merge_results(l_res, answer, separator, left=True)
|
||||
# l_res += answer + separator
|
||||
else:
|
||||
if filters['dependency_type']:
|
||||
separator = ' >' + deprel[i_child][i_answer] + ' '
|
||||
else:
|
||||
separator = ' > '
|
||||
r_res = self.merge_results(r_res, answer, separator, left=False)
|
||||
# r_res += separator + answer
|
||||
if l_res:
|
||||
l_res_combined = self.merge_results(l_res, new_results, None)
|
||||
if r_res:
|
||||
r_res_combined = self.merge_results(l_res_combined, r_res, None)
|
||||
# merged_results.extend(['(' + el + ')' for el in r_res_combined])
|
||||
merged_results.extend([el.put_in_bracelets() for el in r_res_combined])
|
||||
else:
|
||||
merged_results.extend([el.put_in_bracelets() for el in l_res_combined])
|
||||
elif r_res:
|
||||
r_res_combined = self.merge_results(new_results, r_res, None)
|
||||
merged_results.extend([el.put_in_bracelets() for el in r_res_combined])
|
||||
|
||||
|
||||
# merged_results.append('(' + l_res + new_result + r_res + ')')
|
||||
#################
|
||||
merged_results.extend(self.merge_results2(new_child, new_results, i_child, indices, deprel, filters))
|
||||
return merged_results
|
||||
|
||||
@staticmethod
|
||||
|
|
|
@ -457,33 +457,42 @@ def main():
|
|||
for tree_i, subtrees in enumerate(all_subtrees):
|
||||
for query_results in subtrees:
|
||||
for r in query_results:
|
||||
if filters['node_order']:
|
||||
key = r.key + r.order
|
||||
else:
|
||||
key = r.key
|
||||
# if r == '(" < , < je < velik) < tem':
|
||||
# print(tree_i)
|
||||
# if r in result_dict:
|
||||
# result_dict[r] += 1
|
||||
# else:
|
||||
# result_dict[r] = 1
|
||||
if r.key in result_dict:
|
||||
result_dict[r.key]['number'] += 1
|
||||
if key in result_dict:
|
||||
result_dict[key]['number'] += 1
|
||||
else:
|
||||
result_dict[r.key] = {'object': r, 'number': 1}
|
||||
result_dict[key] = {'object': r, 'number': 1}
|
||||
|
||||
# 3.65 s (1 core)
|
||||
else:
|
||||
# for tree_i, tree in enumerate(all_trees[-5:]):
|
||||
for tree_i, tree in enumerate(all_trees):
|
||||
# for tree_i, tree in enumerate(all_trees):
|
||||
for tree_i, tree in enumerate(all_trees[1:]):
|
||||
# text = Če pa ostane odrasel otrok doma, se starši le težko sprijaznijo s tem, da je "velik", otrok pa ima ves čas občutek, da se njegovi starši po nepotrebnem vtikajo v njegovo življenje.
|
||||
# for tree_i, tree in enumerate(all_trees[5170:]):
|
||||
# for tree in all_trees:
|
||||
subtrees = tree_calculations((tree, query_tree, create_output_string_functs, filters))
|
||||
for query_results in subtrees:
|
||||
for r in query_results:
|
||||
if filters['node_order']:
|
||||
key = r.key + r.order
|
||||
else:
|
||||
key = r.key
|
||||
# if r == '(" < , < je < velik) < tem':
|
||||
# print(tree_i)
|
||||
if r.key in result_dict:
|
||||
result_dict[r.key]['number'] += 1
|
||||
if key in result_dict:
|
||||
result_dict[key]['number'] += 1
|
||||
else:
|
||||
result_dict[r.key] = {'object': r, 'number': 1}
|
||||
result_dict[key] = {'object': r, 'number': 1}
|
||||
|
||||
print("Execution time:")
|
||||
print("--- %s seconds ---" % (time.time() - start_exe_time))
|
||||
|
@ -516,23 +525,32 @@ def main():
|
|||
len_words = tree_size_range[-1]
|
||||
else:
|
||||
len_words = int(len(config.get('settings', 'query').split(" "))/2 + 1)
|
||||
header = ["Structure"] + ["Word #" + str(i) + "-" + node_type for i in range(1, len_words + 1) for node_type in node_types] + ['Number of occurences']
|
||||
if config.get('settings', 'relative_number'):
|
||||
header += ['Relative frequency']
|
||||
if config.get('settings', 'nodes_number'):
|
||||
header += ['Nodes number']
|
||||
# header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Number of occurences']
|
||||
header = ["Structure"] + ["Node #" + str(i) + "-" + node_type for i in range(1, len_words + 1) for node_type in node_types] + ['Absolute frequency']
|
||||
header += ['Relative frequency']
|
||||
if filters['node_order']:
|
||||
header += ['Order']
|
||||
if config.getboolean('settings', 'nodes_number'):
|
||||
header += ['Number of nodes']
|
||||
if config.getboolean('settings', 'print_root'):
|
||||
header += ['Root node']
|
||||
# header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Absolute frequency']
|
||||
writer.writerow(header)
|
||||
|
||||
if config.getint('settings', 'lines_threshold'):
|
||||
sorted_list = sorted_list[:config.getint('settings', 'lines_threshold')]
|
||||
|
||||
# body
|
||||
for k, v in sorted_list:
|
||||
words_only = [word_att for word in v['object'].array for word_att in word] + ['' for i in range((tree_size_range[-1] - len(v['object'].array)) * len(v['object'].array[0]))]
|
||||
# words_only = printable_answers(k)
|
||||
row = [k] + words_only + [str(v['number'])]
|
||||
if config.get('settings', 'relative_number'):
|
||||
row += ['%.4f' % (v['number'] * 1000000.0 / corpus_size)]
|
||||
row = [v['object'].key] + words_only + [str(v['number'])]
|
||||
row += ['%.4f' % (v['number'] * 1000000.0 / corpus_size)]
|
||||
if filters['node_order']:
|
||||
row += [v['object'].order]
|
||||
if config.get('settings', 'nodes_number'):
|
||||
row += ['%d' % len(v['object'].array)]
|
||||
if config.get('settings', 'print_root'):
|
||||
row += [v['object'].root]
|
||||
|
||||
writer.writerow(row)
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user