Added analyze_type filter

This commit is contained in:
Luka 2019-08-29 09:23:22 +02:00
parent ba9bdeb8b9
commit b444df2d54
3 changed files with 45 additions and 32 deletions

48
Tree.py
View File

@ -110,7 +110,7 @@ class Tree(object):
if not is_permanent: if not is_permanent:
partial_subtrees[result_index].append([]) partial_subtrees[result_index].append([])
def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children): def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string):
partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)] partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)]
completed_subtrees = [[] for i in range(completed_subtrees_size)] completed_subtrees = [[] for i in range(completed_subtrees_size)]
@ -123,7 +123,7 @@ class Tree(object):
child, child_queries, child_queries_metadata = next(children_queries_generator) child, child_queries, child_queries_metadata = next(children_queries_generator)
while child: while child:
# obtain children results # obtain children results
new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries) new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string)
self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices, self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices,
partial_results_dict, partial_subtrees) partial_results_dict, partial_subtrees)
@ -135,7 +135,7 @@ class Tree(object):
return partial_subtrees, completed_subtrees return partial_subtrees, completed_subtrees
def get_subtrees(self, permanent_query_trees, temporary_query_trees): def get_subtrees(self, permanent_query_trees, temporary_query_trees, create_output_string):
""" """
:param permanent_query_trees: :param permanent_query_trees:
@ -165,8 +165,8 @@ class Tree(object):
if 'r_children' in temporary_query_tree: if 'r_children' in temporary_query_tree:
r_all_query_indices.append((temporary_query_tree['r_children'], False)) r_all_query_indices.append((temporary_query_tree['r_children'], False))
l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices, self.l_children) l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices, self.l_children, create_output_string)
r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, r_all_query_indices, self.r_children) r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, r_all_query_indices, self.r_children, create_output_string)
@ -180,43 +180,43 @@ class Tree(object):
if i < len(active_permanent_query_trees): if i < len(active_permanent_query_trees):
if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]): if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]):
merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left],
[[self.create_output_string()]]) [[create_output_string(self)]])
merged_partial_subtrees.append( merged_partial_subtrees.append(
self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right])) self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right]))
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]])) # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]]))
i_left += 1 i_left += 1
i_right += 1 i_right += 1
elif 'l_children' in active_permanent_query_trees[i]: elif 'l_children' in active_permanent_query_trees[i]:
merged_partial_subtrees.append( merged_partial_subtrees.append(
self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]])) self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]]))
i_left += 1 i_left += 1
elif 'r_children' in active_permanent_query_trees[i]: elif 'r_children' in active_permanent_query_trees[i]:
merged_partial_subtrees.append( merged_partial_subtrees.append(
self.merge_results([[self.create_output_string()]], r_partial_subtrees[i_right])) self.merge_results([[create_output_string(self)]], r_partial_subtrees[i_right]))
i_right += 1 i_right += 1
else: else:
merged_partial_subtrees.append([[self.create_output_string()]]) merged_partial_subtrees.append([[create_output_string(self)]])
else: else:
if ('l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)] and 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]): if ('l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)] and 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]):
merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]]) merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]])
merged_partial_subtrees.append(self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right])) merged_partial_subtrees.append(self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right]))
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]])) # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]]))
i_left += 1 i_left += 1
i_right += 1 i_right += 1
elif 'l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]: elif 'l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]])) merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]]))
i_left += 1 i_left += 1
elif 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]: elif 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
merged_partial_subtrees.append(self.merge_results([[self.create_output_string()]], r_partial_subtrees[i_right])) merged_partial_subtrees.append(self.merge_results([[create_output_string(self)]], r_partial_subtrees[i_right]))
i_right += 1 i_right += 1
else: else:
merged_partial_subtrees.append([[self.create_output_string()]]) merged_partial_subtrees.append([[create_output_string(self)]])
# if r_partial_subtrees[i]: # if r_partial_subtrees[i]:
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]])) # merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]]))
i += 1 i += 1
completed_subtrees = l_completed_subtrees completed_subtrees = l_completed_subtrees
@ -233,5 +233,17 @@ class Tree(object):
merged_results.append(old_result + new_result) merged_results.append(old_result + new_result)
return merged_results return merged_results
def create_output_string(self): def create_output_string_form(tree):
return self.form.get_value() return tree.form.get_value()
def create_output_string_deprel(tree):
    """Return the output string for ``tree`` taken from its ``deprel`` field.

    Intended to be handed to ``Tree.get_subtrees`` as the
    ``create_output_string`` callable, which invokes it with a tree node.

    :param tree: object exposing ``deprel.get_value()``
    :return: whatever ``tree.deprel.get_value()`` returns
    """
    return tree.deprel.get_value()
def create_output_string_lemma(tree):
    """Return the output string for ``tree`` taken from its ``lemma`` field.

    Intended to be handed to ``Tree.get_subtrees`` as the
    ``create_output_string`` callable, which invokes it with a tree node.

    :param tree: object exposing ``lemma.get_value()``
    :return: whatever ``tree.lemma.get_value()`` returns
    """
    return tree.lemma.get_value()
def create_output_string_upos(tree):
    """Return the output string for ``tree`` taken from its ``upos`` field.

    Intended to be handed to ``Tree.get_subtrees`` as the
    ``create_output_string`` callable, which invokes it with a tree node.

    :param tree: object exposing ``upos.get_value()``
    :return: whatever ``tree.upos.get_value()`` returns
    """
    return tree.upos.get_value()
def create_output_string_xpos(tree):
    """Return the output string for ``tree`` taken from its ``xpos`` field.

    Intended to be handed to ``Tree.get_subtrees`` as the
    ``create_output_string`` callable, which invokes it with a tree node.

    :param tree: object exposing ``xpos.get_value()``
    :return: whatever ``tree.xpos.get_value()`` returns
    """
    return tree.xpos.get_value()

View File

@ -1,12 +0,0 @@
[settings]
input = data/sl_ssj-ud_v2.4.conllu
output = results/out.tsv
internal_saves = ./internal_saves
ngrams = 0
; ngrams = 2
; analyze_type options: 'deprel', 'lemma', 'upos', 'xpos'
; query = _ > _
; query = _ > (_ < _) > _
; query = _ < (_ > _) < _ > _
; query = _ < _ > _
query = _ < _

View File

@ -7,7 +7,7 @@ import re
import pyconll import pyconll
from Tree import Tree from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos
def decode_query(orig_query): def decode_query(orig_query):
@ -116,6 +116,7 @@ def main():
config = configparser.ConfigParser() config = configparser.ConfigParser()
config.read('config.ini') config.read('config.ini')
# create queries
ngrams = 0 ngrams = 0
if config.getint('settings', 'ngrams') == 2: if config.getint('settings', 'ngrams') == 2:
ngrams = 2 ngrams = 2
@ -126,6 +127,18 @@ def main():
(all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict) = create_trees(config) (all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict) = create_trees(config)
# set filters
# Read the option once and map it to the function that renders a tree node
# in the results. 'form' is listed explicitly: the previous whitelist named
# 'upos' twice and omitted 'form', which made the form fallback unreachable.
analyze_type = config.get('settings', 'analyze_type')
output_string_functs = {
    'deprel': create_output_string_deprel,
    'form': create_output_string_form,
    'lemma': create_output_string_lemma,
    'upos': create_output_string_upos,
    'xpos': create_output_string_xpos,
}
# Raise instead of assert so the check still runs under ``python -O``.
if analyze_type not in output_string_functs:
    raise ValueError('"analyze_type" is not set up correctly')
create_output_string_funct = output_string_functs[analyze_type]
result_dict = {} result_dict = {}
@ -134,7 +147,7 @@ def main():
# original # original
# r_children = tree.r_children[:1] + tree.r_children[3:4] # r_children = tree.r_children[:1] + tree.r_children[3:4]
# tree.r_children = tree.r_children[:1] + tree.r_children[2:4] # tree.r_children = tree.r_children[:1] + tree.r_children[2:4]
_, subtrees = tree.get_subtrees(query_tree, []) _, subtrees = tree.get_subtrees(query_tree, [], create_output_string_funct)
for query_results in subtrees: for query_results in subtrees:
for result in query_results: for result in query_results:
if ngrams: if ngrams: