Added analyze_type filter
This commit is contained in:
parent
ba9bdeb8b9
commit
b444df2d54
48
Tree.py
48
Tree.py
|
@ -110,7 +110,7 @@ class Tree(object):
|
||||||
if not is_permanent:
|
if not is_permanent:
|
||||||
partial_subtrees[result_index].append([])
|
partial_subtrees[result_index].append([])
|
||||||
|
|
||||||
def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children):
|
def get_all_query_indices(self, temporary_query_trees_size, completed_subtrees_size, permanent_query_trees, l_all_query_indices, children, create_output_string):
|
||||||
partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)]
|
partial_subtrees = [[] for i in range(completed_subtrees_size + temporary_query_trees_size)]
|
||||||
completed_subtrees = [[] for i in range(completed_subtrees_size)]
|
completed_subtrees = [[] for i in range(completed_subtrees_size)]
|
||||||
|
|
||||||
|
@ -123,7 +123,7 @@ class Tree(object):
|
||||||
child, child_queries, child_queries_metadata = next(children_queries_generator)
|
child, child_queries, child_queries_metadata = next(children_queries_generator)
|
||||||
while child:
|
while child:
|
||||||
# obtain children results
|
# obtain children results
|
||||||
new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries)
|
new_partial_subtrees, new_completed_subtrees = child.get_subtrees(permanent_query_trees, child_queries, create_output_string)
|
||||||
|
|
||||||
self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices,
|
self.group_results(new_partial_subtrees, child_queries_metadata, l_all_query_indices,
|
||||||
partial_results_dict, partial_subtrees)
|
partial_results_dict, partial_subtrees)
|
||||||
|
@ -135,7 +135,7 @@ class Tree(object):
|
||||||
|
|
||||||
return partial_subtrees, completed_subtrees
|
return partial_subtrees, completed_subtrees
|
||||||
|
|
||||||
def get_subtrees(self, permanent_query_trees, temporary_query_trees):
|
def get_subtrees(self, permanent_query_trees, temporary_query_trees, create_output_string):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
:param permanent_query_trees:
|
:param permanent_query_trees:
|
||||||
|
@ -165,8 +165,8 @@ class Tree(object):
|
||||||
if 'r_children' in temporary_query_tree:
|
if 'r_children' in temporary_query_tree:
|
||||||
r_all_query_indices.append((temporary_query_tree['r_children'], False))
|
r_all_query_indices.append((temporary_query_tree['r_children'], False))
|
||||||
|
|
||||||
l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices, self.l_children)
|
l_partial_subtrees, l_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, l_all_query_indices, self.l_children, create_output_string)
|
||||||
r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, r_all_query_indices, self.r_children)
|
r_partial_subtrees, r_completed_subtrees = self.get_all_query_indices(len(temporary_query_trees), len(permanent_query_trees), permanent_query_trees, r_all_query_indices, self.r_children, create_output_string)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -180,43 +180,43 @@ class Tree(object):
|
||||||
if i < len(active_permanent_query_trees):
|
if i < len(active_permanent_query_trees):
|
||||||
if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]):
|
if ('l_children' in active_permanent_query_trees[i] and 'r_children' in active_permanent_query_trees[i]):
|
||||||
merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left],
|
merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left],
|
||||||
[[self.create_output_string()]])
|
[[create_output_string(self)]])
|
||||||
merged_partial_subtrees.append(
|
merged_partial_subtrees.append(
|
||||||
self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right]))
|
self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right]))
|
||||||
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]]))
|
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]]))
|
||||||
i_left += 1
|
i_left += 1
|
||||||
i_right += 1
|
i_right += 1
|
||||||
|
|
||||||
elif 'l_children' in active_permanent_query_trees[i]:
|
elif 'l_children' in active_permanent_query_trees[i]:
|
||||||
merged_partial_subtrees.append(
|
merged_partial_subtrees.append(
|
||||||
self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]]))
|
self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]]))
|
||||||
i_left += 1
|
i_left += 1
|
||||||
|
|
||||||
elif 'r_children' in active_permanent_query_trees[i]:
|
elif 'r_children' in active_permanent_query_trees[i]:
|
||||||
merged_partial_subtrees.append(
|
merged_partial_subtrees.append(
|
||||||
self.merge_results([[self.create_output_string()]], r_partial_subtrees[i_right]))
|
self.merge_results([[create_output_string(self)]], r_partial_subtrees[i_right]))
|
||||||
i_right += 1
|
i_right += 1
|
||||||
else:
|
else:
|
||||||
merged_partial_subtrees.append([[self.create_output_string()]])
|
merged_partial_subtrees.append([[create_output_string(self)]])
|
||||||
else:
|
else:
|
||||||
if ('l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)] and 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]):
|
if ('l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)] and 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]):
|
||||||
merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]])
|
merged_partial_subtree = self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]])
|
||||||
merged_partial_subtrees.append(self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right]))
|
merged_partial_subtrees.append(self.merge_results(merged_partial_subtree, r_partial_subtrees[i_right]))
|
||||||
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]]))
|
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]]))
|
||||||
i_left += 1
|
i_left += 1
|
||||||
i_right += 1
|
i_right += 1
|
||||||
|
|
||||||
elif 'l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
|
elif 'l_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
|
||||||
merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i_left], [[self.create_output_string()]]))
|
merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i_left], [[create_output_string(self)]]))
|
||||||
i_left += 1
|
i_left += 1
|
||||||
|
|
||||||
elif 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
|
elif 'r_children' in active_temporary_query_trees[i - len(active_permanent_query_trees)]:
|
||||||
merged_partial_subtrees.append(self.merge_results([[self.create_output_string()]], r_partial_subtrees[i_right]))
|
merged_partial_subtrees.append(self.merge_results([[create_output_string(self)]], r_partial_subtrees[i_right]))
|
||||||
i_right += 1
|
i_right += 1
|
||||||
else:
|
else:
|
||||||
merged_partial_subtrees.append([[self.create_output_string()]])
|
merged_partial_subtrees.append([[create_output_string(self)]])
|
||||||
# if r_partial_subtrees[i]:
|
# if r_partial_subtrees[i]:
|
||||||
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[self.create_output_string()]]))
|
# merged_partial_subtrees.append(self.merge_results(l_partial_subtrees[i], [[create_output_string(self)]]))
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
completed_subtrees = l_completed_subtrees
|
completed_subtrees = l_completed_subtrees
|
||||||
|
@ -233,5 +233,17 @@ class Tree(object):
|
||||||
merged_results.append(old_result + new_result)
|
merged_results.append(old_result + new_result)
|
||||||
return merged_results
|
return merged_results
|
||||||
|
|
||||||
def create_output_string_form(tree):
    """
    Build the output string for a node from its ``form`` field.

    Module-level replacement for the former ``Tree.create_output_string``
    method, so that callers can pass it around as a plain function.

    :param tree: object exposing a ``form`` attribute with a ``get_value()`` method
    :return: the result of ``tree.form.get_value()``
    """
    return tree.form.get_value()
|
||||||
|
|
||||||
|
def create_output_string_deprel(tree):
    """
    Build the output string for a node from its ``deprel`` field.

    :param tree: object exposing a ``deprel`` attribute with a ``get_value()`` method
    :return: the result of ``tree.deprel.get_value()``
    """
    return tree.deprel.get_value()
|
||||||
|
|
||||||
|
def create_output_string_lemma(tree):
    """
    Build the output string for a node from its ``lemma`` field.

    :param tree: object exposing a ``lemma`` attribute with a ``get_value()`` method
    :return: the result of ``tree.lemma.get_value()``
    """
    return tree.lemma.get_value()
|
||||||
|
|
||||||
|
def create_output_string_upos(tree):
    """
    Build the output string for a node from its ``upos`` field.

    :param tree: object exposing a ``upos`` attribute with a ``get_value()`` method
    :return: the result of ``tree.upos.get_value()``
    """
    return tree.upos.get_value()
|
||||||
|
|
||||||
|
def create_output_string_xpos(tree):
    """
    Build the output string for a node from its ``xpos`` field.

    :param tree: object exposing an ``xpos`` attribute with a ``get_value()`` method
    :return: the result of ``tree.xpos.get_value()``
    """
    return tree.xpos.get_value()
|
||||||
|
|
12
config.ini
12
config.ini
|
@ -1,12 +0,0 @@
|
||||||
[settings]
|
|
||||||
input = data/sl_ssj-ud_v2.4.conllu
|
|
||||||
output = results/out.tsv
|
|
||||||
internal_saves = ./internal_saves
|
|
||||||
ngrams = 0
|
|
||||||
; ngrams = 2
|
|
||||||
; analyze_type options: 'form', 'lemma', 'upos', 'xpos', 'deprel'
|
|
||||||
; query = _ > _
|
|
||||||
; query = _ > (_ < _) > _
|
|
||||||
; query = _ < (_ > _) < _ > _
|
|
||||||
; query = _ < _ > _
|
|
||||||
query = _ < _
|
|
|
@ -7,7 +7,7 @@ import re
|
||||||
|
|
||||||
import pyconll
|
import pyconll
|
||||||
|
|
||||||
from Tree import Tree
|
from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos
|
||||||
|
|
||||||
|
|
||||||
def decode_query(orig_query):
|
def decode_query(orig_query):
|
||||||
|
@ -116,6 +116,7 @@ def main():
|
||||||
config = configparser.ConfigParser()
|
config = configparser.ConfigParser()
|
||||||
config.read('config.ini')
|
config.read('config.ini')
|
||||||
|
|
||||||
|
# create queries
|
||||||
ngrams = 0
|
ngrams = 0
|
||||||
if config.getint('settings', 'ngrams') == 2:
|
if config.getint('settings', 'ngrams') == 2:
|
||||||
ngrams = 2
|
ngrams = 2
|
||||||
|
@ -126,6 +127,18 @@ def main():
|
||||||
(all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict) = create_trees(config)
|
(all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict) = create_trees(config)
|
||||||
|
|
||||||
|
|
||||||
|
# set filters
# Map every supported ``analyze_type`` setting to the function that extracts
# the corresponding field from a tree node. 'form' is listed explicitly: the
# previous whitelist repeated 'upos' and left 'form' out, which made the
# form branch unreachable and rejected ``analyze_type = form``.
output_string_functions = {
    'deprel': create_output_string_deprel,
    'form': create_output_string_form,
    'lemma': create_output_string_lemma,
    'upos': create_output_string_upos,
    'xpos': create_output_string_xpos,
}
# Read the option once instead of re-querying the config in every branch.
analyze_type = config.get('settings', 'analyze_type')
# Raise explicitly rather than ``assert``: assertions are stripped under
# ``python -O``, which would let an invalid setting slip through silently.
if analyze_type not in output_string_functions:
    raise ValueError('"analyze_type" is not set up correctly')
create_output_string_funct = output_string_functions[analyze_type]
|
||||||
|
|
||||||
result_dict = {}
|
result_dict = {}
|
||||||
|
|
||||||
|
@ -134,7 +147,7 @@ def main():
|
||||||
# original
|
# original
|
||||||
# r_children = tree.r_children[:1] + tree.r_children[3:4]
|
# r_children = tree.r_children[:1] + tree.r_children[3:4]
|
||||||
# tree.r_children = tree.r_children[:1] + tree.r_children[2:4]
|
# tree.r_children = tree.r_children[:1] + tree.r_children[2:4]
|
||||||
_, subtrees = tree.get_subtrees(query_tree, [])
|
_, subtrees = tree.get_subtrees(query_tree, [], create_output_string_funct)
|
||||||
for query_results in subtrees:
|
for query_results in subtrees:
|
||||||
for result in query_results:
|
for result in query_results:
|
||||||
if ngrams:
|
if ngrams:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user