Added root filtering.
This commit is contained in:
parent
05cbfd03ff
commit
a13559941e
58
Tree.py
58
Tree.py
|
@ -7,9 +7,9 @@ from Value import Value
|
||||||
|
|
||||||
|
|
||||||
class Tree(object):
|
class Tree(object):
|
||||||
def __init__(self, form, lemma, upos, xpos, deprel, feats, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_complete_dict, head):
|
def __init__(self, form, lemma, upos, xpos, deprel, feats, feats_detailed, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_detailed_dict, head):
|
||||||
# if not hasattr(self, 'feats'):
|
if not hasattr(self, 'feats'):
|
||||||
# self.feats = {}
|
self.feats_detailed = {}
|
||||||
|
|
||||||
# form_unicode = str(form).encode("utf-8")
|
# form_unicode = str(form).encode("utf-8")
|
||||||
if form not in form_dict:
|
if form not in form_dict:
|
||||||
|
@ -27,15 +27,15 @@ class Tree(object):
|
||||||
if deprel not in deprel_dict:
|
if deprel not in deprel_dict:
|
||||||
deprel_dict[deprel] = Value(deprel)
|
deprel_dict[deprel] = Value(deprel)
|
||||||
self.deprel = deprel_dict[deprel]
|
self.deprel = deprel_dict[deprel]
|
||||||
if feats not in feats_complete_dict:
|
if feats not in feats_dict:
|
||||||
feats_complete_dict[feats] = Value(feats)
|
feats_dict[feats] = Value(feats)
|
||||||
self.feats_complete = feats_complete_dict[feats]
|
self.feats = feats_dict[feats]
|
||||||
# for feat in feats.keys():
|
for feat in feats_detailed.keys():
|
||||||
# if next(iter(feats[feat])) not in feats_dict[feat]:
|
if next(iter(feats_detailed[feat])) not in feats_detailed_dict[feat]:
|
||||||
# feats_dict[feat][next(iter(feats[feat]))] = Value(next(iter(feats[feat])))
|
feats_detailed_dict[feat][next(iter(feats_detailed[feat]))] = Value(next(iter(feats_detailed[feat])))
|
||||||
# if not feat in self.feats:
|
if not feat in self.feats_detailed:
|
||||||
# self.feats[feat] = {}
|
self.feats_detailed[feat] = {}
|
||||||
# self.feats[feat][next(iter(feats[feat]))] = feats_dict[feat][next(iter(feats[feat]))]
|
self.feats_detailed[feat][next(iter(feats_detailed[feat]))] = feats_detailed_dict[feat][next(iter(feats_detailed[feat]))]
|
||||||
# self.position = position
|
# self.position = position
|
||||||
|
|
||||||
self.parent = head
|
self.parent = head
|
||||||
|
@ -62,8 +62,34 @@ class Tree(object):
|
||||||
# return True
|
# return True
|
||||||
|
|
||||||
|
|
||||||
def fits_temporary_requirements(self, filters):
|
def fits_permanent_requirements(self, filters):
    """Return True when this node satisfies the ``root_whitelist`` filter.

    ``filters['root_whitelist']`` is a list of options; each option is a
    dict mapping an attribute name to the value it must have. The node is
    accepted when the whitelist is empty, or when every requirement of at
    least one option matches.

    Keys naming a whole-token attribute (``deprel``, ``feats``, ``form``,
    ``lemma``, ``upos``, ``xpos``) are compared with that attribute's
    ``get_value()``. Any other key is looked up in ``self.feats_detailed``
    as a single feature name (e.g. ``Case``).

    :param filters: dict holding at least the ``root_whitelist`` entry.
    :return: True if the node is accepted, False otherwise.
    """
    # 'xpos' belongs here: without it an ``xpos=...`` requirement was
    # mistaken for a detailed feature name, never found, and the option
    # could not be satisfied.
    main_attributes = ('deprel', 'feats', 'form', 'lemma', 'upos', 'xpos')

    whitelist = filters['root_whitelist']
    if not whitelist:
        return True

    for option in whitelist:
        for key, expected in option.items():
            if key in main_attributes:
                actual = getattr(self, key).get_value()
            else:
                stored = self.feats_detailed.get(key)
                if not stored:
                    # The node does not carry this feature at all.
                    break
                # Compare against the first value stored under the feature.
                actual = next(iter(stored.values())).get_value()
            if actual != expected:
                break
        else:
            # No requirement of this option failed.
            return True

    return False
|
||||||
|
|
||||||
|
def fits_temporary_requirements(self, filters):
|
||||||
return not filters['label_whitelist'] or self.deprel.get_value() in filters['label_whitelist']
|
return not filters['label_whitelist'] or self.deprel.get_value() in filters['label_whitelist']
|
||||||
|
|
||||||
def fits_static_requirements(self, query_tree, filters):
|
def fits_static_requirements(self, query_tree, filters):
|
||||||
|
@ -72,7 +98,7 @@ class Tree(object):
|
||||||
('upos' not in query_tree or query_tree['upos'] == self.upos.get_value()) and \
|
('upos' not in query_tree or query_tree['upos'] == self.upos.get_value()) and \
|
||||||
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
|
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
|
||||||
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \
|
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \
|
||||||
('feats' not in query_tree or query_tree['feats'] == self.feats_complete.get_value()) and \
|
('feats' not in query_tree or query_tree['feats'] == self.feats.get_value()) and \
|
||||||
(not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children'])))
|
(not filters['complete_tree_type'] or (len(self.children) == 0 and 'children' not in query_tree) or ('children' in query_tree and len(self.children) == len(query_tree['children'])))
|
||||||
# self.fits_static_requirements_feats(query_tree)
|
# self.fits_static_requirements_feats(query_tree)
|
||||||
|
|
||||||
|
@ -302,7 +328,7 @@ class Tree(object):
|
||||||
|
|
||||||
active_permanent_query_trees = []
|
active_permanent_query_trees = []
|
||||||
for permanent_query_tree in permanent_query_trees:
|
for permanent_query_tree in permanent_query_trees:
|
||||||
if self.fits_static_requirements(permanent_query_tree, filters):
|
if self.fits_static_requirements(permanent_query_tree, filters) and self.fits_permanent_requirements(filters):
|
||||||
active_permanent_query_trees.append(permanent_query_tree)
|
active_permanent_query_trees.append(permanent_query_tree)
|
||||||
if 'children' in permanent_query_tree:
|
if 'children' in permanent_query_tree:
|
||||||
all_query_indices.append((permanent_query_tree['children'], True))
|
all_query_indices.append((permanent_query_tree['children'], True))
|
||||||
|
@ -617,4 +643,4 @@ def create_output_string_xpos(tree):
|
||||||
return tree.xpos.get_value()
|
return tree.xpos.get_value()
|
||||||
|
|
||||||
def create_output_string_feats(tree):
|
def create_output_string_feats(tree):
|
||||||
return tree.feats_complete.get_value()
|
return tree.feats.get_value()
|
||||||
|
|
|
@ -11,21 +11,21 @@ import pyconll
|
||||||
from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos, create_output_string_feats
|
from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos, create_output_string_feats
|
||||||
|
|
||||||
# for separate searches of feats
|
# for separate searches of feats
|
||||||
# feats_list = [
|
feats_detailed_list = [
|
||||||
# # lexical features
|
# lexical features
|
||||||
# 'PronType', 'NumType', 'Poss', 'Reflex', 'Foreign', 'Abbr',
|
'PronType', 'NumType', 'Poss', 'Reflex', 'Foreign', 'Abbr',
|
||||||
#
|
|
||||||
# # Inflectional features (nominal)
|
# Inflectional features (nominal)
|
||||||
# 'Gender', 'Animacy', 'NounClass', 'Number', 'Case', 'Definite', 'Degree',
|
'Gender', 'Animacy', 'NounClass', 'Number', 'Case', 'Definite', 'Degree',
|
||||||
#
|
|
||||||
# # Inflectional features (verbal)
|
# Inflectional features (verbal)
|
||||||
# 'VerbForm', 'Mood', 'Tense', 'Aspect', 'Voice', 'Evident', 'Polarity', 'Person', 'Polite', 'Clusivity',
|
'VerbForm', 'Mood', 'Tense', 'Aspect', 'Voice', 'Evident', 'Polarity', 'Person', 'Polite', 'Clusivity',
|
||||||
#
|
|
||||||
# # Other
|
# Other
|
||||||
# 'Variant', 'Number[psor]', 'Gender[psor]', 'NumForm'
|
'Variant', 'Number[psor]', 'Gender[psor]', 'NumForm'
|
||||||
# ]
|
]
|
||||||
#
|
|
||||||
# feats_dict = {key: {} for key in feats_list}
|
feats_detailed_dict = {key: {} for key in feats_detailed_list}
|
||||||
|
|
||||||
|
|
||||||
def decode_query(orig_query, dependency_type):
|
def decode_query(orig_query, dependency_type):
|
||||||
|
@ -65,10 +65,10 @@ def decode_query(orig_query, dependency_type):
|
||||||
elif orig_query_split[0] == 'feats':
|
elif orig_query_split[0] == 'feats':
|
||||||
decoded_query['feats'] = orig_query_split[1]
|
decoded_query['feats'] = orig_query_split[1]
|
||||||
# return decoded_query
|
# return decoded_query
|
||||||
# elif orig_query_split[0] in feats_list:
|
elif orig_query_split[0] in feats_detailed_list:
|
||||||
# decoded_query['feats'] = {}
|
decoded_query['feats_detailed'] = {}
|
||||||
# decoded_query['feats'][orig_query_split[0]] = orig_query_split[1]
|
decoded_query['feats_detailed'][orig_query_split[0]] = orig_query_split[1]
|
||||||
# return decoded_query
|
return decoded_query
|
||||||
elif not new_query:
|
elif not new_query:
|
||||||
raise Exception('Not supported yet!')
|
raise Exception('Not supported yet!')
|
||||||
else:
|
else:
|
||||||
|
@ -122,7 +122,7 @@ def create_trees(config):
|
||||||
|
|
||||||
train = pyconll.load_from_file(input_path)
|
train = pyconll.load_from_file(input_path)
|
||||||
|
|
||||||
form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_complete_dict = {}, {}, {}, {}, {}, {}
|
form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict = {}, {}, {}, {}, {}, {}
|
||||||
|
|
||||||
all_trees = []
|
all_trees = []
|
||||||
|
|
||||||
|
@ -136,9 +136,9 @@ def create_trees(config):
|
||||||
# token_feats += k + next(iter(v)) + '|'
|
# token_feats += k + next(iter(v)) + '|'
|
||||||
# token_feats = token_feats[:-1]
|
# token_feats = token_feats[:-1]
|
||||||
# TODO check if 5th place is always there for feats
|
# TODO check if 5th place is always there for feats
|
||||||
token_feats = token._fields[5]
|
feats = token._fields[5]
|
||||||
node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, token_feats, form_dict,
|
node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, feats, token.feats, form_dict,
|
||||||
lemma_dict, upos_dict, xpos_dict, deprel_dict, None, feats_complete_dict, token.head)
|
lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, feats_detailed_dict, token.head)
|
||||||
token_nodes.append(node)
|
token_nodes.append(node)
|
||||||
if token.deprel == 'root':
|
if token.deprel == 'root':
|
||||||
root = node
|
root = node
|
||||||
|
@ -276,6 +276,22 @@ def main():
|
||||||
else:
|
else:
|
||||||
filters['label_whitelist'] = []
|
filters['label_whitelist'] = []
|
||||||
|
|
||||||
|
if config.has_option('settings', 'root_whitelist'):
|
||||||
|
# test
|
||||||
|
filters['root_whitelist'] = []
|
||||||
|
|
||||||
|
for option in config.get('settings', 'root_whitelist'). split('|'):
|
||||||
|
attribute_dict = {}
|
||||||
|
for attribute in option.split('&'):
|
||||||
|
value = attribute.split('=')
|
||||||
|
# assert value[0] in ['deprel', 'lemma', 'upos', 'xpos', 'form',
|
||||||
|
# 'feats'], '"root_whitelist" is not set up correctly'
|
||||||
|
attribute_dict[value[0]] = value[1]
|
||||||
|
filters['root_whitelist'].append(attribute_dict)
|
||||||
|
# filters['root_whitelist'] = [{'upos': 'NOUN', 'Case': 'Nom'}, {'upos': 'ADJ', 'Degree': 'Sup'}]
|
||||||
|
else:
|
||||||
|
filters['root_whitelist'] = []
|
||||||
|
|
||||||
filters['complete_tree_type'] = config.get('settings', 'tree_type') == 'complete'
|
filters['complete_tree_type'] = config.get('settings', 'tree_type') == 'complete'
|
||||||
|
|
||||||
for tree in all_trees[2:]:
|
for tree in all_trees[2:]:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user