Added feats to queries
This commit is contained in:
parent
dce2ccdff1
commit
a614da9766
25
Tree.py
25
Tree.py
|
@ -7,7 +7,10 @@ from Value import Value
|
||||||
|
|
||||||
|
|
||||||
class Tree(object):
|
class Tree(object):
|
||||||
def __init__(self, form, lemma, upos, xpos, deprel, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, head):
|
def __init__(self, form, lemma, upos, xpos, deprel, feats, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, head):
|
||||||
|
if not hasattr(self, 'feats'):
|
||||||
|
self.feats = {}
|
||||||
|
|
||||||
# form_unicode = str(form).encode("utf-8")
|
# form_unicode = str(form).encode("utf-8")
|
||||||
if form not in form_dict:
|
if form not in form_dict:
|
||||||
form_dict[form] = Value(form)
|
form_dict[form] = Value(form)
|
||||||
|
@ -24,6 +27,12 @@ class Tree(object):
|
||||||
if deprel not in deprel_dict:
|
if deprel not in deprel_dict:
|
||||||
deprel_dict[deprel] = Value(deprel)
|
deprel_dict[deprel] = Value(deprel)
|
||||||
self.deprel = deprel_dict[deprel]
|
self.deprel = deprel_dict[deprel]
|
||||||
|
for feat in feats.keys():
|
||||||
|
if next(iter(feats[feat])) not in feats_dict[feat]:
|
||||||
|
feats_dict[feat][next(iter(feats[feat]))] = Value(next(iter(feats[feat])))
|
||||||
|
if not feat in self.feats:
|
||||||
|
self.feats[feat] = {}
|
||||||
|
self.feats[feat][next(iter(feats[feat]))] = feats_dict[feat][next(iter(feats[feat]))]
|
||||||
# self.position = position
|
# self.position = position
|
||||||
|
|
||||||
self.parent = head
|
self.parent = head
|
||||||
|
@ -43,12 +52,24 @@ class Tree(object):
|
||||||
def set_parent(self, parent):
|
def set_parent(self, parent):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
|
|
||||||
|
def fits_static_requirements_feats(self, query_tree):
    """Check this node against the morphological-feature part of a query.

    ``self.feats`` maps a feature name to a dict of ``value string -> Value``
    objects; ``query_tree['feats']`` (when present) maps a feature name to
    the single value string the query requires.

    Args:
        query_tree: Decoded query dict. A query without a ``'feats'`` key
            places no feature constraint on the node.

    Returns:
        True when every feature requested by the query is present on this
        node with a matching value, False otherwise.
    """
    if 'feats' not in query_tree:
        return True

    for feat, wanted in query_tree['feats'].items():
        stored = self.feats.get(feat)
        # A missing feature, or one recorded without any value, cannot match.
        if not stored:
            return False
        # Compare against every stored value rather than an arbitrary first
        # one, so the result does not depend on dict iteration order.
        if all(value.get_value() != wanted for value in stored.values()):
            return False

    return True
|
||||||
|
|
||||||
|
|
||||||
def fits_static_requirements(self, query_tree):
|
def fits_static_requirements(self, query_tree):
|
||||||
return ('form' not in query_tree or query_tree['form'] == self.form.get_value()) and \
|
return ('form' not in query_tree or query_tree['form'] == self.form.get_value()) and \
|
||||||
('lemma' not in query_tree or query_tree['lemma'] == self.lemma.get_value()) and \
|
('lemma' not in query_tree or query_tree['lemma'] == self.lemma.get_value()) and \
|
||||||
('upos' not in query_tree or query_tree['upos'] == self.upos.get_value()) and \
|
('upos' not in query_tree or query_tree['upos'] == self.upos.get_value()) and \
|
||||||
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
|
('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \
|
||||||
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value())
|
('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \
|
||||||
|
self.fits_static_requirements_feats(query_tree)
|
||||||
|
|
||||||
def generate_children_queries(self, all_query_indices, children):
|
def generate_children_queries(self, all_query_indices, children):
|
||||||
partial_results = {}
|
partial_results = {}
|
||||||
|
|
|
@ -10,6 +10,22 @@ import pyconll
|
||||||
|
|
||||||
from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos
|
from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos
|
||||||
|
|
||||||
|
# Morphological feature names that may be used as query keys, grouped by kind.
feats_list = (
    # Lexical features.
    ['PronType', 'NumType', 'Poss', 'Reflex', 'Foreign', 'Abbr']
    # Inflectional features (nominal).
    + ['Gender', 'Animacy', 'NounClass', 'Number', 'Case', 'Definite', 'Degree']
    # Inflectional features (verbal).
    + ['VerbForm', 'Mood', 'Tense', 'Aspect', 'Voice', 'Evident', 'Polarity',
       'Person', 'Polite', 'Clusivity']
    # Other.
    + ['Variant', 'Number[psor]', 'Gender[psor]', 'NumForm']
)

# One independent, initially empty value table per supported feature name.
feats_dict = {feature_name: {} for feature_name in feats_list}
|
||||||
|
|
||||||
|
|
||||||
def decode_query(orig_query, dependency_type):
|
def decode_query(orig_query, dependency_type):
|
||||||
new_query = False
|
new_query = False
|
||||||
|
@ -43,6 +59,10 @@ def decode_query(orig_query, dependency_type):
|
||||||
elif orig_query_split[0] == 'form':
|
elif orig_query_split[0] == 'form':
|
||||||
decoded_query['form'] = orig_query_split[1]
|
decoded_query['form'] = orig_query_split[1]
|
||||||
return decoded_query
|
return decoded_query
|
||||||
|
elif orig_query_split[0] in feats_list:
|
||||||
|
decoded_query['feats'] = {}
|
||||||
|
decoded_query['feats'][orig_query_split[0]] = orig_query_split[1]
|
||||||
|
return decoded_query
|
||||||
elif not new_query:
|
elif not new_query:
|
||||||
raise Exception('Not supported yet!')
|
raise Exception('Not supported yet!')
|
||||||
elif not new_query:
|
elif not new_query:
|
||||||
|
@ -105,8 +125,8 @@ def create_trees(config):
|
||||||
root_id = None
|
root_id = None
|
||||||
token_nodes = []
|
token_nodes = []
|
||||||
for token in sentence:
|
for token in sentence:
|
||||||
node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, form_dict,
|
node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, token.feats, form_dict,
|
||||||
lemma_dict, upos_dict, xpos_dict, deprel_dict, token.head)
|
lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, token.head)
|
||||||
token_nodes.append(node)
|
token_nodes.append(node)
|
||||||
if token.deprel == 'root':
|
if token.deprel == 'root':
|
||||||
root = node
|
root = node
|
||||||
|
@ -265,9 +285,8 @@ def main():
|
||||||
if ngrams:
|
if ngrams:
|
||||||
len_words = ngrams
|
len_words = ngrams
|
||||||
else:
|
else:
|
||||||
len_words = len(config.get('settings', 'query').split(" "))
|
len_words = int(len(config.get('settings', 'query').split(" "))/2 + 1)
|
||||||
span = 2
|
header = ["Structure"] + ["Word #" + str(i) for i in range(1, len_words + 1)] + ['Number of occurences']
|
||||||
header = ["Structure"] + ["Word #" + str(int(i/2 + 1)) for i in range(0, len_words * 2, span)] + ['Number of occurences']
|
|
||||||
# header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Number of occurences']
|
# header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Number of occurences']
|
||||||
writer.writerow(header)
|
writer.writerow(header)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user