From a614da97660d12205400be3f424238b01f9a8c01 Mon Sep 17 00:00:00 2001 From: Luka Date: Tue, 15 Oct 2019 21:18:00 +0200 Subject: [PATCH] Added feats to queries --- Tree.py | 25 +++++++++++++++++++++++-- dependency-parsetree.py | 29 ++++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/Tree.py b/Tree.py index 0fb98bf..27dd93a 100644 --- a/Tree.py +++ b/Tree.py @@ -7,7 +7,10 @@ from Value import Value class Tree(object): - def __init__(self, form, lemma, upos, xpos, deprel, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, head): + def __init__(self, form, lemma, upos, xpos, deprel, feats, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, head): + if not hasattr(self, 'feats'): + self.feats = {} + # form_unicode = str(form).encode("utf-8") if form not in form_dict: form_dict[form] = Value(form) @@ -24,6 +27,12 @@ class Tree(object): if deprel not in deprel_dict: deprel_dict[deprel] = Value(deprel) self.deprel = deprel_dict[deprel] + for feat in feats.keys(): + if next(iter(feats[feat])) not in feats_dict[feat]: + feats_dict[feat][next(iter(feats[feat]))] = Value(next(iter(feats[feat]))) + if not feat in self.feats: + self.feats[feat] = {} + self.feats[feat][next(iter(feats[feat]))] = feats_dict[feat][next(iter(feats[feat]))] # self.position = position self.parent = head @@ -43,12 +52,24 @@ class Tree(object): def set_parent(self, parent): self.parent = parent + def fits_static_requirements_feats(self, query_tree): + if 'feats' not in query_tree: + return True + + for feat in query_tree['feats'].keys(): + if feat not in self.feats or query_tree['feats'][feat] != next(iter(self.feats[feat].values())).get_value(): + return False + + return True + + def fits_static_requirements(self, query_tree): return ('form' not in query_tree or query_tree['form'] == self.form.get_value()) and \ ('lemma' not in query_tree or query_tree['lemma'] == self.lemma.get_value()) and \ ('upos' not in query_tree or query_tree['upos'] == self.upos.get_value()) and \ ('xpos' not in query_tree or query_tree['xpos'] == self.xpos.get_value()) and \ - ('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) + ('deprel' not in query_tree or query_tree['deprel'] == self.deprel.get_value()) and \ + self.fits_static_requirements_feats(query_tree) def generate_children_queries(self, all_query_indices, children): partial_results = {} diff --git a/dependency-parsetree.py b/dependency-parsetree.py index afa09c3..4093e79 100644 --- a/dependency-parsetree.py +++ b/dependency-parsetree.py @@ -10,6 +10,22 @@ import pyconll from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos +feats_list = [ + # lexical features + 'PronType', 'NumType', 'Poss', 'Reflex', 'Foreign', 'Abbr', + + # Inflectional features (nominal) + 'Gender', 'Animacy', 'NounClass', 'Number', 'Case', 'Definite', 'Degree', + + # Inflectional features (verbal) + 'VerbForm', 'Mood', 'Tense', 'Aspect', 'Voice', 'Evident', 'Polarity', 'Person', 'Polite', 'Clusivity', + + # Other + 'Variant', 'Number[psor]', 'Gender[psor]', 'NumForm' +] + +feats_dict = {key: {} for key in feats_list} + def decode_query(orig_query, dependency_type): new_query = False @@ -43,6 +59,10 @@ def decode_query(orig_query, dependency_type): elif orig_query_split[0] == 'form': decoded_query['form'] = orig_query_split[1] return decoded_query + elif orig_query_split[0] in feats_list: + decoded_query['feats'] = {} + decoded_query['feats'][orig_query_split[0]] = orig_query_split[1] + return decoded_query elif not new_query: raise Exception('Not supported yet!') elif not new_query: @@ -105,8 +125,8 @@ def create_trees(config): root_id = None token_nodes = [] for token in sentence: - node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, form_dict, - lemma_dict, upos_dict, xpos_dict, deprel_dict, token.head) + node = Tree(token.form, token.lemma, token.upos, token.xpos, token.deprel, token.feats, form_dict, + lemma_dict, upos_dict, xpos_dict, deprel_dict, feats_dict, token.head) token_nodes.append(node) if token.deprel == 'root': root = node @@ -265,9 +285,8 @@ def main(): if ngrams: len_words = ngrams else: - len_words = len(config.get('settings', 'query').split(" ")) - span = 2 - header = ["Structure"] + ["Word #" + str(int(i/2 + 1)) for i in range(0, len_words * 2, span)] + ['Number of occurences'] + len_words = int(len(config.get('settings', 'query').split(" "))/2 + 1) + header = ["Structure"] + ["Word #" + str(i) for i in range(1, len_words + 1)] + ['Number of occurences'] # header = [" ".join(words[i:i + span]) for i in range(0, len(words), span)] + ['Number of occurences'] writer.writerow(header)