Added a parameter for the path to the config.ini input file and added dependency_relation to queries
This commit is contained in:
parent
9dc71b58c2
commit
b541270df7
|
@ -1,3 +1,4 @@
|
||||||
|
import argparse
|
||||||
import configparser
|
import configparser
|
||||||
import csv
|
import csv
|
||||||
import hashlib
|
import hashlib
|
||||||
|
@ -10,7 +11,7 @@ import pyconll
|
||||||
from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos
|
from Tree import Tree, create_output_string_form, create_output_string_deprel, create_output_string_lemma, create_output_string_upos, create_output_string_xpos
|
||||||
|
|
||||||
|
|
||||||
def decode_query(orig_query):
|
def decode_query(orig_query, dependency_type):
|
||||||
new_query = False
|
new_query = False
|
||||||
|
|
||||||
# if the command is in parentheses, remove them and treat the command as a new query
|
# if the command is in parentheses, remove them and treat the command as a new query
|
||||||
|
@ -20,23 +21,33 @@ def decode_query(orig_query):
|
||||||
|
|
||||||
orig_query_split = orig_query.split(' ')[0].split('=')
|
orig_query_split = orig_query.split(' ')[0].split('=')
|
||||||
# if orig_query is '_' return {}
|
# if orig_query is '_' return {}
|
||||||
|
if dependency_type != '':
|
||||||
|
decoded_query = {'deprel': dependency_type}
|
||||||
|
else:
|
||||||
|
decoded_query = {}
|
||||||
|
|
||||||
if orig_query == '_':
|
if orig_query == '_':
|
||||||
return {}
|
return decoded_query
|
||||||
# if no spaces in query then this is query node and do this otherwise further split query
|
# if no spaces in query then this is query node and do this otherwise further split query
|
||||||
elif len(orig_query.split(' ')) == 1:
|
elif len(orig_query.split(' ')) == 1:
|
||||||
if len(orig_query_split) > 1:
|
if len(orig_query_split) > 1:
|
||||||
if orig_query_split[0] == 'L':
|
if orig_query_split[0] == 'L':
|
||||||
return {'lemma': orig_query_split[1]}
|
decoded_query['lemma'] = orig_query_split[1]
|
||||||
|
return decoded_query
|
||||||
elif orig_query_split[0] == 'upos':
|
elif orig_query_split[0] == 'upos':
|
||||||
return {'upos': orig_query_split[1]}
|
decoded_query['upos'] = orig_query_split[1]
|
||||||
|
return decoded_query
|
||||||
elif orig_query_split[0] == 'xpos':
|
elif orig_query_split[0] == 'xpos':
|
||||||
return {'xpos': orig_query_split[1]}
|
decoded_query['xpos'] = orig_query_split[1]
|
||||||
|
return decoded_query
|
||||||
elif orig_query_split[0] == 'form':
|
elif orig_query_split[0] == 'form':
|
||||||
return {'form': orig_query_split[1]}
|
decoded_query['form'] = orig_query_split[1]
|
||||||
|
return decoded_query
|
||||||
elif not new_query:
|
elif not new_query:
|
||||||
raise Exception('Not supported yet!')
|
raise Exception('Not supported yet!')
|
||||||
elif not new_query:
|
elif not new_query:
|
||||||
return {'form': orig_query}
|
decoded_query['form'] = orig_query_split[1]
|
||||||
|
return decoded_query
|
||||||
|
|
||||||
# split on spaces that are not inside parentheses
|
# split on spaces that are not inside parentheses
|
||||||
PATTERN = re.compile(r'''((?:[^ ()]|\([^(]*\))+)''')
|
PATTERN = re.compile(r'''((?:[^ ()]|\([^(]*\))+)''')
|
||||||
|
@ -59,11 +70,11 @@ def decode_query(orig_query):
|
||||||
root = None
|
root = None
|
||||||
for i, node_action in enumerate(node_actions):
|
for i, node_action in enumerate(node_actions):
|
||||||
if i < root_index:
|
if i < root_index:
|
||||||
l_children.append(decode_query(node_action))
|
l_children.append(decode_query(node_action, priority_actions[i][1:]))
|
||||||
elif i > root_index:
|
elif i > root_index:
|
||||||
r_children.append(decode_query(node_action))
|
r_children.append(decode_query(node_action, priority_actions[i - 1][1:]))
|
||||||
else:
|
else:
|
||||||
root = decode_query(node_action)
|
root = decode_query(node_action, dependency_type)
|
||||||
if l_children:
|
if l_children:
|
||||||
root["l_children"] = l_children
|
root["l_children"] = l_children
|
||||||
if r_children:
|
if r_children:
|
||||||
|
@ -127,16 +138,27 @@ def create_trees(config):
|
||||||
return all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict
|
return all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
config = configparser.ConfigParser()
|
parser = argparse.ArgumentParser()
|
||||||
config.read('config.ini')
|
|
||||||
|
|
||||||
|
## Required parameters
|
||||||
|
parser.add_argument("--config_file",
|
||||||
|
default=None,
|
||||||
|
type=str,
|
||||||
|
required=True,
|
||||||
|
help="The input config file.")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
config = configparser.ConfigParser()
|
||||||
|
config.read(args.config_file)
|
||||||
|
# a = args.config_file
|
||||||
|
# config.read('config.ini')
|
||||||
# create queries
|
# create queries
|
||||||
ngrams = 0
|
ngrams = 0
|
||||||
if config.getint('settings', 'ngrams') == 2:
|
if config.getint('settings', 'ngrams') == 2:
|
||||||
ngrams = 2
|
ngrams = 2
|
||||||
query_tree = [{"l_children": [{}]}, {"r_children": [{}]}]
|
query_tree = [{"l_children": [{}]}, {"r_children": [{}]}]
|
||||||
else:
|
else:
|
||||||
query_tree = [decode_query('(' + config.get('settings', 'query') + ')')]
|
query_tree = [decode_query('(' + config.get('settings', 'query') + ')', '')]
|
||||||
|
|
||||||
(all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict) = create_trees(config)
|
(all_trees, form_dict, lemma_dict, upos_dict, xpos_dict, deprel_dict) = create_trees(config)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user