From c0e43f22605f29716a6e146ab32e81d334f06e23 Mon Sep 17 00:00:00 2001 From: Cyprian Laskowski Date: Mon, 1 Mar 2021 16:04:01 +0100 Subject: [PATCH] Redmine #1835: separated loading jos models and running classla --- package/structure_assignment/api.py | 1 + package/structure_assignment/pipeline.py | 7 +++++-- scripts/pipeline1.py | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/package/structure_assignment/api.py b/package/structure_assignment/api.py index 39842b3..17c4ea3 100644 --- a/package/structure_assignment/api.py +++ b/package/structure_assignment/api.py @@ -12,6 +12,7 @@ api_prefix = os.environ['API_PREFIX'] resource_directory = os.environ['API_RESOURCE_DIR'] tmp_directory = os.environ['API_TMP'] pipeline.initialise(temp_dir=tmp_directory, resource_dir=resource_directory) +pipeline.load_classla_models() @app.route(api_prefix + '/test/', methods=['GET']) def test(string): diff --git a/package/structure_assignment/pipeline.py b/package/structure_assignment/pipeline.py index 8e53fdb..7caed7c 100644 --- a/package/structure_assignment/pipeline.py +++ b/package/structure_assignment/pipeline.py @@ -30,13 +30,12 @@ def __get_tmp_file_name(file_key): return tmp_directory + '/' + FILE_NAME_MAP[file_key] def initialise(**argument_map): - global tmp_directory, resource_directory, nlp + global tmp_directory, resource_directory tmp_directory = argument_map['temp_dir'] resource_directory = argument_map['resource_dir'] shutil.rmtree(tmp_directory, True) os.makedirs(tmp_directory, exist_ok=True) NLP_CONFIG_MAP['models_dir'] = resource_directory + '/classla' - nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP) def import_file(file_name, file_key): shutil.copyfile(file_name, __get_tmp_file_name(file_key)) @@ -51,6 +50,10 @@ def do_tweak_conllu(): output_file_name = __get_tmp_file_name('obeliks-tweaked') tweak_conllu(input_file_name, output_file_name) +def load_classla_models(): + global nlp + nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP) + def do_parse(): input_file_name = __get_tmp_file_name('obeliks-tweaked') output_file_name = __get_tmp_file_name('classla-parsed') diff --git a/scripts/pipeline1.py b/scripts/pipeline1.py index 39a48dc..79e6d86 100644 --- a/scripts/pipeline1.py +++ b/scripts/pipeline1.py @@ -18,6 +18,7 @@ def run_pipeline(input_file_name, output_file_name): pipeline.import_file(input_file_name, 'strings-list') pipeline.do_tokenise() pipeline.do_tweak_conllu() + pipeline.load_classla_models() pipeline.do_parse() pipeline.do_translate_jos() pipeline.do_conllu_to_tei()