From e9eff0658f210ae17bfb0c01c4b9aa95cc6c938d Mon Sep 17 00:00:00 2001 From: Cyprian Laskowski Date: Wed, 3 Aug 2022 08:41:07 +0200 Subject: [PATCH] Redmine #1487: fixed wani cleanup and tweaked runner parameters --- scripts/process.py | 2 +- structure_assignment/api.py | 2 +- structure_assignment/pipeline.py | 24 ++++++++++++++---------- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/scripts/process.py b/scripts/process.py index 64d6f09..3b1c6e9 100644 --- a/scripts/process.py +++ b/scripts/process.py @@ -22,7 +22,7 @@ if (__name__ == '__main__'): output_structure_file_name = arguments.outstructs nlp_needed = mode in {'strings_to_parse', 'strings_to_dictionary', 'all'} - runner = Runner(classla_directory, nlp_needed, wani_file_name) + runner = Runner(nlp_needed, classla_directory, wani_file_name) if (mode == 'strings_to_parse'): runner.strings_to_parse(input_file_name, output_file_name) elif (mode == 'strings_to_dictionary'): diff --git a/structure_assignment/api.py b/structure_assignment/api.py index 0b7861e..8314434 100644 --- a/structure_assignment/api.py +++ b/structure_assignment/api.py @@ -13,7 +13,7 @@ app = Flask(__name__) api_prefix = os.environ['API_PREFIX'] resource_directory = os.environ['API_RESOURCE_DIR'] -runner = Runner(resource_directory, True) +runner = Runner(True, resource_directory) @app.route(api_prefix + '/strings_to_parse', methods=['GET', 'POST']) diff --git a/structure_assignment/pipeline.py b/structure_assignment/pipeline.py index 75134a7..d940b84 100644 --- a/structure_assignment/pipeline.py +++ b/structure_assignment/pipeline.py @@ -15,13 +15,13 @@ from conversion_utils.tei_to_dictionary import convert as tei_to_dictionary class Runner: - def __init__(self, classla_directory, nlp_needed, wani=None): + def __init__(self, nlp_needed, classla_directory=None, wani_file_name=None): self.classla_directory = classla_directory if (nlp_needed): NLP_CONFIG_MAP['dir'] = classla_directory self.nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP) - if (wani is not None): - self._provide_wani(wani) + if (wani_file_name is not None): + self._provide_wani(wani_file_name) def _provide_wani(self, wani_file_name): # TODO: remove once wani is incorporated into luscenje_struktur package self.wani_directory = tempfile.mkdtemp() @@ -39,7 +39,7 @@ class Runner: pipeline.export_file(output_structure_file_name, 'structures-new') pipeline.do_validate_dictionary() pipeline.export_file(output_file_name, 'dictionary') - pipeline.cleanup() + self.cleanup(pipeline) def strings_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name): pipeline = Pipeline(self.nlp) @@ -49,14 +49,14 @@ class Runner: self._parse_to_dictionary_sequence(pipeline) pipeline.export_file(output_file_name, 'dictionary') pipeline.export_file(output_structure_file_name, 'structures-new') - pipeline.cleanup() + self.cleanup(pipeline) def strings_to_parse(self, input_file_name, output_file_name): pipeline = Pipeline(self.nlp) pipeline.import_file(input_file_name, 'strings-list') self._strings_to_parse_sequence(pipeline) pipeline.export_file(output_file_name, 'tei-initial') - pipeline.cleanup() + self.cleanup(pipeline) def parse_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name): pipeline = Pipeline() @@ -65,19 +65,19 @@ class Runner: self._parse_to_dictionary_sequence(pipeline) pipeline.export_file(output_file_name, 'dictionary') pipeline.export_file(output_structure_file_name, 'structures-new') - pipeline.cleanup() + self.cleanup(pipeline) def validate_structures(self, input_file_name): pipeline = Pipeline() pipeline.import_file(input_file_name, 'structures-new') pipeline.do_validate_structures() - pipeline.cleanup() + self.cleanup(pipeline) def validate_dictionary(self, input_file_name): pipeline = Pipeline() pipeline.import_file(input_file_name, 'dictionary') pipeline.do_validate_dictionary() - pipeline.cleanup() + self.cleanup(pipeline) def _strings_to_parse_sequence(self, pipeline): pipeline.do_tokenise() @@ -92,6 +92,11 @@ class Runner: pipeline.do_assign_other_structures() pipeline.do_tei_to_dictionary() + def cleanup(self, pipeline): + shutil.rmtree(self.wani_directory, True) + pipeline.cleanup() + + class Pipeline: def __init__(self, nlp=None): @@ -220,4 +225,3 @@ class Pipeline: def cleanup(self): shutil.rmtree(self.tmp_directory, True) - shutil.rmtree(self.wani_directory, True)