From 51f5e14ee9488d26b3eab607073453d27c03785e Mon Sep 17 00:00:00 2001
From: Cyprian Laskowski
Date: Thu, 25 Mar 2021 10:57:19 +0100
Subject: [PATCH] Redmine #1487: minor bugs and refactoring

---
 package/structure_assignment/pipeline.py | 20 +++++++-------------
 scripts/process.py                       |  2 +-
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/package/structure_assignment/pipeline.py b/package/structure_assignment/pipeline.py
index 6b10c33..f8b96c0 100644
--- a/package/structure_assignment/pipeline.py
+++ b/package/structure_assignment/pipeline.py
@@ -26,18 +26,18 @@ class Runner:
         NLP_CONFIG_MAP['dir'] = resource_directory + '/classla'
         self.nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP)
 
-    def run_all(input_file_name, output_file_name, structure_file_name):
+    def run_all(self, input_file_name, output_file_name, structure_file_name):
         pipeline = Pipeline(self.resource_directory, self.nlp)
         pipeline.import_file(input_file_name, 'strings-list')
         self._strings_to_parse_sequence(pipeline)
         self._parse_to_dictionary_sequence(pipeline)
-        pipeline.export_file(output_file_name, 'dictionary')
+        pipeline.do_validate_structures()
         pipeline.export_file(structure_file_name, 'structures-new')
-        self._validate_structures(structure_file_name)
-        self._validate_dictionary(output_file_name)
+        pipeline.do_validate_dictionary()
+        pipeline.export_file(output_file_name, 'dictionary')
         pipeline.cleanup()
 
-    def strings_to_dictionary(input_file_name, output_file_name, structure_file_name):
+    def strings_to_dictionary(self, input_file_name, output_file_name, structure_file_name):
         pipeline = Pipeline(self.resource_directory, self.nlp)
         pipeline.import_file(input_file_name, 'strings-list')
         self._strings_to_parse_sequence(pipeline)
@@ -64,13 +64,13 @@ class Runner:
     def validate_structures(self, input_file_name):
         pipeline = Pipeline(self.resource_directory)
         pipeline.import_file(input_file_name, 'structures-new')
-        self._validate_structures_sequence(pipeline)
+        pipeline.do_validate_structures()
         pipeline.cleanup()
 
     def validate_dictionary(self, input_file_name):
         pipeline = Pipeline(self.resource_directory)
         pipeline.import_file(input_file_name, 'dictionary')
-        self._validate_dictionary_sequence(pipeline)
+        pipeline.do_validate_dictionary()
         pipeline.cleanup()
 
     def _strings_to_parse_sequence(self, pipeline):
@@ -92,12 +92,6 @@ class Runner:
         pipeline.do_tei_to_dictionary_multiple()
         pipeline.do_merge_dictionaries()
 
-    def _validate_structures_sequence(self, pipeline):
-        pipeline.do_validate_structures()
-
-    def _validate_dictionary_sequence(self, pipeline):
-        pipeline.do_validate_dictionary()
-
 
 class Pipeline:
 
diff --git a/scripts/process.py b/scripts/process.py
index 0e4296d..ad06b83 100644
--- a/scripts/process.py
+++ b/scripts/process.py
@@ -31,4 +31,4 @@ if (__name__ == '__main__'):
     elif (part_name == 'validate_dictionary'):
         runner.validate_dictionary(input_file_name)
     elif (part_name == 'all'):
-        runner.run_all(input_file_name, output_file_name, nlp, structure_file_name)
+        runner.run_all(input_file_name, output_file_name, structure_file_name)