Redmine #1487: fixed wani cleanup and tweaked runner parameters

This commit is contained in:
Cyprian Laskowski 2022-08-03 08:41:07 +02:00
parent bb4e045c1d
commit e9eff0658f
3 changed files with 16 additions and 12 deletions

View File

@ -22,7 +22,7 @@ if (__name__ == '__main__'):
output_structure_file_name = arguments.outstructs output_structure_file_name = arguments.outstructs
nlp_needed = mode in {'strings_to_parse', 'strings_to_dictionary', 'all'} nlp_needed = mode in {'strings_to_parse', 'strings_to_dictionary', 'all'}
runner = Runner(classla_directory, nlp_needed, wani_file_name) runner = Runner(nlp_needed, classla_directory, wani_file_name)
if (mode == 'strings_to_parse'): if (mode == 'strings_to_parse'):
runner.strings_to_parse(input_file_name, output_file_name) runner.strings_to_parse(input_file_name, output_file_name)
elif (mode == 'strings_to_dictionary'): elif (mode == 'strings_to_dictionary'):

View File

@ -13,7 +13,7 @@ app = Flask(__name__)
api_prefix = os.environ['API_PREFIX'] api_prefix = os.environ['API_PREFIX']
resource_directory = os.environ['API_RESOURCE_DIR'] resource_directory = os.environ['API_RESOURCE_DIR']
runner = Runner(resource_directory, True) runner = Runner(True, resource_directory)
@app.route(api_prefix + '/strings_to_parse', methods=['GET', 'POST']) @app.route(api_prefix + '/strings_to_parse', methods=['GET', 'POST'])

View File

@ -15,13 +15,13 @@ from conversion_utils.tei_to_dictionary import convert as tei_to_dictionary
class Runner: class Runner:
def __init__(self, nlp_needed, classla_directory=None, wani_file_name=None):
    """Configure the runner, optionally loading classla and the wani helper.

    Args:
        nlp_needed: If true, build the classla 'sl' pipeline and keep it in
            ``self.nlp``; otherwise ``self.nlp`` stays ``None``.
        classla_directory: Stored on the instance and written to
            ``NLP_CONFIG_MAP['dir']`` before the pipeline is built.
        wani_file_name: If not ``None``, handed to ``_provide_wani``.
    """
    self.classla_directory = classla_directory
    # Give the optional attributes a defined value up front so that later
    # reads can compare against None instead of raising AttributeError
    # when NLP or wani was not requested.
    self.nlp = None
    self.wani_directory = None
    if (nlp_needed):
        # NOTE: this writes into NLP_CONFIG_MAP itself, so the 'dir' entry
        # is visible to anything else that uses the same mapping.
        NLP_CONFIG_MAP['dir'] = classla_directory
        self.nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP)
    if (wani_file_name is not None):
        self._provide_wani(wani_file_name)
def _provide_wani(self, wani_file_name): # TODO: remove once wani is incorporated into luscenje_struktur package def _provide_wani(self, wani_file_name): # TODO: remove once wani is incorporated into luscenje_struktur package
self.wani_directory = tempfile.mkdtemp() self.wani_directory = tempfile.mkdtemp()
@ -39,7 +39,7 @@ class Runner:
pipeline.export_file(output_structure_file_name, 'structures-new') pipeline.export_file(output_structure_file_name, 'structures-new')
pipeline.do_validate_dictionary() pipeline.do_validate_dictionary()
pipeline.export_file(output_file_name, 'dictionary') pipeline.export_file(output_file_name, 'dictionary')
pipeline.cleanup() self.cleanup(pipeline)
def strings_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name): def strings_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
pipeline = Pipeline(self.nlp) pipeline = Pipeline(self.nlp)
@ -49,14 +49,14 @@ class Runner:
self._parse_to_dictionary_sequence(pipeline) self._parse_to_dictionary_sequence(pipeline)
pipeline.export_file(output_file_name, 'dictionary') pipeline.export_file(output_file_name, 'dictionary')
pipeline.export_file(output_structure_file_name, 'structures-new') pipeline.export_file(output_structure_file_name, 'structures-new')
pipeline.cleanup() self.cleanup(pipeline)
def strings_to_parse(self, input_file_name, output_file_name):
    """Read a 'strings-list' file, run the parse sequence, write 'tei-initial'.

    Args:
        input_file_name: File imported into the pipeline as 'strings-list'.
        output_file_name: Destination for the 'tei-initial' export.
    """
    pipeline = Pipeline(self.nlp)
    try:
        pipeline.import_file(input_file_name, 'strings-list')
        self._strings_to_parse_sequence(pipeline)
        pipeline.export_file(output_file_name, 'tei-initial')
    finally:
        # Run cleanup even when a step above raises, so temporary
        # directories are not left behind by a failed run.
        self.cleanup(pipeline)
def parse_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name): def parse_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
pipeline = Pipeline() pipeline = Pipeline()
@ -65,19 +65,19 @@ class Runner:
self._parse_to_dictionary_sequence(pipeline) self._parse_to_dictionary_sequence(pipeline)
pipeline.export_file(output_file_name, 'dictionary') pipeline.export_file(output_file_name, 'dictionary')
pipeline.export_file(output_structure_file_name, 'structures-new') pipeline.export_file(output_structure_file_name, 'structures-new')
pipeline.cleanup() self.cleanup(pipeline)
def validate_structures(self, input_file_name):
    """Import a 'structures-new' file and run structure validation on it.

    Args:
        input_file_name: File imported into the pipeline as 'structures-new'.
    """
    pipeline = Pipeline()
    try:
        pipeline.import_file(input_file_name, 'structures-new')
        # NOTE(review): presumably signals an invalid file by raising;
        # confirm against Pipeline.do_validate_structures.
        pipeline.do_validate_structures()
    finally:
        # Run cleanup even when import or validation raises.
        self.cleanup(pipeline)
def validate_dictionary(self, input_file_name):
    """Import a 'dictionary' file and run dictionary validation on it.

    Args:
        input_file_name: File imported into the pipeline as 'dictionary'.
    """
    pipeline = Pipeline()
    try:
        pipeline.import_file(input_file_name, 'dictionary')
        # NOTE(review): presumably signals an invalid file by raising;
        # confirm against Pipeline.do_validate_dictionary.
        pipeline.do_validate_dictionary()
    finally:
        # Run cleanup even when import or validation raises.
        self.cleanup(pipeline)
def _strings_to_parse_sequence(self, pipeline): def _strings_to_parse_sequence(self, pipeline):
pipeline.do_tokenise() pipeline.do_tokenise()
@ -92,6 +92,11 @@ class Runner:
pipeline.do_assign_other_structures() pipeline.do_assign_other_structures()
pipeline.do_tei_to_dictionary() pipeline.do_tei_to_dictionary()
def cleanup(self, pipeline):
    """Remove the runner's wani directory (if any), then clean up *pipeline*.

    Args:
        pipeline: Object whose ``cleanup()`` method is called afterwards.
    """
    # The wani directory attribute is only assigned when a wani file was
    # supplied, so it may be missing (or None) on this instance; reading it
    # unconditionally would raise before the pipeline gets cleaned up.
    wani_directory = getattr(self, 'wani_directory', None)
    if (wani_directory is not None):
        shutil.rmtree(wani_directory, ignore_errors=True)
    # NOTE(review): the directory is removed after every run although the
    # runner object may be reused; confirm nothing needs it afterwards.
    pipeline.cleanup()
class Pipeline: class Pipeline:
def __init__(self, nlp=None): def __init__(self, nlp=None):
@ -220,4 +225,3 @@ class Pipeline:
def cleanup(self):
    """Delete this pipeline's temporary directory, ignoring removal errors."""
    # Only tmp_directory is removed here; the wani directory is not an
    # attribute this method touches.
    shutil.rmtree(self.tmp_directory, ignore_errors=True)