Redmine #1487: fixed wani cleanup and tweaked runner parameters
This commit is contained in:
parent
bb4e045c1d
commit
e9eff0658f
|
@ -22,7 +22,7 @@ if (__name__ == '__main__'):
|
||||||
output_structure_file_name = arguments.outstructs
|
output_structure_file_name = arguments.outstructs
|
||||||
|
|
||||||
nlp_needed = mode in {'strings_to_parse', 'strings_to_dictionary', 'all'}
|
nlp_needed = mode in {'strings_to_parse', 'strings_to_dictionary', 'all'}
|
||||||
runner = Runner(classla_directory, nlp_needed, wani_file_name)
|
runner = Runner(nlp_needed, classla_directory, wani_file_name)
|
||||||
if (mode == 'strings_to_parse'):
|
if (mode == 'strings_to_parse'):
|
||||||
runner.strings_to_parse(input_file_name, output_file_name)
|
runner.strings_to_parse(input_file_name, output_file_name)
|
||||||
elif (mode == 'strings_to_dictionary'):
|
elif (mode == 'strings_to_dictionary'):
|
||||||
|
|
|
@ -13,7 +13,7 @@ app = Flask(__name__)
|
||||||
api_prefix = os.environ['API_PREFIX']
|
api_prefix = os.environ['API_PREFIX']
|
||||||
resource_directory = os.environ['API_RESOURCE_DIR']
|
resource_directory = os.environ['API_RESOURCE_DIR']
|
||||||
|
|
||||||
runner = Runner(resource_directory, True)
|
runner = Runner(True, resource_directory)
|
||||||
|
|
||||||
|
|
||||||
@app.route(api_prefix + '/strings_to_parse', methods=['GET', 'POST'])
|
@app.route(api_prefix + '/strings_to_parse', methods=['GET', 'POST'])
|
||||||
|
|
|
@ -15,13 +15,13 @@ from conversion_utils.tei_to_dictionary import convert as tei_to_dictionary
|
||||||
|
|
||||||
class Runner:
|
class Runner:
|
||||||
|
|
||||||
def __init__(self, classla_directory, nlp_needed, wani=None):
|
def __init__(self, nlp_needed, classla_directory=None, wani_file_name=None):
|
||||||
self.classla_directory = classla_directory
|
self.classla_directory = classla_directory
|
||||||
if (nlp_needed):
|
if (nlp_needed):
|
||||||
NLP_CONFIG_MAP['dir'] = classla_directory
|
NLP_CONFIG_MAP['dir'] = classla_directory
|
||||||
self.nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP)
|
self.nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP)
|
||||||
if (wani is not None):
|
if (wani_file_name is not None):
|
||||||
self._provide_wani(wani)
|
self._provide_wani(wani_file_name)
|
||||||
|
|
||||||
def _provide_wani(self, wani_file_name): # TODO: remove once wani is incorporated into luscenje_struktur package
|
def _provide_wani(self, wani_file_name): # TODO: remove once wani is incorporated into luscenje_struktur package
|
||||||
self.wani_directory = tempfile.mkdtemp()
|
self.wani_directory = tempfile.mkdtemp()
|
||||||
|
@ -39,7 +39,7 @@ class Runner:
|
||||||
pipeline.export_file(output_structure_file_name, 'structures-new')
|
pipeline.export_file(output_structure_file_name, 'structures-new')
|
||||||
pipeline.do_validate_dictionary()
|
pipeline.do_validate_dictionary()
|
||||||
pipeline.export_file(output_file_name, 'dictionary')
|
pipeline.export_file(output_file_name, 'dictionary')
|
||||||
pipeline.cleanup()
|
self.cleanup(pipeline)
|
||||||
|
|
||||||
def strings_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
|
def strings_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
|
||||||
pipeline = Pipeline(self.nlp)
|
pipeline = Pipeline(self.nlp)
|
||||||
|
@ -49,14 +49,14 @@ class Runner:
|
||||||
self._parse_to_dictionary_sequence(pipeline)
|
self._parse_to_dictionary_sequence(pipeline)
|
||||||
pipeline.export_file(output_file_name, 'dictionary')
|
pipeline.export_file(output_file_name, 'dictionary')
|
||||||
pipeline.export_file(output_structure_file_name, 'structures-new')
|
pipeline.export_file(output_structure_file_name, 'structures-new')
|
||||||
pipeline.cleanup()
|
self.cleanup(pipeline)
|
||||||
|
|
||||||
def strings_to_parse(self, input_file_name, output_file_name):
|
def strings_to_parse(self, input_file_name, output_file_name):
|
||||||
pipeline = Pipeline(self.nlp)
|
pipeline = Pipeline(self.nlp)
|
||||||
pipeline.import_file(input_file_name, 'strings-list')
|
pipeline.import_file(input_file_name, 'strings-list')
|
||||||
self._strings_to_parse_sequence(pipeline)
|
self._strings_to_parse_sequence(pipeline)
|
||||||
pipeline.export_file(output_file_name, 'tei-initial')
|
pipeline.export_file(output_file_name, 'tei-initial')
|
||||||
pipeline.cleanup()
|
self.cleanup(pipeline)
|
||||||
|
|
||||||
def parse_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
|
def parse_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
|
||||||
pipeline = Pipeline()
|
pipeline = Pipeline()
|
||||||
|
@ -65,19 +65,19 @@ class Runner:
|
||||||
self._parse_to_dictionary_sequence(pipeline)
|
self._parse_to_dictionary_sequence(pipeline)
|
||||||
pipeline.export_file(output_file_name, 'dictionary')
|
pipeline.export_file(output_file_name, 'dictionary')
|
||||||
pipeline.export_file(output_structure_file_name, 'structures-new')
|
pipeline.export_file(output_structure_file_name, 'structures-new')
|
||||||
pipeline.cleanup()
|
self.cleanup(pipeline)
|
||||||
|
|
||||||
def validate_structures(self, input_file_name):
|
def validate_structures(self, input_file_name):
|
||||||
pipeline = Pipeline()
|
pipeline = Pipeline()
|
||||||
pipeline.import_file(input_file_name, 'structures-new')
|
pipeline.import_file(input_file_name, 'structures-new')
|
||||||
pipeline.do_validate_structures()
|
pipeline.do_validate_structures()
|
||||||
pipeline.cleanup()
|
self.cleanup(pipeline)
|
||||||
|
|
||||||
def validate_dictionary(self, input_file_name):
|
def validate_dictionary(self, input_file_name):
|
||||||
pipeline = Pipeline()
|
pipeline = Pipeline()
|
||||||
pipeline.import_file(input_file_name, 'dictionary')
|
pipeline.import_file(input_file_name, 'dictionary')
|
||||||
pipeline.do_validate_dictionary()
|
pipeline.do_validate_dictionary()
|
||||||
pipeline.cleanup()
|
self.cleanup(pipeline)
|
||||||
|
|
||||||
def _strings_to_parse_sequence(self, pipeline):
|
def _strings_to_parse_sequence(self, pipeline):
|
||||||
pipeline.do_tokenise()
|
pipeline.do_tokenise()
|
||||||
|
@ -92,6 +92,11 @@ class Runner:
|
||||||
pipeline.do_assign_other_structures()
|
pipeline.do_assign_other_structures()
|
||||||
pipeline.do_tei_to_dictionary()
|
pipeline.do_tei_to_dictionary()
|
||||||
|
|
||||||
|
def cleanup(self, pipeline):
    """Remove the runner's temporary wani directory, then clean up the pipeline.

    ``self.wani_directory`` is assigned only by ``_provide_wani``, which
    ``__init__`` calls only when a wani file name was supplied. A runner
    built without one (e.g. ``Runner(True, resource_directory)``) has no
    such attribute, so it is looked up defensively here instead of raising
    ``AttributeError`` and skipping the pipeline cleanup.

    :param pipeline: object whose ``cleanup()`` method is invoked afterwards.
    """
    wani_directory = getattr(self, 'wani_directory', None)
    if (wani_directory is not None):
        # Second positional argument is ignore_errors, so an already
        # removed directory does not raise.
        shutil.rmtree(wani_directory, True)
    pipeline.cleanup()
|
||||||
|
|
||||||
|
|
||||||
class Pipeline:
|
class Pipeline:
|
||||||
|
|
||||||
def __init__(self, nlp=None):
|
def __init__(self, nlp=None):
|
||||||
|
@ -220,4 +225,3 @@ class Pipeline:
|
||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
shutil.rmtree(self.tmp_directory, True)
|
shutil.rmtree(self.tmp_directory, True)
|
||||||
shutil.rmtree(self.wani_directory, True)
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user