Redmine #1835: moved command functions into class

This commit is contained in:
2021-03-18 13:48:14 +01:00
parent 59dd78f9b2
commit 753cfad794
2 changed files with 90 additions and 73 deletions

View File

@@ -1,69 +1,9 @@
import argparse
import tempfile
import shutil
from structure_assignment.pipeline import Pipeline, create_nlp
from structure_assignment.pipeline import Runner
resource_directory = '../resources'
def run_all(input_file_name, output_file_name, nlp, structure_file_name):
    """Run every stage: strings to parse, parse to dictionary, then validation.

    The intermediate parse is written to ``parsed.xml`` inside a scratch
    directory, which is removed before the validation steps run.

    Args:
        input_file_name: Path passed to ``strings_to_parse`` as its input.
        output_file_name: Path the dictionary is written to; it is then
            passed to ``validate_dictionary``.
        nlp: Object forwarded unchanged to ``strings_to_parse``.
        structure_file_name: Path the structures are written to; it is then
            passed to ``validate_structures``.
    """
    # The context manager removes the scratch directory even when a stage
    # raises; a bare mkdtemp()/rmtree() pair would leave it behind on failure.
    with tempfile.TemporaryDirectory() as tmp_directory:
        tmp_file_name = tmp_directory + '/parsed.xml'
        strings_to_parse(input_file_name, tmp_file_name, nlp)
        parse_to_dictionary(tmp_file_name, output_file_name, structure_file_name)
    validate_structures(structure_file_name)
    validate_dictionary(output_file_name)
def strings_to_dictionary(input_file_name, output_file_name, nlp, structure_file_name):
    """Chain ``strings_to_parse`` and ``parse_to_dictionary`` via a scratch file.

    The intermediate parse is written to ``parsed.xml`` inside a scratch
    directory that exists only for the duration of this call.

    Args:
        input_file_name: Path passed to ``strings_to_parse`` as its input.
        output_file_name: Path the dictionary is written to.
        nlp: Object forwarded unchanged to ``strings_to_parse``.
        structure_file_name: Path the structures are written to.
    """
    # The context manager removes the scratch directory even when a stage
    # raises; a bare mkdtemp()/rmtree() pair would leave it behind on failure.
    with tempfile.TemporaryDirectory() as tmp_directory:
        tmp_file_name = tmp_directory + '/parsed.xml'
        strings_to_parse(input_file_name, tmp_file_name, nlp)
        parse_to_dictionary(tmp_file_name, output_file_name, structure_file_name)
def strings_to_parse(input_file_name, output_file_name, nlp):
    """Import a strings list, run the tokenise and tweak steps, and export.

    Args:
        input_file_name: Path imported into the pipeline as 'strings-list'.
        output_file_name: Path the 'obeliks-tweaked' result is exported to.
        nlp: Object handed to the ``Pipeline`` constructor.
    """
    pipeline = Pipeline(resource_directory, nlp)
    try:
        pipeline.import_file(input_file_name, 'strings-list')
        pipeline.do_tokenise()
        pipeline.do_tweak_conllu()
        pipeline.export_file(output_file_name, 'obeliks-tweaked')
        # NOTE(review): the remaining steps and the 'tei-initial' export are
        # disabled, while parse_to_dictionary imports its input as
        # 'tei-initial' -- confirm this is intentional before relying on the
        # chained commands.
        # pipeline.do_parse()
        # pipeline.do_translate_jos()
        # pipeline.do_conllu_to_tei()
        # pipeline.export_file(output_file_name, 'tei-initial')
    finally:
        # Run cleanup even when a step raises, so a failed run does not skip it.
        pipeline.cleanup()
def parse_to_dictionary(input_file_name, output_file_name, structure_file_name):
    """Run the assignment steps on a 'tei-initial' file and export the results.

    Args:
        input_file_name: Path imported into the pipeline as 'tei-initial'.
        output_file_name: Path the 'dictionary' result is exported to.
        structure_file_name: Path the 'structures-new' result is exported to.
    """
    pipeline = Pipeline(resource_directory)
    try:
        pipeline.import_file(input_file_name, 'tei-initial')
        pipeline.do_split_tei()
        # Single-component pass.
        pipeline.do_assign_single()
        pipeline.do_tei_to_dictionary_single()
        # Multiple-component pass: a first assignment, creation of
        # structures, then a second assignment.
        pipeline.do_find_structure_units_first()
        pipeline.do_assign_multiple_first()
        pipeline.do_create_structures()
        pipeline.do_find_structure_units_second()
        pipeline.do_assign_multiple_second()
        pipeline.do_tei_to_dictionary_multiple()
        pipeline.do_merge_dictionaries()
        pipeline.export_file(output_file_name, 'dictionary')
        pipeline.export_file(structure_file_name, 'structures-new')
    finally:
        # Run cleanup even when a step raises, so a failed run does not skip it.
        pipeline.cleanup()
def validate_structures(input_file_name):
    """Import a 'structures-new' file and run the structure validation step.

    Args:
        input_file_name: Path imported into the pipeline as 'structures-new'.
    """
    pipeline = Pipeline(resource_directory)
    try:
        pipeline.import_file(input_file_name, 'structures-new')
        pipeline.do_validate_structures()
    finally:
        # Run cleanup even when validation raises.
        pipeline.cleanup()
def validate_dictionary(input_file_name):
    """Import a 'dictionary' file and run the dictionary validation step.

    Args:
        input_file_name: Path imported into the pipeline as 'dictionary'.
    """
    pipeline = Pipeline(resource_directory)
    try:
        pipeline.import_file(input_file_name, 'dictionary')
        pipeline.do_validate_dictionary()
    finally:
        # Run cleanup even when validation raises.
        pipeline.cleanup()
if (__name__ == '__main__'):
arg_parser = argparse.ArgumentParser(description='Run part or all of structure pipeline.')
@@ -78,17 +18,17 @@ if (__name__ == '__main__'):
# NOTE(review): this hunk shows the old free-function calls and the new
# Runner method calls interleaved without +/- markers; in each branch the
# `runner.*` line appears to be the replacement for the line(s) above it.
output_file_name = arguments.outfile
structure_file_name = arguments.structures
# Only these parts are flagged as needing the NLP object; the flag is handed
# to Runner at construction.
nlp_needed = part_name in {'strings_to_parse', 'strings_to_dictionary', 'all'}
runner = Runner(resource_directory, nlp_needed)
if (part_name == 'strings_to_parse'):
nlp = create_nlp(resource_directory)
strings_to_parse(input_file_name, output_file_name, nlp)
runner.strings_to_parse(input_file_name, output_file_name)
elif (part_name == 'strings_to_dictionary'):
nlp = create_nlp(resource_directory)
strings_to_dictionary(input_file_name, output_file_name, nlp, structure_file_name)
runner.strings_to_dictionary(input_file_name, output_file_name, structure_file_name)
elif (part_name == 'parse_to_dictionary'):
parse_to_dictionary(input_file_name, output_file_name, structure_file_name)
runner.parse_to_dictionary(input_file_name, output_file_name, structure_file_name)
elif (part_name == 'validate_structures'):
validate_structures(input_file_name)
runner.validate_structures(input_file_name)
elif (part_name == 'validate_dictionary'):
validate_dictionary(input_file_name)
runner.validate_dictionary(input_file_name)
elif (part_name == 'all'):
# NOTE(review): `nlp` is not assigned on this branch in any visible line, and
# the other runner.* calls above no longer pass it -- this looks like a
# NameError at runtime; confirm against Runner.run_all's signature.
run_all(input_file_name, output_file_name, nlp, structure_file_name)
runner.run_all(input_file_name, output_file_name, nlp, structure_file_name)