structure_assignment/scripts/process.py

35 lines
1.5 KiB
Python

import argparse
from structure_assignment.pipeline import Runner
# Resource directory handed to Runner as its first constructor argument.
# NOTE(review): this is a relative path -- presumably resolved against the
# current working directory, so the script would have to be launched from the
# scripts directory; confirm against Runner.
resource_directory = '../resources'

# Part names for which Runner is constructed with its second argument
# (nlp_needed) set to True.
_NLP_PARTS = frozenset({'strings_to_parse', 'strings_to_dictionary', 'all'})

# For every accepted -part value: the Runner method to call and the names of
# the parsed command-line attributes passed to it, in positional order.
_PART_DISPATCH = {
    'strings_to_parse': ('strings_to_parse', ('infile', 'outfile')),
    'strings_to_dictionary': ('strings_to_dictionary', ('infile', 'outfile', 'structures')),
    'parse_to_dictionary': ('parse_to_dictionary', ('infile', 'outfile', 'structures')),
    'validate_structures': ('validate_structures', ('infile',)),
    'validate_dictionary': ('validate_dictionary', ('infile',)),
    'all': ('run_all', ('infile', 'outfile', 'structures')),
}


def _build_arg_parser():
    """Return the argument parser for this script.

    ``-part`` is required and restricted to the keys of ``_PART_DISPATCH`` so
    that a missing or misspelled part name is reported by argparse instead of
    silently doing nothing.
    """
    arg_parser = argparse.ArgumentParser(description='Run part or all of structure pipeline.')
    arg_parser.add_argument('-part', type=str, required=True,
                            choices=tuple(_PART_DISPATCH), help='Part name')
    arg_parser.add_argument('-infile', type=str, help='Input file')
    arg_parser.add_argument('-outfile', type=str, help='Output file')
    arg_parser.add_argument('-structures', type=str, help='Updated structure file')
    return arg_parser


def main(argv=None):
    """Parse the command line and run the requested pipeline part.

    Args:
        argv: Argument strings to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse (status 2) when ``-part`` is missing
            or is not one of the known part names.
    """
    arguments = _build_arg_parser().parse_args(argv)
    part_name = arguments.part
    method_name, attribute_names = _PART_DISPATCH[part_name]

    # The Runner is created only after the part name has been validated, so
    # an invalid invocation never pays for its construction.
    runner = Runner(resource_directory, part_name in _NLP_PARTS)
    call_arguments = [getattr(arguments, name) for name in attribute_names]
    getattr(runner, method_name)(*call_arguments)


if __name__ == '__main__':
    main()