structure_assignment/scripts/process.py

37 lines
1.7 KiB
Python

import argparse
from structure_assignment.pipeline import Runner
# Location of the classla resources handed to Runner. This is a relative path,
# so it presumably depends on the directory the script is launched from --
# TODO confirm how Runner resolves it.
classla_directory = '../resources/classla'

# Every mode this script can dispatch to a Runner method.
MODES = (
    'strings_to_parse',
    'strings_to_dictionary',
    'parse_to_dictionary',
    'validate_structures',
    'validate_dictionary',
    'all',
)

# Modes for which Runner is constructed with its first argument
# (nlp_needed) set to True; all other modes pass False.
NLP_MODES = frozenset({'strings_to_parse', 'strings_to_dictionary', 'all'})


def build_arg_parser():
    """Create the command-line parser for the structure pipeline script.

    ``-mode`` is mandatory and restricted to ``MODES`` so that a missing or
    misspelled mode is rejected by argparse (usage message, exit status 2)
    instead of the script constructing a Runner and then doing nothing.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    arg_parser = argparse.ArgumentParser(description='Run part or all of structure pipeline.')
    arg_parser.add_argument('-mode', type=str, required=True, choices=MODES, help='Mode')
    arg_parser.add_argument('-infile', type=str, help='Input file')
    arg_parser.add_argument('-outfile', type=str, help='Output file')
    arg_parser.add_argument('-instructs', type=str, help='Input structure file')
    arg_parser.add_argument('-outstructs', type=str, help='Output structure file')
    return arg_parser


def main(argv=None):
    """Parse the command line and run the requested pipeline step.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Raises:
        SystemExit: Raised by argparse when ``-mode`` is missing or is not
            one of ``MODES``.
    """
    arguments = build_arg_parser().parse_args(argv)
    mode = arguments.mode
    input_file_name = arguments.infile
    output_file_name = arguments.outfile
    input_structure_file_name = arguments.instructs
    output_structure_file_name = arguments.outstructs

    # The mode has already been validated, so the Runner is only built for
    # a request that will actually be dispatched below.
    runner = Runner(mode in NLP_MODES, classla_directory)

    if mode == 'strings_to_parse':
        runner.strings_to_parse(input_file_name, output_file_name)
    elif mode == 'strings_to_dictionary':
        runner.strings_to_dictionary(input_file_name, output_file_name,
                                     input_structure_file_name, output_structure_file_name)
    elif mode == 'parse_to_dictionary':
        runner.parse_to_dictionary(input_file_name, output_file_name,
                                   input_structure_file_name, output_structure_file_name)
    elif mode == 'validate_structures':
        runner.validate_structures(input_file_name)
    elif mode == 'validate_dictionary':
        runner.validate_dictionary(input_file_name)
    elif mode == 'all':
        runner.run_all(input_file_name, output_file_name,
                       input_structure_file_name, output_structure_file_name)


if __name__ == '__main__':
    main()