# structure_assignment/scripts/pipeline1.py

import argparse
import classla
from classla import Document
from classla.models.common.conll import CoNLLFile
import structure_assignment.pipeline as pipeline
# Command-line interface. The options are parsed as soon as this module is
# loaded, and the resulting values are exposed as module-level names.
arg_parser = argparse.ArgumentParser(description='Parse Slovene strings and convert to TEI.')
arg_parser.add_argument('-inlist', type=str, help='Input list file')
arg_parser.add_argument('-outtei', type=str, help='Output TEI file')
arguments = arg_parser.parse_args()

# Neither option is declared as required, so each value is None when the
# corresponding option is omitted from the command line.
input_file_name = arguments.inlist
output_file_name = arguments.outtei
def run_pipeline(input_file_name, output_file_name):
    """Run every stage of the strings-list to TEI pipeline, in order.

    The shared ``pipeline`` module is initialised, the input file is imported
    as a ``'strings-list'``, the processing stages are invoked one after
    another, and the result is exported as ``'tei-initial'``.

    Args:
        input_file_name: Path handed to ``pipeline.import_file``.
        output_file_name: Path handed to ``pipeline.export_file``.
    """
    # NOTE(review): resource_dir is a relative path; presumably it is resolved
    # against the current working directory -- confirm in pipeline.initialise.
    pipeline.initialise(
        temp_dir='/tmp/structure_assignment_pipeline1',
        resource_dir='../resources',
    )
    pipeline.import_file(input_file_name, 'strings-list')

    # Argument-less stages, listed in execution order. Each name is looked up
    # on ``pipeline`` immediately before it is called.
    stage_names = (
        'do_tokenise',
        'do_tweak_conllu',
        'load_classla_models',
        'do_parse',
        'do_translate_jos',
        'do_conllu_to_tei',
    )
    for stage_name in stage_names:
        getattr(pipeline, stage_name)()

    pipeline.export_file(output_file_name, 'tei-initial')
# Script entry point: run the pipeline only when executed directly.
if __name__ == '__main__':
    run_pipeline(
        input_file_name=input_file_name,
        output_file_name=output_file_name,
    )