Redmine #1835: created pipeline class and adapted pipeline1
parent
e3aa8a3aee
commit
5395d8def0
@ -1,28 +1,26 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
import classla
|
from structure_assignment.pipeline import Pipeline, create_nlp
|
||||||
from classla import Document
|
|
||||||
from classla.models.common.conll import CoNLLFile
|
|
||||||
|
|
||||||
import structure_assignment.pipeline as pipeline
|
def run_pipeline(nlp, input_file_name, output_file_name):
|
||||||
|
pipeline = Pipeline(nlp)
|
||||||
arg_parser = argparse.ArgumentParser(description='Parse Slovene strings and convert to TEI.')
|
|
||||||
arg_parser.add_argument('-inlist', type=str, help='Input list file')
|
|
||||||
arg_parser.add_argument('-outtei', type=str, help='Output TEI file')
|
|
||||||
arguments = arg_parser.parse_args()
|
|
||||||
input_file_name = arguments.inlist
|
|
||||||
output_file_name = arguments.outtei
|
|
||||||
|
|
||||||
def run_pipeline(input_file_name, output_file_name):
|
|
||||||
pipeline.initialise(temp_dir='/tmp/structure_assignment_pipeline1', resource_dir='../resources')
|
|
||||||
pipeline.import_file(input_file_name, 'strings-list')
|
pipeline.import_file(input_file_name, 'strings-list')
|
||||||
pipeline.do_tokenise()
|
pipeline.do_tokenise()
|
||||||
pipeline.do_tweak_conllu()
|
pipeline.do_tweak_conllu()
|
||||||
pipeline.load_classla_models()
|
|
||||||
pipeline.do_parse()
|
pipeline.do_parse()
|
||||||
pipeline.do_translate_jos()
|
pipeline.do_translate_jos()
|
||||||
pipeline.do_conllu_to_tei()
|
pipeline.do_conllu_to_tei()
|
||||||
pipeline.export_file(output_file_name, 'tei-initial')
|
pipeline.export_file(output_file_name, 'tei-initial')
|
||||||
|
pipeline.cleanup()
|
||||||
|
|
||||||
if (__name__ == '__main__'):
|
if (__name__ == '__main__'):
|
||||||
|
|
||||||
|
arg_parser = argparse.ArgumentParser(description='Parse Slovene strings and convert to TEI.')
|
||||||
|
arg_parser.add_argument('-inlist', type=str, help='Input list file')
|
||||||
|
arg_parser.add_argument('-outtei', type=str, help='Output TEI file')
|
||||||
|
arguments = arg_parser.parse_args()
|
||||||
|
input_file_name = arguments.inlist
|
||||||
|
output_file_name = arguments.outtei
|
||||||
|
|
||||||
|
nlp = create_nlp('../resources')
|
||||||
run_pipeline(input_file_name, output_file_name)
|
run_pipeline(input_file_name, output_file_name)
|
||||||
|
Loading…
Reference in new issue