Redmine #1835: separated loading jos models and running classla

Cyprian Laskowski 2021-03-01 16:04:01 +01:00
parent 351865c50e
commit c0e43f2260
3 changed files with 7 additions and 2 deletions

View File

@@ -12,6 +12,7 @@ api_prefix = os.environ['API_PREFIX']
 resource_directory = os.environ['API_RESOURCE_DIR']
 tmp_directory = os.environ['API_TMP']
 pipeline.initialise(temp_dir=tmp_directory, resource_dir=resource_directory)
+pipeline.load_classla_models()
 
 @app.route(api_prefix + '/test/<string:string>', methods=['GET'])
 def test(string):

View File

@@ -30,13 +30,12 @@ def __get_tmp_file_name(file_key):
     return tmp_directory + '/' + FILE_NAME_MAP[file_key]
 
 def initialise(**argument_map):
-    global tmp_directory, resource_directory, nlp
+    global tmp_directory, resource_directory
     tmp_directory = argument_map['temp_dir']
     resource_directory = argument_map['resource_dir']
     shutil.rmtree(tmp_directory, True)
     os.makedirs(tmp_directory, exist_ok=True)
     NLP_CONFIG_MAP['models_dir'] = resource_directory + '/classla'
-    nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP)
 
 def import_file(file_name, file_key):
     shutil.copyfile(file_name, __get_tmp_file_name(file_key))
@@ -51,6 +50,10 @@ def do_tweak_conllu():
     output_file_name = __get_tmp_file_name('obeliks-tweaked')
     tweak_conllu(input_file_name, output_file_name)
 
+def load_classla_models():
+    global nlp
+    nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP)
+
 def do_parse():
     input_file_name = __get_tmp_file_name('obeliks-tweaked')
     output_file_name = __get_tmp_file_name('classla-parsed')
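Taken together, the two hunks above split the old initialise() into a cheap setup step (working directories and the classla models path) and an explicit, expensive model-loading step. As a reading aid only, here is a consolidated sketch of the affected functions after this commit, assuming the module-level imports and the NLP_CONFIG_MAP dictionary already present in the file:

import os
import shutil
import classla

def initialise(**argument_map):
    # Cheap setup: working directories plus the classla models path; no model loading.
    global tmp_directory, resource_directory
    tmp_directory = argument_map['temp_dir']
    resource_directory = argument_map['resource_dir']
    shutil.rmtree(tmp_directory, True)
    os.makedirs(tmp_directory, exist_ok=True)
    NLP_CONFIG_MAP['models_dir'] = resource_directory + '/classla'

def load_classla_models():
    # Expensive step, now separate: build the classla pipeline and store it in the
    # module-level nlp, which the parsing step reads later.
    global nlp
    nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP)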

View File

@@ -18,6 +18,7 @@ def run_pipeline(input_file_name, output_file_name):
     pipeline.import_file(input_file_name, 'strings-list')
     pipeline.do_tokenise()
     pipeline.do_tweak_conllu()
+    pipeline.load_classla_models()
     pipeline.do_parse()
     pipeline.do_translate_jos()
     pipeline.do_conllu_to_tei()
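For callers, the change makes the classla model load an explicit step: the web service in the first file triggers it once at startup, right after pipeline.initialise(), while run_pipeline() in the last file calls it just before parsing. A minimal illustrative caller follows; only the function names come from the diff, and the paths and input file name are hypothetical:

import pipeline

pipeline.initialise(temp_dir='/tmp/pipeline', resource_dir='/opt/resources')  # hypothetical paths
pipeline.load_classla_models()                      # now an explicit, one-off step
pipeline.import_file('input.txt', 'strings-list')   # hypothetical input file
pipeline.do_tokenise()
pipeline.do_tweak_conllu()
pipeline.do_parse()                                 # uses the models loaded above
pipeline.do_translate_jos()
pipeline.do_conllu_to_tei()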