diff --git a/package/structure_assignment/api.py b/package/structure_assignment/api.py index 17c4ea3..5fe6381 100644 --- a/package/structure_assignment/api.py +++ b/package/structure_assignment/api.py @@ -10,8 +10,7 @@ import structure_assignment.pipeline as pipeline app = Flask(__name__) api_prefix = os.environ['API_PREFIX'] resource_directory = os.environ['API_RESOURCE_DIR'] -tmp_directory = os.environ['API_TMP'] -pipeline.initialise(temp_dir=tmp_directory, resource_dir=resource_directory) +pipeline.initialise(resource_dir=resource_directory) pipeline.load_classla_models() @app.route(api_prefix + '/test/', methods=['GET']) @@ -30,6 +29,7 @@ def test(string): pipeline.do_translate_jos() pipeline.do_conllu_to_tei() pipeline.export_file(parse_file_name, 'tei-initial') + # pipeline.cleanup() tei = lxml.parse(parse_file_name).getroot() message = lxml.tostring(tei, encoding='UTF-8', pretty_print=True).decode() ok = True @@ -38,4 +38,4 @@ def test(string): ok = False results = {'ok':ok, 'message':message} - return Response(message, mimetype='text/xml') \ No newline at end of file + return Response(message, mimetype='text/xml') diff --git a/package/structure_assignment/constants.py b/package/structure_assignment/constants.py index 4a4bd8f..247a52d 100644 --- a/package/structure_assignment/constants.py +++ b/package/structure_assignment/constants.py @@ -1,8 +1,5 @@ # scripts TEI_SPLIT_SCRIPT_NAME = 'split_tei.py' -CONLLU_TWEAK_SCRIPT_NAME = 'tweak_conllu.py' -TRANSLATION_SCRIPT_NAME = 'translate_jos.py' -CONLLU_TEI_SCRIPT_NAME = 'conllu_to_xml.py' MWE_EXTRACTION_SCRIPT_NAME = 'wani.py' STRUCTURE_SINGLE_ASSIGNMENT_SCRIPT_NAME = 'assign_single_structures.py' STRUCTURE_ASSIGNMENT_SCRIPT_NAME = 'assign_structures.py' @@ -36,22 +33,3 @@ FILE_NAME_MAP = {'strings-list': 'strings.txt', 'dictionary-multiple': 'dictionary_multiple.xml', 'dictionary': 'dictionary.xml' } - -# STRING_LIST_FILE_NAME = 'strings.txt' -# OBELIKS_RAW_FILE_NAME = 'obeliks_raw.conllu' -# OBELIKS_TWEAKED_FILE_NAME = 'obeliks_tweaked.conllu' -# CLASSLA_OUTPUT_FILE_NAME = 'classla_raw.conllu' -# CLASSLA_TRANSLATED_FILE_NAME = 'classla_translated.conllu' -# TEI_INIT_FILE_NAME = 'tei_initial.xml' -# TEI_SINGLE_FILE_NAME = 'tei_single.xml' -# TEI_SINGLE_STRUCTURE_FILE_NAME = 'tei_single_with_ids.xml' -# TEI_MULTIPLE_FILE_NAME = 'tei_multiple.xml' -# TEI_MULTIPLE_STRUCTURE_1_FILE_NAME = 'tei_multiple_with_ids1.xml' -# TEI_MULTIPLE_STRUCTURE_2_FILE_NAME = 'tei_multiple_with_ids2.xml' -# MWE_CSV_1_FILE_NAME = 'mwes1.csv' -# MWE_CSV_2_FILE_NAME = 'mwes2.csv' -# STRUCTURE_OLD_FILE_NAME = 'structures_old.xml' -# STRUCTURE_NEW_FILE_NAME = 'structures_new.xml' -# DICTIONARY_SINGLE_FILE_NAME = 'dictionary_single.xml' -# DICTIONARY_MULTIPLE_FILE_NAME = 'dictionary_multiple.xml' -# DICTIONARY_FILE_NAME = 'dictionary.xml' diff --git a/package/structure_assignment/pipeline.py b/package/structure_assignment/pipeline.py index 7caed7c..b111b14 100644 --- a/package/structure_assignment/pipeline.py +++ b/package/structure_assignment/pipeline.py @@ -1,6 +1,7 @@ import codecs import shutil import os +import tempfile import obeliks @@ -22,19 +23,16 @@ NLP_CONFIG_MAP = { XML_ID_PREFIX = 's' +tmp_directory = tempfile.mkdtemp() resource_directory = None -tmp_directory = None nlp = None def __get_tmp_file_name(file_key): return tmp_directory + '/' + FILE_NAME_MAP[file_key] def initialise(**argument_map): - global tmp_directory, resource_directory - tmp_directory = argument_map['temp_dir'] + global resource_directory resource_directory = argument_map['resource_dir'] - shutil.rmtree(tmp_directory, True) - os.makedirs(tmp_directory, exist_ok=True) NLP_CONFIG_MAP['models_dir'] = resource_directory + '/classla' def import_file(file_name, file_key): @@ -76,3 +74,6 @@ def do_conllu_to_tei(): def export_file(file_name, file_key): shutil.copyfile(__get_tmp_file_name(file_key), file_name) + +def cleanup(): + shutil.rmtree(tmp_directory, True)