IssueID #1487: incorporated script for translating JOS MSDs and dependencies
This commit is contained in:
parent
7651774914
commit
37d176c477
1
scripts/.gitignore
vendored
1
scripts/.gitignore
vendored
|
@ -4,3 +4,4 @@
|
||||||
/wani.py
|
/wani.py
|
||||||
/create_structures.py
|
/create_structures.py
|
||||||
/tei_to_dictionary.py
|
/tei_to_dictionary.py
|
||||||
|
/translate_jos.py
|
|
@ -4,6 +4,7 @@ TMP_DIRECTORY = '../tmp/structure_assignment'
|
||||||
# scripts
|
# scripts
|
||||||
TEI_SPLIT_SCRIPT_NAME = 'split_tei.py'
|
TEI_SPLIT_SCRIPT_NAME = 'split_tei.py'
|
||||||
CONLLU_TWEAK_SCRIPT_NAME = 'tweak_conllu.py'
|
CONLLU_TWEAK_SCRIPT_NAME = 'tweak_conllu.py'
|
||||||
|
TRANSLATION_SCRIPT_NAME = 'translate_jos.py'
|
||||||
CONLLU_TEI_SCRIPT_NAME = 'conllu_to_xml.py'
|
CONLLU_TEI_SCRIPT_NAME = 'conllu_to_xml.py'
|
||||||
MWE_EXTRACTION_SCRIPT_NAME = 'wani.py'
|
MWE_EXTRACTION_SCRIPT_NAME = 'wani.py'
|
||||||
STRUCTURE_ASSIGNMENT_SCRIPT_NAME = 'assign_structures.py'
|
STRUCTURE_ASSIGNMENT_SCRIPT_NAME = 'assign_structures.py'
|
||||||
|
@ -23,7 +24,8 @@ DICTIONARY_SCHEMA_FILE_NAME = '../resources/monolingual_dictionaries.xsd'
|
||||||
STRING_LIST_FILE_NAME = TMP_DIRECTORY + '/strings.txt'
|
STRING_LIST_FILE_NAME = TMP_DIRECTORY + '/strings.txt'
|
||||||
OBELIKS_RAW_FILE_NAME = TMP_DIRECTORY + '/obeliks_raw.conllu'
|
OBELIKS_RAW_FILE_NAME = TMP_DIRECTORY + '/obeliks_raw.conllu'
|
||||||
OBELIKS_TWEAKED_FILE_NAME = TMP_DIRECTORY + '/obeliks_tweaked.conllu'
|
OBELIKS_TWEAKED_FILE_NAME = TMP_DIRECTORY + '/obeliks_tweaked.conllu'
|
||||||
CLASSLA_FILE_NAME = TMP_DIRECTORY + '/classla.conllu'
|
CLASSLA_OUTPUT_FILE_NAME = TMP_DIRECTORY + '/classla_raw.conllu'
|
||||||
|
CLASSLA_TRANSLATED_FILE_NAME = TMP_DIRECTORY + '/classla_translated.conllu'
|
||||||
TEI_INIT_FILE_NAME = TMP_DIRECTORY + '/tei_initial.xml'
|
TEI_INIT_FILE_NAME = TMP_DIRECTORY + '/tei_initial.xml'
|
||||||
TEI_SINGLE_FILE_NAME = TMP_DIRECTORY + '/tei_single.xml'
|
TEI_SINGLE_FILE_NAME = TMP_DIRECTORY + '/tei_single.xml'
|
||||||
TEI_MULTIPLE_FILE_NAME = TMP_DIRECTORY + '/tei_multiple.xml'
|
TEI_MULTIPLE_FILE_NAME = TMP_DIRECTORY + '/tei_multiple.xml'
|
||||||
|
|
|
@ -31,8 +31,9 @@ def run_pipeline(input_file_name, output_file_name):
|
||||||
shutil.copyfile(input_file_name, STRING_LIST_FILE_NAME)
|
shutil.copyfile(input_file_name, STRING_LIST_FILE_NAME)
|
||||||
run_obeliks(STRING_LIST_FILE_NAME, OBELIKS_RAW_FILE_NAME)
|
run_obeliks(STRING_LIST_FILE_NAME, OBELIKS_RAW_FILE_NAME)
|
||||||
tweak_conllu(OBELIKS_RAW_FILE_NAME, OBELIKS_TWEAKED_FILE_NAME)
|
tweak_conllu(OBELIKS_RAW_FILE_NAME, OBELIKS_TWEAKED_FILE_NAME)
|
||||||
run_classla(OBELIKS_TWEAKED_FILE_NAME, CLASSLA_FILE_NAME)
|
run_classla(OBELIKS_TWEAKED_FILE_NAME, CLASSLA_OUTPUT_FILE_NAME)
|
||||||
run_tei_conversion(CLASSLA_FILE_NAME, TEI_INIT_FILE_NAME)
|
run_jos_translation(CLASSLA_OUTPUT_FILE_NAME, CLASSLA_TRANSLATED_FILE_NAME)
|
||||||
|
run_tei_conversion(CLASSLA_TRANSLATED_FILE_NAME, TEI_INIT_FILE_NAME)
|
||||||
shutil.copyfile(TEI_INIT_FILE_NAME, output_file_name)
|
shutil.copyfile(TEI_INIT_FILE_NAME, output_file_name)
|
||||||
|
|
||||||
def run_obeliks(list_file_name, conllu_file_name):
|
def run_obeliks(list_file_name, conllu_file_name):
|
||||||
|
@ -54,6 +55,12 @@ def run_classla(obeliks_file_name, classla_file_name):
|
||||||
result = nlp(doc)
|
result = nlp(doc)
|
||||||
result.conll_file.write_conll(classla_file_name)
|
result.conll_file.write_conll(classla_file_name)
|
||||||
|
|
||||||
|
def run_jos_translation(input_file_name, output_file_name):
    """Run the external JOS translation script on one file.

    Invokes ``python TRANSLATION_SCRIPT_NAME`` with ``input_file_name``
    passed as ``-infile``, ``TRANSLATION_FILE_NAME`` passed as ``-dict`` and
    ``output_file_name`` passed as ``-outfile``. The command line is printed
    before it is executed. The exit status of the script is not inspected.

    NOTE(review): TRANSLATION_FILE_NAME must be defined at module level; its
    definition is not visible next to this function -- confirm it exists.

    :param input_file_name: path handed to the script as ``-infile``
    :param output_file_name: path handed to the script as ``-outfile``
    """
    # Local import so this function does not depend on a file-level import.
    import subprocess

    print('Translating JOS ...')
    translate_arguments = [
        'python', TRANSLATION_SCRIPT_NAME,
        '-infile', input_file_name,
        '-dict', TRANSLATION_FILE_NAME,
        '-outfile', output_file_name,
    ]
    # Same text the space-joined command used to print.
    print(' '.join(translate_arguments))
    # Pass the arguments as a list (no shell) so file names containing
    # spaces or shell metacharacters reach the script as single arguments.
    subprocess.run(translate_arguments)
|
||||||
|
|
||||||
def run_tei_conversion(classla_file_name, tei_file_name):
|
def run_tei_conversion(classla_file_name, tei_file_name):
|
||||||
print('Converting to tei ...')
|
print('Converting to tei ...')
|
||||||
convert_command = ' '.join(['python', CONLLU_TEI_SCRIPT_NAME, '-o', tei_file_name, classla_file_name])
|
convert_command = ' '.join(['python', CONLLU_TEI_SCRIPT_NAME, '-o', tei_file_name, classla_file_name])
|
||||||
|
|
|
@ -21,6 +21,7 @@ cd scripts
|
||||||
ln -s ../nova_slovnica/python/scripts/conllu_to_xml.py .
|
ln -s ../nova_slovnica/python/scripts/conllu_to_xml.py .
|
||||||
ln -s ../nova_slovnica/python/scripts/assign_structures.py .
|
ln -s ../nova_slovnica/python/scripts/assign_structures.py .
|
||||||
ln -s ../nova_slovnica/python/scripts/create_structures.py .
|
ln -s ../nova_slovnica/python/scripts/create_structures.py .
|
||||||
|
ln -s ../nova_slovnica/python/scripts/translate_jos.py .
|
||||||
ln -s ../nova_slovnica/python/scripts/tei_to_dictionary.py .
|
ln -s ../nova_slovnica/python/scripts/tei_to_dictionary.py .
|
||||||
ln -s ../luscenje_struktur/wani.py .
|
ln -s ../luscenje_struktur/wani.py .
|
||||||
cd ..
|
cd ..
|
||||||
|
|
Loading…
Reference in New Issue
Block a user