IssueID #1487: incorporated script for translating JOS MSDs and dependencies
This commit is contained in:
parent
7651774914
commit
37d176c477
3
scripts/.gitignore
vendored
3
scripts/.gitignore
vendored
|
@ -3,4 +3,5 @@
|
|||
/conllu_to_xml.py
|
||||
/wani.py
|
||||
/create_structures.py
|
||||
/tei_to_dictionary.py
|
||||
/tei_to_dictionary.py
|
||||
/translate_jos.py
|
|
@ -4,6 +4,7 @@ TMP_DIRECTORY = '../tmp/structure_assignment'
|
|||
# scripts
|
||||
TEI_SPLIT_SCRIPT_NAME = 'split_tei.py'
|
||||
CONLLU_TWEAK_SCRIPT_NAME = 'tweak_conllu.py'
|
||||
TRANSLATION_SCRIPT_NAME = 'translate_jos.py'
|
||||
CONLLU_TEI_SCRIPT_NAME = 'conllu_to_xml.py'
|
||||
MWE_EXTRACTION_SCRIPT_NAME = 'wani.py'
|
||||
STRUCTURE_ASSIGNMENT_SCRIPT_NAME = 'assign_structures.py'
|
||||
|
@ -23,7 +24,8 @@ DICTIONARY_SCHEMA_FILE_NAME = '../resources/monolingual_dictionaries.xsd'
|
|||
STRING_LIST_FILE_NAME = TMP_DIRECTORY + '/strings.txt'
|
||||
OBELIKS_RAW_FILE_NAME = TMP_DIRECTORY + '/obeliks_raw.conllu'
|
||||
OBELIKS_TWEAKED_FILE_NAME = TMP_DIRECTORY + '/obeliks_tweaked.conllu'
|
||||
CLASSLA_FILE_NAME = TMP_DIRECTORY + '/classla.conllu'
|
||||
CLASSLA_OUTPUT_FILE_NAME = TMP_DIRECTORY + '/classla_raw.conllu'
|
||||
CLASSLA_TRANSLATED_FILE_NAME = TMP_DIRECTORY + '/classla_translated.conllu'
|
||||
TEI_INIT_FILE_NAME = TMP_DIRECTORY + '/tei_initial.xml'
|
||||
TEI_SINGLE_FILE_NAME = TMP_DIRECTORY + '/tei_single.xml'
|
||||
TEI_MULTIPLE_FILE_NAME = TMP_DIRECTORY + '/tei_multiple.xml'
|
||||
|
|
|
@ -31,8 +31,9 @@ def run_pipeline(input_file_name, output_file_name):
|
|||
shutil.copyfile(input_file_name, STRING_LIST_FILE_NAME)
|
||||
run_obeliks(STRING_LIST_FILE_NAME, OBELIKS_RAW_FILE_NAME)
|
||||
tweak_conllu(OBELIKS_RAW_FILE_NAME, OBELIKS_TWEAKED_FILE_NAME)
|
||||
run_classla(OBELIKS_TWEAKED_FILE_NAME, CLASSLA_FILE_NAME)
|
||||
run_tei_conversion(CLASSLA_FILE_NAME, TEI_INIT_FILE_NAME)
|
||||
run_classla(OBELIKS_TWEAKED_FILE_NAME, CLASSLA_OUTPUT_FILE_NAME)
|
||||
run_jos_translation(CLASSLA_OUTPUT_FILE_NAME, CLASSLA_TRANSLATED_FILE_NAME)
|
||||
run_tei_conversion(CLASSLA_TRANSLATED_FILE_NAME, TEI_INIT_FILE_NAME)
|
||||
shutil.copyfile(TEI_INIT_FILE_NAME, output_file_name)
|
||||
|
||||
def run_obeliks(list_file_name, conllu_file_name):
|
||||
|
@ -54,6 +55,12 @@ def run_classla(obeliks_file_name, classla_file_name):
|
|||
result = nlp(doc)
|
||||
result.conll_file.write_conll(classla_file_name)
|
||||
|
||||
def run_jos_translation(input_file_name, output_file_name):
    """Run the JOS translation script on a file.

    Launches ``TRANSLATION_SCRIPT_NAME`` as a separate ``python`` process,
    passing ``input_file_name`` as ``-infile``, ``TRANSLATION_FILE_NAME`` as
    ``-dict`` and ``output_file_name`` as ``-outfile``. The command line is
    printed before it is executed.

    Args:
        input_file_name: path given to the script as ``-infile``.
        output_file_name: path given to the script as ``-outfile``.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status, so later pipeline steps do not run on a missing or
            stale output file.
    """
    # Imported locally so this function does not depend on the module's
    # top-of-file import block.
    import subprocess

    print('Translating JOS ...')
    # NOTE(review): TRANSLATION_FILE_NAME is not defined in the part of the
    # file visible in this change -- confirm it exists at module level.
    translate_arguments = [
        'python',
        TRANSLATION_SCRIPT_NAME,
        '-infile', input_file_name,
        '-dict', TRANSLATION_FILE_NAME,
        '-outfile', output_file_name,
    ]
    print(' '.join(translate_arguments))
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; check=True surfaces a failed translation
    # instead of silently ignoring the exit status.
    subprocess.run(translate_arguments, check=True)
|
||||
|
||||
def run_tei_conversion(classla_file_name, tei_file_name):
|
||||
print('Converting to tei ...')
|
||||
convert_command = ' '.join(['python', CONLLU_TEI_SCRIPT_NAME, '-o', tei_file_name, classla_file_name])
|
||||
|
|
|
@ -21,6 +21,7 @@ cd scripts
|
|||
ln -s ../nova_slovnica/python/scripts/conllu_to_xml.py .
|
||||
ln -s ../nova_slovnica/python/scripts/assign_structures.py .
|
||||
ln -s ../nova_slovnica/python/scripts/create_structures.py .
|
||||
ln -s ../nova_slovnica/python/scripts/translate_jos.py .
|
||||
ln -s ../nova_slovnica/python/scripts/tei_to_dictionary.py .
|
||||
ln -s ../luscenje_struktur/wani.py .
|
||||
cd ..
|
||||
|
|
Loading…
Reference in New Issue
Block a user