Allow restricting JOS translation to a single column

This commit is contained in:
Cyprian Laskowski 2023-10-18 21:54:00 +02:00
parent 89be603103
commit f28b5a3a01

View File

@ -8,6 +8,7 @@ from importlib_resources import files
from conversion_utils.jos_msds_and_properties import Converter, Msd from conversion_utils.jos_msds_and_properties import Converter, Msd
def get_syn_map(): def get_syn_map():
dict_file_name = files('conversion_utils.resources').joinpath('dict.xml') dict_file_name = files('conversion_utils.resources').joinpath('dict.xml')
dict_file = codecs.open(dict_file_name, 'r') dict_file = codecs.open(dict_file_name, 'r')
@ -15,7 +16,8 @@ def get_syn_map():
dict_file.close() dict_file.close()
return {syn.get('en'):syn.get('sl') for syn in root.xpath('syns/syn')} return {syn.get('en'):syn.get('sl') for syn in root.xpath('syns/syn')}
def translate(input_file_name, output_file_name):
def translate(input_file_name, scope, output_file_name):
syn_map = get_syn_map() syn_map = get_syn_map()
@ -29,8 +31,10 @@ def translate(input_file_name, output_file_name):
if (len(columns) != 10): if (len(columns) != 10):
output_file.write(line) output_file.write(line)
else: else:
columns[4] = converter.translate_msd(Msd(columns[4], 'en'), 'sl').code if (scope in {'msd', 'both'}):
columns[7] = syn_map[columns[7]] columns[4] = converter.translate_msd(Msd(columns[4], 'en'), 'sl').code
if (scope in {'dep', 'both'}):
columns[7] = syn_map[columns[7]]
output_file.write('\t'.join(columns) + '\n') output_file.write('\t'.join(columns) + '\n')
input_file.close() input_file.close()
@ -41,6 +45,7 @@ if (__name__ == '__main__'):
arg_parser = argparse.ArgumentParser(description='Translate JOS msds and dependency labels.') arg_parser = argparse.ArgumentParser(description='Translate JOS msds and dependency labels.')
arg_parser.add_argument('-infile', type=str, help='Input conllu') arg_parser.add_argument('-infile', type=str, help='Input conllu')
# argparse restricts accepted values via `choices`; `options` is not a valid keyword and raises TypeError.
arg_parser.add_argument('-scope', type=str, choices=['msd', 'dep', 'both'], default='both', help="What to translate: 'msd' (column 5), 'dep' (column 8) or 'both' (default)")
arg_parser.add_argument('-outfile', type=str, help='Output conllu') arg_parser.add_argument('-outfile', type=str, help='Output conllu')
arguments = arg_parser.parse_args() arguments = arg_parser.parse_args()
input_file_name = arguments.infile input_file_name = arguments.infile