diff --git a/conversion_utils/conllu_to_tei.py b/conversion_utils/conllu_to_tei.py index 3542280..164976e 100644 --- a/conversion_utils/conllu_to_tei.py +++ b/conversion_utils/conllu_to_tei.py @@ -312,7 +312,7 @@ def construct_tei_etrees(conllu_lines): def convert_file(input_file_name, output_file_name): - input_file = open(input_file_name, 'r') + input_file = open(input_file_name, 'r', encoding='utf-8') root = construct_tei_etrees(input_file)[0] tree = etree.ElementTree(root) tree.write(output_file_name, encoding='UTF-8', pretty_print=True) @@ -332,7 +332,7 @@ if __name__ == '__main__': args = parser.parse_args() if args.out: - f_out = open(args.out, 'w') + f_out = open(args.out, 'w', encoding='utf-8') else: f_out = sys.stdout @@ -341,7 +341,7 @@ if __name__ == '__main__': for arg in args.files: filelist = glob(arg) for f in filelist: - with open(f, 'r') as conllu_f: + with open(f, 'r', encoding='utf-8') as conllu_f: tei_etrees = construct_tei_etrees(conllu_f) for tei_etree in tei_etrees: f_out.write(etree.tostring(tei_etree, pretty_print=True, encoding='utf-8').decode()) diff --git a/conversion_utils/translate_conllu_jos.py b/conversion_utils/translate_conllu_jos.py index b92ac0c..b73c195 100644 --- a/conversion_utils/translate_conllu_jos.py +++ b/conversion_utils/translate_conllu_jos.py @@ -4,7 +4,6 @@ This script was developed in the context of a specific task and may not generali """ import argparse -import codecs import lxml.etree as lxml from importlib_resources import files @@ -13,7 +12,7 @@ from conversion_utils.jos_msds_and_properties import Converter, Msd def get_syn_map(): dict_file_name = files('conversion_utils.resources').joinpath('dict.xml') - dict_file = codecs.open(dict_file_name, 'r') + dict_file = open(dict_file_name, 'r', encoding='utf-8') root = lxml.parse(dict_file).getroot() dict_file.close() return {syn.get('en'):syn.get('sl') for syn in root.xpath('syns/syn')} @@ -23,8 +22,8 @@ def translate(input_file_name, scope, output_file_name): syn_map = get_syn_map() - output_file = codecs.open(output_file_name, 'w') - input_file = codecs.open(input_file_name, 'r') + output_file = open(output_file_name, 'w', encoding='utf-8') + input_file = open(input_file_name, 'r', encoding='utf-8') converter = Converter()