UTF-8 encoding fix
This commit is contained in:
parent
f43ea39f1b
commit
b711fae3b5
@ -312,7 +312,7 @@ def construct_tei_etrees(conllu_lines):
|
||||
|
||||
|
||||
def convert_file(input_file_name, output_file_name):
|
||||
input_file = open(input_file_name, 'r')
|
||||
input_file = open(input_file_name, 'r', encoding='utf-8')
|
||||
root = construct_tei_etrees(input_file)[0]
|
||||
tree = etree.ElementTree(root)
|
||||
tree.write(output_file_name, encoding='UTF-8', pretty_print=True)
|
||||
@ -332,7 +332,7 @@ if __name__ == '__main__':
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.out:
|
||||
f_out = open(args.out, 'w')
|
||||
f_out = open(args.out, 'w', encoding='utf-8')
|
||||
else:
|
||||
f_out = sys.stdout
|
||||
|
||||
@ -341,7 +341,7 @@ if __name__ == '__main__':
|
||||
for arg in args.files:
|
||||
filelist = glob(arg)
|
||||
for f in filelist:
|
||||
with open(f, 'r') as conllu_f:
|
||||
with open(f, 'r', encoding='utf-8') as conllu_f:
|
||||
tei_etrees = construct_tei_etrees(conllu_f)
|
||||
for tei_etree in tei_etrees:
|
||||
f_out.write(etree.tostring(tei_etree, pretty_print=True, encoding='utf-8').decode())
|
||||
|
||||
@ -4,7 +4,6 @@ This script was developed in the context of a specific task and may not generali
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import codecs
|
||||
import lxml.etree as lxml
|
||||
from importlib_resources import files
|
||||
|
||||
@ -13,7 +12,7 @@ from conversion_utils.jos_msds_and_properties import Converter, Msd
|
||||
|
||||
def get_syn_map():
|
||||
dict_file_name = files('conversion_utils.resources').joinpath('dict.xml')
|
||||
dict_file = codecs.open(dict_file_name, 'r')
|
||||
dict_file = open(dict_file_name, 'r', encoding='utf-8')
|
||||
root = lxml.parse(dict_file).getroot()
|
||||
dict_file.close()
|
||||
return {syn.get('en'):syn.get('sl') for syn in root.xpath('syns/syn')}
|
||||
@ -23,8 +22,8 @@ def translate(input_file_name, scope, output_file_name):
|
||||
|
||||
syn_map = get_syn_map()
|
||||
|
||||
output_file = codecs.open(output_file_name, 'w')
|
||||
input_file = codecs.open(input_file_name, 'r')
|
||||
output_file = open(output_file_name, 'w', encoding='utf-8')
|
||||
input_file = open(input_file_name, 'r', encoding='utf-8')
|
||||
|
||||
converter = Converter()
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user