IssueID #1835: made encoding-related improvements
This commit is contained in:
parent
c43c90352d
commit
86e2b12782
|
@ -1,3 +1,4 @@
|
|||
import codecs
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
|
@ -24,8 +25,9 @@ def strings_to_parse():
|
|||
|
||||
if (request.method == 'GET'):
|
||||
string = request.args.get('string')
|
||||
with open(string_file_name, 'w') as string_file:
|
||||
string_file.write(string + '\n')
|
||||
string_file = codecs.open(string_file_name, 'w', 'UTF-8')
|
||||
string_file.write(string + '\n')
|
||||
string_file.close()
|
||||
elif (request.method == 'POST'):
|
||||
file_data = request.files['file']
|
||||
file_data.save(string_file_name)
|
||||
|
@ -33,7 +35,7 @@ def strings_to_parse():
|
|||
try:
|
||||
runner.strings_to_parse(string_file_name, parsed_file_name)
|
||||
root = lxml.parse(parsed_file_name).getroot()
|
||||
message = lxml.tostring(root, encoding='UTF-8', pretty_print=True).decode()
|
||||
message = lxml.tostring(root, encoding='UTF-8', pretty_print=True)
|
||||
shutil.rmtree(tmp_directory)
|
||||
except Exception as e:
|
||||
message = '<error>' + str(e) + '</error>'
|
||||
|
|
|
@ -4,7 +4,6 @@ import tempfile
|
|||
from types import SimpleNamespace
|
||||
import lxml.etree as lxml
|
||||
|
||||
import obeliks
|
||||
import classla
|
||||
|
||||
from structure_assignment.constants import *
|
||||
|
@ -105,6 +104,7 @@ class Pipeline:
|
|||
import sys
|
||||
sys.path.insert(0, self.tmp_directory)
|
||||
self.file_map = {key: self.tmp_directory + '/' + FILE_MAP[key] for key in FILE_MAP.keys()}
|
||||
self.classla_directory = resource_directory + '/classla'
|
||||
|
||||
def import_file(self, file_name, file_key):
|
||||
shutil.copyfile(file_name, self.file_map[file_key])
|
||||
|
@ -113,7 +113,12 @@ class Pipeline:
|
|||
print('Tokenising with obeliks ...')
|
||||
input_file_name = self.file_map['strings-list']
|
||||
output_file_name = self.file_map['obeliks-tokenised']
|
||||
obeliks.run(in_file=input_file_name, out_file=output_file_name, conllu=True)
|
||||
with open(input_file_name, 'r') as input_file:
|
||||
input_conllu = input_file.read()
|
||||
tokeniser = classla.Pipeline('sl', processors='tokenize', dir=self.classla_directory)
|
||||
output_conllu = tokeniser(input_conllu).to_conll()
|
||||
with open(output_file_name, 'w') as output_file:
|
||||
output_file.write(output_conllu)
|
||||
|
||||
def do_tweak_conllu(self):
|
||||
print('Tweaking conllu ...')
|
||||
|
|
Loading…
Reference in New Issue
Block a user