Redmine #1835: made the input structure specification XML a parameter

This commit is contained in:
2021-06-29 21:00:27 +02:00
parent 86e2b12782
commit 3c38cdbcae
6 changed files with 51 additions and 51 deletions

View File

@@ -29,7 +29,7 @@ def strings_to_parse():
string_file.write(string + '\n')
string_file.close()
elif (request.method == 'POST'):
file_data = request.files['file']
file_data = request.files['strings']
file_data.save(string_file_name)
try:
@@ -49,19 +49,22 @@ def parse_to_dictionary():
tmp_directory = tempfile.mkdtemp()
parsed_file_name = tmp_directory + '/input_parsed.xml'
dictionary_file_name = tmp_directory + '/output_dictionary.xml'
structure_file_name = tmp_directory + '/output_structures.xml'
input_structure_file_name = tmp_directory + '/input_structures.xml'
output_structure_file_name = tmp_directory + '/output_structures.xml'
try:
file_data = request.files['file']
file_data.save(parsed_file_name)
parsed_file_data = request.files['parsed']
parsed_file_data.save(parsed_file_name)
structure_file_data = request.files['structures']
structure_file_data.save(input_structure_file_name)
runner.parse_to_dictionary(parsed_file_name, dictionary_file_name, structure_file_name)
runner.parse_to_dictionary(parsed_file_name, dictionary_file_name, input_structure_file_name, output_structure_file_name)
root = lxml.Element('response')
dictionary_root = lxml.parse(dictionary_file_name).getroot()
root.append(dictionary_root)
structure_root = lxml.parse(structure_file_name).getroot()
structure_root = lxml.parse(output_structure_file_name).getroot()
new_structure_count = len(structure_root.xpath('.//syntactic_structure[@tempId]'))
root.set('new_structures', str(new_structure_count))
structure_ids = set(dictionary_root.xpath('.//lexicalUnit/@structure_id'))
@@ -79,30 +82,28 @@ def parse_to_dictionary():
return Response(message, mimetype='text/xml')
@app.route(api_prefix + '/strings_to_dictionary', methods=['GET', 'POST'])
@app.route(api_prefix + '/strings_to_dictionary', methods=['POST'])
def strings_to_dictionary():
tmp_directory = tempfile.mkdtemp()
string_file_name = tmp_directory + '/input_string.txt'
dictionary_file_name = tmp_directory + '/output_dictionary.xml'
structure_file_name = tmp_directory + '/output_structures.xml'
input_structure_file_name = tmp_directory + '/input_structures.xml'
output_structure_file_name = tmp_directory + '/output_structures.xml'
try:
if (request.method == 'GET'):
string = request.args.get('string')
with open(string_file_name, 'w') as string_file:
string_file.write(string + '\n')
elif (request.method == 'POST'):
file_data = request.files['file']
file_data.save(string_file_name)
string_file_data = request.files['strings']
string_file_data.save(string_file_name)
structure_file_data = request.files['structures']
structure_file_data.save(input_structure_file_name)
runner.strings_to_dictionary(string_file_name, dictionary_file_name, structure_file_name)
runner.strings_to_dictionary(string_file_name, dictionary_file_name, input_structure_file_name, output_structure_file_name)
root = lxml.Element('response')
dictionary_root = lxml.parse(dictionary_file_name).getroot()
root.append(dictionary_root)
structure_root = lxml.parse(structure_file_name).getroot()
structure_root = lxml.parse(output_structure_file_name).getroot()
new_structure_count = len(structure_root.xpath('.//syntactic_structure[@tempId]'))
root.set('new_structures', str(new_structure_count))
structure_ids = set(dictionary_root.xpath('.//lexicalUnit/@structure_id'))

View File

@@ -13,7 +13,7 @@ FILE_MAP = {'strings-list': 'strings.txt',
'tei-multiple-ids-2': 'tei_multiple_with_ids2.xml',
'mwes-1': 'mwes1.csv',
'mwes-2': 'mwes2.csv',
'structures-old': 'structures.xml',
'structures-old': 'structures_old.xml',
'structures-new': 'structures_new.xml',
'dictionary-single': 'dictionary_single.xml',
'dictionary-multiple': 'dictionary_multiple.xml',

View File

@@ -25,24 +25,26 @@ class Runner:
NLP_CONFIG_MAP['dir'] = resource_directory + '/classla'
self.nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP)
def run_all(self, input_file_name, output_file_name, structure_file_name):
def run_all(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
pipeline = Pipeline(self.resource_directory, self.nlp)
pipeline.import_file(input_file_name, 'strings-list')
pipeline.import_file(input_structure_file_name, 'structures-old')
self._strings_to_parse_sequence(pipeline)
self._parse_to_dictionary_sequence(pipeline)
pipeline.do_validate_structures()
pipeline.export_file(structure_file_name, 'structures-new')
pipeline.export_file(output_structure_file_name, 'structures-new')
pipeline.do_validate_dictionary()
pipeline.export_file(output_file_name, 'dictionary')
pipeline.cleanup()
def strings_to_dictionary(self, input_file_name, output_file_name, structure_file_name):
def strings_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
pipeline = Pipeline(self.resource_directory, self.nlp)
pipeline.import_file(input_file_name, 'strings-list')
pipeline.import_file(input_structure_file_name, 'structures-old')
self._strings_to_parse_sequence(pipeline)
self._parse_to_dictionary_sequence(pipeline)
pipeline.export_file(output_file_name, 'dictionary')
pipeline.export_file(structure_file_name, 'structures-new')
pipeline.export_file(output_structure_file_name, 'structures-new')
pipeline.cleanup()
def strings_to_parse(self, input_file_name, output_file_name):
@@ -52,12 +54,13 @@ class Runner:
pipeline.export_file(output_file_name, 'tei-initial')
pipeline.cleanup()
def parse_to_dictionary(self, input_file_name, output_file_name, structure_file_name):
def parse_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
pipeline = Pipeline(self.resource_directory)
pipeline.import_file(input_file_name, 'tei-initial')
pipeline.import_file(input_structure_file_name, 'structures-old')
self._parse_to_dictionary_sequence(pipeline)
pipeline.export_file(output_file_name, 'dictionary')
pipeline.export_file(structure_file_name, 'structures-new')
pipeline.export_file(output_structure_file_name, 'structures-new')
pipeline.cleanup()
def validate_structures(self, input_file_name):