You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

122 lines
4.8 KiB

import codecs
import os
import shutil
import tempfile
from xml.sax.saxutils import escape

import lxml.etree as lxml
from flask import Flask, Response, request
from flask_httpauth import HTTPBasicAuth
from structure_assignment.pipeline import Runner
# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# URL prefix prepended to every route. Read from the environment at import
# time, so importing this module raises KeyError if API_PREFIX is unset.
api_prefix = os.environ['API_PREFIX']
# Directory handed to the pipeline Runner; API_RESOURCE_DIR is likewise
# required at import time.
resource_directory = os.environ['API_RESOURCE_DIR']
# Single Runner instance shared by all request handlers.
# NOTE(review): the meaning of the first positional argument (True) is not
# visible from this file -- confirm against structure_assignment.pipeline.
runner = Runner(True, resource_directory)
@app.route(api_prefix + '/strings_to_parse', methods=['GET', 'POST'])
def strings_to_parse():
    """Run ``runner.strings_to_parse`` on the submitted text and return XML.

    Input:
        GET:  the text comes from the ``string`` query parameter; it is
              written to a temporary file followed by a newline.
        POST: the text comes from the uploaded ``strings`` file.

    Returns:
        A ``text/xml`` response containing the pretty-printed root element
        of the file produced by the runner. If any step fails (including a
        missing ``string`` parameter or ``strings`` upload), the body is
        ``<error>...</error>`` holding the XML-escaped exception text, the
        same convention the other endpoints in this module use.
    """
    tmp_directory = tempfile.mkdtemp()
    string_file_name = os.path.join(tmp_directory, 'input_string.txt')
    parsed_file_name = os.path.join(tmp_directory, 'output_parsed.xml')
    try:
        if request.method == 'GET':
            string = request.args.get('string')
            if string is None:
                # Fail with a readable message instead of `None + '\n'`.
                raise ValueError("missing required query parameter 'string'")
            with codecs.open(string_file_name, 'w', 'UTF-8') as string_file:
                string_file.write(string + '\n')
        elif request.method == 'POST':
            request.files['strings'].save(string_file_name)
        runner.strings_to_parse(string_file_name, parsed_file_name)
        root = lxml.parse(parsed_file_name).getroot()
        message = lxml.tostring(root, encoding='UTF-8', pretty_print=True)
    except Exception as e:
        # Escape the text so characters such as '<' or '&' in the exception
        # message cannot break (or inject markup into) the XML response.
        message = '<error>' + escape(str(e)) + '</error>'
    finally:
        # Always remove the scratch directory, on failure as well as success.
        shutil.rmtree(tmp_directory, ignore_errors=True)
    return Response(message, mimetype='text/xml')
@app.route(api_prefix + '/parse_to_dictionary', methods=['POST'])
def parse_to_dictionary():
    """Run ``runner.parse_to_dictionary`` on two uploads and return XML.

    Input (multipart POST):
        ``parsed``:     file saved as the runner's parsed input.
        ``structures``: file saved as the runner's input structure file.

    Returns:
        A ``text/xml`` response whose root is ``<response>`` with:
          * attribute ``new_structures``: the number of
            ``syntactic_structure`` elements in the runner's structure
            output that carry a ``tempId`` attribute;
          * the root element of the runner's dictionary output;
          * a ``<syntactic_structures>`` element holding those direct
            ``syntactic_structure`` children of the structure output whose
            ``id`` or ``tempId`` matches a ``lexicalUnit/@structure_id``
            value found in the dictionary output.
        If any step fails, the body is ``<error>...</error>`` holding the
        XML-escaped exception text.
    """
    tmp_directory = tempfile.mkdtemp()
    parsed_file_name = os.path.join(tmp_directory, 'input_parsed.xml')
    dictionary_file_name = os.path.join(tmp_directory, 'output_dictionary.xml')
    input_structure_file_name = os.path.join(tmp_directory, 'input_structures.xml')
    output_structure_file_name = os.path.join(tmp_directory, 'output_structures.xml')
    try:
        request.files['parsed'].save(parsed_file_name)
        request.files['structures'].save(input_structure_file_name)
        runner.parse_to_dictionary(
            parsed_file_name,
            dictionary_file_name,
            input_structure_file_name,
            output_structure_file_name,
        )

        root = lxml.Element('response')
        dictionary_root = lxml.parse(dictionary_file_name).getroot()
        root.append(dictionary_root)

        structure_root = lxml.parse(output_structure_file_name).getroot()
        new_structure_count = len(structure_root.xpath('.//syntactic_structure[@tempId]'))
        root.set('new_structures', str(new_structure_count))

        # Only structures actually referenced by the dictionary are returned.
        structure_ids = set(dictionary_root.xpath('.//lexicalUnit/@structure_id'))
        structures_element = lxml.SubElement(root, 'syntactic_structures')
        # xpath() returns a list, so moving elements out of structure_root
        # while looping over it is safe.
        for structure in structure_root.xpath('syntactic_structure'):
            if structure.get('id') in structure_ids or structure.get('tempId') in structure_ids:
                structures_element.append(structure)

        message = lxml.tostring(root, encoding='UTF-8', pretty_print=True).decode()
    except Exception as e:
        # Escape the text so characters such as '<' or '&' in the exception
        # message cannot break (or inject markup into) the XML response.
        message = '<error>' + escape(str(e)) + '</error>'
    finally:
        # Always remove the scratch directory, on failure as well as success.
        shutil.rmtree(tmp_directory, ignore_errors=True)
    return Response(message, mimetype='text/xml')
@app.route(api_prefix + '/strings_to_dictionary', methods=['POST'])
def strings_to_dictionary():
    """Run ``runner.strings_to_dictionary`` on two uploads and return XML.

    Input (multipart POST):
        ``strings``:    file saved as the runner's string input.
        ``structures``: file saved as the runner's input structure file.

    Returns:
        A ``text/xml`` response whose root is ``<response>`` with:
          * attribute ``new_structures``: the number of
            ``syntactic_structure`` elements in the runner's structure
            output that carry a ``tempId`` attribute;
          * the root element of the runner's dictionary output;
          * a ``<syntactic_structures>`` element holding those direct
            ``syntactic_structure`` children of the structure output whose
            ``id`` or ``tempId`` matches a ``lexicalUnit/@structure_id``
            value found in the dictionary output.
        If any step fails, the body is ``<error>...</error>`` holding the
        XML-escaped exception text.
    """
    tmp_directory = tempfile.mkdtemp()
    string_file_name = os.path.join(tmp_directory, 'input_string.txt')
    dictionary_file_name = os.path.join(tmp_directory, 'output_dictionary.xml')
    input_structure_file_name = os.path.join(tmp_directory, 'input_structures.xml')
    output_structure_file_name = os.path.join(tmp_directory, 'output_structures.xml')
    try:
        request.files['strings'].save(string_file_name)
        request.files['structures'].save(input_structure_file_name)
        runner.strings_to_dictionary(
            string_file_name,
            dictionary_file_name,
            input_structure_file_name,
            output_structure_file_name,
        )

        root = lxml.Element('response')
        dictionary_root = lxml.parse(dictionary_file_name).getroot()
        root.append(dictionary_root)

        structure_root = lxml.parse(output_structure_file_name).getroot()
        new_structure_count = len(structure_root.xpath('.//syntactic_structure[@tempId]'))
        root.set('new_structures', str(new_structure_count))

        # Only structures actually referenced by the dictionary are returned.
        structure_ids = set(dictionary_root.xpath('.//lexicalUnit/@structure_id'))
        structures_element = lxml.SubElement(root, 'syntactic_structures')
        # xpath() returns a list, so moving elements out of structure_root
        # while looping over it is safe.
        for structure in structure_root.xpath('syntactic_structure'):
            if structure.get('id') in structure_ids or structure.get('tempId') in structure_ids:
                structures_element.append(structure)

        message = lxml.tostring(root, encoding='UTF-8', pretty_print=True).decode()
    except Exception as e:
        # Escape the text so characters such as '<' or '&' in the exception
        # message cannot break (or inject markup into) the XML response.
        message = '<error>' + escape(str(e)) + '</error>'
    finally:
        # Always remove the scratch directory, on failure as well as success.
        shutil.rmtree(tmp_directory, ignore_errors=True)
    return Response(message, mimetype='text/xml')