You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
122 lines
4.8 KiB
122 lines
4.8 KiB
import codecs
import os
import shutil
import tempfile
from xml.sax.saxutils import escape

import lxml.etree as lxml
from flask import Flask, Response, request
from flask_httpauth import HTTPBasicAuth

from structure_assignment.pipeline import Runner
|
|
|
|
# Deployment settings are read from the environment; a missing variable
# raises KeyError at import time.
api_prefix = os.environ['API_PREFIX']
resource_directory = os.environ['API_RESOURCE_DIR']

# Flask application object that the route decorators below register on.
app = Flask(__name__)

# One pipeline runner, created at import time and used by every endpoint.
runner = Runner(True, resource_directory)
|
|
|
|
|
|
@app.route(api_prefix + '/strings_to_parse', methods=['GET', 'POST'])
def strings_to_parse():
    """Run the parsing step on input strings and return the result as XML.

    A GET request supplies one string in the ``string`` query parameter,
    which is written (followed by a newline) to a scratch file. A POST
    request supplies an uploaded file in the ``strings`` form field. The
    scratch file is passed to ``runner.strings_to_parse`` and the XML file
    it produces is re-serialised (UTF-8, pretty-printed) as the response.

    Returns:
        A ``text/xml`` response containing either the parsed document or an
        ``<error>`` element holding the escaped exception message.
    """
    try:
        # The context manager deletes the scratch directory on every exit
        # path, so failed requests no longer leave directories behind.
        with tempfile.TemporaryDirectory() as tmp_directory:
            string_file_name = os.path.join(tmp_directory, 'input_string.txt')
            parsed_file_name = os.path.join(tmp_directory, 'output_parsed.xml')

            if request.method == 'GET':
                string = request.args.get('string')
                if string is None:
                    # Report the missing parameter explicitly instead of
                    # failing on ``None + '\n'``.
                    raise ValueError("missing required query parameter 'string'")
                with codecs.open(string_file_name, 'w', 'UTF-8') as string_file:
                    string_file.write(string + '\n')
            elif request.method == 'POST':
                request.files['strings'].save(string_file_name)

            runner.strings_to_parse(string_file_name, parsed_file_name)
            root = lxml.parse(parsed_file_name).getroot()
            message = lxml.tostring(root, encoding='UTF-8', pretty_print=True)
    except Exception as e:
        # Input errors are reported the same way as pipeline errors, as the
        # other endpoints in this module do. The text is escaped so that
        # '<', '>' or '&' in the message cannot produce malformed XML.
        message = '<error>' + escape(str(e)) + '</error>'

    return Response(message, mimetype='text/xml')
|
|
|
|
|
|
@app.route(api_prefix + '/parse_to_dictionary', methods=['POST'])
def parse_to_dictionary():
    """Turn an uploaded parsed file into a dictionary and return it as XML.

    The request must carry two uploaded files: ``parsed`` and
    ``structures``. Both are saved to a scratch directory and passed to
    ``runner.parse_to_dictionary`` together with two output paths. The
    response is a ``<response>`` element that contains:

    * the root element of the generated dictionary file;
    * a ``new_structures`` attribute counting the ``syntactic_structure``
      elements in the output structure file that have a ``tempId``;
    * a ``<syntactic_structures>`` element holding those top-level
      ``syntactic_structure`` elements whose ``id`` or ``tempId`` is
      referenced by a ``lexicalUnit/@structure_id`` in the dictionary.

    Returns:
        A ``text/xml`` response containing either the ``<response>``
        document or an ``<error>`` element with the escaped exception
        message.
    """
    try:
        # The context manager deletes the scratch directory on every exit
        # path, so failed requests no longer leave directories behind.
        with tempfile.TemporaryDirectory() as tmp_directory:
            parsed_file_name = os.path.join(tmp_directory, 'input_parsed.xml')
            dictionary_file_name = os.path.join(tmp_directory, 'output_dictionary.xml')
            input_structure_file_name = os.path.join(tmp_directory, 'input_structures.xml')
            output_structure_file_name = os.path.join(tmp_directory, 'output_structures.xml')

            request.files['parsed'].save(parsed_file_name)
            request.files['structures'].save(input_structure_file_name)

            runner.parse_to_dictionary(
                parsed_file_name,
                dictionary_file_name,
                input_structure_file_name,
                output_structure_file_name,
            )

            root = lxml.Element('response')
            dictionary_root = lxml.parse(dictionary_file_name).getroot()
            root.append(dictionary_root)

            structure_root = lxml.parse(output_structure_file_name).getroot()
            new_structure_count = len(
                structure_root.xpath('.//syntactic_structure[@tempId]')
            )
            root.set('new_structures', str(new_structure_count))

            structure_ids = set(dictionary_root.xpath('.//lexicalUnit/@structure_id'))
            # xpath() returns a list, so moving elements out of
            # structure_root inside the loop does not disturb the iteration.
            structures = structure_root.xpath('syntactic_structure')
            structures_element = lxml.SubElement(root, 'syntactic_structures')
            for structure in structures:
                if (structure.get('id') in structure_ids
                        or structure.get('tempId') in structure_ids):
                    structures_element.append(structure)

            message = lxml.tostring(root, encoding='UTF-8', pretty_print=True).decode()
    except Exception as e:
        # Escape the text so that '<', '>' or '&' in the exception message
        # cannot produce malformed XML.
        message = '<error>' + escape(str(e)) + '</error>'

    return Response(message, mimetype='text/xml')
|
|
|
|
|
|
@app.route(api_prefix + '/strings_to_dictionary', methods=['POST'])
def strings_to_dictionary():
    """Turn an uploaded strings file into a dictionary and return it as XML.

    The request must carry two uploaded files: ``strings`` and
    ``structures``. Both are saved to a scratch directory and passed to
    ``runner.strings_to_dictionary`` together with two output paths. The
    response is a ``<response>`` element that contains:

    * the root element of the generated dictionary file;
    * a ``new_structures`` attribute counting the ``syntactic_structure``
      elements in the output structure file that have a ``tempId``;
    * a ``<syntactic_structures>`` element holding those top-level
      ``syntactic_structure`` elements whose ``id`` or ``tempId`` is
      referenced by a ``lexicalUnit/@structure_id`` in the dictionary.

    Returns:
        A ``text/xml`` response containing either the ``<response>``
        document or an ``<error>`` element with the escaped exception
        message.
    """
    try:
        # The context manager deletes the scratch directory on every exit
        # path, so failed requests no longer leave directories behind.
        with tempfile.TemporaryDirectory() as tmp_directory:
            string_file_name = os.path.join(tmp_directory, 'input_string.txt')
            dictionary_file_name = os.path.join(tmp_directory, 'output_dictionary.xml')
            input_structure_file_name = os.path.join(tmp_directory, 'input_structures.xml')
            output_structure_file_name = os.path.join(tmp_directory, 'output_structures.xml')

            request.files['strings'].save(string_file_name)
            request.files['structures'].save(input_structure_file_name)

            runner.strings_to_dictionary(
                string_file_name,
                dictionary_file_name,
                input_structure_file_name,
                output_structure_file_name,
            )

            root = lxml.Element('response')
            dictionary_root = lxml.parse(dictionary_file_name).getroot()
            root.append(dictionary_root)

            structure_root = lxml.parse(output_structure_file_name).getroot()
            new_structure_count = len(
                structure_root.xpath('.//syntactic_structure[@tempId]')
            )
            root.set('new_structures', str(new_structure_count))

            structure_ids = set(dictionary_root.xpath('.//lexicalUnit/@structure_id'))
            # xpath() returns a list, so moving elements out of
            # structure_root inside the loop does not disturb the iteration.
            structures = structure_root.xpath('syntactic_structure')
            structures_element = lxml.SubElement(root, 'syntactic_structures')
            for structure in structures:
                if (structure.get('id') in structure_ids
                        or structure.get('tempId') in structure_ids):
                    structures_element.append(structure)

            message = lxml.tostring(root, encoding='UTF-8', pretty_print=True).decode()
    except Exception as e:
        # Escape the text so that '<', '>' or '&' in the exception message
        # cannot produce malformed XML.
        message = '<error>' + escape(str(e)) + '</error>'

    return Response(message, mimetype='text/xml')
|