Redmine #1835: made the input structure specification XML a parameter
This commit is contained in:
@@ -29,7 +29,7 @@ def strings_to_parse():
|
||||
string_file.write(string + '\n')
|
||||
string_file.close()
|
||||
elif (request.method == 'POST'):
|
||||
file_data = request.files['file']
|
||||
file_data = request.files['strings']
|
||||
file_data.save(string_file_name)
|
||||
|
||||
try:
|
||||
@@ -49,19 +49,22 @@ def parse_to_dictionary():
|
||||
tmp_directory = tempfile.mkdtemp()
|
||||
parsed_file_name = tmp_directory + '/input_parsed.xml'
|
||||
dictionary_file_name = tmp_directory + '/output_dictionary.xml'
|
||||
structure_file_name = tmp_directory + '/output_structures.xml'
|
||||
input_structure_file_name = tmp_directory + '/input_structures.xml'
|
||||
output_structure_file_name = tmp_directory + '/output_structures.xml'
|
||||
|
||||
try:
|
||||
|
||||
file_data = request.files['file']
|
||||
file_data.save(parsed_file_name)
|
||||
parsed_file_data = request.files['parsed']
|
||||
parsed_file_data.save(parsed_file_name)
|
||||
structure_file_data = request.files['structures']
|
||||
structure_file_data.save(input_structure_file_name)
|
||||
|
||||
runner.parse_to_dictionary(parsed_file_name, dictionary_file_name, structure_file_name)
|
||||
runner.parse_to_dictionary(parsed_file_name, dictionary_file_name, input_structure_file_name, output_structure_file_name)
|
||||
root = lxml.Element('response')
|
||||
|
||||
dictionary_root = lxml.parse(dictionary_file_name).getroot()
|
||||
root.append(dictionary_root)
|
||||
structure_root = lxml.parse(structure_file_name).getroot()
|
||||
structure_root = lxml.parse(output_structure_file_name).getroot()
|
||||
new_structure_count = len(structure_root.xpath('.//syntactic_structure[@tempId]'))
|
||||
root.set('new_structures', str(new_structure_count))
|
||||
structure_ids = set(dictionary_root.xpath('.//lexicalUnit/@structure_id'))
|
||||
@@ -79,30 +82,28 @@ def parse_to_dictionary():
|
||||
return Response(message, mimetype='text/xml')
|
||||
|
||||
|
||||
@app.route(api_prefix + '/strings_to_dictionary', methods=['GET', 'POST'])
|
||||
@app.route(api_prefix + '/strings_to_dictionary', methods=['POST'])
|
||||
def strings_to_dictionary():
|
||||
|
||||
tmp_directory = tempfile.mkdtemp()
|
||||
string_file_name = tmp_directory + '/input_string.txt'
|
||||
dictionary_file_name = tmp_directory + '/output_dictionary.xml'
|
||||
structure_file_name = tmp_directory + '/output_structures.xml'
|
||||
input_structure_file_name = tmp_directory + '/input_structures.xml'
|
||||
output_structure_file_name = tmp_directory + '/output_structures.xml'
|
||||
|
||||
try:
|
||||
|
||||
if (request.method == 'GET'):
|
||||
string = request.args.get('string')
|
||||
with open(string_file_name, 'w') as string_file:
|
||||
string_file.write(string + '\n')
|
||||
elif (request.method == 'POST'):
|
||||
file_data = request.files['file']
|
||||
file_data.save(string_file_name)
|
||||
string_file_data = request.files['strings']
|
||||
string_file_data.save(string_file_name)
|
||||
structure_file_data = request.files['structures']
|
||||
structure_file_data.save(input_structure_file_name)
|
||||
|
||||
runner.strings_to_dictionary(string_file_name, dictionary_file_name, structure_file_name)
|
||||
runner.strings_to_dictionary(string_file_name, dictionary_file_name, input_structure_file_name, output_structure_file_name)
|
||||
root = lxml.Element('response')
|
||||
|
||||
dictionary_root = lxml.parse(dictionary_file_name).getroot()
|
||||
root.append(dictionary_root)
|
||||
structure_root = lxml.parse(structure_file_name).getroot()
|
||||
structure_root = lxml.parse(output_structure_file_name).getroot()
|
||||
new_structure_count = len(structure_root.xpath('.//syntactic_structure[@tempId]'))
|
||||
root.set('new_structures', str(new_structure_count))
|
||||
structure_ids = set(dictionary_root.xpath('.//lexicalUnit/@structure_id'))
|
||||
|
||||
@@ -13,7 +13,7 @@ FILE_MAP = {'strings-list': 'strings.txt',
|
||||
'tei-multiple-ids-2': 'tei_multiple_with_ids2.xml',
|
||||
'mwes-1': 'mwes1.csv',
|
||||
'mwes-2': 'mwes2.csv',
|
||||
'structures-old': 'structures.xml',
|
||||
'structures-old': 'structures_old.xml',
|
||||
'structures-new': 'structures_new.xml',
|
||||
'dictionary-single': 'dictionary_single.xml',
|
||||
'dictionary-multiple': 'dictionary_multiple.xml',
|
||||
|
||||
@@ -25,24 +25,26 @@ class Runner:
|
||||
NLP_CONFIG_MAP['dir'] = resource_directory + '/classla'
|
||||
self.nlp = classla.Pipeline('sl', **NLP_CONFIG_MAP)
|
||||
|
||||
def run_all(self, input_file_name, output_file_name, structure_file_name):
|
||||
def run_all(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
|
||||
pipeline = Pipeline(self.resource_directory, self.nlp)
|
||||
pipeline.import_file(input_file_name, 'strings-list')
|
||||
pipeline.import_file(input_structure_file_name, 'structures-old')
|
||||
self._strings_to_parse_sequence(pipeline)
|
||||
self._parse_to_dictionary_sequence(pipeline)
|
||||
pipeline.do_validate_structures()
|
||||
pipeline.export_file(structure_file_name, 'structures-new')
|
||||
pipeline.export_file(output_structure_file_name, 'structures-new')
|
||||
pipeline.do_validate_dictionary()
|
||||
pipeline.export_file(output_file_name, 'dictionary')
|
||||
pipeline.cleanup()
|
||||
|
||||
def strings_to_dictionary(self, input_file_name, output_file_name, structure_file_name):
|
||||
def strings_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
|
||||
pipeline = Pipeline(self.resource_directory, self.nlp)
|
||||
pipeline.import_file(input_file_name, 'strings-list')
|
||||
pipeline.import_file(input_structure_file_name, 'structures-old')
|
||||
self._strings_to_parse_sequence(pipeline)
|
||||
self._parse_to_dictionary_sequence(pipeline)
|
||||
pipeline.export_file(output_file_name, 'dictionary')
|
||||
pipeline.export_file(structure_file_name, 'structures-new')
|
||||
pipeline.export_file(output_structure_file_name, 'structures-new')
|
||||
pipeline.cleanup()
|
||||
|
||||
def strings_to_parse(self, input_file_name, output_file_name):
|
||||
@@ -52,12 +54,13 @@ class Runner:
|
||||
pipeline.export_file(output_file_name, 'tei-initial')
|
||||
pipeline.cleanup()
|
||||
|
||||
def parse_to_dictionary(self, input_file_name, output_file_name, structure_file_name):
|
||||
def parse_to_dictionary(self, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name):
|
||||
pipeline = Pipeline(self.resource_directory)
|
||||
pipeline.import_file(input_file_name, 'tei-initial')
|
||||
pipeline.import_file(input_structure_file_name, 'structures-old')
|
||||
self._parse_to_dictionary_sequence(pipeline)
|
||||
pipeline.export_file(output_file_name, 'dictionary')
|
||||
pipeline.export_file(structure_file_name, 'structures-new')
|
||||
pipeline.export_file(output_structure_file_name, 'structures-new')
|
||||
pipeline.cleanup()
|
||||
|
||||
def validate_structures(self, input_file_name):
|
||||
|
||||
Reference in New Issue
Block a user