Redmine #1835: minor improvements

This commit is contained in:
2021-03-15 16:24:01 +01:00
parent 08c291b5db
commit cb6960f58a
3 changed files with 30 additions and 9 deletions

View File

@@ -1,23 +1,27 @@
import argparse
import tempfile
import os
from structure_assignment.pipeline import Pipeline, create_nlp
# Directory passed to every Pipeline constructed in this script.
# NOTE(review): the path is relative, so it presumably depends on the working
# directory the script is started from -- confirm.
resource_directory = '../resources'
def run_all(input_file_name, output_file_name, nlp, structure_file_name):
    """Run both processing stages and then both validation steps.

    The input is first handed to ``strings_to_parse``, which writes to a
    temporary intermediate file; that file is then handed to
    ``parse_to_dictionary``.  Afterwards the structure file and the output
    file are validated.

    Args:
        input_file_name: Path passed to ``strings_to_parse`` as its input.
        output_file_name: Path passed to ``parse_to_dictionary`` as its
            output and then to ``validate_dictionary``.
        nlp: Object forwarded unchanged to ``strings_to_parse``.
        structure_file_name: Path passed to ``parse_to_dictionary`` and then
            to ``validate_structures``.
    """
    # mkstemp creates a uniquely named file and returns an open descriptor
    # together with the path.  The stages below receive the file by name, so
    # the descriptor is closed straight away.  The '.xml' suffix keeps the
    # extension of the previously hard-coded '/tmp/tmp.xml'.
    tmp_fd, tmp_file_name = tempfile.mkstemp(suffix='.xml')
    os.close(tmp_fd)
    try:
        strings_to_parse(input_file_name, tmp_file_name, nlp)
        parse_to_dictionary(tmp_file_name, output_file_name, structure_file_name)
    finally:
        # Remove the intermediate file even when one of the stages raises.
        os.remove(tmp_file_name)
    validate_structures(structure_file_name)
    validate_dictionary(output_file_name)
def strings_to_dictionary(input_file_name, output_file_name, nlp, structure_file_name):
    """Run both processing stages without the validation steps.

    The input is first handed to ``strings_to_parse``, which writes to a
    temporary intermediate file; that file is then handed to
    ``parse_to_dictionary``.

    Args:
        input_file_name: Path passed to ``strings_to_parse`` as its input.
        output_file_name: Path passed to ``parse_to_dictionary`` as its
            output.
        nlp: Object forwarded unchanged to ``strings_to_parse``.
        structure_file_name: Path passed to ``parse_to_dictionary``.
    """
    # mkstemp creates a uniquely named file and returns an open descriptor
    # together with the path.  The stages below receive the file by name, so
    # the descriptor is closed straight away.  The '.xml' suffix keeps the
    # extension of the previously hard-coded '/tmp/tmp.xml'.
    tmp_fd, tmp_file_name = tempfile.mkstemp(suffix='.xml')
    os.close(tmp_fd)
    try:
        strings_to_parse(input_file_name, tmp_file_name, nlp)
        parse_to_dictionary(tmp_file_name, output_file_name, structure_file_name)
    finally:
        # Remove the intermediate file even when one of the stages raises.
        os.remove(tmp_file_name)
def strings_to_parse(input_file_name, output_file_name, nlp):
pipeline = Pipeline(nlp, resource_directory)
pipeline = Pipeline(resource_directory, nlp)
pipeline.import_file(input_file_name, 'strings-list')
pipeline.do_tokenise()
pipeline.do_tweak_conllu()
@@ -29,7 +33,7 @@ def strings_to_parse(input_file_name, output_file_name, nlp):
pipeline.cleanup()
def parse_to_dictionary(input_file_name, output_file_name, structure_file_name):
pipeline = Pipeline(None, resource_directory)
pipeline = Pipeline(resource_directory)
pipeline.import_file(input_file_name, 'tei-initial')
pipeline.do_split_tei()
pipeline.do_assign_single()
@@ -46,13 +50,13 @@ def parse_to_dictionary(input_file_name, output_file_name, structure_file_name):
pipeline.cleanup()
def validate_structures(input_file_name):
    """Import a structures file into a fresh pipeline and validate it.

    Args:
        input_file_name: Path of the file imported with the
            ``'structures-new'`` format tag.
    """
    # Only the resource directory is passed; no nlp object is supplied here.
    pipeline = Pipeline(resource_directory)
    pipeline.import_file(input_file_name, 'structures-new')
    pipeline.do_validate_structures()
    pipeline.cleanup()
def validate_dictionary(input_file_name):
    """Import a dictionary file into a fresh pipeline and validate it.

    Args:
        input_file_name: Path of the file imported with the
            ``'dictionary'`` format tag.
    """
    # Only the resource directory is passed; no nlp object is supplied here.
    pipeline = Pipeline(resource_directory)
    pipeline.import_file(input_file_name, 'dictionary')
    pipeline.do_validate_dictionary()
    pipeline.cleanup()
@@ -85,4 +89,4 @@ if (__name__ == '__main__'):
elif (part_name == 'validate_dictionary'):
validate_dictionary(input_file_name)
elif (part_name == 'all'):
run_all(input_file_name)
run_all(input_file_name, output_file_name, nlp, structure_file_name)