Redmine #1835: minor improvements
This commit is contained in:
@@ -1,23 +1,27 @@
|
||||
import argparse
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
from structure_assignment.pipeline import Pipeline, create_nlp
|
||||
|
||||
resource_directory = '../resources'
|
||||
|
||||
def run_all(input_file_name, output_file_name, nlp, structure_file_name):
    """Run every stage: strings -> parse -> dictionary, then validate outputs.

    Args:
        input_file_name: Path of the input file handed to ``strings_to_parse``.
        output_file_name: Path the dictionary is written to by
            ``parse_to_dictionary`` and later checked by
            ``validate_dictionary``.
        nlp: Object forwarded unchanged to ``strings_to_parse``.
        structure_file_name: Path of the structure file passed to
            ``parse_to_dictionary`` and later checked by
            ``validate_structures``.
    """
    # mkstemp() creates a uniquely named file and returns an open descriptor
    # together with the path.  The later stages open the file by name, so the
    # descriptor is closed straight away.
    tmp_fd, tmp_file_name = tempfile.mkstemp(suffix='.xml')
    os.close(tmp_fd)
    try:
        strings_to_parse(input_file_name, tmp_file_name, nlp)
        parse_to_dictionary(tmp_file_name, output_file_name, structure_file_name)
    finally:
        # Remove the intermediate file even when one of the stages fails.
        os.remove(tmp_file_name)
    validate_structures(structure_file_name)
    validate_dictionary(output_file_name)
|
||||
|
||||
def strings_to_dictionary(input_file_name, output_file_name, nlp, structure_file_name):
    """Convert a strings file to a dictionary via an intermediate parse file.

    Args:
        input_file_name: Path of the input file handed to ``strings_to_parse``.
        output_file_name: Path the dictionary is written to by
            ``parse_to_dictionary``.
        nlp: Object forwarded unchanged to ``strings_to_parse``.
        structure_file_name: Path of the structure file passed to
            ``parse_to_dictionary``.
    """
    # mkstemp() returns (descriptor, path); only the path is needed because
    # the stages below open the file by name, so close the descriptor now.
    tmp_fd, tmp_file_name = tempfile.mkstemp(suffix='.xml')
    os.close(tmp_fd)
    try:
        strings_to_parse(input_file_name, tmp_file_name, nlp)
        parse_to_dictionary(tmp_file_name, output_file_name, structure_file_name)
    finally:
        # Remove the intermediate file even when one of the stages fails.
        os.remove(tmp_file_name)
|
||||
|
||||
def strings_to_parse(input_file_name, output_file_name, nlp):
|
||||
pipeline = Pipeline(nlp, resource_directory)
|
||||
pipeline = Pipeline(resource_directory, nlp)
|
||||
pipeline.import_file(input_file_name, 'strings-list')
|
||||
pipeline.do_tokenise()
|
||||
pipeline.do_tweak_conllu()
|
||||
@@ -29,7 +33,7 @@ def strings_to_parse(input_file_name, output_file_name, nlp):
|
||||
pipeline.cleanup()
|
||||
|
||||
def parse_to_dictionary(input_file_name, output_file_name, structure_file_name):
|
||||
pipeline = Pipeline(None, resource_directory)
|
||||
pipeline = Pipeline(resource_directory)
|
||||
pipeline.import_file(input_file_name, 'tei-initial')
|
||||
pipeline.do_split_tei()
|
||||
pipeline.do_assign_single()
|
||||
@@ -46,13 +50,13 @@ def parse_to_dictionary(input_file_name, output_file_name, structure_file_name):
|
||||
pipeline.cleanup()
|
||||
|
||||
def validate_structures(input_file_name):
    """Validate a structures file with the pipeline's structure validator.

    Args:
        input_file_name: Path of the file imported into the pipeline under
            the ``'structures-new'`` key.
    """
    # No nlp object is passed here; only the resource directory is supplied.
    pipeline = Pipeline(resource_directory)
    try:
        pipeline.import_file(input_file_name, 'structures-new')
        pipeline.do_validate_structures()
    finally:
        # Always give the pipeline a chance to clean up, even on failure.
        pipeline.cleanup()
|
||||
|
||||
def validate_dictionary(input_file_name):
    """Validate a dictionary file with the pipeline's dictionary validator.

    Args:
        input_file_name: Path of the file imported into the pipeline under
            the ``'dictionary'`` key.
    """
    # No nlp object is passed here; only the resource directory is supplied.
    pipeline = Pipeline(resource_directory)
    try:
        pipeline.import_file(input_file_name, 'dictionary')
        pipeline.do_validate_dictionary()
    finally:
        # Always give the pipeline a chance to clean up, even on failure.
        pipeline.cleanup()
|
||||
@@ -85,4 +89,4 @@ if (__name__ == '__main__'):
|
||||
elif (part_name == 'validate_dictionary'):
|
||||
validate_dictionary(input_file_name)
|
||||
elif (part_name == 'all'):
|
||||
run_all(input_file_name)
|
||||
run_all(input_file_name, output_file_name, nlp, structure_file_name)
|
||||
|
||||
Reference in New Issue
Block a user