Redmine #2921: Added new 'strings to tokens' sequence
This commit is contained in:
parent
442f3ca163
commit
eb52405300
|
@ -52,6 +52,13 @@ class Runner:
|
|||
pipeline.export_file(output_file_name, 'tei-initial')
|
||||
self.cleanup(pipeline)
|
||||
|
||||
def strings_to_tokens(self, input_file_name, output_file_name):
    """Run the 'strings to tokens' sequence from one file to another.

    Imports ``input_file_name`` in the 'strings-list' format, runs the
    strings-to-tokens step sequence on it, and exports the result to
    ``output_file_name`` in the 'obeliks-tokenised' format.

    Args:
        input_file_name: Path of the file to import as 'strings-list'.
        output_file_name: Path of the file to export as 'obeliks-tokenised'.
    """
    pipeline = Pipeline(self.nlp)
    try:
        pipeline.import_file(input_file_name, 'strings-list')
        self._strings_to_tokens_sequence(pipeline)
        pipeline.export_file(output_file_name, 'obeliks-tokenised')
    finally:
        # Always hand the pipeline back to cleanup, even when a step raises,
        # so a failed run does not skip it.
        # NOTE(review): presumably cleanup releases temporary pipeline
        # resources -- confirm against Runner.cleanup.
        self.cleanup(pipeline)
|
||||
|
||||
def tagged_to_dictionary(self, strings_file_name, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name): # TODO: refactor/tidy
|
||||
|
||||
classla_conllu_file_name = '/tmp/classla.conlu'
|
||||
|
@ -132,6 +139,9 @@ class Runner:
|
|||
pipeline.do_validate_dictionary()
|
||||
self.cleanup(pipeline)
|
||||
|
||||
def _strings_to_tokens_sequence(self, pipeline):
    """Apply the 'strings to tokens' step sequence to ``pipeline``.

    The sequence consists of a single step: tokenisation.
    """
    pipeline.do_tokenise()
|
||||
|
||||
def _strings_to_parse_sequence(self, pipeline):
|
||||
pipeline.do_tokenise()
|
||||
pipeline.do_tweak_conllu()
|
||||
|
|
Loading…
Reference in New Issue
Block a user