Redmine #2921: Added new 'strings to tokens' sequence
This commit is contained in:
parent
442f3ca163
commit
eb52405300
|
@ -52,6 +52,13 @@ class Runner:
|
||||||
pipeline.export_file(output_file_name, 'tei-initial')
|
pipeline.export_file(output_file_name, 'tei-initial')
|
||||||
self.cleanup(pipeline)
|
self.cleanup(pipeline)
|
||||||
|
|
||||||
|
def strings_to_tokens(self, input_file_name, output_file_name):
    """Run the 'strings to tokens' sequence from one file to another.

    A new ``Pipeline`` is created around ``self.nlp``.  The input file is
    imported with the ``'strings-list'`` tag, the strings-to-tokens
    sequence is applied, and the result is exported to
    ``output_file_name`` with the ``'obeliks-tokenised'`` tag.  The
    pipeline is then handed to ``self.cleanup``.

    Args:
        input_file_name: Name of the file passed to ``import_file``.
        output_file_name: Name of the file passed to ``export_file``.
    """
    token_pipeline = Pipeline(self.nlp)

    # Load, process, write out -- in that order.
    token_pipeline.import_file(input_file_name, 'strings-list')
    self._strings_to_tokens_sequence(token_pipeline)
    token_pipeline.export_file(output_file_name, 'obeliks-tokenised')

    # NOTE(review): cleanup is only reached when every step above succeeds,
    # matching the other runner methods visible in this diff.
    self.cleanup(token_pipeline)
|
||||||
|
|
||||||
def tagged_to_dictionary(self, strings_file_name, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name): # TODO: refactor/tidy
|
def tagged_to_dictionary(self, strings_file_name, input_file_name, output_file_name, input_structure_file_name, output_structure_file_name): # TODO: refactor/tidy
|
||||||
|
|
||||||
classla_conllu_file_name = '/tmp/classla.conlu'
|
classla_conllu_file_name = '/tmp/classla.conlu'
|
||||||
|
@ -132,6 +139,9 @@ class Runner:
|
||||||
pipeline.do_validate_dictionary()
|
pipeline.do_validate_dictionary()
|
||||||
self.cleanup(pipeline)
|
self.cleanup(pipeline)
|
||||||
|
|
||||||
|
def _strings_to_tokens_sequence(self, pipeline):
    """Apply the 'strings to tokens' steps to ``pipeline``.

    The sequence consists of a single step: ``pipeline.do_tokenise()``.

    Args:
        pipeline: Object exposing a ``do_tokenise()`` method.
    """
    pipeline.do_tokenise()
|
||||||
|
|
||||||
def _strings_to_parse_sequence(self, pipeline):
|
def _strings_to_parse_sequence(self, pipeline):
|
||||||
pipeline.do_tokenise()
|
pipeline.do_tokenise()
|
||||||
pipeline.do_tweak_conllu()
|
pipeline.do_tweak_conllu()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user