Redmine #1487: updated pipeline for classla/obeliks changes
This commit is contained in:
@@ -24,6 +24,6 @@ FILE_MAP = {'strings-list': 'strings.txt',
|
||||
NLP_CONFIG_MAP = {
|
||||
'type': 'standard_jos',
|
||||
'processors': 'tokenize,pos,lemma,depparse',
|
||||
- 'tokenize_pretokenized': True,
|
||||
+ 'tokenize_pretokenized': 'conllu',
|
||||
'pos_use_lexicon': True,
|
||||
}
|
||||
|
||||
@@ -5,10 +5,7 @@ from types import SimpleNamespace
|
||||
import lxml.etree as lxml
|
||||
|
||||
import obeliks
|
||||
|
||||
import classla
|
||||
from classla import Document
|
||||
from classla.utils.conll import CoNLL
|
||||
|
||||
from structure_assignment.constants import *
|
||||
from structure_assignment.tweak_conllu import tweak as tweak_conllu
|
||||
@@ -134,11 +131,11 @@ class Pipeline:
|
||||
print('Parsing with classla ...')
|
||||
input_file_name = self.file_map['obeliks-tweaked']
|
||||
output_file_name = self.file_map['classla-parsed']
|
||||
- doc = Document(text=None)
|
||||
- conll_file = CoNLLFile(filename=input_file_name)
|
||||
- doc.conll_file = conll_file
|
||||
- result = nlp(doc)
|
||||
- result.conll_file.write_conll(output_file_name)
|
||||
+ with open(input_file_name, 'r') as input_file:
|
||||
+ input_conllu = input_file.read()
|
||||
+ doc = self.nlp(input_conllu)
|
||||
+ with open(output_file_name, 'w') as output_file:
|
||||
+ output_file.write(doc.to_conll())
|
||||
|
||||
def do_translate_jos(self):
|
||||
print('Translating JOS ...')
|
||||
|
||||
Reference in New Issue
Block a user