Redmine #1487: updated pipeline for classla/obeliks changes

This commit is contained in:
Cyprian Laskowski 2021-03-25 10:56:37 +01:00
parent 60db704fc4
commit 92fa061dfc
3 changed files with 7 additions and 10 deletions

View File

@@ -24,6 +24,6 @@ FILE_MAP = {'strings-list': 'strings.txt',
NLP_CONFIG_MAP = {
'type': 'standard_jos',
'processors': 'tokenize,pos,lemma,depparse',
- 'tokenize_pretokenized': True,
+ 'tokenize_pretokenized': 'conllu',
'pos_use_lexicon': True,
}

View File

@@ -5,10 +5,7 @@ from types import SimpleNamespace
import lxml.etree as lxml
import obeliks
import classla
from classla import Document
from classla.utils.conll import CoNLL
from structure_assignment.constants import *
from structure_assignment.tweak_conllu import tweak as tweak_conllu
@@ -134,11 +131,11 @@ class Pipeline:
print('Parsing with classla ...')
input_file_name = self.file_map['obeliks-tweaked']
output_file_name = self.file_map['classla-parsed']
- doc = Document(text=None)
- conll_file = CoNLLFile(filename=input_file_name)
- doc.conll_file = conll_file
- result = nlp(doc)
- result.conll_file.write_conll(output_file_name)
+ with open(input_file_name, 'r') as input_file:
+ input_conllu = input_file.read()
+ doc = self.nlp(input_conllu)
+ with open(output_file_name, 'w') as output_file:
+ output_file.write(doc.to_conll())
def do_translate_jos(self):
print('Translating JOS ...')

View File

@@ -17,7 +17,7 @@ pip install lxml
pip install psycopg2cffi
pip install sqlalchemy
pip install classla
- python -c "import classla; classla.download('sl_ssj_jos')" <<< $'Y\nresources/classla'
+ python -c "import classla; classla.download('sl', type='standard_jos')" <<< $'Y\nresources/classla'
pip install obeliks
pip install nova_slovnica/python/package/
pip install luscenje_struktur/