Redmine #1487: updated pipeline for classla/obeliks changes

This commit is contained in:
Cyprian Laskowski 2021-03-25 10:56:37 +01:00
parent 60db704fc4
commit 92fa061dfc
3 changed files with 7 additions and 10 deletions

View File

@@ -24,6 +24,6 @@ FILE_MAP = {'strings-list': 'strings.txt',
NLP_CONFIG_MAP = { NLP_CONFIG_MAP = {
'type': 'standard_jos', 'type': 'standard_jos',
'processors': 'tokenize,pos,lemma,depparse', 'processors': 'tokenize,pos,lemma,depparse',
'tokenize_pretokenized': True, 'tokenize_pretokenized': 'conllu',
'pos_use_lexicon': True, 'pos_use_lexicon': True,
} }

View File

@@ -5,10 +5,7 @@ from types import SimpleNamespace
import lxml.etree as lxml import lxml.etree as lxml
import obeliks import obeliks
import classla import classla
from classla import Document
from classla.utils.conll import CoNLL
from structure_assignment.constants import * from structure_assignment.constants import *
from structure_assignment.tweak_conllu import tweak as tweak_conllu from structure_assignment.tweak_conllu import tweak as tweak_conllu
@@ -134,11 +131,11 @@ class Pipeline:
print('Parsing with classla ...') print('Parsing with classla ...')
input_file_name = self.file_map['obeliks-tweaked'] input_file_name = self.file_map['obeliks-tweaked']
output_file_name = self.file_map['classla-parsed'] output_file_name = self.file_map['classla-parsed']
doc = Document(text=None) with open(input_file_name, 'r') as input_file:
conll_file = CoNLLFile(filename=input_file_name) input_conllu = input_file.read()
doc.conll_file = conll_file doc = self.nlp(input_conllu)
result = nlp(doc) with open(output_file_name, 'w') as output_file:
result.conll_file.write_conll(output_file_name) output_file.write(doc.to_conll())
def do_translate_jos(self): def do_translate_jos(self):
print('Translating JOS ...') print('Translating JOS ...')

View File

@@ -17,7 +17,7 @@ pip install lxml
pip install psycopg2cffi pip install psycopg2cffi
pip install sqlalchemy pip install sqlalchemy
pip install classla pip install classla
python -c "import classla; classla.download('sl_ssj_jos')" <<< $'Y\nresources/classla' python -c "import classla; classla.download('sl', type='standard_jos')" <<< $'Y\nresources/classla'
pip install obeliks pip install obeliks
pip install nova_slovnica/python/package/ pip install nova_slovnica/python/package/
pip install luscenje_struktur/ pip install luscenje_struktur/