Redmine #1487: updated pipeline for classla/obeliks changes
This commit is contained in:
parent
60db704fc4
commit
92fa061dfc
|
@@ -24,6 +24,6 @@ FILE_MAP = {'strings-list': 'strings.txt',
|
|||
NLP_CONFIG_MAP = {
|
||||
'type': 'standard_jos',
|
||||
'processors': 'tokenize,pos,lemma,depparse',
|
||||
'tokenize_pretokenized': True,
|
||||
'tokenize_pretokenized': 'conllu',
|
||||
'pos_use_lexicon': True,
|
||||
}
|
||||
|
|
|
@@ -5,10 +5,7 @@ from types import SimpleNamespace
|
|||
import lxml.etree as lxml
|
||||
|
||||
import obeliks
|
||||
|
||||
import classla
|
||||
from classla import Document
|
||||
from classla.utils.conll import CoNLL
|
||||
|
||||
from structure_assignment.constants import *
|
||||
from structure_assignment.tweak_conllu import tweak as tweak_conllu
|
||||
|
@@ -134,11 +131,11 @@ class Pipeline:
|
|||
print('Parsing with classla ...')
|
||||
input_file_name = self.file_map['obeliks-tweaked']
|
||||
output_file_name = self.file_map['classla-parsed']
|
||||
doc = Document(text=None)
|
||||
conll_file = CoNLLFile(filename=input_file_name)
|
||||
doc.conll_file = conll_file
|
||||
result = nlp(doc)
|
||||
result.conll_file.write_conll(output_file_name)
|
||||
with open(input_file_name, 'r') as input_file:
|
||||
input_conllu = input_file.read()
|
||||
doc = self.nlp(input_conllu)
|
||||
with open(output_file_name, 'w') as output_file:
|
||||
output_file.write(doc.to_conll())
|
||||
|
||||
def do_translate_jos(self):
|
||||
print('Translating JOS ...')
|
||||
|
|
|
@@ -17,7 +17,7 @@ pip install lxml
|
|||
pip install psycopg2cffi
|
||||
pip install sqlalchemy
|
||||
pip install classla
|
||||
python -c "import classla; classla.download('sl_ssj_jos')" <<< $'Y\nresources/classla'
|
||||
python -c "import classla; classla.download('sl', type='standard_jos')" <<< $'Y\nresources/classla'
|
||||
pip install obeliks
|
||||
pip install nova_slovnica/python/package/
|
||||
pip install luscenje_struktur/
|
||||
|
|
Loading…
Reference in New Issue
Block a user