Redmine #1487: updated pipeline for classla/obeliks changes
This commit is contained in:
parent
60db704fc4
commit
92fa061dfc
|
@@ -24,6 +24,6 @@ FILE_MAP = {'strings-list': 'strings.txt',
|
||||||
NLP_CONFIG_MAP = {
|
NLP_CONFIG_MAP = {
|
||||||
'type': 'standard_jos',
|
'type': 'standard_jos',
|
||||||
'processors': 'tokenize,pos,lemma,depparse',
|
'processors': 'tokenize,pos,lemma,depparse',
|
||||||
'tokenize_pretokenized': True,
|
'tokenize_pretokenized': 'conllu',
|
||||||
'pos_use_lexicon': True,
|
'pos_use_lexicon': True,
|
||||||
}
|
}
|
||||||
|
|
|
@@ -5,10 +5,7 @@ from types import SimpleNamespace
|
||||||
import lxml.etree as lxml
|
import lxml.etree as lxml
|
||||||
|
|
||||||
import obeliks
|
import obeliks
|
||||||
|
|
||||||
import classla
|
import classla
|
||||||
from classla import Document
|
|
||||||
from classla.utils.conll import CoNLL
|
|
||||||
|
|
||||||
from structure_assignment.constants import *
|
from structure_assignment.constants import *
|
||||||
from structure_assignment.tweak_conllu import tweak as tweak_conllu
|
from structure_assignment.tweak_conllu import tweak as tweak_conllu
|
||||||
|
@@ -134,11 +131,11 @@ class Pipeline:
|
||||||
print('Parsing with classla ...')
|
print('Parsing with classla ...')
|
||||||
input_file_name = self.file_map['obeliks-tweaked']
|
input_file_name = self.file_map['obeliks-tweaked']
|
||||||
output_file_name = self.file_map['classla-parsed']
|
output_file_name = self.file_map['classla-parsed']
|
||||||
doc = Document(text=None)
|
with open(input_file_name, 'r') as input_file:
|
||||||
conll_file = CoNLLFile(filename=input_file_name)
|
input_conllu = input_file.read()
|
||||||
doc.conll_file = conll_file
|
doc = self.nlp(input_conllu)
|
||||||
result = nlp(doc)
|
with open(output_file_name, 'w') as output_file:
|
||||||
result.conll_file.write_conll(output_file_name)
|
output_file.write(doc.to_conll())
|
||||||
|
|
||||||
def do_translate_jos(self):
|
def do_translate_jos(self):
|
||||||
print('Translating JOS ...')
|
print('Translating JOS ...')
|
||||||
|
|
|
@@ -17,7 +17,7 @@ pip install lxml
|
||||||
pip install psycopg2cffi
|
pip install psycopg2cffi
|
||||||
pip install sqlalchemy
|
pip install sqlalchemy
|
||||||
pip install classla
|
pip install classla
|
||||||
python -c "import classla; classla.download('sl_ssj_jos')" <<< $'Y\nresources/classla'
|
python -c "import classla; classla.download('sl', type='standard_jos')" <<< $'Y\nresources/classla'
|
||||||
pip install obeliks
|
pip install obeliks
|
||||||
pip install nova_slovnica/python/package/
|
pip install nova_slovnica/python/package/
|
||||||
pip install luscenje_struktur/
|
pip install luscenje_struktur/
|
||||||
|
|
Loading…
Reference in New Issue
Block a user