forked from kristjan/cjvt-srl-tagging
srl tagging pipeline (output in .tsv)
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
from lxml import etree
|
||||
import re
|
||||
from parser.msd.msdmap import Msdmap
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
from fillpred_model.step1 import build_model_row
|
||||
|
||||
class Parser:
|
||||
# reads a TEI xml file and returns a dictionary:
|
||||
@@ -15,6 +18,8 @@ class Parser:
|
||||
self.W_TAGS = ['w']
|
||||
self.C_TAGS = ['c']
|
||||
self.S_TAGS = ['S', 'pc']
|
||||
with Path("./fillpred_model/model.pickle").open("rb") as fp:
|
||||
self.fillpred_model = pickle.load(fp)
|
||||
|
||||
def parse_tei(self, filepath):
|
||||
|
||||
@@ -111,11 +116,11 @@ class Parser:
|
||||
|
||||
def to_conll_2009_SRL(self, sentence_entry, napreds=9):
|
||||
|
||||
def fillpred(pos, feat):
|
||||
# TODO (decision tree or bayes on mate training data)
|
||||
if pos == "V" and "main" in feat.split("|"):
|
||||
return True
|
||||
return False
|
||||
def fillpred(tsv_row):
|
||||
mrow = build_model_row(tsv_row)
|
||||
x = mrow[:-1]
|
||||
y = self.fillpred_model.predict([x])
|
||||
return y[0] # bool
|
||||
|
||||
apreds_string = '\t'.join(["_" for x in range(napreds)])
|
||||
|
||||
@@ -137,7 +142,6 @@ class Parser:
|
||||
|
||||
pos = self.msdmap.slo_msd_to_eng_pos(token[4])
|
||||
feat = "|".join(self.msdmap.slo_msd_to_eng_long(token[4]).split(" "))
|
||||
fprd = fillpred(pos, feat)
|
||||
|
||||
"""
|
||||
print(t_id)
|
||||
@@ -146,10 +150,7 @@ class Parser:
|
||||
print(token)
|
||||
print(sentence_entry["links"])
|
||||
"""
|
||||
|
||||
# format: 14 + apreds
|
||||
out_str += '\t'.join(map(str,
|
||||
[
|
||||
row_list = [
|
||||
t_id,
|
||||
form,
|
||||
token[3], # lemma
|
||||
@@ -162,11 +163,18 @@ class Parser:
|
||||
sentence_entry["links"][t_id][2], # phead
|
||||
sentence_entry["links"][t_id][0], # deprel
|
||||
sentence_entry["links"][t_id][0], # pdeprel
|
||||
"Y" if fprd else "_", # fillpred
|
||||
token[3] if fprd else "_", # pred
|
||||
"_", # fillpred
|
||||
"_", # pred
|
||||
apreds_string,
|
||||
"\n",
|
||||
]
|
||||
]
|
||||
fprd = fillpred(row_list)
|
||||
row_list[12] = "Y" if fprd else "_"
|
||||
row_list[13] = token[3] if fprd else "_"
|
||||
|
||||
# format: 14 + apreds
|
||||
out_str += '\t'.join(map(str,
|
||||
row_list
|
||||
))
|
||||
out_str += "\n" # newline at the end of sentence
|
||||
return out_str
|
||||
|
||||
Reference in New Issue
Block a user