forked from kristjan/cjvt-srl-tagging
mate-tools tags the corpus. Predicates still need to be specified more precisely.
This commit is contained in:
@@ -109,20 +109,32 @@ class Parser:
|
||||
return res_dict
|
||||
|
||||
|
||||
def to_conll_2009_SRL(self, sentence_entry, napreds=100):
|
||||
def to_conll_2009_SRL(self, sentence_entry, napreds=9):
|
||||
|
||||
def fillpred(pos, feat):
|
||||
# TODO (decision tree or bayes on mate training data)
|
||||
if pos == "V" and "main" in feat.split("|"):
|
||||
return True
|
||||
return False
|
||||
|
||||
apreds_string = '\t'.join(["_" for x in range(napreds)])
|
||||
|
||||
# works with kres, with parsed links
|
||||
out_str = ""
|
||||
for token in sentence_entry["tokens"]:
|
||||
if token[0] != "w":
|
||||
continue
|
||||
|
||||
t_id = token[1]
|
||||
form = token[2]
|
||||
|
||||
# handle stop signs
|
||||
if token[0] != "w":
|
||||
out_str += '\t'.join(
|
||||
[t_id] +
|
||||
[form for x in range(7)] +
|
||||
["0", "0", "modra", "modra", "_", "_"] +
|
||||
[apreds_string, "\n"]
|
||||
)
|
||||
continue
|
||||
|
||||
pos = self.msdmap.slo_msd_to_eng_pos(token[4])
|
||||
feat = "|".join(self.msdmap.slo_msd_to_eng_long(token[4]).split(" "))
|
||||
fprd = fillpred(pos, feat)
|
||||
@@ -135,28 +147,28 @@ class Parser:
|
||||
print(sentence_entry["links"])
|
||||
"""
|
||||
|
||||
apreds = "".join(["\t_" for x in range(napreds)])
|
||||
|
||||
# format: 14 + apreds
|
||||
out_str += "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}{}\n".format(
|
||||
t_id, # id
|
||||
token[2], # form
|
||||
token[3], # lemma
|
||||
token[3], # plemma
|
||||
pos, # pos
|
||||
pos, # ppos
|
||||
feat, # feat
|
||||
feat, # pfeat
|
||||
sentence_entry["links"][t_id][2], # head
|
||||
sentence_entry["links"][t_id][2], # phead
|
||||
sentence_entry["links"][t_id][0], # deprel
|
||||
sentence_entry["links"][t_id][0], # pdeprel
|
||||
"Y" if fprd else "_", # fillpred
|
||||
token[3] if fprd else "_", # pred
|
||||
apreds,
|
||||
)
|
||||
out_str += "\n"
|
||||
# print(out_str)
|
||||
out_str += '\t'.join(map(str,
|
||||
[
|
||||
t_id,
|
||||
form,
|
||||
token[3], # lemma
|
||||
token[3], # plemma
|
||||
pos, # pos
|
||||
pos, # ppos
|
||||
feat, # feat
|
||||
feat, # pfeat
|
||||
sentence_entry["links"][t_id][2], # head
|
||||
sentence_entry["links"][t_id][2], # phead
|
||||
sentence_entry["links"][t_id][0], # deprel
|
||||
sentence_entry["links"][t_id][0], # pdeprel
|
||||
"Y" if fprd else "_", # fillpred
|
||||
token[3] if fprd else "_", # pred
|
||||
apreds_string,
|
||||
"\n",
|
||||
]
|
||||
))
|
||||
out_str += "\n" # newline at the end of sentence
|
||||
return out_str
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user