This commit is contained in:
voje 2019-02-15 07:46:22 +01:00
parent 77ef78d21f
commit a5da389c41
2 changed files with 12 additions and 13 deletions

View File

@ -32,19 +32,16 @@ class Msdmap():
def slo_msd_to_eng_long(self, slo_msd):
    """Return the ``eng_long`` value stored for ``slo_msd`` in ``self.msd_table``.

    Args:
        slo_msd: Value to look up in the ``slo_msd`` column of
            ``self.msd_table``.

    Returns:
        The ``eng_long`` entry of the first row whose ``slo_msd`` column
        equals ``slo_msd``.

    Raises:
        IndexError: If no row of ``self.msd_table`` matches ``slo_msd``.
    """
    table = self.msd_table
    matches = table.loc[table["slo_msd"] == slo_msd, "eng_long"]
    if matches.empty:
        # Same exception type the bare ``.values[0]`` lookup produced, but
        # with a message that names the offending tag.
        raise IndexError("Unknown slo_msd tag: {!r}".format(slo_msd))
    return matches.values[0]
def slo_msd_to_eng_pos(self, slo_msd):
    """Return element 3 of the part-of-speech entry matching ``slo_msd``.

    The entry is found by ``self.pos_slo_ang_map`` using column 1 and the
    first character of ``slo_msd``.
    NOTE(review): element 3 is presumably the English POS tag -- confirm
    against the layout of ``self.pos_slo_ang``.

    Args:
        slo_msd: Non-empty MSD tag string.

    Returns:
        ``entry[3]`` of the matching part-of-speech entry.

    Raises:
        ValueError: Propagated from ``pos_slo_ang_map`` when no entry matches.
        IndexError: If ``slo_msd`` is empty.
    """
    # first letter in slo_msd == slo_pos
    pos_entry = self.pos_slo_ang_map(1, slo_msd[0])
    return pos_entry[3]
def pos_slo_ang_map(self, col, query):
    """Return the first entry of ``self.pos_slo_ang`` with ``entry[col] == query``.

    Args:
        col: Index into each entry that is compared against ``query``.
        query: Value to search for.

    Returns:
        The whole matching entry (not just the compared element).

    Raises:
        ValueError: If no entry matches.
    """
    for entry in self.pos_slo_ang:
        if entry[col] == query:
            return entry
    # Reached only after every entry has been inspected without a match.
    raise ValueError("Wrong part of speech.")
def msd_from_slo(self, msd):
    """Split an MSD tag into its part-of-speech entry and mapped attributes.

    The first character of ``msd`` selects the part-of-speech entry (matched
    on column 1 by ``self.pos_slo_ang_map``). Every following character
    except ``"-"`` is passed to ``self.pos_val_map`` together with element 2
    of that entry.
    NOTE(review): ``pos_val_map`` is defined outside this view; its return
    shape is not documented here -- confirm before relying on it.

    Args:
        msd: Non-empty MSD tag string.

    Returns:
        A ``(pos, attr)`` tuple: the matching part-of-speech entry and the
        list of ``pos_val_map`` results, in tag order.

    Raises:
        ValueError: Propagated from ``pos_slo_ang_map`` when no entry matches.
        IndexError: If ``msd`` is empty.
    """
    pos = self.pos_slo_ang_map(1, msd[0])
    category = pos[2]
    # "-" positions are skipped rather than mapped.
    attr = [
        self.pos_val_map(category, 1, m)
        for m in msd[1:]
        if m != "-"
    ]
    return (pos, attr)
if __name__ == "__main__":
msdmap = Msdmap()
@ -57,4 +54,5 @@ if __name__ == "__main__":
for msd in test_msds:
print(msd)
print(msdmap.slo_msd_to_eng_long(msd))
print(msdmap.slo_msd_to_eng_pos(msd))
print()

View File

@ -118,13 +118,14 @@ def to_conll_2009_SRL(sentence_entry):
return "_"
msdm = Msdmap()
# works with kres, with parsed links
out_str = ""
for token in sentence_entry["tokens"]:
if token[0] != "w":
continue
t_id = token[1]
msd = msdm.msd_from_slo(token[4])
fprd = fillpred("TODO", "todo")
"""
@ -141,10 +142,10 @@ def to_conll_2009_SRL(sentence_entry):
token[2], # form
token[3], # lemma
token[3], # plemma
msd[0][3], # pos
msd[0][3], # ppos
"|".join([msd[0][2]] + [el[4] for el in msd[1]]), # feat
"|".join([msd[0][2]] + [el[4] for el in msd[1]]), # pfeat
msdm.slo_msd_to_eng_pos(token[4]), # pos
msdm.slo_msd_to_eng_pos(token[4]), # ppos
# slo_msd_to_eng_long requires the token's MSD tag (token[4]) as its argument.
"|".join(msdm.slo_msd_to_eng_long(token[4]).split(" ")), # feat
"|".join(msdm.slo_msd_to_eng_long(token[4]).split(" ")), # pfeat
sentence_entry["links"][t_id][2], # head
sentence_entry["links"][t_id][2], # phead
sentence_entry["links"][t_id][0], # deprel
@ -167,4 +168,4 @@ def to_conll_2009_full(sentence_entry):
token[2], # form
)
out_str += "\n"
return out_str
return out_str