Replace Msdmap.msd_from_slo with Msdmap.slo_msd_to_eng_pos and build the
feat/pfeat columns of to_conll_2009_SRL from Msdmap.slo_msd_to_eng_long.

Fix relative to the previous revision of this patch: slo_msd_to_eng_long
takes the MSD string as a required argument, so the feat/pfeat lines now
pass token[4] (the same value already passed to slo_msd_to_eng_pos)
instead of calling it with no arguments, which raised TypeError.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; verify the context lines
(blank-line positions, comment spacing) against the target files before
applying. The blob hashes on the "index" lines are unchanged and will not
match the edited post-image.

diff --git a/tools/parser/msd/msdmap.py b/tools/parser/msd/msdmap.py
index b0f64b6..e9c48de 100644
--- a/tools/parser/msd/msdmap.py
+++ b/tools/parser/msd/msdmap.py
@@ -32,19 +32,25 @@ class Msdmap():
     def slo_msd_to_eng_long(self, slo_msd):
         return (self.msd_table[self.msd_table["slo_msd"] == slo_msd]["eng_long"]).values[0]
 
+    def slo_msd_to_eng_pos(self, slo_msd):
+        """Return the English POS for a Slovene MSD tag.
+
+        The first letter of slo_msd is the Slovene POS; it is matched
+        against column 1 of pos_slo_ang and column 3 of the matching
+        row is returned (presumably the English POS; confirm against
+        the table definition).
+
+        Raises:
+            ValueError: if no row matches the first letter.
+        """
+        return self.pos_slo_ang_map(1, slo_msd[0])[3]
+
     def pos_slo_ang_map(self, col, query):
         for pos in self.pos_slo_ang:
             if pos[col] == query:
                 return pos
         raise ValueError("Wrong part of speech.")
 
-    def msd_from_slo(self, msd):
-        pos = self.pos_slo_ang_map(1, msd[0])
-        category = pos[2]
-        attr = [self.pos_val_map(category, 1, m)
-                for m in msd[1:] if m != "-"]
-        return (pos, attr)
-
 
 if __name__ == "__main__":
     msdmap = Msdmap()
@@ -57,4 +63,5 @@ if __name__ == "__main__":
     for msd in test_msds:
         print(msd)
         print(msdmap.slo_msd_to_eng_long(msd))
+        print(msdmap.slo_msd_to_eng_pos(msd))
         print()
diff --git a/tools/parser/parser.py b/tools/parser/parser.py
index 2409089..081778a 100644
--- a/tools/parser/parser.py
+++ b/tools/parser/parser.py
@@ -118,13 +118,14 @@ def to_conll_2009_SRL(sentence_entry):
         return "_"
 
     msdm = Msdmap()
+    # works with kres, with parsed links
     out_str = ""
 
     for token in sentence_entry["tokens"]:
         if token[0] != "w":
             continue
+
         t_id = token[1]
-        msd = msdm.msd_from_slo(token[4])
         fprd = fillpred("TODO", "todo")
 
         """
@@ -141,10 +142,10 @@ def to_conll_2009_SRL(sentence_entry):
             token[2],  # form
             token[3],  # lemma
             token[3],  # plemma
-            msd[0][3],  # pos
-            msd[0][3],  # ppos
-            "|".join([msd[0][2]] + [el[4] for el in msd[1]]),  # feat
-            "|".join([msd[0][2]] + [el[4] for el in msd[1]]),  # pfeat
+            msdm.slo_msd_to_eng_pos(token[4]),  # pos
+            msdm.slo_msd_to_eng_pos(token[4]),  # ppos
+            "|".join(msdm.slo_msd_to_eng_long(token[4]).split(" ")),  # feat
+            "|".join(msdm.slo_msd_to_eng_long(token[4]).split(" ")),  # pfeat
             sentence_entry["links"][t_id][2],  # head
             sentence_entry["links"][t_id][2],  # phead
             sentence_entry["links"][t_id][0],  # deprel
@@ -167,4 +168,4 @@ def to_conll_2009_full(sentence_entry):
             token[2],  # form
         )
         out_str += "\n"
-    return out_str
\ No newline at end of file
+    return out_str