weird bug: Parser.py 175

This commit is contained in:
voje 2019-03-11 00:51:05 +01:00
parent 55c07f88ca
commit 0750c64f48
4 changed files with 14 additions and 12 deletions

View File

@@ -64,16 +64,20 @@ class Parser():
}] }]
return res_links return res_links
def parse_srl_links(self, sent_el, xml_file=None): def parse_srl_links(self, sent_el, sent_srl_links):
if self.corpus == "kres": if self.corpus == "kres":
return self.parse_srl_links_kres(sent_el, xml_file) return self.parse_srl_links_kres(sent_el, sent_srl_links)
else: else:
return self.parse_any_links_ssj(sent_el, "SRL") return self.parse_any_links_ssj(sent_el, "SRL")
def parse_srl_links_kres(self, sent_el, sent_srl_dict): def parse_srl_links_kres(self, sent_el, sent_srl_links):
print("HA")
if len(sent_srl_links) == 0:
print("HI")
return []
print(sent_srl_dict) print(sent_srl_dict)
# find the corresponding json file with srl links # find the corresponding json file with srl links
return "TODO" return []
def parse(self): def parse(self):
if self.corpus == "kres": if self.corpus == "kres":
@@ -158,20 +162,18 @@ class Parser():
sentence_id = "{}.{}.{}".format(f_id, p_id, s_id) sentence_id = "{}.{}.{}".format(f_id, p_id, s_id)
# make a generator instead of holding the whole corpus in memory # make a generator instead of holding the whole corpus in memory
# TODO -- match ids
print("---")
print(sorted(srl_dict.keys(), key=lambda x: x.split(".")[1])[:100])
print(sentence_id)
print(srl_dict.get(str(sentence_id)))
print("---")
if sentence_id in res_dict: if sentence_id in res_dict:
raise KeyError("duplicated id: {}".format(sentence_id)) raise KeyError("duplicated id: {}".format(sentence_id))
jos_links = self.parse_jos_links(s)
srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None
srl_links_fixed = self.parse_srl_links(s, srl_links)
print(srl_links)
res_dict[sentence_id] = { res_dict[sentence_id] = {
"sid": sentence_id, "sid": sentence_id,
"text": sentence_text, "text": sentence_text,
"tokens": sentence_tokens, "tokens": sentence_tokens,
"jos_links": self.parse_jos_links(s), "jos_links": "BBBB",
"srl_links": self.parse_srl_links(s, srl_dict[sentence_id]), "srl_links": "AAAAA",
} }
print(res_dict[sentence_id]) print(res_dict[sentence_id])