forked from kristjan/cjvt-valency
weird bug: Parser.py 175
This commit is contained in:
parent
55c07f88ca
commit
0750c64f48
|
@ -64,16 +64,20 @@ class Parser():
|
||||||
}]
|
}]
|
||||||
return res_links
|
return res_links
|
||||||
|
|
||||||
def parse_srl_links(self, sent_el, xml_file=None):
|
def parse_srl_links(self, sent_el, sent_srl_links):
|
||||||
if self.corpus == "kres":
|
if self.corpus == "kres":
|
||||||
return self.parse_srl_links_kres(sent_el, xml_file)
|
return self.parse_srl_links_kres(sent_el, sent_srl_links)
|
||||||
else:
|
else:
|
||||||
return self.parse_any_links_ssj(sent_el, "SRL")
|
return self.parse_any_links_ssj(sent_el, "SRL")
|
||||||
|
|
||||||
def parse_srl_links_kres(self, sent_el, sent_srl_dict):
|
def parse_srl_links_kres(self, sent_el, sent_srl_links):
|
||||||
|
print("HA")
|
||||||
|
if len(sent_srl_links) == 0:
|
||||||
|
print("HI")
|
||||||
|
return []
|
||||||
print(sent_srl_dict)
|
print(sent_srl_dict)
|
||||||
# find the corresponding json file with srl links
|
# find the corresponding json file with srl links
|
||||||
return "TODO"
|
return []
|
||||||
|
|
||||||
def parse(self):
|
def parse(self):
|
||||||
if self.corpus == "kres":
|
if self.corpus == "kres":
|
||||||
|
@ -158,20 +162,18 @@ class Parser():
|
||||||
sentence_id = "{}.{}.{}".format(f_id, p_id, s_id)
|
sentence_id = "{}.{}.{}".format(f_id, p_id, s_id)
|
||||||
|
|
||||||
# make a generator instead of holding the whole corpus in memory
|
# make a generator instead of holding the whole corpus in memory
|
||||||
# TODO -- match ids
|
|
||||||
print("---")
|
|
||||||
print(sorted(srl_dict.keys(), key=lambda x: x.split(".")[1])[:100])
|
|
||||||
print(sentence_id)
|
|
||||||
print(srl_dict.get(str(sentence_id)))
|
|
||||||
print("---")
|
|
||||||
if sentence_id in res_dict:
|
if sentence_id in res_dict:
|
||||||
raise KeyError("duplicated id: {}".format(sentence_id))
|
raise KeyError("duplicated id: {}".format(sentence_id))
|
||||||
|
jos_links = self.parse_jos_links(s)
|
||||||
|
srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None
|
||||||
|
srl_links_fixed = self.parse_srl_links(s, srl_links)
|
||||||
|
print(srl_links)
|
||||||
res_dict[sentence_id] = {
|
res_dict[sentence_id] = {
|
||||||
"sid": sentence_id,
|
"sid": sentence_id,
|
||||||
"text": sentence_text,
|
"text": sentence_text,
|
||||||
"tokens": sentence_tokens,
|
"tokens": sentence_tokens,
|
||||||
"jos_links": self.parse_jos_links(s),
|
"jos_links": "BBBB",
|
||||||
"srl_links": self.parse_srl_links(s, srl_dict[sentence_id]),
|
"srl_links": "AAAAA",
|
||||||
}
|
}
|
||||||
|
|
||||||
print(res_dict[sentence_id])
|
print(res_dict[sentence_id])
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user