|
|
@ -75,9 +75,9 @@ class Parser():
|
|
|
|
if len(sent_srl_links) == 0:
|
|
|
|
if len(sent_srl_links) == 0:
|
|
|
|
print("HI")
|
|
|
|
print("HI")
|
|
|
|
return []
|
|
|
|
return []
|
|
|
|
print(sent_srl_dict)
|
|
|
|
print(sent_srl_links)
|
|
|
|
# find the correspointing json file with srl links
|
|
|
|
# find the correspointing json file with srl links
|
|
|
|
return []
|
|
|
|
return sent_srl_links
|
|
|
|
|
|
|
|
|
|
|
|
def parse(self):
|
|
|
|
def parse(self):
|
|
|
|
if self.corpus == "kres":
|
|
|
|
if self.corpus == "kres":
|
|
|
@ -112,7 +112,7 @@ class Parser():
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
divs = root.findall(".//div")
|
|
|
|
divs = root.findall(".//div")
|
|
|
|
|
|
|
|
|
|
|
|
res_dict = [] # TODO: try making an iterator instead
|
|
|
|
res_dict = {}
|
|
|
|
|
|
|
|
|
|
|
|
# parse divs
|
|
|
|
# parse divs
|
|
|
|
for div in divs:
|
|
|
|
for div in divs:
|
|
|
@ -150,7 +150,7 @@ class Parser():
|
|
|
|
sentence_text += el.text
|
|
|
|
sentence_text += el.text
|
|
|
|
sentence_tokens += [{
|
|
|
|
sentence_tokens += [{
|
|
|
|
"word": False,
|
|
|
|
"word": False,
|
|
|
|
"tid": el_id,
|
|
|
|
"tid": int(el_id),
|
|
|
|
"text": el.text,
|
|
|
|
"text": el.text,
|
|
|
|
}]
|
|
|
|
}]
|
|
|
|
elif el.tag in self.S_TAGS:
|
|
|
|
elif el.tag in self.S_TAGS:
|
|
|
@ -166,16 +166,15 @@ class Parser():
|
|
|
|
raise KeyError("duplicated id: {}".format(sentence_id))
|
|
|
|
raise KeyError("duplicated id: {}".format(sentence_id))
|
|
|
|
jos_links = self.parse_jos_links(s)
|
|
|
|
jos_links = self.parse_jos_links(s)
|
|
|
|
srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None
|
|
|
|
srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None
|
|
|
|
srl_links_fixed = self.parse_srl_links(s, srl_links)
|
|
|
|
srl_links_parsed = self.parse_srl_links(s, srl_links)
|
|
|
|
print(srl_links)
|
|
|
|
|
|
|
|
res_dict[sentence_id] = {
|
|
|
|
res_dict[sentence_id] = {
|
|
|
|
"sid": sentence_id,
|
|
|
|
"sid": sentence_id,
|
|
|
|
"text": sentence_text,
|
|
|
|
"text": sentence_text,
|
|
|
|
"tokens": sentence_tokens,
|
|
|
|
"tokens": sentence_tokens,
|
|
|
|
"jos_links": "BBBB",
|
|
|
|
"jos_links": jos_links,
|
|
|
|
"srl_links": "AAAAA",
|
|
|
|
"srl_links": srl_links_parsed
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
print("------------------------------------------------- END")
|
|
|
|
print(res_dict[sentence_id])
|
|
|
|
print(res_dict[sentence_id])
|
|
|
|
print("------------------------------------------------- END")
|
|
|
|
print("------------------------------------------------- END")
|
|
|
|
return # TODO dev break
|
|
|
|
return # TODO dev break
|
|
|
|