Fixed the weird bug (defined a list instead of a dict... should have gone to sleep yesterday)

dev
voje 5 years ago
parent 0750c64f48
commit d1dea2e22e

2
.gitignore vendored

@ -1,3 +1,3 @@
data/samples/
*/__pycache__/
*egg-info/
*.pyc

@ -0,0 +1 @@
/home/voje/work_data/final_json

@ -75,9 +75,9 @@ class Parser():
if len(sent_srl_links) == 0:
print("HI")
return []
print(sent_srl_dict)
print(sent_srl_links)
# find the corresponding json file with srl links
return []
return sent_srl_links
def parse(self):
if self.corpus == "kres":
@ -112,7 +112,7 @@ class Parser():
else:
divs = root.findall(".//div")
res_dict = [] # TODO: try making an iterator instead
res_dict = {}
# parse divs
for div in divs:
@ -150,7 +150,7 @@ class Parser():
sentence_text += el.text
sentence_tokens += [{
"word": False,
"tid": el_id,
"tid": int(el_id),
"text": el.text,
}]
elif el.tag in self.S_TAGS:
@ -166,16 +166,15 @@ class Parser():
raise KeyError("duplicated id: {}".format(sentence_id))
jos_links = self.parse_jos_links(s)
srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None
srl_links_fixed = self.parse_srl_links(s, srl_links)
print(srl_links)
srl_links_parsed = self.parse_srl_links(s, srl_links)
res_dict[sentence_id] = {
"sid": sentence_id,
"text": sentence_text,
"tokens": sentence_tokens,
"jos_links": "BBBB",
"srl_links": "AAAAA",
"jos_links": jos_links,
"srl_links": srl_links_parsed
}
print("------------------------------------------------- END")
print(res_dict[sentence_id])
print("------------------------------------------------- END")
return # TODO dev break

Loading…
Cancel
Save