Fixed the weird bug: `res_dict` was defined as a list instead of a dict (should have gone to sleep yesterday).

dev
voje 5 years ago
parent 0750c64f48
commit d1dea2e22e

2
.gitignore vendored

@ -1,3 +1,3 @@
data/samples/ data/samples/
*/__pycache__/
*egg-info/ *egg-info/
*.pyc

@ -0,0 +1 @@
/home/voje/work_data/final_json

@ -75,9 +75,9 @@ class Parser():
if len(sent_srl_links) == 0: if len(sent_srl_links) == 0:
print("HI") print("HI")
return [] return []
print(sent_srl_dict) print(sent_srl_links)
# find the corresponding json file with srl links # find the corresponding json file with srl links
return [] return sent_srl_links
def parse(self): def parse(self):
if self.corpus == "kres": if self.corpus == "kres":
@ -112,7 +112,7 @@ class Parser():
else: else:
divs = root.findall(".//div") divs = root.findall(".//div")
res_dict = [] # TODO: try making an iterator instead res_dict = {}
# parse divs # parse divs
for div in divs: for div in divs:
@ -150,7 +150,7 @@ class Parser():
sentence_text += el.text sentence_text += el.text
sentence_tokens += [{ sentence_tokens += [{
"word": False, "word": False,
"tid": el_id, "tid": int(el_id),
"text": el.text, "text": el.text,
}] }]
elif el.tag in self.S_TAGS: elif el.tag in self.S_TAGS:
@ -166,16 +166,15 @@ class Parser():
raise KeyError("duplicated id: {}".format(sentence_id)) raise KeyError("duplicated id: {}".format(sentence_id))
jos_links = self.parse_jos_links(s) jos_links = self.parse_jos_links(s)
srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None
srl_links_fixed = self.parse_srl_links(s, srl_links) srl_links_parsed = self.parse_srl_links(s, srl_links)
print(srl_links)
res_dict[sentence_id] = { res_dict[sentence_id] = {
"sid": sentence_id, "sid": sentence_id,
"text": sentence_text, "text": sentence_text,
"tokens": sentence_tokens, "tokens": sentence_tokens,
"jos_links": "BBBB", "jos_links": jos_links,
"srl_links": "AAAAA", "srl_links": srl_links_parsed
} }
print("------------------------------------------------- END")
print(res_dict[sentence_id]) print(res_dict[sentence_id])
print("------------------------------------------------- END") print("------------------------------------------------- END")
return # TODO dev break return # TODO dev break

Loading…
Cancel
Save