Fixed the weird bug (defined a list instead of a dict... should have gone to sleep yesterday)
This commit is contained in:
parent
0750c64f48
commit
d1dea2e22e
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1,3 +1,3 @@
|
|||
data/samples/
|
||||
*/__pycache__/
|
||||
*egg-info/
|
||||
*.pyc
|
||||
|
|
1
data/kres_srl_ikt
Symbolic link
1
data/kres_srl_ikt
Symbolic link
|
@ -0,0 +1 @@
|
|||
/home/voje/work_data/final_json
|
|
@ -75,9 +75,9 @@ class Parser():
|
|||
if len(sent_srl_links) == 0:
|
||||
print("HI")
|
||||
return []
|
||||
print(sent_srl_dict)
|
||||
print(sent_srl_links)
|
||||
# find the corresponding json file with srl links
|
||||
return []
|
||||
return sent_srl_links
|
||||
|
||||
def parse(self):
|
||||
if self.corpus == "kres":
|
||||
|
@ -112,7 +112,7 @@ class Parser():
|
|||
else:
|
||||
divs = root.findall(".//div")
|
||||
|
||||
res_dict = [] # TODO: try making an iterator instead
|
||||
res_dict = {}
|
||||
|
||||
# parse divs
|
||||
for div in divs:
|
||||
|
@ -150,7 +150,7 @@ class Parser():
|
|||
sentence_text += el.text
|
||||
sentence_tokens += [{
|
||||
"word": False,
|
||||
"tid": el_id,
|
||||
"tid": int(el_id),
|
||||
"text": el.text,
|
||||
}]
|
||||
elif el.tag in self.S_TAGS:
|
||||
|
@ -166,16 +166,15 @@ class Parser():
|
|||
raise KeyError("duplicated id: {}".format(sentence_id))
|
||||
jos_links = self.parse_jos_links(s)
|
||||
srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None
|
||||
srl_links_fixed = self.parse_srl_links(s, srl_links)
|
||||
print(srl_links)
|
||||
srl_links_parsed = self.parse_srl_links(s, srl_links)
|
||||
res_dict[sentence_id] = {
|
||||
"sid": sentence_id,
|
||||
"text": sentence_text,
|
||||
"tokens": sentence_tokens,
|
||||
"jos_links": "BBBB",
|
||||
"srl_links": "AAAAA",
|
||||
"jos_links": jos_links,
|
||||
"srl_links": srl_links_parsed
|
||||
}
|
||||
|
||||
print("------------------------------------------------- END")
|
||||
print(res_dict[sentence_id])
|
||||
print("------------------------------------------------- END")
|
||||
return # TODO dev break
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user