forked from kristjan/cjvt-valency
Fixed the weird bug (defined a list instead of a dict... should have gone to sleep yesterday)
This commit is contained in:
parent
0750c64f48
commit
d1dea2e22e
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1,3 +1,3 @@
|
||||||
data/samples/
|
data/samples/
|
||||||
*/__pycache__/
|
|
||||||
*egg-info/
|
*egg-info/
|
||||||
|
*.pyc
|
||||||
|
|
1
data/kres_srl_ikt
Symbolic link
1
data/kres_srl_ikt
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
/home/voje/work_data/final_json
|
|
@ -75,9 +75,9 @@ class Parser():
|
||||||
if len(sent_srl_links) == 0:
|
if len(sent_srl_links) == 0:
|
||||||
print("HI")
|
print("HI")
|
||||||
return []
|
return []
|
||||||
print(sent_srl_dict)
|
print(sent_srl_links)
|
||||||
# find the corresponding json file with srl links
|
# find the corresponding json file with srl links
|
||||||
return []
|
return sent_srl_links
|
||||||
|
|
||||||
def parse(self):
|
def parse(self):
|
||||||
if self.corpus == "kres":
|
if self.corpus == "kres":
|
||||||
|
@ -112,7 +112,7 @@ class Parser():
|
||||||
else:
|
else:
|
||||||
divs = root.findall(".//div")
|
divs = root.findall(".//div")
|
||||||
|
|
||||||
res_dict = [] # TODO: try making an iterator instead
|
res_dict = {}
|
||||||
|
|
||||||
# parse divs
|
# parse divs
|
||||||
for div in divs:
|
for div in divs:
|
||||||
|
@ -150,7 +150,7 @@ class Parser():
|
||||||
sentence_text += el.text
|
sentence_text += el.text
|
||||||
sentence_tokens += [{
|
sentence_tokens += [{
|
||||||
"word": False,
|
"word": False,
|
||||||
"tid": el_id,
|
"tid": int(el_id),
|
||||||
"text": el.text,
|
"text": el.text,
|
||||||
}]
|
}]
|
||||||
elif el.tag in self.S_TAGS:
|
elif el.tag in self.S_TAGS:
|
||||||
|
@ -166,16 +166,15 @@ class Parser():
|
||||||
raise KeyError("duplicated id: {}".format(sentence_id))
|
raise KeyError("duplicated id: {}".format(sentence_id))
|
||||||
jos_links = self.parse_jos_links(s)
|
jos_links = self.parse_jos_links(s)
|
||||||
srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None
|
srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None
|
||||||
srl_links_fixed = self.parse_srl_links(s, srl_links)
|
srl_links_parsed = self.parse_srl_links(s, srl_links)
|
||||||
print(srl_links)
|
|
||||||
res_dict[sentence_id] = {
|
res_dict[sentence_id] = {
|
||||||
"sid": sentence_id,
|
"sid": sentence_id,
|
||||||
"text": sentence_text,
|
"text": sentence_text,
|
||||||
"tokens": sentence_tokens,
|
"tokens": sentence_tokens,
|
||||||
"jos_links": "BBBB",
|
"jos_links": jos_links,
|
||||||
"srl_links": "AAAAA",
|
"srl_links": srl_links_parsed
|
||||||
}
|
}
|
||||||
|
print("------------------------------------------------- END")
|
||||||
print(res_dict[sentence_id])
|
print(res_dict[sentence_id])
|
||||||
print("------------------------------------------------- END")
|
print("------------------------------------------------- END")
|
||||||
return # TODO dev break
|
return # TODO dev break
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user