diff --git a/src/pkg/corpusparser/corpusparser/Parser.py b/src/pkg/corpusparser/corpusparser/Parser.py index 9fb0e91..fb8b408 100644 --- a/src/pkg/corpusparser/corpusparser/Parser.py +++ b/src/pkg/corpusparser/corpusparser/Parser.py @@ -64,16 +64,20 @@ class Parser(): }] return res_links - def parse_srl_links(self, sent_el, xml_file=None): + def parse_srl_links(self, sent_el, sent_srl_links): if self.corpus == "kres": - return self.parse_srl_links_kres(sent_el, xml_file) + return self.parse_srl_links_kres(sent_el, sent_srl_links) else: return self.parse_any_links_ssj(sent_el, "SRL") - def parse_srl_links_kres(self, sent_el, sent_srl_dict): + def parse_srl_links_kres(self, sent_el, sent_srl_links): + print("HA") + if len(sent_srl_links) == 0: + print("HI") + return [] print(sent_srl_dict) # find the correspointing json file with srl links - return "TODO" + return [] def parse(self): if self.corpus == "kres": @@ -158,20 +162,18 @@ class Parser(): sentence_id = "{}.{}.{}".format(f_id, p_id, s_id) # make a generator instead of holding the whole corpus in memory - # TODO -- match ids - print("---") - print(sorted(srl_dict.keys(), key=lambda x: x.split(".")[1])[:100]) - print(sentence_id) - print(srl_dict.get(str(sentence_id))) - print("---") if sentence_id in res_dict: raise KeyError("duplicated id: {}".format(sentence_id)) + jos_links = self.parse_jos_links(s) + srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None + srl_links_fixed = self.parse_srl_links(s, srl_links) + print(srl_links) res_dict[sentence_id] = { "sid": sentence_id, "text": sentence_text, "tokens": sentence_tokens, - "jos_links": self.parse_jos_links(s), - "srl_links": self.parse_srl_links(s, srl_dict[sentence_id]), + "jos_links": "BBBB", + "srl_links": "AAAAA", } print(res_dict[sentence_id]) diff --git a/src/pkg/corpusparser/corpusparser/__pycache__/Parser.cpython-35.pyc b/src/pkg/corpusparser/corpusparser/__pycache__/Parser.cpython-35.pyc new file mode 100644 index 0000000..e438b67 Binary files /dev/null and b/src/pkg/corpusparser/corpusparser/__pycache__/Parser.cpython-35.pyc differ diff --git a/src/pkg/corpusparser/corpusparser/__pycache__/Sentence.cpython-35.pyc b/src/pkg/corpusparser/corpusparser/__pycache__/Sentence.cpython-35.pyc new file mode 100644 index 0000000..c0ae9f7 Binary files /dev/null and b/src/pkg/corpusparser/corpusparser/__pycache__/Sentence.cpython-35.pyc differ diff --git a/src/pkg/corpusparser/corpusparser/__pycache__/__init__.cpython-35.pyc b/src/pkg/corpusparser/corpusparser/__pycache__/__init__.cpython-35.pyc new file mode 100644 index 0000000..a365c8e Binary files /dev/null and b/src/pkg/corpusparser/corpusparser/__pycache__/__init__.cpython-35.pyc differ