weird bug: Parser.py 175

2019-03-11 00:51:05 +01:00 · 2019-03-11 00:51:05 +01:00 · 0750c64f48
commit 0750c64f48
parent 55c07f88ca
4 changed files with 14 additions and 12 deletions
--- a/src/pkg/corpusparser/corpusparser/Parser.py
+++ b/src/pkg/corpusparser/corpusparser/Parser.py
@ -64,16 +64,20 @@ class Parser():
            }]
        return res_links
-    def parse_srl_links(self, sent_el, xml_file=None):
+    def parse_srl_links(self, sent_el, sent_srl_links):
        if self.corpus == "kres":
-            return self.parse_srl_links_kres(sent_el, xml_file)
+            return self.parse_srl_links_kres(sent_el, sent_srl_links)
        else:
            return self.parse_any_links_ssj(sent_el, "SRL")
-    def parse_srl_links_kres(self, sent_el, sent_srl_dict):
+    def parse_srl_links_kres(self, sent_el, sent_srl_links):
        print("HA")
        if len(sent_srl_links) == 0:
            print("HI")
            return []
        print(sent_srl_dict)
        # find the correspointing json file with srl links
-        return "TODO"
+        return []
    def parse(self):
        if self.corpus == "kres":
@ -158,20 +162,18 @@ class Parser():
                    sentence_id = "{}.{}.{}".format(f_id, p_id, s_id)
                    # make a generator instead of holding the whole corpus in memory
                    # TODO -- match ids
                    print("---")
                    print(sorted(srl_dict.keys(), key=lambda x: x.split(".")[1])[:100])
                    print(sentence_id)
                    print(srl_dict.get(str(sentence_id)))
                    print("---")
                    if sentence_id in res_dict:
                        raise KeyError("duplicated id: {}".format(sentence_id))
                    jos_links = self.parse_jos_links(s)
                    srl_links = srl_dict.get(sentence_id) if self.corpus == "kres" else None
                    srl_links_fixed = self.parse_srl_links(s, srl_links)
                    print(srl_links)
                    res_dict[sentence_id] = {
                        "sid": sentence_id,
                        "text": sentence_text,
                        "tokens": sentence_tokens,
-                        "jos_links": self.parse_jos_links(s),
+                        "jos_links": "BBBB",
-                        "srl_links": self.parse_srl_links(s, srl_dict[sentence_id]),
+                        "srl_links": "AAAAA",
                    }
                    print(res_dict[sentence_id])
--- a/src/pkg/corpusparser/corpusparser/pycache/Parser.cpython-35.pyc
+++ b/src/pkg/corpusparser/corpusparser/pycache/Parser.cpython-35.pyc
--- a/src/pkg/corpusparser/corpusparser/pycache/Sentence.cpython-35.pyc
+++ b/src/pkg/corpusparser/corpusparser/pycache/Sentence.cpython-35.pyc
--- a/src/pkg/corpusparser/corpusparser/pycache/init.cpython-35.pyc
+++ b/src/pkg/corpusparser/corpusparser/pycache/init.cpython-35.pyc