Adding processing improvements.

2020-09-23 13:02:31 +02:00
parent 220529b777
commit ce1fb46b4e
2 changed files with 45 additions and 15 deletions
@@ -3,6 +3,41 @@ from corpusparser import enriched_lemma

 log = logging.getLogger(__name__)

+def frames_from_db_entry_headword(dbent, headword):
+    def _full_tid(tid):
+        return ".".join([dbent["sid"], str(tid)])
+
+    token_dict = {str(x["tid"]): x for x in dbent["tokens"]}
+
+    frames = []
+    if "srl_links" not in dbent:
+        return []
+    srldict = {}
+    for srl in dbent["srl_links"]:
+        key = str(srl["from"])
+        if enriched_lemma(token_dict[key]) != headword:
+            continue
+        if key not in srldict:
+            srldict[key] = [srl]
+        else:
+            srldict[key] += [srl]
+    for hwtid, srlarr in srldict.items():
+        frames += [Frame(
+            hw_lemma=enriched_lemma(token_dict[hwtid]),
+            tids=[_full_tid(hwtid)],
+            slots=[
+                Slot(
+                    functor=srl["afun"],
+                    tids=[_full_tid(srl["to"])]
+                ) for srl in srlarr
+            ],
+            # sentences=[(dbent["sid"], dbent["tokens"])],
+            sentences=[
+                [(_full_tid(t["tid"]), t) for t in dbent["tokens"]],
+            ]
+        )]
+    return frames
+
 def frames_from_db_entry(dbent):
    def _full_tid(tid):
        return ".".join([dbent["sid"], str(tid)])