Adding processing improvements.

This commit is contained in:
2020-09-23 13:02:31 +02:00
parent 220529b777
commit ce1fb46b4e
2 changed files with 45 additions and 15 deletions

View File

@@ -3,6 +3,41 @@ from corpusparser import enriched_lemma
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
def frames_from_db_entry_headword(dbent, headword):
    """Build the frames of one entry whose head token matches ``headword``.

    Every link in ``dbent["srl_links"]`` whose source token (``"from"``)
    has ``enriched_lemma(token) == headword`` is kept; the kept links are
    grouped by source token and each group becomes one ``Frame`` with one
    ``Slot`` per link.

    Args:
        dbent: Mapping for a single entry. This function reads
            ``dbent["sid"]``, ``dbent["tokens"]`` (each token carries a
            ``"tid"``) and the optional ``dbent["srl_links"]`` (each link
            carries ``"from"``, ``"to"`` and ``"afun"``).
            NOTE(review): presumably one parsed sentence per entry, with
            "sid" being the sentence id -- confirm against the caller.
        headword: Value compared against ``enriched_lemma`` of each link's
            source token; links starting at other tokens are ignored.

    Returns:
        A list of ``Frame`` objects in order of first appearance of their
        head token among the links. Empty when the entry has no (or empty)
        ``srl_links`` or when no link starts at a matching token.

    Raises:
        KeyError: If a link's ``"from"`` id does not match any token's
            ``"tid"`` in ``dbent["tokens"]``.
    """

    def _full_tid(tid):
        # Qualify a token id with the entry's "sid" as "<sid>.<tid>".
        return ".".join([dbent["sid"], str(tid)])

    # Bail out before doing any work when there is nothing to link; a
    # missing key, None and an empty collection are all treated alike.
    srl_links = dbent.get("srl_links")
    if not srl_links:
        return []

    # Token lookup keyed by the stringified tid, because link endpoints are
    # stringified the same way before the lookup below.
    token_dict = {str(tok["tid"]): tok for tok in dbent["tokens"]}

    # Group the links by their source token, keeping only sources whose
    # enriched lemma equals the requested headword.
    links_by_head = {}
    for srl in srl_links:
        key = str(srl["from"])
        if enriched_lemma(token_dict[key]) != headword:
            continue
        links_by_head.setdefault(key, []).append(srl)

    return [
        Frame(
            hw_lemma=enriched_lemma(token_dict[hwtid]),
            tids=[_full_tid(hwtid)],
            slots=[
                Slot(functor=srl["afun"], tids=[_full_tid(srl["to"])])
                for srl in srlarr
            ],
            # One sentence per frame: (qualified tid, token) pairs. Built
            # fresh for every frame so frames never share a mutable list.
            sentences=[
                [(_full_tid(tok["tid"]), tok) for tok in dbent["tokens"]],
            ],
        )
        for hwtid, srlarr in links_by_head.items()
    ]
def frames_from_db_entry(dbent):
def _full_tid(tid):
return ".".join([dbent["sid"], str(tid)])