forked from kristjan/cjvt-valency
Adding processing improvements.
This commit is contained in:
@@ -3,6 +3,41 @@ from corpusparser import enriched_lemma
|
||||
|
||||
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
|
||||
|
||||
def frames_from_db_entry_headword(dbent, headword):
    """Build frames for the tokens of one entry whose lemma is ``headword``.

    SRL links are grouped by their source ("from") token; every source token
    whose ``enriched_lemma`` equals ``headword`` yields one ``Frame`` whose
    slots are that token's links.

    Args:
        dbent: Entry mapping. Reads ``dbent["sid"]``, ``dbent["tokens"]``
            (each token has a ``"tid"``) and, when present,
            ``dbent["srl_links"]`` (each link has ``"from"``, ``"to"`` and
            ``"afun"``).
        headword: Value compared against ``enriched_lemma(token)`` of each
            link's source token.

    Returns:
        A list of ``Frame`` objects, one per matching source token. The list
        is empty when the entry has no ``"srl_links"`` key or when no link's
        source token matches ``headword``.

    Raises:
        KeyError: If a link's ``"from"`` id does not correspond to any token
            in ``dbent["tokens"]``.
    """
    # Without SRL links there is nothing to build, so return before doing
    # any work on the token list.
    if "srl_links" not in dbent:
        return []

    def _full_tid(tid):
        # Qualify a token id with the entry's sid: "<sid>.<tid>".
        return ".".join([dbent["sid"], str(tid)])

    # Index tokens by stringified tid; link endpoints are stringified the
    # same way before lookup.
    tokens_by_tid = {str(tok["tid"]): tok for tok in dbent["tokens"]}

    # Group the links by source token, keeping only sources whose lemma is
    # the requested headword.
    links_by_hw = {}
    for link in dbent["srl_links"]:
        hw_tid = str(link["from"])
        if enriched_lemma(tokens_by_tid[hw_tid]) != headword:
            continue
        links_by_hw.setdefault(hw_tid, []).append(link)

    return [
        Frame(
            hw_lemma=enriched_lemma(tokens_by_tid[hw_tid]),
            tids=[_full_tid(hw_tid)],
            slots=[
                Slot(functor=link["afun"], tids=[_full_tid(link["to"])])
                for link in links
            ],
            # The sentence list is rebuilt for every frame on purpose, so
            # frames never share one mutable list object.
            sentences=[
                [(_full_tid(tok["tid"]), tok) for tok in dbent["tokens"]],
            ],
        )
        for hw_tid, links in links_by_hw.items()
    ]
|
||||
|
||||
def frames_from_db_entry(dbent):
|
||||
def _full_tid(tid):
|
||||
return ".".join([dbent["sid"], str(tid)])
|
||||
|
||||
Reference in New Issue
Block a user