forked from kristjan/cjvt-valency
working on api changes
This commit is contained in:
parent
ca942344d7
commit
044fa66910
|
@ -26,6 +26,7 @@ from email.mime.text import MIMEText
|
|||
from copy import deepcopy as DC
|
||||
from pathlib import Path
|
||||
from pymongo import MongoClient
|
||||
import pymongo
|
||||
import argparse
|
||||
|
||||
# some db collections
|
||||
|
@ -55,7 +56,8 @@ CORS(app)
|
|||
|
||||
@app.route("/api/dev")
|
||||
def api_dev():
|
||||
cur = valdb.kres.find({"sid": "F0015940.37.2"})
|
||||
print("DEV")
|
||||
cur = valdb.kres.find({"headwords": "nagovarjati"})
|
||||
frames = []
|
||||
for ent in cur:
|
||||
frames += frames_from_db_entry(ent)
|
||||
|
@ -236,20 +238,18 @@ def api_token():
|
|||
def prepare_frames(ret_frames):
|
||||
# append sentences
|
||||
for frame in ret_frames:
|
||||
frame.sentences = []
|
||||
unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids}
|
||||
log.debug(str(unique_sids))
|
||||
frame.sentences = []
|
||||
# frame.sentences = []
|
||||
frame.aggr_sent = {}
|
||||
# sid, tid==hw
|
||||
for sid, tid in unique_sids.items():
|
||||
hwl = vallex.get_token(tid)["lemma"]
|
||||
# hwl = vallex.get_token(tid)["lemma"]
|
||||
hwl = "derp"
|
||||
tmp_idx = len(frame.sentences)
|
||||
if hwl not in frame.aggr_sent:
|
||||
frame.aggr_sent[hwl] = []
|
||||
frame.aggr_sent[hwl].append(tmp_idx)
|
||||
frame.sentences.append(
|
||||
vallex.get_tokenized_sentence(tid)
|
||||
)
|
||||
# return (n-frames, rendered template)
|
||||
# json frames
|
||||
json_ret = {"frames": []}
|
||||
|
@ -259,7 +259,7 @@ def prepare_frames(ret_frames):
|
|||
|
||||
|
||||
# input: query args "hw" (headword) and "rf" (name of the reduce function)
|
||||
@app.route("/api/hw-frames")
|
||||
@app.route("/api/frames")
|
||||
def api_get_frames():
|
||||
hw = request.args.get("hw")
|
||||
if hw is None:
|
||||
|
@ -267,8 +267,21 @@ def api_get_frames():
|
|||
|
||||
rf_name = request.args.get("rf", "reduce_0") # 2nd is default
|
||||
RF = reduce_functions[rf_name]["f"]
|
||||
entry = vallex.entries[hw] # TODO hw -> [Frame,]
|
||||
ret_frames = RF(entry.raw_frames, vallex)
|
||||
|
||||
# entry = vallex.entries[hw] # TODO hw -> [Frame,]
|
||||
cur = valdb.kres.find({"headwords": hw})
|
||||
frames = []
|
||||
for ent in cur:
|
||||
# TODO: maybe filter by hw?
|
||||
frames += frames_from_db_entry(ent)
|
||||
|
||||
# return json.dumps([x.to_json() for x in frames])
|
||||
|
||||
print("A")
|
||||
print(frames[0].to_json())
|
||||
ret_frames = RF(frames, None)
|
||||
print("B")
|
||||
print(ret_frames[0].to_json())
|
||||
return prepare_frames(ret_frames)
|
||||
|
||||
|
||||
|
@ -408,18 +421,22 @@ def prepare_db():
|
|||
for corpus in CORPORA:
|
||||
for e in valdb[corpus].find({}):
|
||||
if e["srl_links"] is None:
|
||||
continue
|
||||
hw_tids = list(set([x["from"] for x in e["srl_links"]]))
|
||||
hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
|
||||
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
|
||||
e["headwords"] = headwords
|
||||
e["headwords"] = []
|
||||
e["functors"] = []
|
||||
else:
|
||||
hw_tids = list(set([x["from"] for x in e["srl_links"]]))
|
||||
hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
|
||||
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
|
||||
e["headwords"] = headwords
|
||||
|
||||
functors = list(set([x["afun"] for x in e["srl_links"]]))
|
||||
e["headwords"] = headwords
|
||||
e["functors"] = functors
|
||||
functors = list(set([x["afun"] for x in e["srl_links"]]))
|
||||
e["functors"] = functors
|
||||
|
||||
valdb[corpus].save(e)
|
||||
|
||||
valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
|
||||
valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
|
||||
|
||||
# create app_index (used in frontend, left side word index)
|
||||
for corpus in CORPORA:
|
||||
res_hws = {}
|
||||
|
|
|
@ -6,6 +6,8 @@ def frames_from_db_entry(dbent):
|
|||
def _full_tid(tid):
|
||||
return ".".join([dbent["sid"], str(tid)])
|
||||
|
||||
token_dict = {str(x["tid"]): x for x in dbent["tokens"]}
|
||||
|
||||
frames = []
|
||||
if "srl_links" not in dbent:
|
||||
return []
|
||||
|
@ -18,19 +20,20 @@ def frames_from_db_entry(dbent):
|
|||
srldict[key] += [srl]
|
||||
for hwtid, srlarr in srldict.items():
|
||||
frames += [Frame(
|
||||
hw_lemma=_full_tid(hwtid),
|
||||
tids=[_full_tid(x["to"]) for x in srlarr],
|
||||
hw_lemma=token_dict[hwtid]["text"],
|
||||
tids=[_full_tid(hwtid)],
|
||||
slots=[
|
||||
Slot(
|
||||
functor=srl["afun"],
|
||||
tids=[_full_tid(srl["to"])]
|
||||
) for srl in srlarr
|
||||
]
|
||||
],
|
||||
sentences=[dbent["tokens"]],
|
||||
)]
|
||||
return frames
|
||||
|
||||
class Frame():
|
||||
def __init__(self, tids, deep_links=None, slots=None, hw_lemma=None):
|
||||
def __init__(self, tids, deep_links=None, slots=None, hw_lemma=None, sentences=None):
|
||||
self.hw = hw_lemma
|
||||
self.tids = tids # list of tokens with the same hw_lemma
|
||||
# Each tid = "S123.t123";
|
||||
|
@ -41,7 +44,7 @@ class Frame():
|
|||
else:
|
||||
self.slots = slots
|
||||
self.sense_info = {}
|
||||
self.sentences = None # Used for passing to view in app.py, get_frames
|
||||
self.sentences = sentences
|
||||
self.aggr_sent = None # Dictionary { hw: self.sentences idx }
|
||||
|
||||
def to_json(self):
|
||||
|
|
|
@ -56,11 +56,13 @@ def reduce_1(frames, vallex=None):
|
|||
break
|
||||
else:
|
||||
# Python for-else: the else branch runs only if the loop finished without hitting break.
|
||||
# init new frame set [set_of_functors, list_of_one_frame]
|
||||
frame_sets.append([functors, [frame]])
|
||||
|
||||
ret_frames = []
|
||||
for fs in frame_sets:
|
||||
tids = []
|
||||
sentences = []
|
||||
slots = {}
|
||||
# All possible slots in this frame.
|
||||
for functor in fs[0]:
|
||||
|
@ -68,12 +70,13 @@ def reduce_1(frames, vallex=None):
|
|||
# Reduce slots from all frames. (Merge ACT from all frames, ...)
|
||||
for frame in fs[1]:
|
||||
tids += frame.tids
|
||||
sentences += frame.sentences
|
||||
for sl in frame.slots:
|
||||
slots[sl.functor].tids += sl.tids
|
||||
slots_list = []
|
||||
for k, e in slots.items():
|
||||
slots_list.append(e)
|
||||
rf = Frame(tids=tids, slots=slots_list)
|
||||
rf = Frame(tids=tids, slots=slots_list, sentences=sentences)
|
||||
rf.sort_slots()
|
||||
ret_frames.append(rf)
|
||||
return sorted_by_len_tids(ret_frames)
|
||||
|
|
Loading…
Reference in New Issue
Block a user