|
|
@ -235,28 +235,6 @@ def api_token():
|
|
|
|
|
|
|
|
|
|
|
|
# FRAMES ----------------------------.
|
|
|
|
# FRAMES ----------------------------.
|
|
|
|
|
|
|
|
|
|
|
|
def prepare_frames(ret_frames):
    """Attach per-headword sentence groupings to each frame and serialize.

    For every frame, (re)builds ``frame.aggr_sent`` — a mapping of
    headword to a list of sentence indexes — then returns a JSON string
    of the shape ``{"frames": [...]}`` built from each frame's
    ``to_json()`` output.

    NOTE(review): ``len(frame.sentences)`` is read inside the loop but
    nothing appends to ``frame.sentences`` here, so every index appended
    for a frame is the same value — presumably sentences are filled in
    elsewhere; confirm against callers.
    """
    for frame in ret_frames:
        # One entry per sentence id: strip the trailing token index from
        # each tid ("a.b.3" -> "a.b") to deduplicate by sentence.
        sentence_to_tid = {".".join(t.split(".")[:-1]): t for t in frame.tids}

        frame.aggr_sent = {}
        for _sid, _tid in sentence_to_tid.items():
            headword = frame.hw
            sent_idx = len(frame.sentences)
            frame.aggr_sent.setdefault(headword, []).append(sent_idx)

    # Deep-copy each frame's JSON form so later mutation of the frames
    # cannot leak into the serialized payload.
    payload = {"frames": [DC(f.to_json()) for f in ret_frames]}
    return json.dumps(payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# input: hw, reduct_function
|
|
|
|
# input: hw, reduct_function
|
|
|
|
@app.route("/api/frames")
|
|
|
|
@app.route("/api/frames")
|
|
|
|
def api_get_frames():
|
|
|
|
def api_get_frames():
|
|
|
@ -280,8 +258,34 @@ def api_get_frames():
|
|
|
|
frames = [x for x in frames if x.hw == hw]
|
|
|
|
frames = [x for x in frames if x.hw == hw]
|
|
|
|
|
|
|
|
|
|
|
|
ret_frames = RF(frames, valdb[SENSEMAP_COLL])
|
|
|
|
ret_frames = RF(frames, valdb[SENSEMAP_COLL])
|
|
|
|
return prepare_frames(ret_frames)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
json_ret = {"frames": []}
|
|
|
|
|
|
|
|
for frame in ret_frames:
|
|
|
|
|
|
|
|
json_ret["frames"].append(frame.to_json())
|
|
|
|
|
|
|
|
return json.dumps(json_ret)
|
|
|
|
|
|
|
|
# return prepare_frames(ret_frames)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _aggregate_by_hw(ret_frames):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tid_to_lemma(tid, sentence):
|
|
|
|
|
|
|
|
# slow and hackish
|
|
|
|
|
|
|
|
for pair in sentence:
|
|
|
|
|
|
|
|
if pair[0] == tid:
|
|
|
|
|
|
|
|
return pair[1]["lemma"]
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# append sentences
|
|
|
|
|
|
|
|
for frame in ret_frames:
|
|
|
|
|
|
|
|
# unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids}
|
|
|
|
|
|
|
|
frame.aggr_sent = {} # map of headword: [sentence indexes]
|
|
|
|
|
|
|
|
# sid, tid==hw
|
|
|
|
|
|
|
|
for i, tid in enumerate(frame.tids):
|
|
|
|
|
|
|
|
# hwl = vallex.get_token(tid)["lemma"]
|
|
|
|
|
|
|
|
hwl = _tid_to_lemma(tid, frame.sentences[i])
|
|
|
|
|
|
|
|
if hwl not in frame.aggr_sent:
|
|
|
|
|
|
|
|
frame.aggr_sent[hwl] = []
|
|
|
|
|
|
|
|
frame.aggr_sent[hwl].append(i)
|
|
|
|
|
|
|
|
return ret_frames
|
|
|
|
|
|
|
|
|
|
|
|
# input: functor, reduce_function
|
|
|
|
# input: functor, reduce_function
|
|
|
|
@app.route("/api/functor-frames")
|
|
|
|
@app.route("/api/functor-frames")
|
|
|
@ -302,12 +306,17 @@ def api_get_functor_frames():
|
|
|
|
for ent in cur:
|
|
|
|
for ent in cur:
|
|
|
|
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
|
|
|
|
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
|
|
|
|
|
|
|
|
|
|
|
|
for f in frames:
|
|
|
|
# filter by relevant functor
|
|
|
|
print(f.to_json())
|
|
|
|
frames = [x for x in frames if functor in x.get_functors()]
|
|
|
|
|
|
|
|
|
|
|
|
# raw_frames = vallex.functors_index[functor] # TODO
|
|
|
|
# raw_frames = vallex.functors_index[functor] # TODO
|
|
|
|
ret_frames = RF(frames, valdb[SENSEMAP_COLL])
|
|
|
|
ret_frames = RF(frames, valdb[SENSEMAP_COLL])
|
|
|
|
return prepare_frames(ret_frames)
|
|
|
|
ret_frames = _aggregate_by_hw(ret_frames)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
json_ret = {"frames": []}
|
|
|
|
|
|
|
|
for frame in ret_frames:
|
|
|
|
|
|
|
|
json_ret["frames"].append(DC(frame.to_json()))
|
|
|
|
|
|
|
|
return json.dumps(json_ret)
|
|
|
|
|
|
|
|
|
|
|
|
# FRAMES ----------------------------^
|
|
|
|
# FRAMES ----------------------------^
|
|
|
|
|
|
|
|
|
|
|
@ -422,33 +431,8 @@ def api_senses_update():
|
|
|
|
|
|
|
|
|
|
|
|
# APP PREFLIGHT ---------------------.
|
|
|
|
# APP PREFLIGHT ---------------------.
|
|
|
|
|
|
|
|
|
|
|
|
def prepare_db():
|
|
|
|
def prepare_app_index():
|
|
|
|
def helper_tid_to_token(tid, tokens):
|
|
|
|
log.info("[*] preparing app_index")
|
|
|
|
for t in tokens:
|
|
|
|
|
|
|
|
if t["tid"] == tid:
|
|
|
|
|
|
|
|
return t
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# update entries (add headwords and fuctors for indexing)
|
|
|
|
|
|
|
|
for corpus in CORPORA:
|
|
|
|
|
|
|
|
for e in valdb[corpus].find({}):
|
|
|
|
|
|
|
|
if e["srl_links"] is None:
|
|
|
|
|
|
|
|
e["headwords"] = []
|
|
|
|
|
|
|
|
e["functors"] = []
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
hw_tids = list(set([x["from"] for x in e["srl_links"]]))
|
|
|
|
|
|
|
|
hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
|
|
|
|
|
|
|
|
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
|
|
|
|
|
|
|
|
e["headwords"] = headwords
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
functors = list(set([x["afun"] for x in e["srl_links"]]))
|
|
|
|
|
|
|
|
e["functors"] = functors
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
valdb[corpus].save(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
|
|
|
|
|
|
|
|
valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# create app_index (used in frontend, left side word index)
|
|
|
|
# create app_index (used in frontend, left side word index)
|
|
|
|
tmp_app_index = {c: {} for c in CORPORA}
|
|
|
|
tmp_app_index = {c: {} for c in CORPORA}
|
|
|
|
for corpus in CORPORA:
|
|
|
|
for corpus in CORPORA:
|
|
|
@ -495,6 +479,7 @@ if __name__ == "__main__":
|
|
|
|
print("Starting app.py main()")
|
|
|
|
print("Starting app.py main()")
|
|
|
|
aparser = argparse.ArgumentParser(description="Arguments for app.py")
|
|
|
|
aparser = argparse.ArgumentParser(description="Arguments for app.py")
|
|
|
|
aparser.add_argument("--config-file", type=str, help="check ./conf_files/")
|
|
|
|
aparser.add_argument("--config-file", type=str, help="check ./conf_files/")
|
|
|
|
|
|
|
|
aparser.add_argument('--prepare-db', action="store_true", default=False)
|
|
|
|
aparser.add_argument("--dbuser", type=str)
|
|
|
|
aparser.add_argument("--dbuser", type=str)
|
|
|
|
aparser.add_argument("--dbpass", type=str)
|
|
|
|
aparser.add_argument("--dbpass", type=str)
|
|
|
|
aparser.add_argument("--dbaddr", type=str)
|
|
|
|
aparser.add_argument("--dbaddr", type=str)
|
|
|
@ -521,8 +506,9 @@ if __name__ == "__main__":
|
|
|
|
)
|
|
|
|
)
|
|
|
|
valdb = client.valdb
|
|
|
|
valdb = client.valdb
|
|
|
|
|
|
|
|
|
|
|
|
if bool(config["prepare_db"]):
|
|
|
|
if args.prepare_db:
|
|
|
|
prepare_db()
|
|
|
|
prepare_app_index()
|
|
|
|
|
|
|
|
sys.exit()
|
|
|
|
|
|
|
|
|
|
|
|
# app index from db
|
|
|
|
# app index from db
|
|
|
|
app_index = (valdb.appindex.find_one({"dockey": "appindex"}))["data"]
|
|
|
|
app_index = (valdb.appindex.find_one({"dockey": "appindex"}))["data"]
|
|
|
|