From 044fa669105c758be433bb654fab97ec7d6a8dcd Mon Sep 17 00:00:00 2001
From: voje
Date: Thu, 4 Apr 2019 22:42:26 +0200
Subject: [PATCH] working on api changes

---
 src/backend_flask/app.py                    | 53 ++++++++++++++-------
 src/pkg/valency/valency/Frame.py            | 13 +++--
 src/pkg/valency/valency/reduce_functions.py |  5 +-
 3 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/src/backend_flask/app.py b/src/backend_flask/app.py
index a5eddcb..557e2e7 100644
--- a/src/backend_flask/app.py
+++ b/src/backend_flask/app.py
@@ -26,6 +26,7 @@ from email.mime.text import MIMEText
 from copy import deepcopy as DC
 from pathlib import Path
 from pymongo import MongoClient
+import pymongo
 import argparse
 
 # some db collections
@@ -55,7 +56,8 @@ CORS(app)
 
 @app.route("/api/dev")
 def api_dev():
-    cur = valdb.kres.find({"sid": "F0015940.37.2"})
+    print("DEV")
+    cur = valdb.kres.find({"headwords": "nagovarjati"})
     frames = []
     for ent in cur:
         frames += frames_from_db_entry(ent)
@@ -236,20 +238,18 @@ def api_token():
 def prepare_frames(ret_frames):
     # append sentences
     for frame in ret_frames:
-        frame.sentences = []
         unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids}
         log.debug(str(unique_sids))
-        frame.sentences = []
+        # frame.sentences = []
         frame.aggr_sent = {}
+        # each unique sid maps to the tid of its headword token
         for sid, tid in unique_sids.items():
-            hwl = vallex.get_token(tid)["lemma"]
+            # hwl = vallex.get_token(tid)["lemma"]
+            hwl = "derp"  # TODO: placeholder until the vallex lookup is replaced
             tmp_idx = len(frame.sentences)
             if hwl not in frame.aggr_sent:
                 frame.aggr_sent[hwl] = []
             frame.aggr_sent[hwl].append(tmp_idx)
-            frame.sentences.append(
-                vallex.get_tokenized_sentence(tid)
-            )
     # return (n-frames, rendered template)
     # json frames
     json_ret = {"frames": []}
@@ -259,7 +259,7 @@ def prepare_frames(ret_frames):
 
 
 # input: hw, reduct_function
-@app.route("/api/hw-frames")
+@app.route("/api/frames")
 def api_get_frames():
     hw = request.args.get("hw")
     if hw is None:
@@ -267,8 +267,21 @@ def api_get_frames():
     rf_name = request.args.get("rf", "reduce_0")  # 2nd is default
     RF = reduce_functions[rf_name]["f"]
-
-    entry = vallex.entries[hw]  # TODO hw -> [Frame,]
-    ret_frames = RF(entry.raw_frames, vallex)
+
+    # entry = vallex.entries[hw]  # TODO hw -> [Frame,]
+    cur = valdb.kres.find({"headwords": hw})
+    frames = []
+    for ent in cur:
+        # TODO: maybe filter by hw?
+        frames += frames_from_db_entry(ent)
+
+    # return json.dumps([x.to_json() for x in frames])
+
+    if frames:
+        log.debug(frames[0].to_json())
+    ret_frames = RF(frames, None)
+    if ret_frames:
+        log.debug(ret_frames[0].to_json())
+
 
 
     return prepare_frames(ret_frames)
@@ -408,18 +421,22 @@ def prepare_db():
     for corpus in CORPORA:
         for e in valdb[corpus].find({}):
             if e["srl_links"] is None:
-                continue
-            hw_tids = list(set([x["from"] for x in e["srl_links"]]))
-            hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
-            headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
-            e["headwords"] = headwords
+                e["headwords"] = []
+                e["functors"] = []
+            else:
+                hw_tids = list(set([x["from"] for x in e["srl_links"]]))
+                hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
+                headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
+                e["headwords"] = headwords
 
-            functors = list(set([x["afun"] for x in e["srl_links"]]))
-            e["headwords"] = headwords
-            e["functors"] = functors
+                functors = list(set([x["afun"] for x in e["srl_links"]]))
+                e["functors"] = functors
             valdb[corpus].save(e)
 
+        valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
+        valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
+
     # create app_index (used in frontend, left side word index)
     for corpus in CORPORA:
         res_hws = {}
diff --git a/src/pkg/valency/valency/Frame.py b/src/pkg/valency/valency/Frame.py
index fceb0c8..bc73801 100644
--- a/src/pkg/valency/valency/Frame.py
+++ b/src/pkg/valency/valency/Frame.py
@@ -6,6 +6,8 @@ def frames_from_db_entry(dbent):
     def _full_tid(tid):
         return ".".join([dbent["sid"], str(tid)])
 
+    token_dict = {str(x["tid"]): x for x in dbent["tokens"]}
+
     frames = []
     if "srl_links" not in dbent:
         return []
@@ -18,19 +20,20 @@
             srldict[key] += [srl]
     for hwtid, srlarr in srldict.items():
         frames += [Frame(
-            hw_lemma=_full_tid(hwtid),
-            tids=[_full_tid(x["to"]) for x in srlarr],
+            hw_lemma=token_dict[hwtid]["text"],
+            tids=[_full_tid(hwtid)],
             slots=[
                 Slot(
                     functor=srl["afun"],
                     tids=[_full_tid(srl["to"])]
                 ) for srl in srlarr
-            ]
+            ],
+            sentences=[dbent["tokens"]],
         )]
     return frames
 
 class Frame():
-    def __init__(self, tids, deep_links=None, slots=None, hw_lemma=None):
+    def __init__(self, tids, deep_links=None, slots=None, hw_lemma=None, sentences=None):
         self.hw = hw_lemma
         self.tids = tids  # list of tokens with the same hw_lemma
         # Each tid = "S123.t123";
@@ -41,7 +44,7 @@
         else:
             self.slots = slots
         self.sense_info = {}
-        self.sentences = None  # Used for passing to view in app.py, get_frames
+        self.sentences = sentences
         self.aggr_sent = None  # Dictionary { hw: self.sentences idx }
 
     def to_json(self):
diff --git a/src/pkg/valency/valency/reduce_functions.py b/src/pkg/valency/valency/reduce_functions.py
index 397afd7..846a7ac 100644
--- a/src/pkg/valency/valency/reduce_functions.py
+++ b/src/pkg/valency/valency/reduce_functions.py
@@ -56,11 +56,13 @@
                 break
         else:
             # Python for else -> fires if loop has ended.
+            # init new frame set [set_of_functors, list_of_one_frame]
             frame_sets.append([functors, [frame]])
 
     ret_frames = []
     for fs in frame_sets:
         tids = []
+        sentences = []
         slots = {}
         # All possible slots in this frame.
         for functor in fs[0]:
@@ -68,12 +70,13 @@
         # Reduce slots from all frames. (Merge ACT from all frames, ...)
         for frame in fs[1]:
             tids += frame.tids
+            sentences += frame.sentences
             for sl in frame.slots:
                 slots[sl.functor].tids += sl.tids
         slots_list = []
         for k, e in slots.items():
             slots_list.append(e)
-        rf = Frame(tids=tids, slots=slots_list)
+        rf = Frame(tids=tids, slots=slots_list, sentences=sentences)
         rf.sort_slots()
         ret_frames.append(rf)
     return sorted_by_len_tids(ret_frames)
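
Notes:

* The reworked /api/frames endpoint resolves frames straight from MongoDB via
  the precomputed "headwords" field instead of the in-memory vallex dictionary.
  A minimal sketch of that flow outside Flask (connection details and import
  paths are assumptions based on the src/pkg/valency layout, not taken from
  this patch):

      from pymongo import MongoClient
      from valency.Frame import frames_from_db_entry
      from valency.reduce_functions import reduce_functions

      client = MongoClient("localhost", 27017)  # assumed connection
      valdb = client["valdb"]                   # assumed database name

      def frames_for_headword(hw, rf_name="reduce_0"):
          # mirrors api_get_frames: query by headword, build Frame objects,
          # then collapse them with the chosen reduce function
          RF = reduce_functions[rf_name]["f"]
          frames = []
          for ent in valdb.kres.find({"headwords": hw}):
              # the TODO in api_get_frames still applies: one entry can
              # carry several headwords, so this may pull in extra frames
              frames += frames_from_db_entry(ent)
          return RF(frames, None)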
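* prepare_db() relies on pymongo calls that are deprecated since pymongo 3.0
  and removed in 4.0: Collection.ensure_index() and Collection.save(). If the
  driver is ever upgraded, the equivalents are roughly as follows (a sketch
  reusing the names from the prepare_db hunk; the connection and the CORPORA
  contents are assumptions, the patch only shows valdb.kres):

      import pymongo
      from pymongo import MongoClient

      valdb = MongoClient("localhost", 27017)["valdb"]  # assumed connection
      CORPORA = ["kres"]                                # assumed corpus list

      for corpus in CORPORA:
          # create_index is idempotent, like ensure_index but without the
          # deprecated client-side index cache
          valdb[corpus].create_index([("headwords", pymongo.ASCENDING)])
          valdb[corpus].create_index([("functors", pymongo.ASCENDING)])
          for e in valdb[corpus].find({}):
              # ... recompute e["headwords"] / e["functors"] as in prepare_db ...
              # save(e) on a document that already has an _id is equivalent to:
              valdb[corpus].replace_one({"_id": e["_id"]}, e)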
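* reduce_1() now threads sentences through the merge: frames sharing a functor
  set collapse into one frame whose tids, per-functor slot tids, and sentences
  are concatenated. A toy illustration using the classes from this package
  (import paths assumed as above; token lists shortened to stubs):

      from valency.Frame import Frame, Slot
      from valency.reduce_functions import reduce_1

      # two frames with the same functor set {"ACT"} collapse into one
      f1 = Frame(tids=["S1.t1"],
                 slots=[Slot(functor="ACT", tids=["S1.t2"])],
                 sentences=[[{"tid": "S1.t1", "text": "..."}]])
      f2 = Frame(tids=["S2.t1"],
                 slots=[Slot(functor="ACT", tids=["S2.t2"])],
                 sentences=[[{"tid": "S2.t1", "text": "..."}]])

      merged = reduce_1([f1, f2])
      assert len(merged) == 1
      assert set(merged[0].tids) == {"S1.t1", "S2.t1"}
      assert len(merged[0].sentences) == 2  # one token list per source frame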