working on api changes

dev
voje 5 years ago
parent ca942344d7
commit 044fa66910

@ -26,6 +26,7 @@ from email.mime.text import MIMEText
from copy import deepcopy as DC from copy import deepcopy as DC
from pathlib import Path from pathlib import Path
from pymongo import MongoClient from pymongo import MongoClient
import pymongo
import argparse import argparse
# some db collections # some db collections
@ -55,7 +56,8 @@ CORS(app)
@app.route("/api/dev") @app.route("/api/dev")
def api_dev(): def api_dev():
cur = valdb.kres.find({"sid": "F0015940.37.2"}) print("DEV")
cur = valdb.kres.find({"headwords": "nagovarjati"})
frames = [] frames = []
for ent in cur: for ent in cur:
frames += frames_from_db_entry(ent) frames += frames_from_db_entry(ent)
@ -236,20 +238,18 @@ def api_token():
def prepare_frames(ret_frames): def prepare_frames(ret_frames):
# append sentences # append sentences
for frame in ret_frames: for frame in ret_frames:
frame.sentences = []
unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids} unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids}
log.debug(str(unique_sids)) log.debug(str(unique_sids))
frame.sentences = [] # frame.sentences = []
frame.aggr_sent = {} frame.aggr_sent = {}
# sid, tid==hw
for sid, tid in unique_sids.items(): for sid, tid in unique_sids.items():
hwl = vallex.get_token(tid)["lemma"] # hwl = vallex.get_token(tid)["lemma"]
hwl = "derp"
tmp_idx = len(frame.sentences) tmp_idx = len(frame.sentences)
if hwl not in frame.aggr_sent: if hwl not in frame.aggr_sent:
frame.aggr_sent[hwl] = [] frame.aggr_sent[hwl] = []
frame.aggr_sent[hwl].append(tmp_idx) frame.aggr_sent[hwl].append(tmp_idx)
frame.sentences.append(
vallex.get_tokenized_sentence(tid)
)
# return (n-frames, rendered template) # return (n-frames, rendered template)
# json frames # json frames
json_ret = {"frames": []} json_ret = {"frames": []}
@ -259,7 +259,7 @@ def prepare_frames(ret_frames):
# input: hw, reduct_function # input: hw, reduct_function
@app.route("/api/hw-frames") @app.route("/api/frames")
def api_get_frames(): def api_get_frames():
hw = request.args.get("hw") hw = request.args.get("hw")
if hw is None: if hw is None:
@ -267,8 +267,21 @@ def api_get_frames():
rf_name = request.args.get("rf", "reduce_0") # 2nd is default rf_name = request.args.get("rf", "reduce_0") # 2nd is default
RF = reduce_functions[rf_name]["f"] RF = reduce_functions[rf_name]["f"]
entry = vallex.entries[hw] # TODO hw -> [Frame,]
ret_frames = RF(entry.raw_frames, vallex) # entry = vallex.entries[hw] # TODO hw -> [Frame,]
cur = valdb.kres.find({"headwords": hw})
frames = []
for ent in cur:
# TODO: maybe filter by hw?
frames += frames_from_db_entry(ent)
# return json.dumps([x.to_json() for x in frames])
print("A")
print(frames[0].to_json())
ret_frames = RF(frames, None)
print("B")
print(ret_frames[0].to_json())
return prepare_frames(ret_frames) return prepare_frames(ret_frames)
@ -408,18 +421,22 @@ def prepare_db():
for corpus in CORPORA: for corpus in CORPORA:
for e in valdb[corpus].find({}): for e in valdb[corpus].find({}):
if e["srl_links"] is None: if e["srl_links"] is None:
continue e["headwords"] = []
hw_tids = list(set([x["from"] for x in e["srl_links"]])) e["functors"] = []
hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids] else:
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens] hw_tids = list(set([x["from"] for x in e["srl_links"]]))
e["headwords"] = headwords hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
e["headwords"] = headwords
functors = list(set([x["afun"] for x in e["srl_links"]])) functors = list(set([x["afun"] for x in e["srl_links"]]))
e["headwords"] = headwords e["functors"] = functors
e["functors"] = functors
valdb[corpus].save(e) valdb[corpus].save(e)
valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
# create app_index (used in frontend, left side word index) # create app_index (used in frontend, left side word index)
for corpus in CORPORA: for corpus in CORPORA:
res_hws = {} res_hws = {}

@ -6,6 +6,8 @@ def frames_from_db_entry(dbent):
def _full_tid(tid): def _full_tid(tid):
return ".".join([dbent["sid"], str(tid)]) return ".".join([dbent["sid"], str(tid)])
token_dict = {str(x["tid"]): x for x in dbent["tokens"]}
frames = [] frames = []
if "srl_links" not in dbent: if "srl_links" not in dbent:
return [] return []
@ -18,19 +20,20 @@ def frames_from_db_entry(dbent):
srldict[key] += [srl] srldict[key] += [srl]
for hwtid, srlarr in srldict.items(): for hwtid, srlarr in srldict.items():
frames += [Frame( frames += [Frame(
hw_lemma=_full_tid(hwtid), hw_lemma=token_dict[hwtid]["text"],
tids=[_full_tid(x["to"]) for x in srlarr], tids=[_full_tid(hwtid)],
slots=[ slots=[
Slot( Slot(
functor=srl["afun"], functor=srl["afun"],
tids=[_full_tid(srl["to"])] tids=[_full_tid(srl["to"])]
) for srl in srlarr ) for srl in srlarr
] ],
sentences=[dbent["tokens"]],
)] )]
return frames return frames
class Frame(): class Frame():
def __init__(self, tids, deep_links=None, slots=None, hw_lemma=None): def __init__(self, tids, deep_links=None, slots=None, hw_lemma=None, sentences=None):
self.hw = hw_lemma self.hw = hw_lemma
self.tids = tids # list of tokens with the same hw_lemma self.tids = tids # list of tokens with the same hw_lemma
# Each tid = "S123.t123"; # Each tid = "S123.t123";
@ -41,7 +44,7 @@ class Frame():
else: else:
self.slots = slots self.slots = slots
self.sense_info = {} self.sense_info = {}
self.sentences = None # Used for passing to view in app.py, get_frames self.sentences = sentences
self.aggr_sent = None # Dictionary { hw: self.sentences idx } self.aggr_sent = None # Dictionary { hw: self.sentences idx }
def to_json(self): def to_json(self):

@ -56,11 +56,13 @@ def reduce_1(frames, vallex=None):
break break
else: else:
# Python for else -> fires if loop has ended. # Python for else -> fires if loop has ended.
# init new frame set [set_of_functors, list_of_one_frame]
frame_sets.append([functors, [frame]]) frame_sets.append([functors, [frame]])
ret_frames = [] ret_frames = []
for fs in frame_sets: for fs in frame_sets:
tids = [] tids = []
sentences = []
slots = {} slots = {}
# All possible slots in this frame. # All possible slots in this frame.
for functor in fs[0]: for functor in fs[0]:
@ -68,12 +70,13 @@ def reduce_1(frames, vallex=None):
# Reduce slots from all frames. (Merge ACT from all frames, ...) # Reduce slots from all frames. (Merge ACT from all frames, ...)
for frame in fs[1]: for frame in fs[1]:
tids += frame.tids tids += frame.tids
sentences += frame.sentences
for sl in frame.slots: for sl in frame.slots:
slots[sl.functor].tids += sl.tids slots[sl.functor].tids += sl.tids
slots_list = [] slots_list = []
for k, e in slots.items(): for k, e in slots.items():
slots_list.append(e) slots_list.append(e)
rf = Frame(tids=tids, slots=slots_list) rf = Frame(tids=tids, slots=slots_list, sentences=sentences)
rf.sort_slots() rf.sort_slots()
ret_frames.append(rf) ret_frames.append(rf)
return sorted_by_len_tids(ret_frames) return sorted_by_len_tids(ret_frames)

Loading…
Cancel
Save