@@ -26,6 +26,7 @@ from email.mime.text import MIMEText
 from copy import deepcopy as DC
 from pathlib import Path
 from pymongo import MongoClient
+import pymongo
 import argparse
 
 # some db collections
@@ -55,7 +56,8 @@ CORS(app)
 
 @app.route("/api/dev")
 def api_dev():
-    cur = valdb.kres.find({"sid": "F0015940.37.2"})
+    print("DEV")
+    cur = valdb.kres.find({"headwords": "nagovarjati"})
     frames = []
     for ent in cur:
         frames += frames_from_db_entry(ent)
@@ -236,20 +238,18 @@ def api_token():
 def prepare_frames(ret_frames):
     # append sentences
     for frame in ret_frames:
-        frame.sentences = []
         unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids}
         log.debug(str(unique_sids))
-        frame.sentences = []
+        # frame.sentences = []
         frame.aggr_sent = {}
+        # sid, tid==hw
         for sid, tid in unique_sids.items():
-            hwl = vallex.get_token(tid)["lemma"]
+            # hwl = vallex.get_token(tid)["lemma"]
+            hwl = "derp"
             tmp_idx = len(frame.sentences)
             if hwl not in frame.aggr_sent:
                 frame.aggr_sent[hwl] = []
             frame.aggr_sent[hwl].append(tmp_idx)
-            frame.sentences.append(
-                vallex.get_tokenized_sentence(tid)
-            )
     # return (n-frames, rendered template)
     # json frames
     json_ret = {"frames": []}
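
For orientation, a minimal sketch (not part of the patch) of the per-frame structure prepare_frames now builds, assuming frame.sentences is populated elsewhere once the vallex.get_tokenized_sentence() call is dropped; with hwl hard-coded to "derp", every sentence index lands under that single key:

    # illustrative values only, shapes assumed from how the loop uses them
    frame.sentences = [
        ["Prvi", "stavek"],    # index 0
        ["Drugi", "stavek"],   # index 1
    ]
    frame.aggr_sent = {
        "derp": [0, 1],        # headword lemma -> indexes into frame.sentences
    }
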
@@ -259,7 +259,7 @@ def prepare_frames(ret_frames):
 
 
 # input: hw, reduct_function
-@app.route("/api/hw-frames")
+@app.route("/api/frames")
 def api_get_frames():
     hw = request.args.get("hw")
     if hw is None:
@@ -267,8 +267,21 @@ def api_get_frames():
 
     rf_name = request.args.get("rf", "reduce_0") # 2nd is default
    RF = reduce_functions[rf_name]["f"]
-    entry = vallex.entries[hw]  # TODO hw -> [Frame,]
-    ret_frames = RF(entry.raw_frames, vallex)
+    # entry = vallex.entries[hw]  # TODO hw -> [Frame,]
+    cur = valdb.kres.find({"headwords": hw})
+    frames = []
+    for ent in cur:
+        # TODO: maybe filter by hw?
+        frames += frames_from_db_entry(ent)
+
+    # return json.dumps([x.to_json() for x in frames])
+
+    print("A")
+    print(frames[0].to_json())
+    ret_frames = RF(frames, None)
+    print("B")
+    print(ret_frames[0].to_json())
     return prepare_frames(ret_frames)
 
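
A quick way to exercise the renamed endpoint once the app is running; the host and port below are placeholders, not taken from this patch:

    import requests

    # /api/hw-frames was renamed to /api/frames; "rf" falls back to "reduce_0"
    resp = requests.get("http://localhost:5000/api/frames",
                        params={"hw": "nagovarjati", "rf": "reduce_0"})
    print(resp.json())  # {"frames": [...]} as assembled by prepare_frames
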
@@ -408,18 +421,22 @@ def prepare_db():
     for corpus in CORPORA:
         for e in valdb[corpus].find({}):
             if e["srl_links"] is None:
-                continue
-            hw_tids = list(set([x["from"] for x in e["srl_links"]]))
-            hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
-            headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
-            e["headwords"] = headwords
+                e["headwords"] = []
+                e["functors"] = []
+            else:
+                hw_tids = list(set([x["from"] for x in e["srl_links"]]))
+                hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
+                headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
+                e["headwords"] = headwords
 
             functors = list(set([x["afun"] for x in e["srl_links"]]))
-            e["headwords"] = headwords
             e["functors"] = functors
 
             valdb[corpus].save(e)
 
+        valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
+        valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
+
     # create app_index (used in frontend, left side word index)
     for corpus in CORPORA:
         res_hws = {}
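
As a side note on the headword rule in the hunk above, here is a minimal sketch of the lemma-vs-lemma+"_" distinction, assuming msd[0] == "G" marks verbs in the MSD tagset this corpus uses; the tokens are hypothetical, for illustration only:

    verb = {"lemma": "nagovarjati", "msd": "Ggnste"}
    noun = {"lemma": "miza", "msd": "Sozei"}

    def headword(t):
        # verbs keep the bare lemma, everything else gets a trailing underscore
        return t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_"

    headword(verb)  # "nagovarjati"
    headword(noun)  # "miza_"
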