cjvt-valency/src/backend_flask/app.py

521 lines
14 KiB
Python
Raw Normal View History

2019-03-22 13:50:47 +00:00
# -*- coding: utf-8 -*-
from flask import Flask, render_template, request, url_for, redirect
2019-04-02 19:51:44 +00:00
from valency.Frame import Frame, Slot, frames_from_db_entry
2019-03-28 18:17:45 +00:00
from valency.reduce_functions import reduce_functions
2019-03-22 13:50:47 +00:00
"""
from valency import k_utils
from valency.ssj_struct import *
from valency.val_struct import *
from valency.reduce_functions import *
"""
import logging
import sys
import json
import yaml
from flask_cors import CORS
import hashlib
import uuid
import datetime
import string
import random
import smtplib
from email.mime.text import MIMEText
from copy import deepcopy as DC
from pathlib import Path
2019-03-23 19:27:23 +00:00
from pymongo import MongoClient
2019-04-04 20:42:26 +00:00
import pymongo
2019-03-22 13:50:47 +00:00
import argparse
2019-03-28 18:17:45 +00:00
# Names of the MongoDB collections used by the endpoints in this module.
USERS_COLL = "users"
TOKENS_COLL = "usertokens"
SENSES_COLL = "senses"
SENSEMAP_COLL = "sensemap"
# Supported corpora. Each name is also used as a collection name
# (valdb[corpus]) and as a key of the pre-generated app index that backs
# the word index on the left side of the GUI.
CORPORA = ["ssj", "kres"]
2019-04-06 17:38:36 +00:00
# Pre-generated per-corpus word/functor index; loaded from the database in
# the __main__ block before the server starts.
app_index = None
2019-03-22 13:50:47 +00:00
# Module-level logger; handlers and level are configured in the __main__ block.
log = logging.getLogger(__name__)
2019-04-02 19:51:44 +00:00
# Handle to the "valdb" Mongo database; assigned in the __main__ block once
# the client connection has been created.
valdb = None
2019-03-22 13:50:47 +00:00
# The Flask application object that all @app.route handlers register on.
app = Flask(__name__)
2019-03-22 13:50:47 +00:00
# Cross-origin requests must be allowed when the Vue.js frontend is served
# separately via webpack. CORS(app) applies the flask_cors defaults to the
# whole application; a narrower variant limited to /api/* is kept below for
# reference:
# CORS(app, resources={r"/api/*": {
# "origins": "*",
# }})
CORS(app)
2019-03-22 13:50:47 +00:00
2019-04-02 19:51:44 +00:00
# DEV -------------------------------.
@app.route("/api/dev")
def api_dev():
    """Development-only endpoint: dump all frames for one fixed headword.

    Looks up entries in the ``kres`` collection whose ``headwords`` contain
    "nagovarjati", expands each entry with ``frames_from_db_entry`` and
    returns the ``to_json()`` form of every frame as a JSON array string.
    """
    print("DEV")
    entries = valdb.kres.find({"headwords": "nagovarjati"})
    collected = []
    for entry in entries:
        collected.extend(frames_from_db_entry(entry))
    serialised = [frame.to_json() for frame in collected]
    return json.dumps(serialised)
# DEV -------------------------------^
2019-03-28 18:17:45 +00:00
# INDEX SELECTION -------------------.
2019-03-22 13:50:47 +00:00
@app.route("/api/words/<corpus>")
def api_words(corpus):
    """Return the pre-generated word index of ``corpus`` as a JSON string.

    The response is an object with the single key ``sorted_words`` holding
    ``app_index[corpus]["words"]``.
    """
    corpus_index = app_index[corpus]
    payload = {"sorted_words": corpus_index["words"]}
    return json.dumps(payload)
@app.route("/api/functors/<corpus>")
def api_functors(corpus):
    """Return the pre-generated functor index of ``corpus`` as a JSON string."""
    functor_index = app_index[corpus]["functors"]
    return json.dumps(functor_index)
2019-03-22 13:50:47 +00:00
2019-03-28 18:17:45 +00:00
# INDEX SELECTION -------------------^
# AUTH ------------------------------.
2019-03-22 13:50:47 +00:00
@app.route("/api/register", methods=["POST"])
def api_register():
    """Register a new user from a JSON request body.

    The body must be a JSON object with non-empty string values for
    ``username``, ``password`` and ``email``. The password and the e-mail
    address are stored only as SHA-256 hex digests.

    Returns:
        "OK" on success, "ERR" when a field is missing or empty, or
        "ERR: Username or email already exists." when either is taken.
    """
    data = json.loads(request.get_data().decode())
    # .get() plus a type check: an absent or null field is rejected exactly
    # like an empty one instead of surfacing as an HTTP 500.
    username = data.get("username")
    password = data.get("password")
    email = data.get("email")
    if not all(
        isinstance(value, str) and value
        for value in (username, password, email)
    ):
        return "ERR"

    email_hash = hashlib.sha256(email.encode("utf-8")).hexdigest()
    existing = valdb[USERS_COLL].find_one({
        "$or": [{"username": username}, {"email": email_hash}]
    })
    if existing is not None:
        return "ERR: Username or email already exists."

    # NOTE(review): unsalted SHA-256 is weak for password storage; it is kept
    # because the login endpoint compares against this exact digest.
    entry = {
        "username": username,
        "hpass": hashlib.sha256(password.encode("utf-8")).hexdigest(),
        "email": email_hash,
    }
    # insert_one replaces Collection.insert, which is deprecated in PyMongo 3
    # and removed in PyMongo 4.
    valdb[USERS_COLL].insert_one(entry)
    return "OK"
@app.route("/api/login", methods=["POST"])
def api_login():
    """Authenticate a user and issue a fresh session token.

    Expects a JSON body with ``username`` and ``password``. On success the
    user's token document is created or replaced, so each user has at most
    one token entry keyed by username.

    Returns:
        JSON string ``{"token": <hex token>}``, or ``{"token": null}`` when
        the credentials are missing or do not match.
    """
    data = json.loads(request.get_data().decode())
    username = data.get("username")
    password = data.get("password")
    # Missing or non-string credentials are treated as a failed login rather
    # than raising KeyError/AttributeError.
    if not isinstance(username, str) or not isinstance(password, str):
        return json.dumps({"token": None})

    hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
    db_user = valdb[USERS_COLL].find_one({
        "username": username,
        "hpass": hpass
    })
    if db_user is None:
        return json.dumps({"token": None})

    # update or create token
    token = uuid.uuid4().hex
    token_entry = {
        "username": username,
        "date": datetime.datetime.utcnow(),
        "token": token
    }
    # replace_one(..., upsert=True) is the supported spelling of the
    # deprecated update(filter, full_document, upsert=True).
    valdb[TOKENS_COLL].replace_one(
        {"username": username},
        token_entry,
        upsert=True
    )
    return json.dumps({"token": token})
def send_new_pass_mail(recipient, new_pass):
    """E-mail a freshly generated password to ``recipient``.

    Sending is best effort: connection and SMTP failures are logged and
    swallowed, so the caller is not told whether delivery succeeded.

    Args:
        recipient: Destination e-mail address.
        new_pass: The plain-text password to include in the message.
    """
    SENDER = "valencaglagolov@gmail.com"
    msg = MIMEText(
        "Pošiljamo vam novo geslo za "
        "vstop v aplikacijo Vezljivostni vzorci slovenskih glagolov.\n"
        "Geslo: {}.".format(new_pass)
    )
    msg["Subject"] = "Pozabljeno geslo"
    msg["From"] = SENDER
    msg["To"] = recipient
    try:
        # The context manager closes the connection even if a step fails.
        with smtplib.SMTP("smtp.gmail.com", 587) as server:
            server.ehlo()
            server.starttls()
            # NOTE(security): this credential is hard-coded in the source;
            # it should be moved to configuration and rotated.
            server.login(
                SENDER,
                "rapid limb soapy fermi"
            )
            server.sendmail(SENDER, [recipient], msg.as_string())
    except (smtplib.SMTPException, OSError) as e:
        # The original caught the undefined name `Error`, which turned every
        # delivery failure into a NameError.
        log.error("Sending new password failed")
        log.error(e)
    else:
        log.info("Sent new password.")
@app.route("/api/new_pass", methods=["POST"])
def api_new_pass():
    """Reset a user's password and e-mail the new one.

    Expects a JSON body with ``username`` and ``email``. When a user with
    that username and e-mail digest exists, a random 10-character
    alphanumeric password is generated, its SHA-256 digest is stored, and the
    plain-text password is mailed to the given address.

    Returns:
        JSON string ``{"confirmation": true}`` when the user was found and
        the password replaced, otherwise ``{"confirmation": false}``.
    """
    data = json.loads(request.get_data().decode())
    username = data.get("username")
    email = data.get("email")
    # Missing or non-string fields cannot match any user.
    if not isinstance(username, str) or not isinstance(email, str):
        return json.dumps({"confirmation": False})

    hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
    user_key = {
        "username": username,
        "email": hemail
    }
    # check if user is valid
    if valdb[USERS_COLL].find_one(user_key) is None:
        return json.dumps({"confirmation": False})

    # create a new password; SystemRandom draws from the OS entropy source,
    # unlike the default Mersenne Twister generator behind random.choice.
    rng = random.SystemRandom()
    alphabet = string.ascii_letters + string.digits
    new_pass = "".join(rng.choice(alphabet) for _ in range(10))

    # update locally
    hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
    valdb[USERS_COLL].update_one(user_key, {"$set": {"hpass": hpass}})

    # send via mail
    # NOTE(review): the stored password is already replaced at this point and
    # send_new_pass_mail only logs delivery failures, so confirmation is
    # reported even if the e-mail never arrives.
    send_new_pass_mail(email, new_pass)
    return json.dumps({"confirmation": True})
2019-03-28 18:17:45 +00:00
def token_to_username(token):
    """Resolve a session token to its username and refresh the token's date.

    Args:
        token: Token string as issued by the login endpoint.

    Returns:
        The username stored with the token, or None unless exactly one token
        document matches.
    """
    key = {
        "token": token
    }
    res = list(valdb[TOKENS_COLL].find(key))
    if len(res) != 1:
        return None
    username = res[0]["username"]
    # update deletion interval
    # NOTE(review): presumably a TTL index on "date" expires idle tokens;
    # confirm against the database setup.
    # update_one replaces the deprecated Collection.update.
    valdb[TOKENS_COLL].update_one(
        key, {"$set": {"date": datetime.datetime.utcnow()}})
    return username
@app.route("/api/token", methods=["POST"])
def api_token():
    """Report whether the token in the JSON request body is valid.

    Returns a JSON string with ``confirmation`` (true when the token resolved
    to a user) and ``username`` (the resolved name, or null).
    """
    payload = json.loads(request.get_data().decode())
    resolved_user = token_to_username(payload.get("token"))
    return json.dumps({
        "confirmation": resolved_user is not None,
        "username": resolved_user
    })
# AUTH ------------------------------^
# FRAMES ----------------------------.
# input: hw, reduct_function
2019-04-04 20:42:26 +00:00
@app.route("/api/frames")
def api_get_frames():
    """Return the reduced frames of one headword as JSON.

    Query arguments:
        hw: Headword (required).
        rf: Name of the reduce function; defaults to "reduce_0".
        cor: Corpus name; must be one of CORPORA.

    Returns:
        JSON string ``{"frames": [...]}`` holding the ``to_json()`` form of
        every reduced frame, or ``{"error": ...}`` for an invalid argument.
    """
    hw = request.args.get("hw")
    if hw is None:
        return json.dumps({"error": "Required argument: hw (headword)."})

    rf_name = request.args.get("rf", "reduce_0")
    # An unknown name used to raise KeyError (HTTP 500); report it the same
    # way as the other argument errors.
    if rf_name not in reduce_functions:
        return json.dumps(
            {"error": "Unknown reduce function: {}".format(rf_name)})
    RF = reduce_functions[rf_name]["f"]

    corpus = request.args.get("cor")
    if corpus not in CORPORA:
        return json.dumps({"error": "cor={kres,ssj}"})

    frames = []
    for ent in valdb[corpus].find({"headwords": hw}):
        frames += frames_from_db_entry(ent)  # pre-process this step for prod TODO

    # An entry can yield frames for several headwords; keep only the
    # requested one.
    frames = [x for x in frames if x.hw == hw]

    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
    return json.dumps({"frames": [frame.to_json() for frame in ret_frames]})
def _aggregate_by_hw(ret_frames):
def _tid_to_lemma(tid, sentence):
# slow and hackish
for pair in sentence:
if pair[0] == tid:
return pair[1]["lemma"]
return None
# append sentences
for frame in ret_frames:
# unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids}
frame.aggr_sent = {} # map of headword: [sentence indexes]
# sid, tid==hw
for i, tid in enumerate(frame.tids):
# hwl = vallex.get_token(tid)["lemma"]
hwl = _tid_to_lemma(tid, frame.sentences[i])
if hwl not in frame.aggr_sent:
frame.aggr_sent[hwl] = []
frame.aggr_sent[hwl].append(i)
return ret_frames
2019-03-22 13:50:47 +00:00
2019-03-28 18:17:45 +00:00
# input: functor, reduce_function
2019-03-22 13:50:47 +00:00
@app.route("/api/functor-frames")
def api_get_functor_frames():
    """Return the reduced frames that contain a given functor as JSON.

    Query arguments:
        functor: Functor name (required).
        rf: Name of the reduce function; defaults to "reduce_0".
        cor: Corpus name; must be one of CORPORA.

    Returns:
        JSON string ``{"frames": [...]}`` holding the ``to_json()`` form of
        every reduced frame (after sentences were grouped by headword), or
        ``{"error": ...}`` for an invalid argument.
    """
    functor = request.args.get("functor")
    if functor is None:
        return json.dumps({"error": "Missing argument: functor."})

    rf_name = request.args.get("rf", "reduce_0")
    # An unknown name used to raise KeyError (HTTP 500); report it the same
    # way as the other argument errors.
    if rf_name not in reduce_functions:
        return json.dumps(
            {"error": "Unknown reduce function: {}".format(rf_name)})
    RF = reduce_functions[rf_name]["f"]

    corpus = request.args.get("cor")
    if corpus not in CORPORA:
        return json.dumps({"error": "cor={kres,ssj}"})

    frames = []
    for ent in valdb[corpus].find({"functors": functor}):
        frames += frames_from_db_entry(ent)  # pre-process this step for prod TODO

    # filter by relevant functor
    frames = [x for x in frames if functor in x.get_functors()]

    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
    ret_frames = _aggregate_by_hw(ret_frames)
    # json.dumps only reads its argument, so no deep copy of each frame's
    # JSON form is needed before serialising.
    return json.dumps({"frames": [frame.to_json() for frame in ret_frames]})
2019-03-22 13:50:47 +00:00
2019-03-28 18:17:45 +00:00
# FRAMES ----------------------------^
2019-03-22 13:50:47 +00:00
2019-03-28 18:17:45 +00:00
# SENSES ----------------------------.
2019-03-22 13:50:47 +00:00
@app.route("/api/senses/get")
def api_senses_get():
    """Return the senses of a headword and its newest sense mapping as JSON.

    Reads the ``hw`` query argument and fetches all matching documents from
    the senses and sense-map collections. Of the sense-map documents that
    share the same ``hw`` + ``ssj_id``, only the one with the greatest
    ``date`` is kept; the result is keyed by ``ssj_id``. ``_id`` and ``date``
    are stripped from everything returned.

    Senses are ordered with non-SSKJ authors first (in query order), followed
    by SSKJ senses sorted by the three numeric parts of their ``sense_id``.
    """
    headword = request.args.get("hw")
    senses = list(valdb[SENSES_COLL].find({"hw": headword}))
    mappings = list(valdb[SENSEMAP_COLL].find({"hw": headword}))

    # The newest-per-group selection is done here rather than in the
    # database because full documents are needed, not just the max date.
    newest = {}
    for mapping in mappings:
        group = mapping["hw"] + mapping["ssj_id"]
        if group not in newest:
            newest[group] = mapping
        elif mapping["date"] >= newest[group]["date"]:
            # on equal dates the later-seen document wins
            newest[group] = mapping

    sense_map = {mapping["ssj_id"]: mapping for mapping in newest.values()}
    for mapping in sense_map.values():
        del mapping["_id"]
        del mapping["date"]

    for sense in senses:
        del sense["_id"]
        sense.pop("date", None)

    def _sskj_order(sense):
        # zero-padded so that string comparison follows numeric order
        parts = sense["sense_id"].split("-")
        first, second, third = int(parts[1]), int(parts[2]), int(parts[3])
        return "{:03d}-{:03d}-{:03d}".format(first, second, third)

    # user defined senses first, SSKJ senses second
    user_senses = []
    sskj_senses = []
    for sense in senses:
        if sense["author"] == "SSKJ":
            sskj_senses.append(sense)
        else:
            user_senses.append(sense)
    sskj_senses.sort(key=_sskj_order)

    return json.dumps({
        "senses": user_senses + sskj_senses,
        "sense_map": sense_map,
    })
@app.route("/api/senses/update", methods=["POST"])
def api_senses_update():
    """Store new user-defined senses and a new sense mapping for a headword.

    Expects a JSON body with:
        token: Session token of the editing user.
        hw: Headword the mapping belongs to.
        new_senses: List of sense objects, each with a temporary frontend
            ``sense_id`` and a ``desc``.
        sense_map: Object mapping an ``ssj_id`` to ``{"sense_id": ...}``.

    Each new sense gets a permanent id of the form
    ``<username>-<10 hex chars>`` and is inserted; mapping entries that refer
    to a temporary id are rewritten to the permanent one. Mapping entries are
    always inserted as new documents, never updated in place.

    Returns:
        "OK" on success, "Not a user." when the token is invalid.
    """
    data = json.loads(request.get_data().decode())
    token = data.get("token")
    hw = data.get("hw")
    # Absent or null collections mean "nothing to store" instead of a crash.
    sense_map = data.get("sense_map") or {}
    new_senses = data.get("new_senses") or []

    username = token_to_username(token)
    if username is None:
        log.debug("Not a user.")
        return "Not a user."

    # store new senses,
    # create new sense_ids
    id_map = {}
    for ns in new_senses:
        tmp_dt = datetime.datetime.utcnow()
        digest = hashlib.sha256("{}{}{}".format(
            username,
            ns["desc"],
            str(tmp_dt)
        ).encode("utf-8")).hexdigest()
        new_sense_id = "{}-{}".format(username, digest[:10])
        frontend_sense_id = ns["sense_id"]
        ns["sense_id"] = new_sense_id
        ns["date"] = tmp_dt
        id_map[frontend_sense_id] = new_sense_id
        # insert_one replaces the deprecated Collection.insert
        valdb[SENSES_COLL].insert_one(ns)

    # replace temporary frontend ids with the permanent sense ids
    for ssj_id, el in sense_map.items():
        sense_id = id_map.get(el["sense_id"], el["sense_id"])
        mapping_entry = {
            "user": username,
            "hw": hw,
            "ssj_id": ssj_id,
            "sense_id": sense_id,
            "date": datetime.datetime.utcnow()
        }
        valdb[SENSEMAP_COLL].insert_one(mapping_entry)
    return "OK"
2019-03-28 18:17:45 +00:00
# SENSES ----------------------------^
# APP PREFLIGHT ---------------------.
def prepare_app_index():
    """Build the per-corpus word/functor index and store it in the database.

    For every corpus in CORPORA the matching collection is scanned and the
    occurrences of each headword and functor are counted. Headwords are
    grouped by their lower-cased first character and sorted within each
    group; functors are sorted by name. The result is saved as the single
    ``{"dockey": "appindex"}`` document of the ``appindex`` collection,
    replacing any previous one.

    Entries without ``headwords`` are skipped entirely; entries without
    ``functors`` still contribute their headwords.
    """
    log.info("[*] preparing app_index")
    # create app_index (used in frontend, left side word index)
    tmp_app_index = {c: {} for c in CORPORA}
    # Only these two fields are read, so fetch nothing else.
    wanted_fields = {"headwords": 1, "functors": 1}
    for corpus in CORPORA:
        res_hws = {}
        res_fns = {}
        for e in valdb[corpus].find({}, wanted_fields):
            if "headwords" not in e:
                continue
            for hw in e["headwords"]:
                res_hws[hw] = res_hws.get(hw, 0) + 1
            if "functors" not in e:
                continue
            for fn in e["functors"]:
                res_fns[fn] = res_fns.get(fn, 0) + 1

        alphabetical = {}
        for hw, count in res_hws.items():
            alphabetical.setdefault(hw[0].lower(), []).append((hw, count))
        for group in alphabetical.values():
            group.sort(key=lambda x: x[0])
        tmp_app_index[corpus]["words"] = alphabetical

        tmp_app_index[corpus]["functors"] = sorted(
            res_fns.items(), key=lambda x: x[0])

    # replace_one(..., upsert=True) is the supported spelling of the
    # deprecated update(filter, full_document, upsert=True).
    valdb.appindex.replace_one(
        {"dockey": "appindex"},
        {"dockey": "appindex", "data": tmp_app_index},
        upsert=True
    )
2019-03-28 18:17:45 +00:00
# APP PREFLIGHT ---------------------^
2019-03-22 13:50:47 +00:00
if __name__ == "__main__":
    # Script entry point: parse arguments, load the YAML config, connect to
    # MongoDB, then either rebuild the app index (--prepare-db) or load it
    # and start the Flask server.
    print("Starting app.py main()")
    aparser = argparse.ArgumentParser(description="Arguments for app.py")
    # required=True: without it a missing flag surfaced as a TypeError from
    # Path(None) instead of a usage message.
    aparser.add_argument(
        "--config-file", type=str, required=True, help="check ./conf_files/")
    aparser.add_argument('--prepare-db', action="store_true", default=False)
    aparser.add_argument("--dbuser", type=str)
    aparser.add_argument("--dbpass", type=str)
    aparser.add_argument("--dbaddr", type=str)
    args = aparser.parse_args()

    # Only the first YAML document of the config file is used.
    with Path(args.config_file).open("r") as fp:
        config = list(yaml.safe_load_all(fp))[0]

    app.debug = bool(config["debug"])
    logfile = config["logfile"]
    if app.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(filename=logfile, level=logging.INFO)

    # db login
    client = MongoClient(
        "mongodb://{}".format(args.dbaddr),
        username=args.dbuser,
        password=args.dbpass,
        authSource="valdb",
        authMechanism='SCRAM-SHA-1'
    )
    valdb = client.valdb

    if args.prepare_db:
        prepare_app_index()
        sys.exit()

    # app index from db
    app_index_doc = valdb.appindex.find_one({"dockey": "appindex"})
    if app_index_doc is None:
        # Previously this failed with an opaque TypeError on None["data"].
        log.error("No app index in the database; run with --prepare-db first.")
        sys.exit(1)
    app_index = app_index_doc["data"]

    log.info("[*] Starting app.py with config:\n{}".format(config))
    app.run(host=str(config["host"]), port=int(config["port"]))
2019-04-22 18:32:06 +00:00