You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
cjvt-valency/src/backend_flask/app.py

473 lines
13 KiB

# -*- coding: utf-8 -*-
from flask import Flask, render_template, request, url_for, redirect
"""
from valency import k_utils
from valency.ssj_struct import *
from valency.val_struct import *
from valency.reduce_functions import *
"""
import logging
import sys
import json
import yaml
from flask_cors import CORS
import hashlib
import uuid
import datetime
import string
import random
import smtplib
from email.mime.text import MIMEText
from copy import deepcopy as DC
from pathlib import Path
from pymongo import MongoClient
import argparse
# Names of the corpora handled by this backend; each one gets its own
# (initially empty) slot in app_index below.
CORPORA = ["ssj", "kres"]
log = logging.getLogger(__name__)
app = Flask(__name__)
# Per-corpus in-memory index. prepare_db() stores the "words" and "functors"
# entries here; /api/words and /api/functors read the "ssj" ones.
app_index = {c: {} for c in CORPORA}
# when running vuejs via webpack
# CORS(app)
# Enable CORS for every origin, restricted to routes under /api/.
CORS(app, resources={r"/api/*": {
    "origins": "*",
}})
# for testing functions
@app.route("/test_dev")
def test_dev():
    """Testing endpoint: show the result of ``vallex.test_dev()``.

    Returns:
        The stringified result of the hook, or a hint telling the developer
        where to put test code when the hook produced nothing.
    """
    # NOTE(review): `vallex` is not bound anywhere in the visible part of this
    # file (the valency imports at the top sit inside a string literal) --
    # confirm where it is supposed to come from.
    hint = "edit val_struct.py: test_dev()"
    ret = vallex.test_dev()
    # str(None) is the truthy string "None", so `str(ret) or hint` alone can
    # never fall back for a hook that returns nothing; test None explicitly.
    if ret is None:
        return hint
    return str(ret) or hint
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route("/home", defaults={"pathname": ""})
@app.route("/home/<path:pathname>")
def home(pathname):
    """Redirect ``/home`` and every path below it to the index page.

    ``pathname`` is captured by the route but not used.
    """
    index_url = url_for("index")
    return redirect(index_url, code=302)
# @app.route("/api/words/<corpus>")
# def api_words(corpus):
@app.route("/api/words")
def api_words():
    """Return the word index of the "ssj" corpus as a JSON string.

    The response is an object with a single ``sorted_words`` key.
    """
    # todo - make corpus as arg
    ssj_words = app_index["ssj"]["words"]
    payload = {"sorted_words": ssj_words}
    return json.dumps(payload)
@app.route("/api/functors")
def api_functors():
    """Return the functor index of the "ssj" corpus as a JSON string."""
    # array of [functor, len] pairs
    ssj_functors = app_index["ssj"]["functors"]
    return json.dumps(ssj_functors)
@app.route("/api/register", methods=["POST"])
def api_register():
    """Register a new user from a JSON request body.

    The body must be a JSON object with non-empty string fields
    ``username``, ``password`` and ``email``.

    Returns:
        ``"OK"`` on success; ``"ERR"`` when the body is not valid JSON or a
        field is missing, empty or not a string;
        ``"ERR: Username or email already exists."`` when either value is
        already stored.
    """
    USERS_COLL = "v2_users"
    try:
        data = json.loads(request.get_data().decode())
    except ValueError:
        # Covers both undecodable bytes and malformed JSON.
        return "ERR"
    if not isinstance(data, dict):
        return "ERR"

    username = data.get("username")
    password = data.get("password")
    email = data.get("email")
    # Requiring plain strings also keeps client-supplied objects such as
    # {"$ne": ""} from being interpreted as query operators below.
    if not all(isinstance(v, str) and v for v in (username, password, email)):
        return "ERR"

    users = vallex.db[USERS_COLL]
    already_taken = users.find_one({
        "$or": [{"username": username}, {"email": email}]
    })
    if already_taken is not None:
        return "ERR: Username or email already exists."

    # NOTE(security): unsalted SHA-256 is a weak password hash. It is kept
    # because api_login and the stored users rely on this exact scheme.
    entry = {
        "username": username,
        "hpass": hashlib.sha256(password.encode("utf-8")).hexdigest(),
        "email": hashlib.sha256(email.encode("utf-8")).hexdigest(),
    }
    # Collection.insert() is deprecated and removed in PyMongo 4.
    users.insert_one(entry)
    return "OK"
@app.route("/api/login", methods=["POST"])
def api_login():
    """Check a username/password pair and issue a session token.

    The body must be a JSON object with string fields ``username`` and
    ``password``.

    Returns:
        A JSON string ``{"token": <hex string>}`` on success, or
        ``{"token": null}`` when the credentials do not match or the body
        is malformed.
    """
    USERS_COLL = "v2_users"
    TOKENS_COLL = "v2_user_tokens"
    rejected = json.dumps({"token": None})

    try:
        data = json.loads(request.get_data().decode())
    except ValueError:
        # Covers both undecodable bytes and malformed JSON.
        return rejected
    if not isinstance(data, dict):
        return rejected
    username = data.get("username")
    password = data.get("password")
    # Plain strings only: a dict here would be interpreted by MongoDB as a
    # query operator document.
    if not isinstance(username, str) or not isinstance(password, str):
        return rejected

    hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
    db_user = vallex.db[USERS_COLL].find_one({
        "username": username,
        "hpass": hpass
    })
    if db_user is None:
        return rejected

    # update or create token
    token = uuid.uuid4().hex
    token_entry = {
        "username": username,
        "date": datetime.datetime.utcnow(),
        "token": token
    }
    # The token document is replaced wholesale (no $-operators), which is
    # what replace_one does; the legacy Collection.update() is removed in
    # PyMongo 4.
    vallex.db[TOKENS_COLL].replace_one(
        {"username": username},
        token_entry,
        upsert=True
    )
    return json.dumps({"token": token})
def send_new_pass_mail(recipient, new_pass):
    """E-mail a newly generated password to ``recipient``.

    Delivery is best effort: failures are logged and not raised, so the
    caller cannot tell whether the message was sent.

    Args:
        recipient: Destination e-mail address.
        new_pass: The plain-text password to include in the message.
    """
    SENDER = "valencaglagolov@gmail.com"
    msg = MIMEText(
        "Pošiljamo vam novo geslo za "
        "vstop v aplikacijo Vezljivostni vzorci slovenskih glagolov.\n"
        "Geslo: {}.".format(new_pass)
    )
    msg["Subject"] = "Pozabljeno geslo"
    msg["From"] = SENDER
    msg["To"] = recipient
    try:
        # The context manager closes the connection even when a step fails.
        with smtplib.SMTP("smtp.gmail.com", 587) as server:
            server.ehlo()
            server.starttls()
            # NOTE(security): this credential is hard-coded in the source.
            # It should be rotated and loaded from configuration instead.
            server.login(
                SENDER,
                "rapid limb soapy fermi"
            )
            server.sendmail(SENDER, [recipient], msg.as_string())
    # The original `except Error` referenced an undefined name, so any
    # failure surfaced as a NameError instead of being logged.
    except (smtplib.SMTPException, OSError) as e:
        log.error("Sending new password failed")
        log.error(e)
    else:
        log.info("Sent new password.")
@app.route("/api/new_pass", methods=["POST"])
def api_new_pass():
    """Reset a user's password and e-mail the new one.

    The body must be a JSON object with string fields ``username`` and
    ``email``; both have to match a stored user.

    Returns:
        A JSON string ``{"confirmation": true}`` once the password has been
        replaced, or ``{"confirmation": false}`` when no user matches or
        the body is malformed.
    """
    # Function-scope import: secrets is not among the file-level imports.
    import secrets

    rejected = json.dumps({"confirmation": False})
    try:
        data = json.loads(request.get_data().decode())
    except ValueError:
        # Covers both undecodable bytes and malformed JSON.
        return rejected
    if not isinstance(data, dict):
        return rejected
    username = data.get("username")
    email = data.get("email")
    # Plain strings only: a dict username would act as a query operator.
    if not isinstance(username, str) or not isinstance(email, str):
        return rejected

    hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
    user_key = {
        "username": username,
        "email": hemail
    }
    users = vallex.db.v2_users
    # check if user is valid
    if users.find_one(user_key) is None:
        return rejected

    # create a new password; `random` is not suitable for secrets
    alphabet = string.ascii_letters + string.digits
    new_pass = "".join(secrets.choice(alphabet) for _ in range(10))

    # update locally (Collection.update() is removed in PyMongo 4)
    hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
    users.update_one(user_key, {"$set": {"hpass": hpass}})

    # send via mail
    # NOTE(review): send_new_pass_mail only logs failures, so the stored
    # password is already replaced even if the mail never goes out.
    send_new_pass_mail(email, new_pass)
    return json.dumps({"confirmation": True})
def prepare_frames(ret_frames):
    """Attach example sentences to each frame and serialise the frames.

    Every frame gets two attributes:
      * ``sentences``: one tokenized sentence per distinct sentence id,
        where the sentence id is a token id without its last dotted part;
      * ``aggr_sent``: maps a lemma to the positions in ``sentences`` that
        belong to it.

    Returns:
        A JSON string of the form ``{"frames": [...]}``.
    """
    for frm in ret_frames:
        frm.sentences = []
        # One representative token id per sentence; when several token ids
        # share a sentence, the last one seen is kept.
        tid_by_sentence = {}
        for token_id in frm.tids:
            sentence_id = ".".join(token_id.split(".")[:-1])
            tid_by_sentence[sentence_id] = token_id
        log.debug(str(tid_by_sentence))
        frm.aggr_sent = {}
        for token_id in tid_by_sentence.values():
            lemma = vallex.get_token(token_id)["lemma"]
            position = len(frm.sentences)
            frm.aggr_sent.setdefault(lemma, []).append(position)
            frm.sentences.append(vallex.get_tokenized_sentence(token_id))

    # json frames
    serialised = [DC(frm.to_json()) for frm in ret_frames]
    return json.dumps({"frames": serialised})
@app.route("/api/frames")
def api_get_frames():
    """Return the reduced frames of the headword given in ``hw``.

    Query arguments:
        hw: headword to look up; required.
        rf: name of the reduce function, ``"reduce_0"`` when omitted.
    """
    headword = request.args.get("hw")
    if headword is None:
        return json.dumps({"error": "Headword not found."})

    reducer_name = request.args.get("rf", "reduce_0")
    reducer = reduce_functions[reducer_name]["f"]
    raw_frames = vallex.entries[headword].raw_frames
    return prepare_frames(reducer(raw_frames, vallex))
@app.route("/api/functor-frames")
def api_get_functor_frames():
    """Return the reduced frames indexed under the functor in ``functor``.

    Query arguments:
        functor: functor to look up; required.
        rf: name of the reduce function, ``"reduce_0"`` when omitted.
    """
    wanted_functor = request.args.get("functor")
    if wanted_functor is None:
        return json.dumps({"error": "Missing argument: functor."})

    reducer_name = request.args.get("rf", "reduce_0")
    reducer = reduce_functions[reducer_name]["f"]
    frames_for_functor = vallex.functors_index[wanted_functor]
    return prepare_frames(reducer(frames_for_functor, vallex))
def token_to_username(token):
    """Resolve a session token to its username.

    Args:
        token: Token string received from the client.

    Returns:
        The username stored with the token, or ``None`` when the token is
        not a non-empty string or does not match exactly one stored entry.
        On success the entry's ``date`` field is set to the current UTC
        time.
    """
    COLLNAME = "v2_user_tokens"
    # The token comes straight from request JSON. Anything but a plain
    # string (e.g. {"$ne": null}, or None) would be evaluated by MongoDB as
    # a query and could match another user's token.
    if not isinstance(token, str) or not token:
        return None
    key = {
        "token": token
    }
    tokens = vallex.db[COLLNAME]
    res = list(tokens.find(key))
    if len(res) != 1:
        return None
    username = res[0]["username"]
    # update deletion interval
    # NOTE(review): presumably a TTL index on "date" expires idle tokens --
    # confirm against the database setup.
    # Collection.update() is removed in PyMongo 4; update_one is equivalent.
    tokens.update_one(
        key, {"$set": {"date": datetime.datetime.utcnow()}})
    return username
@app.route("/api/token", methods=["POST"])
def api_token():
    """Report whether the token in the JSON request body is valid.

    Returns:
        A JSON string with ``confirmation`` (bool) and ``username`` (the
        owner of the token, or null when the token is not recognised).
    """
    payload = json.loads(request.get_data().decode())
    owner = token_to_username(payload.get("token"))
    response = {
        "confirmation": owner is not None,
        "username": owner
    }
    return json.dumps(response)
@app.route("/api/senses/get")
def api_senses_get():
    """Return the senses and the sense mapping of the headword in ``hw``.

    Returns:
        A JSON string with
          * ``senses``: senses whose author is not "SSKJ" first, followed
            by the "SSKJ" senses ordered by their sense id;
          * ``sense_map``: for each ``ssj_id`` the mapping entry with the
            highest ``date``.
        The ``_id`` and ``date`` fields are removed from all entries.
    """
    headword = request.args.get("hw")
    by_headword = {"hw": headword}
    senses = list(vallex.db["v2_senses"].find(by_headword))
    mapping_rows = list(vallex.db["v2_sense_map"].find(by_headword))

    # Aggregating by max date would be possible on the DB side, but there is
    # no simple way of returning full entries, so it is done here:
    # keep, per (hw, ssj_id), the row with the highest date.
    newest = {}
    for row in mapping_rows:
        group = row["hw"] + row["ssj_id"]
        kept = newest.get(group)
        if kept is not None and kept["date"] > row["date"]:
            continue
        newest[group] = row

    sense_map = {}
    for row in newest.values():
        sense_map[row["ssj_id"]] = row
    for row in sense_map.values():
        row.pop("_id")
        row.pop("date")

    for sense in senses:
        sense.pop("_id")
        sense.pop("date", None)

    def sskj_order(sense):
        # zero-padded so that plain string comparison follows the numbers
        parts = sense["sense_id"].split("-")
        return "{:03d}-{:03d}-{:03d}".format(
            int(parts[1]), int(parts[2]), int(parts[3]))

    # user defined senses first, sskj senses second
    from_users = [s for s in senses if s["author"] != "SSKJ"]
    from_sskj = [s for s in senses if s["author"] == "SSKJ"]
    from_sskj.sort(key=sskj_order)

    return json.dumps({
        "senses": from_users + from_sskj,
        "sense_map": sense_map,
    })
@app.route("/api/senses/update", methods=["POST"])
def api_senses_update():
    """Store new senses and sense assignments sent by a logged-in user.

    The JSON body carries ``token``, ``hw``, ``sense_map`` (``ssj_id`` ->
    object with a ``sense_id``) and ``new_senses`` (list of sense objects
    with a client-side ``sense_id`` and a ``desc``).

    Returns:
        ``"OK"`` on success, ``"Not a user."`` when the token is not valid.
    """
    b = request.get_data()
    data = json.loads(b.decode())
    token = data.get("token")
    hw = data.get("hw")
    # Either collection may be absent from the request; treat that as empty
    # instead of failing on None below.
    sense_map = data.get("sense_map") or {}
    new_senses = data.get("new_senses") or []

    username = token_to_username(token)
    if username is None:
        log.debug("Not a user.")
        return "Not a user."

    # store new senses,
    # create new sense_ids
    id_map = {}  # client-side sense_id -> generated sense_id
    for ns in new_senses:
        tmp_dt = datetime.datetime.utcnow()
        digest = hashlib.sha256("{}{}{}".format(
            username,
            ns["desc"],
            str(tmp_dt)
        ).encode("utf-8")).hexdigest()[:10]
        new_sense_id = "{}-{}".format(username, digest)
        id_map[ns["sense_id"]] = new_sense_id
        ns["sense_id"] = new_sense_id
        ns["date"] = tmp_dt
        # Collection.insert() is removed in PyMongo 4.
        vallex.db["v2_senses"].insert_one(ns)

    # replace client-side sense ids with the generated ones and record
    # every assignment as a new, dated entry
    for ssj_id, el in sense_map.items():
        sense_id = id_map.get(el["sense_id"], el["sense_id"])
        mapping_entry = {
            "user": username,
            "hw": hw,
            "ssj_id": ssj_id,
            "sense_id": sense_id,
            "date": datetime.datetime.utcnow()
        }
        vallex.db["v2_sense_map"].insert_one(mapping_entry)
    return "OK"
def prepare_db():
    """Annotate corpus entries and build the in-memory ``app_index``.

    For every corpus in ``CORPORA``:
      1. each entry gets a ``headwords`` and a ``functors`` list derived
         from its ``srl_links`` and is written back to the database;
      2. ``app_index[corpus]["words"]`` is set to a dict mapping a
         lower-cased first letter to a list of ``(headword, count)`` pairs
         sorted by headword, and ``app_index[corpus]["functors"]`` to a
         list of ``(functor, count)`` pairs sorted by functor.
    """
    def helper_tid_to_token(tid, tokens):
        """Return the token whose "tid" equals ``tid``, or None."""
        for t in tokens:
            if t["tid"] == tid:
                return t
        return None

    # update entries (add headwords and functors for indexing)
    for corpus in CORPORA:
        collection = valdb[corpus]
        for e in collection.find({}):
            #! hw_tids are not array ids
            hw_tids = {x["from"] for x in e["srl_links"]}
            headwords = []
            for tid in hw_tids:
                t = helper_tid_to_token(tid, e["tokens"])
                if t is None:
                    # The original subscripted the None result and crashed.
                    log.warning(
                        "No token with tid %s in corpus %s; link skipped.",
                        tid, corpus)
                    continue
                # lemmas whose msd does not start with "G" get a "_" suffix
                headwords.append(
                    t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_")
            e["headwords"] = headwords
            e["functors"] = list({x["afun"] for x in e["srl_links"]})
            # Collection.save() is removed in PyMongo 4; for a document
            # that has an _id this upserting replace is its equivalent.
            collection.replace_one({"_id": e["_id"]}, e, upsert=True)

    # create app_index (used in frontend, left side word index)
    for corpus in CORPORA:
        res_hws = {}
        res_fns = {}
        for e in valdb[corpus].find({}):
            for hw in e["headwords"]:
                res_hws[hw] = res_hws.get(hw, 0) + 1
            for fn in e["functors"]:
                res_fns[fn] = res_fns.get(fn, 0) + 1

        alphabetical = {}
        for hw, count in res_hws.items():
            alphabetical.setdefault(hw[0].lower(), []).append((hw, count))
        for words in alphabetical.values():
            words.sort(key=lambda x: x[0])
        app_index[corpus]["words"] = alphabetical

        app_index[corpus]["functors"] = sorted(
            res_fns.items(), key=lambda x: x[0])
if __name__ == "__main__":
    # Script entry point: parse arguments, load the YAML config, set up
    # logging, connect to MongoDB, optionally rebuild the index, run Flask.
    print("Starting app.py main()")
    aparser = argparse.ArgumentParser(description="Arguments for app.py")
    # Required: without it the original failed later with a TypeError from
    # Path(None) instead of a usage message.
    aparser.add_argument(
        "--config-file", type=str, required=True, help="check ./conf_files/")
    aparser.add_argument("--dbuser", type=str)
    aparser.add_argument("--dbpass", type=str)
    aparser.add_argument("--dbaddr", type=str)
    args = aparser.parse_args()

    with Path(args.config_file).open("r") as fp:
        # only the first YAML document of the file is used
        config = list(yaml.safe_load_all(fp))[0]

    app.debug = bool(config["debug"])
    logfile = config["logfile"]
    if app.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(filename=logfile, level=logging.INFO)

    # db login
    client = MongoClient(
        "mongodb://{}".format(args.dbaddr),
        username=args.dbuser,
        password=args.dbpass,
        authSource="valdb",
        authMechanism='SCRAM-SHA-256'
    )
    # Module-level name read by prepare_db().
    valdb = client.valdb

    # NOTE(review): app_index is only filled by prepare_db(); when the
    # "prepare_db" option is off, /api/words and /api/functors have no
    # "words"/"functors" entries to read.
    if config["prepare_db"]:
        prepare_db()

    log.info("[*] Starting app.py with config:\n%s", config)
    app.run(host=str(config["host"]), port=int(config["port"]))