# -*- coding: utf-8 -*-
"""Flask JSON API serving valency frames, word indexes, users and senses.

All persistent data lives in MongoDB (module global ``valdb``); the global is
assigned by the ``__main__`` section of this file before the app starts.
"""
import argparse
import datetime
import hashlib
import json
import logging
import random
import smtplib
import string
import sys
import uuid
from copy import deepcopy as DC
from email.mime.text import MIMEText
from pathlib import Path

import pymongo
import yaml
from flask import Flask, render_template, request, url_for, redirect
from flask_cors import CORS
from pymongo import MongoClient

from valency.Frame import Frame, Slot, frames_from_db_entry
from valency.reduce_functions import reduce_functions

# Legacy imports, kept for reference only:
# from valency import k_utils
# from valency.ssj_struct import *
# from valency.val_struct import *
# from valency.reduce_functions import *

# some db collections
USERS_COLL = "users"
TOKENS_COLL = "usertokens"
SENSES_COLL = "senses"
SENSEMAP_COLL = "sensemap"

# pre-generated data (gui leftside word index)
CORPORA = ["ssj", "kres"]
app_index = None

log = logging.getLogger(__name__)
valdb = None
app = Flask(__name__)

# when running vuejs via webpack
# CORS(app)
# CORS(app, resources={r"/api/*": {
#     "origins": "*",
# }})
CORS(app)


# DEV -------------------------------.
@app.route("/api/dev")
def api_dev():
    """Development endpoint: dump all frames of one fixed headword as JSON."""
    log.debug("DEV")
    cur = valdb.kres.find({"headwords": "nagovarjati"})
    frames = []
    for ent in cur:
        frames += frames_from_db_entry(ent)
    return json.dumps([x.to_json() for x in frames])
# DEV -------------------------------^


# INDEX SELECTION -------------------.
def _unknown_corpus_error():
    """Return the JSON error body used when a corpus name is not in CORPORA."""
    return json.dumps({
        "error": "corpus must be one of: " + ", ".join(CORPORA),
    })


# The view functions take ``corpus`` as an argument, so the route must declare
# it as a URL variable; without it Flask calls the view with no arguments and
# every request fails with a TypeError.
@app.route("/api/words/<corpus>")
def api_words(corpus):
    """Return the pre-generated headword index of ``corpus`` as JSON.

    The response is ``{"sorted_words": ...}`` taken from ``app_index``; an
    unknown corpus name yields ``{"error": ...}`` instead of a KeyError.
    """
    if corpus not in CORPORA:
        return _unknown_corpus_error()
    return json.dumps({
        "sorted_words": app_index[corpus]["words"],
    })


@app.route("/api/functors/<corpus>")
def api_functors(corpus):
    """Return the pre-generated functor index of ``corpus`` as JSON.

    An unknown corpus name yields ``{"error": ...}`` instead of a KeyError.
    """
    if corpus not in CORPORA:
        return _unknown_corpus_error()
    return json.dumps(app_index[corpus]["functors"])
# INDEX SELECTION -------------------^


# AUTH ------------------------------.
@app.route("/api/register", methods=["POST"])
def api_register():
    """Create a user account from a JSON request body.

    The body must carry non-empty ``username``, ``password`` and ``email``.
    Returns the plain strings ``"OK"``, ``"ERR"`` (missing or empty field) or
    ``"ERR: Username or email already exists."``.
    """
    data = json.loads(request.get_data().decode())
    username = data.get("username")
    password = data.get("password")
    email = data.get("email")
    # A missing key is handled like an empty value instead of raising KeyError.
    if not username or not password or not email:
        return "ERR"

    # NOTE(review): passwords are stored as unsalted SHA-256 digests. A salted
    # KDF would be safer, but switching requires migrating stored hashes.
    hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
    # The stored e-mail is a digest (see `entry` below), so the uniqueness
    # check is done against the digest as well.
    existing = valdb[USERS_COLL].find_one({
        "$or": [{"username": username}, {"email": email}, {"email": hemail}]
    })
    if existing is not None:
        return "ERR: Username or email already exists."

    entry = {
        "username": username,
        "hpass": hashlib.sha256(password.encode("utf-8")).hexdigest(),
        "email": hemail,
    }
    # insert() was removed in PyMongo 4; insert_one() exists since 3.0.
    valdb[USERS_COLL].insert_one(entry)
    return "OK"


@app.route("/api/login", methods=["POST"])
def api_login():
    """Check credentials and issue a fresh session token.

    Returns JSON ``{"token": <hex string>}`` on success and
    ``{"token": null}`` when the credentials are missing or do not match.
    Each user has a single token document, replaced on every login.
    """
    data = json.loads(request.get_data().decode())
    username = data.get("username")
    password = data.get("password")
    if not username or not password:
        return json.dumps({"token": None})

    hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
    db_user = valdb[USERS_COLL].find_one({
        "username": username,
        "hpass": hpass
    })
    if db_user is None:
        return json.dumps({"token": None})

    # update or create token
    token = uuid.uuid4().hex
    token_entry = {
        "username": username,
        "date": datetime.datetime.utcnow(),
        "token": token
    }
    # Whole-document replacement; the PyMongo 4 equivalent of update().
    valdb[TOKENS_COLL].replace_one(
        {"username": username},
        token_entry,
        upsert=True
    )
    return json.dumps({"token": token})


def send_new_pass_mail(recipient, new_pass):
    """Mail a newly generated password to ``recipient``.

    Returns True when the message was handed to the SMTP server and False
    when sending failed; failures are logged, never raised.
    """
    SENDER = "valencaglagolov@gmail.com"
    msg = MIMEText(
        "Pošiljamo vam novo geslo za "
        "vstop v aplikacijo Vezljivostni vzorci slovenskih glagolov.\n"
        "Geslo: {}.".format(new_pass)
    )
    msg["Subject"] = "Pozabljeno geslo"
    msg["From"] = SENDER
    msg["To"] = recipient

    try:
        # The context manager closes the connection even when a step fails.
        with smtplib.SMTP("smtp.gmail.com", 587) as server:
            server.ehlo()
            server.starttls()
            # SECURITY: the mail account password is hard-coded in source.
            # It should be moved to configuration and rotated.
            server.login(
                SENDER,
                "rapid limb soapy fermi"
            )
            server.sendmail(SENDER, [recipient], msg.as_string())
    except (smtplib.SMTPException, OSError) as e:
        # The original caught an undefined name `Error`, so every failure
        # surfaced as a NameError instead of being logged.
        log.error("Sending new password failed")
        log.error(e)
        return False
    log.info("Sent new password.")
    return True


@app.route("/api/new_pass", methods=["POST"])
def api_new_pass():
    """Reset a user's password and mail the new one.

    The JSON body must carry ``username`` and ``email`` matching a stored
    user. Returns JSON ``{"confirmation": true}`` when a new password was
    mailed and stored, ``{"confirmation": false}`` otherwise.
    """
    import secrets  # local import: this block must not rely on file header

    data = json.loads(request.get_data().decode())
    username = data.get("username")
    email = data.get("email")
    if not username or not email:
        return json.dumps({"confirmation": False})

    hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
    user_key = {
        "username": username,
        "email": hemail
    }

    # check if user is valid
    if valdb[USERS_COLL].find_one(user_key) is None:
        return json.dumps({"confirmation": False})

    # create a new password from a cryptographically secure source
    alphabet = string.ascii_letters + string.digits
    new_pass = "".join(secrets.choice(alphabet) for _ in range(10))

    # Send first: if delivery fails, the old password must stay valid,
    # otherwise the user is locked out with a password nobody knows.
    if not send_new_pass_mail(email, new_pass):
        return json.dumps({"confirmation": False})

    # update locally
    hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
    valdb[USERS_COLL].update_one(
        user_key,
        {"$set": {
            "hpass": hpass
        }}
    )
    return json.dumps({"confirmation": True})


def token_to_username(token):
    """Return the username owning ``token``, or None if it is not valid.

    A valid token matches exactly one token document. On success the
    document's ``date`` is refreshed to the current UTC time.
    """
    # A None token would also match documents lacking the field entirely.
    if not token:
        return None
    key = {
        "token": token
    }
    res = list(valdb[TOKENS_COLL].find(key))
    if len(res) != 1:
        return None
    username = res[0]["username"]
    # Refresh the date (presumably drives token expiry - not visible here).
    valdb[TOKENS_COLL].update_one(
        key, {"$set": {"date": datetime.datetime.utcnow()}})
    return username


@app.route("/api/token", methods=["POST"])
def api_token():
    """Validate the ``token`` sent in the JSON request body.

    Returns JSON ``{"confirmation": <bool>, "username": <name or null>}``.
    """
    data = json.loads(request.get_data().decode())
    token = data.get("token")
    user = token_to_username(token)
    confirm = (user is not None)
    return json.dumps({
        "confirmation": confirm,
        "username": user
    })
# AUTH ------------------------------^


# FRAMES ----------------------------.
def prepare_frames(ret_frames):
    """Attach sentence aggregation to each frame and serialise them to JSON.

    Every frame gets an ``aggr_sent`` dict mapping a headword lemma to a list
    of sentence indexes. Returns a JSON string ``{"frames": [...]}`` built
    from each frame's ``to_json()``.
    """
    for frame in ret_frames:
        # One token id per sentence id; a sentence id is the token id without
        # its last dot-separated component.
        unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids}
        frame.aggr_sent = {}  # sid, tid==hw
        for _sid in unique_sids:
            # Placeholder lemma: the real lookup is not implemented, so all
            # sentences of a frame land in the same bucket.
            hwl = "derp"
            tmp_idx = len(frame.sentences)
            frame.aggr_sent.setdefault(hwl, []).append(tmp_idx)

    json_ret = {"frames": [DC(frame.to_json()) for frame in ret_frames]}
    return json.dumps(json_ret)


# input: hw, reduct_function
@app.route("/api/frames")
def api_get_frames():
    """Return the reduced frames of one headword as JSON.

    Query arguments: ``hw`` (headword, required), ``rf`` (reduce function
    name, default ``reduce_0``) and ``cor`` (corpus, one of CORPORA).
    Invalid arguments yield a JSON ``{"error": ...}`` body.
    """
    hw = request.args.get("hw")
    if hw is None:
        return json.dumps({"error": "Required argument: hw (headword)."})

    rf_name = request.args.get("rf", "reduce_0")  # 2nd is default
    if rf_name not in reduce_functions:
        # Previously an unknown name raised KeyError (HTTP 500).
        return json.dumps({"error": "Unknown reduce function: " + rf_name})
    RF = reduce_functions[rf_name]["f"]

    corpus = request.args.get("cor")
    if corpus not in CORPORA:
        return json.dumps({"error": "cor={kres,ssj}"})

    cur = valdb[corpus].find({"headwords": hw})
    frames = []
    n_results = 0
    for ent in cur:
        # TODO: maybe filter by hw?
        n_results += 1
        frames += frames_from_db_entry(ent)
    # Counted while iterating: Cursor.count() was removed in PyMongo 4.
    log.debug("N_results: %d", n_results)

    ret_frames = RF(frames, None)
    return prepare_frames(ret_frames)


# input: functor, reduce_function
@app.route("/api/functor-frames")
def api_get_functor_frames():
    """Return the reduced frames of one functor as JSON.

    Query arguments: ``functor`` (required) and ``rf`` (reduce function name,
    default ``reduce_0``). The lookup relies on a module-level ``vallex``
    object that this module never defines; until one is provided the
    endpoint answers with a JSON error.
    """
    functor = request.args.get("functor")
    if functor is None:
        return json.dumps({"error": "Missing argument: functor."})

    rf_name = request.args.get("rf", "reduce_0")  # 2nd is default
    if rf_name not in reduce_functions:
        return json.dumps({"error": "Unknown reduce function: " + rf_name})
    RF = reduce_functions[rf_name]["f"]

    # `vallex` is not defined or imported in this module, so referencing it
    # directly raised NameError on every request. Looking it up in globals
    # keeps the original code path usable if it is ever provided.
    vallex = globals().get("vallex")
    if vallex is None:
        log.error("api_get_functor_frames: no vallex index available")
        return json.dumps({"error": "Functor frames are not available."})

    raw_frames = vallex.functors_index[functor]  # TODO
    ret_frames = RF(raw_frames, vallex)
    return prepare_frames(ret_frames)
# FRAMES ----------------------------^


# SENSES ----------------------------.
@app.route("/api/senses/get")
def api_senses_get():
    """Return the senses and the sense mapping of headword ``hw`` as JSON.

    The response is ``{"senses": [...], "sense_map": {ssj_id: entry}}``.
    For each ``ssj_id`` only the mapping entry with the latest ``date`` is
    kept. User-defined senses come first, followed by SSKJ senses ordered by
    the numeric parts of their ``sense_id``.
    """
    hw = request.args.get("hw")
    senses = list(valdb[SENSES_COLL].find({
        "hw": hw
    }))

    # Keep the newest mapping per ssj_id. All rows share the queried hw, so
    # ssj_id alone identifies a group. On equal dates the later row wins.
    sense_map = {}
    for sm in valdb[SENSEMAP_COLL].find({"hw": hw}):
        ssj_id = sm["ssj_id"]
        current = sense_map.get(ssj_id)
        if current is None or sm["date"] >= current["date"]:
            sense_map[ssj_id] = sm

    # ObjectId and datetime values are not JSON serialisable.
    for e in sense_map.values():
        e.pop("_id", None)
        e.pop("date", None)
    for e in senses:
        e.pop("_id", None)
        e.pop("date", None)

    # sort senses: user defined first, sskj second
    # A sense without an author counts as user defined; the previous
    # s["author"] lookup raised KeyError for such documents.
    user_senses = [s for s in senses if s.get("author") != "SSKJ"]
    sskj_senses = [s for s in senses if s.get("author") == "SSKJ"]

    def sorting_helper(sense):
        # sskj sense ids are dash separated; parts 1..3 are numeric.
        arr = sense["sense_id"].split("-")
        return (int(arr[1]), int(arr[2]), int(arr[3]))

    senses = user_senses + sorted(sskj_senses, key=sorting_helper)

    return json.dumps({
        "senses": senses,
        "sense_map": sense_map,
    })


@app.route("/api/senses/update", methods=["POST"])
def api_senses_update():
    """Store new senses and sense mappings sent by a logged-in user.

    JSON body keys: ``token``, ``hw``, ``sense_map`` (``ssj_id`` -> entry
    with a ``sense_id``) and ``new_senses`` (list of sense dicts carrying a
    temporary frontend ``sense_id``). Returns ``"OK"``, or ``"Not a user."``
    when the token is not valid.
    """
    data = json.loads(request.get_data().decode())
    token = data.get("token")
    hw = data.get("hw")
    # Absent collections are treated as empty instead of crashing.
    sense_map = data.get("sense_map") or {}
    new_senses = data.get("new_senses") or []

    username = token_to_username(token)
    if username is None:
        log.debug("Not a user.")
        return "Not a user."

    # store new senses,
    # create new sense_ids
    id_map = {}
    for ns in new_senses:
        tmp_dt = datetime.datetime.utcnow()
        digest = hashlib.sha256("{}{}{}".format(
            username,
            ns.get("desc", ""),
            str(tmp_dt)
        ).encode("utf-8")).hexdigest()[:10]
        new_sense_id = "{}-{}".format(username, digest)
        frontend_sense_id = ns.get("sense_id")
        ns["sense_id"] = new_sense_id
        ns["date"] = tmp_dt
        if frontend_sense_id is not None:
            id_map[frontend_sense_id] = new_sense_id

        # insert into db (insert() was removed in PyMongo 4)
        valdb[SENSES_COLL].insert_one(ns)

    # replace the frontend's temporary ids with the generated sense ids
    for ssj_id, el in sense_map.items():
        sense_id = el["sense_id"]
        sense_id = id_map.get(sense_id, sense_id)
        mapping_entry = {
            "user": username,
            "hw": hw,
            "ssj_id": ssj_id,
            "sense_id": sense_id,
            "date": datetime.datetime.utcnow()
        }
        # Mappings are appended, never overwritten; readers pick the newest.
        valdb[SENSEMAP_COLL].insert_one(mapping_entry)
    return "OK"
# SENSES ----------------------------^


# APP PREFLIGHT ---------------------.
def prepare_db():
    """Annotate corpus entries for indexing and rebuild the app index.

    For every corpus in CORPORA each entry gets ``headwords`` and
    ``functors`` lists derived from its ``srl_links``, and both fields are
    indexed. Afterwards the per-corpus word and functor frequency index is
    stored in the ``appindex`` collection under ``dockey == "appindex"``.
    """
    from collections import Counter  # local: block stands on its own

    # update entries (add headwords and functors for indexing)
    for corpus in CORPORA:
        coll = valdb[corpus]
        for e in coll.find({}):
            srl_links = e.get("srl_links")
            if srl_links is None:
                e["headwords"] = []
                e["functors"] = []
            else:
                # One pass over the tokens instead of a scan per tid; the
                # first token wins for a repeated tid, as the scan did.
                tokens_by_tid = {}
                for t in e["tokens"]:
                    tokens_by_tid.setdefault(t["tid"], t)

                headwords = []
                for tid in {x["from"] for x in srl_links}:
                    t = tokens_by_tid.get(tid)
                    if t is None:
                        # Previously this crashed with a TypeError on None.
                        log.warning("prepare_db: no token for tid %s", tid)
                        continue
                    # Lemmas whose msd does not start with "G" get a "_"
                    # suffix (presumably marks non-verb headwords).
                    headwords.append(
                        t["lemma"] if t["msd"][0] == "G"
                        else t["lemma"] + "_")
                e["headwords"] = headwords
                e["functors"] = list({x["afun"] for x in srl_links})

            # save() was removed in PyMongo 4; this is its equivalent.
            coll.replace_one({"_id": e["_id"]}, e, upsert=True)

        # ensure_index() was removed in PyMongo 4.
        coll.create_index([("headwords", pymongo.ASCENDING)])
        coll.create_index([("functors", pymongo.ASCENDING)])

    # create app_index (used in frontend, left side word index)
    tmp_app_index = {c: {} for c in CORPORA}
    for corpus in CORPORA:
        res_hws = Counter()
        res_fns = Counter()
        # Only the two indexed fields are needed here.
        for e in valdb[corpus].find({}, {"headwords": 1, "functors": 1}):
            if "headwords" not in e:
                continue
            res_hws.update(e["headwords"])
            if "functors" not in e:
                continue
            res_fns.update(e["functors"])

        # Group (headword, count) pairs by lower-cased first character.
        alphabetical = {}
        for hw, count in res_hws.items():
            if not hw:
                continue  # an empty headword has no first character
            alphabetical.setdefault(hw[0].lower(), []).append((hw, count))
        for fst, pairs in alphabetical.items():
            alphabetical[fst] = sorted(pairs, key=lambda x: x[0])

        tmp_app_index[corpus]["words"] = alphabetical
        tmp_app_index[corpus]["functors"] = sorted(
            res_fns.items(), key=lambda x: x[0])

    valdb.appindex.replace_one(
        {"dockey": "appindex"},
        {"dockey": "appindex", "data": tmp_app_index},
        upsert=True
    )
# APP PREFLIGHT ---------------------^


if __name__ == "__main__":
    print("Starting app.py main()")
    aparser = argparse.ArgumentParser(description="Arguments for app.py")
    # Required: without it Path(None) failed with an opaque TypeError.
    aparser.add_argument("--config-file", type=str, required=True,
                         help="check ./conf_files/")
    aparser.add_argument("--dbuser", type=str)
    aparser.add_argument("--dbpass", type=str)
    aparser.add_argument("--dbaddr", type=str)
    args = aparser.parse_args()

    # Only the first YAML document of the config file is used.
    with Path(args.config_file).open("r") as fp:
        config = next(yaml.safe_load_all(fp), None)
    if config is None:
        aparser.error("config file is empty: {}".format(args.config_file))

    app.debug = bool(config["debug"])
    logfile = config["logfile"]
    if app.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(filename=logfile, level=logging.INFO)

    # db login
    client = MongoClient(
        "mongodb://{}".format(args.dbaddr),
        username=args.dbuser,
        password=args.dbpass,
        authSource="valdb",
        authMechanism='SCRAM-SHA-256'
    )
    valdb = client.valdb
    if bool(config["prepare_db"]):
        prepare_db()

    # app index from db
    index_doc = valdb.appindex.find_one({"dockey": "appindex"})
    if index_doc is None:
        log.error("No app index in the database; "
                  "run once with prepare_db enabled in the config.")
        sys.exit(1)
    app_index = index_doc["data"]

    log.info("[*] Starting app.py with config:\n%s", config)
    app.run(host=str(config["host"]), port=int(config["port"]))