prepared app for production (removed global variables, added flask-pymongo as db driver)

voje 2019-05-04 01:28:46 +02:00
parent 707034153c
commit 2ff339e24c
12 changed files with 140 additions and 134 deletions
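In short, the commit drops the module-level globals in app.py (valdb, CORPORA, QUERY_LIMIT, ...) and keeps that state on the Flask app instead: settings move into app.config, and the hand-built MongoClient is replaced by a flask-pymongo handle configured from the new db_config module. Below is a minimal sketch of that pattern, distilled from the diff that follows; the api_example route and its response shape are hypothetical and only illustrate the wiring, they are not part of the commit.

import json
from flask import Flask
from flask_pymongo import PyMongo

app = Flask(__name__)
app.config.from_object("db_config")   # new config module: MONGO_URI, MONGO_AUTH_SOURCE
mongo = PyMongo(app)                  # replaces the global MongoClient / valdb handle

# former globals now live on app.config
app.config["CORPORA"] = ["ssj", "kres"]
app.config["QUERY_LIMIT"] = 1000


@app.route("/api/example/<corpus>")   # hypothetical route, for illustration only
def api_example(corpus):
    if corpus not in app.config["CORPORA"]:
        return json.dumps({"error": "cor={kres,ssj}"})
    # mongo.db is the database named in MONGO_URI ("valdb");
    # collections are reached by attribute or by name, e.g. mongo.db.users
    cur = mongo.db[corpus].find({}).limit(app.config["QUERY_LIMIT"])
    return json.dumps({"n_entries": len(list(cur))})

With this wiring, gunicorn (installed in Dockerfile-backend-flask) can import the app object directly, which is what the new init_wsgi() path in app.py is meant for.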

Dockerfile-backend-flask (new file, 18 additions)
View File

@@ -0,0 +1,18 @@
+FROM cjvt-python-env
+RUN pip3 install gunicorn
+RUN mkdir -p /project/src/backend_flask
+RUN mkdir -p /project/src/pkg
+RUN mkdir -p /project/data
+COPY src/backend_flask /project/src/backend_flask
+COPY src/pkg /project/src/pkg
+COPY data/appindex.json /project/data
+COPY src/backend_flask/entrypoint.sh /.
+COPY src/backend_flask/conf_files/prod_conf.yaml /project
+ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]

View File

@@ -124,12 +124,15 @@ backend-dev:
 		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
 		--appindex-json $(APPINDEX_PATH)
-backend-prod:
+backend-prod-old:
 	cd ./src/backend_flask; python3 app.py \
 		--config-file ./conf_files/prod_conf.yaml \
 		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
 		--appindex-json $(APPINDEX_PATH)
+backend-prod:
+	cd ./src/backend_flask; $(MAKE) prod
 ## add sskj senses to db (generated with pkg/seqparser)
 sskj-senses:
 	python3 ./src/pkg/seqparser/seqparser/main.py \

View File

@@ -1,26 +1,25 @@
-FROM ubuntu:16.04
+FROM ubuntu:18.04
 RUN apt-get update --fix-missing
 RUN apt-get install -y \
     vim \
     python3 \
     python3-pip \
-    sshfs
+    sshfs \
+    curl
+RUN pip3 install --upgrade pip
 RUN pip3 install \
     lxml \
     pandas \
     sklearn \
     argparse \
+    pyyaml \
     pathlib \
+    flask \
+    flask_cors \
     pymongo \
-    flask
-RUN apt-get install -y \
-    curl
+    flask-pymongo
 ENV PYTHONIOENCODING UTF-8
-RUN pip3 install \
-    pyyaml \
-    flask_cors

View File

@@ -1,4 +1,4 @@
-IMAGE_NAME="cjvt-python-env"
+IMAGE_NAME="cjvt-python-env" # don't change, used in backend_flask/Makefile
 CNNAME="python-env"
 all: build run

View File

@@ -0,0 +1,5 @@
+#!/bin/bash
+echo "testing entrypoint."
+$(exit 1)
+exit 0

View File

@@ -0,0 +1,16 @@
+IMG="backend-flask"
+CNT="backend_flask"
+clean:
+	- docker rm -f $(CNT)
+run: clean build
+	docker run -d --net host --name $(CNT) $(IMG)
+	docker logs -f $(CNT)
+build: build-cjvt-python-env
+	# docker build . -f ../../Dockerfile-backend-flask -t $(IMG)
+	cd ../..; docker build . -f Dockerfile-backend-flask -t $(IMG)
+build-cjvt-python-env:
+	cd ../../dockerfiles/python-env; $(MAKE) build

View File

@@ -26,26 +26,21 @@ from email.mime.text import MIMEText
 from copy import deepcopy as DC
 from pathlib import Path
 from pymongo import MongoClient
+from flask_pymongo import PyMongo
 import pymongo
 import argparse
-# some db collections
-USERS_COLL = "users"
-TOKENS_COLL = "usertokens"
-SENSES_COLL = "senses"
-SENSEMAP_COLL = "sensemap"
-# pre-generated data (gui leftside word index)
-CORPORA = ["ssj", "kres"]
-app_index = None
-sskj_wordlist = None # used by _is_banned(hw)
-BANNED_HEADWORDS = ["biti"]
-QUERY_LIMIT = 1000 # Some headwords contain thousands of examples - not practical for the app
 log = logging.getLogger(__name__)
-valdb = None
 app = Flask(__name__)
+app.config.from_object("db_config")
+mongo = PyMongo(app)
+app.config["CORPORA"] = ["ssj", "kres"]
+app.config["BANNED_HEADWORDS"] = ["biti"]
+app.config["QUERY_LIMIT"] = 1000
 # when running vuejs via webpack
 # CORS(app)
@@ -60,7 +55,7 @@ CORS(app)
 @app.route("/api/dev")
 def api_dev():
     print("DEV")
-    cur = valdb.kres.find({"headwords": "nagovarjati"})
+    cur = mongo.db.kres.find({"headwords": "nagovarjati"})
     frames = []
     for ent in cur:
         frames += frames_from_db_entry(ent)
@@ -73,12 +68,12 @@ def api_dev():
 @app.route("/api/words/<corpus>")
 def api_words(corpus):
     return json.dumps({
-        "sorted_words": app_index[corpus]["words"], # todo - make corpus as arg
+        "sorted_words": app.config["app_index"][corpus]["words"], # todo - make corpus as arg
     })
 @app.route("/api/functors/<corpus>")
 def api_functors(corpus):
-    return json.dumps(app_index[corpus]["functors"])
+    return json.dumps(app.config["app_index"][corpus]["functors"])
 # INDEX SELECTION -------------------^
@@ -99,7 +94,7 @@ def api_register():
     ):
         return "ERR"
     email_hash = hashlib.sha256(email.encode("utf-8")).hexdigest()
-    existing = list(valdb[USERS_COLL].find({
+    existing = list(mongo.db.users.find({
         "$or": [{"username": username}, {"email": email_hash}]
     }))
     if len(existing) > 0:
@@ -110,7 +105,7 @@ def api_register():
             password.encode("utf-8")).hexdigest(),
         "email": email_hash
     }
-    valdb[USERS_COLL].insert(entry)
+    mongo.db.users.insert(entry)
     return "OK"
@@ -122,7 +117,7 @@ def api_login():
     password = data["password"]
     hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
-    db_user = list(valdb[USERS_COLL].find({
+    db_user = list(mongo.db.users.find({
         "username": username,
         "hpass": hpass
     }))
@@ -136,7 +131,7 @@ def api_login():
         "date": datetime.datetime.utcnow(),
         "token": token
     }
-    valdb[TOKENS_COLL].update(
+    mongo.db.usertokens.update(
         {"username": token_entry["username"]},
         token_entry,
         upsert=True
@@ -179,7 +174,7 @@ def api_new_pass():
     username = data["username"]
     email = data["email"]
     hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
-    db_res = list(valdb[USERS_COLL].find({
+    db_res = list(mongo.db.users.find({
         "username": username,
         "email": hemail
     }))
@@ -191,7 +186,7 @@ def api_new_pass():
         string.ascii_letters + string.digits) for i in range(10)])
     # update locally
     hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
-    valdb[USERS_COLL].update(
+    mongo.db.users.update(
         {
             "username": username,
             "email": hemail
@@ -209,12 +204,12 @@ def token_to_username(token):
     key = {
         "token": token
     }
-    res = list(valdb[TOKENS_COLL].find(key))
+    res = list(mongo.db.usertokens.find(key))
     if len(res) != 1:
         return None
     username = res[0]["username"]
     # update deletion interval
-    valdb[TOKENS_COLL].update(
+    mongo.db.usertokens.update(
         key, {"$set": {"date": datetime.datetime.utcnow()}})
     return username
@@ -249,19 +244,19 @@ def api_get_frames():
     RF = reduce_functions[rf_name]["f"]
     corpus = request.args.get("cor")
-    if corpus not in CORPORA:
+    if corpus not in app.config["CORPORA"]:
         return json.dumps({"error": "cor={kres,ssj}"})
-    cur = valdb[corpus].find({"headwords": hw})
+    cur = mongo.db[corpus].find({"headwords": hw})
     frames = []
-    for ent in cur[:QUERY_LIMIT]:
+    for ent in cur[:app.config["QUERY_LIMIT"]]:
         frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
     cur.close()
     # filter by relevant hw
     frames = [x for x in frames if x.hw == hw]
-    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
+    ret_frames = RF(frames, mongo.db.sensemap)
     json_ret = {"frames": []}
     for frame in ret_frames:
@@ -302,12 +297,12 @@ def api_get_functor_frames():
     RF = reduce_functions[rf_name]["f"]
     corpus = request.args.get("cor")
-    if corpus not in CORPORA:
+    if corpus not in app.config["CORPORA"]:
         return json.dumps({"error": "cor={kres,ssj}"})
-    cur = valdb[corpus].find({"functors": functor})
+    cur = mongo.db[corpus].find({"functors": functor})
     frames = []
-    for ent in cur[:QUERY_LIMIT]:
+    for ent in cur[:app.config["QUERY_LIMIT"]]:
         frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
     cur.close()
@@ -315,7 +310,7 @@ def api_get_functor_frames():
     frames = [x for x in frames if functor in x.get_functors()]
     # raw_frames = vallex.functors_index[functor] # TODO
-    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
+    ret_frames = RF(frames, mongo.db.sensemap)
     ret_frames = _aggregate_by_hw(ret_frames)
     json_ret = {"frames": []}
@@ -334,10 +329,10 @@ def api_get_functor_frames():
 def api_senses_get():
     # returns senses and mapping for hw
     hw = request.args.get("hw")
-    senses = list(valdb[SENSES_COLL].find({
+    senses = list(mongo.db.senses.find({
         "hw": hw
     }))
-    sense_map_query = list(valdb[SENSEMAP_COLL].find({
+    sense_map_query = list(mongo.db.sensemap.find({
         "hw": hw
     }))
     # aggregation by max date possible on DB side
@@ -417,7 +412,7 @@ def api_senses_update():
         print(ns)
         # insert into db
-        valdb[SENSES_COLL].insert(ns)
+        mongo.db.senses.insert(ns)
     # replace tmp_id with mongo's _id
     for ssj_id, el in sense_map.items():
@@ -432,7 +427,7 @@ def api_senses_update():
             "date": datetime.datetime.utcnow()
         }
         # vallex.db["v2_sense_map"].update(key, data, upsert=True)
-        valdb[SENSEMAP_COLL].insert(data)
+        mongo.db.sensemap.insert(data)
     return "OK"
 # SENSES ----------------------------^
@@ -441,7 +436,7 @@ def api_senses_update():
 # APP PREFLIGHT ---------------------.
 def _is_banned(hw):
     banned = True
-    if hw in BANNED_HEADWORDS:
+    if hw in app.config["BANNED_HEADWORDS"]:
         banned = True
     elif hw in sskj_wordlist["wordlist"]:
         banned = False
@@ -449,17 +444,17 @@ def _is_banned(hw):
         banned = False
     return banned
-def prepare_app_index(appindex_json):
+def prepare_app_index(appindex_json, sskj_wordlist):
     log.info("[*] preparing app_index")
     # create app_index (used in frontend, left side word index)
-    tmp_app_index = {c: {} for c in CORPORA}
-    for corpus in CORPORA:
+    tmp_app_index = {c: {} for c in app.config["CORPORA"]}
+    for corpus in app.config["CORPORA"]:
         res_hws = {}
         res_fns = {}
-        nentries = valdb[corpus].count()
+        nentries = mongo.db[corpus].count()
         idx = 0
-        for e in valdb[corpus].find({}):
+        for e in mongo.db[corpus].find({}):
             if "headwords" not in e:
                 continue
             for hw in e["headwords"]:
@@ -504,6 +499,33 @@ def prepare_app_index(appindex_json):
 # APP PREFLIGHT ---------------------^
+def init_wsgi(app):
+    config = None
+    with Path("/project/prod_conf.yaml").open("r") as fp:
+        config = list(yaml.safe_load_all(fp))[0]
+    app.debug = False
+    logfile = config["logfile"]
+    if app.debug:
+        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
+    else:
+        logging.basicConfig(filename=logfile, level=logging.INFO)
+    # db login
+    # app index from db
+    with Path(config["appindex"]).open("r") as fp:
+        # a dirty hack but ok
+        app.config["app_index"] = json.load(fp)
+    # log.info("[*] Starting app.py with config:\n%s".format(config))
+    log.info("[*] Starting app.py with config:\n{}".format(config))
+# if we don't pass arguments, assume production environment (gunicorn)
+if len(sys.argv) == 1:
+    init_wsgi()
 if __name__ == "__main__":
     print("Starting app.py main()")
     aparser = argparse.ArgumentParser(description="Arguments for app.py")
@@ -516,7 +538,6 @@ if __name__ == "__main__":
     aparser.add_argument("--appindex-json", type=str)
     args = aparser.parse_args()
-    config = None
     with Path(args.config_file).open("r") as fp:
         config = list(yaml.safe_load_all(fp))[0]
@@ -527,28 +548,31 @@ if __name__ == "__main__":
     else:
         logging.basicConfig(filename=logfile, level=logging.INFO)
+    """
     # db login
     client = MongoClient(
         "mongodb://{}".format(args.dbaddr),
         username=args.dbuser,
         password=args.dbpass,
-        authSource="valdb",
+        authSource="mongo.db",
         authMechanism='SCRAM-SHA-1'
     )
-    valdb = client.valdb
+    valdb = client.mongo.db
+    """
     if args.prepare_db:
         with Path(args.sskj_wordlist).open("r") as fp:
             sskj_wordlist = json.load(fp)
-        prepare_app_index(args.appindex_json)
+        prepare_app_index(args.appindex_json, sskj_wordlist)
         sys.exit()
     # app index from db
     with Path(args.appindex_json).open("r") as fp:
-        app_index = json.load(fp)
+        app.config["app_index"] = json.load(fp)
     # log.info("[*] Starting app.py with config:\n%s".format(config))
     log.info("[*] Starting app.py with config:\n{}".format(config))
     app.run(host=str(config["host"]), port=int(config["port"]))

View File

@@ -4,3 +4,4 @@ port: 8084
 host: localhost
 logfile: "/var/log/valency_backend.log"
 ---

View File

@@ -1,6 +1,10 @@
 ---
-debug: True
 port: 8084
 host: 0.0.0.0
 logfile: "/var/log/valency_backend.log"
----
+appindex: /project/data/appindex.json
+# Same as in root Makefile
+dbaddr: 0.0.0.0:27017
+dbuser: valuser
+dbpass: valuserpass

View File

@@ -0,0 +1,2 @@
+MONGO_URI = "mongodb://valuser:valuserpass@127.0.0.1:27017/valdb"
+MONGO_AUTH_SOURCE = 'admin'
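flask-pymongo picks up these MONGO_* keys once app.config.from_object("db_config") runs, and the database it exposes as mongo.db is the one named in the URI path. A quick way to see what the driver derives from that URI is sketched below; this snippet is illustrative only and not part of the commit.

from pymongo import uri_parser

parsed = uri_parser.parse_uri("mongodb://valuser:valuserpass@127.0.0.1:27017/valdb")
print(parsed["database"])  # "valdb" -> what flask-pymongo exposes as mongo.db
print(parsed["nodelist"])  # [("127.0.0.1", 27017)]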

View File

@@ -0,0 +1,7 @@
+#!/bin/bash
+pip3 install -e /project/src/pkg/cjvt-corpusparser/.
+pip3 install -e /project/src/pkg/valency/.
+pip3 install -e /project/src/pkg/seqparser/.
+sleep 10000

View File

@@ -1,73 +0,0 @@
-# Deprecated: headword creation moved to be part of corpusparser,
-# index creation moved to app.py as a preprocessing (with exit) step
-CORPORA = ["kres", "ssj"]
-if __name__ == "__main__":
-    valdb = None
-    def helper_tid_to_token(tid, tokens):
-        for t in tokens:
-            if t["tid"] == tid:
-                return t
-        return None
-    # update entries (add headwords and fuctors for indexing)
-    for corpus in CORPORA:
-        for e in valdb[corpus].find({}):
-            if e["srl_links"] is None:
-                e["headwords"] = []
-                e["functors"] = []
-            else:
-                hw_tids = list(set([x["from"] for x in e["srl_links"]]))
-                hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
-                headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
-                e["headwords"] = headwords
-                functors = list(set([x["afun"] for x in e["srl_links"]]))
-                e["functors"] = functors
-            valdb[corpus].save(e)
-        valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
-        valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
-    # create app_index (used in frontend, left side word index)
-    tmp_app_index = {c: {} for c in CORPORA}
-    for corpus in CORPORA:
-        res_hws = {}
-        res_fns = {}
-        for e in valdb[corpus].find({}):
-            if "headwords" not in e:
-                continue
-            for hw in e["headwords"]:
-                if hw in res_hws:
-                    res_hws[hw] += 1
-                else:
-                    res_hws[hw] = 1
-            if "functors" not in e:
-                continue
-            for fn in e["functors"]:
-                if fn in res_fns:
-                    res_fns[fn] += 1
-                else:
-                    res_fns[fn] = 1
-        alphabetical = {}
-        for k, e in res_hws.items():
-            fst = k[0].lower()
-            if fst in alphabetical:
-                alphabetical[fst].append((k, e))
-            else:
-                alphabetical[fst] = [(k, e)]
-        for k, e in alphabetical.items():
-            alphabetical[k] = sorted(e, key=lambda x: x[0])
-        tmp_app_index[corpus]["words"] = alphabetical
-        functors = [(k, e) for (k, e) in res_fns.items()]
-        functors = sorted(functors, key=lambda x: x[0])
-        tmp_app_index[corpus]["functors"] = functors
-    valdb.appindex.update({"dockey": "appindex"}, {"dockey": "appindex", "data": tmp_app_index}, upsert=True)