Prepared app for production (removed global variables, added flask-pymongo as the DB driver)

2019-05-04 01:28:46 +02:00 · 2019-05-04 01:28:46 +02:00 · 2ff339e24c
commit 2ff339e24c
parent 707034153c
12 changed files with 140 additions and 134 deletions
--- a/18
+++ b/18
@ -0,0 +1,18 @@
+# Backend image, built on top of the shared Python environment image.
+FROM cjvt-python-env
+
+# WSGI server package.
+RUN pip3 install gunicorn
+
+# Create the project layout up front, so the COPY of appindex.json below
+# targets an already existing /project/data directory.
+RUN mkdir -p \
+	/project/src/backend_flask \
+	/project/src/pkg \
+	/project/data
+
+# Application sources and the local packages installed by the entrypoint.
+COPY src/backend_flask /project/src/backend_flask
+COPY src/pkg /project/src/pkg
+
+# App index JSON (its path is referenced from prod_conf.yaml).
+COPY data/appindex.json /project/data
+
+# Production configuration and the container entrypoint script.
+COPY src/backend_flask/conf_files/prod_conf.yaml /project
+COPY src/backend_flask/entrypoint.sh /.
+
+ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
--- a/5
+++ b/5
@ -124,12 +124,15 @@ backend-dev:
 		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
 		--appindex-json $(APPINDEX_PATH)

-backend-prod:
+backend-prod-old:
 	cd ./src/backend_flask; python3 app.py \
 		--config-file ./conf_files/prod_conf.yaml \
 		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
 		--appindex-json $(APPINDEX_PATH)

+## run the production backend by delegating to the backend_flask Makefile
+backend-prod:
+	cd ./src/backend_flask && $(MAKE) prod
+
 ## add sskj senses to db (generated with pkg/seqparser)
 sskj-senses:
 	python3 ./src/pkg/seqparser/seqparser/main.py \
--- a/dockerfiles/python-env/Dockerfile
+++ b/dockerfiles/python-env/Dockerfile
@ -1,26 +1,25 @@
-FROM ubuntu:16.04
+FROM ubuntu:18.04

 RUN apt-get update --fix-missing
 RUN apt-get install -y \
 vim \
 python3 \
 python3-pip \
-sshfs
+sshfs \
+curl
+
+RUN pip3 install --upgrade pip

 RUN pip3 install \
 	lxml \
 	pandas \
 	sklearn \
 	argparse \
+	pyyaml \
 	pathlib \
+	flask \
+	flask_cors \
 	pymongo \
-	flask
-
-RUN apt-get install -y \
-	curl
+	flask-pymongo

 ENV PYTHONIOENCODING UTF-8
-
-RUN pip3 install \
-	pyyaml \
-	flask_cors
--- a/dockerfiles/python-env/Makefile
+++ b/dockerfiles/python-env/Makefile
@ -1,4 +1,4 @@
-IMAGE_NAME="cjvt-python-env"
+# Don't change this name: the backend_flask image build relies on it
+# (the FROM line of its Dockerfile). Kept on its own line because whitespace
+# before a trailing comment would become part of the variable's value.
+IMAGE_NAME="cjvt-python-env"
 CNNAME="python-env"

 all: build run
--- a/dockerfiles/python-env/entrypoint.sh
+++ b/dockerfiles/python-env/entrypoint.sh
@ -0,0 +1,5 @@
+#!/bin/bash
+
+# NOTE(review): looks like a scratch script for trying out entrypoint
+# behaviour — confirm it is still needed.
+echo "testing entrypoint."
+# The command substitution runs `exit 1` in a subshell: only that subshell
+# ends with status 1, this script keeps going.
+$(exit 1)
+# The script itself always finishes with status 0.
+exit 0
--- a/src/backend_flask/Makefile
+++ b/src/backend_flask/Makefile
@ -0,0 +1,16 @@
+# Image and container names used by the targets below.
+IMG := "backend-flask"
+CNT := "backend_flask"
+
+# None of these targets produce a file of the same name.
+.PHONY: clean run prod build build-cjvt-python-env
+
+# Remove a previous container. The leading '-' ignores a failure of
+# docker rm (e.g. when there is no container to remove).
+clean:
+	- docker rm -f $(CNT)
+
+# Start a fresh, detached container on the host network and follow its logs.
+run: clean build
+	docker run -d --net host --name $(CNT) $(IMG)
+	docker logs -f $(CNT)
+
+# Target invoked by the root Makefile's backend-prod rule; same as run.
+prod: run
+
+# Build the backend image, using the directory two levels up as build context.
+build: build-cjvt-python-env
+	cd ../.. && docker build . -f Dockerfile-backend-flask -t $(IMG)
+
+# Build the python-env image first by delegating to its own Makefile.
+build-cjvt-python-env:
+	cd ../../dockerfiles/python-env && $(MAKE) build
--- a/src/backend_flask/app.py
+++ b/src/backend_flask/app.py
@ -26,26 +26,21 @@ from email.mime.text import MIMEText
 from copy import deepcopy as DC
 from pathlib import Path
 from pymongo import MongoClient
+from flask_pymongo import PyMongo
 import pymongo
 import argparse

-# some db collections
-USERS_COLL = "users"
-TOKENS_COLL = "usertokens"
-SENSES_COLL = "senses"
-SENSEMAP_COLL = "sensemap"
-
-# pre-generated data (gui leftside word index)
-CORPORA = ["ssj", "kres"]
-app_index = None
-sskj_wordlist = None  # used by _is_banned(hw)
-BANNED_HEADWORDS = ["biti"]
-QUERY_LIMIT = 1000  # Some headwords contain thousands of examples - not practical for the app

 log = logging.getLogger(__name__)
-valdb = None
 app = Flask(__name__)

+app.config.from_object("db_config")
+mongo = PyMongo(app)
+
+app.config["CORPORA"] = ["ssj", "kres"]
+app.config["BANNED_HEADWORDS"] = ["biti"]
+app.config["QUERY_LIMIT"] = 1000
+

 # when running vuejs via webpack
 # CORS(app)
@ -60,7 +55,7 @@ CORS(app)
@app.route("/api/dev")
 def api_dev():
    print("DEV")
-    cur = valdb.kres.find({"headwords": "nagovarjati"})
+    cur = mongo.db.kres.find({"headwords": "nagovarjati"})
    frames = []
    for ent in cur:
        frames += frames_from_db_entry(ent)
@ -73,12 +68,12 @@ def api_dev():
@app.route("/api/words/<corpus>")
 def api_words(corpus):
    return json.dumps({
-        "sorted_words": app_index[corpus]["words"], # todo - make corpus as arg
+        "sorted_words": app.config["app_index"][corpus]["words"], # todo - make corpus as arg
    })

@app.route("/api/functors/<corpus>")
 def api_functors(corpus):
-    return json.dumps(app_index[corpus]["functors"])
+    return json.dumps(app.config["app_index"][corpus]["functors"])

 # INDEX SELECTION -------------------^

@ -99,7 +94,7 @@ def api_register():
    ):
        return "ERR"
    email_hash = hashlib.sha256(email.encode("utf-8")).hexdigest()
-    existing = list(valdb[USERS_COLL].find({
+    existing = list(mongo.db.users.find({
        "$or": [{"username": username}, {"email": email_hash}]
    }))
    if len(existing) > 0:
@ -110,7 +105,7 @@ def api_register():
            password.encode("utf-8")).hexdigest(),
        "email": email_hash
    }
-    valdb[USERS_COLL].insert(entry)
+    mongo.db.users.insert(entry)
    return "OK"


@ -122,7 +117,7 @@ def api_login():
    password = data["password"]
    hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()

-    db_user = list(valdb[USERS_COLL].find({
+    db_user = list(mongo.db.users.find({
        "username": username,
        "hpass": hpass
    }))
@ -136,7 +131,7 @@ def api_login():
        "date": datetime.datetime.utcnow(),
        "token": token
    }
-    valdb[TOKENS_COLL].update(
+    mongo.db.usertokens.update(
        {"username": token_entry["username"]},
        token_entry,
        upsert=True
@ -179,7 +174,7 @@ def api_new_pass():
    username = data["username"]
    email = data["email"]
    hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
-    db_res = list(valdb[USERS_COLL].find({
+    db_res = list(mongo.db.users.find({
        "username": username,
        "email": hemail
    }))
@ -191,7 +186,7 @@ def api_new_pass():
        string.ascii_letters + string.digits) for i in range(10)])
    # update locally
    hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
-    valdb[USERS_COLL].update(
+    mongo.db.users.update(
        {
            "username": username,
            "email": hemail
@ -209,12 +204,12 @@ def token_to_username(token):
    key = {
        "token": token
    }
-    res = list(valdb[TOKENS_COLL].find(key))
+    res = list(mongo.db.usertokens.find(key))
    if len(res) != 1:
        return None
    username = res[0]["username"]
    # update deletion interval
-    valdb[TOKENS_COLL].update(
+    mongo.db.usertokens.update(
        key, {"$set": {"date": datetime.datetime.utcnow()}})
    return username

@ -249,19 +244,19 @@ def api_get_frames():
    RF = reduce_functions[rf_name]["f"]

    corpus = request.args.get("cor")
-    if corpus not in CORPORA:
+    if corpus not in app.config["CORPORA"]:
        return json.dumps({"error": "cor={kres,ssj}"})

-    cur = valdb[corpus].find({"headwords": hw})
+    cur = mongo.db[corpus].find({"headwords": hw})
    frames = []
-    for ent in cur[:QUERY_LIMIT]:
+    for ent in cur[:app.config["QUERY_LIMIT"]]:
        frames += frames_from_db_entry(ent)  # pre-process this step for prod TODO
    cur.close()

    # filter by relevant hw
    frames = [x for x in frames if x.hw == hw]

-    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
+    ret_frames = RF(frames, mongo.db.sensemap)

    json_ret = {"frames": []}
    for frame in ret_frames:
@ -302,12 +297,12 @@ def api_get_functor_frames():
    RF = reduce_functions[rf_name]["f"]

    corpus = request.args.get("cor")
-    if corpus not in CORPORA:
+    if corpus not in app.config["CORPORA"]:
        return json.dumps({"error": "cor={kres,ssj}"})

-    cur = valdb[corpus].find({"functors": functor})
+    cur = mongo.db[corpus].find({"functors": functor})
    frames = []
-    for ent in cur[:QUERY_LIMIT]:
+    for ent in cur[:app.config["QUERY_LIMIT"]]:
        frames += frames_from_db_entry(ent)  # pre-process this step for prod TODO
    cur.close()

@ -315,7 +310,7 @@ def api_get_functor_frames():
    frames = [x for x in frames if functor in x.get_functors()]

    # raw_frames = vallex.functors_index[functor]  # TODO
-    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
+    ret_frames = RF(frames, mongo.db.sensemap)
    ret_frames = _aggregate_by_hw(ret_frames)

    json_ret = {"frames": []}
@ -334,10 +329,10 @@ def api_get_functor_frames():
 def api_senses_get():
    # returns senses and mapping for hw
    hw = request.args.get("hw")
-    senses = list(valdb[SENSES_COLL].find({
+    senses = list(mongo.db.senses.find({
        "hw": hw
    }))
-    sense_map_query = list(valdb[SENSEMAP_COLL].find({
+    sense_map_query = list(mongo.db.sensemap.find({
        "hw": hw
    }))
    # aggregation by max date possible on DB side
@ -417,7 +412,7 @@ def api_senses_update():
        print(ns)

        # insert into db
-        valdb[SENSES_COLL].insert(ns)
+        mongo.db.senses.insert(ns)

    # replace tmp_id with mongo's _id
    for ssj_id, el in sense_map.items():
@ -432,7 +427,7 @@ def api_senses_update():
            "date": datetime.datetime.utcnow()
        }
        # vallex.db["v2_sense_map"].update(key, data, upsert=True)
-        valdb[SENSEMAP_COLL].insert(data)
+        mongo.db.sensemap.insert(data)
    return "OK"

 # SENSES ----------------------------^
@ -441,7 +436,7 @@ def api_senses_update():
 # APP PREFLIGHT ---------------------.
 def _is_banned(hw):
    banned = True
-    if hw in BANNED_HEADWORDS:
+    if hw in app.config["BANNED_HEADWORDS"]:
        banned = True
    elif hw in sskj_wordlist["wordlist"]:
        banned = False
@ -449,17 +444,17 @@ def _is_banned(hw):
        banned = False
    return banned

-def prepare_app_index(appindex_json):
+def prepare_app_index(appindex_json, sskj_wordlist):
    log.info("[*] preparing app_index")
    # create app_index (used in frontend, left side word index)
-    tmp_app_index = {c: {} for c in CORPORA}
-    for corpus in CORPORA:
+    tmp_app_index = {c: {} for c in app.config["CORPORA"]}
+    for corpus in app.config["CORPORA"]:
        res_hws = {}
        res_fns = {}

-        nentries = valdb[corpus].count()
+        nentries = mongo.db[corpus].count()
        idx = 0
-        for e in valdb[corpus].find({}):
+        for e in mongo.db[corpus].find({}):
            if "headwords" not in e:
                continue
            for hw in e["headwords"]:
@ -504,6 +499,33 @@ def prepare_app_index(appindex_json):
 # APP PREFLIGHT ---------------------^


+def init_wsgi(app):
+    """Prepare `app` for being served by a WSGI server.
+
+    Reads the first YAML document of /project/prod_conf.yaml, switches debug
+    mode off, sends INFO-level logging to the configured `logfile` and loads
+    the JSON file named by the `appindex` key into app.config["app_index"].
+
+    Raises whatever the file, YAML or JSON operations raise (e.g. a missing
+    config file or a missing `logfile` / `appindex` key).
+    """
+    with Path("/project/prod_conf.yaml").open("r") as fp:
+        config = list(yaml.safe_load_all(fp))[0]
+
+    # Debug mode is always off on this path, so logging always goes to the file.
+    app.debug = False
+    logging.basicConfig(filename=config["logfile"], level=logging.INFO)
+
+    # App index, read by the /api/words and /api/functors endpoints.
+    with Path(config["appindex"]).open("r") as fp:
+        app.config["app_index"] = json.load(fp)
+
+    # Keep the DB password out of the log file.
+    loggable = {k: v for k, v in config.items() if k != "dbpass"}
+    log.info("[*] Starting app.py with config:\n{}".format(loggable))
+
+
+# if we don't pass arguments, assume production environment (gunicorn)
+if len(sys.argv) == 1:
+    # init_wsgi requires the Flask app instance; calling it without one
+    # raises TypeError at import time.
+    init_wsgi(app)
+
+
 if __name__ == "__main__":
    print("Starting app.py main()")
    aparser = argparse.ArgumentParser(description="Arguments for app.py")
@ -516,7 +538,6 @@ if __name__ == "__main__":
    aparser.add_argument("--appindex-json", type=str)
    args = aparser.parse_args()

-    config = None
    with Path(args.config_file).open("r") as fp:
        config = list(yaml.safe_load_all(fp))[0]

@ -527,28 +548,31 @@ if __name__ == "__main__":
    else:
        logging.basicConfig(filename=logfile, level=logging.INFO)

+    """
    # db login
    client = MongoClient(
        "mongodb://{}".format(args.dbaddr),
        username=args.dbuser,
        password=args.dbpass,
-        authSource="valdb",
+        authSource="mongo.db",
        authMechanism='SCRAM-SHA-1'
    )
-    valdb = client.valdb
+    valdb = client.mongo.db
+    """

    if args.prepare_db:
        with Path(args.sskj_wordlist).open("r") as fp:
            sskj_wordlist = json.load(fp)
-        prepare_app_index(args.appindex_json)
+        prepare_app_index(args.appindex_json, sskj_wordlist)
        sys.exit()

    # app index from db
    with Path(args.appindex_json).open("r") as fp:
-        app_index = json.load(fp)
+        app.config["app_index"] = json.load(fp)

    # log.info("[*] Starting app.py with config:\n%s".format(config))
    log.info("[*] Starting app.py with config:\n{}".format(config))

    app.run(host=str(config["host"]), port=int(config["port"]))

+
--- a/src/backend_flask/conf_files/dev_conf.yaml
+++ b/src/backend_flask/conf_files/dev_conf.yaml
@ -4,3 +4,4 @@ port: 8084
 host: localhost
 logfile: "/var/log/valency_backend.log"
 ---
+
--- a/src/backend_flask/conf_files/prod_conf.yaml
+++ b/src/backend_flask/conf_files/prod_conf.yaml
@ -1,6 +1,10 @@
 ---
-debug: True
 port: 8084
 host: 0.0.0.0
 logfile: "/var/log/valency_backend.log"
---
+appindex: /project/data/appindex.json
+
+# Same as in root Makefile
+dbaddr: 0.0.0.0:27017
+dbuser: valuser
+dbpass: valuserpass
--- a/src/backend_flask/db_config.py
+++ b/src/backend_flask/db_config.py
@ -0,0 +1,2 @@
+import os
+
+# Connection string loaded into the Flask config via app.config.from_object.
+# The default is the value that used to be hard-coded here; set MONGO_URI in
+# the environment to override it and keep real credentials out of the repo.
+MONGO_URI = os.environ.get(
+    "MONGO_URI", "mongodb://valuser:valuserpass@127.0.0.1:27017/valdb")
+# NOTE(review): Flask-PyMongo 2.x appears to read only MONGO_URI — confirm
+# whether this setting has any effect or authSource belongs in the URI.
+MONGO_AUTH_SOURCE = 'admin'
--- a/src/backend_flask/entrypoint.sh
+++ b/src/backend_flask/entrypoint.sh
@ -0,0 +1,7 @@
+#!/bin/bash
+
+# Install the project's local packages in editable mode from the sources
+# under /project/src/pkg.
+pip3 install -e /project/src/pkg/cjvt-corpusparser/.
+pip3 install -e /project/src/pkg/valency/.
+pip3 install -e /project/src/pkg/seqparser/.
+
+# Keeps the script (and with it the container) alive for 10000 seconds; the
+# app itself is not started here.
+# NOTE(review): presumably a placeholder until the server start command is
+# added — confirm.
+sleep 10000
--- a/src/backend_flask/preprocess.py
+++ b/src/backend_flask/preprocess.py
@ -1,73 +0,0 @@
-# Deprecated: headword creation moved to be part of corpusparser,
-# index creation moved to app.py as a preprocessing (with exit) step
-
-CORPORA = ["kres", "ssj"]
-
-if __name__ == "__main__":
-
-	valdb = None
-
-    def helper_tid_to_token(tid, tokens):
-        for t in tokens:
-            if t["tid"] == tid:
-                return t
-        return None
-
-    # update entries (add headwords and fuctors for indexing)
-    for corpus in CORPORA:
-        for e in valdb[corpus].find({}):
-            if e["srl_links"] is None:
-                e["headwords"] = []
-                e["functors"] = []
-            else:
-                hw_tids = list(set([x["from"] for x in e["srl_links"]]))
-                hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
-                headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
-                e["headwords"] = headwords
-
-                functors = list(set([x["afun"] for x in e["srl_links"]]))
-                e["functors"] = functors
-
-            valdb[corpus].save(e)
-
-        valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
-        valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
-
-    # create app_index (used in frontend, left side word index)
-    tmp_app_index = {c: {} for c in CORPORA}
-    for corpus in CORPORA:
-        res_hws = {}
-        res_fns = {}
-        for e in valdb[corpus].find({}):
-            if "headwords" not in e:
-                continue
-            for hw in e["headwords"]:
-                if hw in res_hws:
-                    res_hws[hw] += 1
-                else:
-                    res_hws[hw] = 1
-            if "functors" not in e:
-                continue
-            for fn in e["functors"]:
-                if fn in res_fns:
-                    res_fns[fn] += 1
-                else:
-                    res_fns[fn] = 1
-
-        alphabetical = {}
-        for k, e in res_hws.items():
-            fst = k[0].lower()
-            if fst in alphabetical:
-                alphabetical[fst].append((k, e))
-            else:
-                alphabetical[fst] = [(k, e)]
-
-        for k, e in alphabetical.items():
-            alphabetical[k] = sorted(e, key=lambda x: x[0]) 
-        tmp_app_index[corpus]["words"] = alphabetical
-
-        functors = [(k, e) for (k, e) in res_fns.items()]
-        functors = sorted(functors, key=lambda x: x[0])
-        tmp_app_index[corpus]["functors"] = functors
-
-    valdb.appindex.update({"dockey": "appindex"}, {"dockey": "appindex", "data": tmp_app_index}, upsert=True)