From 2ff339e24ce407e2a804a7acf1e26aee2dd49441 Mon Sep 17 00:00:00 2001
From: voje <kristjan.voje@gmail.com>
Date: Sat, 4 May 2019 01:28:46 +0200
Subject: [PATCH] prepared app for production (removed global variables, added
 flask-pymongo as db driver)

---
 Dockerfile-backend-flask                    |  18 +++
 Makefile                                    |   5 +-
 dockerfiles/python-env/Dockerfile           |  19 ++--
 dockerfiles/python-env/Makefile             |   2 +-
 dockerfiles/python-env/entrypoint.sh        |   5 +
 src/backend_flask/Makefile                  |  16 +++
 src/backend_flask/app.py                    | 118 ++++++++++++--------
 src/backend_flask/conf_files/dev_conf.yaml  |   1 +
 src/backend_flask/conf_files/prod_conf.yaml |   8 +-
 src/backend_flask/db_config.py              |   2 +
 src/backend_flask/entrypoint.sh             |   7 ++
 src/backend_flask/preprocess.py             |  73 ------------
 12 files changed, 140 insertions(+), 134 deletions(-)
 create mode 100644 Dockerfile-backend-flask
 create mode 100755 dockerfiles/python-env/entrypoint.sh
 create mode 100644 src/backend_flask/Makefile
 create mode 100644 src/backend_flask/db_config.py
 create mode 100755 src/backend_flask/entrypoint.sh
 delete mode 100644 src/backend_flask/preprocess.py

diff --git a/Dockerfile-backend-flask b/Dockerfile-backend-flask
new file mode 100644
index 0000000..fe9349b
--- /dev/null
+++ b/Dockerfile-backend-flask
@@ -0,0 +1,18 @@
+FROM cjvt-python-env
+
+RUN pip3 install gunicorn
+
+RUN mkdir -p /project/src/backend_flask
+RUN mkdir -p /project/src/pkg
+RUN mkdir -p /project/data
+
+COPY src/backend_flask /project/src/backend_flask
+COPY src/pkg /project/src/pkg
+
+COPY data/appindex.json /project/data
+
+COPY src/backend_flask/entrypoint.sh /.
+
+COPY src/backend_flask/conf_files/prod_conf.yaml /project
+
+ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
\ No newline at end of file
diff --git a/Makefile b/Makefile
index a1ec4e8..2e74b6e 100644
--- a/Makefile
+++ b/Makefile
@@ -124,12 +124,15 @@ backend-dev:
 		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
 		--appindex-json $(APPINDEX_PATH)
 
-backend-prod:
+backend-prod-old:
 	cd ./src/backend_flask; python3 app.py \
 		--config-file ./conf_files/prod_conf.yaml \
 		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
 		--appindex-json $(APPINDEX_PATH)
 
+backend-prod:
+	cd ./src/backend_flask; $(MAKE) prod
+
 ## add sskj senses to db (generated with pkg/seqparser)
 sskj-senses:
 	python3 ./src/pkg/seqparser/seqparser/main.py \
diff --git a/dockerfiles/python-env/Dockerfile b/dockerfiles/python-env/Dockerfile
index 266d331..0249115 100644
--- a/dockerfiles/python-env/Dockerfile
+++ b/dockerfiles/python-env/Dockerfile
@@ -1,26 +1,25 @@
-FROM ubuntu:16.04
+FROM ubuntu:18.04
 
 RUN apt-get update --fix-missing
 RUN apt-get install -y \
 vim \
 python3 \
 python3-pip \
-sshfs
+sshfs \
+curl
+
+RUN pip3 install --upgrade pip
 
 RUN pip3 install \
 	lxml \
 	pandas \
 	sklearn \
 	argparse \
+	pyyaml \
 	pathlib \
+	flask \
+	flask_cors \
 	pymongo \
-	flask
-
-RUN apt-get install -y \
-	curl
+	flask-pymongo
 
 ENV PYTHONIOENCODING UTF-8
-
-RUN pip3 install \
-	pyyaml \
-	flask_cors
diff --git a/dockerfiles/python-env/Makefile b/dockerfiles/python-env/Makefile
index e9b30d4..9596535 100644
--- a/dockerfiles/python-env/Makefile
+++ b/dockerfiles/python-env/Makefile
@@ -1,4 +1,4 @@
-IMAGE_NAME="cjvt-python-env"
+IMAGE_NAME="cjvt-python-env"  # don't change, Dockerfile-backend-flask builds FROM this image
 CNNAME="python-env"
 
 all: build run
diff --git a/dockerfiles/python-env/entrypoint.sh b/dockerfiles/python-env/entrypoint.sh
new file mode 100755
index 0000000..7d1cf5f
--- /dev/null
+++ b/dockerfiles/python-env/entrypoint.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+echo "testing entrypoint."
+$(exit 1)
+exit 0
\ No newline at end of file
diff --git a/src/backend_flask/Makefile b/src/backend_flask/Makefile
new file mode 100644
index 0000000..d1a5da9
--- /dev/null
+++ b/src/backend_flask/Makefile
@@ -0,0 +1,16 @@
+IMG="backend-flask"
+CNT="backend_flask"
+
+clean:
+	- docker rm -f $(CNT)
+
+run: clean build
+	docker run -d --net host --name $(CNT) $(IMG)
+	docker logs -f $(CNT)
+
+build: build-cjvt-python-env
+	# docker build . -f ../../Dockerfile-backend-flask -t $(IMG)
+	cd ../..; docker build . -f Dockerfile-backend-flask -t $(IMG)
+
+build-cjvt-python-env:
+	cd ../../dockerfiles/python-env; $(MAKE) build
\ No newline at end of file
diff --git a/src/backend_flask/app.py b/src/backend_flask/app.py
index 670ff4a..36f5513 100644
--- a/src/backend_flask/app.py
+++ b/src/backend_flask/app.py
@@ -26,26 +26,21 @@ from email.mime.text import MIMEText
 from copy import deepcopy as DC
 from pathlib import Path
 from pymongo import MongoClient
+from flask_pymongo import PyMongo
 import pymongo
 import argparse
 
-# some db collections
-USERS_COLL = "users"
-TOKENS_COLL = "usertokens"
-SENSES_COLL = "senses"
-SENSEMAP_COLL = "sensemap"
-
-# pre-generated data (gui leftside word index)
-CORPORA = ["ssj", "kres"]
-app_index = None
-sskj_wordlist = None  # used by _is_banned(hw)
-BANNED_HEADWORDS = ["biti"]
-QUERY_LIMIT = 1000  # Some headwords contain thousands of examples - not practical for the app
 
 log = logging.getLogger(__name__)
-valdb = None
 app = Flask(__name__)
 
+app.config.from_object("db_config")
+mongo = PyMongo(app)
+
+app.config["CORPORA"] = ["ssj", "kres"]
+app.config["BANNED_HEADWORDS"] = ["biti"]
+app.config["QUERY_LIMIT"] = 1000
+
 
 # when running vuejs via webpack
 # CORS(app)
@@ -60,7 +55,7 @@ CORS(app)
 @app.route("/api/dev")
 def api_dev():
     print("DEV")
-    cur = valdb.kres.find({"headwords": "nagovarjati"})
+    cur = mongo.db.kres.find({"headwords": "nagovarjati"})
     frames = []
     for ent in cur:
         frames += frames_from_db_entry(ent)
@@ -73,12 +68,12 @@ def api_dev():
 @app.route("/api/words/<corpus>")
 def api_words(corpus):
     return json.dumps({
-        "sorted_words": app_index[corpus]["words"], # todo - make corpus as arg
+        "sorted_words": app.config["app_index"][corpus]["words"], # todo - make corpus as arg
     })
 
 @app.route("/api/functors/<corpus>")
 def api_functors(corpus):
-    return json.dumps(app_index[corpus]["functors"])
+    return json.dumps(app.config["app_index"][corpus]["functors"])
 
 # INDEX SELECTION -------------------^
 
@@ -99,7 +94,7 @@ def api_register():
     ):
         return "ERR"
     email_hash = hashlib.sha256(email.encode("utf-8")).hexdigest()
-    existing = list(valdb[USERS_COLL].find({
+    existing = list(mongo.db.users.find({
         "$or": [{"username": username}, {"email": email_hash}]
     }))
     if len(existing) > 0:
@@ -110,7 +105,7 @@ def api_register():
             password.encode("utf-8")).hexdigest(),
         "email": email_hash
     }
-    valdb[USERS_COLL].insert(entry)
+    mongo.db.users.insert(entry)
     return "OK"
 
 
@@ -122,7 +117,7 @@ def api_login():
     password = data["password"]
     hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
 
-    db_user = list(valdb[USERS_COLL].find({
+    db_user = list(mongo.db.users.find({
         "username": username,
         "hpass": hpass
     }))
@@ -136,7 +131,7 @@ def api_login():
         "date": datetime.datetime.utcnow(),
         "token": token
     }
-    valdb[TOKENS_COLL].update(
+    mongo.db.usertokens.update(
         {"username": token_entry["username"]},
         token_entry,
         upsert=True
@@ -179,7 +174,7 @@ def api_new_pass():
     username = data["username"]
     email = data["email"]
     hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
-    db_res = list(valdb[USERS_COLL].find({
+    db_res = list(mongo.db.users.find({
         "username": username,
         "email": hemail
     }))
@@ -191,7 +186,7 @@ def api_new_pass():
         string.ascii_letters + string.digits) for i in range(10)])
     # update locally
     hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
-    valdb[USERS_COLL].update(
+    mongo.db.users.update(
         {
             "username": username,
             "email": hemail
@@ -209,12 +204,12 @@ def token_to_username(token):
     key = {
         "token": token
     }
-    res = list(valdb[TOKENS_COLL].find(key))
+    res = list(mongo.db.usertokens.find(key))
     if len(res) != 1:
         return None
     username = res[0]["username"]
     # update deletion interval
-    valdb[TOKENS_COLL].update(
+    mongo.db.usertokens.update(
         key, {"$set": {"date": datetime.datetime.utcnow()}})
     return username
 
@@ -249,19 +244,19 @@ def api_get_frames():
     RF = reduce_functions[rf_name]["f"]
 
     corpus = request.args.get("cor")
-    if corpus not in CORPORA:
+    if corpus not in app.config["CORPORA"]:
         return json.dumps({"error": "cor={kres,ssj}"})
 
-    cur = valdb[corpus].find({"headwords": hw})
+    cur = mongo.db[corpus].find({"headwords": hw})
     frames = []
-    for ent in cur[:QUERY_LIMIT]:
+    for ent in cur[:app.config["QUERY_LIMIT"]]:
         frames += frames_from_db_entry(ent)  # pre-process this step for prod TODO
     cur.close()
 
     # filter by relevant hw
     frames = [x for x in frames if x.hw == hw]
 
-    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
+    ret_frames = RF(frames, mongo.db.sensemap)
 
     json_ret = {"frames": []}
     for frame in ret_frames:
@@ -302,12 +297,12 @@ def api_get_functor_frames():
     RF = reduce_functions[rf_name]["f"]
 
     corpus = request.args.get("cor")
-    if corpus not in CORPORA:
+    if corpus not in app.config["CORPORA"]:
         return json.dumps({"error": "cor={kres,ssj}"})
 
-    cur = valdb[corpus].find({"functors": functor})
+    cur = mongo.db[corpus].find({"functors": functor})
     frames = []
-    for ent in cur[:QUERY_LIMIT]:
+    for ent in cur[:app.config["QUERY_LIMIT"]]:
         frames += frames_from_db_entry(ent)  # pre-process this step for prod TODO
     cur.close()
 
@@ -315,7 +310,7 @@ def api_get_functor_frames():
     frames = [x for x in frames if functor in x.get_functors()]
 
     # raw_frames = vallex.functors_index[functor]  # TODO
-    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
+    ret_frames = RF(frames, mongo.db.sensemap)
     ret_frames = _aggregate_by_hw(ret_frames)
 
     json_ret = {"frames": []}
@@ -334,10 +329,10 @@ def api_get_functor_frames():
 def api_senses_get():
     # returns senses and mapping for hw
     hw = request.args.get("hw")
-    senses = list(valdb[SENSES_COLL].find({
+    senses = list(mongo.db.senses.find({
         "hw": hw
     }))
-    sense_map_query = list(valdb[SENSEMAP_COLL].find({
+    sense_map_query = list(mongo.db.sensemap.find({
         "hw": hw
     }))
     # aggregation by max date possible on DB side
@@ -417,7 +412,7 @@ def api_senses_update():
         print(ns)
 
         # insert into db
-        valdb[SENSES_COLL].insert(ns)
+        mongo.db.senses.insert(ns)
 
     # replace tmp_id with mongo's _id
     for ssj_id, el in sense_map.items():
@@ -432,7 +427,7 @@ def api_senses_update():
             "date": datetime.datetime.utcnow()
         }
         # vallex.db["v2_sense_map"].update(key, data, upsert=True)
-        valdb[SENSEMAP_COLL].insert(data)
+        mongo.db.sensemap.insert(data)
     return "OK"
 
 # SENSES ----------------------------^
@@ -441,7 +436,7 @@ def api_senses_update():
 # APP PREFLIGHT ---------------------.
 def _is_banned(hw):
     banned = True
-    if hw in BANNED_HEADWORDS:
+    if hw in app.config["BANNED_HEADWORDS"]:
         banned = True
     elif hw in sskj_wordlist["wordlist"]:
         banned = False
@@ -449,17 +444,17 @@ def _is_banned(hw):
         banned = False
     return banned
 
-def prepare_app_index(appindex_json):
+def prepare_app_index(appindex_json, sskj_wordlist):
     log.info("[*] preparing app_index")
     # create app_index (used in frontend, left side word index)
-    tmp_app_index = {c: {} for c in CORPORA}
-    for corpus in CORPORA:
+    tmp_app_index = {c: {} for c in app.config["CORPORA"]}
+    for corpus in app.config["CORPORA"]:
         res_hws = {}
         res_fns = {}
 
-        nentries = valdb[corpus].count()
+        nentries = mongo.db[corpus].count()
         idx = 0
-        for e in valdb[corpus].find({}):
+        for e in mongo.db[corpus].find({}):
             if "headwords" not in e:
                 continue
             for hw in e["headwords"]:
@@ -504,6 +499,33 @@ def prepare_app_index(appindex_json):
 # APP PREFLIGHT ---------------------^
 
 
+def init_wsgi(app):
+    """Configure `app` for production: logging and the pre-generated app index.
+
+    Reads /project/prod_conf.yaml (copied there by Dockerfile-backend-flask),
+    logs to the configured `logfile` and loads the `appindex` JSON into
+    app.config["app_index"], which the /api/words and /api/functors routes read.
+    """
+    with Path("/project/prod_conf.yaml").open("r") as fp:
+        config = list(yaml.safe_load_all(fp))[0]
+
+    # Production never runs in debug mode, so always log to the logfile.
+    app.debug = False
+    logging.basicConfig(filename=config["logfile"], level=logging.INFO)
+
+    with Path(config["appindex"]).open("r") as fp:
+        app.config["app_index"] = json.load(fp)
+
+    # NOTE(review): the config holds dbpass; logging it verbatim leaks credentials.
+    log.info("[*] Starting app.py with config:\n{}".format(config))
+
+
+# if we don't pass arguments, assume production environment (gunicorn)
+# NOTE(review): verify sys.argv really has length 1 under the gunicorn launcher.
+if len(sys.argv) == 1:
+    init_wsgi(app)
+
+
 if __name__ == "__main__":
     print("Starting app.py main()")
     aparser = argparse.ArgumentParser(description="Arguments for app.py")
@@ -516,7 +538,6 @@ if __name__ == "__main__":
     aparser.add_argument("--appindex-json", type=str)
     args = aparser.parse_args()
 
-    config = None
     with Path(args.config_file).open("r") as fp:
         config = list(yaml.safe_load_all(fp))[0]
 
@@ -527,28 +548,31 @@ if __name__ == "__main__":
     else:
         logging.basicConfig(filename=logfile, level=logging.INFO)
 
+    """
     # db login
     client = MongoClient(
         "mongodb://{}".format(args.dbaddr),
         username=args.dbuser,
         password=args.dbpass,
-        authSource="valdb",
+        authSource="mongo.db",
         authMechanism='SCRAM-SHA-1'
     )
-    valdb = client.valdb
+    valdb = client.mongo.db
+    """
 
     if args.prepare_db:
         with Path(args.sskj_wordlist).open("r") as fp:
             sskj_wordlist = json.load(fp)
-        prepare_app_index(args.appindex_json)
+        prepare_app_index(args.appindex_json, sskj_wordlist)
         sys.exit()
 
     # app index from db
     with Path(args.appindex_json).open("r") as fp:
-        app_index = json.load(fp)
+        app.config["app_index"] = json.load(fp)
 
     # log.info("[*] Starting app.py with config:\n%s".format(config))
     log.info("[*] Starting app.py with config:\n{}".format(config))
 
     app.run(host=str(config["host"]), port=int(config["port"]))
 
+
diff --git a/src/backend_flask/conf_files/dev_conf.yaml b/src/backend_flask/conf_files/dev_conf.yaml
index 1f21a14..f023ce2 100644
--- a/src/backend_flask/conf_files/dev_conf.yaml
+++ b/src/backend_flask/conf_files/dev_conf.yaml
@@ -4,3 +4,4 @@ port: 8084
 host: localhost
 logfile: "/var/log/valency_backend.log"
 ---
+
diff --git a/src/backend_flask/conf_files/prod_conf.yaml b/src/backend_flask/conf_files/prod_conf.yaml
index c5aaa24..754e6cb 100644
--- a/src/backend_flask/conf_files/prod_conf.yaml
+++ b/src/backend_flask/conf_files/prod_conf.yaml
@@ -1,6 +1,10 @@
 ---
-debug: True
 port: 8084
 host: 0.0.0.0
 logfile: "/var/log/valency_backend.log"
----
+appindex: /project/data/appindex.json
+
+# Same as in root Makefile
+dbaddr: 0.0.0.0:27017
+dbuser: valuser
+dbpass: valuserpass
diff --git a/src/backend_flask/db_config.py b/src/backend_flask/db_config.py
new file mode 100644
index 0000000..ccd4bce
--- /dev/null
+++ b/src/backend_flask/db_config.py
@@ -0,0 +1,2 @@
+MONGO_URI = "mongodb://valuser:valuserpass@127.0.0.1:27017/valdb"
+MONGO_AUTH_SOURCE = 'admin'
diff --git a/src/backend_flask/entrypoint.sh b/src/backend_flask/entrypoint.sh
new file mode 100755
index 0000000..b924e73
--- /dev/null
+++ b/src/backend_flask/entrypoint.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+pip3 install -e /project/src/pkg/cjvt-corpusparser/.
+pip3 install -e /project/src/pkg/valency/.
+pip3 install -e /project/src/pkg/seqparser/.
+
+sleep 10000
diff --git a/src/backend_flask/preprocess.py b/src/backend_flask/preprocess.py
deleted file mode 100644
index 4057411..0000000
--- a/src/backend_flask/preprocess.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Deprecated: headword creation moved to be part of corpusparser,
-# index creation moved to app.py as a preprocessing (with exit) step
-
-CORPORA = ["kres", "ssj"]
-
-if __name__ == "__main__":
-
-	valdb = None
-
-    def helper_tid_to_token(tid, tokens):
-        for t in tokens:
-            if t["tid"] == tid:
-                return t
-        return None
-
-    # update entries (add headwords and fuctors for indexing)
-    for corpus in CORPORA:
-        for e in valdb[corpus].find({}):
-            if e["srl_links"] is None:
-                e["headwords"] = []
-                e["functors"] = []
-            else:
-                hw_tids = list(set([x["from"] for x in e["srl_links"]]))
-                hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
-                headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
-                e["headwords"] = headwords
-
-                functors = list(set([x["afun"] for x in e["srl_links"]]))
-                e["functors"] = functors
-
-            valdb[corpus].save(e)
-
-        valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
-        valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
-
-    # create app_index (used in frontend, left side word index)
-    tmp_app_index = {c: {} for c in CORPORA}
-    for corpus in CORPORA:
-        res_hws = {}
-        res_fns = {}
-        for e in valdb[corpus].find({}):
-            if "headwords" not in e:
-                continue
-            for hw in e["headwords"]:
-                if hw in res_hws:
-                    res_hws[hw] += 1
-                else:
-                    res_hws[hw] = 1
-            if "functors" not in e:
-                continue
-            for fn in e["functors"]:
-                if fn in res_fns:
-                    res_fns[fn] += 1
-                else:
-                    res_fns[fn] = 1
-
-        alphabetical = {}
-        for k, e in res_hws.items():
-            fst = k[0].lower()
-            if fst in alphabetical:
-                alphabetical[fst].append((k, e))
-            else:
-                alphabetical[fst] = [(k, e)]
-
-        for k, e in alphabetical.items():
-            alphabetical[k] = sorted(e, key=lambda x: x[0]) 
-        tmp_app_index[corpus]["words"] = alphabetical
-
-        functors = [(k, e) for (k, e) in res_fns.items()]
-        functors = sorted(functors, key=lambda x: x[0])
-        tmp_app_index[corpus]["functors"] = functors
-
-    valdb.appindex.update({"dockey": "appindex"}, {"dockey": "appindex", "data": tmp_app_index}, upsert=True)
\ No newline at end of file