prepared app for production (removed global variables, added flask-pymongo as db driver)

This commit is contained in:
voje 2019-05-04 01:28:46 +02:00
parent 707034153c
commit 2ff339e24c
12 changed files with 140 additions and 134 deletions

18
Dockerfile-backend-flask Normal file
View File

@ -0,0 +1,18 @@
# Backend image: layers the Flask backend onto the shared cjvt-python-env base image.
FROM cjvt-python-env
# gunicorn: production WSGI server for the Flask app.
RUN pip3 install gunicorn
# Directory layout mirrored from the repository into /project.
RUN mkdir -p /project/src/backend_flask
RUN mkdir -p /project/src/pkg
RUN mkdir -p /project/data
COPY src/backend_flask /project/src/backend_flask
COPY src/pkg /project/src/pkg
# Pre-generated word index consumed by the app at startup.
COPY data/appindex.json /project/data
# Entrypoint is copied to / so ENTRYPOINT below can find it.
COPY src/backend_flask/entrypoint.sh /.
# Production config; app.py's init_wsgi reads /project/prod_conf.yaml.
COPY src/backend_flask/conf_files/prod_conf.yaml /project
ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]

View File

@ -124,12 +124,15 @@ backend-dev:
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
--appindex-json $(APPINDEX_PATH)
backend-prod:
backend-prod-old:
cd ./src/backend_flask; python3 app.py \
--config-file ./conf_files/prod_conf.yaml \
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
--appindex-json $(APPINDEX_PATH)
backend-prod:
cd ./src/backend_flask; $(MAKE) prod
## add sskj senses to db (generated with pkg/seqparser)
sskj-senses:
python3 ./src/pkg/seqparser/seqparser/main.py \

View File

@ -1,26 +1,25 @@
FROM ubuntu:16.04
FROM ubuntu:18.04
RUN apt-get update --fix-missing
RUN apt-get install -y \
vim \
python3 \
python3-pip \
sshfs
sshfs \
curl
RUN pip3 install --upgrade pip
RUN pip3 install \
lxml \
pandas \
sklearn \
argparse \
pyyaml \
pathlib \
flask \
flask_cors \
pymongo \
flask
RUN apt-get install -y \
curl
flask-pymongo
ENV PYTHONIOENCODING UTF-8
RUN pip3 install \
pyyaml \
flask_cors

View File

@ -1,4 +1,4 @@
IMAGE_NAME="cjvt-python-env"
IMAGE_NAME="cjvt-python-env" # don't change, used in backend_flask/Makefile
CNNAME="python-env"
all: build run

View File

@ -0,0 +1,5 @@
#!/bin/bash
# Smoke-test entrypoint for the python-env image.
echo "testing entrypoint."
# NOTE(review): $(exit 1) runs `exit 1` in a command substitution; the
# subshell's status is discarded and the empty expansion is a no-op, so
# this line has no effect. Presumably a leftover experiment — confirm
# whether a real failure path (plain `exit 1`) was intended.
$(exit 1)
exit 0

View File

@ -0,0 +1,16 @@
# Build and run the backend Flask container.
# Image / container names (simple constants, so expand-once := is used).
IMG := backend-flask
CNT := backend_flask

# These targets are commands, not files — declare them phony so a stray
# file named e.g. `clean` or `build` cannot shadow them.
.PHONY: clean run build build-cjvt-python-env

clean:
	# `-` prefix: ignore failure when no such container exists yet.
	- docker rm -f $(CNT)

# NOTE: `clean build` relies on serial prerequisite order; don't run with -j.
run: clean build
	docker run -d --net host --name $(CNT) $(IMG)
	docker logs -f $(CNT)

build: build-cjvt-python-env
	# Build from the repo root so the Dockerfile can COPY src/ and data/.
	cd ../..; docker build . -f Dockerfile-backend-flask -t $(IMG)

build-cjvt-python-env:
	# $(MAKE), not `make`, so flags and the jobserver propagate.
	cd ../../dockerfiles/python-env; $(MAKE) build

View File

@ -26,26 +26,21 @@ from email.mime.text import MIMEText
from copy import deepcopy as DC
from pathlib import Path
from pymongo import MongoClient
from flask_pymongo import PyMongo
import pymongo
import argparse
# some db collections
USERS_COLL = "users"
TOKENS_COLL = "usertokens"
SENSES_COLL = "senses"
SENSEMAP_COLL = "sensemap"
# pre-generated data (gui leftside word index)
CORPORA = ["ssj", "kres"]
app_index = None
sskj_wordlist = None # used by _is_banned(hw)
BANNED_HEADWORDS = ["biti"]
QUERY_LIMIT = 1000 # Some headwords contain thousands of examples - not practical for the app
log = logging.getLogger(__name__)
valdb = None
app = Flask(__name__)
app.config.from_object("db_config")
mongo = PyMongo(app)
app.config["CORPORA"] = ["ssj", "kres"]
app.config["BANNED_HEADWORDS"] = ["biti"]
app.config["QUERY_LIMIT"] = 1000
# when running vuejs via webpack
# CORS(app)
@ -60,7 +55,7 @@ CORS(app)
@app.route("/api/dev")
def api_dev():
print("DEV")
cur = valdb.kres.find({"headwords": "nagovarjati"})
cur = mongo.db.kres.find({"headwords": "nagovarjati"})
frames = []
for ent in cur:
frames += frames_from_db_entry(ent)
@ -73,12 +68,12 @@ def api_dev():
@app.route("/api/words/<corpus>")
def api_words(corpus):
return json.dumps({
"sorted_words": app_index[corpus]["words"], # todo - make corpus as arg
"sorted_words": app.config["app_index"][corpus]["words"], # todo - make corpus as arg
})
@app.route("/api/functors/<corpus>")
def api_functors(corpus):
return json.dumps(app_index[corpus]["functors"])
return json.dumps(app.config["app_index"][corpus]["functors"])
# INDEX SELECTION -------------------^
@ -99,7 +94,7 @@ def api_register():
):
return "ERR"
email_hash = hashlib.sha256(email.encode("utf-8")).hexdigest()
existing = list(valdb[USERS_COLL].find({
existing = list(mongo.db.users.find({
"$or": [{"username": username}, {"email": email_hash}]
}))
if len(existing) > 0:
@ -110,7 +105,7 @@ def api_register():
password.encode("utf-8")).hexdigest(),
"email": email_hash
}
valdb[USERS_COLL].insert(entry)
mongo.db.users.insert(entry)
return "OK"
@ -122,7 +117,7 @@ def api_login():
password = data["password"]
hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
db_user = list(valdb[USERS_COLL].find({
db_user = list(mongo.db.users.find({
"username": username,
"hpass": hpass
}))
@ -136,7 +131,7 @@ def api_login():
"date": datetime.datetime.utcnow(),
"token": token
}
valdb[TOKENS_COLL].update(
mongo.db.usertokens.update(
{"username": token_entry["username"]},
token_entry,
upsert=True
@ -179,7 +174,7 @@ def api_new_pass():
username = data["username"]
email = data["email"]
hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
db_res = list(valdb[USERS_COLL].find({
db_res = list(mongo.db.users.find({
"username": username,
"email": hemail
}))
@ -191,7 +186,7 @@ def api_new_pass():
string.ascii_letters + string.digits) for i in range(10)])
# update locally
hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
valdb[USERS_COLL].update(
mongo.db.users.update(
{
"username": username,
"email": hemail
@ -209,12 +204,12 @@ def token_to_username(token):
key = {
"token": token
}
res = list(valdb[TOKENS_COLL].find(key))
res = list(mongo.db.usertokens.find(key))
if len(res) != 1:
return None
username = res[0]["username"]
# update deletion interval
valdb[TOKENS_COLL].update(
mongo.db.usertokens.update(
key, {"$set": {"date": datetime.datetime.utcnow()}})
return username
@ -249,19 +244,19 @@ def api_get_frames():
RF = reduce_functions[rf_name]["f"]
corpus = request.args.get("cor")
if corpus not in CORPORA:
if corpus not in app.config["CORPORA"]:
return json.dumps({"error": "cor={kres,ssj}"})
cur = valdb[corpus].find({"headwords": hw})
cur = mongo.db[corpus].find({"headwords": hw})
frames = []
for ent in cur[:QUERY_LIMIT]:
for ent in cur[:app.config["QUERY_LIMIT"]]:
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
cur.close()
# filter by relevant hw
frames = [x for x in frames if x.hw == hw]
ret_frames = RF(frames, valdb[SENSEMAP_COLL])
ret_frames = RF(frames, mongo.db.sensemap)
json_ret = {"frames": []}
for frame in ret_frames:
@ -302,12 +297,12 @@ def api_get_functor_frames():
RF = reduce_functions[rf_name]["f"]
corpus = request.args.get("cor")
if corpus not in CORPORA:
if corpus not in app.config["CORPORA"]:
return json.dumps({"error": "cor={kres,ssj}"})
cur = valdb[corpus].find({"functors": functor})
cur = mongo.db[corpus].find({"functors": functor})
frames = []
for ent in cur[:QUERY_LIMIT]:
for ent in cur[:app.config["QUERY_LIMIT"]]:
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
cur.close()
@ -315,7 +310,7 @@ def api_get_functor_frames():
frames = [x for x in frames if functor in x.get_functors()]
# raw_frames = vallex.functors_index[functor] # TODO
ret_frames = RF(frames, valdb[SENSEMAP_COLL])
ret_frames = RF(frames, mongo.db.sensemap)
ret_frames = _aggregate_by_hw(ret_frames)
json_ret = {"frames": []}
@ -334,10 +329,10 @@ def api_get_functor_frames():
def api_senses_get():
# returns senses and mapping for hw
hw = request.args.get("hw")
senses = list(valdb[SENSES_COLL].find({
senses = list(mongo.db.senses.find({
"hw": hw
}))
sense_map_query = list(valdb[SENSEMAP_COLL].find({
sense_map_query = list(mongo.db.sensemap.find({
"hw": hw
}))
# aggregation by max date possible on DB side
@ -417,7 +412,7 @@ def api_senses_update():
print(ns)
# insert into db
valdb[SENSES_COLL].insert(ns)
mongo.db.senses.insert(ns)
# replace tmp_id with mongo's _id
for ssj_id, el in sense_map.items():
@ -432,7 +427,7 @@ def api_senses_update():
"date": datetime.datetime.utcnow()
}
# vallex.db["v2_sense_map"].update(key, data, upsert=True)
valdb[SENSEMAP_COLL].insert(data)
mongo.db.sensemap.insert(data)
return "OK"
# SENSES ----------------------------^
@ -441,7 +436,7 @@ def api_senses_update():
# APP PREFLIGHT ---------------------.
def _is_banned(hw):
banned = True
if hw in BANNED_HEADWORDS:
if hw in app.config["BANNED_HEADWORDS"]:
banned = True
elif hw in sskj_wordlist["wordlist"]:
banned = False
@ -449,17 +444,17 @@ def _is_banned(hw):
banned = False
return banned
def prepare_app_index(appindex_json):
def prepare_app_index(appindex_json, sskj_wordlist):
log.info("[*] preparing app_index")
# create app_index (used in frontend, left side word index)
tmp_app_index = {c: {} for c in CORPORA}
for corpus in CORPORA:
tmp_app_index = {c: {} for c in app.config["CORPORA"]}
for corpus in app.config["CORPORA"]:
res_hws = {}
res_fns = {}
nentries = valdb[corpus].count()
nentries = mongo.db[corpus].count()
idx = 0
for e in valdb[corpus].find({}):
for e in mongo.db[corpus].find({}):
if "headwords" not in e:
continue
for hw in e["headwords"]:
@ -504,6 +499,33 @@ def prepare_app_index(appindex_json):
# APP PREFLIGHT ---------------------^
def init_wsgi(app):
    """Configure the Flask app for production (gunicorn) from the baked-in
    config file.

    Reads /project/prod_conf.yaml (placed there by the Dockerfile), sets up
    logging, and loads the pre-generated word index into
    app.config["app_index"].
    """
    config = None
    with Path("/project/prod_conf.yaml").open("r") as fp:
        config = list(yaml.safe_load_all(fp))[0]
    app.debug = False
    logfile = config["logfile"]
    if app.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(filename=logfile, level=logging.INFO)
    # app index from db (left-side word index used by the frontend)
    with Path(config["appindex"]).open("r") as fp:
        app.config["app_index"] = json.load(fp)
    log.info("[*] Starting app.py with config:\n{}".format(config))


# if we don't pass arguments, assume production environment (gunicorn)
if len(sys.argv) == 1:
    # BUG FIX: init_wsgi() was called without its required `app` argument,
    # which raises TypeError at import time under gunicorn.
    init_wsgi(app)
if __name__ == "__main__":
print("Starting app.py main()")
aparser = argparse.ArgumentParser(description="Arguments for app.py")
@ -516,7 +538,6 @@ if __name__ == "__main__":
aparser.add_argument("--appindex-json", type=str)
args = aparser.parse_args()
config = None
with Path(args.config_file).open("r") as fp:
config = list(yaml.safe_load_all(fp))[0]
@ -527,28 +548,31 @@ if __name__ == "__main__":
else:
logging.basicConfig(filename=logfile, level=logging.INFO)
"""
# db login
client = MongoClient(
"mongodb://{}".format(args.dbaddr),
username=args.dbuser,
password=args.dbpass,
authSource="valdb",
authSource="mongo.db",
authMechanism='SCRAM-SHA-1'
)
valdb = client.valdb
valdb = client.mongo.db
"""
if args.prepare_db:
with Path(args.sskj_wordlist).open("r") as fp:
sskj_wordlist = json.load(fp)
prepare_app_index(args.appindex_json)
prepare_app_index(args.appindex_json, sskj_wordlist)
sys.exit()
# app index from db
with Path(args.appindex_json).open("r") as fp:
app_index = json.load(fp)
app.config["app_index"] = json.load(fp)
# log.info("[*] Starting app.py with config:\n%s".format(config))
log.info("[*] Starting app.py with config:\n{}".format(config))
app.run(host=str(config["host"]), port=int(config["port"]))

View File

@ -4,3 +4,4 @@ port: 8084
host: localhost
logfile: "/var/log/valency_backend.log"
---

View File

@ -1,6 +1,10 @@
---
debug: True
port: 8084
host: 0.0.0.0
logfile: "/var/log/valency_backend.log"
---
appindex: /project/data/appindex.json
# Same as in root Makefile
dbaddr: 0.0.0.0:27017
dbuser: valuser
dbpass: valuserpass

View File

@ -0,0 +1,2 @@
# Flask-PyMongo connection settings — presumably consumed via
# app.config.from_object("db_config") in backend_flask/app.py; verify the
# module name matches this file.
# NOTE(review): credentials are hard-coded here; consider moving them to
# environment variables / secrets for production.
MONGO_URI = "mongodb://valuser:valuserpass@127.0.0.1:27017/valdb"
MONGO_AUTH_SOURCE = 'admin'

View File

@ -0,0 +1,7 @@
#!/bin/bash
# Backend container entrypoint: install the project's local packages in
# editable mode before serving.
pip3 install -e /project/src/pkg/cjvt-corpusparser/.
pip3 install -e /project/src/pkg/valency/.
pip3 install -e /project/src/pkg/seqparser/.
# NOTE(review): keeps the container alive without starting the app —
# presumably a development placeholder; confirm a gunicorn launch was
# intended here instead.
sleep 10000

View File

@ -1,73 +0,0 @@
# Deprecated: headword creation moved to be part of corpusparser,
# index creation moved to app.py as a preprocessing (with exit) step
CORPORA = ["kres", "ssj"]
if __name__ == "__main__":
valdb = None
def helper_tid_to_token(tid, tokens):
for t in tokens:
if t["tid"] == tid:
return t
return None
# update entries (add headwords and fuctors for indexing)
for corpus in CORPORA:
for e in valdb[corpus].find({}):
if e["srl_links"] is None:
e["headwords"] = []
e["functors"] = []
else:
hw_tids = list(set([x["from"] for x in e["srl_links"]]))
hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
e["headwords"] = headwords
functors = list(set([x["afun"] for x in e["srl_links"]]))
e["functors"] = functors
valdb[corpus].save(e)
valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
# create app_index (used in frontend, left side word index)
tmp_app_index = {c: {} for c in CORPORA}
for corpus in CORPORA:
res_hws = {}
res_fns = {}
for e in valdb[corpus].find({}):
if "headwords" not in e:
continue
for hw in e["headwords"]:
if hw in res_hws:
res_hws[hw] += 1
else:
res_hws[hw] = 1
if "functors" not in e:
continue
for fn in e["functors"]:
if fn in res_fns:
res_fns[fn] += 1
else:
res_fns[fn] = 1
alphabetical = {}
for k, e in res_hws.items():
fst = k[0].lower()
if fst in alphabetical:
alphabetical[fst].append((k, e))
else:
alphabetical[fst] = [(k, e)]
for k, e in alphabetical.items():
alphabetical[k] = sorted(e, key=lambda x: x[0])
tmp_app_index[corpus]["words"] = alphabetical
functors = [(k, e) for (k, e) in res_fns.items()]
functors = sorted(functors, key=lambda x: x[0])
tmp_app_index[corpus]["functors"] = functors
valdb.appindex.update({"dockey": "appindex"}, {"dockey": "appindex", "data": tmp_app_index}, upsert=True)