prepared app for production (removed global variables, added flask-pymongo as db driver)

voje 2019-05-04 01:28:46 +02:00
parent 707034153c
commit 2ff339e24c
12 changed files with 140 additions and 134 deletions
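In short, the commit drops the module-level globals in app.py (valdb, CORPORA, QUERY_LIMIT, ...) and keeps that state on the Flask app instead: settings move into app.config, and the hand-built MongoClient is replaced by a flask-pymongo handle configured from the new db_config module. Below is a minimal sketch of that pattern, distilled from the diff that follows; the api_example route and its response shape are hypothetical and only illustrate the wiring, they are not part of the commit.

import json
from flask import Flask
from flask_pymongo import PyMongo

app = Flask(__name__)
app.config.from_object("db_config")   # new config module: MONGO_URI, MONGO_AUTH_SOURCE
mongo = PyMongo(app)                  # replaces the global MongoClient / valdb handle

# former globals now live on app.config
app.config["CORPORA"] = ["ssj", "kres"]
app.config["QUERY_LIMIT"] = 1000


@app.route("/api/example/<corpus>")   # hypothetical route, for illustration only
def api_example(corpus):
    if corpus not in app.config["CORPORA"]:
        return json.dumps({"error": "cor={kres,ssj}"})
    # mongo.db is the database named in MONGO_URI ("valdb");
    # collections are reached by attribute or by name, e.g. mongo.db.users
    cur = mongo.db[corpus].find({}).limit(app.config["QUERY_LIMIT"])
    return json.dumps({"n_entries": len(list(cur))})

With this wiring, gunicorn (installed in Dockerfile-backend-flask) can import the app object directly, which is what the new init_wsgi() path in app.py is meant for.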

Dockerfile-backend-flask (new file, 18 additions)
View File

@@ -0,0 +1,18 @@
+FROM cjvt-python-env
+RUN pip3 install gunicorn
+RUN mkdir -p /project/src/backend_flask
+RUN mkdir -p /project/src/pkg
+RUN mkdir -p /project/data
+COPY src/backend_flask /project/src/backend_flask
+COPY src/pkg /project/src/pkg
+COPY data/appindex.json /project/data
+COPY src/backend_flask/entrypoint.sh /.
+COPY src/backend_flask/conf_files/prod_conf.yaml /project
+ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]

View File

@@ -124,12 +124,15 @@ backend-dev:
 		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
 		--appindex-json $(APPINDEX_PATH)
-backend-prod:
+backend-prod-old:
 	cd ./src/backend_flask; python3 app.py \
 		--config-file ./conf_files/prod_conf.yaml \
 		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
 		--appindex-json $(APPINDEX_PATH)
+backend-prod:
+	cd ./src/backend_flask; $(MAKE) prod
 ## add sskj senses to db (generated with pkg/seqparser)
 sskj-senses:
 	python3 ./src/pkg/seqparser/seqparser/main.py \

View File

@@ -1,26 +1,25 @@
-FROM ubuntu:16.04
+FROM ubuntu:18.04
 RUN apt-get update --fix-missing
 RUN apt-get install -y \
     vim \
     python3 \
     python3-pip \
-    sshfs
+    sshfs \
+    curl
+RUN pip3 install --upgrade pip
 RUN pip3 install \
     lxml \
     pandas \
     sklearn \
     argparse \
+    pyyaml \
     pathlib \
+    flask \
+    flask_cors \
     pymongo \
-    flask
-RUN apt-get install -y \
-    curl
+    flask-pymongo
 ENV PYTHONIOENCODING UTF-8
-RUN pip3 install \
-    pyyaml \
-    flask_cors

View File

@@ -1,4 +1,4 @@
-IMAGE_NAME="cjvt-python-env"
+IMAGE_NAME="cjvt-python-env" # don't change, used in backend_flask/Makefile
 CNNAME="python-env"
 all: build run

View File

@@ -0,0 +1,5 @@
+#!/bin/bash
+echo "testing entrypoint."
+$(exit 1)
+exit 0

View File

@@ -0,0 +1,16 @@
+IMG="backend-flask"
+CNT="backend_flask"
+clean:
+	- docker rm -f $(CNT)
+run: clean build
+	docker run -d --net host --name $(CNT) $(IMG)
+	docker logs -f $(CNT)
+build: build-cjvt-python-env
+	# docker build . -f ../../Dockerfile-backend-flask -t $(IMG)
+	cd ../..; docker build . -f Dockerfile-backend-flask -t $(IMG)
+build-cjvt-python-env:
+	cd ../../dockerfiles/python-env; $(MAKE) build

View File

@@ -26,26 +26,21 @@ from email.mime.text import MIMEText
 from copy import deepcopy as DC
 from pathlib import Path
 from pymongo import MongoClient
+from flask_pymongo import PyMongo
 import pymongo
 import argparse
-# some db collections
-USERS_COLL = "users"
-TOKENS_COLL = "usertokens"
-SENSES_COLL = "senses"
-SENSEMAP_COLL = "sensemap"
-# pre-generated data (gui leftside word index)
-CORPORA = ["ssj", "kres"]
-app_index = None
-sskj_wordlist = None # used by _is_banned(hw)
-BANNED_HEADWORDS = ["biti"]
-QUERY_LIMIT = 1000 # Some headwords contain thousands of examples - not practical for the app
 log = logging.getLogger(__name__)
-valdb = None
 app = Flask(__name__)
+app.config.from_object("db_config")
+mongo = PyMongo(app)
+app.config["CORPORA"] = ["ssj", "kres"]
+app.config["BANNED_HEADWORDS"] = ["biti"]
+app.config["QUERY_LIMIT"] = 1000
 # when running vuejs via webpack
 # CORS(app)
@@ -60,7 +55,7 @@ CORS(app)
 @app.route("/api/dev")
 def api_dev():
     print("DEV")
-    cur = valdb.kres.find({"headwords": "nagovarjati"})
+    cur = mongo.db.kres.find({"headwords": "nagovarjati"})
     frames = []
     for ent in cur:
         frames += frames_from_db_entry(ent)
@@ -73,12 +68,12 @@ def api_dev():
 @app.route("/api/words/<corpus>")
 def api_words(corpus):
     return json.dumps({
-        "sorted_words": app_index[corpus]["words"], # todo - make corpus as arg
+        "sorted_words": app.config["app_index"][corpus]["words"], # todo - make corpus as arg
     })
 @app.route("/api/functors/<corpus>")
 def api_functors(corpus):
-    return json.dumps(app_index[corpus]["functors"])
+    return json.dumps(app.config["app_index"][corpus]["functors"])
 # INDEX SELECTION -------------------^
@@ -99,7 +94,7 @@ def api_register():
     ):
         return "ERR"
     email_hash = hashlib.sha256(email.encode("utf-8")).hexdigest()
-    existing = list(valdb[USERS_COLL].find({
+    existing = list(mongo.db.users.find({
         "$or": [{"username": username}, {"email": email_hash}]
     }))
     if len(existing) > 0:
@@ -110,7 +105,7 @@ def api_register():
             password.encode("utf-8")).hexdigest(),
         "email": email_hash
     }
-    valdb[USERS_COLL].insert(entry)
+    mongo.db.users.insert(entry)
     return "OK"
@@ -122,7 +117,7 @@ def api_login():
     password = data["password"]
     hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
-    db_user = list(valdb[USERS_COLL].find({
+    db_user = list(mongo.db.users.find({
         "username": username,
         "hpass": hpass
     }))
@@ -136,7 +131,7 @@ def api_login():
         "date": datetime.datetime.utcnow(),
         "token": token
     }
-    valdb[TOKENS_COLL].update(
+    mongo.db.usertokens.update(
         {"username": token_entry["username"]},
         token_entry,
         upsert=True
@@ -179,7 +174,7 @@ def api_new_pass():
     username = data["username"]
     email = data["email"]
     hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
-    db_res = list(valdb[USERS_COLL].find({
+    db_res = list(mongo.db.users.find({
         "username": username,
         "email": hemail
     }))
@@ -191,7 +186,7 @@ def api_new_pass():
         string.ascii_letters + string.digits) for i in range(10)])
     # update locally
     hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
-    valdb[USERS_COLL].update(
+    mongo.db.users.update(
         {
             "username": username,
             "email": hemail
@@ -209,12 +204,12 @@ def token_to_username(token):
     key = {
         "token": token
     }
-    res = list(valdb[TOKENS_COLL].find(key))
+    res = list(mongo.db.usertokens.find(key))
     if len(res) != 1:
         return None
     username = res[0]["username"]
     # update deletion interval
-    valdb[TOKENS_COLL].update(
+    mongo.db.usertokens.update(
         key, {"$set": {"date": datetime.datetime.utcnow()}})
     return username
@@ -249,19 +244,19 @@ def api_get_frames():
     RF = reduce_functions[rf_name]["f"]
     corpus = request.args.get("cor")
-    if corpus not in CORPORA:
+    if corpus not in app.config["CORPORA"]:
         return json.dumps({"error": "cor={kres,ssj}"})
-    cur = valdb[corpus].find({"headwords": hw})
+    cur = mongo.db[corpus].find({"headwords": hw})
     frames = []
-    for ent in cur[:QUERY_LIMIT]:
+    for ent in cur[:app.config["QUERY_LIMIT"]]:
         frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
     cur.close()
     # filter by relevant hw
     frames = [x for x in frames if x.hw == hw]
-    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
+    ret_frames = RF(frames, mongo.db.sensemap)
     json_ret = {"frames": []}
     for frame in ret_frames:
@@ -302,12 +297,12 @@ def api_get_functor_frames():
     RF = reduce_functions[rf_name]["f"]
     corpus = request.args.get("cor")
-    if corpus not in CORPORA:
+    if corpus not in app.config["CORPORA"]:
         return json.dumps({"error": "cor={kres,ssj}"})
-    cur = valdb[corpus].find({"functors": functor})
+    cur = mongo.db[corpus].find({"functors": functor})
     frames = []
-    for ent in cur[:QUERY_LIMIT]:
+    for ent in cur[:app.config["QUERY_LIMIT"]]:
         frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
     cur.close()
@@ -315,7 +310,7 @@ def api_get_functor_frames():
     frames = [x for x in frames if functor in x.get_functors()]
     # raw_frames = vallex.functors_index[functor] # TODO
-    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
+    ret_frames = RF(frames, mongo.db.sensemap)
     ret_frames = _aggregate_by_hw(ret_frames)
     json_ret = {"frames": []}
@@ -334,10 +329,10 @@ def api_get_functor_frames():
 def api_senses_get():
     # returns senses and mapping for hw
     hw = request.args.get("hw")
-    senses = list(valdb[SENSES_COLL].find({
+    senses = list(mongo.db.senses.find({
         "hw": hw
     }))
-    sense_map_query = list(valdb[SENSEMAP_COLL].find({
+    sense_map_query = list(mongo.db.sensemap.find({
         "hw": hw
     }))
     # aggregation by max date possible on DB side
@@ -417,7 +412,7 @@ def api_senses_update():
         print(ns)
         # insert into db
-        valdb[SENSES_COLL].insert(ns)
+        mongo.db.senses.insert(ns)
     # replace tmp_id with mongo's _id
     for ssj_id, el in sense_map.items():
@@ -432,7 +427,7 @@ def api_senses_update():
             "date": datetime.datetime.utcnow()
         }
         # vallex.db["v2_sense_map"].update(key, data, upsert=True)
-        valdb[SENSEMAP_COLL].insert(data)
+        mongo.db.sensemap.insert(data)
     return "OK"
 # SENSES ----------------------------^
@@ -441,7 +436,7 @@ def api_senses_update():
 # APP PREFLIGHT ---------------------.
 def _is_banned(hw):
     banned = True
-    if hw in BANNED_HEADWORDS:
+    if hw in app.config["BANNED_HEADWORDS"]:
         banned = True
     elif hw in sskj_wordlist["wordlist"]:
         banned = False
@@ -449,17 +444,17 @@ def _is_banned(hw):
         banned = False
     return banned
-def prepare_app_index(appindex_json):
+def prepare_app_index(appindex_json, sskj_wordlist):
     log.info("[*] preparing app_index")
     # create app_index (used in frontend, left side word index)
-    tmp_app_index = {c: {} for c in CORPORA}
-    for corpus in CORPORA:
+    tmp_app_index = {c: {} for c in app.config["CORPORA"]}
+    for corpus in app.config["CORPORA"]:
         res_hws = {}
         res_fns = {}
-        nentries = valdb[corpus].count()
+        nentries = mongo.db[corpus].count()
         idx = 0
-        for e in valdb[corpus].find({}):
+        for e in mongo.db[corpus].find({}):
             if "headwords" not in e:
                 continue
             for hw in e["headwords"]:
@@ -504,6 +499,33 @@ def prepare_app_index(appindex_json):
 # APP PREFLIGHT ---------------------^
+def init_wsgi(app):
+    config = None
+    with Path("/project/prod_conf.yaml").open("r") as fp:
+        config = list(yaml.safe_load_all(fp))[0]
+    app.debug = False
+    logfile = config["logfile"]
+    if app.debug:
+        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
+    else:
+        logging.basicConfig(filename=logfile, level=logging.INFO)
+    # db login
+    # app index from db
+    with Path(config["appindex"]).open("r") as fp:
+        # a dirty hack but ok
+        app.config["app_index"] = json.load(fp)
+    # log.info("[*] Starting app.py with config:\n%s".format(config))
+    log.info("[*] Starting app.py with config:\n{}".format(config))
+# if we don't pass arguments, assume production environment (gunicorn)
+if len(sys.argv) == 1:
+    init_wsgi()
 if __name__ == "__main__":
     print("Starting app.py main()")
     aparser = argparse.ArgumentParser(description="Arguments for app.py")
@@ -516,7 +538,6 @@ if __name__ == "__main__":
     aparser.add_argument("--appindex-json", type=str)
     args = aparser.parse_args()
-    config = None
     with Path(args.config_file).open("r") as fp:
         config = list(yaml.safe_load_all(fp))[0]
@@ -527,28 +548,31 @@ if __name__ == "__main__":
     else:
         logging.basicConfig(filename=logfile, level=logging.INFO)
+    """
     # db login
     client = MongoClient(
         "mongodb://{}".format(args.dbaddr),
         username=args.dbuser,
         password=args.dbpass,
-        authSource="valdb",
+        authSource="mongo.db",
         authMechanism='SCRAM-SHA-1'
     )
-    valdb = client.valdb
+    valdb = client.mongo.db
+    """
     if args.prepare_db:
         with Path(args.sskj_wordlist).open("r") as fp:
             sskj_wordlist = json.load(fp)
-        prepare_app_index(args.appindex_json)
+        prepare_app_index(args.appindex_json, sskj_wordlist)
         sys.exit()
     # app index from db
     with Path(args.appindex_json).open("r") as fp:
-        app_index = json.load(fp)
+        app.config["app_index"] = json.load(fp)
     # log.info("[*] Starting app.py with config:\n%s".format(config))
     log.info("[*] Starting app.py with config:\n{}".format(config))
     app.run(host=str(config["host"]), port=int(config["port"]))

View File

@@ -4,3 +4,4 @@ port: 8084
 host: localhost
 logfile: "/var/log/valency_backend.log"
 ---

View File

@@ -1,6 +1,10 @@
 ---
-debug: True
 port: 8084
 host: 0.0.0.0
 logfile: "/var/log/valency_backend.log"
----
+appindex: /project/data/appindex.json
+# Same as in root Makefile
+dbaddr: 0.0.0.0:27017
+dbuser: valuser
+dbpass: valuserpass

View File

@@ -0,0 +1,2 @@
+MONGO_URI = "mongodb://valuser:valuserpass@127.0.0.1:27017/valdb"
+MONGO_AUTH_SOURCE = 'admin'
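flask-pymongo picks up these MONGO_* keys once app.config.from_object("db_config") runs, and the database it exposes as mongo.db is the one named in the URI path. A quick way to see what the driver derives from that URI is sketched below; this snippet is illustrative only and not part of the commit.

from pymongo import uri_parser

parsed = uri_parser.parse_uri("mongodb://valuser:valuserpass@127.0.0.1:27017/valdb")
print(parsed["database"])  # "valdb" -> what flask-pymongo exposes as mongo.db
print(parsed["nodelist"])  # [("127.0.0.1", 27017)]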

View File

@@ -0,0 +1,7 @@
+#!/bin/bash
+pip3 install -e /project/src/pkg/cjvt-corpusparser/.
+pip3 install -e /project/src/pkg/valency/.
+pip3 install -e /project/src/pkg/seqparser/.
+sleep 10000

View File

@@ -1,73 +0,0 @@
-# Deprecated: headword creation moved to be part of corpusparser,
-# index creation moved to app.py as a preprocessing (with exit) step
-CORPORA = ["kres", "ssj"]
-if __name__ == "__main__":
-    valdb = None
-    def helper_tid_to_token(tid, tokens):
-        for t in tokens:
-            if t["tid"] == tid:
-                return t
-        return None
-    # update entries (add headwords and fuctors for indexing)
-    for corpus in CORPORA:
-        for e in valdb[corpus].find({}):
-            if e["srl_links"] is None:
-                e["headwords"] = []
-                e["functors"] = []
-            else:
-                hw_tids = list(set([x["from"] for x in e["srl_links"]]))
-                hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
-                headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
-                e["headwords"] = headwords
-                functors = list(set([x["afun"] for x in e["srl_links"]]))
-                e["functors"] = functors
-            valdb[corpus].save(e)
-        valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
-        valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
-    # create app_index (used in frontend, left side word index)
-    tmp_app_index = {c: {} for c in CORPORA}
-    for corpus in CORPORA:
-        res_hws = {}
-        res_fns = {}
-        for e in valdb[corpus].find({}):
-            if "headwords" not in e:
-                continue
-            for hw in e["headwords"]:
-                if hw in res_hws:
-                    res_hws[hw] += 1
-                else:
-                    res_hws[hw] = 1
-            if "functors" not in e:
-                continue
-            for fn in e["functors"]:
-                if fn in res_fns:
-                    res_fns[fn] += 1
-                else:
-                    res_fns[fn] = 1
-        alphabetical = {}
-        for k, e in res_hws.items():
-            fst = k[0].lower()
-            if fst in alphabetical:
-                alphabetical[fst].append((k, e))
-            else:
-                alphabetical[fst] = [(k, e)]
-        for k, e in alphabetical.items():
-            alphabetical[k] = sorted(e, key=lambda x: x[0])
-        tmp_app_index[corpus]["words"] = alphabetical
-        functors = [(k, e) for (k, e) in res_fns.items()]
-        functors = sorted(functors, key=lambda x: x[0])
-        tmp_app_index[corpus]["functors"] = functors
-    valdb.appindex.update({"dockey": "appindex"}, {"dockey": "appindex", "data": tmp_app_index}, upsert=True)