forked from kristjan/cjvt-valency
prepared app for production (removed global variables, added flask-pymongo as db driver)
This commit is contained in:
parent
707034153c
commit
2ff339e24c
18
Dockerfile-backend-flask
Normal file
18
Dockerfile-backend-flask
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
# Image for the Flask backend, layered on the shared cjvt-python-env base image.
FROM cjvt-python-env

# WSGI server package (only installed here; the entrypoint decides what runs).
RUN pip3 install gunicorn

# Target directories for the sources and the pre-generated data.
RUN mkdir -p /project/src/backend_flask \
             /project/src/pkg \
             /project/data

# Application code and the local packages installed by the entrypoint.
COPY src/backend_flask /project/src/backend_flask
COPY src/pkg /project/src/pkg

# Pre-generated word index.
COPY data/appindex.json /project/data

# Startup script and production configuration.
COPY src/backend_flask/entrypoint.sh /.
COPY src/backend_flask/conf_files/prod_conf.yaml /project

ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
|
5
Makefile
5
Makefile
|
@ -124,12 +124,15 @@ backend-dev:
|
||||||
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
|
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
|
||||||
--appindex-json $(APPINDEX_PATH)
|
--appindex-json $(APPINDEX_PATH)
|
||||||
|
|
||||||
backend-prod:
|
backend-prod-old:
|
||||||
cd ./src/backend_flask; python3 app.py \
|
cd ./src/backend_flask; python3 app.py \
|
||||||
--config-file ./conf_files/prod_conf.yaml \
|
--config-file ./conf_files/prod_conf.yaml \
|
||||||
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
|
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
|
||||||
--appindex-json $(APPINDEX_PATH)
|
--appindex-json $(APPINDEX_PATH)
|
||||||
|
|
||||||
|
backend-prod:
|
||||||
|
cd ./src/backend_flask; $(MAKE) prod
|
||||||
|
|
||||||
## add sskj senses to db (generated with pkg/seqparser)
|
## add sskj senses to db (generated with pkg/seqparser)
|
||||||
sskj-senses:
|
sskj-senses:
|
||||||
python3 ./src/pkg/seqparser/seqparser/main.py \
|
python3 ./src/pkg/seqparser/seqparser/main.py \
|
||||||
|
|
|
@ -1,26 +1,25 @@
|
||||||
FROM ubuntu:16.04
|
FROM ubuntu:18.04
|
||||||
|
|
||||||
RUN apt-get update --fix-missing
|
RUN apt-get update --fix-missing
|
||||||
RUN apt-get install -y \
|
RUN apt-get install -y \
|
||||||
vim \
|
vim \
|
||||||
python3 \
|
python3 \
|
||||||
python3-pip \
|
python3-pip \
|
||||||
sshfs
|
sshfs \
|
||||||
|
curl
|
||||||
|
|
||||||
|
RUN pip3 install --upgrade pip
|
||||||
|
|
||||||
RUN pip3 install \
|
RUN pip3 install \
|
||||||
lxml \
|
lxml \
|
||||||
pandas \
|
pandas \
|
||||||
sklearn \
|
sklearn \
|
||||||
argparse \
|
argparse \
|
||||||
|
pyyaml \
|
||||||
pathlib \
|
pathlib \
|
||||||
|
flask \
|
||||||
|
flask_cors \
|
||||||
pymongo \
|
pymongo \
|
||||||
flask
|
flask-pymongo
|
||||||
|
|
||||||
RUN apt-get install -y \
|
|
||||||
curl
|
|
||||||
|
|
||||||
ENV PYTHONIOENCODING UTF-8
|
ENV PYTHONIOENCODING UTF-8
|
||||||
|
|
||||||
RUN pip3 install \
|
|
||||||
pyyaml \
|
|
||||||
flask_cors
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
IMAGE_NAME="cjvt-python-env"
|
IMAGE_NAME="cjvt-python-env" # don't change, used in backend_flask/Makefile
|
||||||
CNNAME="python-env"
|
CNNAME="python-env"
|
||||||
|
|
||||||
all: build run
|
all: build run
|
||||||
|
|
5
dockerfiles/python-env/entrypoint.sh
Executable file
5
dockerfiles/python-env/entrypoint.sh
Executable file
|
@ -0,0 +1,5 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
echo "testing entrypoint."
|
||||||
|
$(exit 1)
|
||||||
|
exit 0
|
16
src/backend_flask/Makefile
Normal file
16
src/backend_flask/Makefile
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
# Build and run the Flask backend as a Docker container.
IMG="backend-flask"
CNT="backend_flask"

# These targets are commands, not files; declare them phony so that a file or
# directory with the same name (e.g. "build") can never mask them.
.PHONY: clean run build build-cjvt-python-env

# Remove the container; the leading '-' ignores the error when none exists.
clean:
	- docker rm -f $(CNT)

# Start a fresh container in the background and follow its logs.
run: clean build
	docker run -d --net host --name $(CNT) $(IMG)
	docker logs -f $(CNT)

# Build from the repository root so the Dockerfile's COPY paths resolve.
# '&&' stops the recipe if the directory change fails.
build: build-cjvt-python-env
	cd ../.. && docker build . -f Dockerfile-backend-flask -t $(IMG)

# Build the base image that Dockerfile-backend-flask starts FROM.
build-cjvt-python-env:
	cd ../../dockerfiles/python-env && $(MAKE) build
|
|
@ -26,26 +26,21 @@ from email.mime.text import MIMEText
|
||||||
from copy import deepcopy as DC
|
from copy import deepcopy as DC
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from pymongo import MongoClient
|
from pymongo import MongoClient
|
||||||
|
from flask_pymongo import PyMongo
|
||||||
import pymongo
|
import pymongo
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
# some db collections
|
|
||||||
USERS_COLL = "users"
|
|
||||||
TOKENS_COLL = "usertokens"
|
|
||||||
SENSES_COLL = "senses"
|
|
||||||
SENSEMAP_COLL = "sensemap"
|
|
||||||
|
|
||||||
# pre-generated data (gui leftside word index)
|
|
||||||
CORPORA = ["ssj", "kres"]
|
|
||||||
app_index = None
|
|
||||||
sskj_wordlist = None # used by _is_banned(hw)
|
|
||||||
BANNED_HEADWORDS = ["biti"]
|
|
||||||
QUERY_LIMIT = 1000 # Some headwords contain thousands of examples - not practical for the app
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
valdb = None
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
app.config.from_object("db_config")
|
||||||
|
mongo = PyMongo(app)
|
||||||
|
|
||||||
|
app.config["CORPORA"] = ["ssj", "kres"]
|
||||||
|
app.config["BANNED_HEADWORDS"] = ["biti"]
|
||||||
|
app.config["QUERY_LIMIT"] = 1000
|
||||||
|
|
||||||
|
|
||||||
# when running vuejs via webpack
|
# when running vuejs via webpack
|
||||||
# CORS(app)
|
# CORS(app)
|
||||||
|
@ -60,7 +55,7 @@ CORS(app)
|
||||||
@app.route("/api/dev")
|
@app.route("/api/dev")
|
||||||
def api_dev():
|
def api_dev():
|
||||||
print("DEV")
|
print("DEV")
|
||||||
cur = valdb.kres.find({"headwords": "nagovarjati"})
|
cur = mongo.db.kres.find({"headwords": "nagovarjati"})
|
||||||
frames = []
|
frames = []
|
||||||
for ent in cur:
|
for ent in cur:
|
||||||
frames += frames_from_db_entry(ent)
|
frames += frames_from_db_entry(ent)
|
||||||
|
@ -73,12 +68,12 @@ def api_dev():
|
||||||
@app.route("/api/words/<corpus>")
|
@app.route("/api/words/<corpus>")
|
||||||
def api_words(corpus):
|
def api_words(corpus):
|
||||||
return json.dumps({
|
return json.dumps({
|
||||||
"sorted_words": app_index[corpus]["words"], # todo - make corpus as arg
|
"sorted_words": app.config["app_index"][corpus]["words"], # todo - make corpus as arg
|
||||||
})
|
})
|
||||||
|
|
||||||
@app.route("/api/functors/<corpus>")
|
@app.route("/api/functors/<corpus>")
|
||||||
def api_functors(corpus):
|
def api_functors(corpus):
|
||||||
return json.dumps(app_index[corpus]["functors"])
|
return json.dumps(app.config["app_index"][corpus]["functors"])
|
||||||
|
|
||||||
# INDEX SELECTION -------------------^
|
# INDEX SELECTION -------------------^
|
||||||
|
|
||||||
|
@ -99,7 +94,7 @@ def api_register():
|
||||||
):
|
):
|
||||||
return "ERR"
|
return "ERR"
|
||||||
email_hash = hashlib.sha256(email.encode("utf-8")).hexdigest()
|
email_hash = hashlib.sha256(email.encode("utf-8")).hexdigest()
|
||||||
existing = list(valdb[USERS_COLL].find({
|
existing = list(mongo.db.users.find({
|
||||||
"$or": [{"username": username}, {"email": email_hash}]
|
"$or": [{"username": username}, {"email": email_hash}]
|
||||||
}))
|
}))
|
||||||
if len(existing) > 0:
|
if len(existing) > 0:
|
||||||
|
@ -110,7 +105,7 @@ def api_register():
|
||||||
password.encode("utf-8")).hexdigest(),
|
password.encode("utf-8")).hexdigest(),
|
||||||
"email": email_hash
|
"email": email_hash
|
||||||
}
|
}
|
||||||
valdb[USERS_COLL].insert(entry)
|
mongo.db.users.insert(entry)
|
||||||
return "OK"
|
return "OK"
|
||||||
|
|
||||||
|
|
||||||
|
@ -122,7 +117,7 @@ def api_login():
|
||||||
password = data["password"]
|
password = data["password"]
|
||||||
hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
|
hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
db_user = list(valdb[USERS_COLL].find({
|
db_user = list(mongo.db.users.find({
|
||||||
"username": username,
|
"username": username,
|
||||||
"hpass": hpass
|
"hpass": hpass
|
||||||
}))
|
}))
|
||||||
|
@ -136,7 +131,7 @@ def api_login():
|
||||||
"date": datetime.datetime.utcnow(),
|
"date": datetime.datetime.utcnow(),
|
||||||
"token": token
|
"token": token
|
||||||
}
|
}
|
||||||
valdb[TOKENS_COLL].update(
|
mongo.db.usertokens.update(
|
||||||
{"username": token_entry["username"]},
|
{"username": token_entry["username"]},
|
||||||
token_entry,
|
token_entry,
|
||||||
upsert=True
|
upsert=True
|
||||||
|
@ -179,7 +174,7 @@ def api_new_pass():
|
||||||
username = data["username"]
|
username = data["username"]
|
||||||
email = data["email"]
|
email = data["email"]
|
||||||
hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
|
hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
|
||||||
db_res = list(valdb[USERS_COLL].find({
|
db_res = list(mongo.db.users.find({
|
||||||
"username": username,
|
"username": username,
|
||||||
"email": hemail
|
"email": hemail
|
||||||
}))
|
}))
|
||||||
|
@ -191,7 +186,7 @@ def api_new_pass():
|
||||||
string.ascii_letters + string.digits) for i in range(10)])
|
string.ascii_letters + string.digits) for i in range(10)])
|
||||||
# update locally
|
# update locally
|
||||||
hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
|
hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
|
||||||
valdb[USERS_COLL].update(
|
mongo.db.users.update(
|
||||||
{
|
{
|
||||||
"username": username,
|
"username": username,
|
||||||
"email": hemail
|
"email": hemail
|
||||||
|
@ -209,12 +204,12 @@ def token_to_username(token):
|
||||||
key = {
|
key = {
|
||||||
"token": token
|
"token": token
|
||||||
}
|
}
|
||||||
res = list(valdb[TOKENS_COLL].find(key))
|
res = list(mongo.db.usertokens.find(key))
|
||||||
if len(res) != 1:
|
if len(res) != 1:
|
||||||
return None
|
return None
|
||||||
username = res[0]["username"]
|
username = res[0]["username"]
|
||||||
# update deletion interval
|
# update deletion interval
|
||||||
valdb[TOKENS_COLL].update(
|
mongo.db.usertokens.update(
|
||||||
key, {"$set": {"date": datetime.datetime.utcnow()}})
|
key, {"$set": {"date": datetime.datetime.utcnow()}})
|
||||||
return username
|
return username
|
||||||
|
|
||||||
|
@ -249,19 +244,19 @@ def api_get_frames():
|
||||||
RF = reduce_functions[rf_name]["f"]
|
RF = reduce_functions[rf_name]["f"]
|
||||||
|
|
||||||
corpus = request.args.get("cor")
|
corpus = request.args.get("cor")
|
||||||
if corpus not in CORPORA:
|
if corpus not in app.config["CORPORA"]:
|
||||||
return json.dumps({"error": "cor={kres,ssj}"})
|
return json.dumps({"error": "cor={kres,ssj}"})
|
||||||
|
|
||||||
cur = valdb[corpus].find({"headwords": hw})
|
cur = mongo.db[corpus].find({"headwords": hw})
|
||||||
frames = []
|
frames = []
|
||||||
for ent in cur[:QUERY_LIMIT]:
|
for ent in cur[:app.config["QUERY_LIMIT"]]:
|
||||||
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
|
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
|
||||||
cur.close()
|
cur.close()
|
||||||
|
|
||||||
# filter by relevant hw
|
# filter by relevant hw
|
||||||
frames = [x for x in frames if x.hw == hw]
|
frames = [x for x in frames if x.hw == hw]
|
||||||
|
|
||||||
ret_frames = RF(frames, valdb[SENSEMAP_COLL])
|
ret_frames = RF(frames, mongo.db.sensemap)
|
||||||
|
|
||||||
json_ret = {"frames": []}
|
json_ret = {"frames": []}
|
||||||
for frame in ret_frames:
|
for frame in ret_frames:
|
||||||
|
@ -302,12 +297,12 @@ def api_get_functor_frames():
|
||||||
RF = reduce_functions[rf_name]["f"]
|
RF = reduce_functions[rf_name]["f"]
|
||||||
|
|
||||||
corpus = request.args.get("cor")
|
corpus = request.args.get("cor")
|
||||||
if corpus not in CORPORA:
|
if corpus not in app.config["CORPORA"]:
|
||||||
return json.dumps({"error": "cor={kres,ssj}"})
|
return json.dumps({"error": "cor={kres,ssj}"})
|
||||||
|
|
||||||
cur = valdb[corpus].find({"functors": functor})
|
cur = mongo.db[corpus].find({"functors": functor})
|
||||||
frames = []
|
frames = []
|
||||||
for ent in cur[:QUERY_LIMIT]:
|
for ent in cur[:app.config["QUERY_LIMIT"]]:
|
||||||
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
|
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
|
||||||
cur.close()
|
cur.close()
|
||||||
|
|
||||||
|
@ -315,7 +310,7 @@ def api_get_functor_frames():
|
||||||
frames = [x for x in frames if functor in x.get_functors()]
|
frames = [x for x in frames if functor in x.get_functors()]
|
||||||
|
|
||||||
# raw_frames = vallex.functors_index[functor] # TODO
|
# raw_frames = vallex.functors_index[functor] # TODO
|
||||||
ret_frames = RF(frames, valdb[SENSEMAP_COLL])
|
ret_frames = RF(frames, mongo.db.sensemap)
|
||||||
ret_frames = _aggregate_by_hw(ret_frames)
|
ret_frames = _aggregate_by_hw(ret_frames)
|
||||||
|
|
||||||
json_ret = {"frames": []}
|
json_ret = {"frames": []}
|
||||||
|
@ -334,10 +329,10 @@ def api_get_functor_frames():
|
||||||
def api_senses_get():
|
def api_senses_get():
|
||||||
# returns senses and mapping for hw
|
# returns senses and mapping for hw
|
||||||
hw = request.args.get("hw")
|
hw = request.args.get("hw")
|
||||||
senses = list(valdb[SENSES_COLL].find({
|
senses = list(mongo.db.senses.find({
|
||||||
"hw": hw
|
"hw": hw
|
||||||
}))
|
}))
|
||||||
sense_map_query = list(valdb[SENSEMAP_COLL].find({
|
sense_map_query = list(mongo.db.sensemap.find({
|
||||||
"hw": hw
|
"hw": hw
|
||||||
}))
|
}))
|
||||||
# aggregation by max date possible on DB side
|
# aggregation by max date possible on DB side
|
||||||
|
@ -417,7 +412,7 @@ def api_senses_update():
|
||||||
print(ns)
|
print(ns)
|
||||||
|
|
||||||
# insert into db
|
# insert into db
|
||||||
valdb[SENSES_COLL].insert(ns)
|
mongo.db.senses.insert(ns)
|
||||||
|
|
||||||
# replace tmp_id with mongo's _id
|
# replace tmp_id with mongo's _id
|
||||||
for ssj_id, el in sense_map.items():
|
for ssj_id, el in sense_map.items():
|
||||||
|
@ -432,7 +427,7 @@ def api_senses_update():
|
||||||
"date": datetime.datetime.utcnow()
|
"date": datetime.datetime.utcnow()
|
||||||
}
|
}
|
||||||
# vallex.db["v2_sense_map"].update(key, data, upsert=True)
|
# vallex.db["v2_sense_map"].update(key, data, upsert=True)
|
||||||
valdb[SENSEMAP_COLL].insert(data)
|
mongo.db.sensemap.insert(data)
|
||||||
return "OK"
|
return "OK"
|
||||||
|
|
||||||
# SENSES ----------------------------^
|
# SENSES ----------------------------^
|
||||||
|
@ -441,7 +436,7 @@ def api_senses_update():
|
||||||
# APP PREFLIGHT ---------------------.
|
# APP PREFLIGHT ---------------------.
|
||||||
def _is_banned(hw):
|
def _is_banned(hw):
|
||||||
banned = True
|
banned = True
|
||||||
if hw in BANNED_HEADWORDS:
|
if hw in app.config["BANNED_HEADWORDS"]:
|
||||||
banned = True
|
banned = True
|
||||||
elif hw in sskj_wordlist["wordlist"]:
|
elif hw in sskj_wordlist["wordlist"]:
|
||||||
banned = False
|
banned = False
|
||||||
|
@ -449,17 +444,17 @@ def _is_banned(hw):
|
||||||
banned = False
|
banned = False
|
||||||
return banned
|
return banned
|
||||||
|
|
||||||
def prepare_app_index(appindex_json):
|
def prepare_app_index(appindex_json, sskj_wordlist):
|
||||||
log.info("[*] preparing app_index")
|
log.info("[*] preparing app_index")
|
||||||
# create app_index (used in frontend, left side word index)
|
# create app_index (used in frontend, left side word index)
|
||||||
tmp_app_index = {c: {} for c in CORPORA}
|
tmp_app_index = {c: {} for c in app.config["CORPORA"]}
|
||||||
for corpus in CORPORA:
|
for corpus in app.config["CORPORA"]:
|
||||||
res_hws = {}
|
res_hws = {}
|
||||||
res_fns = {}
|
res_fns = {}
|
||||||
|
|
||||||
nentries = valdb[corpus].count()
|
nentries = mongo.db[corpus].count()
|
||||||
idx = 0
|
idx = 0
|
||||||
for e in valdb[corpus].find({}):
|
for e in mongo.db[corpus].find({}):
|
||||||
if "headwords" not in e:
|
if "headwords" not in e:
|
||||||
continue
|
continue
|
||||||
for hw in e["headwords"]:
|
for hw in e["headwords"]:
|
||||||
|
@ -504,6 +499,33 @@ def prepare_app_index(appindex_json):
|
||||||
# APP PREFLIGHT ---------------------^
|
# APP PREFLIGHT ---------------------^
|
||||||
|
|
||||||
|
|
||||||
|
def init_wsgi(app):
    """Configure *app* for production use behind a WSGI server.

    Reads the first YAML document of ``/project/prod_conf.yaml``, sets up
    file logging at INFO level, and loads the pre-generated app index JSON
    into ``app.config["app_index"]``.

    Args:
        app: the Flask application to configure.

    Raises:
        OSError: if the config file or the app index file cannot be opened.
        KeyError: if the config lacks the ``logfile`` or ``appindex`` key.
    """
    with Path("/project/prod_conf.yaml").open("r") as fp:
        config = list(yaml.safe_load_all(fp))[0]

    # Production never runs in debug mode, so logging always goes to the file.
    app.debug = False
    logging.basicConfig(filename=config["logfile"], level=logging.INFO)

    # The word index is read from a JSON file and kept in the app config.
    with Path(config["appindex"]).open("r") as fp:
        app.config["app_index"] = json.load(fp)

    log.info("[*] Starting app.py with config:\n%s", config)


# If no command-line arguments were passed, assume a production environment
# (gunicorn). init_wsgi requires the app instance; calling it without one
# raises TypeError at import time.
if len(sys.argv) == 1:
    init_wsgi(app)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print("Starting app.py main()")
|
print("Starting app.py main()")
|
||||||
aparser = argparse.ArgumentParser(description="Arguments for app.py")
|
aparser = argparse.ArgumentParser(description="Arguments for app.py")
|
||||||
|
@ -516,7 +538,6 @@ if __name__ == "__main__":
|
||||||
aparser.add_argument("--appindex-json", type=str)
|
aparser.add_argument("--appindex-json", type=str)
|
||||||
args = aparser.parse_args()
|
args = aparser.parse_args()
|
||||||
|
|
||||||
config = None
|
|
||||||
with Path(args.config_file).open("r") as fp:
|
with Path(args.config_file).open("r") as fp:
|
||||||
config = list(yaml.safe_load_all(fp))[0]
|
config = list(yaml.safe_load_all(fp))[0]
|
||||||
|
|
||||||
|
@ -527,28 +548,31 @@ if __name__ == "__main__":
|
||||||
else:
|
else:
|
||||||
logging.basicConfig(filename=logfile, level=logging.INFO)
|
logging.basicConfig(filename=logfile, level=logging.INFO)
|
||||||
|
|
||||||
|
"""
|
||||||
# db login
|
# db login
|
||||||
client = MongoClient(
|
client = MongoClient(
|
||||||
"mongodb://{}".format(args.dbaddr),
|
"mongodb://{}".format(args.dbaddr),
|
||||||
username=args.dbuser,
|
username=args.dbuser,
|
||||||
password=args.dbpass,
|
password=args.dbpass,
|
||||||
authSource="valdb",
|
authSource="mongo.db",
|
||||||
authMechanism='SCRAM-SHA-1'
|
authMechanism='SCRAM-SHA-1'
|
||||||
)
|
)
|
||||||
valdb = client.valdb
|
valdb = client.mongo.db
|
||||||
|
"""
|
||||||
|
|
||||||
if args.prepare_db:
|
if args.prepare_db:
|
||||||
with Path(args.sskj_wordlist).open("r") as fp:
|
with Path(args.sskj_wordlist).open("r") as fp:
|
||||||
sskj_wordlist = json.load(fp)
|
sskj_wordlist = json.load(fp)
|
||||||
prepare_app_index(args.appindex_json)
|
prepare_app_index(args.appindex_json, sskj_wordlist)
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
# app index from db
|
# app index from db
|
||||||
with Path(args.appindex_json).open("r") as fp:
|
with Path(args.appindex_json).open("r") as fp:
|
||||||
app_index = json.load(fp)
|
app.config["app_index"] = json.load(fp)
|
||||||
|
|
||||||
# log.info("[*] Starting app.py with config:\n%s".format(config))
|
# log.info("[*] Starting app.py with config:\n%s".format(config))
|
||||||
log.info("[*] Starting app.py with config:\n{}".format(config))
|
log.info("[*] Starting app.py with config:\n{}".format(config))
|
||||||
|
|
||||||
app.run(host=str(config["host"]), port=int(config["port"]))
|
app.run(host=str(config["host"]), port=int(config["port"]))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,3 +4,4 @@ port: 8084
|
||||||
host: localhost
|
host: localhost
|
||||||
logfile: "/var/log/valency_backend.log"
|
logfile: "/var/log/valency_backend.log"
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
---
|
---
|
||||||
debug: True
|
|
||||||
port: 8084
|
port: 8084
|
||||||
host: 0.0.0.0
|
host: 0.0.0.0
|
||||||
logfile: "/var/log/valency_backend.log"
|
logfile: "/var/log/valency_backend.log"
|
||||||
---
|
appindex: /project/data/appindex.json
|
||||||
|
|
||||||
|
# Same as in root Makefile
|
||||||
|
dbaddr: 0.0.0.0:27017
|
||||||
|
dbuser: valuser
|
||||||
|
dbpass: valuserpass
|
||||||
|
|
2
src/backend_flask/db_config.py
Normal file
2
src/backend_flask/db_config.py
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
import os

# Database settings loaded into the Flask config via
# app.config.from_object("db_config"). Each value can be overridden through an
# environment variable of the same name so that production credentials do not
# have to live in source control; the defaults match the development setup.
MONGO_URI = os.environ.get(
    "MONGO_URI",
    "mongodb://valuser:valuserpass@127.0.0.1:27017/valdb",
)
MONGO_AUTH_SOURCE = os.environ.get("MONGO_AUTH_SOURCE", "admin")
|
7
src/backend_flask/entrypoint.sh
Executable file
7
src/backend_flask/entrypoint.sh
Executable file
|
@ -0,0 +1,7 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
pip3 install -e /project/src/pkg/cjvt-corpusparser/.
|
||||||
|
pip3 install -e /project/src/pkg/valency/.
|
||||||
|
pip3 install -e /project/src/pkg/seqparser/.
|
||||||
|
|
||||||
|
sleep 10000
|
|
@ -1,73 +0,0 @@
|
||||||
# Deprecated: headword creation moved to be part of corpusparser,
|
|
||||||
# index creation moved to app.py as a preprocessing (with exit) step
|
|
||||||
|
|
||||||
CORPORA = ["kres", "ssj"]
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
|
|
||||||
valdb = None
|
|
||||||
|
|
||||||
def helper_tid_to_token(tid, tokens):
|
|
||||||
for t in tokens:
|
|
||||||
if t["tid"] == tid:
|
|
||||||
return t
|
|
||||||
return None
|
|
||||||
|
|
||||||
# update entries (add headwords and fuctors for indexing)
|
|
||||||
for corpus in CORPORA:
|
|
||||||
for e in valdb[corpus].find({}):
|
|
||||||
if e["srl_links"] is None:
|
|
||||||
e["headwords"] = []
|
|
||||||
e["functors"] = []
|
|
||||||
else:
|
|
||||||
hw_tids = list(set([x["from"] for x in e["srl_links"]]))
|
|
||||||
hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
|
|
||||||
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
|
|
||||||
e["headwords"] = headwords
|
|
||||||
|
|
||||||
functors = list(set([x["afun"] for x in e["srl_links"]]))
|
|
||||||
e["functors"] = functors
|
|
||||||
|
|
||||||
valdb[corpus].save(e)
|
|
||||||
|
|
||||||
valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
|
|
||||||
valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
|
|
||||||
|
|
||||||
# create app_index (used in frontend, left side word index)
|
|
||||||
tmp_app_index = {c: {} for c in CORPORA}
|
|
||||||
for corpus in CORPORA:
|
|
||||||
res_hws = {}
|
|
||||||
res_fns = {}
|
|
||||||
for e in valdb[corpus].find({}):
|
|
||||||
if "headwords" not in e:
|
|
||||||
continue
|
|
||||||
for hw in e["headwords"]:
|
|
||||||
if hw in res_hws:
|
|
||||||
res_hws[hw] += 1
|
|
||||||
else:
|
|
||||||
res_hws[hw] = 1
|
|
||||||
if "functors" not in e:
|
|
||||||
continue
|
|
||||||
for fn in e["functors"]:
|
|
||||||
if fn in res_fns:
|
|
||||||
res_fns[fn] += 1
|
|
||||||
else:
|
|
||||||
res_fns[fn] = 1
|
|
||||||
|
|
||||||
alphabetical = {}
|
|
||||||
for k, e in res_hws.items():
|
|
||||||
fst = k[0].lower()
|
|
||||||
if fst in alphabetical:
|
|
||||||
alphabetical[fst].append((k, e))
|
|
||||||
else:
|
|
||||||
alphabetical[fst] = [(k, e)]
|
|
||||||
|
|
||||||
for k, e in alphabetical.items():
|
|
||||||
alphabetical[k] = sorted(e, key=lambda x: x[0])
|
|
||||||
tmp_app_index[corpus]["words"] = alphabetical
|
|
||||||
|
|
||||||
functors = [(k, e) for (k, e) in res_fns.items()]
|
|
||||||
functors = sorted(functors, key=lambda x: x[0])
|
|
||||||
tmp_app_index[corpus]["functors"] = functors
|
|
||||||
|
|
||||||
valdb.appindex.update({"dockey": "appindex"}, {"dockey": "appindex", "data": tmp_app_index}, upsert=True)
|
|
Loading…
Reference in New Issue
Block a user