Prepared app for production (removed global variables, added flask-pymongo as db driver)
This commit is contained in:
		
							parent
							
								
									707034153c
								
							
						
					
					
						commit
						2ff339e24c
					
				
							
								
								
									
										18
									
								
								Dockerfile-backend-flask
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								Dockerfile-backend-flask
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,18 @@
 | 
			
		||||
# Production image for the Flask backend, built on top of the
# cjvt-python-env base image.
FROM cjvt-python-env

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir gunicorn

# Create the whole project layout in a single layer.
RUN mkdir -p /project/src/backend_flask /project/src/pkg /project/data

COPY src/backend_flask /project/src/backend_flask
COPY src/pkg /project/src/pkg

# Trailing slashes make it explicit that the destination is a directory.
COPY data/appindex.json /project/data/
COPY src/backend_flask/conf_files/prod_conf.yaml /project/

COPY src/backend_flask/entrypoint.sh /entrypoint.sh

ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
 | 
			
		||||
							
								
								
									
										5
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								Makefile
									
									
									
									
									
								
							@ -124,12 +124,15 @@ backend-dev:
 | 
			
		||||
		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
 | 
			
		||||
		--appindex-json $(APPINDEX_PATH)
 | 
			
		||||
 | 
			
		||||
backend-prod:
 | 
			
		||||
backend-prod-old:
 | 
			
		||||
	cd ./src/backend_flask; python3 app.py \
 | 
			
		||||
		--config-file ./conf_files/prod_conf.yaml \
 | 
			
		||||
		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
 | 
			
		||||
		--appindex-json $(APPINDEX_PATH)
 | 
			
		||||
 | 
			
		||||
backend-prod:
 | 
			
		||||
	cd ./src/backend_flask; $(MAKE) prod
 | 
			
		||||
 | 
			
		||||
## add sskj senses to db (generated with pkg/seqparser)
 | 
			
		||||
sskj-senses:
 | 
			
		||||
	python3 ./src/pkg/seqparser/seqparser/main.py \
 | 
			
		||||
 | 
			
		||||
@ -1,26 +1,25 @@
 | 
			
		||||
FROM ubuntu:16.04
 | 
			
		||||
FROM ubuntu:18.04
 | 
			
		||||
 | 
			
		||||
RUN apt-get update --fix-missing
 | 
			
		||||
RUN apt-get install -y \
 | 
			
		||||
vim \
 | 
			
		||||
python3 \
 | 
			
		||||
python3-pip \
 | 
			
		||||
sshfs
 | 
			
		||||
sshfs \
 | 
			
		||||
curl
 | 
			
		||||
 | 
			
		||||
RUN pip3 install --upgrade pip
 | 
			
		||||
 | 
			
		||||
RUN pip3 install \
 | 
			
		||||
	lxml \
 | 
			
		||||
	pandas \
 | 
			
		||||
	sklearn \
 | 
			
		||||
	argparse \
 | 
			
		||||
	pyyaml \
 | 
			
		||||
	pathlib \
 | 
			
		||||
	flask \
 | 
			
		||||
	flask_cors \
 | 
			
		||||
	pymongo \
 | 
			
		||||
	flask
 | 
			
		||||
 | 
			
		||||
RUN apt-get install -y \
 | 
			
		||||
	curl
 | 
			
		||||
	flask-pymongo
 | 
			
		||||
 | 
			
		||||
ENV PYTHONIOENCODING UTF-8
 | 
			
		||||
 | 
			
		||||
RUN pip3 install \
 | 
			
		||||
	pyyaml \
 | 
			
		||||
	flask_cors
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,4 @@
 | 
			
		||||
IMAGE_NAME="cjvt-python-env"
 | 
			
		||||
IMAGE_NAME="cjvt-python-env"  # don't change, used in backend_flask/Makefile
 | 
			
		||||
CNNAME="python-env"
 | 
			
		||||
 | 
			
		||||
all: build run
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										5
									
								
								dockerfiles/python-env/entrypoint.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										5
									
								
								dockerfiles/python-env/entrypoint.sh
									
									
									
									
									
										Executable file
									
								
							@ -0,0 +1,5 @@
 | 
			
		||||
#!/bin/bash
 | 
			
		||||
 | 
			
		||||
echo "testing entrypoint."
 | 
			
		||||
$(exit 1)
 | 
			
		||||
exit 0
 | 
			
		||||
							
								
								
									
										16
									
								
								src/backend_flask/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								src/backend_flask/Makefile
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,16 @@
 | 
			
		||||
# Build and run the Flask backend inside a Docker container.
IMG := backend-flask
CNT := backend_flask

# None of these targets produce a file of the same name.
.PHONY: clean run build build-cjvt-python-env

# Remove a previous container. The '-' prefix ignores the error docker
# reports when no such container exists.
clean:
	-docker rm -f $(CNT)

# Start a fresh container in the background and follow its logs.
run: clean build
	docker run -d --net host --name $(CNT) $(IMG)
	docker logs -f $(CNT)

# The Docker build context is the repository root, two levels up.
# '&&' stops the recipe if the directory change fails.
build: build-cjvt-python-env
	cd ../.. && docker build . -f Dockerfile-backend-flask -t $(IMG)

# Build the base image this image derives from. '-C' makes the sub-make fail
# cleanly if the directory is missing instead of re-running 'build' here.
build-cjvt-python-env:
	$(MAKE) -C ../../dockerfiles/python-env build
 | 
			
		||||
@ -26,26 +26,21 @@ from email.mime.text import MIMEText
 | 
			
		||||
from copy import deepcopy as DC
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from pymongo import MongoClient
 | 
			
		||||
from flask_pymongo import PyMongo
 | 
			
		||||
import pymongo
 | 
			
		||||
import argparse
 | 
			
		||||
 | 
			
		||||
# some db collections
 | 
			
		||||
USERS_COLL = "users"
 | 
			
		||||
TOKENS_COLL = "usertokens"
 | 
			
		||||
SENSES_COLL = "senses"
 | 
			
		||||
SENSEMAP_COLL = "sensemap"
 | 
			
		||||
 | 
			
		||||
# pre-generated data (gui leftside word index)
 | 
			
		||||
CORPORA = ["ssj", "kres"]
 | 
			
		||||
app_index = None
 | 
			
		||||
sskj_wordlist = None  # used by _is_banned(hw)
 | 
			
		||||
BANNED_HEADWORDS = ["biti"]
 | 
			
		||||
QUERY_LIMIT = 1000  # Some headwords contain thousands of examples - not practical for the app
 | 
			
		||||
 | 
			
		||||
log = logging.getLogger(__name__)
 | 
			
		||||
valdb = None
 | 
			
		||||
app = Flask(__name__)
 | 
			
		||||
 | 
			
		||||
app.config.from_object("db_config")
 | 
			
		||||
mongo = PyMongo(app)
 | 
			
		||||
 | 
			
		||||
app.config["CORPORA"] = ["ssj", "kres"]
 | 
			
		||||
app.config["BANNED_HEADWORDS"] = ["biti"]
 | 
			
		||||
app.config["QUERY_LIMIT"] = 1000
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# when running vuejs via webpack
 | 
			
		||||
# CORS(app)
 | 
			
		||||
@ -60,7 +55,7 @@ CORS(app)
 | 
			
		||||
@app.route("/api/dev")
 | 
			
		||||
def api_dev():
 | 
			
		||||
    print("DEV")
 | 
			
		||||
    cur = valdb.kres.find({"headwords": "nagovarjati"})
 | 
			
		||||
    cur = mongo.db.kres.find({"headwords": "nagovarjati"})
 | 
			
		||||
    frames = []
 | 
			
		||||
    for ent in cur:
 | 
			
		||||
        frames += frames_from_db_entry(ent)
 | 
			
		||||
@ -73,12 +68,12 @@ def api_dev():
 | 
			
		||||
@app.route("/api/words/<corpus>")
 | 
			
		||||
def api_words(corpus):
 | 
			
		||||
    return json.dumps({
 | 
			
		||||
        "sorted_words": app_index[corpus]["words"], # todo - make corpus as arg
 | 
			
		||||
        "sorted_words": app.config["app_index"][corpus]["words"], # todo - make corpus as arg
 | 
			
		||||
    })
 | 
			
		||||
 | 
			
		||||
@app.route("/api/functors/<corpus>")
 | 
			
		||||
def api_functors(corpus):
 | 
			
		||||
    return json.dumps(app_index[corpus]["functors"])
 | 
			
		||||
    return json.dumps(app.config["app_index"][corpus]["functors"])
 | 
			
		||||
 | 
			
		||||
# INDEX SELECTION -------------------^
 | 
			
		||||
 | 
			
		||||
@ -99,7 +94,7 @@ def api_register():
 | 
			
		||||
    ):
 | 
			
		||||
        return "ERR"
 | 
			
		||||
    email_hash = hashlib.sha256(email.encode("utf-8")).hexdigest()
 | 
			
		||||
    existing = list(valdb[USERS_COLL].find({
 | 
			
		||||
    existing = list(mongo.db.users.find({
 | 
			
		||||
        "$or": [{"username": username}, {"email": email_hash}]
 | 
			
		||||
    }))
 | 
			
		||||
    if len(existing) > 0:
 | 
			
		||||
@ -110,7 +105,7 @@ def api_register():
 | 
			
		||||
            password.encode("utf-8")).hexdigest(),
 | 
			
		||||
        "email": email_hash
 | 
			
		||||
    }
 | 
			
		||||
    valdb[USERS_COLL].insert(entry)
 | 
			
		||||
    mongo.db.users.insert(entry)
 | 
			
		||||
    return "OK"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -122,7 +117,7 @@ def api_login():
 | 
			
		||||
    password = data["password"]
 | 
			
		||||
    hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
 | 
			
		||||
 | 
			
		||||
    db_user = list(valdb[USERS_COLL].find({
 | 
			
		||||
    db_user = list(mongo.db.users.find({
 | 
			
		||||
        "username": username,
 | 
			
		||||
        "hpass": hpass
 | 
			
		||||
    }))
 | 
			
		||||
@ -136,7 +131,7 @@ def api_login():
 | 
			
		||||
        "date": datetime.datetime.utcnow(),
 | 
			
		||||
        "token": token
 | 
			
		||||
    }
 | 
			
		||||
    valdb[TOKENS_COLL].update(
 | 
			
		||||
    mongo.db.usertokens.update(
 | 
			
		||||
        {"username": token_entry["username"]},
 | 
			
		||||
        token_entry,
 | 
			
		||||
        upsert=True
 | 
			
		||||
@ -179,7 +174,7 @@ def api_new_pass():
 | 
			
		||||
    username = data["username"]
 | 
			
		||||
    email = data["email"]
 | 
			
		||||
    hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
 | 
			
		||||
    db_res = list(valdb[USERS_COLL].find({
 | 
			
		||||
    db_res = list(mongo.db.users.find({
 | 
			
		||||
        "username": username,
 | 
			
		||||
        "email": hemail
 | 
			
		||||
    }))
 | 
			
		||||
@ -191,7 +186,7 @@ def api_new_pass():
 | 
			
		||||
        string.ascii_letters + string.digits) for i in range(10)])
 | 
			
		||||
    # update locally
 | 
			
		||||
    hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
 | 
			
		||||
    valdb[USERS_COLL].update(
 | 
			
		||||
    mongo.db.users.update(
 | 
			
		||||
        {
 | 
			
		||||
            "username": username,
 | 
			
		||||
            "email": hemail
 | 
			
		||||
@ -209,12 +204,12 @@ def token_to_username(token):
 | 
			
		||||
    key = {
 | 
			
		||||
        "token": token
 | 
			
		||||
    }
 | 
			
		||||
    res = list(valdb[TOKENS_COLL].find(key))
 | 
			
		||||
    res = list(mongo.db.usertokens.find(key))
 | 
			
		||||
    if len(res) != 1:
 | 
			
		||||
        return None
 | 
			
		||||
    username = res[0]["username"]
 | 
			
		||||
    # update deletion interval
 | 
			
		||||
    valdb[TOKENS_COLL].update(
 | 
			
		||||
    mongo.db.usertokens.update(
 | 
			
		||||
        key, {"$set": {"date": datetime.datetime.utcnow()}})
 | 
			
		||||
    return username
 | 
			
		||||
 | 
			
		||||
@ -249,19 +244,19 @@ def api_get_frames():
 | 
			
		||||
    RF = reduce_functions[rf_name]["f"]
 | 
			
		||||
 | 
			
		||||
    corpus = request.args.get("cor")
 | 
			
		||||
    if corpus not in CORPORA:
 | 
			
		||||
    if corpus not in app.config["CORPORA"]:
 | 
			
		||||
        return json.dumps({"error": "cor={kres,ssj}"})
 | 
			
		||||
 | 
			
		||||
    cur = valdb[corpus].find({"headwords": hw})
 | 
			
		||||
    cur = mongo.db[corpus].find({"headwords": hw})
 | 
			
		||||
    frames = []
 | 
			
		||||
    for ent in cur[:QUERY_LIMIT]:
 | 
			
		||||
    for ent in cur[:app.config["QUERY_LIMIT"]]:
 | 
			
		||||
        frames += frames_from_db_entry(ent)  # pre-process this step for prod TODO
 | 
			
		||||
    cur.close()
 | 
			
		||||
 | 
			
		||||
    # filter by relevant hw
 | 
			
		||||
    frames = [x for x in frames if x.hw == hw]
 | 
			
		||||
 | 
			
		||||
    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
 | 
			
		||||
    ret_frames = RF(frames, mongo.db.sensemap)
 | 
			
		||||
 | 
			
		||||
    json_ret = {"frames": []}
 | 
			
		||||
    for frame in ret_frames:
 | 
			
		||||
@ -302,12 +297,12 @@ def api_get_functor_frames():
 | 
			
		||||
    RF = reduce_functions[rf_name]["f"]
 | 
			
		||||
 | 
			
		||||
    corpus = request.args.get("cor")
 | 
			
		||||
    if corpus not in CORPORA:
 | 
			
		||||
    if corpus not in app.config["CORPORA"]:
 | 
			
		||||
        return json.dumps({"error": "cor={kres,ssj}"})
 | 
			
		||||
 | 
			
		||||
    cur = valdb[corpus].find({"functors": functor})
 | 
			
		||||
    cur = mongo.db[corpus].find({"functors": functor})
 | 
			
		||||
    frames = []
 | 
			
		||||
    for ent in cur[:QUERY_LIMIT]:
 | 
			
		||||
    for ent in cur[:app.config["QUERY_LIMIT"]]:
 | 
			
		||||
        frames += frames_from_db_entry(ent)  # pre-process this step for prod TODO
 | 
			
		||||
    cur.close()
 | 
			
		||||
 | 
			
		||||
@ -315,7 +310,7 @@ def api_get_functor_frames():
 | 
			
		||||
    frames = [x for x in frames if functor in x.get_functors()]
 | 
			
		||||
 | 
			
		||||
    # raw_frames = vallex.functors_index[functor]  # TODO
 | 
			
		||||
    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
 | 
			
		||||
    ret_frames = RF(frames, mongo.db.sensemap)
 | 
			
		||||
    ret_frames = _aggregate_by_hw(ret_frames)
 | 
			
		||||
 | 
			
		||||
    json_ret = {"frames": []}
 | 
			
		||||
@ -334,10 +329,10 @@ def api_get_functor_frames():
 | 
			
		||||
def api_senses_get():
 | 
			
		||||
    # returns senses and mapping for hw
 | 
			
		||||
    hw = request.args.get("hw")
 | 
			
		||||
    senses = list(valdb[SENSES_COLL].find({
 | 
			
		||||
    senses = list(mongo.db.senses.find({
 | 
			
		||||
        "hw": hw
 | 
			
		||||
    }))
 | 
			
		||||
    sense_map_query = list(valdb[SENSEMAP_COLL].find({
 | 
			
		||||
    sense_map_query = list(mongo.db.sensemap.find({
 | 
			
		||||
        "hw": hw
 | 
			
		||||
    }))
 | 
			
		||||
    # aggregation by max date possible on DB side
 | 
			
		||||
@ -417,7 +412,7 @@ def api_senses_update():
 | 
			
		||||
        print(ns)
 | 
			
		||||
 | 
			
		||||
        # insert into db
 | 
			
		||||
        valdb[SENSES_COLL].insert(ns)
 | 
			
		||||
        mongo.db.senses.insert(ns)
 | 
			
		||||
 | 
			
		||||
    # replace tmp_id with mongo's _id
 | 
			
		||||
    for ssj_id, el in sense_map.items():
 | 
			
		||||
@ -432,7 +427,7 @@ def api_senses_update():
 | 
			
		||||
            "date": datetime.datetime.utcnow()
 | 
			
		||||
        }
 | 
			
		||||
        # vallex.db["v2_sense_map"].update(key, data, upsert=True)
 | 
			
		||||
        valdb[SENSEMAP_COLL].insert(data)
 | 
			
		||||
        mongo.db.sensemap.insert(data)
 | 
			
		||||
    return "OK"
 | 
			
		||||
 | 
			
		||||
# SENSES ----------------------------^
 | 
			
		||||
@ -441,7 +436,7 @@ def api_senses_update():
 | 
			
		||||
# APP PREFLIGHT ---------------------.
 | 
			
		||||
def _is_banned(hw):
 | 
			
		||||
    banned = True
 | 
			
		||||
    if hw in BANNED_HEADWORDS:
 | 
			
		||||
    if hw in app.config["BANNED_HEADWORDS"]:
 | 
			
		||||
        banned = True
 | 
			
		||||
    elif hw in sskj_wordlist["wordlist"]:
 | 
			
		||||
        banned = False
 | 
			
		||||
@ -449,17 +444,17 @@ def _is_banned(hw):
 | 
			
		||||
        banned = False
 | 
			
		||||
    return banned
 | 
			
		||||
 | 
			
		||||
def prepare_app_index(appindex_json):
 | 
			
		||||
def prepare_app_index(appindex_json, sskj_wordlist):
 | 
			
		||||
    log.info("[*] preparing app_index")
 | 
			
		||||
    # create app_index (used in frontend, left side word index)
 | 
			
		||||
    tmp_app_index = {c: {} for c in CORPORA}
 | 
			
		||||
    for corpus in CORPORA:
 | 
			
		||||
    tmp_app_index = {c: {} for c in app.config["CORPORA"]}
 | 
			
		||||
    for corpus in app.config["CORPORA"]:
 | 
			
		||||
        res_hws = {}
 | 
			
		||||
        res_fns = {}
 | 
			
		||||
 | 
			
		||||
        nentries = valdb[corpus].count()
 | 
			
		||||
        nentries = mongo.db[corpus].count()
 | 
			
		||||
        idx = 0
 | 
			
		||||
        for e in valdb[corpus].find({}):
 | 
			
		||||
        for e in mongo.db[corpus].find({}):
 | 
			
		||||
            if "headwords" not in e:
 | 
			
		||||
                continue
 | 
			
		||||
            for hw in e["headwords"]:
 | 
			
		||||
@ -504,6 +499,33 @@ def prepare_app_index(appindex_json):
 | 
			
		||||
# APP PREFLIGHT ---------------------^
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def init_wsgi(app):
    """Configure ``app`` for running under a WSGI server (no CLI arguments).

    Reads the first YAML document of ``/project/prod_conf.yaml``, sets up
    logging to the configured ``logfile`` and loads the pre-generated app
    index (path taken from the ``appindex`` config key) into
    ``app.config["app_index"]``.

    Args:
        app: the Flask application object to configure.

    Raises:
        OSError: if the config file or the app index file cannot be opened.
        KeyError: if ``logfile`` or ``appindex`` is missing from the config.
    """
    with Path("/project/prod_conf.yaml").open("r") as fp:
        config = list(yaml.safe_load_all(fp))[0]

    app.debug = False
    logfile = config["logfile"]
    if app.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(filename=logfile, level=logging.INFO)

    # app index from the pre-generated json file; stored in app.config so
    # that request handlers can reach it without a module-level global
    with Path(config["appindex"]).open("r") as fp:
        app.config["app_index"] = json.load(fp)

    # NOTE(review): the config is logged verbatim; if it contains database
    # credentials they will end up in the log file - confirm this is intended.
    log.info("[*] Starting app.py with config:\n{}".format(config))


# if we don't pass arguments, assume production environment (gunicorn)
if len(sys.argv) == 1:
    # init_wsgi requires the application object; calling it without an
    # argument raised TypeError.
    init_wsgi(app)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    print("Starting app.py main()")
 | 
			
		||||
    aparser = argparse.ArgumentParser(description="Arguments for app.py")
 | 
			
		||||
@ -516,7 +538,6 @@ if __name__ == "__main__":
 | 
			
		||||
    aparser.add_argument("--appindex-json", type=str)
 | 
			
		||||
    args = aparser.parse_args()
 | 
			
		||||
 | 
			
		||||
    config = None
 | 
			
		||||
    with Path(args.config_file).open("r") as fp:
 | 
			
		||||
        config = list(yaml.safe_load_all(fp))[0]
 | 
			
		||||
 | 
			
		||||
@ -527,28 +548,31 @@ if __name__ == "__main__":
 | 
			
		||||
    else:
 | 
			
		||||
        logging.basicConfig(filename=logfile, level=logging.INFO)
 | 
			
		||||
 | 
			
		||||
    """
 | 
			
		||||
    # db login
 | 
			
		||||
    client = MongoClient(
 | 
			
		||||
        "mongodb://{}".format(args.dbaddr),
 | 
			
		||||
        username=args.dbuser,
 | 
			
		||||
        password=args.dbpass,
 | 
			
		||||
        authSource="valdb",
 | 
			
		||||
        authSource="mongo.db",
 | 
			
		||||
        authMechanism='SCRAM-SHA-1'
 | 
			
		||||
    )
 | 
			
		||||
    valdb = client.valdb
 | 
			
		||||
    valdb = client.mongo.db
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    if args.prepare_db:
 | 
			
		||||
        with Path(args.sskj_wordlist).open("r") as fp:
 | 
			
		||||
            sskj_wordlist = json.load(fp)
 | 
			
		||||
        prepare_app_index(args.appindex_json)
 | 
			
		||||
        prepare_app_index(args.appindex_json, sskj_wordlist)
 | 
			
		||||
        sys.exit()
 | 
			
		||||
 | 
			
		||||
    # app index from db
 | 
			
		||||
    with Path(args.appindex_json).open("r") as fp:
 | 
			
		||||
        app_index = json.load(fp)
 | 
			
		||||
        app.config["app_index"] = json.load(fp)
 | 
			
		||||
 | 
			
		||||
    # log.info("[*] Starting app.py with config:\n%s".format(config))
 | 
			
		||||
    log.info("[*] Starting app.py with config:\n{}".format(config))
 | 
			
		||||
 | 
			
		||||
    app.run(host=str(config["host"]), port=int(config["port"]))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -4,3 +4,4 @@ port: 8084
 | 
			
		||||
host: localhost
 | 
			
		||||
logfile: "/var/log/valency_backend.log"
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,6 +1,10 @@
 | 
			
		||||
---
 | 
			
		||||
debug: True
 | 
			
		||||
port: 8084
 | 
			
		||||
host: 0.0.0.0
 | 
			
		||||
logfile: "/var/log/valency_backend.log"
 | 
			
		||||
---
 | 
			
		||||
appindex: /project/data/appindex.json
 | 
			
		||||
 | 
			
		||||
# Same as in root Makefile
 | 
			
		||||
dbaddr: 0.0.0.0:27017
 | 
			
		||||
dbuser: valuser
 | 
			
		||||
dbpass: valuserpass
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										2
									
								
								src/backend_flask/db_config.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								src/backend_flask/db_config.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,2 @@
 | 
			
		||||
"""Flask-PyMongo connection settings, loaded via ``app.config.from_object``."""
import os

# Both values can be overridden through environment variables of the same
# name; the defaults are the original hard-coded values.
MONGO_URI = os.environ.get(
    "MONGO_URI", "mongodb://valuser:valuserpass@127.0.0.1:27017/valdb")
MONGO_AUTH_SOURCE = os.environ.get("MONGO_AUTH_SOURCE", "admin")
 | 
			
		||||
							
								
								
									
										7
									
								
								src/backend_flask/entrypoint.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										7
									
								
								src/backend_flask/entrypoint.sh
									
									
									
									
									
										Executable file
									
								
							@ -0,0 +1,7 @@
 | 
			
		||||
#!/bin/bash
 | 
			
		||||
 | 
			
		||||
pip3 install -e /project/src/pkg/cjvt-corpusparser/.
 | 
			
		||||
pip3 install -e /project/src/pkg/valency/.
 | 
			
		||||
pip3 install -e /project/src/pkg/seqparser/.
 | 
			
		||||
 | 
			
		||||
sleep 10000
 | 
			
		||||
@ -1,73 +0,0 @@
 | 
			
		||||
# Deprecated: headword creation moved to be part of corpusparser,
 | 
			
		||||
# index creation moved to app.py as a preprocessing (with exit) step
 | 
			
		||||
 | 
			
		||||
CORPORA = ["kres", "ssj"]
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
 | 
			
		||||
	valdb = None
 | 
			
		||||
 | 
			
		||||
    def helper_tid_to_token(tid, tokens):
 | 
			
		||||
        for t in tokens:
 | 
			
		||||
            if t["tid"] == tid:
 | 
			
		||||
                return t
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    # update entries (add headwords and fuctors for indexing)
 | 
			
		||||
    for corpus in CORPORA:
 | 
			
		||||
        for e in valdb[corpus].find({}):
 | 
			
		||||
            if e["srl_links"] is None:
 | 
			
		||||
                e["headwords"] = []
 | 
			
		||||
                e["functors"] = []
 | 
			
		||||
            else:
 | 
			
		||||
                hw_tids = list(set([x["from"] for x in e["srl_links"]]))
 | 
			
		||||
                hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
 | 
			
		||||
                headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
 | 
			
		||||
                e["headwords"] = headwords
 | 
			
		||||
 | 
			
		||||
                functors = list(set([x["afun"] for x in e["srl_links"]]))
 | 
			
		||||
                e["functors"] = functors
 | 
			
		||||
 | 
			
		||||
            valdb[corpus].save(e)
 | 
			
		||||
 | 
			
		||||
        valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
 | 
			
		||||
        valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
 | 
			
		||||
 | 
			
		||||
    # create app_index (used in frontend, left side word index)
 | 
			
		||||
    tmp_app_index = {c: {} for c in CORPORA}
 | 
			
		||||
    for corpus in CORPORA:
 | 
			
		||||
        res_hws = {}
 | 
			
		||||
        res_fns = {}
 | 
			
		||||
        for e in valdb[corpus].find({}):
 | 
			
		||||
            if "headwords" not in e:
 | 
			
		||||
                continue
 | 
			
		||||
            for hw in e["headwords"]:
 | 
			
		||||
                if hw in res_hws:
 | 
			
		||||
                    res_hws[hw] += 1
 | 
			
		||||
                else:
 | 
			
		||||
                    res_hws[hw] = 1
 | 
			
		||||
            if "functors" not in e:
 | 
			
		||||
                continue
 | 
			
		||||
            for fn in e["functors"]:
 | 
			
		||||
                if fn in res_fns:
 | 
			
		||||
                    res_fns[fn] += 1
 | 
			
		||||
                else:
 | 
			
		||||
                    res_fns[fn] = 1
 | 
			
		||||
 | 
			
		||||
        alphabetical = {}
 | 
			
		||||
        for k, e in res_hws.items():
 | 
			
		||||
            fst = k[0].lower()
 | 
			
		||||
            if fst in alphabetical:
 | 
			
		||||
                alphabetical[fst].append((k, e))
 | 
			
		||||
            else:
 | 
			
		||||
                alphabetical[fst] = [(k, e)]
 | 
			
		||||
 | 
			
		||||
        for k, e in alphabetical.items():
 | 
			
		||||
            alphabetical[k] = sorted(e, key=lambda x: x[0]) 
 | 
			
		||||
        tmp_app_index[corpus]["words"] = alphabetical
 | 
			
		||||
 | 
			
		||||
        functors = [(k, e) for (k, e) in res_fns.items()]
 | 
			
		||||
        functors = sorted(functors, key=lambda x: x[0])
 | 
			
		||||
        tmp_app_index[corpus]["functors"] = functors
 | 
			
		||||
 | 
			
		||||
    valdb.appindex.update({"dockey": "appindex"}, {"dockey": "appindex", "data": tmp_app_index}, upsert=True)
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user