moved db_preparation logic to fill-database phase
This commit is contained in:
parent
1494d4dfed
commit
c2dea0bf70
23
Makefile
23
Makefile
|
@ -12,8 +12,8 @@ SSJ_FILE = "$(MAKE_ROOT)/data/samples/ssj_xml/ssj500k-sl.body.sample.xml"
|
||||||
KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_xml"
|
KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_xml"
|
||||||
KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/samples/kres_srl_json"
|
KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/samples/kres_srl_json"
|
||||||
|
|
||||||
# OUTPUT = "db"
|
OUTPUT = "db"
|
||||||
OUTPUT = "file"
|
# OUTPUT = "file"
|
||||||
OUTDIR = "/tmp/three" # if you're running this in docker, make sure to mount the volume
|
OUTDIR = "/tmp/three" # if you're running this in docker, make sure to mount the volume
|
||||||
DBADDR = "0.0.0.0:27017" # don't use localhost
|
DBADDR = "0.0.0.0:27017" # don't use localhost
|
||||||
|
|
||||||
|
@ -21,6 +21,8 @@ DB_ADM_USER = valadmin
|
||||||
DB_ADM_PASS = valadminpass
|
DB_ADM_PASS = valadminpass
|
||||||
DB_USR_USER = valuser
|
DB_USR_USER = valuser
|
||||||
DB_USR_PASS = valuserpass
|
DB_USR_PASS = valuserpass
|
||||||
|
|
||||||
|
N_CORES = 1
|
||||||
export
|
export
|
||||||
|
|
||||||
.PHONY: python-env fill-database
|
.PHONY: python-env fill-database
|
||||||
|
@ -63,9 +65,11 @@ fill-database: data/samples
|
||||||
--ssj-file $(SSJ_FILE) --kres-srl-folder $(KRES_SRL_FOLDER) \
|
--ssj-file $(SSJ_FILE) --kres-srl-folder $(KRES_SRL_FOLDER) \
|
||||||
--output $(OUTPUT) --outdir $(OUTDIR) --dbaddr $(DBADDR) \
|
--output $(OUTPUT) --outdir $(OUTDIR) --dbaddr $(DBADDR) \
|
||||||
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) \
|
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) \
|
||||||
--cores 2
|
--cores $(N_CORES)
|
||||||
|
|
||||||
|
|
||||||
## Frontend
|
## Frontend
|
||||||
|
|
||||||
## Run from host
|
## Run from host
|
||||||
## See src/frontend_vue/README.md for port settings etc.
|
## See src/frontend_vue/README.md for port settings etc.
|
||||||
frontend-dev:
|
frontend-dev:
|
||||||
|
@ -76,12 +80,15 @@ frontend-prod:
|
||||||
|
|
||||||
|
|
||||||
## Backend
|
## Backend
|
||||||
backend-dev-init: python-env-install
|
|
||||||
cd ./src/backend_flask; python3 app.py \
|
|
||||||
--config-file ./conf_files/dev_conf_init.yaml \
|
|
||||||
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR)
|
|
||||||
|
|
||||||
backend-dev: python-env-install
|
# runs once and exits before the app starts
|
||||||
|
backend-prepare-db:
|
||||||
|
cd ./src/backend_flask; python3 app.py \
|
||||||
|
--config-file ./conf_files/dev_conf.yaml \
|
||||||
|
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
|
||||||
|
--prepare-db
|
||||||
|
|
||||||
|
backend-dev:
|
||||||
cd ./src/backend_flask; python3 app.py \
|
cd ./src/backend_flask; python3 app.py \
|
||||||
--config-file ./conf_files/dev_conf.yaml \
|
--config-file ./conf_files/dev_conf.yaml \
|
||||||
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR)
|
--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR)
|
||||||
|
|
13
README.md
13
README.md
|
@ -47,14 +47,17 @@ If all goes well, we should be able to inspect the database, filled with corpora
|
||||||
### Flask backend (1 container)
|
### Flask backend (1 container)
|
||||||
Relies heavily on the database. Set that up first.
|
Relies heavily on the database. Set that up first.
|
||||||
```bash
|
```bash
|
||||||
|
# spin up container
|
||||||
$ make python-env
|
$ make python-env
|
||||||
|
|
||||||
# development:
|
# install our packages
|
||||||
# run the first time, to prepare the db, then kill
|
$ make python-env-install
|
||||||
# it runs a few minutes, there should be a new collection in the db when finished
|
|
||||||
$ make backend-dev-init
|
|
||||||
|
|
||||||
$ make backend-dev # debug with this one
|
# needs to be run once to modify a new database
|
||||||
|
$ make backend-prepare-db
|
||||||
|
|
||||||
|
# with debugger
|
||||||
|
$ make backend-dev
|
||||||
|
|
||||||
# production
|
# production
|
||||||
$ make backend-prod
|
$ make backend-prod
|
||||||
|
|
|
@ -431,33 +431,8 @@ def api_senses_update():
|
||||||
|
|
||||||
# APP PREFLIGHT ---------------------.
|
# APP PREFLIGHT ---------------------.
|
||||||
|
|
||||||
def prepare_db():
|
def prepare_app_index():
|
||||||
def helper_tid_to_token(tid, tokens):
|
log.info("[*] preparing app_index")
|
||||||
for t in tokens:
|
|
||||||
if t["tid"] == tid:
|
|
||||||
return t
|
|
||||||
return None
|
|
||||||
|
|
||||||
# update entries (add headwords and functors for indexing)
|
|
||||||
for corpus in CORPORA:
|
|
||||||
for e in valdb[corpus].find({}):
|
|
||||||
if e["srl_links"] is None:
|
|
||||||
e["headwords"] = []
|
|
||||||
e["functors"] = []
|
|
||||||
else:
|
|
||||||
hw_tids = list(set([x["from"] for x in e["srl_links"]]))
|
|
||||||
hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
|
|
||||||
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
|
|
||||||
e["headwords"] = headwords
|
|
||||||
|
|
||||||
functors = list(set([x["afun"] for x in e["srl_links"]]))
|
|
||||||
e["functors"] = functors
|
|
||||||
|
|
||||||
valdb[corpus].save(e)
|
|
||||||
|
|
||||||
valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
|
|
||||||
valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
|
|
||||||
|
|
||||||
# create app_index (used in frontend, left side word index)
|
# create app_index (used in frontend, left side word index)
|
||||||
tmp_app_index = {c: {} for c in CORPORA}
|
tmp_app_index = {c: {} for c in CORPORA}
|
||||||
for corpus in CORPORA:
|
for corpus in CORPORA:
|
||||||
|
@ -504,6 +479,7 @@ if __name__ == "__main__":
|
||||||
print("Starting app.py main()")
|
print("Starting app.py main()")
|
||||||
aparser = argparse.ArgumentParser(description="Arguments for app.py")
|
aparser = argparse.ArgumentParser(description="Arguments for app.py")
|
||||||
aparser.add_argument("--config-file", type=str, help="check ./conf_files/")
|
aparser.add_argument("--config-file", type=str, help="check ./conf_files/")
|
||||||
|
aparser.add_argument('--prepare-db', action="store_true", default=False)
|
||||||
aparser.add_argument("--dbuser", type=str)
|
aparser.add_argument("--dbuser", type=str)
|
||||||
aparser.add_argument("--dbpass", type=str)
|
aparser.add_argument("--dbpass", type=str)
|
||||||
aparser.add_argument("--dbaddr", type=str)
|
aparser.add_argument("--dbaddr", type=str)
|
||||||
|
@ -530,8 +506,9 @@ if __name__ == "__main__":
|
||||||
)
|
)
|
||||||
valdb = client.valdb
|
valdb = client.valdb
|
||||||
|
|
||||||
if bool(config["prepare_db"]):
|
if args.prepare_db:
|
||||||
prepare_db()
|
prepare_app_index()
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
# app index from db
|
# app index from db
|
||||||
app_index = (valdb.appindex.find_one({"dockey": "appindex"}))["data"]
|
app_index = (valdb.appindex.find_one({"dockey": "appindex"}))["data"]
|
||||||
|
|
|
@ -3,5 +3,4 @@ debug: True
|
||||||
port: 5004
|
port: 5004
|
||||||
host: localhost
|
host: localhost
|
||||||
logfile: "/var/log/valency_backend.log"
|
logfile: "/var/log/valency_backend.log"
|
||||||
prepare_db: False
|
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
---
|
|
||||||
debug: True
|
|
||||||
port: 5004
|
|
||||||
host: localhost
|
|
||||||
logfile: "/var/log/valency_backend.log"
|
|
||||||
prepare_db: True
|
|
||||||
---
|
|
70
src/backend_flask/preprocess.py
Normal file
70
src/backend_flask/preprocess.py
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
CORPORA = ["kres", "ssj"]
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
valdb = None
|
||||||
|
|
||||||
|
def helper_tid_to_token(tid, tokens):
|
||||||
|
for t in tokens:
|
||||||
|
if t["tid"] == tid:
|
||||||
|
return t
|
||||||
|
return None
|
||||||
|
|
||||||
|
# update entries (add headwords and functors for indexing)
|
||||||
|
for corpus in CORPORA:
|
||||||
|
for e in valdb[corpus].find({}):
|
||||||
|
if e["srl_links"] is None:
|
||||||
|
e["headwords"] = []
|
||||||
|
e["functors"] = []
|
||||||
|
else:
|
||||||
|
hw_tids = list(set([x["from"] for x in e["srl_links"]]))
|
||||||
|
hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
|
||||||
|
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
|
||||||
|
e["headwords"] = headwords
|
||||||
|
|
||||||
|
functors = list(set([x["afun"] for x in e["srl_links"]]))
|
||||||
|
e["functors"] = functors
|
||||||
|
|
||||||
|
valdb[corpus].save(e)
|
||||||
|
|
||||||
|
valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
|
||||||
|
valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
|
||||||
|
|
||||||
|
# create app_index (used in frontend, left side word index)
|
||||||
|
tmp_app_index = {c: {} for c in CORPORA}
|
||||||
|
for corpus in CORPORA:
|
||||||
|
res_hws = {}
|
||||||
|
res_fns = {}
|
||||||
|
for e in valdb[corpus].find({}):
|
||||||
|
if "headwords" not in e:
|
||||||
|
continue
|
||||||
|
for hw in e["headwords"]:
|
||||||
|
if hw in res_hws:
|
||||||
|
res_hws[hw] += 1
|
||||||
|
else:
|
||||||
|
res_hws[hw] = 1
|
||||||
|
if "functors" not in e:
|
||||||
|
continue
|
||||||
|
for fn in e["functors"]:
|
||||||
|
if fn in res_fns:
|
||||||
|
res_fns[fn] += 1
|
||||||
|
else:
|
||||||
|
res_fns[fn] = 1
|
||||||
|
|
||||||
|
alphabetical = {}
|
||||||
|
for k, e in res_hws.items():
|
||||||
|
fst = k[0].lower()
|
||||||
|
if fst in alphabetical:
|
||||||
|
alphabetical[fst].append((k, e))
|
||||||
|
else:
|
||||||
|
alphabetical[fst] = [(k, e)]
|
||||||
|
|
||||||
|
for k, e in alphabetical.items():
|
||||||
|
alphabetical[k] = sorted(e, key=lambda x: x[0])
|
||||||
|
tmp_app_index[corpus]["words"] = alphabetical
|
||||||
|
|
||||||
|
functors = [(k, e) for (k, e) in res_fns.items()]
|
||||||
|
functors = sorted(functors, key=lambda x: x[0])
|
||||||
|
tmp_app_index[corpus]["functors"] = functors
|
||||||
|
|
||||||
|
valdb.appindex.update({"dockey": "appindex"}, {"dockey": "appindex", "data": tmp_app_index}, upsert=True)
|
28
src/frontend_vue/package-lock.json
generated
28
src/frontend_vue/package-lock.json
generated
|
@ -3632,14 +3632,12 @@
|
||||||
"balanced-match": {
|
"balanced-match": {
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true
|
||||||
"optional": true
|
|
||||||
},
|
},
|
||||||
"brace-expansion": {
|
"brace-expansion": {
|
||||||
"version": "1.1.11",
|
"version": "1.1.11",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"balanced-match": "^1.0.0",
|
"balanced-match": "^1.0.0",
|
||||||
"concat-map": "0.0.1"
|
"concat-map": "0.0.1"
|
||||||
|
@ -3654,20 +3652,17 @@
|
||||||
"code-point-at": {
|
"code-point-at": {
|
||||||
"version": "1.1.0",
|
"version": "1.1.0",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true
|
||||||
"optional": true
|
|
||||||
},
|
},
|
||||||
"concat-map": {
|
"concat-map": {
|
||||||
"version": "0.0.1",
|
"version": "0.0.1",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true
|
||||||
"optional": true
|
|
||||||
},
|
},
|
||||||
"console-control-strings": {
|
"console-control-strings": {
|
||||||
"version": "1.1.0",
|
"version": "1.1.0",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true
|
||||||
"optional": true
|
|
||||||
},
|
},
|
||||||
"core-util-is": {
|
"core-util-is": {
|
||||||
"version": "1.0.2",
|
"version": "1.0.2",
|
||||||
|
@ -3784,8 +3779,7 @@
|
||||||
"inherits": {
|
"inherits": {
|
||||||
"version": "2.0.3",
|
"version": "2.0.3",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true
|
||||||
"optional": true
|
|
||||||
},
|
},
|
||||||
"ini": {
|
"ini": {
|
||||||
"version": "1.3.5",
|
"version": "1.3.5",
|
||||||
|
@ -3797,7 +3791,6 @@
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"number-is-nan": "^1.0.0"
|
"number-is-nan": "^1.0.0"
|
||||||
}
|
}
|
||||||
|
@ -3812,7 +3805,6 @@
|
||||||
"version": "3.0.4",
|
"version": "3.0.4",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"brace-expansion": "^1.1.7"
|
"brace-expansion": "^1.1.7"
|
||||||
}
|
}
|
||||||
|
@ -3820,14 +3812,12 @@
|
||||||
"minimist": {
|
"minimist": {
|
||||||
"version": "0.0.8",
|
"version": "0.0.8",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true
|
||||||
"optional": true
|
|
||||||
},
|
},
|
||||||
"minipass": {
|
"minipass": {
|
||||||
"version": "2.2.4",
|
"version": "2.2.4",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"safe-buffer": "^5.1.1",
|
"safe-buffer": "^5.1.1",
|
||||||
"yallist": "^3.0.0"
|
"yallist": "^3.0.0"
|
||||||
|
@ -3846,7 +3836,6 @@
|
||||||
"version": "0.5.1",
|
"version": "0.5.1",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"minimist": "0.0.8"
|
"minimist": "0.0.8"
|
||||||
}
|
}
|
||||||
|
@ -3927,8 +3916,7 @@
|
||||||
"number-is-nan": {
|
"number-is-nan": {
|
||||||
"version": "1.0.1",
|
"version": "1.0.1",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true
|
||||||
"optional": true
|
|
||||||
},
|
},
|
||||||
"object-assign": {
|
"object-assign": {
|
||||||
"version": "4.1.1",
|
"version": "4.1.1",
|
||||||
|
@ -3940,7 +3928,6 @@
|
||||||
"version": "1.4.0",
|
"version": "1.4.0",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"wrappy": "1"
|
"wrappy": "1"
|
||||||
}
|
}
|
||||||
|
@ -4062,7 +4049,6 @@
|
||||||
"version": "1.0.2",
|
"version": "1.0.2",
|
||||||
"bundled": true,
|
"bundled": true,
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"code-point-at": "^1.0.0",
|
"code-point-at": "^1.0.0",
|
||||||
"is-fullwidth-code-point": "^1.0.0",
|
"is-fullwidth-code-point": "^1.0.0",
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 86e56767ddb72b83adcb144c32373b3e92e215dc
|
Subproject commit f0b0abac1bd32ad6e9e29e7b737e4162e28568c2
|
Loading…
Reference in New Issue
Block a user