# cjvt-valency/Makefile

# 141 lines
# 4.4 KiB
# Makefile

# Configuration for all components required to create and fill the database
# and to instantiate the backend and frontend.

# Directory make was started in. Simply expanded (:=) so `pwd` runs once at
# parse time instead of on every reference (and on every exported recipe
# environment, since all variables are exported below).
MAKE_ROOT := $(shell pwd)

### Input data
# ssj500k was received as a single .xml file.
# kres is composed of many .xml files; its srl tags were generated into
# separate .json files (for each kres.xml file there is a kres.json file).
# SSJ_FILE = "$(MAKE_ROOT)/data/samples/ssj_xml/ssj500k-sl.body.sample.xml"
SSJ_FILE = "$(MAKE_ROOT)/data/ssj_file_link"
# KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_xml"
# KRES_FOLDER = "$(MAKE_ROOT)/data/kres_xml_folder_link"
KRES_FOLDER = "/home/kristjan/kres_data/payload/kres_xml"
# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/samples/kres_srl_json"
# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/kres_json_folder_link"
KRES_SRL_FOLDER = "/home/kristjan/kres_data/payload/kres_json"

# This file comes with the source code. Make sure you unpack it and name it right.
SSKJ_WORDLIST = "$(MAKE_ROOT)/data/wordlist.json"
SSKJ_JSON = "$(MAKE_ROOT)/data/sskj_senses.json"

# Used for pre-generating the index of all headwords and functors.
APPINDEX_PATH = "$(MAKE_ROOT)/data/appindex.json"

# Value passed as --output to the corpus parser: "db" or "file".
OUTPUT = "db"
# OUTPUT = "file"

# Value passed as --outdir. If you're running this in docker, make sure to
# mount the volume.
# NOTE: comments are kept on their own lines here; a trailing `# comment` on
# an assignment line leaves the whitespace before `#` inside the value.
OUTDIR = "/tmp/three"
# Database address; don't use localhost.
DBADDR = "0.0.0.0:27017"

# Credentials come from a .gitignored file; create it from env.default.
# (Presumably this is where DB_USR_USER and DB_USR_PASS are defined - confirm
# against env.default.)
include env.local

N_CORES = 3

# Insert kres files into the database in chunks, for fewer connections.
KRES_CHUNK_SIZE = 30

# Backend parameters are found in the conf file (see the backend-* targets).

# Export every variable above to the environment of recipes and sub-makes.
export
# Command-style targets: they never produce a file of the same name, so they
# must run even if such a file happens to exist.
# data/samples is deliberately absent - it is a real directory target.
# fill-database is carried over from the original declaration; no rule with
# that exact name is visible in this part of the file.
.PHONY: all \
        database-service database-users database-clean \
        python-env python-env-install \
        fill-database fill-database-ssj fill-database-kres \
        frontend-dev frontend-prod \
        backend-prepare-db backend-dev backend-prod \
        sskj-senses
# Default goal. Intentionally builds nothing: it only reminds the caller to
# pick one of the targets below explicitly.
all:
	echo "Select an argument"
# Create the database (run from the host).
# !!! Might need to be run several times, until the containers come online.
# It was successful if you see the lines:
# Successfully added user: { "user" : "testuser", "roles" : [ "readWrite" ] }
# bye
#
# $(MAKE) -C is used instead of `cd dir; $(MAKE)` so that a missing directory
# aborts the recipe rather than running the sub-make in the wrong place.
database-service:
	$(MAKE) -C dockerfiles/database build_run
# Run the create_users target of the database sub-makefile.
# $(MAKE) -C fails cleanly if the directory is missing.
database-users:
	$(MAKE) -C dockerfiles/database create_users
# Run the clean_stack target of the database sub-makefile.
# Also useful if we want to restart the db.
# $(MAKE) -C fails cleanly if the directory is missing.
database-clean:
	$(MAKE) -C dockerfiles/database clean_stack
# Create the python-env container by running the default goal of the
# sub-makefile in dockerfiles/python-env.
# With the former `cd dir; $(MAKE)` form, a failed cd would have re-run this
# Makefile's own default goal and reported success; $(MAKE) -C errors out.
python-env:
	$(MAKE) -C dockerfiles/python-env
# Run inside the python-env container: installs the three project packages
# in editable mode (-e), one pip invocation per package.
python-env-install:
	pip3 install -e src/pkg/cjvt-corpusparser/.
	pip3 install -e src/pkg/valency/.
	pip3 install -e src/pkg/seqparser/.
# Run from inside the python-env container.
# Unpacks data/samples.tar.gz inside the data directory. `&&` makes sure tar
# is not run in the wrong directory if the cd fails.
data/samples:
	cd data && tar xzvf samples.tar.gz
# Run from inside the python-env container.
# Runs the corpus parser with --corpus="ssj".
# You can set OUTPUT = "file" and a valid OUTDIR to test writing to json
# files instead of the DB.
# data/samples is an order-only prerequisite (after `|`): the directory only
# has to exist, its timestamp must not influence this target.
fill-database-ssj: | data/samples
	python3 src/pkg/cjvt-corpusparser/corpusparser/main.py --kres-folder $(KRES_FOLDER) \
		--corpus="ssj" \
		--ssj-file $(SSJ_FILE) --kres-srl-folder $(KRES_SRL_FOLDER) \
		--output $(OUTPUT) --outdir $(OUTDIR) --dbaddr $(DBADDR) \
		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) \
		--cores $(N_CORES)
# Runs the corpus parser with --corpus="kres"; files are handed over in
# chunks of KRES_CHUNK_SIZE (--chunk-size).
# data/samples is an order-only prerequisite (after `|`): the directory only
# has to exist, its timestamp must not influence this target.
fill-database-kres: | data/samples
	python3 src/pkg/cjvt-corpusparser/corpusparser/main.py --kres-folder $(KRES_FOLDER) \
		--corpus="kres" \
		--ssj-file $(SSJ_FILE) --kres-srl-folder $(KRES_SRL_FOLDER) \
		--output $(OUTPUT) --outdir $(OUTDIR) --dbaddr $(DBADDR) \
		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) \
		--chunk-size $(KRES_CHUNK_SIZE) \
		--cores $(N_CORES)
## Frontend
## Run from the host.
## See src/frontend_vue/README.md for port settings etc.
# Runs the dev target of the frontend sub-makefile; $(MAKE) -C fails cleanly
# if the directory is missing.
frontend-dev:
	$(MAKE) -C src/frontend_vue dev
# Runs the prod target of the frontend sub-makefile; $(MAKE) -C fails cleanly
# if the directory is missing.
frontend-prod:
	$(MAKE) -C src/frontend_vue prod
## Backend
# Runs once and exits before the app starts.
# You need to extract ./data/sskj_data.tar.gz first.
# `&&` ensures app.py is only started when the cd succeeded.
backend-prepare-db:
	cd ./src/backend_flask && python3 app.py \
		--config-file ./conf_files/dev_conf.yaml \
		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
		--sskj-wordlist $(SSKJ_WORDLIST) \
		--appindex-json $(APPINDEX_PATH) \
		--prepare-db
# Start the backend with the development config (conf_files/dev_conf.yaml).
# `&&` ensures app.py is only started when the cd succeeded.
backend-dev:
	cd ./src/backend_flask && python3 app.py \
		--config-file ./conf_files/dev_conf.yaml \
		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
		--appindex-json $(APPINDEX_PATH)
# Start the backend with the production config (conf_files/prod_conf.yaml).
# `&&` ensures app.py is only started when the cd succeeded.
backend-prod:
	cd ./src/backend_flask && python3 app.py \
		--config-file ./conf_files/prod_conf.yaml \
		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
		--appindex-json $(APPINDEX_PATH)
## Add sskj senses to the db (the senses file is generated with pkg/seqparser).
# Calls the seqparser entry point with the "senses_to_db" operation, reading
# SSKJ_JSON and connecting with the configured DB address and credentials.
sskj-senses:
	python3 ./src/pkg/seqparser/seqparser/main.py \
		--sskj-json $(SSKJ_JSON) --operation "senses_to_db" \
		--dbaddr $(DBADDR) --dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS)