First commit on scripts branch

This commit is contained in:
2020-09-15 14:08:16 +02:00
parent c803057164
commit 3d91251905
23 changed files with 2032 additions and 1256209 deletions

View File

@@ -13,10 +13,11 @@ SSJ_FILE = "$(MAKE_ROOT)/data/ssj_file_link"
# --- Corpus input locations -------------------------------------------------
# NOTE: the double quotes are part of each value (make does not strip them);
# they reach the shell verbatim when the variable is expanded in a recipe.
#
# Folder with the Kres corpus XML files. The commented-out lines are
# alternative locations under $(MAKE_ROOT); the active value is an absolute,
# machine-specific path.
# KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_xml"
# KRES_FOLDER = "$(MAKE_ROOT)/data/kres_xml_folder_link"
KRES_FOLDER = "/home/kristjan/kres_data/payload/kres_xml"
# Folder with the Gigafida corpus; handed to the parser through --kres-folder
# by the fill-database-gigafida target. Absolute, machine-specific path.
GIGAFIDA_FOLDER = "/home/lukakrsnik/cjvt-valency/data_all/giga_orig"
# Folder with the Kres SRL JSON files (same set of alternatives as KRES_FOLDER).
# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/samples/kres_srl_json"
# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/kres_json_folder_link"
KRES_SRL_FOLDER = "/home/kristjan/kres_data/payload/kres_json"
# Gigafida counterpart of KRES_SRL_FOLDER; handed to the parser through
# --kres-srl-folder by the fill-database-gigafida target.
GIGAFIDA_SRL_FOLDER = "/home/lukakrsnik/cjvt-valency/data_all/final_json"
# This file comes with the source code. Make sure you unpack it and name it right.
SSKJ_WORDLIST = "$(MAKE_ROOT)/data/wordlist.json"
# NOTE(review): presumably the SSKJ sense data that accompanies the wordlist -- confirm.
SSKJ_JSON = "$(MAKE_ROOT)/data/sskj_senses.json"
@@ -24,8 +25,8 @@ SSKJ_JSON = "$(MAKE_ROOT)/data/sskj_senses.json"
# Used for pre-generating the index of all headwords and functors.
APPINDEX_PATH = "$(MAKE_ROOT)/data/appindex.json"
# Output mode passed to the corpus parser as --output ("db" or "file").
# NOTE(review): OUTPUT is assigned twice below; make keeps the last
# assignment, so "db" is the value in effect. The earlier "file" line looks
# like leftover diff residue -- confirm which one is intended.
# OUTPUT = "db"
OUTPUT = "file"
OUTPUT = "db"
# OUTPUT = "file"
# NOTE: on the two lines below, the whitespace between the value and the
# inline '#' comment is kept as part of the variable's value. It is harmless
# here because the values are only expanded as separate shell words.
OUTDIR = "/project/data" # if you're running this in docker, make sure to mount the volume
DBADDR = "0.0.0.0:27017" # don't use localhost
@@ -33,7 +34,7 @@ DBADDR = "0.0.0.0:27017" # don't use localhost
# Local, per-machine settings. env.local must exist (a plain `include` makes
# make stop if the file can be neither found nor built);
# create it from env.default
include env.local
# Worker count passed to the corpus parser as --cores.
# Assigned after the include, so it overrides any N_CORES set in env.local.
# NOTE(review): assigned twice; the last assignment (4) is the one in effect.
# The first line looks like leftover diff residue -- confirm.
N_CORES = 3
N_CORES = 4
# insert kres files into database in chunks, for fewer connections
# (passed as --chunk-size; the gigafida target reuses the same value)
KRES_CHUNK_SIZE = 30
@@ -93,7 +94,14 @@ fill-database-kres: data/samples
--chunk-size $(KRES_CHUNK_SIZE) \
--cores $(N_CORES)
# Run the corpus parser over the Gigafida corpus.
# The Gigafida inputs are handed over through the parser's --kres-folder and
# --kres-srl-folder options, with --corpus="gigafida" added on top
# (presumably this is what switches the parser to Gigafida handling --
# confirm against corpusparser/main.py).
# Results go to the destination selected by OUTPUT / OUTDIR / DBADDR.
# DB_USR_USER and DB_USR_PASS are not defined in the visible part of this
# file -- presumably they come from env.local; verify.
# The target names a command rather than a file, so it is declared phony;
# otherwise a stray file called "fill-database-gigafida" would silently stop
# the recipe from running.
.PHONY: fill-database-gigafida
fill-database-gigafida: data/samples
	python3 src/pkg/cjvt-corpusparser/corpusparser/main.py --kres-folder $(GIGAFIDA_FOLDER) \
		--corpus="gigafida" \
		--ssj-file $(SSJ_FILE) --kres-srl-folder $(GIGAFIDA_SRL_FOLDER) \
		--output $(OUTPUT) --outdir $(OUTDIR) --dbaddr $(DBADDR) \
		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) \
		--chunk-size $(KRES_CHUNK_SIZE) \
		--cores $(N_CORES)
## Frontend