forked from kristjan/cjvt-valency
First commit on scripts branch
This commit is contained in:
18
Makefile
18
Makefile
@@ -13,10 +13,11 @@ SSJ_FILE = "$(MAKE_ROOT)/data/ssj_file_link"
|
||||
# KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_xml"
|
||||
# KRES_FOLDER = "$(MAKE_ROOT)/data/kres_xml_folder_link"
|
||||
KRES_FOLDER = "/home/kristjan/kres_data/payload/kres_xml"
|
||||
GIGAFIDA_FOLDER = "/home/lukakrsnik/cjvt-valency/data_all/giga_orig"
|
||||
# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/samples/kres_srl_json"
|
||||
# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/kres_json_folder_link"
|
||||
KRES_SRL_FOLDER = "/home/kristjan/kres_data/payload/kres_json"
|
||||
|
||||
GIGAFIDA_SRL_FOLDER = "/home/lukakrsnik/cjvt-valency/data_all/final_json"
|
||||
# This file comes with the source code. Make sure you unpack it and name it right.
|
||||
SSKJ_WORDLIST = "$(MAKE_ROOT)/data/wordlist.json"
|
||||
SSKJ_JSON = "$(MAKE_ROOT)/data/sskj_senses.json"
|
||||
@@ -24,8 +25,8 @@ SSKJ_JSON = "$(MAKE_ROOT)/data/sskj_senses.json"
|
||||
# for pre-generating the index of all headwords and functors
|
||||
APPINDEX_PATH = "$(MAKE_ROOT)/data/appindex.json"
|
||||
|
||||
# OUTPUT = "db"
|
||||
OUTPUT = "file"
|
||||
OUTPUT = "db"
|
||||
# OUTPUT = "file"
|
||||
OUTDIR = "/project/data" # if you're running this in docker, make sure to mount the volume
|
||||
DBADDR = "0.0.0.0:27017" # don't use localhost
|
||||
|
||||
@@ -33,7 +34,7 @@ DBADDR = "0.0.0.0:27017" # don't use localhost
|
||||
# create it from env.default
|
||||
include env.local
|
||||
|
||||
N_CORES = 3
|
||||
N_CORES = 4
|
||||
# insert kres files into database in chunks, for fewer connections
|
||||
KRES_CHUNK_SIZE = 30
|
||||
|
||||
@@ -93,7 +94,14 @@ fill-database-kres: data/samples
|
||||
--chunk-size $(KRES_CHUNK_SIZE) \
|
||||
--cores $(N_CORES)
|
||||
|
||||
|
||||
# fill-database-gigafida: run the corpus parser over the Gigafida corpus.
#
# Invokes cjvt-corpusparser's main.py with --corpus="gigafida", reading the
# corpus from $(GIGAFIDA_FOLDER) (passed through the parser's --kres-folder
# flag) and the SRL annotations from $(GIGAFIDA_SRL_FOLDER). Results go to
# $(OUTPUT) / $(OUTDIR) / $(DBADDR), using $(KRES_CHUNK_SIZE) and $(N_CORES)
# for chunking and parallelism.
# NOTE(review): DB_USR_USER and DB_USR_PASS are not defined in the visible part
# of this Makefile -- presumably they come from the included env.local; confirm.
#
# Declared phony because the target names a command, not a file: without this,
# a file called "fill-database-gigafida" would make the target look up to date.
.PHONY: fill-database-gigafida
fill-database-gigafida: data/samples
	python3 src/pkg/cjvt-corpusparser/corpusparser/main.py --kres-folder $(GIGAFIDA_FOLDER) \
		--corpus="gigafida" \
		--ssj-file $(SSJ_FILE) --kres-srl-folder $(GIGAFIDA_SRL_FOLDER) \
		--output $(OUTPUT) --outdir $(OUTDIR) --dbaddr $(DBADDR) \
		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) \
		--chunk-size $(KRES_CHUNK_SIZE) \
		--cores $(N_CORES)
|
||||
|
||||
## Frontend
|
||||
|
||||
|
||||
Reference in New Issue
Block a user