Patch summary (free text before the first "diff --git" header is skipped by patch tools):
  * .gitignore: ignore the logs/ directory.
  * Makefile: make the sample data under data/samples the active SSJ_FILE,
    KRES_FOLDER and KRES_SRL_FOLDER values and comment out the *_link
    variants (the commented KRES_FOLDER line also gains its missing closing
    quote); split the fill-database target into fill-database-ssj and
    fill-database-kres, each passing its own --corpus value to main.py.
  * README.md: document the two new targets.
  * dockerfiles/python-env/Makefile: name the container via CNNAME and run
    "docker kill" / "docker rm" on that name before "docker run"; the leading
    "-" on those two recipe lines tells make to ignore their failure.
  * src/pkg/cjvt-corpusparser: move the submodule pointer to bcc64c7.
NOTE(review): this patch was recovered from a copy whose newlines and
indentation had been collapsed. Recipe tabs, the placement of blank context
lines and the two whitespace-only changes ("-it \" and "/bin/bash") were
reconstructed to satisfy the hunk line counts; confirm with
"git apply --check" against the parent commit before relying on it.

diff --git a/.gitignore b/.gitignore
index b621ed8..32d6aff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ src/frontend_vue/dist/
 dockerfiles/database/create.js
 *__pycache__/
 env.local
+logs/
diff --git a/Makefile b/Makefile
index f8924df..55a92e0 100644
--- a/Makefile
+++ b/Makefile
@@ -8,12 +8,12 @@ MAKE_ROOT = $(shell pwd)
 # kres is composed of many .xml files
 # I generated srl tags for kres in separate .json files
 # (for each kres.xml file there is a kres.json file with srl tags)
-# SSJ_FILE = "$(MAKE_ROOT)/data/samples/ssj_xml/ssj500k-sl.body.sample.xml"
-SSJ_FILE = "$(MAKE_ROOT)/data/ssj_file_link"
-# KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_xml"
-KRES_FOLDER = "$(MAKE_ROOT)/data/kres_xml_folder_link
-# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/samples/kres_srl_json"
-KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/kres_json_folder_link"
+SSJ_FILE = "$(MAKE_ROOT)/data/samples/ssj_xml/ssj500k-sl.body.sample.xml"
+# SSJ_FILE = "$(MAKE_ROOT)/data/ssj_file_link"
+KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_xml"
+# KRES_FOLDER = "$(MAKE_ROOT)/data/kres_xml_folder_link"
+KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/samples/kres_srl_json"
+# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/kres_json_folder_link"
 
 OUTPUT = "db"
 # OUTPUT = "file"
@@ -62,13 +62,23 @@ data/samples:
 
 # from inside python-env container:
 # you can set OUTPUT = "file" and a valid OUTDIR to test writing to json files instead of DB
-fill-database: data/samples
+fill-database-ssj: data/samples
 	python3 src/pkg/cjvt-corpusparser/corpusparser/main.py --kres-folder $(KRES_FOLDER) \
+		--corpus="ssj" \
 		--ssj-file $(SSJ_FILE) --kres-srl-folder $(KRES_SRL_FOLDER) \
 		--output $(OUTPUT) --outdir $(OUTDIR) --dbaddr $(DBADDR) \
 		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) \
 		--cores $(N_CORES)
 
+fill-database-kres: data/samples
+	python3 src/pkg/cjvt-corpusparser/corpusparser/main.py --kres-folder $(KRES_FOLDER) \
+		--corpus="kres" \
+		--ssj-file $(SSJ_FILE) --kres-srl-folder $(KRES_SRL_FOLDER) \
+		--output $(OUTPUT) --outdir $(OUTDIR) --dbaddr $(DBADDR) \
+		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) \
+		--cores $(N_CORES)
+
+
 
 ## Frontend
 
diff --git a/README.md b/README.md
index 08e6552..35bb459 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,14 @@ $ make python-env-install
 # beforehand, set the data files in Makefile
 # instead of mounting directories into the container, you can
 # create a link inside ./data, that points to the desired location
-$ make fill-database
+
+# I've separated the processes for better memory management
+$ make fill-database-ssj
+
+$ make fill-database-kres
+# You can detach from the running process using Ctrl-p + Ctrl-q
+
+
 # this is a long operation
 # if running on a remote server, use nohup:
 
diff --git a/dockerfiles/python-env/Makefile b/dockerfiles/python-env/Makefile
index 1074b4a..3d4d27f 100644
--- a/dockerfiles/python-env/Makefile
+++ b/dockerfiles/python-env/Makefile
@@ -1,4 +1,5 @@
 IMAGE_NAME="cjvt-python-env"
+CNNAME="python-env"
 
 all: build run
 
@@ -6,8 +7,10 @@ build:
 	docker build . -t $(IMAGE_NAME)
 
 run:
+	- docker kill $(CNNAME)
+	- docker rm $(CNNAME)
 	docker run \
-        -it \
+		-it \
 		-v /home/${USER}:/home/${USER} \
 		--user $(shell id -u):$(shell id -g) \
 		-v /etc/passwd:/etc/passwd \
@@ -15,5 +18,6 @@ run:
 		-v $(MAKE_ROOT):/project \
 		-w /project \
 		--net host \
+		--name $(CNNAME) \
 		$(IMAGE_NAME) \
-        /bin/bash
+		/bin/bash
diff --git a/src/pkg/cjvt-corpusparser b/src/pkg/cjvt-corpusparser
index 00d9192..bcc64c7 160000
--- a/src/pkg/cjvt-corpusparser
+++ b/src/pkg/cjvt-corpusparser
@@ -1 +1 @@
-Subproject commit 00d9192993a2ccfa45237d7a3a6a851058d4ee09
+Subproject commit bcc64c767c36ed8e41fd07dc896b83a49faf785b