forked from kristjan/cjvt-valency
Compare commits
16 Commits
production
...
scripts
| Author | SHA1 | Date | |
|---|---|---|---|
| ec083a8d63 | |||
| 69c3521e4b | |||
| 75b015dcda | |||
| c18aaff11f | |||
| 34b776be11 | |||
| 26bca0b083 | |||
| 2551a9c6a8 | |||
| 5cdc963c2d | |||
| ce1fb46b4e | |||
| 220529b777 | |||
| ae5f2869bc | |||
| 931b3531b3 | |||
| 3d91251905 | |||
| c803057164 | |||
| b4db4e5255 | |||
| ef02583d72 |
11
.gitignore
vendored
11
.gitignore
vendored
@@ -7,6 +7,17 @@ data/appindex.json
|
|||||||
src/frontend_vue/node_modules/
|
src/frontend_vue/node_modules/
|
||||||
src/frontend_vue/dist/
|
src/frontend_vue/dist/
|
||||||
dockerfiles/database/create.js
|
dockerfiles/database/create.js
|
||||||
|
dockerfiles/database/create_mongo.js
|
||||||
|
dockerfiles/database/create_postgres.js
|
||||||
|
dockerfiles/database/mongo_db.gz
|
||||||
|
dockerfiles/database/postgres_db.tar
|
||||||
|
dockerfiles/database/postgres_db_OLD.tar
|
||||||
*__pycache__/
|
*__pycache__/
|
||||||
env.local
|
env.local
|
||||||
logs/*
|
logs/*
|
||||||
|
.idea/
|
||||||
|
venv*
|
||||||
|
data/
|
||||||
|
data
|
||||||
|
deploy_instructions/
|
||||||
|
run.sh
|
||||||
|
|||||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,3 +1,6 @@
|
|||||||
[submodule "src/pkg/cjvt-corpusparser"]
|
[submodule "src/pkg/cjvt-corpusparser"]
|
||||||
path = src/pkg/cjvt-corpusparser
|
path = src/pkg/cjvt-corpusparser
|
||||||
url = git@gitea.cjvt.si:kristjan/cjvt-corpusparser.git
|
url = git@gitea.cjvt.si:kristjan/cjvt-corpusparser.git
|
||||||
|
[submodule "src/pkg/luscenje_struktur"]
|
||||||
|
path = src/pkg/luscenje_struktur
|
||||||
|
url = https://gitea.cjvt.si/ozbolt/luscenje_struktur.git
|
||||||
|
|||||||
26
Makefile
26
Makefile
@@ -13,10 +13,11 @@ SSJ_FILE = "$(MAKE_ROOT)/data/ssj_file_link"
|
|||||||
# KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_xml"
|
# KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_xml"
|
||||||
# KRES_FOLDER = "$(MAKE_ROOT)/data/kres_xml_folder_link"
|
# KRES_FOLDER = "$(MAKE_ROOT)/data/kres_xml_folder_link"
|
||||||
KRES_FOLDER = "/home/kristjan/kres_data/payload/kres_xml"
|
KRES_FOLDER = "/home/kristjan/kres_data/payload/kres_xml"
|
||||||
|
GIGAFIDA_FOLDER = "/home/lukakrsnik/cjvt-valency/data_all/giga_orig"
|
||||||
# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/samples/kres_srl_json"
|
# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/samples/kres_srl_json"
|
||||||
# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/kres_json_folder_link"
|
# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/kres_json_folder_link"
|
||||||
KRES_SRL_FOLDER = "/home/kristjan/kres_data/payload/kres_json"
|
KRES_SRL_FOLDER = "/home/kristjan/kres_data/payload/kres_json"
|
||||||
|
GIGAFIDA_SRL_FOLDER = "/home/lukakrsnik/cjvt-valency/data_all/final_json"
|
||||||
# This file comes with the source code. Make sure you unpack it and name it right.
|
# This file comes with the source code. Make sure you unpack it and name it right.
|
||||||
SSKJ_WORDLIST = "$(MAKE_ROOT)/data/wordlist.json"
|
SSKJ_WORDLIST = "$(MAKE_ROOT)/data/wordlist.json"
|
||||||
SSKJ_JSON = "$(MAKE_ROOT)/data/sskj_senses.json"
|
SSKJ_JSON = "$(MAKE_ROOT)/data/sskj_senses.json"
|
||||||
@@ -26,14 +27,14 @@ APPINDEX_PATH = "$(MAKE_ROOT)/data/appindex.json"
|
|||||||
|
|
||||||
OUTPUT = "db"
|
OUTPUT = "db"
|
||||||
# OUTPUT = "file"
|
# OUTPUT = "file"
|
||||||
OUTDIR = "/tmp/three" # if you're running this in docker, make sure to mount the volume
|
OUTDIR = "/project/data" # if you're running this in docker, make sure to mount the volume
|
||||||
DBADDR = "0.0.0.0:27017" # don't use localhost
|
DBADDR = "0.0.0.0:27017" # don't use localhost
|
||||||
|
|
||||||
# credentials from .gitignored file
|
# credentials from .gitignored file
|
||||||
# create it from env.default
|
# create it from env.default
|
||||||
include env.local
|
include env.local
|
||||||
|
|
||||||
N_CORES = 3
|
N_CORES = 4
|
||||||
# insert kres files into database in chunks, for fewer connections
|
# insert kres files into database in chunks, for fewer connections
|
||||||
KRES_CHUNK_SIZE = 30
|
KRES_CHUNK_SIZE = 30
|
||||||
|
|
||||||
@@ -56,6 +57,12 @@ database-service:
|
|||||||
database-users:
|
database-users:
|
||||||
cd dockerfiles/database; $(MAKE) create_users
|
cd dockerfiles/database; $(MAKE) create_users
|
||||||
|
|
||||||
|
# Run the restore_db target of the Makefile in dockerfiles/database.
# `&&` (not `;`) so that a failed `cd` aborts the recipe instead of invoking
# $(MAKE) restore_db in the current directory.
database-restore:
	cd dockerfiles/database && $(MAKE) restore_db
|
||||||
|
|
||||||
|
# Run the restore_postgres_db target of the Makefile in dockerfiles/database.
# `&&` (not `;`) so that a failed `cd` aborts the recipe instead of invoking
# $(MAKE) restore_postgres_db in the current directory.
database-restore-postgres:
	cd dockerfiles/database && $(MAKE) restore_postgres_db
|
||||||
|
|
||||||
# also useful, if we want to restart the db
|
# also useful, if we want to restart the db
|
||||||
database-clean:
|
database-clean:
|
||||||
cd dockerfiles/database; $(MAKE) clean_stack
|
cd dockerfiles/database; $(MAKE) clean_stack
|
||||||
@@ -69,6 +76,7 @@ python-env-install:
|
|||||||
pip3 install -e src/pkg/cjvt-corpusparser/.
|
pip3 install -e src/pkg/cjvt-corpusparser/.
|
||||||
pip3 install -e src/pkg/valency/.
|
pip3 install -e src/pkg/valency/.
|
||||||
pip3 install -e src/pkg/seqparser/.
|
pip3 install -e src/pkg/seqparser/.
|
||||||
|
pip3 install -e src/pkg/luscenje_struktur/.
|
||||||
|
|
||||||
# from inside python-env container:
|
# from inside python-env container:
|
||||||
data/samples:
|
data/samples:
|
||||||
@@ -93,7 +101,14 @@ fill-database-kres: data/samples
|
|||||||
--chunk-size $(KRES_CHUNK_SIZE) \
|
--chunk-size $(KRES_CHUNK_SIZE) \
|
||||||
--cores $(N_CORES)
|
--cores $(N_CORES)
|
||||||
|
|
||||||
|
# Run the corpusparser entry point on the Gigafida input: the Gigafida folders
# are passed through the --kres-folder / --kres-srl-folder options and the
# corpus is named explicitly with --corpus="gigafida".
fill-database-gigafida: data/samples
	python3 src/pkg/cjvt-corpusparser/corpusparser/main.py \
		--kres-folder $(GIGAFIDA_FOLDER) \
		--corpus="gigafida" \
		--ssj-file $(SSJ_FILE) \
		--kres-srl-folder $(GIGAFIDA_SRL_FOLDER) \
		--output $(OUTPUT) \
		--outdir $(OUTDIR) \
		--dbaddr $(DBADDR) \
		--dbuser $(DB_USR_USER) \
		--dbpass $(DB_USR_PASS) \
		--chunk-size $(KRES_CHUNK_SIZE) \
		--cores $(N_CORES)
|
||||||
|
|
||||||
## Frontend
|
## Frontend
|
||||||
|
|
||||||
@@ -146,4 +161,5 @@ sskj-senses:
|
|||||||
--dbpass $(DB_USR_PASS)
|
--dbpass $(DB_USR_PASS)
|
||||||
|
|
||||||
deploy-prod-stack:
|
deploy-prod-stack:
|
||||||
docker stack deploy -c production.yaml val
|
- docker network create val-backend
|
||||||
|
docker stack deploy -c production.yaml val
|
||||||
|
|||||||
87
README.md
87
README.md
@@ -111,13 +111,17 @@ Prerequisite: machine with free ports 80 and 8084.
|
|||||||
### Database
|
### Database
|
||||||
Either build the database from scratch (lengthy process) using the above instructions or just migrate the database from the faculty server (recommended).
|
Either build the database from scratch (lengthy process) using the above instructions or just migrate the database from the faculty server (recommended).
|
||||||
|
|
||||||
TODO: build my-mongo
|
Build container my-mongo:
|
||||||
|
```bash
|
||||||
|
# run once and destroy containers
|
||||||
|
$ make database-service
|
||||||
|
```
|
||||||
|
|
||||||
### Backend
|
### Backend
|
||||||
Set database connection details in `/src/backend_flask/db_config.py`.
|
Set database connection details in `/src/backend_flask/db_config.py`.
|
||||||
```
|
Change 'valuser' and 'valuserpass' to the database user.
|
||||||
mongodb://valuser:valuserpass@127.0.0.1:27017/valdb
|
```bash
|
||||||
|
mongodb://valuser:valuserpass@my_mongo/valdb
|
||||||
```
|
```
|
||||||
In the above line, replace `valuser` with the username and `valuserpass` with the password that was used to create the database tables (the values were set in the root Makefile).
|
In the above line, replace `valuser` with the username and `valuserpass` with the password that was used to create the database tables (the values were set in the root Makefile).
|
||||||
|
|
||||||
@@ -140,7 +144,82 @@ $ make build-frontend-prod
|
|||||||
```
|
```
|
||||||
|
|
||||||
All set, now run the stack.
|
All set, now run the stack.
|
||||||
|
Stack configuration in `production.yaml`.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# From git root
|
# From git root
|
||||||
$ make deploy-prod-stack
|
$ make deploy-prod-stack
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Uploading a mongo dump
|
||||||
|
There's a 15GB mongo dump containing the fully processed kres and ssj data.
|
||||||
|
We can use that file to deploy our application.
|
||||||
|
With this database, we will need a minimum of 8GB ram to serve the app.
|
||||||
|
If the server is struggling, frontend will throw "Network errors".
|
||||||
|
|
||||||
|
Check `0.0.0.0:8081` and remove (or backup) the current example database `valdb`.
|
||||||
|
|
||||||
|
Run the stack with mongo port mapped:
|
||||||
|
(uncomment the lines in `production.yaml`)
|
||||||
|
```yml
|
||||||
|
ports:
|
||||||
|
- 27017:27017
|
||||||
|
```
|
||||||
|
|
||||||
|
Run a separate my-mongo container with the mounted data:
|
||||||
|
```bash
|
||||||
|
$ docker run -it --net host -v <local_dump_path>:/dumps my-mongo /bin/bash
|
||||||
|
```
|
||||||
|
|
||||||
|
Inside the container (edit the username and password):
|
||||||
|
```bash
|
||||||
|
$ mongorestore /dumps/valdb --db valdb --uri=mongodb://valuser:valuserpass@0.0.0.0:27017
|
||||||
|
```
|
||||||
|
|
||||||
|
After uploading, restart the stack with `27017` commented out.
|
||||||
|
|
||||||
|
## Script running
|
||||||
|
|
||||||
|
### Environment setup
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
pip install git+https://gitea.cjvt.si/ozbolt/luscenje_struktur.git
|
||||||
|
pip install git+https://gitea.cjvt.si/kristjan/cjvt-corpusparser.git
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running on already setup environment
|
||||||
|
```bash
|
||||||
|
make database-service
|
||||||
|
```
|
||||||
|
|
||||||
|
### Setting up environment for running on ramdisk
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# create ramdisk
|
||||||
|
sudo mount -t tmpfs tmpfs /mnt/tmp
|
||||||
|
sudo mount -o remount,size=120G,noexec,nosuid,nodev,noatime /mnt/tmp
|
||||||
|
|
||||||
|
# change volumes to /mnt/tmp:/data/db
|
||||||
|
vim dockerfiles/database/valency-stack.yml
|
||||||
|
|
||||||
|
# change Makefile -runStack to mkdir -p /mnt/tmp
|
||||||
|
vim dockerfiles/database/Makefile
|
||||||
|
|
||||||
|
# run service
|
||||||
|
make database-service
|
||||||
|
|
||||||
|
# run ONLY ONCE to create users and restore database
|
||||||
|
make database-users
|
||||||
|
make database-restore
|
||||||
|
|
||||||
|
# double check if it worked
|
||||||
|
docker exec -it ef0a /bin/bash
|
||||||
|
|
||||||
|
# following steps in docker bash:
|
||||||
|
# check if it worked by
|
||||||
|
mongo --username <REGULAR USER> --password --authenticationDatabase valdb
|
||||||
|
db.getRoles()
|
||||||
|
|
||||||
```
|
```
|
||||||
@@ -1 +0,0 @@
|
|||||||
/home/kristjan/workdir/final_json/
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
/home/kristjan/kres_mount/kres_parsed/tei/
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
/home/kristjan/git/diploma/data/ssj500k-sl.TEI/ssj500k-sl.body.xml
|
|
||||||
Binary file not shown.
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
|
|||||||
FROM mongo:latest
|
FROM mongo:4.2.9
|
||||||
|
|
||||||
WORKDIR /
|
WORKDIR /
|
||||||
COPY init_inside_container.sh /.
|
COPY init_inside_mongo_container.sh /.
|
||||||
COPY create.js /.
|
COPY create_mongo.js /.
|
||||||
|
|||||||
@@ -2,33 +2,62 @@
|
|||||||
# collection names: lower case, plural
|
# collection names: lower case, plural
|
||||||
# user names?
|
# user names?
|
||||||
|
|
||||||
# mongo admin -u root -p password --eval "db.getSiblingDB('vlDB').addUser('vluser', 'password')"
|
|
||||||
|
|
||||||
STACKNAME = dbstack
|
|
||||||
|
|
||||||
.PHONY: start_db FORCE
|
|
||||||
|
|
||||||
all: build_run create_users
|
all: build_run create_users
|
||||||
|
|
||||||
build_run: build_mongo run_stack
|
build_run: build_mongo run_docker_compose
|
||||||
|
|
||||||
create.js: FORCE
|
postgres_create_roles:
|
||||||
|
echo 'psql -v ON_ERROR_STOP=OFF --username $(DB_ADM_USER) <<-EOSQL' > create_postgres.js
|
||||||
|
echo "create user $(DB_USR_USER) with encrypted password '$(DB_USR_PASS)';" >> create_postgres.js
|
||||||
|
echo "create database superdb_small;" >> create_postgres.js
|
||||||
|
echo "grant all privileges on database superdb_small to $(DB_USR_USER);" >> create_postgres.js
|
||||||
|
echo "grant usage on schema public to $(DB_USR_USER);" >> create_postgres.js
|
||||||
|
echo "grant select on all tables in schema public to $(DB_USR_USER);" >> create_postgres.js
|
||||||
|
echo "EOSQL" >> create_postgres.js
|
||||||
|
chmod +x create_postgres.js
|
||||||
|
|
||||||
FORCE:
|
mongo_create_roles:
|
||||||
echo 'db.auth("$(DB_ADM_USER)", "$(DB_ADM_PASS)")' > create.js
|
echo 'db.auth("$(DB_ADM_USER)", "$(DB_ADM_PASS)")' > create_mongo.js
|
||||||
echo 'use valdb' >> create.js
|
echo 'use valdb' >> create_mongo.js
|
||||||
echo 'db.createUser({user: "$(DB_USR_USER)", pwd: "$(DB_USR_PASS)", roles: ["readWrite"]})' >> create.js
|
echo 'db.createUser({user: "$(DB_USR_USER)", pwd: "$(DB_USR_PASS)", roles: ["readWrite"]})' >> create_mongo.js
|
||||||
|
echo 'db.grantRolesToUser("$(DB_USR_USER)", [{ role: "readWrite", db: "extvaldb"}])' >> create_mongo.js
|
||||||
|
|
||||||
build_mongo: create.js
|
build_mongo: mongo_create_roles
|
||||||
docker build . -t my-mongo --no-cache
|
docker build . -t my-mongo --no-cache
|
||||||
|
|
||||||
clean_stack:
|
# build_postgres: postgres_create_roles
|
||||||
docker stack rm $(STACKNAME)
|
# docker build . -t my-mongo --no-cache
|
||||||
|
|
||||||
run_stack:
|
run_docker_compose:
|
||||||
mkdir -p ${HOME}/mongo_container/data/
|
mkdir -p ${HOME}/valency_data/mongo_container/data/
|
||||||
docker stack deploy --compose-file mongodb-stack.yml $(STACKNAME)
|
#docker kill $(shell ./get_mongo_container_name.sh)
|
||||||
|
#docker kill $(shell ./get_postgres_container_name.sh)
|
||||||
|
#docker-compose stop
|
||||||
|
docker-compose -f valency-stack.yml up -d --force-recreate
|
||||||
|
# docker stack deploy --compose-file mongodb-stack.yml $(STACKNAME)
|
||||||
|
|
||||||
create_users: create.js
|
create_users: create_mongo_users create_postgres_users
|
||||||
docker exec $(shell ./get_container_name.sh) /init_inside_container.sh
|
|
||||||
|
|
||||||
|
create_mongo_users: mongo_create_roles
|
||||||
|
docker exec $(shell ./get_mongo_container_name.sh) /init_inside_mongo_container.sh
|
||||||
# rm create.js
|
# rm create.js
|
||||||
|
|
||||||
|
create_postgres_users: postgres_create_roles
|
||||||
|
docker exec $(shell ./get_postgres_container_name.sh) /scripts/init_inside_postgres_container.sh
|
||||||
|
|
||||||
|
restore_db: restore_mongo_db restore_postgres_db
|
||||||
|
|
||||||
|
# Restore the Mongo dump into the `valdb` database.
# The ifeq/else/endif conditional is evaluated when the makefile is read:
# $(wildcard ./mongo_db.gz) is empty when the dump file is missing, which
# selects the $(error ...) branch; otherwise the docker exec branch is used.
# The docker exec branch runs mongorestore (gzip archive /scripts/mongo_db.gz)
# inside the container reported by ./get_mongo_container_name.sh,
# authenticating as $(DB_USR_USER) against valdb.
# NOTE(review): whether $(error ...) fires while parsing or only when this
# target runs depends on whether that line is tab-indented -- confirm.
restore_mongo_db:
|
||||||
|
ifeq (,$(wildcard ./mongo_db.gz))
|
||||||
|
$(error "mongo_db.gz does not exists. Make sure to have dump of mongo db in 'dockerfiles/database/mongo_db.gz'")
|
||||||
|
else
|
||||||
|
docker exec $(shell ./get_mongo_container_name.sh) sh -c 'mongorestore --gzip --archive=/scripts/mongo_db.gz --db valdb --username $(DB_USR_USER) --password $(DB_USR_PASS) --authenticationDatabase valdb'
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Restore the Postgres dump with pg_restore.
# The ifeq/else/endif conditional is evaluated when the makefile is read:
# $(wildcard ./postgres_db.tar) is empty when the dump file is missing, which
# selects the $(error ...) branch; otherwise the docker exec branch is used.
# The docker exec branch runs pg_restore as $(DB_ADM_USER) on
# /scripts/postgres_db.tar inside the container reported by
# ./get_postgres_container_name.sh, with --dbname=superdb_small --create.
# NOTE(review): whether $(error ...) fires while parsing or only when this
# target runs depends on whether that line is tab-indented -- confirm.
restore_postgres_db:
|
||||||
|
ifeq (,$(wildcard ./postgres_db.tar))
|
||||||
|
$(error "postgres_db.tar does not exists. Make sure to have dump of postgres db in 'dockerfiles/database/postgres_db.tar'")
|
||||||
|
else
|
||||||
|
docker exec $(shell ./get_postgres_container_name.sh) sh -c 'pg_restore -U $(DB_ADM_USER) --dbname=superdb_small --create --verbose /scripts/postgres_db.tar'
|
||||||
|
endif
|
||||||
|
|||||||
2
dockerfiles/database/get_postgres_container_name.sh
Executable file
2
dockerfiles/database/get_postgres_container_name.sh
Executable file
@@ -0,0 +1,2 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
docker ps | grep postgres | awk '{print $1}'
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
mongo admin < /create.js
|
|
||||||
3
dockerfiles/database/init_inside_mongo_container.sh
Executable file
3
dockerfiles/database/init_inside_mongo_container.sh
Executable file
@@ -0,0 +1,3 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
mongo admin < /create_mongo.js
|
||||||
3
dockerfiles/database/init_inside_postgres_container.sh
Executable file
3
dockerfiles/database/init_inside_postgres_container.sh
Executable file
@@ -0,0 +1,3 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
/scripts/create_postgres.js
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
version: '3.1'
|
|
||||||
|
|
||||||
services:
|
|
||||||
|
|
||||||
my_mongo:
|
|
||||||
image: my-mongo
|
|
||||||
restart: always
|
|
||||||
ports:
|
|
||||||
- 27017:27017
|
|
||||||
environment:
|
|
||||||
MONGO_INITDB_ROOT_USERNAME: ${DB_ADM_USER}
|
|
||||||
MONGO_INITDB_ROOT_PASSWORD: ${DB_ADM_PASS}
|
|
||||||
volumes:
|
|
||||||
- ${HOME}/mongo_container/data/:/data/db
|
|
||||||
|
|
||||||
mongo_express:
|
|
||||||
image: mongo-express
|
|
||||||
restart: always
|
|
||||||
ports:
|
|
||||||
- 8087:8081
|
|
||||||
environment:
|
|
||||||
ME_CONFIG_BASICAUTH_USERNAME: ${MONGOEXPRESS_USER}
|
|
||||||
ME_CONFIG_BASICAUTH_PASSWORD: ${MONGOEXPRESS_PASS}
|
|
||||||
ME_CONFIG_MONGODB_ADMINUSERNAME: ${DB_ADM_USER}
|
|
||||||
ME_CONFIG_MONGODB_ADMINPASSWORD: ${DB_ADM_PASS}
|
|
||||||
ME_CONFIG_MONGODB_SERVER: my_mongo
|
|
||||||
27
dockerfiles/database/valency-stack.yml
Normal file
27
dockerfiles/database/valency-stack.yml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
version: '3.1'
|
||||||
|
|
||||||
|
services:
|
||||||
|
|
||||||
|
my_mongo:
|
||||||
|
image: my-mongo
|
||||||
|
restart: always
|
||||||
|
ports:
|
||||||
|
- 127.0.0.1:27017:27017
|
||||||
|
environment:
|
||||||
|
MONGO_INITDB_ROOT_USERNAME: ${DB_ADM_USER}
|
||||||
|
MONGO_INITDB_ROOT_PASSWORD: ${DB_ADM_PASS}
|
||||||
|
volumes:
|
||||||
|
- ${HOME}/valency_data/mongo_container/data/:/data/db
|
||||||
|
- ./:/scripts
|
||||||
|
|
||||||
|
my_postgres:
|
||||||
|
image: postgres
|
||||||
|
restart: always
|
||||||
|
ports:
|
||||||
|
- 127.0.0.1:5432:5432
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: ${DB_ADM_USER}
|
||||||
|
POSTGRES_PASSWORD: ${DB_ADM_PASS}
|
||||||
|
volumes:
|
||||||
|
- ${HOME}/valency_data/postgres_container/data/:/var/lib/postgresql/data
|
||||||
|
- ./:/scripts
|
||||||
@@ -6,7 +6,8 @@ vim \
|
|||||||
python3 \
|
python3 \
|
||||||
python3-pip \
|
python3-pip \
|
||||||
sshfs \
|
sshfs \
|
||||||
curl
|
curl \
|
||||||
|
locales
|
||||||
|
|
||||||
RUN pip3 install --upgrade pip
|
RUN pip3 install --upgrade pip
|
||||||
|
|
||||||
@@ -21,6 +22,16 @@ RUN pip3 install \
|
|||||||
flask_cors \
|
flask_cors \
|
||||||
pymongo \
|
pymongo \
|
||||||
flask-pymongo \
|
flask-pymongo \
|
||||||
gunicorn
|
gunicorn \
|
||||||
|
SQLAlchemy \
|
||||||
|
tqdm \
|
||||||
|
psycopg2-binary
|
||||||
|
|
||||||
|
# Set the locale
|
||||||
|
RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \
|
||||||
|
locale-gen
|
||||||
|
ENV LANG en_US.UTF-8
|
||||||
|
ENV LANGUAGE en_US:en
|
||||||
|
ENV LC_ALL en_US.UTF-8
|
||||||
|
|
||||||
ENV PYTHONIOENCODING UTF-8
|
ENV PYTHONIOENCODING UTF-8
|
||||||
|
|||||||
@@ -7,6 +7,10 @@ server {
|
|||||||
root /srv/dist;
|
root /srv/dist;
|
||||||
index index.html index.htm;
|
index index.html index.htm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
location /home {
|
||||||
|
return 301 /;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# backend
|
# backend
|
||||||
@@ -19,4 +23,7 @@ server {
|
|||||||
proxy_set_header Host $http_host;
|
proxy_set_header Host $http_host;
|
||||||
proxy_pass http://backend_flask:8084;
|
proxy_pass http://backend_flask:8084;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
https://vezljivostni.cjvt.si/api/* -> http://vezljivostni-host.cjvt.si:8084/api/*
|
||||||
|
https://vezljivostni.cjvt.si/* -> http://vezljivostni-host.cjvt.si:80/*
|
||||||
|
|||||||
@@ -18,9 +18,8 @@ services:
|
|||||||
mongo_express:
|
mongo_express:
|
||||||
image: mongo-express
|
image: mongo-express
|
||||||
restart: always
|
restart: always
|
||||||
# TODO comment this out
|
|
||||||
ports:
|
ports:
|
||||||
- 8087:8081
|
- 8081:8081
|
||||||
environment:
|
environment:
|
||||||
ME_CONFIG_BASICAUTH_USERNAME: test
|
ME_CONFIG_BASICAUTH_USERNAME: test
|
||||||
ME_CONFIG_BASICAUTH_PASSWORD: test
|
ME_CONFIG_BASICAUTH_PASSWORD: test
|
||||||
@@ -41,3 +40,4 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./nginx.conf:/etc/nginx/conf.d/default.conf
|
- ./nginx.conf:/etc/nginx/conf.d/default.conf
|
||||||
- ./src/frontend_vue/dist:/srv/dist
|
- ./src/frontend_vue/dist:/srv/dist
|
||||||
|
|
||||||
|
|||||||
37
requirements.txt
Normal file
37
requirements.txt
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
asn1crypto==0.24.0
|
||||||
|
beautifulsoup4==4.8.0
|
||||||
|
bs4==0.0.1
|
||||||
|
cffi==1.12.3
|
||||||
|
Click==7.0
|
||||||
|
cryptography==2.1.4
|
||||||
|
Flask==1.1.1
|
||||||
|
Flask-Cors==3.0.8
|
||||||
|
Flask-PyMongo==2.3.0
|
||||||
|
gunicorn==19.9.0
|
||||||
|
idna==2.6
|
||||||
|
itsdangerous==1.1.0
|
||||||
|
Jinja2==2.10.1
|
||||||
|
joblib==0.13.2
|
||||||
|
keyring==10.6.0
|
||||||
|
keyrings.alt==3.0
|
||||||
|
lxml==4.4.0
|
||||||
|
MarkupSafe==1.1.1
|
||||||
|
numpy==1.17.0
|
||||||
|
pandas==0.25.0
|
||||||
|
pathlib==1.0.1
|
||||||
|
psycopg2==2.8.4
|
||||||
|
pycparser==2.19
|
||||||
|
pycrypto==2.6.1
|
||||||
|
pymongo==3.8.0
|
||||||
|
python-dateutil==2.8.0
|
||||||
|
pytz==2019.2
|
||||||
|
pyxdg==0.25
|
||||||
|
PyYAML==5.1.2
|
||||||
|
scikit-learn==0.21.3
|
||||||
|
scipy==1.3.0
|
||||||
|
SecretStorage==2.3.1
|
||||||
|
six==1.11.0
|
||||||
|
sklearn==0.0
|
||||||
|
soupsieve==1.9.3
|
||||||
|
SQLAlchemy==1.3.12
|
||||||
|
Werkzeug==0.15.5
|
||||||
1708
scripts/create_xml.py
Normal file
1708
scripts/create_xml.py
Normal file
File diff suppressed because it is too large
Load Diff
189
scripts/extract_keywords.py
Normal file
189
scripts/extract_keywords.py
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
import copy
|
||||||
|
import csv
|
||||||
|
from xml.etree import ElementTree
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import argparse
|
||||||
|
import pickle
|
||||||
|
import time
|
||||||
|
import gc
|
||||||
|
import subprocess
|
||||||
|
import concurrent.futures
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
|
||||||
|
def read_gigafida(path):
    """Load a frequency list from a tab-separated file.

    Every non-empty row contributes one entry: column 0 becomes the key and
    column 2, converted with ``int``, becomes the value. When a key occurs
    more than once the last row wins.

    Args:
        path: Path of the tab-separated file; it is read as UTF-8.

    Returns:
        dict mapping the first column of each row to the integer found in
        its third column.

    Raises:
        IndexError: If a non-empty row has fewer than three columns.
        ValueError: If the third column cannot be parsed by ``int``.
    """
    words = {}
    # newline='' is what the csv module asks for on file objects; the explicit
    # encoding makes the result independent of the process locale.
    with open(path, encoding='utf-8', newline='') as tsvfile:
        for row in csv.reader(tsvfile, delimiter='\t'):
            if not row:
                # csv.reader yields [] for a blank line; nothing to record.
                continue
            words[row[0]] = int(row[2])
    return words
|
||||||
|
|
||||||
|
|
||||||
|
def read_sloleks(path):
    """Collect the second column of a tab-separated file into a set.

    Args:
        path: Path of the tab-separated file; it is read as UTF-8.

    Returns:
        set containing column 1 of every non-empty row.

    Raises:
        IndexError: If a non-empty row has fewer than two columns.
    """
    words = set()
    # newline='' is what the csv module asks for on file objects; the explicit
    # encoding makes the result independent of the process locale.
    with open(path, encoding='utf-8', newline='') as tsvfile:
        for row in csv.reader(tsvfile, delimiter='\t'):
            if not row:
                # csv.reader yields [] for a blank line; nothing to record.
                continue
            words.add(row[1])
    return words
|
||||||
|
|
||||||
|
|
||||||
|
def read_zele(path):
    """Extract the ``<IZT>...</IZT>`` entry from each line of a file.

    For every non-blank line, the first whitespace-delimited token is taken
    and the text between ``<IZT>`` and ``</IZT>`` inside it is collected.

    Args:
        path: Path of the input file; it is read as UTF-8.

    Returns:
        set of the extracted strings; an empty set for an empty file.

    Raises:
        IndexError: If the first token of a non-blank line does not contain
            ``<IZT>``.
    """
    with open(path, encoding='utf-8') as f:
        content = f.readlines()
    if not content:
        return set()
    # The very first character of the file is dropped unconditionally
    # (presumably a byte-order mark -- TODO confirm against the real input).
    content[0] = content[0][1:]
    return {
        line.split()[0].split('<IZT>')[1].split('</IZT>')[0]
        for line in content
        if line.strip()  # blank lines carry no entry
    }
|
||||||
|
|
||||||
|
|
||||||
|
def read_wordlist(path):
    """Read a one-entry-per-line word list into a set.

    Only the line terminator is removed from each line, so the final entry is
    kept intact even when the file does not end with a newline. The last
    entry read is printed to stdout, if there is one.

    Args:
        path: Path of the word list; it is read as UTF-8.

    Returns:
        set of the lines of the file without their trailing newline; an empty
        set for an empty file.
    """
    with open(path, encoding='utf-8') as f:
        content = [line.rstrip('\n') for line in f]
    if content:
        print(content[-1])
    return set(content)
|
||||||
|
|
||||||
|
|
||||||
|
def filter_gigafida(gigafida_raw, min_limit, max_limit):
    """Keep entries whose key ends in 'ti' or 'či' and whose count is in range.

    Args:
        gigafida_raw: dict mapping a word to its count.
        min_limit: Exclusive lower bound on the count.
        max_limit: Inclusive upper bound on the count.

    Returns:
        New dict with the entries that satisfy both conditions.
    """
    kept = {}
    for lemma, freq in gigafida_raw.items():
        if lemma[-2:] not in ('ti', 'či'):
            continue
        if min_limit < freq <= max_limit:
            kept[lemma] = freq
    return kept
|
||||||
|
|
||||||
|
|
||||||
|
def set_list_intersection(gigafida_filtered, sloleks):
    """Restrict a word->count dict to the words contained in ``sloleks``.

    Args:
        gigafida_filtered: dict mapping a word to its count.
        sloleks: Container supporting ``in`` membership tests.

    Returns:
        New dict with the entries of ``gigafida_filtered`` whose key is in
        ``sloleks``, in the original iteration order.
    """
    return {
        lemma: freq
        for lemma, freq in gigafida_filtered.items()
        if lemma in sloleks
    }
|
||||||
|
|
||||||
|
|
||||||
|
def list_list_union(list1, list2):
    """Merge two word->count dicts, preferring the values of ``list1``.

    Args:
        list1: dict whose entries are all kept unchanged.
        list2: dict whose entries are added only for keys absent from
            ``list1``.

    Returns:
        Shallow copy of ``list1`` extended with the missing keys of
        ``list2``; neither argument is modified.
    """
    merged = copy.copy(list1)
    for key in list2:
        # setdefault leaves keys that came from list1 untouched.
        merged.setdefault(key, list2[key])
    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def list_list_subtraction(list1, list2):
    """Return the entries of ``list2`` whose key does not occur in ``list1``.

    Note the argument order: ``list1`` is what gets removed, ``list2`` is what
    the result is drawn from.

    Args:
        list1: Container of keys to exclude.
        list2: dict mapping a word to its count.

    Returns:
        New dict with the remaining entries of ``list2``.
    """
    return {word: count for word, count in list2.items() if word not in list1}
|
||||||
|
|
||||||
|
|
||||||
|
def set_set_subtraction(set1, set2):
    """Map every element of ``set2`` that is not in ``set1`` to ``-1``.

    Note the argument order: ``set1`` is what gets removed, ``set2`` is what
    the result is drawn from.

    Args:
        set1: Container of elements to exclude.
        set2: Iterable of candidate elements.

    Returns:
        dict whose keys are the remaining elements and whose values are all
        ``-1``.
    """
    remaining = (word for word in set2 if word not in set1)
    return dict.fromkeys(remaining, -1)
|
||||||
|
|
||||||
|
|
||||||
|
def create_document(list1, path):
    """Write a word->count dict as ``word<TAB>count`` lines.

    Args:
        list1: dict mapping a word to a number; the number is formatted
            with ``%d``.
        path: Output path; the file is overwritten and written as UTF-8 so
            that the result does not depend on the process locale.
    """
    with open(path, "w", encoding="utf-8") as text_file:
        for w, n in list1.items():
            text_file.write("%s\t%d\n" % (w, n))
|
||||||
|
|
||||||
|
|
||||||
|
def create_document_set(list1, path):
    """Write the elements of a collection, sorted, one per line.

    Args:
        list1: Iterable of strings.
        path: Output path; the file is overwritten and written as UTF-8 so
            that the result does not depend on the process locale.
    """
    with open(path, "w", encoding="utf-8") as text_file:
        for w in sorted(list1):
            text_file.write("%s\n" % w)
|
||||||
|
|
||||||
|
|
||||||
|
def gigafida_merge(sloleks, zele, gigafida_raw, giga_min, giga_max):
    """Return the in-range Gigafida entries covered by neither word source.

    The Gigafida list is filtered to counts in ``(giga_min, giga_max]``. An
    entry of that filtered list is dropped when its word is in ``sloleks``,
    or when its word is in ``zele`` and has a Gigafida count above 1.

    Args:
        sloleks: Container of Sloleks words.
        zele: Container of zele words.
        gigafida_raw: dict mapping a word to its count.
        giga_min: Exclusive lower bound on the count.
        giga_max: Inclusive upper bound on the count.

    Returns:
        dict with the remaining word->count entries.
    """
    in_range = filter_gigafida(gigafida_raw, giga_min, giga_max)
    above_one = filter_gigafida(gigafida_raw, 1, sys.maxsize)
    covered = list_list_union(
        set_list_intersection(in_range, sloleks),
        set_list_intersection(above_one, zele),
    )
    return list_list_subtraction(covered, in_range)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Build the keyword lists and write them to TSV files in the working directory.

    Files written:
        wordlist.tsv (only with --wordlist): the given word list extended
            with every Sloleks word that occurs in the Gigafida list.
        gigafida_3+.tsv: Gigafida entries ending in 'ti'/'či' with count > 2.
        gigafida_3+-sloleks_zele-presek.tsv: those entries restricted to
            Sloleks, plus zele words that occur in Gigafida with count > 0.
        sloleks-zele-razlika.tsv: zele words that are not in Sloleks, each
            with the value -1.
        gigafida_10+-sloleks_zele-razlika.tsv and
        gigafida_3-10-sloleks_zele-razlika.tsv: results of gigafida_merge
            for counts > 10 and for counts in (2, 10].

    Args:
        args: Parsed command-line namespace providing ``gigafida_verb_list``,
            ``sloleks``, ``zele`` and ``wordlist``.

    NOTE(review): the --wordlist branch is assumed to end after wordlist.tsv
    is written, with everything below it running unconditionally -- confirm
    against the original indentation.
    """
    gigafida_raw = read_gigafida(args.gigafida_verb_list)
    sloleks = read_sloleks(args.sloleks)
    # --zele is optional on the command line; without it there are no zele
    # words instead of a TypeError from open(None).
    zele = read_zele(args.zele) if args.zele is not None else set()

    if args.wordlist is not None:
        filtered_wordlist = read_wordlist(args.wordlist)
        filtered_wordlist.update(el for el in sloleks if el in gigafida_raw)
        create_document_set(filtered_wordlist, 'wordlist.tsv')

    gigafida_filtered3 = filter_gigafida(gigafida_raw, 2, sys.maxsize)
    sloleks_gf_intersect = set_list_intersection(gigafida_filtered3, sloleks)

    gigafida_filtered1 = filter_gigafida(gigafida_raw, 0, sys.maxsize)
    zele_gf_intersect = set_list_intersection(gigafida_filtered1, zele)
    sloleks_zele_union = list_list_union(sloleks_gf_intersect, zele_gf_intersect)
    sloleks_zele_subtraction = set_set_subtraction(sloleks, zele)

    create_document(gigafida_filtered3, 'gigafida_3+.tsv')
    create_document(sloleks_zele_union, 'gigafida_3+-sloleks_zele-presek.tsv')
    create_document(sloleks_zele_subtraction, 'sloleks-zele-razlika.tsv')

    gigafida_10 = gigafida_merge(sloleks, zele, gigafida_raw, 10, sys.maxsize)
    create_document(gigafida_10, 'gigafida_10+-sloleks_zele-razlika.tsv')

    gigafida_3_10 = gigafida_merge(sloleks, zele, gigafida_raw, 2, 10)
    create_document(gigafida_3_10, 'gigafida_3-10-sloleks_zele-razlika.tsv')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, send logging to stderr at
    # the requested level, run main() and log the elapsed wall-clock time.
    arg_parser = argparse.ArgumentParser(
        description='Extract keywords from multiple lists.')

    # Required positional inputs.
    for positional, description in (
            ('gigafida_verb_list', 'Path to gigafida list of verbs in tsv format.'),
            ('sloleks', 'Path to Sloleks in tsv format.')):
        arg_parser.add_argument(positional, help=description)

    # Optional inputs.
    arg_parser.add_argument('--zele', help='Path to zele valency dictionary.')
    for flag, description in (
            ('--wordlist', 'Path to filtered wordlist.'),
            ('--handchecked_words', 'Path to handchecked words.')):
        arg_parser.add_argument(flag, default=None, help=description)

    # There are no --min_limit/--max_limit options; the frequency limits are
    # literals inside main().
    arg_parser.add_argument(
        '--verbose',
        help='Enable verbose output to stderr',
        choices=["warning", "info", "debug"],
        default="info",
        const="info",
        nargs='?')

    args = arg_parser.parse_args()
    logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())

    started_at = time.time()
    main(args)
    elapsed = time.time() - started_at
    logging.info("TIME: {}".format(elapsed))
|
||||||
117
scripts/form_csv.py
Normal file
117
scripts/form_csv.py
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
import argparse
|
||||||
|
import csv
|
||||||
|
import os
|
||||||
|
|
||||||
|
from lxml import etree, objectify, html
|
||||||
|
|
||||||
|
|
||||||
|
def write_general_statistics(path, out_list):
    """Write per-semantic-role ratios to a tab-separated file.

    Nothing is written (and no file is created) when ``out_list`` is empty.

    Args:
        path: Output path; the file is overwritten.
        out_list: Sequence of rows; each row is written after the header
            'Semantic role', 'Valency pattern ratio', 'Valency sentence ratio'.
    """
    if len(out_list) == 0:
        return
    # newline='' lets the csv writer control line endings itself, as the csv
    # module documentation requires; otherwise its '\r\n' terminator can be
    # translated a second time on platforms that rewrite '\n'.
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t',
                            quotechar='"')
        writer.writerow(['Semantic role', 'Valency pattern ratio', 'Valency sentence ratio'])
        writer.writerows(out_list)
|
||||||
|
|
||||||
|
|
||||||
|
def write_statistics(path, out_list):
    """Write per-valency-pattern rows to a tab-separated file.

    Nothing is written (and no file is created) when ``out_list`` is empty.

    Args:
        path: Output path; the file is overwritten.
        out_list: Sequence of rows; each row is written after the header
            'Valency pattern id', 'Frequency all GF', 'Semantic role',
            'Pattern representation', 'Corpus example'.
    """
    if len(out_list) == 0:
        return
    # newline='' lets the csv writer control line endings itself, as the csv
    # module documentation requires; otherwise its '\r\n' terminator can be
    # translated a second time on platforms that rewrite '\n'.
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t',
                            quotechar='"')
        writer.writerow(['Valency pattern id', 'Frequency all GF', 'Semantic role', 'Pattern representation', 'Corpus example'])
        writer.writerows(out_list)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Convert every XML file in ``args.input`` into TSV reports in ``args.output``.

    For each input file three reports are produced, named after the entry's
    headword lemma (a report with no rows is not written):

    * ``<headword>_gf_stats.tsv``  - semantic-role ratios with source 'Gigafida 2.0'
    * ``<headword>_ssj_stats.tsv`` - semantic-role ratios with source 'ssj500k 2.2'
    * ``<headword>_patterns.tsv``  - one row per valency pattern that has a
      corpus example

    Args:
        args: Namespace with ``input`` (directory of XML files) and ``output``
            (directory for the TSV files).
    """
    for file in sorted(os.listdir(args.input)):
        path = os.path.join(args.input, file)
        tree = etree.parse(path)
        gf_output = []
        ssj_output = []
        head = next(tree.iter('head'))
        headword = head.find('headword').find('lemma').text

        # Per-semantic-role ratios, split by source corpus.
        for stats_el in tree.iter('statisticsContainer'):
            semRole = stats_el.find('semanticRole').text
            gf_pattern = None
            gf_sentence = None
            ssj_pattern = None
            ssj_sentence = None
            for el in stats_el.find('measureList'):
                measure_type = el.attrib['type']
                source = el.attrib['source']
                if measure_type == 'valency_pattern_ratio' and source == 'Gigafida 2.0':
                    gf_pattern = el.text
                if measure_type == 'valency_sentence_ratio' and source == 'Gigafida 2.0':
                    gf_sentence = el.text
                if measure_type == 'valency_pattern_ratio' and source == 'ssj500k 2.2':
                    ssj_pattern = el.text
                if measure_type == 'valency_sentence_ratio' and source == 'ssj500k 2.2':
                    ssj_sentence = el.text
            # A row is emitted only when both ratios exist for that source.
            if gf_pattern is not None and gf_sentence is not None:
                gf_output.append([semRole, gf_pattern, gf_sentence])
            if ssj_pattern is not None and ssj_sentence is not None:
                ssj_output.append([semRole, ssj_pattern, ssj_sentence])

        print(file)

        analyze_output = []
        for pattern_el in tree.iter('valencyPattern'):
            valency_pattern_id = pattern_el.attrib['id']

            # Frequency: text of the (last) measure whose source is Gigafida 2.0.
            measure = ''
            for measure_el in pattern_el.find('measureList').findall('measure'):
                if measure_el.attrib['source'] == 'Gigafida 2.0':
                    measure = measure_el.text

            # Semantic roles of the pattern, joined with underscores.
            semantic_roles = '_'.join(
                container.find('semanticRole').text
                for container in pattern_el.find('semanticRoleContainerList').findall('semanticRoleContainer')
            )

            pattern_representation = pattern_el.find('patternRepresentation').text

            # Corpus example: first example of the first container; patterns
            # without one are skipped entirely.
            example_list = pattern_el.find('exampleContainerList')
            example_container = example_list.find('exampleContainer') if example_list is not None else None
            example_el = example_container.find('corpusExample') if example_container is not None else None
            if example_el is None:
                continue
            corpus_example_text = html.tostring(example_el, encoding='unicode')

            # Re-parse the serialized example so that namespace prefixes and
            # unused xmlns declarations can be stripped from it.
            root = etree.fromstring(corpus_example_text)
            for node in root.iter():
                # Comments / processing instructions do not carry a string tag
                # and cannot be turned into a QName.
                if isinstance(node.tag, str):
                    node.tag = etree.QName(node).localname
            etree.cleanup_namespaces(root)

            corpus_example = etree.tostring(root, encoding='unicode')

            print(f"Valency pattern {valency_pattern_id}")

            analyze_output.append([valency_pattern_id, measure, semantic_roles, pattern_representation, corpus_example])

        write_general_statistics(os.path.join(args.output, headword + '_gf_stats.tsv'), gf_output)
        write_general_statistics(os.path.join(args.output, headword + '_ssj_stats.tsv'), ssj_output)
        write_statistics(os.path.join(args.output, headword + '_patterns.tsv'), analyze_output)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: both directories are mandatory, otherwise
    # main() would fail later with a TypeError on a None path.
    arg_parser = argparse.ArgumentParser(description='Export valency statistics and patterns from XML entries to TSV files.')
    arg_parser.add_argument('--input', type=str, required=True, help='Input directory')
    arg_parser.add_argument('--output', type=str, required=True, help='Output directory')

    args = arg_parser.parse_args()

    # Make sure the report directory exists before any file is written.
    os.makedirs(args.output, exist_ok=True)

    main(args)
|
||||||
1
scripts/valency
Symbolic link
1
scripts/valency
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../src/pkg/valency/valency
|
||||||
8
scripts/xsd_checker.py
Normal file
8
scripts/xsd_checker.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
from lxml import etree as lxml
|
||||||
|
|
||||||
|
# Compile the XSD into a validator object.
with open('../data/inventory.xsd') as schema_file:
    schema = lxml.XMLSchema(lxml.parse(schema_file))

# Parse the generated XML and report whether it conforms (prints True/False).
with open('../data/xmls/output.xml') as xml_file:
    is_valid = schema.validate(lxml.parse(xml_file))
print(is_valid)
|
||||||
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
@@ -37,7 +37,8 @@ app = Flask(__name__)
|
|||||||
app.config.from_object("db_config")
|
app.config.from_object("db_config")
|
||||||
mongo = PyMongo(app)
|
mongo = PyMongo(app)
|
||||||
|
|
||||||
app.config["CORPORA"] = ["ssj", "kres"]
|
# app.config["CORPORA"] = ["ssj", "kres", "gigafida"]
|
||||||
|
app.config["CORPORA"] = ["gigafida"]
|
||||||
app.config["BANNED_HEADWORDS"] = ["biti"]
|
app.config["BANNED_HEADWORDS"] = ["biti"]
|
||||||
app.config["QUERY_LIMIT"] = 1000
|
app.config["QUERY_LIMIT"] = 1000
|
||||||
|
|
||||||
@@ -247,20 +248,23 @@ def api_get_frames():
|
|||||||
if corpus not in app.config["CORPORA"]:
|
if corpus not in app.config["CORPORA"]:
|
||||||
return json.dumps({"error": "cor={kres,ssj}"})
|
return json.dumps({"error": "cor={kres,ssj}"})
|
||||||
|
|
||||||
|
log.info("Test1")
|
||||||
cur = mongo.db[corpus].find({"headwords": hw})
|
cur = mongo.db[corpus].find({"headwords": hw})
|
||||||
|
log.info("Test2")
|
||||||
frames = []
|
frames = []
|
||||||
for ent in cur[:app.config["QUERY_LIMIT"]]:
|
for ent in cur[:app.config["QUERY_LIMIT"]]:
|
||||||
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
|
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
|
||||||
cur.close()
|
cur.close()
|
||||||
|
log.info("Test3")
|
||||||
# filter by relevant hw
|
# filter by relevant hw
|
||||||
frames = [x for x in frames if x.hw == hw]
|
frames = [x for x in frames if x.hw == hw]
|
||||||
|
|
||||||
ret_frames = RF(frames, mongo.db.sensemap)
|
ret_frames = RF(frames, mongo.db.sensemap)
|
||||||
|
log.info("Test3")
|
||||||
json_ret = {"frames": []}
|
json_ret = {"frames": []}
|
||||||
for frame in ret_frames:
|
for frame in ret_frames:
|
||||||
json_ret["frames"].append(frame.to_json())
|
json_ret["frames"].append(frame.to_json())
|
||||||
|
log.info("Test4")
|
||||||
return json.dumps(json_ret)
|
return json.dumps(json_ret)
|
||||||
# return prepare_frames(ret_frames)
|
# return prepare_frames(ret_frames)
|
||||||
|
|
||||||
@@ -444,7 +448,7 @@ def _is_banned(hw):
|
|||||||
banned = False
|
banned = False
|
||||||
return banned
|
return banned
|
||||||
|
|
||||||
def prepare_app_index(appindex_json, sskj_wordlist):
|
def prepare_app_index(appindex_json):
|
||||||
log.info("[*] preparing app_index")
|
log.info("[*] preparing app_index")
|
||||||
# create app_index (used in frontend, left side word index)
|
# create app_index (used in frontend, left side word index)
|
||||||
tmp_app_index = {c: {} for c in app.config["CORPORA"]}
|
tmp_app_index = {c: {} for c in app.config["CORPORA"]}
|
||||||
@@ -452,6 +456,14 @@ def prepare_app_index(appindex_json, sskj_wordlist):
|
|||||||
res_hws = {}
|
res_hws = {}
|
||||||
res_fns = {}
|
res_fns = {}
|
||||||
|
|
||||||
|
# print('CORPUS...!!...')
|
||||||
|
# print(corpus)
|
||||||
|
# a = mongo.db[corpus]
|
||||||
|
# print('TEST_OK')
|
||||||
|
# print(a)
|
||||||
|
# print(mongo.db)
|
||||||
|
# a = mongo.db.list_collection_names()
|
||||||
|
# print('TEST_OK2')
|
||||||
nentries = mongo.db[corpus].count()
|
nentries = mongo.db[corpus].count()
|
||||||
idx = 0
|
idx = 0
|
||||||
for e in mongo.db[corpus].find({}):
|
for e in mongo.db[corpus].find({}):
|
||||||
@@ -484,6 +496,7 @@ def prepare_app_index(appindex_json, sskj_wordlist):
|
|||||||
|
|
||||||
for letter, words in alphabetical.items():
|
for letter, words in alphabetical.items():
|
||||||
filtered_words = [x for x in words if not _is_banned(x[0])]
|
filtered_words = [x for x in words if not _is_banned(x[0])]
|
||||||
|
# filtered_words = [x for x in words]
|
||||||
alphabetical[letter] = sorted(filtered_words, key=lambda x: x[0])
|
alphabetical[letter] = sorted(filtered_words, key=lambda x: x[0])
|
||||||
|
|
||||||
tmp_app_index[corpus]["words"] = alphabetical
|
tmp_app_index[corpus]["words"] = alphabetical
|
||||||
@@ -560,12 +573,16 @@ if __name__ == "__main__":
|
|||||||
if args.prepare_db:
|
if args.prepare_db:
|
||||||
with Path(args.sskj_wordlist).open("r") as fp:
|
with Path(args.sskj_wordlist).open("r") as fp:
|
||||||
sskj_wordlist = json.load(fp)
|
sskj_wordlist = json.load(fp)
|
||||||
prepare_app_index(args.appindex_json, sskj_wordlist)
|
prepare_app_index(args.appindex_json)
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
# app index from db
|
# app index from db
|
||||||
with Path(args.appindex_json).open("r") as fp:
|
with Path(args.appindex_json).open("r") as fp:
|
||||||
app.config["app_index"] = json.load(fp)
|
app.config["app_index"] = json.load(fp)
|
||||||
|
# a = app.config["app_index"]
|
||||||
|
# b = app.config["app_index"]["kres"]
|
||||||
|
# c = app.config["app_index"]["kres"]["words"]
|
||||||
|
# print('HERE')
|
||||||
|
|
||||||
# log.info("[*] Starting app.py with config:\n%s".format(config))
|
# log.info("[*] Starting app.py with config:\n%s".format(config))
|
||||||
log.info("[*] Starting app.py with config:\n{}".format(config))
|
log.info("[*] Starting app.py with config:\n{}".format(config))
|
||||||
|
|||||||
106
src/backend_flask/build_app_index.py
Normal file
106
src/backend_flask/build_app_index.py
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
|
||||||
|
from flask import Flask
|
||||||
|
from flask_pymongo import PyMongo
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
app.config.from_object("db_config")
|
||||||
|
mongo = PyMongo(app)
|
||||||
|
|
||||||
|
app.config["BANNED_HEADWORDS"] = ["biti"]
|
||||||
|
|
||||||
|
def _is_banned(hw):
    """Return True when headword ``hw`` must be left out of the app index.

    A headword is banned when it is listed in ``app.config["BANNED_HEADWORDS"]``
    or when neither ``hw`` nor ``hw + " se"`` occurs in the module-level
    ``sskj_wordlist["wordlist"]``.
    """
    if hw in app.config["BANNED_HEADWORDS"]:
        return True
    known_words = sskj_wordlist["wordlist"]
    return not (hw in known_words or (hw + " se") in known_words)
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_app_index(appindex_json, corporas, previous_json=None):
    """Build the headword/functor index for ``corporas`` and dump it as JSON.

    Args:
        appindex_json: Path of the JSON file to write.
        corporas: Names of the MongoDB collections (corpora) to index.
        previous_json: Optional path of an earlier index; its content is
            loaded first and the entries for ``corporas`` are replaced.
    """
    if previous_json:
        with Path(previous_json).open("r") as fp:
            tmp_app_index = json.load(fp)
    else:
        tmp_app_index = {}

    # Start every requested corpus from an empty entry (used by the frontend
    # for the left-side word index).
    tmp_app_index.update({name: {} for name in corporas})

    for corpus in corporas:
        headword_counts = {}
        functor_counts = {}

        collection = mongo.db[corpus]
        nentries = collection.count()
        idx = 0
        for entry in collection.find({}):
            if "headwords" not in entry:
                continue
            for hw in entry["headwords"]:
                headword_counts[hw] = headword_counts.get(hw, 0) + 1
            # Entries without functors are not counted in the progress index.
            if "functors" not in entry:
                continue
            for fn in entry["functors"]:
                functor_counts[fn] = functor_counts.get(fn, 0) + 1
            idx += 1
            if idx % 10000 == 0:
                print("indexing {}: {}/{}".format(
                    corpus, idx, nentries))

        # Group (headword, count) pairs by lower-cased first letter.
        by_letter = {}
        for word, count in headword_counts.items():
            by_letter.setdefault(word[0].lower(), []).append((word, count))

        # Drop banned headwords and order each group by headword.
        alphabetical = {
            letter: sorted(
                [pair for pair in pairs if not _is_banned(pair[0])],
                key=lambda pair: pair[0])
            for letter, pairs in by_letter.items()
        }
        tmp_app_index[corpus]["words"] = alphabetical

        tmp_app_index[corpus]["functors"] = sorted(
            functor_counts.items(), key=lambda pair: pair[0])

    with Path(appindex_json).open("w") as fp:
        json.dump(tmp_app_index, fp)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: builds the app index for the listed corpora.
    print("Starting app.py main()")
    aparser = argparse.ArgumentParser(description="Arguments for app.py")
    aparser.add_argument("--previous-json", type=str, default=None)
    # Both paths are mandatory; without them Path(None) raises a TypeError.
    aparser.add_argument("--appindex-json", type=str, required=True)
    aparser.add_argument("--sskj-wordlist", type=str, required=True)
    args = aparser.parse_args()

    corporas = ['gigafida']

    # sskj_wordlist is read as a module-level global by _is_banned().
    with Path(args.sskj_wordlist).open("r") as fp:
        sskj_wordlist = json.load(fp)

    prepare_app_index(args.appindex_json, corporas, args.previous_json)
|
||||||
@@ -1,2 +1,2 @@
|
|||||||
MONGO_URI = "mongodb://valuser:valuserpass@my_mongo:27017/valdb"
|
MONGO_URI = "mongodb://user:user@0.0.0.0:27017/valdb"
|
||||||
MONGO_AUTH_SOURCE = 'admin'
|
MONGO_AUTH_SOURCE = 'admin'
|
||||||
|
|||||||
18
src/backend_flask/get_sentence_ids.py
Normal file
18
src/backend_flask/get_sentence_ids.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Collect the top-level keys of every *.json file in input_dir and store them
# in one JSON file, keyed by the part of the file name before the first '-'.
input_dir = "/media/luka/Portable Disk/Datasets/gigafida_jos/final_json"
output_file = "../../all_sentences.json"

results = {}
filenames = os.listdir(input_dir)
total = len(filenames)
for i, filename in enumerate(filenames):
    if filename.endswith(".json"):
        with open(os.path.join(input_dir, filename)) as json_file:
            data = json.load(json_file)
        results[filename.split('-')[0]] = list(data.keys())
    # Report real percentages (0-100) of directory entries visited so far.
    print('Progress: %.2f %%' % (100.0 * (i + 1) / total))

with open(output_file, 'w') as f:
    json.dump(results, f)
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"api_addr": "http://192.168.1.117:8084"
|
"api_addr": "http://0.0.0.0:8084"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"api_addr": "http://192.168.1.117:8084"
|
"api_addr": "http://193.2.76.103:8084"
|
||||||
}
|
}
|
||||||
|
|||||||
78
src/frontend_vue/package-lock.json
generated
78
src/frontend_vue/package-lock.json
generated
@@ -120,7 +120,7 @@
|
|||||||
"ansi-styles": {
|
"ansi-styles": {
|
||||||
"version": "3.2.1",
|
"version": "3.2.1",
|
||||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
|
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
|
||||||
"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
|
"integrity": "sha1-QfuyAkPlCxK+DwS43tvwdSDOhB0=",
|
||||||
"requires": {
|
"requires": {
|
||||||
"color-convert": "^1.9.0"
|
"color-convert": "^1.9.0"
|
||||||
}
|
}
|
||||||
@@ -138,7 +138,7 @@
|
|||||||
"aproba": {
|
"aproba": {
|
||||||
"version": "1.2.0",
|
"version": "1.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/aproba/-/aproba-1.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/aproba/-/aproba-1.2.0.tgz",
|
||||||
"integrity": "sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw==",
|
"integrity": "sha1-aALmJk79GMeQobDVF/DyYnvyyUo=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"argparse": {
|
"argparse": {
|
||||||
@@ -272,7 +272,7 @@
|
|||||||
"async-limiter": {
|
"async-limiter": {
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.0.tgz",
|
||||||
"integrity": "sha512-jp/uFnooOiO+L211eZOoSyzpOITMXx1rBITauYykG3BRYPu8h0UcxsPNB04RR5vo4Tyz3+ay17tR6JVf9qzYWg==",
|
"integrity": "sha1-ePrtjD0HSrgfIrTphdeehzj3IPg=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"atob": {
|
"atob": {
|
||||||
@@ -1469,7 +1469,7 @@
|
|||||||
"buffer-indexof": {
|
"buffer-indexof": {
|
||||||
"version": "1.1.1",
|
"version": "1.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/buffer-indexof/-/buffer-indexof-1.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/buffer-indexof/-/buffer-indexof-1.1.1.tgz",
|
||||||
"integrity": "sha512-4/rOEg86jivtPTeOUUT61jJO1Ya1TrR/OkqCSZDyq84WJh3LuuiphBYJN+fm5xufIk4XAFcEwte/8WzC8If/1g==",
|
"integrity": "sha1-Uvq8xqYG0aADAoAmSO9o9jnaJow=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"buffer-xor": {
|
"buffer-xor": {
|
||||||
@@ -1493,7 +1493,7 @@
|
|||||||
"cacache": {
|
"cacache": {
|
||||||
"version": "10.0.4",
|
"version": "10.0.4",
|
||||||
"resolved": "https://registry.npmjs.org/cacache/-/cacache-10.0.4.tgz",
|
"resolved": "https://registry.npmjs.org/cacache/-/cacache-10.0.4.tgz",
|
||||||
"integrity": "sha512-Dph0MzuH+rTQzGPNT9fAnrPmMmjKfST6trxJeK7NQuHRaVw24VzPRWTmg9MpcwOVQZO0E1FBICUlFeNaKPIfHA==",
|
"integrity": "sha1-ZFI2eZnv+dQYiu/ZoU6dfGomNGA=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"bluebird": "^3.5.1",
|
"bluebird": "^3.5.1",
|
||||||
@@ -1916,7 +1916,7 @@
|
|||||||
"concat-stream": {
|
"concat-stream": {
|
||||||
"version": "1.6.2",
|
"version": "1.6.2",
|
||||||
"resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz",
|
"resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz",
|
||||||
"integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==",
|
"integrity": "sha1-kEvfGUzTEi/Gdcd/xKw9T/D9GjQ=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"buffer-from": "^1.0.0",
|
"buffer-from": "^1.0.0",
|
||||||
@@ -1969,7 +1969,7 @@
|
|||||||
"content-type": {
|
"content-type": {
|
||||||
"version": "1.0.4",
|
"version": "1.0.4",
|
||||||
"resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz",
|
"resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz",
|
||||||
"integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==",
|
"integrity": "sha1-4TjMdeBAxyexlm/l5fjJruJW/js=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"convert-hex": {
|
"convert-hex": {
|
||||||
@@ -2003,7 +2003,7 @@
|
|||||||
"copy-concurrently": {
|
"copy-concurrently": {
|
||||||
"version": "1.0.5",
|
"version": "1.0.5",
|
||||||
"resolved": "https://registry.npmjs.org/copy-concurrently/-/copy-concurrently-1.0.5.tgz",
|
"resolved": "https://registry.npmjs.org/copy-concurrently/-/copy-concurrently-1.0.5.tgz",
|
||||||
"integrity": "sha512-f2domd9fsVDFtaFcbaRZuYXwtdmnzqbADSwhSWYxYB/Q8zsdUUFMXVRwXGDMWmbEzAn1kdRrtI1T/KTFOL4X2A==",
|
"integrity": "sha1-kilzmMrjSTf8r9bsgTnBgFHwteA=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"aproba": "^1.1.1",
|
"aproba": "^1.1.1",
|
||||||
@@ -2637,7 +2637,7 @@
|
|||||||
"dns-packet": {
|
"dns-packet": {
|
||||||
"version": "1.3.1",
|
"version": "1.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/dns-packet/-/dns-packet-1.3.1.tgz",
|
"resolved": "https://registry.npmjs.org/dns-packet/-/dns-packet-1.3.1.tgz",
|
||||||
"integrity": "sha512-0UxfQkMhYAUaZI+xrNZOz/as5KgDU0M/fQ9b6SpkyLbk3GEswDi6PADJVaYJradtRVsRIlF1zLyOodbcTCDzUg==",
|
"integrity": "sha1-EqpCaYEHW+UAuRDu3NC0fdfe2lo=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"ip": "^1.1.0",
|
"ip": "^1.1.0",
|
||||||
@@ -2746,7 +2746,7 @@
|
|||||||
"ejs": {
|
"ejs": {
|
||||||
"version": "2.6.1",
|
"version": "2.6.1",
|
||||||
"resolved": "https://registry.npmjs.org/ejs/-/ejs-2.6.1.tgz",
|
"resolved": "https://registry.npmjs.org/ejs/-/ejs-2.6.1.tgz",
|
||||||
"integrity": "sha512-0xy4A/twfrRCnkhfk8ErDi5DqdAsAqeGxht4xkCUrsvhhbQNs7E+4jV0CN7+NKIY0aHE72+XvqtBIXzD31ZbXQ==",
|
"integrity": "sha1-SY7A1JVlWrxvI81hho2SZGQHGqA=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"electron-to-chromium": {
|
"electron-to-chromium": {
|
||||||
@@ -2785,7 +2785,7 @@
|
|||||||
"end-of-stream": {
|
"end-of-stream": {
|
||||||
"version": "1.4.1",
|
"version": "1.4.1",
|
||||||
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.1.tgz",
|
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.1.tgz",
|
||||||
"integrity": "sha512-1MkrZNvWTKCaigbn+W15elq2BB/L22nqrSY5DKlo3X6+vclJm8Bb5djXJBmEX6fS3+zCh/F4VBK5Z2KxJt4s2Q==",
|
"integrity": "sha1-7SljTRm6ukY7bOa4CjchPqtx7EM=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"once": "^1.4.0"
|
"once": "^1.4.0"
|
||||||
@@ -3284,7 +3284,7 @@
|
|||||||
"filesize": {
|
"filesize": {
|
||||||
"version": "3.6.1",
|
"version": "3.6.1",
|
||||||
"resolved": "https://registry.npmjs.org/filesize/-/filesize-3.6.1.tgz",
|
"resolved": "https://registry.npmjs.org/filesize/-/filesize-3.6.1.tgz",
|
||||||
"integrity": "sha512-7KjR1vv6qnicaPMi1iiTcI85CyYwRO/PSFCu6SvqL8jN2Wjt/NIYQTFtFs7fSDCYOstUkEWIQGFUg5YZQfjlcg==",
|
"integrity": "sha1-CQuz7gG2+AGoqL6Z0xcQs0Irsxc=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"fill-range": {
|
"fill-range": {
|
||||||
@@ -3313,7 +3313,7 @@
|
|||||||
"finalhandler": {
|
"finalhandler": {
|
||||||
"version": "1.1.1",
|
"version": "1.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.1.tgz",
|
||||||
"integrity": "sha512-Y1GUDo39ez4aHAw7MysnUD5JzYX+WaIj8I57kO3aEPT1fFRL4sr7mjei97FgnwhAyyzRYmQZaTHb2+9uZ1dPtg==",
|
"integrity": "sha1-7r9O2EAHnIP0JJA4ydcDAIMBsQU=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"debug": "2.6.9",
|
"debug": "2.6.9",
|
||||||
@@ -4389,7 +4389,7 @@
|
|||||||
"http-proxy": {
|
"http-proxy": {
|
||||||
"version": "1.17.0",
|
"version": "1.17.0",
|
||||||
"resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.17.0.tgz",
|
"resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.17.0.tgz",
|
||||||
"integrity": "sha512-Taqn+3nNvYRfJ3bGvKfBSRwy1v6eePlm3oc/aWVxZp57DQr5Eq3xhKJi7Z4hZpS8PC3H4qI+Yly5EmFacGuA/g==",
|
"integrity": "sha1-etOElGWPhGBeL220Q230EPTlvpo=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"eventemitter3": "^3.0.0",
|
"eventemitter3": "^3.0.0",
|
||||||
@@ -4500,7 +4500,7 @@
|
|||||||
"import-local": {
|
"import-local": {
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/import-local/-/import-local-1.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/import-local/-/import-local-1.0.0.tgz",
|
||||||
"integrity": "sha512-vAaZHieK9qjGo58agRBg+bhHX3hoTZU/Oa3GESWLz7t1U62fk63aHuDJJEteXoDeTCcPmUT+z38gkHPZkkmpmQ==",
|
"integrity": "sha1-Xk/9wD9P5sAJxnKb6yljHC+CJ7w=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"pkg-dir": "^2.0.0",
|
"pkg-dir": "^2.0.0",
|
||||||
@@ -4744,7 +4744,7 @@
|
|||||||
"is-path-in-cwd": {
|
"is-path-in-cwd": {
|
||||||
"version": "1.0.1",
|
"version": "1.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/is-path-in-cwd/-/is-path-in-cwd-1.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/is-path-in-cwd/-/is-path-in-cwd-1.0.1.tgz",
|
||||||
"integrity": "sha512-FjV1RTW48E7CWM7eE/J2NJvAEEVektecDBVBE5Hh3nM1Jd0kvhHtX68Pr3xsDf857xt3Y4AkwVULK1Vku62aaQ==",
|
"integrity": "sha1-WsSLNF72dTOb1sekipEhELJBz1I=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"is-path-inside": "^1.0.0"
|
"is-path-inside": "^1.0.0"
|
||||||
@@ -5070,7 +5070,7 @@
|
|||||||
"make-dir": {
|
"make-dir": {
|
||||||
"version": "1.3.0",
|
"version": "1.3.0",
|
||||||
"resolved": "https://registry.npmjs.org/make-dir/-/make-dir-1.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/make-dir/-/make-dir-1.3.0.tgz",
|
||||||
"integrity": "sha512-2w31R7SJtieJJnQtGc7RVL2StM2vGYVfqUOvUDxH6bC6aJTxPxTF0GnIgCyu7tjockiUWAYQRbxa7vKn34s5sQ==",
|
"integrity": "sha1-ecEDO4BRW9bSTsmTPoYMp17ifww=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"pify": "^3.0.0"
|
"pify": "^3.0.0"
|
||||||
@@ -5352,7 +5352,7 @@
|
|||||||
"mississippi": {
|
"mississippi": {
|
||||||
"version": "2.0.0",
|
"version": "2.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/mississippi/-/mississippi-2.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/mississippi/-/mississippi-2.0.0.tgz",
|
||||||
"integrity": "sha512-zHo8v+otD1J10j/tC+VNoGK9keCuByhKovAvdn74dmxJl9+mWHnx6EMsDN4lgRoMI/eYo2nchAxniIbUPb5onw==",
|
"integrity": "sha1-NEKlCPr8KFAEhv7qmUCWduTuWm8=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"concat-stream": "^1.5.0",
|
"concat-stream": "^1.5.0",
|
||||||
@@ -5419,7 +5419,7 @@
|
|||||||
"multicast-dns": {
|
"multicast-dns": {
|
||||||
"version": "6.2.3",
|
"version": "6.2.3",
|
||||||
"resolved": "https://registry.npmjs.org/multicast-dns/-/multicast-dns-6.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/multicast-dns/-/multicast-dns-6.2.3.tgz",
|
||||||
"integrity": "sha512-ji6J5enbMyGRHIAkAOu3WdV8nggqviKCEKtXcOqfphZZtQrmHKycfynJ2V7eVPUA4NhJ6V7Wf4TmGbTwKE9B6g==",
|
"integrity": "sha1-oOx72QVcQoL3kMPIL04o2zsxsik=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"dns-packet": "^1.3.1",
|
"dns-packet": "^1.3.1",
|
||||||
@@ -5502,7 +5502,7 @@
|
|||||||
"node-forge": {
|
"node-forge": {
|
||||||
"version": "0.7.5",
|
"version": "0.7.5",
|
||||||
"resolved": "https://registry.npmjs.org/node-forge/-/node-forge-0.7.5.tgz",
|
"resolved": "https://registry.npmjs.org/node-forge/-/node-forge-0.7.5.tgz",
|
||||||
"integrity": "sha512-MmbQJ2MTESTjt3Gi/3yG1wGpIMhUfcIypUCGtTizFR9IiccFwxSpfp0vtIZlkFclEqERemxfnSdZEMR9VqqEFQ==",
|
"integrity": "sha1-bBUsNFzhHFL0ZcKr2VfoY5zWdN8=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"node-libs-browser": {
|
"node-libs-browser": {
|
||||||
@@ -5698,7 +5698,7 @@
|
|||||||
"obuf": {
|
"obuf": {
|
||||||
"version": "1.1.2",
|
"version": "1.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/obuf/-/obuf-1.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/obuf/-/obuf-1.1.2.tgz",
|
||||||
"integrity": "sha512-PX1wu0AmAdPqOL1mWhqmlOd8kOIZQwGZw6rh7uby9fTc5lhaOWFLX3I6R1hrF9k3zUY40e6igsLGkDXK92LJNg==",
|
"integrity": "sha1-Cb6jND1BhZ69RGKS0RydTbYZCE4=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"on-finished": {
|
"on-finished": {
|
||||||
@@ -5836,7 +5836,7 @@
|
|||||||
"p-map": {
|
"p-map": {
|
||||||
"version": "1.2.0",
|
"version": "1.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/p-map/-/p-map-1.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/p-map/-/p-map-1.2.0.tgz",
|
||||||
"integrity": "sha512-r6zKACMNhjPJMTl8KcFH4li//gkrXWfbD6feV8l6doRHlzljFWGJ2AP6iKaCJXyZmAUMOPtvbW7EXkbWO/pLEA==",
|
"integrity": "sha1-5OlPMR6rvIYzoeeZCBZfyiYkG2s=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"p-try": {
|
"p-try": {
|
||||||
@@ -5957,7 +5957,7 @@
|
|||||||
"path-type": {
|
"path-type": {
|
||||||
"version": "3.0.0",
|
"version": "3.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz",
|
||||||
"integrity": "sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==",
|
"integrity": "sha1-zvMdyOCho7sNEFwM2Xzzv0f0428=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"pify": "^3.0.0"
|
"pify": "^3.0.0"
|
||||||
@@ -8309,7 +8309,7 @@
|
|||||||
"pump": {
|
"pump": {
|
||||||
"version": "2.0.1",
|
"version": "2.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz",
|
||||||
"integrity": "sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==",
|
"integrity": "sha1-Ejma3W5M91Jtlzy8i1zi4pCLOQk=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"end-of-stream": "^1.1.0",
|
"end-of-stream": "^1.1.0",
|
||||||
@@ -8319,7 +8319,7 @@
|
|||||||
"pumpify": {
|
"pumpify": {
|
||||||
"version": "1.5.1",
|
"version": "1.5.1",
|
||||||
"resolved": "https://registry.npmjs.org/pumpify/-/pumpify-1.5.1.tgz",
|
"resolved": "https://registry.npmjs.org/pumpify/-/pumpify-1.5.1.tgz",
|
||||||
"integrity": "sha512-oClZI37HvuUJJxSKKrC17bZ9Cu0ZYhEAGPsPUy9KlMUmv9dKX2o77RUmq7f3XjIxbwyGwYzbzQ1L2Ks8sIradQ==",
|
"integrity": "sha1-NlE74karJ1cLGjdKXOJ4v9dDcM4=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"duplexify": "^3.6.0",
|
"duplexify": "^3.6.0",
|
||||||
@@ -8813,7 +8813,7 @@
|
|||||||
"sax": {
|
"sax": {
|
||||||
"version": "1.2.4",
|
"version": "1.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
|
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
|
||||||
"integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==",
|
"integrity": "sha1-KBYjTiN4vdxOU1T6tcqold9xANk=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"schema-utils": {
|
"schema-utils": {
|
||||||
@@ -8863,7 +8863,7 @@
|
|||||||
"send": {
|
"send": {
|
||||||
"version": "0.16.2",
|
"version": "0.16.2",
|
||||||
"resolved": "https://registry.npmjs.org/send/-/send-0.16.2.tgz",
|
"resolved": "https://registry.npmjs.org/send/-/send-0.16.2.tgz",
|
||||||
"integrity": "sha512-E64YFPUssFHEFBvpbbjr44NCLtI1AohxQ8ZSiJjQLskAdKuriYEP6VyGEsRDH8ScozGpkaX1BGvhanqCwkcEZw==",
|
"integrity": "sha1-bsyh4PjBVtFBWXVZhI32RzCmu8E=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"debug": "2.6.9",
|
"debug": "2.6.9",
|
||||||
@@ -8884,7 +8884,7 @@
|
|||||||
"mime": {
|
"mime": {
|
||||||
"version": "1.4.1",
|
"version": "1.4.1",
|
||||||
"resolved": "https://registry.npmjs.org/mime/-/mime-1.4.1.tgz",
|
"resolved": "https://registry.npmjs.org/mime/-/mime-1.4.1.tgz",
|
||||||
"integrity": "sha512-KI1+qOZu5DcW6wayYHSzR/tXKCDC5Om4s1z2QJjDULzLcmf3DvzS7oluY4HCTrc+9FiKmWUgeNLg7W3uIQvxtQ==",
|
"integrity": "sha1-Eh+evEnjdm8xGnbh+hyAA8SwOqY=",
|
||||||
"dev": true
|
"dev": true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -8913,7 +8913,7 @@
|
|||||||
"serve-static": {
|
"serve-static": {
|
||||||
"version": "1.13.2",
|
"version": "1.13.2",
|
||||||
"resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.13.2.tgz",
|
"resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.13.2.tgz",
|
||||||
"integrity": "sha512-p/tdJrO4U387R9oMjb1oj7qSMaMfmOyd4j9hOFoxZe2baQszgHcSWjuya/CiT5kgZZKRudHNOA0pYXOl8rQ5nw==",
|
"integrity": "sha1-CV6Ecv1bRiN9tQzkhqQ/S4bGzsE=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"encodeurl": "~1.0.2",
|
"encodeurl": "~1.0.2",
|
||||||
@@ -9011,7 +9011,7 @@
|
|||||||
"shellwords": {
|
"shellwords": {
|
||||||
"version": "0.1.1",
|
"version": "0.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/shellwords/-/shellwords-0.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/shellwords/-/shellwords-0.1.1.tgz",
|
||||||
"integrity": "sha512-vFwSUfQvqybiICwZY5+DAWIPLKsWO31Q91JSKl3UYv+K5c2QRPzn0qzec6QPu1Qc9eHYItiP3NdJqNVqetYAww==",
|
"integrity": "sha1-1rkYHBpI05cyTISHHvvPxz/AZUs=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"signal-exit": {
|
"signal-exit": {
|
||||||
@@ -9137,7 +9137,7 @@
|
|||||||
"sockjs": {
|
"sockjs": {
|
||||||
"version": "0.3.19",
|
"version": "0.3.19",
|
||||||
"resolved": "https://registry.npmjs.org/sockjs/-/sockjs-0.3.19.tgz",
|
"resolved": "https://registry.npmjs.org/sockjs/-/sockjs-0.3.19.tgz",
|
||||||
"integrity": "sha512-V48klKZl8T6MzatbLlzzRNhMepEys9Y4oGFpypBFFn1gLI/QQ9HtLLyWJNbPlwGLelOVOEijUbTTJeLLI59jLw==",
|
"integrity": "sha1-2Xa76ACve9IK4IWY1YI5NQiZPA0=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"faye-websocket": "^0.10.0",
|
"faye-websocket": "^0.10.0",
|
||||||
@@ -9245,7 +9245,7 @@
|
|||||||
"spdx-expression-parse": {
|
"spdx-expression-parse": {
|
||||||
"version": "3.0.0",
|
"version": "3.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/spdx-expression-parse/-/spdx-expression-parse-3.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/spdx-expression-parse/-/spdx-expression-parse-3.0.0.tgz",
|
||||||
"integrity": "sha512-Yg6D3XpRD4kkOmTpdgbUiEJFKghJH03fiC1OPll5h/0sO6neh2jqRDVHOQ4o/LMea0tgCkbMgea5ip/e+MkWyg==",
|
"integrity": "sha1-meEZt6XaAOBUkcn6M4t5BII7QdA=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"spdx-exceptions": "^2.1.0",
|
"spdx-exceptions": "^2.1.0",
|
||||||
@@ -9348,7 +9348,7 @@
|
|||||||
"ssri": {
|
"ssri": {
|
||||||
"version": "5.3.0",
|
"version": "5.3.0",
|
||||||
"resolved": "https://registry.npmjs.org/ssri/-/ssri-5.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/ssri/-/ssri-5.3.0.tgz",
|
||||||
"integrity": "sha512-XRSIPqLij52MtgoQavH/x/dU1qVKtWUAAZeOHsR9c2Ddi4XerFy3mc1alf+dLJKl9EUIm/Ht+EowFkTUOA6GAQ==",
|
"integrity": "sha1-ujhyycbTOgcEp9cf8EXl7EiZnQY=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"safe-buffer": "^5.1.1"
|
"safe-buffer": "^5.1.1"
|
||||||
@@ -10055,7 +10055,7 @@
|
|||||||
"vendors": {
|
"vendors": {
|
||||||
"version": "1.0.2",
|
"version": "1.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/vendors/-/vendors-1.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/vendors/-/vendors-1.0.2.tgz",
|
||||||
"integrity": "sha512-w/hry/368nO21AN9QljsaIhb9ZiZtZARoVH5f3CsFbawdLdayCgKRPup7CggujvySMxx0I91NOyxdVENohprLQ==",
|
"integrity": "sha1-f8te759WI7FWvOqJ7DfWNnbyGAE=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"vm-browserify": {
|
"vm-browserify": {
|
||||||
@@ -10211,7 +10211,7 @@
|
|||||||
"wbuf": {
|
"wbuf": {
|
||||||
"version": "1.7.3",
|
"version": "1.7.3",
|
||||||
"resolved": "https://registry.npmjs.org/wbuf/-/wbuf-1.7.3.tgz",
|
"resolved": "https://registry.npmjs.org/wbuf/-/wbuf-1.7.3.tgz",
|
||||||
"integrity": "sha512-O84QOnr0icsbFGLS0O3bI5FswxzRr8/gHwWkDlQFskhSPryQXvrTMxjxGP4+iWYoauLoBvfDpkrOauZ+0iZpDA==",
|
"integrity": "sha1-wdjRSTFtPqhShIiVy2oL/oh7h98=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"minimalistic-assert": "^1.0.0"
|
"minimalistic-assert": "^1.0.0"
|
||||||
@@ -10344,7 +10344,7 @@
|
|||||||
"webpack-dev-middleware": {
|
"webpack-dev-middleware": {
|
||||||
"version": "1.12.2",
|
"version": "1.12.2",
|
||||||
"resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-1.12.2.tgz",
|
"resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-1.12.2.tgz",
|
||||||
"integrity": "sha512-FCrqPy1yy/sN6U/SaEZcHKRXGlqU0DUaEBL45jkUYoB8foVb6wCnbIJ1HKIx+qUFTW+3JpVcCJCxZ8VATL4e+A==",
|
"integrity": "sha1-+PwRIM47T8VoDO7LQ9d3lmshEF4=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"memory-fs": "~0.4.1",
|
"memory-fs": "~0.4.1",
|
||||||
@@ -10605,7 +10605,7 @@
|
|||||||
"websocket-extensions": {
|
"websocket-extensions": {
|
||||||
"version": "0.1.3",
|
"version": "0.1.3",
|
||||||
"resolved": "https://registry.npmjs.org/websocket-extensions/-/websocket-extensions-0.1.3.tgz",
|
"resolved": "https://registry.npmjs.org/websocket-extensions/-/websocket-extensions-0.1.3.tgz",
|
||||||
"integrity": "sha512-nqHUnMXmBzT0w570r2JpJxfiSD1IzoI+HGVdd3aZ0yNi3ngvQ4jv1dtHt5VGxfI2yj5yqImPhOK4vmIh2xMbGg==",
|
"integrity": "sha1-XS/yKXcAPsaHpLhwc9+7rBRszyk=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"whet.extend": {
|
"whet.extend": {
|
||||||
@@ -10644,7 +10644,7 @@
|
|||||||
"worker-farm": {
|
"worker-farm": {
|
||||||
"version": "1.6.0",
|
"version": "1.6.0",
|
||||||
"resolved": "https://registry.npmjs.org/worker-farm/-/worker-farm-1.6.0.tgz",
|
"resolved": "https://registry.npmjs.org/worker-farm/-/worker-farm-1.6.0.tgz",
|
||||||
"integrity": "sha512-6w+3tHbM87WnSWnENBUvA2pxJPLhQUg5LKwUQHq3r+XPhIM+Gh2R5ycbwPCyuGbNg+lPgdcnQUhuC02kJCvffQ==",
|
"integrity": "sha1-rsxAWXb6talVJhgIRvDboojzpKA=",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"errno": "~0.1.7"
|
"errno": "~0.1.7"
|
||||||
@@ -10715,7 +10715,7 @@
|
|||||||
"y18n": {
|
"y18n": {
|
||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz",
|
||||||
"integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==",
|
"integrity": "sha1-le+U+F7MgdAHwmThkKEg8KPIVms=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"yallist": {
|
"yallist": {
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ export default {
|
|||||||
name: "Nav",
|
name: "Nav",
|
||||||
props: ["appState"],
|
props: ["appState"],
|
||||||
data() {return {
|
data() {return {
|
||||||
optCorpora: ["kres", "ssj"],
|
optCorpora: ["kres", "ssj", "gigafida"],
|
||||||
optIndexes: [
|
optIndexes: [
|
||||||
{key: "besede", val: "words"},
|
{key: "besede", val: "words"},
|
||||||
{key: "udeleženske vloge", val: "functors"},
|
{key: "udeleženske vloge", val: "functors"},
|
||||||
|
|||||||
0
src/pkg/__init__.py
Normal file
0
src/pkg/__init__.py
Normal file
Submodule src/pkg/cjvt-corpusparser updated: 01adf47b9b...92b3ac4ea3
1
src/pkg/luscenje_struktur
Submodule
1
src/pkg/luscenje_struktur
Submodule
Submodule src/pkg/luscenje_struktur added at 8c87d07b8a
@@ -3,6 +3,41 @@ from corpusparser import enriched_lemma
|
|||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def frames_from_db_entry_headword(dbent, headword):
    """Build one Frame per token of a sentence entry whose lemma is `headword`.

    Args:
        dbent: mapping describing one sentence. Reads ``dbent["sid"]``,
            ``dbent["tokens"]`` (iterable of token mappings, each with a
            ``"tid"``) and, when present, ``dbent["srl_links"]`` (iterable of
            link mappings with ``"from"``, ``"to"`` and ``"afun"``).
        headword: value compared against ``enriched_lemma(token)`` of each
            link's source token; links whose source differs are ignored.

    Returns:
        A list of Frame objects, one per distinct source token id that
        matched `headword`. Empty when the entry has no ``"srl_links"`` key
        or no link matches.

    Raises:
        KeyError: if a link's ``"from"`` id has no corresponding token in
            ``dbent["tokens"]``, or a required key is missing.
    """
    def _full_tid(tid):
        # Sentence-qualified token id: "<sid>.<tid>".
        return ".".join([dbent["sid"], str(tid)])

    # Token lookup keyed by the stringified token id.
    token_dict = {str(tok["tid"]): tok for tok in dbent["tokens"]}

    if "srl_links" not in dbent:
        return []

    # Group the SRL links by their source token, keeping only links whose
    # source token's enriched lemma equals the requested headword.
    links_by_head = {}
    for srl in dbent["srl_links"]:
        key = str(srl["from"])
        if enriched_lemma(token_dict[key]) != headword:
            continue
        links_by_head.setdefault(key, []).append(srl)

    frames = []
    for hwtid, srlarr in links_by_head.items():
        frames.append(Frame(
            hw_lemma=enriched_lemma(token_dict[hwtid]),
            tids=[_full_tid(hwtid)],
            slots=[
                Slot(
                    functor=srl["afun"],
                    tids=[_full_tid(srl["to"])]
                ) for srl in srlarr
            ],
            # A single sentence, given as (full token id, token) pairs.
            sentences=[
                [(_full_tid(t["tid"]), t) for t in dbent["tokens"]],
            ]
        ))
    return frames
|
||||||
|
|
||||||
def frames_from_db_entry(dbent):
|
def frames_from_db_entry(dbent):
|
||||||
def _full_tid(tid):
|
def _full_tid(tid):
|
||||||
return ".".join([dbent["sid"], str(tid)])
|
return ".".join([dbent["sid"], str(tid)])
|
||||||
|
|||||||
Reference in New Issue
Block a user