Compare commits

20 commits

| Author | SHA1 | Commit date |
|---|---|---|
|  | b4db4e5255 |  |
|  | ef02583d72 |  |
|  | 155c0b2c3d |  |
|  | c96b199932 |  |
|  | 02c0e74798 |  |
|  | 2ff339e24c |  |
|  | 707034153c |  |
|  | ba72802f1f |  |
|  | 14a6e2423b |  |
|  | c5fc78dca1 |  |
|  | 439446b1d0 |  |
|  | eca236bc7e |  |
|  | 11d3dfc0e6 |  |
|  | 1aff111cb9 |  |
|  | 910955abb8 |  |
|  | bf0970a90a |  |
|  | 81395890ab |  |
|  | fd94627fdb |  |
|  | 2c5fef29df |  |
|  | 8f747ef75c |  |
.gitignore (vendored, 3 changed lines)

@@ -1,4 +1,7 @@
 data/samples/
+data/wordlist.json
+data/sskj_senses.json
+data/appindex.json
 *egg-info/
 *.pyc
 src/frontend_vue/node_modules/
Dockerfile-backend-flask (new regular file, 16 lines)

@@ -0,0 +1,16 @@
+FROM cjvt-python-env
+
+RUN mkdir -p /project/src/backend_flask
+RUN mkdir -p /project/src/pkg
+RUN mkdir -p /project/data
+
+COPY src/backend_flask /project/src/backend_flask
+COPY src/pkg /project/src/pkg
+
+COPY data/appindex.json /project/data
+
+COPY src/backend_flask/entrypoint.sh /.
+
+COPY src/backend_flask/conf_files/prod_conf.yaml /project
+
+ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
Makefile (46 changed lines)

@@ -11,9 +11,18 @@ MAKE_ROOT = $(shell pwd)
 # SSJ_FILE = "$(MAKE_ROOT)/data/samples/ssj_xml/ssj500k-sl.body.sample.xml"
 SSJ_FILE = "$(MAKE_ROOT)/data/ssj_file_link"
 # KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_xml"
-KRES_FOLDER = "$(MAKE_ROOT)/data/kres_xml_folder_link"
+# KRES_FOLDER = "$(MAKE_ROOT)/data/kres_xml_folder_link"
+KRES_FOLDER = "/home/kristjan/kres_data/payload/kres_xml"
 # KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/samples/kres_srl_json"
-KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/kres_json_folder_link"
+# KRES_SRL_FOLDER = "$(MAKE_ROOT)/data/kres_json_folder_link"
+KRES_SRL_FOLDER = "/home/kristjan/kres_data/payload/kres_json"
+
+# This file comes with the source code. Make sure you unpack it and name it right.
+SSKJ_WORDLIST = "$(MAKE_ROOT)/data/wordlist.json"
+SSKJ_JSON = "$(MAKE_ROOT)/data/sskj_senses.json"
+
+# for pre-generating the index of all headwords and functors
+APPINDEX_PATH = "$(MAKE_ROOT)/data/appindex.json"
+
 OUTPUT = "db"
 # OUTPUT = "file"

@@ -24,7 +33,7 @@ DBADDR = "0.0.0.0:27017" # don't use localhost
 # create it from env.default
 include env.local
 
-N_CORES = 5
+N_CORES = 3
 # insert kres files into database in chunks, for fewer connections
 KRES_CHUNK_SIZE = 30
 

@@ -59,6 +68,7 @@ python-env:
 python-env-install:
 	pip3 install -e src/pkg/cjvt-corpusparser/.
 	pip3 install -e src/pkg/valency/.
+	pip3 install -e src/pkg/seqparser/.
 
 # from inside python-env container:
 data/samples:

@@ -95,22 +105,46 @@ frontend-dev:
 frontend-prod:
 	cd src/frontend_vue/; $(MAKE) prod
 
+build-frontend-prod:
+	cd src/frontend_vue/; $(MAKE) build-prod
+
+
 ## Backend
 
 # runs once and exits before the app starts
+# need to extract ./data/sskj_data.tar.gz first
 backend-prepare-db:
 	cd ./src/backend_flask; python3 app.py \
 		--config-file ./conf_files/dev_conf.yaml \
 		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
+		--sskj-wordlist $(SSKJ_WORDLIST) \
+		--appindex-json $(APPINDEX_PATH) \
 		--prepare-db
 
 backend-dev:
 	cd ./src/backend_flask; python3 app.py \
 		--config-file ./conf_files/dev_conf.yaml \
-		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR)
+		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
+		--appindex-json $(APPINDEX_PATH)
 
-backend-prod:
+backend-prod-old:
 	cd ./src/backend_flask; python3 app.py \
 		--config-file ./conf_files/prod_conf.yaml \
-		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR)
+		--dbuser $(DB_USR_USER) --dbpass $(DB_USR_PASS) --dbaddr $(DBADDR) \
+		--appindex-json $(APPINDEX_PATH)
 
+build-backend-flask:
+	cd ./src/backend_flask; $(MAKE) build
+
+## add sskj senses to db (generated with pkg/seqparser)
+sskj-senses:
+	python3 ./src/pkg/seqparser/seqparser/main.py \
+		--sskj-json $(SSKJ_JSON) \
+		--operation "senses_to_db" \
+		--dbaddr $(DBADDR) \
+		--dbuser $(DB_USR_USER) \
+		--dbpass $(DB_USR_PASS)
+
+deploy-prod-stack:
+	- docker network create val-backend
+	docker stack deploy -c production.yaml val
README.md (83 changed lines)

@@ -52,8 +52,6 @@ $ make fill-database-ssj
 $ make fill-database-kres
 # You can detach from the running process using Ctrl-p + Ctrl-q
-
-
 
 # this is a long operation
 # if running on a remote server, use nohup:
 $ nohup $(make fill-database > fill-database.log) &

@@ -74,6 +72,10 @@ $ make python-env-install
 # needs to be run once to modify a new database
 $ make backend-prepare-db
 
+# if you have the file prepared (sskj_senses.json), you can
+# fill the database with some senses
+$ make sskj-senses
+
 # with debugger
 $ make backend-dev
 

@@ -100,3 +102,80 @@ $ make frontend-prod
 ```
 
 App available on: `http://0.0.0.0:8080`.
+
+
+## Production deployment
+Prerequisite: a machine with ports 80 and 8084 free.
+
+### Database
+Either build the database from scratch (a lengthy process) using the instructions above, or migrate the database from the faculty server (recommended).
+
+Build the my-mongo container:
+```bash
+# run once and destroy containers
+$ make database-service
+```
+
+### Backend
+Set the database connection details in `/src/backend_flask/db_config.py`.
+Change 'valuser' and 'valuserpass' to the database user:
+```bash
+mongodb://valuser:valuserpass@my_mongo/valdb
+```
+In the line above, replace `valuser` with the username and `valuserpass` with the password that was used to create the database tables (the values were set in the root Makefile).
+
+You can also set the number of workers in `/src/backend_flask/entrypoint.sh`:
+in the line with `gunicorn -t 4 -b 127.0.0.1:8084 app:app`, edit the `-t` parameter.
+A rule of thumb is 2x the number of available CPU cores.
+
+Build the backend container:
+```bash
+# From git root
+$ make build-backend-flask
+```
+
+### Frontend
+Set the server address (where the backend will be running) in `src/frontend_vue/config/config_prod.json`.
+Build the `/dist` folder that contains the static app (we will be using Nginx to serve it).
+```bash
+# From git root
+$ make build-frontend-prod
+```
+
+All set, now run the stack.
+Stack configuration is in `production.yaml`.
+```bash
+# From git root
+$ make deploy-prod-stack
+```
+
+
+## Uploading a mongo dump
+There is a 15 GB mongo dump containing the fully processed kres and ssj data.
+We can use that file to deploy our application.
+With this database, we will need a minimum of 8 GB of RAM to serve the app.
+If the server is struggling, the frontend will throw "Network errors".
+
+Check `0.0.0.0:8081` and remove (or back up) the current example database `valdb`.
+
+Run the stack with the mongo port mapped
+(uncomment the lines in `production.yaml`):
+```yml
+ports:
+  - 27017:27017
+```
+
+Run a separate my-mongo container with the dump mounted:
+```bash
+$ docker run -it --net host -v <local_dump_path>:/dumps my-mongo /bin/bash
+```
+
+Inside the container (edit the username and password):
+```bash
+$ mongorestore /dumps/valdb --db valdb --uri=mongodb://valuser:valuserpass@0.0.0.0:27017
+```
+
+After uploading, restart the stack with `27017` commented out.
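The dump-upload recipe above can be sanity-checked from Python once `mongorestore` finishes. This is a minimal sketch, assuming `pymongo` is installed on the host and the stack is still running with port 27017 mapped; the credentials mirror the README's `valuser`/`valuserpass` placeholders and should be replaced with the values set in the root Makefile.

```python
# Post-restore sanity check (assumes pymongo and a reachable mongo on 27017).
from pymongo import MongoClient

client = MongoClient(
    "mongodb://valuser:valuserpass@0.0.0.0:27017/valdb",
    serverSelectionTimeoutMS=5000,  # fail fast if the stack is not up
)
# The restored valdb should expose the collections the app queries.
print(client.valdb.list_collection_names())  # expect e.g. 'kres', 'ssj', 'senses'
client.close()
```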
@@ -1 +0,0 @@
-/home/kristjan/kres_data/payload/kres_json/

@@ -1 +0,0 @@
-/home/kristjan/kres_mount/kres_parsed/tei/
data/sskj_data.tar.gz (binary, regular file)
Binary file not shown.

data/sskj_senses.json (regular file, 1 changed line)
File diff suppressed because one or more lines are too long.
@@ -2,7 +2,7 @@ version: '3.1'
 
 services:
 
-  my-mongo:
+  my_mongo:
     image: my-mongo
     restart: always
     ports:

@@ -13,7 +13,7 @@ services:
     volumes:
      - ${HOME}/mongo_container/data/:/data/db
 
-  mongo-express:
+  mongo_express:
     image: mongo-express
     restart: always
     ports:

@@ -23,4 +23,4 @@ services:
       ME_CONFIG_BASICAUTH_PASSWORD: ${MONGOEXPRESS_PASS}
       ME_CONFIG_MONGODB_ADMINUSERNAME: ${DB_ADM_USER}
       ME_CONFIG_MONGODB_ADMINPASSWORD: ${DB_ADM_PASS}
-      ME_CONFIG_MONGODB_SERVER: my-mongo
+      ME_CONFIG_MONGODB_SERVER: my_mongo
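The `my-mongo` → `my_mongo` rename in this compose file matters because, on a compose or swarm network, the service name doubles as the DNS hostname other containers use; it therefore has to match the host part of the backend's Mongo URI (`...@my_mongo:27017/valdb` in `db_config.py` later in this diff). A quick in-container sketch of that resolution (hostname taken from this diff; it resolves only on the stack network):

```python
# Resolves the my_mongo service name via the stack network's DNS;
# a service-name/URI mismatch fails exactly at this lookup.
import socket

print(socket.gethostbyname("my_mongo"))
```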
@@ -1,26 +1,26 @@
-FROM ubuntu:16.04
+FROM ubuntu:18.04
 
 RUN apt-get update --fix-missing
 RUN apt-get install -y \
     vim \
     python3 \
     python3-pip \
-    sshfs
+    sshfs \
+    curl
 
+RUN pip3 install --upgrade pip
+
 RUN pip3 install \
     lxml \
     pandas \
     sklearn \
     argparse \
+    pyyaml \
     pathlib \
+    flask \
+    flask_cors \
     pymongo \
-    flask
-RUN apt-get install -y \
-    curl
+    flask-pymongo \
+    gunicorn
 
 ENV PYTHONIOENCODING UTF-8
 
-RUN pip3 install \
-    pyyaml \
-    flask_cors
@@ -1,4 +1,4 @@
-IMAGE_NAME="cjvt-python-env"
+IMAGE_NAME="cjvt-python-env" # don't change, used in backend_flask/Makefile
 CNNAME="python-env"
 
 all: build run
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+echo "testing entrypoint."
+$(exit 1)
+exit 0
nginx.conf (new regular file, 26 lines)

@@ -0,0 +1,26 @@
+# frontend
+server {
+    listen 80;
+    server_name _;
+
+    location / {
+        root /srv/dist;
+        index index.html index.htm;
+    }
+
+    location /home {
+        return 301 /;
+    }
+}
+
+# backend
+server {
+    listen 8084;
+    server_name _;
+
+    location / {
+        proxy_set_header X-Forward-For $proxy_add_x_forwarded_for;
+        proxy_set_header Host $http_host;
+        proxy_pass http://backend_flask:8084;
+    }
+}
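Once the stack is up, this routing can be smoke-tested from the host. A hedged sketch, assuming the stack runs locally and that `/api/words/kres` (a route defined in the Flask app later in this diff) is served by `backend_flask`:

```python
# Port 80 serves the static frontend from /srv/dist; port 8084 proxies
# through to the backend_flask service named in proxy_pass above.
from urllib.request import urlopen

print(urlopen("http://0.0.0.0/").status)                     # frontend index.html
print(urlopen("http://0.0.0.0:8084/api/words/kres").status)  # proxied Flask API
```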
production.yaml (new regular file, 43 lines)

@@ -0,0 +1,43 @@
+version: '3.1'
+
+services:
+
+  my_mongo:
+    image: my-mongo
+    restart: always
+    # ports:
+    #   - 27017:27017
+    expose:
+      - 27017
+    environment:
+      MONGO_INITDB_ROOT_USERNAME: valuser
+      MONGO_INITDB_ROOT_PASSWORD: valuserpass
+    volumes:
+      - ${HOME}/mongo_container/data/:/data/db
+
+  mongo_express:
+    image: mongo-express
+    restart: always
+    ports:
+      - 8081:8081
+    environment:
+      ME_CONFIG_BASICAUTH_USERNAME: test
+      ME_CONFIG_BASICAUTH_PASSWORD: test
+      ME_CONFIG_MONGODB_ADMINUSERNAME: valadmin
+      ME_CONFIG_MONGODB_ADMINPASSWORD: rolercoaster
+      ME_CONFIG_MONGODB_SERVER: my_mongo
+
+  backend_flask:
+    image: backend-flask
+    expose:
+      - 8084
+
+  proxy:
+    image: nginx
+    ports:
+      - 80:80
+      - 8084:8084
+    volumes:
+      - ./nginx.conf:/etc/nginx/conf.d/default.conf
+      - ./src/frontend_vue/dist:/srv/dist
src/backend_flask/Makefile (new regular file, 16 lines)

@@ -0,0 +1,16 @@
+IMG="backend-flask"
+CNT="backend_flask"
+
+clean:
+	- docker rm -f $(CNT)
+
+run: clean build
+	docker run -d --net host --name $(CNT) $(IMG)
+	docker logs -f $(CNT)
+
+build: build-cjvt-python-env
+	# docker build . -f ../../Dockerfile-backend-flask -t $(IMG)
+	cd ../..; docker build . -f Dockerfile-backend-flask -t $(IMG)
+
+build-cjvt-python-env:
+	cd ../../dockerfiles/python-env; $(MAKE) build
@@ -26,23 +26,21 @@ from email.mime.text import MIMEText
 from copy import deepcopy as DC
 from pathlib import Path
 from pymongo import MongoClient
+from flask_pymongo import PyMongo
 import pymongo
 import argparse
 
-# some db collections
-USERS_COLL = "users"
-TOKENS_COLL = "usertokens"
-SENSES_COLL = "senses"
-SENSEMAP_COLL = "sensemap"
-
-# pre-generated data (gui leftside word index)
-CORPORA = ["ssj", "kres"]
-app_index = None
-
 log = logging.getLogger(__name__)
-valdb = None
 app = Flask(__name__)
+
+app.config.from_object("db_config")
+mongo = PyMongo(app)
+
+app.config["CORPORA"] = ["ssj", "kres"]
+app.config["BANNED_HEADWORDS"] = ["biti"]
+app.config["QUERY_LIMIT"] = 1000
+
 
 # when running vuejs via webpack
 # CORS(app)

@@ -57,7 +55,7 @@ CORS(app)
 @app.route("/api/dev")
 def api_dev():
     print("DEV")
-    cur = valdb.kres.find({"headwords": "nagovarjati"})
+    cur = mongo.db.kres.find({"headwords": "nagovarjati"})
     frames = []
     for ent in cur:
         frames += frames_from_db_entry(ent)

@@ -70,12 +68,12 @@ def api_dev():
 @app.route("/api/words/<corpus>")
 def api_words(corpus):
     return json.dumps({
-        "sorted_words": app_index[corpus]["words"],  # todo - make corpus as arg
+        "sorted_words": app.config["app_index"][corpus]["words"],  # todo - make corpus as arg
     })
 
 @app.route("/api/functors/<corpus>")
 def api_functors(corpus):
-    return json.dumps(app_index[corpus]["functors"])
+    return json.dumps(app.config["app_index"][corpus]["functors"])
 
 # INDEX SELECTION -------------------^
 

@@ -96,7 +94,7 @@ def api_register():
     ):
         return "ERR"
     email_hash = hashlib.sha256(email.encode("utf-8")).hexdigest()
-    existing = list(valdb[USERS_COLL].find({
+    existing = list(mongo.db.users.find({
         "$or": [{"username": username}, {"email": email_hash}]
     }))
     if len(existing) > 0:

@@ -107,7 +105,7 @@ def api_register():
             password.encode("utf-8")).hexdigest(),
         "email": email_hash
     }
-    valdb[USERS_COLL].insert(entry)
+    mongo.db.users.insert(entry)
     return "OK"
 
 

@@ -119,7 +117,7 @@ def api_login():
     password = data["password"]
     hpass = hashlib.sha256(password.encode("utf-8")).hexdigest()
 
-    db_user = list(valdb[USERS_COLL].find({
+    db_user = list(mongo.db.users.find({
         "username": username,
         "hpass": hpass
     }))

@@ -133,7 +131,7 @@ def api_login():
         "date": datetime.datetime.utcnow(),
         "token": token
     }
-    valdb[TOKENS_COLL].update(
+    mongo.db.usertokens.update(
         {"username": token_entry["username"]},
         token_entry,
         upsert=True

@@ -176,7 +174,7 @@ def api_new_pass():
     username = data["username"]
     email = data["email"]
     hemail = hashlib.sha256(email.encode("utf-8")).hexdigest()
-    db_res = list(valdb[USERS_COLL].find({
+    db_res = list(mongo.db.users.find({
         "username": username,
         "email": hemail
     }))

@@ -188,7 +186,7 @@ def api_new_pass():
         string.ascii_letters + string.digits) for i in range(10)])
     # update locally
     hpass = hashlib.sha256(new_pass.encode("utf-8")).hexdigest()
-    valdb[USERS_COLL].update(
+    mongo.db.users.update(
         {
             "username": username,
             "email": hemail

@@ -206,12 +204,12 @@ def token_to_username(token):
     key = {
         "token": token
     }
-    res = list(valdb[TOKENS_COLL].find(key))
+    res = list(mongo.db.usertokens.find(key))
     if len(res) != 1:
         return None
     username = res[0]["username"]
     # update deletion interval
-    valdb[TOKENS_COLL].update(
+    mongo.db.usertokens.update(
         key, {"$set": {"date": datetime.datetime.utcnow()}})
     return username
 

@@ -246,18 +244,19 @@ def api_get_frames():
     RF = reduce_functions[rf_name]["f"]
 
     corpus = request.args.get("cor")
-    if corpus not in CORPORA:
+    if corpus not in app.config["CORPORA"]:
         return json.dumps({"error": "cor={kres,ssj}"})
 
-    cur = valdb[corpus].find({"headwords": hw})
+    cur = mongo.db[corpus].find({"headwords": hw})
     frames = []
-    for ent in cur:
+    for ent in cur[:app.config["QUERY_LIMIT"]]:
         frames += frames_from_db_entry(ent)  # pre-process this step for prod TODO
+    cur.close()
 
     # filter by relevant hw
     frames = [x for x in frames if x.hw == hw]
 
-    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
+    ret_frames = RF(frames, mongo.db.sensemap)
 
     json_ret = {"frames": []}
     for frame in ret_frames:

@@ -298,19 +297,20 @@ def api_get_functor_frames():
     RF = reduce_functions[rf_name]["f"]
 
     corpus = request.args.get("cor")
-    if corpus not in CORPORA:
+    if corpus not in app.config["CORPORA"]:
         return json.dumps({"error": "cor={kres,ssj}"})
 
-    cur = valdb[corpus].find({"functors": functor})
+    cur = mongo.db[corpus].find({"functors": functor})
     frames = []
-    for ent in cur:
+    for ent in cur[:app.config["QUERY_LIMIT"]]:
         frames += frames_from_db_entry(ent)  # pre-process this step for prod TODO
+    cur.close()
 
     # filter by relevant functor
     frames = [x for x in frames if functor in x.get_functors()]
 
     # raw_frames = vallex.functors_index[functor]  # TODO
-    ret_frames = RF(frames, valdb[SENSEMAP_COLL])
+    ret_frames = RF(frames, mongo.db.sensemap)
     ret_frames = _aggregate_by_hw(ret_frames)
 
     json_ret = {"frames": []}

@@ -322,15 +322,17 @@ def api_get_functor_frames():
 
 
 # SENSES ----------------------------.
+# ssj_id is legacy notation, read
+# it as general sentence_id
+
 @app.route("/api/senses/get")
 def api_senses_get():
     # returns senses and mapping for hw
     hw = request.args.get("hw")
-    senses = list(valdb[SENSES_COLL].find({
+    senses = list(mongo.db.senses.find({
         "hw": hw
     }))
-    sense_map_query = list(valdb[SENSEMAP_COLL].find({
+    sense_map_query = list(mongo.db.sensemap.find({
         "hw": hw
     }))
     # aggregation by max date possible on DB side

@@ -407,8 +409,10 @@ def api_senses_update():
         ns["date"] = tmp_dt
         id_map[frontend_sense_id] = new_sense_id
 
+        print(ns)
+
         # insert into db
-        valdb[SENSES_COLL].insert(ns)
+        mongo.db.senses.insert(ns)
 
     # replace tmp_id with mongo's _id
     for ssj_id, el in sense_map.items():

@@ -423,22 +427,34 @@ def api_senses_update():
             "date": datetime.datetime.utcnow()
         }
         # vallex.db["v2_sense_map"].update(key, data, upsert=True)
-        valdb[SENSEMAP_COLL].insert(data)
+        mongo.db.sensemap.insert(data)
     return "OK"
 
 # SENSES ----------------------------^
 
 
 # APP PREFLIGHT ---------------------.
+def _is_banned(hw):
+    banned = True
+    if hw in app.config["BANNED_HEADWORDS"]:
+        banned = True
+    elif hw in sskj_wordlist["wordlist"]:
+        banned = False
+    elif (hw + " se") in sskj_wordlist["wordlist"]:
+        banned = False
+    return banned
+
-def prepare_app_index():
+def prepare_app_index(appindex_json, sskj_wordlist):
     log.info("[*] preparing app_index")
     # create app_index (used in frontend, left side word index)
-    tmp_app_index = {c: {} for c in CORPORA}
-    for corpus in CORPORA:
+    tmp_app_index = {c: {} for c in app.config["CORPORA"]}
+    for corpus in app.config["CORPORA"]:
         res_hws = {}
         res_fns = {}
-        for e in valdb[corpus].find({}):
+
+        nentries = mongo.db[corpus].count()
+        idx = 0
+        for e in mongo.db[corpus].find({}):
             if "headwords" not in e:
                 continue
             for hw in e["headwords"]:

@@ -453,6 +469,10 @@ def prepare_app_index():
                 res_fns[fn] += 1
             else:
                 res_fns[fn] = 1
+            idx += 1
+            if idx % 10000 == 0:
+                log.debug("indexing {}: {}/{}".format(
+                    corpus, idx, nentries))
 
         alphabetical = {}
         for k, e in res_hws.items():

@@ -462,19 +482,47 @@ def prepare_app_index():
             else:
                 alphabetical[fst] = [(k, e)]
 
-        for k, e in alphabetical.items():
-            alphabetical[k] = sorted(e, key=lambda x: x[0])
+        for letter, words in alphabetical.items():
+            filtered_words = [x for x in words if not _is_banned(x[0])]
+            alphabetical[letter] = sorted(filtered_words, key=lambda x: x[0])
+
         tmp_app_index[corpus]["words"] = alphabetical
 
         functors = [(k, e) for (k, e) in res_fns.items()]
         functors = sorted(functors, key=lambda x: x[0])
         tmp_app_index[corpus]["functors"] = functors
 
-    valdb.appindex.update({"dockey": "appindex"}, {"dockey": "appindex", "data": tmp_app_index}, upsert=True)
+    with Path(appindex_json).open("w") as fp:
+        json.dump(tmp_app_index, fp)
 
 # APP PREFLIGHT ---------------------^
 
 
+def init_wsgi(app):
+    print("Initiating wsgi")
+    config = None
+    with Path("/project/prod_conf.yaml").open("r") as fp:
+        config = list(yaml.safe_load_all(fp))[0]
+
+    app.debug = False
+    logfile = config["logfile"]
+    logging.basicConfig(filename=logfile, level=logging.INFO)
+
+    # app index from db
+    with Path(config["appindex"]).open("r") as fp:
+        # a dirty hack but ok
+        app.config["app_index"] = json.load(fp)
+
+    # log.info("[*] Starting app.py with config:\n%s".format(config))
+    log.info("[*] Starting app.py with config:\n{}".format(config))
+
+
+# if we don't pass arguments, assume production environment (gunicorn)
+if "gunicorn" in sys.argv[0]:
+    init_wsgi(app)
+
+
 if __name__ == "__main__":
     print("Starting app.py main()")
     aparser = argparse.ArgumentParser(description="Arguments for app.py")

@@ -483,9 +531,10 @@ if __name__ == "__main__":
     aparser.add_argument("--dbuser", type=str)
     aparser.add_argument("--dbpass", type=str)
     aparser.add_argument("--dbaddr", type=str)
+    aparser.add_argument("--sskj-wordlist", type=str)
+    aparser.add_argument("--appindex-json", type=str)
     args = aparser.parse_args()
 
-    config = None
     with Path(args.config_file).open("r") as fp:
         config = list(yaml.safe_load_all(fp))[0]
 

@@ -496,25 +545,31 @@ if __name__ == "__main__":
     else:
         logging.basicConfig(filename=logfile, level=logging.INFO)
 
+    """
     # db login
     client = MongoClient(
         "mongodb://{}".format(args.dbaddr),
         username=args.dbuser,
         password=args.dbpass,
-        authSource="valdb",
+        authSource="mongo.db",
         authMechanism='SCRAM-SHA-1'
     )
-    valdb = client.valdb
+    valdb = client.mongo.db
+    """
 
     if args.prepare_db:
-        prepare_app_index()
+        with Path(args.sskj_wordlist).open("r") as fp:
+            sskj_wordlist = json.load(fp)
+        prepare_app_index(args.appindex_json, sskj_wordlist)
         sys.exit()
 
     # app index from db
-    app_index = (valdb.appindex.find_one({"dockey": "appindex"}))["data"]
+    with Path(args.appindex_json).open("r") as fp:
+        app.config["app_index"] = json.load(fp)
 
     # log.info("[*] Starting app.py with config:\n%s".format(config))
     log.info("[*] Starting app.py with config:\n{}".format(config))
 
     app.run(host=str(config["host"]), port=int(config["port"]))
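The central refactor above replaces the module-level `valdb` MongoClient handle and the `*_COLL` constants with `flask_pymongo`. A minimal sketch of that pattern, with an illustrative URI (the real one lives in `db_config.py`) and a hypothetical `/api/usercount` route:

```python
# flask_pymongo reads MONGO_URI (and MONGO_AUTH_SOURCE) from app.config;
# mongo.db is then the database named in the URI, so the old
# valdb[USERS_COLL] access becomes mongo.db.users.
from flask import Flask
from flask_pymongo import PyMongo

app = Flask(__name__)
app.config["MONGO_URI"] = "mongodb://valuser:valuserpass@my_mongo:27017/valdb"
mongo = PyMongo(app)

@app.route("/api/usercount")
def usercount():
    # collections hang off mongo.db; no module-level handle is needed
    return str(mongo.db.users.count_documents({}))
```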
@@ -4,3 +4,4 @@ port: 8084
 host: localhost
 logfile: "/var/log/valency_backend.log"
 ---
+
@@ -1,6 +1,5 @@
 ---
-debug: True
 port: 8084
 host: 0.0.0.0
 logfile: "/var/log/valency_backend.log"
----
+appindex: /project/data/appindex.json
@@ -0,0 +1,2 @@
+MONGO_URI = "mongodb://sizif:p5e3r4u8t7@my_mongo:27017/valdb"
+MONGO_AUTH_SOURCE = 'admin'
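`app.config.from_object("db_config")` in the Flask app imports this module and copies only its uppercase attributes into the config, which is how `MONGO_URI` and `MONGO_AUTH_SOURCE` reach flask_pymongo. A small sketch of that mechanism (run next to `db_config.py`):

```python
# Flask copies uppercase module attributes on from_object(); lowercase
# names in db_config.py would be silently ignored.
from flask import Flask

app = Flask(__name__)
app.config.from_object("db_config")
print(app.config["MONGO_URI"])    # picked up: uppercase constant
print("mongo_uri" in app.config)  # False: lowercase names are not copied
```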
src/backend_flask/entrypoint.sh (new executable file, 8 lines)

@@ -0,0 +1,8 @@
+#!/bin/bash
+
+pip3 install -e /project/src/pkg/cjvt-corpusparser/.
+pip3 install -e /project/src/pkg/valency/.
+pip3 install -e /project/src/pkg/seqparser/.
+
+cd /project/src/backend_flask
+gunicorn -t 4 -b 0.0.0.0:8084 app:app
@@ -1,73 +0,0 @@
-# Deprecated: headword creation moved to be part of corpusparser,
-# index creation moved to app.py as a preprocessing (with exit) step
-
-CORPORA = ["kres", "ssj"]
-
-if __name__ == "__main__":
-
-    valdb = None
-
-    def helper_tid_to_token(tid, tokens):
-        for t in tokens:
-            if t["tid"] == tid:
-                return t
-        return None
-
-    # update entries (add headwords and functors for indexing)
-    for corpus in CORPORA:
-        for e in valdb[corpus].find({}):
-            if e["srl_links"] is None:
-                e["headwords"] = []
-                e["functors"] = []
-            else:
-                hw_tids = list(set([x["from"] for x in e["srl_links"]]))
-                hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
-                headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
-                e["headwords"] = headwords
-
-                functors = list(set([x["afun"] for x in e["srl_links"]]))
-                e["functors"] = functors
-
-            valdb[corpus].save(e)
-
-        valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
-        valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
-
-    # create app_index (used in frontend, left side word index)
-    tmp_app_index = {c: {} for c in CORPORA}
-    for corpus in CORPORA:
-        res_hws = {}
-        res_fns = {}
-        for e in valdb[corpus].find({}):
-            if "headwords" not in e:
-                continue
-            for hw in e["headwords"]:
-                if hw in res_hws:
-                    res_hws[hw] += 1
-                else:
-                    res_hws[hw] = 1
-            if "functors" not in e:
-                continue
-            for fn in e["functors"]:
-                if fn in res_fns:
-                    res_fns[fn] += 1
-                else:
-                    res_fns[fn] = 1
-
-        alphabetical = {}
-        for k, e in res_hws.items():
-            fst = k[0].lower()
-            if fst in alphabetical:
-                alphabetical[fst].append((k, e))
-            else:
-                alphabetical[fst] = [(k, e)]
-
-        for k, e in alphabetical.items():
-            alphabetical[k] = sorted(e, key=lambda x: x[0])
-        tmp_app_index[corpus]["words"] = alphabetical
-
-        functors = [(k, e) for (k, e) in res_fns.items()]
-        functors = sorted(functors, key=lambda x: x[0])
-        tmp_app_index[corpus]["functors"] = functors
-
-    valdb.appindex.update({"dockey": "appindex"}, {"dockey": "appindex", "data": tmp_app_index}, upsert=True)
@@ -9,8 +9,7 @@ info:
 	echo "Pick either dev or prod."
 
 clean:
-	- docker kill $(CONNAME)
-	- docker rm $(CONNAME)
+	- docker rm -f $(CONNAME)
 
 build-container:
 	docker build . -t $(IMGNAME)

@@ -19,5 +18,12 @@ dev: build-container clean
 	docker run --name $(CONNAME) -d -p 8080:8080 -v $(shell pwd):/src $(IMGNAME) /src/ops_scripts/dev.sh
 
 prod: build-container clean
-	docker run --name $(CONNAME) -d -p 8080:8080 -v $(shell pwd):/src $(IMGNAME) /src/ops_scripts/prod.sh
+	docker run --restart always --name $(CONNAME) -d -p 8080:8080 -v $(shell pwd):/src $(IMGNAME) /src/ops_scripts/prod.sh
+
+node-env: clean
+	docker run --name $(CONNAME) -it -p 8080:8080 -v $(shell pwd):/src $(IMGNAME)
+
+build-prod: build-container clean
+	docker run --rm -v $(shell pwd):/src $(IMGNAME) /src/ops_scripts/prod.sh
@@ -1,3 +1,3 @@
 {
-    "api_addr": "http://193.2.76.103"
+    "api_addr": "http://193.2.76.103:8084"
 }

@@ -1,3 +1,3 @@
 {
-    "api_addr": "http://193.2.76.103:8084"
+    "api_addr": "http://0.0.0.0:8084"
 }
@@ -0,0 +1 @@
+<!DOCTYPE html><html><head><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><title>vue_frontend</title><link href=/static/css/app.05a420a551b5bded5dfec6b370d3edca.css rel=stylesheet></head><body><div id=app></div><script type=text/javascript src=/static/js/manifest.2ae2e69a05c33dfc65f8.js></script><script type=text/javascript src=/static/js/vendor.5d3d2fd333c62579d227.js></script><script type=text/javascript src=/static/js/app.8538f7133303d3e391b2.js></script></body></html>

File diff suppressed because one or more lines are too long. (4 files)

(new file, 2 lines: generated webpack bootstrap static/js/manifest.2ae2e69a05c33dfc65f8.js; minified content not reproduced)

(new file, 1 line: generated source map manifest.2ae2e69a05c33dfc65f8.js.map; mapping content not reproduced)

File diff suppressed because one or more lines are too long. (2 files)
@@ -4,4 +4,4 @@ cp ./config/config_prod.json ./config/config.json
 
 npm install
 npm run build
-http-server /src/dist
+# http-server /src/dist
src/frontend_vue/package-lock.json (generated, 2611 changed lines)
File diff is too large to display.
@@ -10,50 +10,52 @@
     "build": "node build/build.js"
   },
   "dependencies": {
+    "ajv": "^6.10.0",
     "axios": "^0.18.0",
-    "bootstrap-vue": "^2.0.0-rc.11",
+    "bootstrap-vue": "^2.0.0-rc.19",
+    "jquery": "^3.4.0",
     "sha256": "^0.2.0",
-    "vue": "^2.5.2",
-    "vue-cookies": "^1.5.6",
-    "vue-router": "^3.0.1",
+    "vue": "^2.6.10",
+    "vue-cookies": "^1.5.13",
+    "vue-router": "^3.0.6",
     "vue-spinner": "^1.0.3"
   },
   "devDependencies": {
     "autoprefixer": "^7.1.2",
     "babel-core": "^6.22.1",
     "babel-helper-vue-jsx-merge-props": "^2.0.3",
-    "babel-loader": "^7.1.1",
+    "babel-loader": "^7.1.5",
     "babel-plugin-syntax-jsx": "^6.18.0",
     "babel-plugin-transform-runtime": "^6.22.0",
     "babel-plugin-transform-vue-jsx": "^3.5.0",
     "babel-preset-env": "^1.3.2",
     "babel-preset-stage-2": "^6.22.0",
-    "chalk": "^2.0.1",
-    "copy-webpack-plugin": "^4.0.1",
-    "css-loader": "^0.28.0",
+    "chalk": "^2.4.2",
+    "copy-webpack-plugin": "^4.6.0",
+    "css-loader": "^2.1.1",
     "extract-text-webpack-plugin": "^3.0.0",
     "file-loader": "^1.1.4",
     "friendly-errors-webpack-plugin": "^1.6.1",
     "html-webpack-plugin": "^2.30.1",
-    "node-notifier": "^5.1.2",
+    "node-notifier": "^5.4.0",
     "optimize-css-assets-webpack-plugin": "^3.2.0",
     "ora": "^1.2.0",
-    "portfinder": "^1.0.13",
+    "portfinder": "^1.0.20",
     "postcss-import": "^11.0.0",
-    "postcss-loader": "^2.0.8",
+    "postcss-loader": "^2.1.6",
     "postcss-url": "^7.2.1",
-    "rimraf": "^2.6.0",
-    "semver": "^5.3.0",
+    "rimraf": "^2.6.3",
+    "semver": "^5.7.0",
     "shelljs": "^0.7.6",
-    "uglifyjs-webpack-plugin": "^1.1.1",
-    "url-loader": "^0.5.8",
-    "vue-loader": "^13.3.0",
+    "uglifyjs-webpack-plugin": "^1.3.0",
+    "url-loader": "^1.1.2",
+    "vue-loader": "^13.7.3",
     "vue-style-loader": "^3.0.1",
-    "vue-template-compiler": "^2.5.2",
+    "vue-template-compiler": "^2.6.10",
     "webpack": "^3.6.0",
-    "webpack-bundle-analyzer": "^2.9.0",
-    "webpack-dev-server": "^2.9.1",
-    "webpack-merge": "^4.1.0"
+    "webpack-bundle-analyzer": "^3.3.2",
+    "webpack-dev-server": "^2.11.5",
+    "webpack-merge": "^4.2.1"
   },
   "engines": {
     "node": ">= 6.0.0",
@@ -7,3 +7,25 @@ export default {
   name: 'App',
 }
 </script>
+
+<style>
+body {
+  font-family: cambria;
+}
+.ulred {
+  color: #b71511;
+  color: rgb(183,21,17);
+}
+.lmenu td {
+  color: #9e9e9e;
+}
+.redlinks a {
+  color: #9e9e9e;
+}
+.redlinks a:hover {
+  color: #b71511;
+}
+.text-secondary {
+  color: #9e9e9e !important;
+}
+</style>
@@ -6,7 +6,12 @@
       <div class="col-sm-7">
         <div class="row">
           <div class="col-sm-12">
+            <span v-if="frameData.sentences.length < frameData.sentence_count">
+              št. povedi: {{ frameData.sentence_count }} (prikazanih {{ frameData.sentences.length }})
+            </span>
+            <span v-else>
               št. povedi: {{ frameData.sentences.length }}
+            </span>
           </div>
         </div>
       </div>
 
@@ -1,20 +1,17 @@
 <template>
   <div>
-    <p
-      v-if="this.$root.store.api_error !== null"
-      class="text-warning"
-    >
-      api_error: {{ this.$root.store.api_error }}
-    </p>
     <Nav></Nav>
     <div class="my-home container-fluid">
       <div class="row">
-        <div id="serach" class="col-sm-2 border-right fill" :key=this.$root.store.indexReloader>
+        <div id="search" class="col-sm-2 border-right fill" :key=this.$root.store.indexReloader>
           <LWords
             v-if="this.$root.store.selIndex.val === 'words'"></LWords>
           <LFunctors v-else></LFunctors>
         </div>
         <div class="col-sm-10">
+          <p class="text-danger" v-if="this.$root.store.api_error != null">
+            {{ this.$root.store.api_error }}
+          </p>
           <router-view></router-view>
         </div>
       </div>
@@ -1,5 +1,5 @@
 <template>
-  <div>
+  <div class="redlinks">
     <table>
       <tr v-for="functor in functors">
         <td><a href="#" v-on:click="selectFunctor(functor)">{{ functor[0] }}</a></td>
@@ -1,5 +1,5 @@
 <template>
-  <div>
+  <div class="redlinks">
     <select v-model="selectedLetter">
       <option v-for="letter in alphabet" :value="letter">
         {{ letter.toUpperCase() }} ({{ getNumWords(letter) }})
@@ -1,5 +1,5 @@
 <template>
-  <div>
+  <div class="redlinks">
     <div class="col-sm-2">
       <a href="#" v-on:click="this.$root.routeBack">Nazaj</a>
     </div>
@@ -1,6 +1,11 @@
 <template>
+
+  <!--in case of error-->
+  <div v-if="this.$root.store.api_error != null">
+  </div>
+
   <!--load mode-->
-  <div v-if="show_loader">
+  <div v-else-if="state === 'loading'">
     <pulse-loader :color="loader_color"></pulse-loader>
   </div>
 

@@ -76,16 +81,11 @@ export default {
     },
     state: "loading", // editing, normal
     request_reload: false,
-    loader_color: "#007bff",
+    loader_color: "#b71511",
   }},
   created: function () {
     this.reload()
   },
-  computed: {
-    show_loader: function () {
-      return this.state === "loading" && this.$root.store.api_error !== null
-    }
-  },
   watch: {
     hw: function () {
       this.reload()

@@ -118,6 +118,7 @@
       }
     }
     var component = this
+    component.state = "loading"
     this.$http.get(
       this.$root.store.api_addr +
       "/api/functor-frames" +

@@ -131,6 +132,7 @@
       })
       .catch(function(error) {
         component.$root.store.api_error = error
+        component.state = "error"
       })
   },
   getFrames: function (hw, reduce_fun=null) {

@@ -149,6 +151,7 @@
       }
     }
     var component = this
+    component.state = "loading"
    this.$http.get(
       this.$root.store.api_addr + "/api/frames" +
       "?hw=" + hw + "&rf=" + reduce_fun +

@@ -161,6 +164,7 @@
       })
       .catch(function(error) {
         component.$root.store.api_error = error
+        component.state = "error"
       })
   },
   buildSentences: function () {
@@ -1,8 +1,11 @@
 <template>
   <nav>
-    <b-navbar toggleable="md" type="light" variant="light">
+    <b-navbar id="nav-red-bg" toggleable="md" type="light" variant="light">
       <b-navbar-toggle target="nav_collapse"></b-navbar-toggle>
-      <b-navbar-brand>Vezljivostni vzorci slovenskih glagolov</b-navbar-brand>
+      <!--b-navbar-brand>Vezljivostni vzorci slovenskih glagolov</b-navbar-brand-->
+      <b-navbar-brand class=cursorpointer v-on:click="goHome">
+        VEZLJIVOSTNI VZORCI SLOVENSKIH GLAGOLOV
+      </b-navbar-brand>
       <b-collapse is-nav id="nav_collapse">
 
         <b-navbar-nav>

@@ -100,7 +103,25 @@ export default {
       this.$router.push({
         name: "Home"
       })
+    },
+    goHome() {
+      this.$router.replace({path: "/home"})
     }
   }
 }
 </script>
+
+<style>
+#nav-red-bg {
+  background-color: rgb(183,21,17,0.9) !important;
+}
+nav a {
+  color: white;
+}
+nav a:hover {
+  color: white;
+}
+.cursorpointer {
+  cursor: pointer;
+}
+</style>
@@ -1,5 +1,5 @@
 <template>
-  <div>
+  <div class=redlinks>
     <div class="col-sm-2">
       <a href="#" v-on:click="this.$root.routeBack">Nazaj</a>
     </div>
@@ -1,5 +1,5 @@
 <template>
-<div>
+<div class="redlinks">
   <div class="col-sm-2">
     <a href="#" v-on:click="this.$root.routeBack">Nazaj</a>
   </div>
@@ -9,19 +9,19 @@
   </div>
   <div class="form-group">
     <input
-      type="text"
-      class="form-control js-login__username"
-      placeholder="Uporabnik"
-      v-model="credentials.username"
+      type="email"
+      class="form-control"
+      placeholder="e-pošta"
+      v-model="credentials.email"
       autocomplete="off"
     >
   </div>
   <div class="form-group">
     <input
-      type="email"
-      class="form-control"
-      placeholder="e-pošta"
-      v-model="credentials.email"
+      type="text"
+      class="form-control js-login__username"
+      placeholder="Uporabnik"
+      v-model="credentials.username"
       autocomplete="off"
     >
   </div>
@@ -38,7 +38,7 @@
     <input
       type="password"
       class="form-control js-login__password "
-      placeholder="Ponovite geslo."
+      placeholder="Ponovite geslo"
       v-model="credentials.snd_password"
       autocomplete="off"
     >
src/pkg/seqparser/Makefile (new file, 9 lines)
@@ -0,0 +1,9 @@
SSKJ_HTML = /home/kristjan/git/diploma/data/sskj/sskj2_v1.html
SSKJ_JSON = "./sskj_senses.json"
WORDLIST = "./wordlist.json"

gen_json_files:
	cd seqparser; python3 main.py \
		--sskj-html=$(SSKJ_HTML) \
		--sskj-json=$(SSKJ_JSON) \
		--wordlist=$(WORDLIST)
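For orientation, a minimal Python sketch (not part of the commit) of what the gen_json_files target drives: main.py, shown further below, dispatches on an --operation flag, and the two generation steps map onto Seqparser calls. Filenames here are illustrative.

# Hypothetical driver sketch; assumes the seqparser package dir is the cwd,
# as in the Makefile recipe above. Filenames are illustrative.
from Seqparser import Seqparser

sqp = Seqparser()
# --operation=gen_sskj_json: SSKJ HTML dump -> JSON keyed by cleaned headword
sqp.html_to_verb_adj_json("sskj2_v1.html", "sskj_senses.json")
# --operation=gen_wordlist: senses JSON -> {"wordlist": [...]} of headwords
sqp.generate_sskj_wordlist("sskj_senses.json", "wordlist.json")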
@@ -0,0 +1 @@
bs4
@@ -0,0 +1,313 @@
from bs4 import BeautifulSoup as BS
import re
from collections import defaultdict
from time import time
import pickle
import json
from copy import deepcopy as DC
from pathlib import Path


# Match sense ordinals (1., 2., ...)
rord = re.compile(r"^ *[0-9]+\. *$")

# Get rid of accented characters.
intab = "ÁÉÍÓÚàáäçèéêìíîñòóôöùúüčŔŕ"
outtb = "AEIOUaaaceeeiiinoooouuučRr"
transtab = str.maketrans(intab, outtb)


def d_time(fun):
    def wrapper(*args, **kwargs):
        tstart = time()
        fun(*args, **kwargs)
        duration = time() - tstart
        print("Function {} ran for {:.2f} s.".format(
            fun.__name__, duration))
    return wrapper


class Seqparser:
    def __init__(self):
        pass

    @d_time
    def html_to_verb_adj_json(self, infile, outfile):
        out_dict = defaultdict(list)
        with Path(infile).open("rb") as fp:
            for line in fp:
                data = self.parse_line(line)
                if data is None:
                    continue
                out_dict[data["izt_clean"]].append(data)
        with Path(outfile).open("w") as fp:
            json.dump(dict(out_dict), fp)

    @d_time
    def generate_sskj_wordlist(self, in_json_file, out_wordlist):
        wordlist = None
        with Path(in_json_file).open("r") as fp:
            jdata = json.load(fp)
            wordlist = list(jdata.keys())
        with Path(out_wordlist).open("w") as fp:
            json.dump({"wordlist": wordlist}, fp)

    # main functions
    def html_to_raw_pickle(self, sskj_html_filepath, raw_pickle_filepath):
        entries = dict(self.parse_file(sskj_html_filepath, self.parse_line))
        print("entries len: " + str(len(entries)))
        with open(raw_pickle_filepath, "wb") as f:
            tmpstr = json.dumps(dict(entries))
            pickle.dump(tmpstr, f)
            # debugging

    def raw_pickle_to_parsed_pickle(
        self, raw_pickle_filepath, parsed_pickle_filepath,
        se_list_filepath
    ):
        data = self.load_raw_pickle(raw_pickle_filepath)
        print("raw_pickle data len: " + str(len(data)))
        se_list = self.gen_se_list(data)
        print("se_list len: " + str(len(se_list)))
        with open(se_list_filepath, "wb") as f:
            pickle.dump(se_list, f)
        data1 = self.remove_se(data)
        data2 = self.reorganize(data1, se_list)
        print("data2 len: " + str(len(data2.keys())))
        with open(parsed_pickle_filepath, "wb") as f:
            pickle.dump(data2, f)

    # helper html reading functions
    def parse_file(self, path, f_parse_line):
        tstart = time()
        entries = defaultdict(list)
        with open(path, "r") as f:
            for line in f:
                data = f_parse_line(line)
                if data is not None:
                    entries[data["izt_clean"]].append(data)
        print("parse_file({}) in {:.2f}s".format(path, time() - tstart))
        return entries

    def parse_line(self, line):
        def helper_bv_set(g_or_p):
            if g_or_p not in ["G", "P"]:
                print("Err g_or_p.")
                exit(1)
            if data.get("bv") is not None:
                if data["bv"] != g_or_p:
                    print(str(line))
                    # exit(1)
            data["bv"] = g_or_p

        data = {
            "izt": "",
            "izt_clean": "",
            "senses": defaultdict(list)
        }
        soup = BS(line, "html.parser")

        current_sense_id = "0"
        for span in soup.find_all("span"):

            # sense id
            if span.string is not None:
                rmatch = rord.match(span.string)
                if rmatch is not None:
                    current_sense_id = rmatch.group().strip()

            title = span.attrs.get("title")
            if title is not None:
                title = title.lower()

                # only verbs and adjectives
                if "glagol" in title:
                    helper_bv_set("G")
                    data["bv_full"] = title
                elif "pridevn" in title:
                    helper_bv_set("P")
                    data["bv_full"] = title

                # žšč
                if title == "iztočnica":
                    data["izt"] = span.string
                    data["izt_clean"] = span.string.translate(transtab).lower()

                # sense description
                if title == "razlaga" and span.string is not None:
                    data["senses"][current_sense_id].append(
                        ("razl", span.string))
                    if "pridevnik od" in span.string:
                        helper_bv_set("P")

                if title == "sopomenka":
                    subspan = span.find_all("a")[0]
                    if subspan.string is not None:
                        data["senses"][current_sense_id].append(
                            ("sopo", subspan.string))

        # save verbs and adjectives
        if (
            ("bv" not in data) or
            (data["bv"] != "P" and data["bv"] != "G")
        ):
            return None

        # sanity check
        if data["bv"] == "P" and " se" in data["izt_clean"]:
            print(data)
            exit(1)

        # append _ to adjective keywords
        if data["bv"] == "P":
            data["izt_clean"] = data["izt_clean"] + "_"

        # cleanup
        if "bv" not in data:
            print("Should not be here (no bv).")
            exit(1)
        del(data["bv"])
        if "bv_full" in data:
            del(data["bv_full"])

        return data

    # helper functions
    def load_raw_pickle(self, raw_pickle_filepath):
        with open(raw_pickle_filepath, "rb") as f:
            tmpstr = pickle.load(f)
            return json.loads(tmpstr)

    def helper_loop(self, data, fnc):
        for k, lst in data.items():
            for el in lst:
                fnc(el)

    def gen_se_list(self, data):

        def fnc1(el):
            ic = el["izt_clean"]
            if " se" in ic:
                se_list.append(ic)

        def fnc2(el):
            ic = el["izt_clean"]
            if ic in se_pruned:
                se_pruned.remove(ic)

        # hw entries that only exist with " se"
        se_list = []
        self.helper_loop(data, fnc1)
        se_pruned = set([hw.split(" se")[0] for hw in se_list])
        self.helper_loop(data, fnc2)
        return sorted(list(se_pruned))

    def remove_se(self, data):

        def fnc1(el):
            nel = DC(el)
            ic = nel["izt_clean"]
            if " se" in ic:
                nic = ic.split(" se")[0]
                nel["izt_clean"] = nic
            data_new[nel["izt_clean"]].append(nel)

        data_new = defaultdict(list)
        self.helper_loop(data, fnc1)
        return dict(data_new)

    def reorganize(self, data, se_list):
        # some hw entries have several headwords,
        # some senses have subsenses
        # index everything, make 1 object per hw

        def helper_prune(sense_str):
            # remove space padding
            sense_str = sense_str.strip()

            if len(sense_str) == 1:
                return sense_str

            # remove banned characters from string ending
            banned = ": ; . , - ! ?".split(" ")
            if sense_str[-1] in banned:
                return sense_str[:-1]

            return sense_str

        data_new = {}
        for k, lst in data.items():
            new_el = {
                "hw": k,
                "has_se": k in se_list,
                "senses": []
            }

            # if there is a single hw entry, hw_id is 0
            if len(lst) == 1:
                homonym_id = -1
            else:
                homonym_id = 0

            # loop homonyms
            for el in lst:
                homonym_id += 1
                # loop top lvl sense ids
                for sense_id, sens_lst in el["senses"].items():
                    # loop subsenses
                    for i, sens in enumerate(sens_lst):
                        nsid = sense_id.split(".")[0]
                        if len(sens_lst) == 1:
                            nsid += "-0"
                        else:
                            nsid += ("-" + str(i + 1))
                        new_sense = {
                            "homonym_id": homonym_id,
                            # sense_id: sense_id-subsense_id
                            "sense_id": nsid,
                            "sense_type": sens[0],
                            "sense_desc": helper_prune(sens[1]),
                        }
                        new_el["senses"].append(new_sense)
            hw = new_el["hw"]
            if hw in data_new:
                print("Shouldn't be here.")
                print(new_el)
                exit(1)
            data_new[hw] = DC(new_el)
            # return data_new

        # check
        for hw, el in data_new.items():
            for sens in el["senses"]:
                if sens["sense_desc"] is None:
                    print(sens)

        return data_new


def plst(lst):
    for el in lst:
        print(el)


if __name__ == "__main__":
    datapath = "../../../data"
    html_filepath = datapath + "/sskj/sskj2_v1.html"
    raw_pickle_filepath = datapath + "/tmp_pickles/raw_sskj.pickle"
    parsed_pickle_filepath = datapath + "/no_del_pickles/sskj_senses.pickle"
    se_list_filepath = datapath + "/no_del_pickles/se_list.pickle"

    p = Seqparser()

    if True:
        print("html_to_raw_pickle({}, {})".format(
            html_filepath, raw_pickle_filepath))
        print("Big file, this might take a while (2 min).")
        tstart = time()
        p.html_to_raw_pickle(html_filepath, raw_pickle_filepath)
        print("Finished in {:.2f}.".format(time() - tstart))

    if False:
        print("raw_pickle_to_parsed_pickle({}, {}, {})".format(
            raw_pickle_filepath, parsed_pickle_filepath, se_list_filepath))
        tstart = time()
        p.raw_pickle_to_parsed_pickle(
            raw_pickle_filepath, parsed_pickle_filepath, se_list_filepath)
        print("Finished in {:.2f}.".format(time() - tstart))
    print("Done.")
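To make the parser's output concrete, here is a sketch of one entry in the JSON produced by html_to_verb_adj_json. The keys follow parse_line above ("izt", "izt_clean", and "senses" mapping sense ordinals to (type, text) tuples); the Slovene values are invented for illustration.

# Illustrative entry shape only; values are made up.
entry = {
    "izt": "delati",            # headword as printed in SSKJ
    "izt_clean": "delati",      # de-accented, lowercased; adjectives get a "_" suffix
    "senses": {
        "0": [("razl", "opravljati delo")],   # default ordinal before the first "1."
        "1.": [("sopo", "ustvarjati")],       # ("razl"|"sopo", text) tuples
    },
}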
@@ -0,0 +1,68 @@
from Seqparser import Seqparser
import argparse
import sys
from pathlib import Path
import json
import datetime
import hashlib
from pymongo import MongoClient

SSKJ_USER = "sskj2"

if __name__ == "__main__":
    aparser = argparse.ArgumentParser()
    aparser.add_argument("--sskj-html", type=str)
    aparser.add_argument("--sskj-json", type=str)
    aparser.add_argument("--wordlist", type=str)
    aparser.add_argument("--operation", type=str)
    aparser.add_argument("--dbaddr", type=str)
    aparser.add_argument("--dbuser", type=str)
    aparser.add_argument("--dbpass", type=str)
    args = aparser.parse_args()

    if args.operation == "gen_sskj_json":
        sqp = Seqparser()
        sqp.html_to_verb_adj_json(args.sskj_html, args.sskj_json)
        sys.exit()

    if args.operation == "gen_wordlist":
        sqp = Seqparser()
        # --sskj-json holds the senses JSON; argparse defines no --sskj-senses flag
        sqp.generate_sskj_wordlist(args.sskj_json, args.wordlist)
        sys.exit()

    if args.operation == "senses_to_db":
        db_entries = []
        tmp_dt = datetime.datetime.utcnow()
        with Path(args.sskj_json).open("r") as fp:
            jdata = json.load(fp)
            # print(jdata[list(jdata.keys())[201]])
            for hw, entry in jdata.items():
                for key, sense in entry[0]["senses"].items():
                    desc = sense[0][1]
                    if sense[0][0] == "razl":
                        desc = desc[:-1]  # for some reason, descriptions contain a ':'
                    else:
                        desc = sense[0][0] + ": " + desc
                    tmp_entry = {
                        "desc": desc,
                        "hw": hw,
                        "author": SSKJ_USER
                    }
                    tmp_entry["sense_id"] = "{}-{}".format(
                        SSKJ_USER,
                        hashlib.sha256(str(tmp_entry).encode("utf-8")).hexdigest()[:10]
                    )
                    tmp_entry["date"] = tmp_dt
                    db_entries.append(tmp_entry)
        print(len(db_entries))

        # db login
        client = MongoClient(
            "mongodb://{}".format(args.dbaddr),
            username=args.dbuser,
            password=args.dbpass,
            authSource="valdb",
            authMechanism='SCRAM-SHA-1'
        )
        valdb = client.valdb
        valdb.senses.insert_many(db_entries)
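The documents that senses_to_db writes into the valdb.senses collection therefore all share one shape; a representative example follows, with hw and desc values invented and the rest derived from the code above.

# Illustrative document produced by --operation=senses_to_db; hw/desc invented.
import datetime
doc = {
    "desc": "opravljati delo",
    "hw": "delati",
    "author": "sskj2",                    # SSKJ_USER
    "sense_id": "sskj2-0f3a9c1b2d",       # SSKJ_USER + first 10 hex chars of sha256(entry)
    "date": datetime.datetime(2019, 1, 1, 12, 0),  # one utcnow() timestamp per run
}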
src/pkg/seqparser/setup.py (new file, 11 lines)
@@ -0,0 +1,11 @@
from setuptools import setup

setup(
    name='seqparser',
    version='0.0.1',
    description='Parser for sskj2 html dump.',
    author='Kristjan Voje',
    author_email='kristjan.voje@gmail.com',
    license='MIT',
    packages=['seqparser'],
)
@@ -37,7 +37,8 @@ def frames_from_db_entry(dbent):
     return frames


 class Frame():
-    def __init__(self, tids, deep_links=None, slots=None, hw_lemma=None, sentences=None):
+    def __init__(self, tids, deep_links=None, slots=None,
+                 hw_lemma=None, sentences=None, sentence_count=None):
         self.hw = hw_lemma
         self.tids = tids  # list of tokens with the same hw_lemma
         # Each tid = "S123.t123";
@@ -50,6 +51,8 @@ class Frame():
         self.sense_info = {}
         self.sentences = sentences
         self.aggr_sent = None  # Dictionary { hw: self.sentences idx }
+        self.sentence_count = sentence_count  # paging, optimization
+

     def get_functors(self):
         return [slot.functor for slot in self.slots]
@@ -62,7 +65,8 @@ class Frame():
             "slots": [slot.to_json() for slot in self.slots],
             "sentences": self.sentences,
             "aggr_sent": self.aggr_sent,
-            "sense_info": self.sense_info
+            "sense_info": self.sense_info,
+            "sentence_count": self.sentence_count
         }
         return ret

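With sentence_count serialized, a frame's JSON payload now distinguishes the truncated sentence sample from the total number of hits; an invented example of the to_json() output after this change:

# Illustrative Frame.to_json() result; values invented, keys from the code above.
frame_json = {
    "hw": "delati",
    "tids": ["S123.t45"],
    "slots": [],
    "sentences": [],            # capped upstream (see SENTENCE_LIMIT below)
    "aggr_sent": None,
    "sense_info": {},
    "sentence_count": 137,      # total frames before capping; used for paging
}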
@@ -1,96 +0,0 @@ (file removed)
import logging

log = logging.getLogger(__name__)


class Frame():
    def __init__(self, tids, deep_links=None, slots=None, hw=None):
        self.hw = hw
        self.tids = tids  # list of tokens with the same hw_lemma
        # Each tid = "S123.t123";
        # you can get sentence with vallex.get_sentence(S123)
        self.slots = []
        if slots is None:
            self.slots = self.init_slots(deep_links)
        else:
            self.slots = slots
        self.sense_info = {}
        self.sentences = None  # Used for passing to view in app.py, get_frames
        self.aggr_sent = None  # Dictionary { hw: self.sentences idx }

    def to_json(self):
        ret = {
            "hw": self.hw,
            "tids": self.tids,
            "slots": [slot.to_json() for slot in self.slots],
            "sentences": self.sentences,
            "aggr_sent": self.aggr_sent,
            "sense_info": self.sense_info
        }
        return ret

    def init_slots(self, deep):
        slots = []
        for link in deep:
            slots.append(Slot(
                functor=link["functor"],
                tids=[link["to"]]
            ))
        return slots

    def sort_slots(self):
        # ACT, PAT, alphabetically
        srt1 = [
            x for x in self.slots
            if (x.functor == "ACT" or
                x.functor == "PAT")
        ]
        srt1 = sorted(srt1, key=lambda x: x.functor)
        srt2 = [
            x for x in self.slots
            if (x.functor != "ACT" and
                x.functor != "PAT")
        ]
        srt2 = sorted(srt2, key=lambda x: x.functor)
        self.slots = (srt1 + srt2)

    def to_string(self):
        ret = "Frame:\n"
        ret += "sense_info: {}\n".format(str(self.sense_info))
        ret += "tids: ["
        for t in self.tids:
            ret += (str(t) + ", ")
        ret += "]\n"
        if self.slots is not None:
            ret += "slots:\n"
            for sl in self.slots:
                ret += (sl.to_string() + "\n")
        return ret


class Slot():
    # Each slot is identified by its functor (ACT, PAT, ...)
    # It consists of different tokens.
    def __init__(self, functor, tids=None, count=None):
        self.functor = functor
        self.tids = tids or []  # combining multiple sentences vertically
        self.count = count or 1

    def to_string(self):
        ret = "---- Slot:\n"
        ret += "functor: {}\n".format(self.functor)
        ret += "tids: ["
        for t in self.tids:
            ret += (str(t) + ", ")
        ret += "]\n"
        ret += "]\n"
        ret += "----\n"
        return ret

    def to_json(self):
        ret = {
            "functor": self.functor,
            "tids": self.tids,
            "count": self.count
        }
        return ret
@@ -9,6 +9,7 @@ import logging
 log = logging.getLogger(__name__)

 SENSE_UNDEFINED = "nedefinirano"
+SENTENCE_LIMIT = 10

 ## TIDI: use frame.py
 ## TODO: build a list of [Frame] with lists of [Slot]
@@ -70,7 +71,10 @@ def reduce_1(frames, valdb_sensemap=None):
         for functor in fs[0]:
             slots[functor] = Slot(functor=functor)
         # Reduce slots from all frames. (Merge ACT from all frames, ...)
+        sentence_count = len(fs[1])
         for frame in fs[1]:
+            if len(tids) >= SENTENCE_LIMIT:
+                break
             tids += frame.tids
             sentences += frame.sentences
             for sl in frame.slots:
@@ -78,8 +82,13 @@ def reduce_1(frames, valdb_sensemap=None):
         slots_list = []
         for k, e in slots.items():
             slots_list.append(e)
-        # TODO does appending hw_lemma of first frame work for functor frames too?
-        rf = Frame(hw_lemma=fs[1][0].hw, tids=tids, slots=slots_list, sentences=sentences)
+        rf = Frame(
+            hw_lemma=fs[1][0].hw,
+            tids=tids,
+            slots=slots_list,
+            sentences=sentences,
+            sentence_count=sentence_count
+        )
         rf.sort_slots()
         ret_frames.append(rf)
     return sorted_by_len_tids(ret_frames)
@@ -182,7 +191,11 @@ def frames_from_sense_ids(raw_frames, id_map):
     tids = []
     reduced_slots = []
     sentences = []
+
+    sentence_count = len(frames)
     for frame in frames:
+        if len(tids) >= SENTENCE_LIMIT:
+            break
         tids += frame.tids
         sentences += frame.sentences
         for slot in frame.slots:
@@ -204,7 +217,8 @@ def frames_from_sense_ids(raw_frames, id_map):
         hw_lemma="derp",
         tids=tids,
         slots=reduced_slots,
-        sentences=sentences
+        sentences=sentences,
+        sentence_count=sentence_count,
     )
     id_map_entry = (
         id_map.get(tids[0]) or
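Both reducers now apply the same capping pattern: record the full group size for paging, then stop collecting sentences once the limit is reached. A standalone sketch of the idea, with a hypothetical cap_sentences helper:

# Condensed sketch of the SENTENCE_LIMIT pattern above; cap_sentences is
# hypothetical, `frames` is any list of objects with .tids and .sentences.
SENTENCE_LIMIT = 10

def cap_sentences(frames):
    tids, sentences = [], []
    sentence_count = len(frames)       # full count kept for paging
    for frame in frames:
        if len(tids) >= SENTENCE_LIMIT:
            break                      # stop once the sample is full
        tids += frame.tids
        sentences += frame.sentences
    return tids, sentences, sentence_count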