forked from kristjan/cjvt-valency
Added gigafida to file creation
This commit is contained in:
parent
b4db4e5255
commit
c803057164
2
.gitignore
vendored
2
.gitignore
vendored
@ -10,3 +10,5 @@ dockerfiles/database/create.js
|
|||||||
*__pycache__/
|
*__pycache__/
|
||||||
env.local
|
env.local
|
||||||
logs/*
|
logs/*
|
||||||
|
.idea/
|
||||||
|
venv/
|
||||||
|
6
Makefile
6
Makefile
@ -24,9 +24,9 @@ SSKJ_JSON = "$(MAKE_ROOT)/data/sskj_senses.json"
|
|||||||
# for pre-generating the index of all headwords and functors
|
# for pre-generating the index of all headwords and functors
|
||||||
APPINDEX_PATH = "$(MAKE_ROOT)/data/appindex.json"
|
APPINDEX_PATH = "$(MAKE_ROOT)/data/appindex.json"
|
||||||
|
|
||||||
OUTPUT = "db"
|
# OUTPUT = "db"
|
||||||
# OUTPUT = "file"
|
OUTPUT = "file"
|
||||||
OUTDIR = "/tmp/three" # if you're running this in docker, make sure to mount the volume
|
OUTDIR = "/project/data" # if you're running this in docker, make sure to mount the volume
|
||||||
DBADDR = "0.0.0.0:27017" # don't use localhost
|
DBADDR = "0.0.0.0:27017" # don't use localhost
|
||||||
|
|
||||||
# credentials from .gitignored file
|
# credentials from .gitignored file
|
||||||
|
@ -1 +0,0 @@
|
|||||||
/home/kristjan/git/diploma/data/ssj500k-sl.TEI/ssj500k-sl.body.xml
|
|
1256152
data/ssj_file_link
Executable file
1256152
data/ssj_file_link
Executable file
File diff suppressed because it is too large
Load Diff
36
requirements.txt
Normal file
36
requirements.txt
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
asn1crypto==0.24.0
|
||||||
|
Click==7.0
|
||||||
|
# corpusparser==0.0.1
|
||||||
|
cryptography==2.1.4
|
||||||
|
Flask==1.1.1
|
||||||
|
Flask-Cors==3.0.8
|
||||||
|
Flask-PyMongo==2.3.0
|
||||||
|
gunicorn==19.9.0
|
||||||
|
idna==2.6
|
||||||
|
itsdangerous==1.1.0
|
||||||
|
Jinja2==2.10.1
|
||||||
|
joblib==0.13.2
|
||||||
|
keyring==10.6.0
|
||||||
|
keyrings.alt==3.0
|
||||||
|
lxml==4.4.0
|
||||||
|
MarkupSafe==1.1.1
|
||||||
|
numpy==1.17.0
|
||||||
|
pandas==0.25.0
|
||||||
|
pathlib==1.0.1
|
||||||
|
pycrypto==2.6.1
|
||||||
|
# pygobject
|
||||||
|
pymongo==3.8.0
|
||||||
|
python-dateutil==2.8.0
|
||||||
|
pytz==2019.2
|
||||||
|
pyxdg==0.25
|
||||||
|
PyYAML==5.1.2
|
||||||
|
scikit-learn==0.21.3
|
||||||
|
scipy==1.3.0
|
||||||
|
SecretStorage==2.3.1
|
||||||
|
# Editable install with no version control (seqparser==0.0.1)
|
||||||
|
six==1.11.0
|
||||||
|
sklearn==0.0
|
||||||
|
# Editable install with no version control (valency==0.1.1)
|
||||||
|
|
||||||
|
Werkzeug==0.15.5
|
||||||
|
|
@ -37,7 +37,8 @@ app = Flask(__name__)
|
|||||||
app.config.from_object("db_config")
|
app.config.from_object("db_config")
|
||||||
mongo = PyMongo(app)
|
mongo = PyMongo(app)
|
||||||
|
|
||||||
app.config["CORPORA"] = ["ssj", "kres"]
|
# app.config["CORPORA"] = ["ssj", "kres"]
|
||||||
|
app.config["CORPORA"] = ["ssj"]
|
||||||
app.config["BANNED_HEADWORDS"] = ["biti"]
|
app.config["BANNED_HEADWORDS"] = ["biti"]
|
||||||
app.config["QUERY_LIMIT"] = 1000
|
app.config["QUERY_LIMIT"] = 1000
|
||||||
|
|
||||||
@ -452,9 +453,18 @@ def prepare_app_index(appindex_json, sskj_wordlist):
|
|||||||
res_hws = {}
|
res_hws = {}
|
||||||
res_fns = {}
|
res_fns = {}
|
||||||
|
|
||||||
|
print('CORPUS...!!...')
|
||||||
|
print(corpus)
|
||||||
|
a = mongo.db[corpus]
|
||||||
|
print('TEST_OK')
|
||||||
|
print(a)
|
||||||
|
print(mongo.db)
|
||||||
|
a = mongo.db.list_collection_names()
|
||||||
|
print('TEST_OK2')
|
||||||
nentries = mongo.db[corpus].count()
|
nentries = mongo.db[corpus].count()
|
||||||
idx = 0
|
idx = 0
|
||||||
for e in mongo.db[corpus].find({}):
|
for e in mongo.db[corpus].find({}):
|
||||||
|
print('aaa')
|
||||||
if "headwords" not in e:
|
if "headwords" not in e:
|
||||||
continue
|
continue
|
||||||
for hw in e["headwords"]:
|
for hw in e["headwords"]:
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit 01adf47b9b63b43f86bff52429792b0de2327ddd
|
Subproject commit 2e1d8d06b33eb4e64d3558fea2161811e81f6a28
|
Loading…
Reference in New Issue
Block a user