Added gigafida to file creation

This commit is contained in:
Luka 2019-09-06 10:27:03 +02:00
parent b4db4e5255
commit c803057164
6 changed files with 1256205 additions and 6 deletions

2
.gitignore vendored
View File

@ -10,3 +10,5 @@ dockerfiles/database/create.js
*__pycache__/
env.local
logs/*
.idea/
venv/

View File

@ -24,9 +24,9 @@ SSKJ_JSON = "$(MAKE_ROOT)/data/sskj_senses.json"
# for pre-generating the index of all headwords and functors
APPINDEX_PATH = "$(MAKE_ROOT)/data/appindex.json"
OUTPUT = "db"
# OUTPUT = "file"
OUTDIR = "/tmp/three" # if you're running this in docker, make sure to mount the volume
# OUTPUT = "db"
OUTPUT = "file"
OUTDIR = "/project/data" # if you're running this in docker, make sure to mount the volume
DBADDR = "0.0.0.0:27017" # don't use localhost
# credentials from .gitignored file

View File

@ -1 +0,0 @@
/home/kristjan/git/diploma/data/ssj500k-sl.TEI/ssj500k-sl.body.xml

1256152
data/ssj_file_link Executable file

File diff suppressed because it is too large Load Diff

36
requirements.txt Normal file
View File

@ -0,0 +1,36 @@
asn1crypto==0.24.0
Click==7.0
# corpusparser==0.0.1
cryptography==2.1.4
Flask==1.1.1
Flask-Cors==3.0.8
Flask-PyMongo==2.3.0
gunicorn==19.9.0
idna==2.6
itsdangerous==1.1.0
Jinja2==2.10.1
joblib==0.13.2
keyring==10.6.0
keyrings.alt==3.0
lxml==4.4.0
MarkupSafe==1.1.1
numpy==1.17.0
pandas==0.25.0
pathlib==1.0.1
pycrypto==2.6.1
# pygobject
pymongo==3.8.0
python-dateutil==2.8.0
pytz==2019.2
pyxdg==0.25
PyYAML==5.1.2
scikit-learn==0.21.3
scipy==1.3.0
SecretStorage==2.3.1
# Editable install with no version control (seqparser==0.0.1)
six==1.11.0
sklearn==0.0
# Editable install with no version control (valency==0.1.1)
Werkzeug==0.15.5

View File

@ -37,7 +37,8 @@ app = Flask(__name__)
app.config.from_object("db_config")
mongo = PyMongo(app)
app.config["CORPORA"] = ["ssj", "kres"]
# app.config["CORPORA"] = ["ssj", "kres"]
app.config["CORPORA"] = ["ssj"]
app.config["BANNED_HEADWORDS"] = ["biti"]
app.config["QUERY_LIMIT"] = 1000
@ -452,9 +453,18 @@ def prepare_app_index(appindex_json, sskj_wordlist):
res_hws = {}
res_fns = {}
print('CORPUS...!!...')
print(corpus)
a = mongo.db[corpus]
print('TEST_OK')
print(a)
print(mongo.db)
a = mongo.db.list_collection_names()
print('TEST_OK2')
nentries = mongo.db[corpus].count()
idx = 0
for e in mongo.db[corpus].find({}):
print('aaa')
if "headwords" not in e:
continue
for hw in e["headwords"]:

@ -1 +1 @@
Subproject commit 01adf47b9b63b43f86bff52429792b0de2327ddd
Subproject commit 2e1d8d06b33eb4e64d3558fea2161811e81f6a28