@ -26,21 +26,25 @@ from email.mime.text import MIMEText
from copy import deepcopy as DC
from copy import deepcopy as DC
from pathlib import Path
from pathlib import Path
from pymongo import MongoClient
from pymongo import MongoClient
from flask_pymongo import PyMongo
import pymongo
import pymongo
import argparse
import argparse
# some db collections
USERS_COLL = " users "
TOKENS_COLL = " usertokens "
SENSES_COLL = " senses "
SENSEMAP_COLL = " sensemap "
# pre-generated data (gui leftside word index)
CORPORA = [ " ssj " , " kres " ]
app_index = None
sskj_wordlist = None # used by _is_banned(hw)
BANNED_HEADWORDS = [ " biti " ]
log = logging . getLogger ( __name__ )
log = logging . getLogger ( __name__ )
valdb = None
app = Flask ( __name__ )
app = Flask ( __name__ )
app . config . from_object ( " db_config " )
mongo = PyMongo ( app )
app . config [ " CORPORA " ] = [ " ssj " , " kres " ]
app . config [ " BANNED_HEADWORDS " ] = [ " biti " ]
app . config [ " QUERY_LIMIT " ] = 1000
# when running vuejs via webpack
# when running vuejs via webpack
# CORS(app)
# CORS(app)
@ -55,7 +59,7 @@ CORS(app)
@app.route ( " /api/dev " )
@app.route ( " /api/dev " )
def api_dev ( ) :
def api_dev ( ) :
print ( " DEV " )
print ( " DEV " )
cur = mongo. db. kres . find ( { " headwords " : " nagovarjati " } )
cur = val db. kres . find ( { " headwords " : " nagovarjati " } )
frames = [ ]
frames = [ ]
for ent in cur :
for ent in cur :
frames + = frames_from_db_entry ( ent )
frames + = frames_from_db_entry ( ent )
@ -68,12 +72,12 @@ def api_dev():
@app.route ( " /api/words/<corpus> " )
@app.route ( " /api/words/<corpus> " )
def api_words ( corpus ) :
def api_words ( corpus ) :
return json . dumps ( {
return json . dumps ( {
" sorted_words " : app . config [ " app _index" ] [ corpus ] [ " words " ] , # todo - make corpus as arg
" sorted_words " : app _index[ corpus ] [ " words " ] , # todo - make corpus as arg
} )
} )
@app.route ( " /api/functors/<corpus> " )
@app.route ( " /api/functors/<corpus> " )
def api_functors ( corpus ) :
def api_functors ( corpus ) :
return json . dumps ( app . config [ " app _index" ] [ corpus ] [ " functors " ] )
return json . dumps ( app _index[ corpus ] [ " functors " ] )
# INDEX SELECTION -------------------^
# INDEX SELECTION -------------------^
@ -94,7 +98,7 @@ def api_register():
) :
) :
return " ERR "
return " ERR "
email_hash = hashlib . sha256 ( email . encode ( " utf-8 " ) ) . hexdigest ( )
email_hash = hashlib . sha256 ( email . encode ( " utf-8 " ) ) . hexdigest ( )
existing = list ( mongo. db . users . find ( {
existing = list ( valdb[ USERS_COLL ] . find ( {
" $or " : [ { " username " : username } , { " email " : email_hash } ]
" $or " : [ { " username " : username } , { " email " : email_hash } ]
} ) )
} ) )
if len ( existing ) > 0 :
if len ( existing ) > 0 :
@ -105,7 +109,7 @@ def api_register():
password . encode ( " utf-8 " ) ) . hexdigest ( ) ,
password . encode ( " utf-8 " ) ) . hexdigest ( ) ,
" email " : email_hash
" email " : email_hash
}
}
mongo. db . users . insert ( entry )
valdb[ USERS_COLL ] . insert ( entry )
return " OK "
return " OK "
@ -117,7 +121,7 @@ def api_login():
password = data [ " password " ]
password = data [ " password " ]
hpass = hashlib . sha256 ( password . encode ( " utf-8 " ) ) . hexdigest ( )
hpass = hashlib . sha256 ( password . encode ( " utf-8 " ) ) . hexdigest ( )
db_user = list ( mongo. db . users . find ( {
db_user = list ( valdb[ USERS_COLL ] . find ( {
" username " : username ,
" username " : username ,
" hpass " : hpass
" hpass " : hpass
} ) )
} ) )
@ -131,7 +135,7 @@ def api_login():
" date " : datetime . datetime . utcnow ( ) ,
" date " : datetime . datetime . utcnow ( ) ,
" token " : token
" token " : token
}
}
mongo. db . usertokens . update (
valdb[ TOKENS_COLL ] . update (
{ " username " : token_entry [ " username " ] } ,
{ " username " : token_entry [ " username " ] } ,
token_entry ,
token_entry ,
upsert = True
upsert = True
@ -174,7 +178,7 @@ def api_new_pass():
username = data [ " username " ]
username = data [ " username " ]
email = data [ " email " ]
email = data [ " email " ]
hemail = hashlib . sha256 ( email . encode ( " utf-8 " ) ) . hexdigest ( )
hemail = hashlib . sha256 ( email . encode ( " utf-8 " ) ) . hexdigest ( )
db_res = list ( mongo. db . users . find ( {
db_res = list ( valdb[ USERS_COLL ] . find ( {
" username " : username ,
" username " : username ,
" email " : hemail
" email " : hemail
} ) )
} ) )
@ -186,7 +190,7 @@ def api_new_pass():
string . ascii_letters + string . digits ) for i in range ( 10 ) ] )
string . ascii_letters + string . digits ) for i in range ( 10 ) ] )
# update locally
# update locally
hpass = hashlib . sha256 ( new_pass . encode ( " utf-8 " ) ) . hexdigest ( )
hpass = hashlib . sha256 ( new_pass . encode ( " utf-8 " ) ) . hexdigest ( )
mongo. db . users . update (
valdb[ USERS_COLL ] . update (
{
{
" username " : username ,
" username " : username ,
" email " : hemail
" email " : hemail
@ -204,12 +208,12 @@ def token_to_username(token):
key = {
key = {
" token " : token
" token " : token
}
}
res = list ( mongo. db . usertokens . find ( key ) )
res = list ( valdb[ TOKENS_COLL ] . find ( key ) )
if len ( res ) != 1 :
if len ( res ) != 1 :
return None
return None
username = res [ 0 ] [ " username " ]
username = res [ 0 ] [ " username " ]
# update deletion interval
# update deletion interval
mongo. db . usertokens . update (
valdb[ TOKENS_COLL ] . update (
key , { " $set " : { " date " : datetime . datetime . utcnow ( ) } } )
key , { " $set " : { " date " : datetime . datetime . utcnow ( ) } } )
return username
return username
@ -244,19 +248,18 @@ def api_get_frames():
RF = reduce_functions [ rf_name ] [ " f " ]
RF = reduce_functions [ rf_name ] [ " f " ]
corpus = request . args . get ( " cor " )
corpus = request . args . get ( " cor " )
if corpus not in app. config [ " CORPORA" ] :
if corpus not in CORPORA:
return json . dumps ( { " error " : " cor= { kres,ssj} " } )
return json . dumps ( { " error " : " cor= { kres,ssj} " } )
cur = mongo. db[ corpus ] . find ( { " headwords " : hw } )
cur = val db[ corpus ] . find ( { " headwords " : hw } )
frames = [ ]
frames = [ ]
for ent in cur [: app . config [ " QUERY_LIMIT " ] ] :
for ent in cur :
frames + = frames_from_db_entry ( ent ) # pre-process this step for prod TODO
frames + = frames_from_db_entry ( ent ) # pre-process this step for prod TODO
cur . close ( )
# filter by relevant hw
# filter by relevant hw
frames = [ x for x in frames if x . hw == hw ]
frames = [ x for x in frames if x . hw == hw ]
ret_frames = RF ( frames , mongo. db . sensemap )
ret_frames = RF ( frames , valdb[ SENSEMAP_COLL ] )
json_ret = { " frames " : [ ] }
json_ret = { " frames " : [ ] }
for frame in ret_frames :
for frame in ret_frames :
@ -297,20 +300,19 @@ def api_get_functor_frames():
RF = reduce_functions [ rf_name ] [ " f " ]
RF = reduce_functions [ rf_name ] [ " f " ]
corpus = request . args . get ( " cor " )
corpus = request . args . get ( " cor " )
if corpus not in app. config [ " CORPORA" ] :
if corpus not in CORPORA:
return json . dumps ( { " error " : " cor= { kres,ssj} " } )
return json . dumps ( { " error " : " cor= { kres,ssj} " } )
cur = mongo. db[ corpus ] . find ( { " functors " : functor } )
cur = val db[ corpus ] . find ( { " functors " : functor } )
frames = [ ]
frames = [ ]
for ent in cur [: app . config [ " QUERY_LIMIT " ] ] :
for ent in cur :
frames + = frames_from_db_entry ( ent ) # pre-process this step for prod TODO
frames + = frames_from_db_entry ( ent ) # pre-process this step for prod TODO
cur . close ( )
# filter by relevant functor
# filter by relevant functor
frames = [ x for x in frames if functor in x . get_functors ( ) ]
frames = [ x for x in frames if functor in x . get_functors ( ) ]
# raw_frames = vallex.functors_index[functor] # TODO
# raw_frames = vallex.functors_index[functor] # TODO
ret_frames = RF ( frames , mongo. db . sensemap )
ret_frames = RF ( frames , valdb[ SENSEMAP_COLL ] )
ret_frames = _aggregate_by_hw ( ret_frames )
ret_frames = _aggregate_by_hw ( ret_frames )
json_ret = { " frames " : [ ] }
json_ret = { " frames " : [ ] }
@ -329,10 +331,10 @@ def api_get_functor_frames():
def api_senses_get ( ) :
def api_senses_get ( ) :
# returns senses and mapping for hw
# returns senses and mapping for hw
hw = request . args . get ( " hw " )
hw = request . args . get ( " hw " )
senses = list ( mongo. db . senses . find ( {
senses = list ( valdb[ SENSES_COLL ] . find ( {
" hw " : hw
" hw " : hw
} ) )
} ) )
sense_map_query = list ( mongo. db . sensemap . find ( {
sense_map_query = list ( valdb[ SENSEMAP_COLL ] . find ( {
" hw " : hw
" hw " : hw
} ) )
} ) )
# aggregation by max date possible on DB side
# aggregation by max date possible on DB side
@ -412,7 +414,7 @@ def api_senses_update():
print ( ns )
print ( ns )
# insert into db
# insert into db
mongo. db . senses . insert ( ns )
valdb[ SENSES_COLL ] . insert ( ns )
# replace tmp_id with mongo's _id
# replace tmp_id with mongo's _id
for ssj_id , el in sense_map . items ( ) :
for ssj_id , el in sense_map . items ( ) :
@ -427,7 +429,7 @@ def api_senses_update():
" date " : datetime . datetime . utcnow ( )
" date " : datetime . datetime . utcnow ( )
}
}
# vallex.db["v2_sense_map"].update(key, data, upsert=True)
# vallex.db["v2_sense_map"].update(key, data, upsert=True)
mongo. db . sensemap . insert ( data )
valdb[ SENSEMAP_COLL ] . insert ( data )
return " OK "
return " OK "
# SENSES ----------------------------^
# SENSES ----------------------------^
@ -436,25 +438,25 @@ def api_senses_update():
# APP PREFLIGHT ---------------------.
# APP PREFLIGHT ---------------------.
def _is_banned ( hw ) :
def _is_banned ( hw ) :
banned = True
banned = True
if hw in app. config [ " BANNED_HEADWORDS" ] :
if hw in BANNED_HEADWORDS:
banned = True
banned = True
elif hw in sskj_wordlist [ " wordlist " ] :
elif hw in sskj_wordlist [ " wordlist " ] :
banned = False
banned = False
elif ( hw + " se " ) in sskj_wordlist [ " wordlist " ] :
elif ( hw + " se " ) in sskj_wordlist [ " wordlist " ] :
banned = False
banned = False
if hw [ - 1 ] == " _ " :
log . debug ( " hw: {} , banned: {} " . format ( hw , banned ) )
return banned
return banned
def prepare_app_index ( appindex_json , sskj_wordlist ) :
def prepare_app_index ( ) :
log . info ( " [*] preparing app_index " )
log . info ( " [*] preparing app_index " )
# create app_index (used in frontend, left side word index)
# create app_index (used in frontend, left side word index)
tmp_app_index = { c : { } for c in app. config [ " CORPORA" ] }
tmp_app_index = { c : { } for c in CORPORA}
for corpus in app. config [ " CORPORA" ] :
for corpus in CORPORA:
res_hws = { }
res_hws = { }
res_fns = { }
res_fns = { }
for e in valdb [ corpus ] . find ( { } ) :
nentries = mongo . db [ corpus ] . count ( )
idx = 0
for e in mongo . db [ corpus ] . find ( { } ) :
if " headwords " not in e :
if " headwords " not in e :
continue
continue
for hw in e [ " headwords " ] :
for hw in e [ " headwords " ] :
@ -469,10 +471,6 @@ def prepare_app_index(appindex_json, sskj_wordlist):
res_fns [ fn ] + = 1
res_fns [ fn ] + = 1
else :
else :
res_fns [ fn ] = 1
res_fns [ fn ] = 1
idx + = 1
if idx % 10000 == 0 :
log . debug ( " indexing {} : {} / {} " . format (
corpus , idx , nentries ) )
alphabetical = { }
alphabetical = { }
for k , e in res_hws . items ( ) :
for k , e in res_hws . items ( ) :
@ -488,41 +486,15 @@ def prepare_app_index(appindex_json, sskj_wordlist):
tmp_app_index [ corpus ] [ " words " ] = alphabetical
tmp_app_index [ corpus ] [ " words " ] = alphabetical
functors = [ ( k , e ) for ( k , e ) in res_fns . items ( ) ]
functors = [ ( k , e ) for ( k , e ) in res_fns . items ( ) ]
functors = sorted ( functors , key = lambda x : x [ 0 ] )
functors = sorted ( functors , key = lambda x : x [ 0 ] )
tmp_app_index [ corpus ] [ " functors " ] = functors
tmp_app_index [ corpus ] [ " functors " ] = functors
with Path ( appindex_json ) . open ( " w " ) as fp :
valdb . appindex . update ( { " dockey " : " appindex " } , { " dockey " : " appindex " , " data " : tmp_app_index } , upsert = True )
json . dump ( tmp_app_index , fp )
# APP PREFLIGHT ---------------------^
# APP PREFLIGHT ---------------------^
def init_wsgi ( app ) :
print ( " Initiating wsgi " )
config = None
with Path ( " /project/prod_conf.yaml " ) . open ( " r " ) as fp :
config = list ( yaml . safe_load_all ( fp ) ) [ 0 ]
app . debug = False
logfile = config [ " logfile " ]
logging . basicConfig ( filename = logfile , level = logging . INFO )
# app index from db
with Path ( config [ " appindex " ] ) . open ( " r " ) as fp :
# a dirty hack but ok
app . config [ " app_index " ] = json . load ( fp )
# log.info("[*] Starting app.py with config:\n%s".format(config))
log . info ( " [*] Starting app.py with config: \n {} " . format ( config ) )
# if we don't pass arguments, assume production environment (gunicorn)
if " gunicorn " in sys . argv [ 0 ] :
init_wsgi ( app )
if __name__ == " __main__ " :
if __name__ == " __main__ " :
print ( " Starting app.py main() " )
print ( " Starting app.py main() " )
aparser = argparse . ArgumentParser ( description = " Arguments for app.py " )
aparser = argparse . ArgumentParser ( description = " Arguments for app.py " )
@ -532,9 +504,9 @@ if __name__ == "__main__":
aparser . add_argument ( " --dbpass " , type = str )
aparser . add_argument ( " --dbpass " , type = str )
aparser . add_argument ( " --dbaddr " , type = str )
aparser . add_argument ( " --dbaddr " , type = str )
aparser . add_argument ( " --sskj-wordlist " , type = str )
aparser . add_argument ( " --sskj-wordlist " , type = str )
aparser . add_argument ( " --appindex-json " , type = str )
args = aparser . parse_args ( )
args = aparser . parse_args ( )
config = None
with Path ( args . config_file ) . open ( " r " ) as fp :
with Path ( args . config_file ) . open ( " r " ) as fp :
config = list ( yaml . safe_load_all ( fp ) ) [ 0 ]
config = list ( yaml . safe_load_all ( fp ) ) [ 0 ]
@ -545,31 +517,27 @@ if __name__ == "__main__":
else :
else :
logging . basicConfig ( filename = logfile , level = logging . INFO )
logging . basicConfig ( filename = logfile , level = logging . INFO )
"""
# db login
# db login
client = MongoClient (
client = MongoClient (
" mongodb:// {} " . format ( args . dbaddr ) ,
" mongodb:// {} " . format ( args . dbaddr ) ,
username = args . dbuser ,
username = args . dbuser ,
password = args . dbpass ,
password = args . dbpass ,
authSource = " mongo. db" ,
authSource = " val db" ,
authMechanism = ' SCRAM-SHA-1 '
authMechanism = ' SCRAM-SHA-1 '
)
)
valdb = client . mongo . db
valdb = client . valdb
"""
if args . prepare_db :
if args . prepare_db :
with Path ( args . sskj_wordlist ) . open ( " r " ) as fp :
with Path ( args . sskj_wordlist ) . open ( " r " ) as fp :
sskj_wordlist = json . load ( fp )
sskj_wordlist = json . load ( fp )
prepare_app_index ( args . appindex_json , sskj_wordlist )
prepare_app_index ( )
sys . exit ( )
sys . exit ( )
# app index from db
# app index from db
with Path ( args . appindex_json ) . open ( " r " ) as fp :
app_index = ( valdb . appindex . find_one ( { " dockey " : " appindex " } ) ) [ " data " ]
app . config [ " app_index " ] = json . load ( fp )
# log.info("[*] Starting app.py with config:\n%s".format(config))
# log.info("[*] Starting app.py with config:\n%s".format(config))
log . info ( " [*] Starting app.py with config: \n {} " . format ( config ) )
log . info ( " [*] Starting app.py with config: \n {} " . format ( config ) )
app . run ( host = str ( config [ " host " ] ) , port = int ( config [ " port " ] ) )
app . run ( host = str ( config [ " host " ] ) , port = int ( config [ " port " ] ) )