@ -26,25 +26,21 @@ from email.mime.text import MIMEText
from copy import deepcopy as DC
from copy import deepcopy as DC
from pathlib import Path
from pathlib import Path
from pymongo import MongoClient
from pymongo import MongoClient
from flask_pymongo import PyMongo
import pymongo
import pymongo
import argparse
import argparse
# Names of the MongoDB collections used by the request handlers.
# Handlers reach them as attributes of ``mongo.db`` (e.g. ``mongo.db.users``);
# the constants are kept so code that still imports them keeps working.
USERS_COLL = "users"
TOKENS_COLL = "usertokens"
SENSES_COLL = "senses"
SENSEMAP_COLL = "sensemap"

# pre-generated data (gui leftside word index)
CORPORA = ["ssj", "kres"]
BANNED_HEADWORDS = ["biti"]
app_index = None  # legacy global; the index is served from app.config["app_index"]
sskj_wordlist = None  # used by _is_banned(hw)

log = logging.getLogger(__name__)
valdb = None  # legacy raw-pymongo handle; handlers use ``mongo.db`` instead

app = Flask(__name__)
app.config.from_object("db_config")
mongo = PyMongo(app)

# Runtime settings live in app.config so that every handler reads one source.
app.config["CORPORA"] = list(CORPORA)
app.config["BANNED_HEADWORDS"] = list(BANNED_HEADWORDS)
app.config["QUERY_LIMIT"] = 1000
# when running vuejs via webpack
# when running vuejs via webpack
# CORS(app)
# CORS(app)
@ -59,7 +55,7 @@ CORS(app)
@app.route ( " /api/dev " )
@app.route ( " /api/dev " )
def api_dev ( ) :
def api_dev ( ) :
print ( " DEV " )
print ( " DEV " )
cur = val db. kres . find ( { " headwords " : " nagovarjati " } )
cur = mongo. db. kres . find ( { " headwords " : " nagovarjati " } )
frames = [ ]
frames = [ ]
for ent in cur :
for ent in cur :
frames + = frames_from_db_entry ( ent )
frames + = frames_from_db_entry ( ent )
@ -72,12 +68,12 @@ def api_dev():
@app.route("/api/words/<corpus>")
def api_words(corpus):
    """Return the pre-generated word index of ``corpus`` as a JSON string.

    The index is read from ``app.config["app_index"]``, which is loaded from
    the app-index JSON file at start-up.

    Args:
        corpus: corpus key taken from the URL path.

    Returns:
        JSON object ``{"sorted_words": ...}``, or a JSON object with an
        ``"error"`` key when the corpus is not present in the index.
    """
    index = app.config["app_index"]
    if corpus not in index:
        # An unknown corpus used to surface as a KeyError (HTTP 500).
        return json.dumps({"error": "unknown corpus", "corpora": sorted(index)})
    return json.dumps({
        "sorted_words": index[corpus]["words"],
    })
@app.route("/api/functors/<corpus>")
def api_functors(corpus):
    """Return the pre-generated functor index of ``corpus`` as a JSON string.

    The index is read from ``app.config["app_index"]``, which is loaded from
    the app-index JSON file at start-up.

    Args:
        corpus: corpus key taken from the URL path.

    Returns:
        JSON dump of the corpus' ``"functors"`` entry, or a JSON object with
        an ``"error"`` key when the corpus is not present in the index.
    """
    index = app.config["app_index"]
    if corpus not in index:
        # An unknown corpus used to surface as a KeyError (HTTP 500).
        return json.dumps({"error": "unknown corpus", "corpora": sorted(index)})
    return json.dumps(index[corpus]["functors"])
# INDEX SELECTION -------------------^
# INDEX SELECTION -------------------^
@ -98,7 +94,7 @@ def api_register():
) :
) :
return " ERR "
return " ERR "
email_hash = hashlib . sha256 ( email . encode ( " utf-8 " ) ) . hexdigest ( )
email_hash = hashlib . sha256 ( email . encode ( " utf-8 " ) ) . hexdigest ( )
existing = list ( valdb[ USERS_COLL ] . find ( {
existing = list ( mongo. db . users . find ( {
" $or " : [ { " username " : username } , { " email " : email_hash } ]
" $or " : [ { " username " : username } , { " email " : email_hash } ]
} ) )
} ) )
if len ( existing ) > 0 :
if len ( existing ) > 0 :
@ -109,7 +105,7 @@ def api_register():
password . encode ( " utf-8 " ) ) . hexdigest ( ) ,
password . encode ( " utf-8 " ) ) . hexdigest ( ) ,
" email " : email_hash
" email " : email_hash
}
}
valdb[ USERS_COLL ] . insert ( entry )
mongo. db . users . insert ( entry )
return " OK "
return " OK "
@ -121,7 +117,7 @@ def api_login():
password = data [ " password " ]
password = data [ " password " ]
hpass = hashlib . sha256 ( password . encode ( " utf-8 " ) ) . hexdigest ( )
hpass = hashlib . sha256 ( password . encode ( " utf-8 " ) ) . hexdigest ( )
db_user = list ( valdb[ USERS_COLL ] . find ( {
db_user = list ( mongo. db . users . find ( {
" username " : username ,
" username " : username ,
" hpass " : hpass
" hpass " : hpass
} ) )
} ) )
@ -135,7 +131,7 @@ def api_login():
" date " : datetime . datetime . utcnow ( ) ,
" date " : datetime . datetime . utcnow ( ) ,
" token " : token
" token " : token
}
}
valdb[ TOKENS_COLL ] . update (
mongo. db . usertokens . update (
{ " username " : token_entry [ " username " ] } ,
{ " username " : token_entry [ " username " ] } ,
token_entry ,
token_entry ,
upsert = True
upsert = True
@ -178,7 +174,7 @@ def api_new_pass():
username = data [ " username " ]
username = data [ " username " ]
email = data [ " email " ]
email = data [ " email " ]
hemail = hashlib . sha256 ( email . encode ( " utf-8 " ) ) . hexdigest ( )
hemail = hashlib . sha256 ( email . encode ( " utf-8 " ) ) . hexdigest ( )
db_res = list ( valdb[ USERS_COLL ] . find ( {
db_res = list ( mongo. db . users . find ( {
" username " : username ,
" username " : username ,
" email " : hemail
" email " : hemail
} ) )
} ) )
@ -190,7 +186,7 @@ def api_new_pass():
string . ascii_letters + string . digits ) for i in range ( 10 ) ] )
string . ascii_letters + string . digits ) for i in range ( 10 ) ] )
# update locally
# update locally
hpass = hashlib . sha256 ( new_pass . encode ( " utf-8 " ) ) . hexdigest ( )
hpass = hashlib . sha256 ( new_pass . encode ( " utf-8 " ) ) . hexdigest ( )
valdb[ USERS_COLL ] . update (
mongo. db . users . update (
{
{
" username " : username ,
" username " : username ,
" email " : hemail
" email " : hemail
@ -208,12 +204,12 @@ def token_to_username(token):
key = {
key = {
" token " : token
" token " : token
}
}
res = list ( valdb[ TOKENS_COLL ] . find ( key ) )
res = list ( mongo. db . usertokens . find ( key ) )
if len ( res ) != 1 :
if len ( res ) != 1 :
return None
return None
username = res [ 0 ] [ " username " ]
username = res [ 0 ] [ " username " ]
# update deletion interval
# update deletion interval
valdb[ TOKENS_COLL ] . update (
mongo. db . usertokens . update (
key , { " $set " : { " date " : datetime . datetime . utcnow ( ) } } )
key , { " $set " : { " date " : datetime . datetime . utcnow ( ) } } )
return username
return username
@ -248,18 +244,19 @@ def api_get_frames():
RF = reduce_functions [ rf_name ] [ " f " ]
RF = reduce_functions [ rf_name ] [ " f " ]
corpus = request . args . get ( " cor " )
corpus = request . args . get ( " cor " )
if corpus not in CORPORA:
if corpus not in app. config [ " CORPORA" ] :
return json . dumps ( { " error " : " cor= { kres,ssj} " } )
return json . dumps ( { " error " : " cor= { kres,ssj} " } )
cur = val db[ corpus ] . find ( { " headwords " : hw } )
cur = mongo. db[ corpus ] . find ( { " headwords " : hw } )
frames = [ ]
frames = [ ]
for ent in cur :
for ent in cur [: app . config [ " QUERY_LIMIT " ] ] :
frames + = frames_from_db_entry ( ent ) # pre-process this step for prod TODO
frames + = frames_from_db_entry ( ent ) # pre-process this step for prod TODO
cur . close ( )
# filter by relevant hw
# filter by relevant hw
frames = [ x for x in frames if x . hw == hw ]
frames = [ x for x in frames if x . hw == hw ]
ret_frames = RF ( frames , valdb[ SENSEMAP_COLL ] )
ret_frames = RF ( frames , mongo. db . sensemap )
json_ret = { " frames " : [ ] }
json_ret = { " frames " : [ ] }
for frame in ret_frames :
for frame in ret_frames :
@ -300,19 +297,20 @@ def api_get_functor_frames():
RF = reduce_functions [ rf_name ] [ " f " ]
RF = reduce_functions [ rf_name ] [ " f " ]
corpus = request . args . get ( " cor " )
corpus = request . args . get ( " cor " )
if corpus not in CORPORA:
if corpus not in app. config [ " CORPORA" ] :
return json . dumps ( { " error " : " cor= { kres,ssj} " } )
return json . dumps ( { " error " : " cor= { kres,ssj} " } )
cur = val db[ corpus ] . find ( { " functors " : functor } )
cur = mongo. db[ corpus ] . find ( { " functors " : functor } )
frames = [ ]
frames = [ ]
for ent in cur :
for ent in cur [: app . config [ " QUERY_LIMIT " ] ] :
frames + = frames_from_db_entry ( ent ) # pre-process this step for prod TODO
frames + = frames_from_db_entry ( ent ) # pre-process this step for prod TODO
cur . close ( )
# filter by relevant functor
# filter by relevant functor
frames = [ x for x in frames if functor in x . get_functors ( ) ]
frames = [ x for x in frames if functor in x . get_functors ( ) ]
# raw_frames = vallex.functors_index[functor] # TODO
# raw_frames = vallex.functors_index[functor] # TODO
ret_frames = RF ( frames , valdb[ SENSEMAP_COLL ] )
ret_frames = RF ( frames , mongo. db . sensemap )
ret_frames = _aggregate_by_hw ( ret_frames )
ret_frames = _aggregate_by_hw ( ret_frames )
json_ret = { " frames " : [ ] }
json_ret = { " frames " : [ ] }
@ -331,10 +329,10 @@ def api_get_functor_frames():
def api_senses_get ( ) :
def api_senses_get ( ) :
# returns senses and mapping for hw
# returns senses and mapping for hw
hw = request . args . get ( " hw " )
hw = request . args . get ( " hw " )
senses = list ( valdb[ SENSES_COLL ] . find ( {
senses = list ( mongo. db . senses . find ( {
" hw " : hw
" hw " : hw
} ) )
} ) )
sense_map_query = list ( valdb[ SENSEMAP_COLL ] . find ( {
sense_map_query = list ( mongo. db . sensemap . find ( {
" hw " : hw
" hw " : hw
} ) )
} ) )
# aggregation by max date possible on DB side
# aggregation by max date possible on DB side
@ -414,7 +412,7 @@ def api_senses_update():
print ( ns )
print ( ns )
# insert into db
# insert into db
valdb[ SENSES_COLL ] . insert ( ns )
mongo. db . senses . insert ( ns )
# replace tmp_id with mongo's _id
# replace tmp_id with mongo's _id
for ssj_id , el in sense_map . items ( ) :
for ssj_id , el in sense_map . items ( ) :
@ -429,7 +427,7 @@ def api_senses_update():
" date " : datetime . datetime . utcnow ( )
" date " : datetime . datetime . utcnow ( )
}
}
# vallex.db["v2_sense_map"].update(key, data, upsert=True)
# vallex.db["v2_sense_map"].update(key, data, upsert=True)
valdb[ SENSEMAP_COLL ] . insert ( data )
mongo. db . sensemap . insert ( data )
return " OK "
return " OK "
# SENSES ----------------------------^
# SENSES ----------------------------^
@ -438,25 +436,25 @@ def api_senses_update():
# APP PREFLIGHT ---------------------.
# APP PREFLIGHT ---------------------.
def _is_banned(hw):
    """Decide whether headword ``hw`` is kept out of the app index.

    A headword is banned when it is listed in
    ``app.config["BANNED_HEADWORDS"]`` or when neither ``hw`` nor
    ``hw + " se"`` occurs in the module-level ``sskj_wordlist["wordlist"]``.

    Args:
        hw: headword string.

    Returns:
        True if the headword is banned, False otherwise.
    """
    wordlist = sskj_wordlist["wordlist"]
    if hw in app.config["BANNED_HEADWORDS"]:
        banned = True
    else:
        # NOTE(review): the " se" suffix presumably matches reflexive entries
        # in the wordlist -- confirm against the wordlist format.
        banned = not (hw in wordlist or (hw + " se") in wordlist)

    # endswith() instead of hw[-1]: an empty headword must not raise IndexError.
    if hw.endswith("_"):
        log.debug("hw: {}, banned: {}".format(hw, banned))
    return banned
def prepare_app_index ( ) :
def prepare_app_index ( appindex_json , sskj_wordlist ) :
log . info ( " [*] preparing app_index " )
log . info ( " [*] preparing app_index " )
# create app_index (used in frontend, left side word index)
# create app_index (used in frontend, left side word index)
tmp_app_index = { c : { } for c in CORPORA}
tmp_app_index = { c : { } for c in app. config [ " CORPORA" ] }
for corpus in CORPORA:
for corpus in app. config [ " CORPORA" ] :
res_hws = { }
res_hws = { }
res_fns = { }
res_fns = { }
for e in valdb [ corpus ] . find ( { } ) :
nentries = mongo . db [ corpus ] . count ( )
idx = 0
for e in mongo . db [ corpus ] . find ( { } ) :
if " headwords " not in e :
if " headwords " not in e :
continue
continue
for hw in e [ " headwords " ] :
for hw in e [ " headwords " ] :
@ -471,6 +469,10 @@ def prepare_app_index():
res_fns [ fn ] + = 1
res_fns [ fn ] + = 1
else :
else :
res_fns [ fn ] = 1
res_fns [ fn ] = 1
idx + = 1
if idx % 10000 == 0 :
log . debug ( " indexing {} : {} / {} " . format (
corpus , idx , nentries ) )
alphabetical = { }
alphabetical = { }
for k , e in res_hws . items ( ) :
for k , e in res_hws . items ( ) :
@ -486,15 +488,41 @@ def prepare_app_index():
tmp_app_index [ corpus ] [ " words " ] = alphabetical
tmp_app_index [ corpus ] [ " words " ] = alphabetical
functors = [ ( k , e ) for ( k , e ) in res_fns . items ( ) ]
functors = [ ( k , e ) for ( k , e ) in res_fns . items ( ) ]
functors = sorted ( functors , key = lambda x : x [ 0 ] )
functors = sorted ( functors , key = lambda x : x [ 0 ] )
tmp_app_index [ corpus ] [ " functors " ] = functors
tmp_app_index [ corpus ] [ " functors " ] = functors
valdb . appindex . update ( { " dockey " : " appindex " } , { " dockey " : " appindex " , " data " : tmp_app_index } , upsert = True )
with Path ( appindex_json ) . open ( " w " ) as fp :
json . dump ( tmp_app_index , fp )
# APP PREFLIGHT ---------------------^
# APP PREFLIGHT ---------------------^
def init_wsgi(app, config_path="/project/prod_conf.yaml"):
    """Configure ``app`` for running under a WSGI server.

    Reads the first YAML document of the production config, switches debug
    mode off, sets up file logging at INFO level and loads the pre-generated
    app index from the JSON file named by the config.

    Args:
        app: the Flask application to configure.
        config_path: path of the YAML config file; it must provide the keys
            ``"logfile"`` and ``"appindex"``.
    """
    print("Initiating wsgi")
    with Path(config_path).open("r") as fp:
        config = list(yaml.safe_load_all(fp))[0]

    app.debug = False
    logging.basicConfig(filename=config["logfile"], level=logging.INFO)

    # The app index is read from a JSON file and parked in app.config so the
    # request handlers can reach it (a dirty hack but ok).
    with Path(config["appindex"]).open("r") as fp:
        app.config["app_index"] = json.load(fp)

    log.info("[*] Starting app.py with config:\n%s", config)
# When the process was launched by gunicorn (its name shows up in argv[0]),
# assume the production environment and configure the app at import time.
if "gunicorn" in sys.argv[0]:
    init_wsgi(app)
if __name__ == " __main__ " :
if __name__ == " __main__ " :
print ( " Starting app.py main() " )
print ( " Starting app.py main() " )
aparser = argparse . ArgumentParser ( description = " Arguments for app.py " )
aparser = argparse . ArgumentParser ( description = " Arguments for app.py " )
@ -504,9 +532,9 @@ if __name__ == "__main__":
aparser . add_argument ( " --dbpass " , type = str )
aparser . add_argument ( " --dbpass " , type = str )
aparser . add_argument ( " --dbaddr " , type = str )
aparser . add_argument ( " --dbaddr " , type = str )
aparser . add_argument ( " --sskj-wordlist " , type = str )
aparser . add_argument ( " --sskj-wordlist " , type = str )
aparser . add_argument ( " --appindex-json " , type = str )
args = aparser . parse_args ( )
args = aparser . parse_args ( )
config = None
with Path ( args . config_file ) . open ( " r " ) as fp :
with Path ( args . config_file ) . open ( " r " ) as fp :
config = list ( yaml . safe_load_all ( fp ) ) [ 0 ]
config = list ( yaml . safe_load_all ( fp ) ) [ 0 ]
@ -517,27 +545,31 @@ if __name__ == "__main__":
else :
else :
logging . basicConfig ( filename = logfile , level = logging . INFO )
logging . basicConfig ( filename = logfile , level = logging . INFO )
"""
# db login
# db login
client = MongoClient (
client = MongoClient (
" mongodb:// {} " . format ( args . dbaddr ) ,
" mongodb:// {} " . format ( args . dbaddr ) ,
username = args . dbuser ,
username = args . dbuser ,
password = args . dbpass ,
password = args . dbpass ,
authSource = " val db" ,
authSource = " mongo. db" ,
authMechanism = ' SCRAM-SHA-1 '
authMechanism = ' SCRAM-SHA-1 '
)
)
valdb = client . valdb
valdb = client . mongo . db
"""
if args . prepare_db :
if args . prepare_db :
with Path ( args . sskj_wordlist ) . open ( " r " ) as fp :
with Path ( args . sskj_wordlist ) . open ( " r " ) as fp :
sskj_wordlist = json . load ( fp )
sskj_wordlist = json . load ( fp )
prepare_app_index ( )
prepare_app_index ( args . appindex_json , sskj_wordlist )
sys . exit ( )
sys . exit ( )
# app index from db
# app index from db
app_index = ( valdb . appindex . find_one ( { " dockey " : " appindex " } ) ) [ " data " ]
with Path ( args . appindex_json ) . open ( " r " ) as fp :
app . config [ " app_index " ] = json . load ( fp )
# log.info("[*] Starting app.py with config:\n%s".format(config))
# log.info("[*] Starting app.py with config:\n%s".format(config))
log . info ( " [*] Starting app.py with config: \n {} " . format ( config ) )
log . info ( " [*] Starting app.py with config: \n {} " . format ( config ) )
app . run ( host = str ( config [ " host " ] ) , port = int ( config [ " port " ] ) )
app . run ( host = str ( config [ " host " ] ) , port = int ( config [ " port " ] ) )