|
|
|
@ -90,6 +90,7 @@ def _handle_kres_file_tpl(kres_file_tpl):
|
|
|
|
|
kres_col.insert_many(kres_data_1) # much much better (just make sure sid has a unique index)
|
|
|
|
|
logger.info("Inserted data from {} ({}/{}) in {:.2f} s".format(
|
|
|
|
|
kres_file, kres_file_idx, n_kres_files, time.time() - tstart))
|
|
|
|
|
dbclient.close()
|
|
|
|
|
|
|
|
|
|
def _get_dbclient(args):
|
|
|
|
|
dbclient = MongoClient(
|
|
|
|
@ -130,6 +131,7 @@ if __name__ == "__main__":
|
|
|
|
|
dbclient.valdb[corpus].ensure_index([("sid", pymongo.ASCENDING)])
|
|
|
|
|
dbclient.valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
|
|
|
|
|
dbclient.valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])
|
|
|
|
|
dbclient.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if corpus == "ssj":
|
|
|
|
@ -147,6 +149,7 @@ if __name__ == "__main__":
|
|
|
|
|
for sentence in ssj_data:
|
|
|
|
|
sentence = _db_preprocess(sentence)
|
|
|
|
|
ssj_col.update({"sid": sentence["sid"]}, sentence, upsert=True)
|
|
|
|
|
dbclient.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if corpus == "kres":
|
|
|
|
|