From 2582314c4d69aa6f6a7be9fdf898a2c3fa590d5e Mon Sep 17 00:00:00 2001 From: voje Date: Sun, 21 Apr 2019 22:24:36 +0200 Subject: [PATCH] bugfix --- corpusparser/main.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/corpusparser/main.py b/corpusparser/main.py index 83b41fc..1bcafd4 100644 --- a/corpusparser/main.py +++ b/corpusparser/main.py @@ -87,10 +87,10 @@ def _handle_kres_file_chunk(kres_file_chunk): kres_data_1 = [_db_preprocess(x) for x in kres_data] db_payload += kres_data_1 - dbclient.valdb["kres"].insert_many(db_payload) # much much better (just make sure sid has a unique index) - logger.info("Inserted kres files chunk ({}/{}) in {:.2f} s".format( - kres_chunk_idx, n_chunks, time.time() - tstart)) - dbclient.close() + dbclient.valdb["kres"].insert_many(db_payload, ordered=False) # much much better (just make sure sid has a unique index) + logger.info("Inserted kres files chunk ({}/{}) in {:.2f} s".format( + kres_chunk_idx, n_chunks, time.time() - tstart)) + dbclient.close() def _get_dbclient(args): dbclient = MongoClient( @@ -163,6 +163,7 @@ if __name__ == "__main__": ) kres_files = [x for x in Path(args.kres_folder).iterdir()] + kres_files = sorted(kres_files, key=lambda x: x.name) kres_files_chunks = [] i = 0 while i < len(kres_files):