a
This commit is contained in:
parent
2582314c4d
commit
01adf47b9b
|
@ -21,7 +21,7 @@ lfh.setFormatter(formatter)
|
||||||
logger.addHandler(lfh)
|
logger.addHandler(lfh)
|
||||||
logger.setLevel(logging.INFO)
|
logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
n_chunks = -1 # for logging
|
n_chunks = -1
|
||||||
|
|
||||||
|
|
||||||
def enriched_lemma(token):
|
def enriched_lemma(token):
|
||||||
|
@ -69,7 +69,11 @@ def _handle_kres_file_chunk(kres_file_chunk):
|
||||||
)
|
)
|
||||||
# dbclient.valdb["kres"]
|
# dbclient.valdb["kres"]
|
||||||
for kres_file in kres_chunk:
|
for kres_file in kres_chunk:
|
||||||
|
try:
|
||||||
kres_data = kres_parser.parse_xml_file(kres_file)
|
kres_data = kres_parser.parse_xml_file(kres_file)
|
||||||
|
except:
|
||||||
|
logger.error("Failed to parse file: {}".format(kres_file))
|
||||||
|
continue
|
||||||
if args.output == "file":
|
if args.output == "file":
|
||||||
kres_outdir = outdir / "kres_json"
|
kres_outdir = outdir / "kres_json"
|
||||||
kres_outdir.mkdir(parents=True, exist_ok=True)
|
kres_outdir.mkdir(parents=True, exist_ok=True)
|
||||||
|
@ -78,17 +82,23 @@ def _handle_kres_file_chunk(kres_file_chunk):
|
||||||
json.dump(kres_data, fp)
|
json.dump(kres_data, fp)
|
||||||
|
|
||||||
elif args.output == "db":
|
elif args.output == "db":
|
||||||
|
"""
|
||||||
if dbclient.valdb["kres"].find({"sid": kres_data[0]["sid"]}).count() > 0:
|
if dbclient.valdb["kres"].find({"sid": kres_data[0]["sid"]}).count() > 0:
|
||||||
logger.info("File {} already in DB, closing chunk ({}/{})".format(
|
logger.info("File {} already in DB, closing chunk ({}/{})".format(
|
||||||
kres_file, kres_chunk_idx, n_chunks))
|
kres_file, kres_chunk_idx, n_chunks))
|
||||||
dbclient.close()
|
dbclient.close()
|
||||||
return
|
return
|
||||||
|
"""
|
||||||
|
|
||||||
kres_data_1 = [_db_preprocess(x) for x in kres_data]
|
kres_data_1 = [_db_preprocess(x) for x in kres_data]
|
||||||
db_payload += kres_data_1
|
db_payload += kres_data_1
|
||||||
|
|
||||||
|
try:
|
||||||
dbclient.valdb["kres"].insert_many(db_payload, ordered=False) # much much better (just make sure sid has a unique index)
|
dbclient.valdb["kres"].insert_many(db_payload, ordered=False) # much much better (just make sure sid has a unique index)
|
||||||
logger.info("Inserted kres files chunk ({}/{}) in {:.2f} s".format(
|
except:
|
||||||
|
logger.error("Failed inserting kres files chunk ({}/{}) in {:.2f} s".format(
|
||||||
|
kres_chunk_idx, n_chunks, time.time() - tstart))
|
||||||
|
logger.info("Db insert: chunks ({}/{}) in {:.2f} s".format(
|
||||||
kres_chunk_idx, n_chunks, time.time() - tstart))
|
kres_chunk_idx, n_chunks, time.time() - tstart))
|
||||||
dbclient.close()
|
dbclient.close()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user