From 2b7339ac5abb52958f7875a3e0a0eb1899728730 Mon Sep 17 00:00:00 2001
From: voje
Date: Thu, 11 Apr 2019 07:55:44 +0200
Subject: [PATCH] update instead of insert, fixing sentence duplication in db

---
 corpusparser/main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/corpusparser/main.py b/corpusparser/main.py
index 89b17b9..3e7c11a 100644
--- a/corpusparser/main.py
+++ b/corpusparser/main.py
@@ -70,7 +70,8 @@ def data_to_valdb(sentence_generator, dbaddr, username, password, collection_nam
     col = valdb[collection_name]
     for s in sentence_generator:
         sdata = s[1]
-        col.insert_one(sdata)
+        # col.insert_one(sdata)
+        col.update({"sid": sdata["sid"]}, sdata, upsert=True)
 
 
 if __name__ == "__main__":