Add progress logging when indexing
这个提交包含在:
父节点
c5fc78dca1
当前提交
14a6e2423b
@ -453,6 +453,9 @@ def prepare_app_index(appindex_json):
|
|||||||
for corpus in CORPORA:
|
for corpus in CORPORA:
|
||||||
res_hws = {}
|
res_hws = {}
|
||||||
res_fns = {}
|
res_fns = {}
|
||||||
|
|
||||||
|
nentries = valdb[corpus].count()
|
||||||
|
idx = 0
|
||||||
for e in valdb[corpus].find({}):
|
for e in valdb[corpus].find({}):
|
||||||
if "headwords" not in e:
|
if "headwords" not in e:
|
||||||
continue
|
continue
|
||||||
@ -468,6 +471,10 @@ def prepare_app_index(appindex_json):
|
|||||||
res_fns[fn] += 1
|
res_fns[fn] += 1
|
||||||
else:
|
else:
|
||||||
res_fns[fn] = 1
|
res_fns[fn] = 1
|
||||||
|
idx += 1
|
||||||
|
if idx % 10000 == 0:
|
||||||
|
log.debug("indexing {}: {}/{}".format(
|
||||||
|
corpus, idx, nentries))
|
||||||
|
|
||||||
alphabetical = {}
|
alphabetical = {}
|
||||||
for k, e in res_hws.items():
|
for k, e in res_hws.items():
|
||||||
|
正在加载...
在新工单中引用
屏蔽一个用户