"""Add search fields to corpus entries and build the frontend word index.

Run as a script, this does two passes over every collection in CORPORA:

1. Each entry gets "headwords" and "functors" lists derived from its
   "srl_links", and both fields are indexed.
2. Per-corpus headword and functor frequencies are collected and upserted
   into the ``appindex`` collection under the key ``"appindex"``.
"""

from collections import Counter

CORPORA = ["kres", "ssj"]


def helper_tid_to_token(tid, tokens):
    """Return the first token whose "tid" equals ``tid``, or None if absent."""
    return next((token for token in tokens if token["tid"] == tid), None)


def helper_entry_index_fields(entry):
    """Compute the ``(headwords, functors)`` index fields for one entry.

    Args:
        entry: Entry document with "tokens" and, optionally, "srl_links".
            Each link is read for its "from" (token id) and "afun" keys.

    Returns:
        A ``(headwords, functors)`` tuple of lists. Both are empty when
        "srl_links" is missing, None, or empty. Values are de-duplicated in
        first-seen order, so the result is deterministic for a given entry.
        Two different token ids may still map to the same headword.
    """
    links = entry.get("srl_links")
    if not links:
        return [], []

    # dict.fromkeys de-duplicates while keeping the order of the links.
    head_tids = dict.fromkeys(link["from"] for link in links)
    head_tokens = (
        helper_tid_to_token(tid, entry["tokens"]) for tid in head_tids
    )
    # A lemma is kept as-is when its msd starts with "G" (presumably the
    # verb tag -- TODO confirm); every other lemma gets a trailing "_".
    # Links whose source token is not among the entry's tokens are skipped
    # instead of failing on a None token.
    headwords = [
        token["lemma"] if token["msd"][0] == "G" else token["lemma"] + "_"
        for token in head_tokens
        if token is not None
    ]
    functors = list(dict.fromkeys(link["afun"] for link in links))
    return headwords, functors


def helper_build_app_index(entries):
    """Build the frontend index data for one corpus.

    Args:
        entries: Iterable of entry documents. Entries without a "headwords"
            field are ignored entirely, including their functors.

    Returns:
        A dict with two keys:
        "words": maps each lowercased first character to a list of
            ``(headword, count)`` pairs sorted by headword.
        "functors": list of ``(functor, count)`` pairs sorted by functor.
    """
    headword_counts = Counter()
    functor_counts = Counter()
    for entry in entries:
        if "headwords" not in entry:
            continue
        headword_counts.update(entry["headwords"])
        functor_counts.update(entry.get("functors", ()))

    # Group headwords under their lowercased initial, then sort each group.
    words = {}
    for headword, count in headword_counts.items():
        words.setdefault(headword[0].lower(), []).append((headword, count))
    for initial, pairs in words.items():
        words[initial] = sorted(pairs, key=lambda pair: pair[0])

    functors = sorted(functor_counts.items(), key=lambda pair: pair[0])
    return {"words": words, "functors": functors}


if __name__ == "__main__":

    # NOTE(review): valdb is None in the visible source, so the loops below
    # cannot run until it is bound to the database handle -- presumably the
    # connection setup was elided. TODO confirm.
    valdb = None

    # NOTE(review): pymongo is referenced below but no import is visible in
    # this source -- confirm it is imported. save(), ensure_index() and
    # update() are legacy PyMongo collection methods; verify the installed
    # PyMongo version still provides them.

    # update entries (add headwords and functors for indexing)
    for corpus in CORPORA:
        collection = valdb[corpus]
        for entry in collection.find({}):
            entry["headwords"], entry["functors"] = (
                helper_entry_index_fields(entry)
            )
            collection.save(entry)

        collection.ensure_index([("headwords", pymongo.ASCENDING)])
        collection.ensure_index([("functors", pymongo.ASCENDING)])

    # create app_index (used in frontend, left side word index)
    tmp_app_index = {
        corpus: helper_build_app_index(valdb[corpus].find({}))
        for corpus in CORPORA
    }

    valdb.appindex.update(
        {"dockey": "appindex"},
        {"dockey": "appindex", "data": tmp_app_index},
        upsert=True,
    )