commit 044fa66910
parent ca942344d7

    working on api changes
@@ -26,6 +26,7 @@ from email.mime.text import MIMEText
from copy import deepcopy as DC
from pathlib import Path
from pymongo import MongoClient
import pymongo
import argparse

# some db collections
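Note: the bare `import pymongo` is what makes the module-level constants available (e.g. `pymongo.ASCENDING`, used for the new indexes in `prepare_db()` below); `from pymongo import MongoClient` alone does not expose them. A minimal sketch, assuming a local MongoDB instance (the URI and db name here are hypothetical):

```python
from pymongo import MongoClient
import pymongo

client = MongoClient("mongodb://localhost:27017")  # hypothetical URI
valdb = client["valdb"]

# pymongo.ASCENDING is the sort-direction constant used when declaring an
# index; create_index is the current-API spelling of ensure_index (see below).
valdb["kres"].create_index([("headwords", pymongo.ASCENDING)])
```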
@@ -55,7 +56,8 @@ CORS(app)

@app.route("/api/dev")
def api_dev():
    cur = valdb.kres.find({"sid": "F0015940.37.2"})
    print("DEV")
    cur = valdb.kres.find({"headwords": "nagovarjati"})
    frames = []
    for ent in cur:
        frames += frames_from_db_entry(ent)
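The dev endpoint switches from fetching one sentence by `sid` to fetching every entry whose `headwords` array contains the test lemma. MongoDB matches a scalar query value against array elements, so no special operator is needed. A sketch of the lookup, assuming the entry layout produced by `prepare_db()` further down:

```python
# An entry stores headwords as an array, e.g.
# {"sid": ..., "tokens": [...], "srl_links": [...], "headwords": ["nagovarjati", ...]}.
# A scalar query value matches any element of that array:
cur = valdb.kres.find({"headwords": "nagovarjati"})
frames = []
for ent in cur:
    frames += frames_from_db_entry(ent)  # one Frame per SRL head token per entry
```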
@@ -236,20 +238,18 @@ def api_token():
def prepare_frames(ret_frames):
    # append sentences
    for frame in ret_frames:
        frame.sentences = []
        unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids}
        log.debug(str(unique_sids))
        frame.sentences = []
        # frame.sentences = []
        frame.aggr_sent = {}
        # sid, tid==hw
        for sid, tid in unique_sids.items():
            hwl = vallex.get_token(tid)["lemma"]
            # hwl = vallex.get_token(tid)["lemma"]
            hwl = "derp"
            tmp_idx = len(frame.sentences)
            if hwl not in frame.aggr_sent:
                frame.aggr_sent[hwl] = []
            frame.aggr_sent[hwl].append(tmp_idx)
            frame.sentences.append(
                vallex.get_tokenized_sentence(tid)
            )
    # return (n-frames, rendered template)
    # json frames
    json_ret = {"frames": []}
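Per the `Frame` class comment further down, `aggr_sent` maps a headword lemma to indices into `frame.sentences`, so the frontend can group sentences by headword; `hwl = "derp"` is clearly a work-in-progress placeholder for the lemma lookup that used to go through `vallex.get_token()`. A runnable illustration of the structure being built, with hypothetical token lists:

```python
sentences = [["Janez", "nagovarja", "..."], ["On", "nagovarja", "..."]]
aggr_sent = {}  # { headword_lemma: [indices into sentences] }
for idx, _ in enumerate(sentences):
    # "derp" stands in for the real lemma, as in the commit;
    # setdefault is equivalent to the if-not-in pattern above
    aggr_sent.setdefault("derp", []).append(idx)
assert aggr_sent == {"derp": [0, 1]}
```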
@@ -259,7 +259,7 @@ def prepare_frames(ret_frames):


# input: hw, reduce_function
@app.route("/api/hw-frames")
@app.route("/api/frames")
def api_get_frames():
    hw = request.args.get("hw")
    if hw is None:
@@ -267,8 +267,21 @@ def api_get_frames():

    rf_name = request.args.get("rf", "reduce_0")  # 2nd is default
    RF = reduce_functions[rf_name]["f"]
    entry = vallex.entries[hw]  # TODO  hw -> [Frame,]
    ret_frames = RF(entry.raw_frames, vallex)

    # entry = vallex.entries[hw]  # TODO  hw -> [Frame,]
    cur = valdb.kres.find({"headwords": hw})
    frames = []
    for ent in cur:
        # TODO: maybe filter by hw?
        frames += frames_from_db_entry(ent)

    # return json.dumps([x.to_json() for x in frames])

    print("A")
    print(frames[0].to_json())
    ret_frames = RF(frames, None)
    print("B")
    print(ret_frames[0].to_json())
    return prepare_frames(ret_frames)
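`/api/frames` now builds its frames straight from MongoDB instead of the in-memory `vallex.entries`, and passes `None` as the vallex argument to the reduce function (which `reduce_1` tolerates via its `vallex=None` default). The bare `print("A")`/`print("B")` calls read as leftover debugging and could be demoted to `log.debug` before merge. A hypothetical local call, assuming the Flask default port:

```python
import requests

resp = requests.get(
    "http://localhost:5000/api/frames",              # host/port assumed
    params={"hw": "nagovarjati", "rf": "reduce_1"},  # rf falls back to "reduce_0"
)
print(resp.json()["frames"])  # prepare_frames wraps results as {"frames": [...]}
```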


@@ -408,18 +421,22 @@ def prepare_db():
    for corpus in CORPORA:
        for e in valdb[corpus].find({}):
            if e["srl_links"] is None:
                continue
            hw_tids = list(set([x["from"] for x in e["srl_links"]]))
            hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
            headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
            e["headwords"] = headwords
                e["headwords"] = []
                e["functors"] = []
            else:
                hw_tids = list(set([x["from"] for x in e["srl_links"]]))
                hw_tokens = [helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
                headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
                e["headwords"] = headwords

            functors = list(set([x["afun"] for x in e["srl_links"]]))
            e["headwords"] = headwords
            e["functors"] = functors
                functors = list(set([x["afun"] for x in e["srl_links"]]))
                e["functors"] = functors

            valdb[corpus].save(e)

        valdb[corpus].ensure_index([("headwords", pymongo.ASCENDING)])
        valdb[corpus].ensure_index([("functors", pymongo.ASCENDING)])

    # create app_index (used in frontend, left side word index)
    for corpus in CORPORA:
        res_hws = {}
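The rewrite fixes the earlier behavior where entries with `srl_links == None` were skipped entirely (`continue`) and never received `headwords`/`functors` fields; they now get empty arrays, so every document carries the indexed keys. `ensure_index` is the legacy pymongo spelling, deprecated since pymongo 3.0, and `create_index` is the drop-in replacement (likewise `save()` is superseded by `replace_one`). A sketch with the current API, same index shape, assuming the module's `valdb`/`CORPORA`:

```python
import pymongo

for corpus in CORPORA:
    valdb[corpus].create_index([("headwords", pymongo.ASCENDING)])
    valdb[corpus].create_index([("functors", pymongo.ASCENDING)])

# With the index in place, lookups like valdb.kres.find({"headwords": hw})
# in api_get_frames() no longer scan the whole collection.
```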

@@ -6,6 +6,8 @@ def frames_from_db_entry(dbent):
    def _full_tid(tid):
        return ".".join([dbent["sid"], str(tid)])

    token_dict = {str(x["tid"]): x for x in dbent["tokens"]}

    frames = []
    if "srl_links" not in dbent:
        return []
@@ -18,19 +20,20 @@ def frames_from_db_entry(dbent):
            srldict[key] += [srl]
    for hwtid, srlarr in srldict.items():
        frames += [Frame(
            hw_lemma=_full_tid(hwtid),
            tids=[_full_tid(x["to"]) for x in srlarr],
            hw_lemma=token_dict[hwtid]["text"],
            tids=[_full_tid(hwtid)],
            slots=[
                Slot(
                    functor=srl["afun"],
                    tids=[_full_tid(srl["to"])]
                ) for srl in srlarr
            ]
            ],
            sentences=[dbent["tokens"]],
        )]
    return frames
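`frames_from_db_entry` now emits one `Frame` per SRL head token: `hw_lemma` is the head token's surface text (resolved through the new `token_dict`) rather than its tid string, `tids` holds the head's own full tid instead of the link targets, and the entry's full token list rides along as the frame's single sentence. A hypothetical minimal entry, assuming the (not shown) grouping keys `srl_links` by the stringified `from` tid:

```python
dbent = {
    "sid": "S1",
    "tokens": [
        {"tid": 1, "text": "Janez"},
        {"tid": 2, "text": "nagovarja"},
    ],
    "srl_links": [{"from": 2, "to": 1, "afun": "ACT"}],
}

f = frames_from_db_entry(dbent)[0]
assert f.hw == "nagovarja"            # token text, no longer the "S1.2" tid string
assert f.tids == ["S1.2"]             # the head token's own full tid
assert (f.slots[0].functor, f.slots[0].tids) == ("ACT", ["S1.1"])
assert f.sentences == [dbent["tokens"]]
```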

class Frame():
    def __init__(self, tids, deep_links=None, slots=None, hw_lemma=None):
    def __init__(self, tids, deep_links=None, slots=None, hw_lemma=None, sentences=None):
        self.hw = hw_lemma
        self.tids = tids   # list of tokens with the same hw_lemma
        # Each tid = "S123.t123";
@@ -41,7 +44,7 @@ class Frame():
        else:
            self.slots = slots
        self.sense_info = {}
        self.sentences = None  # Used for passing to view in app.py, get_frames
        self.sentences = sentences
        self.aggr_sent = None  # Dictionary { hw: self.sentences idx }

    def to_json(self):

@@ -56,11 +56,13 @@ def reduce_1(frames, vallex=None):
                break
        else:
            # Python for-else: runs only when the loop finished without break.
            # init new frame set [set_of_functors, list_of_one_frame]
            frame_sets.append([functors, [frame]])

    ret_frames = []
    for fs in frame_sets:
        tids = []
        sentences = []
        slots = {}
        # All possible slots in this frame.
        for functor in fs[0]:
@@ -68,12 +70,13 @@ def reduce_1(frames, vallex=None):
        # Reduce slots from all frames. (Merge ACT from all frames, ...)
        for frame in fs[1]:
            tids += frame.tids
            sentences += frame.sentences
            for sl in frame.slots:
                slots[sl.functor].tids += sl.tids
        slots_list = []
        for k, e in slots.items():
            slots_list.append(e)
        rf = Frame(tids=tids, slots=slots_list)
        rf = Frame(tids=tids, slots=slots_list, sentences=sentences)
        rf.sort_slots()
        ret_frames.append(rf)
    return sorted_by_len_tids(ret_frames)
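`reduce_1` groups frames that share an identical functor set and merges each group into a single frame; with this commit the merged frame also concatenates the member frames' `sentences`, which is what lets `prepare_frames` drop its `vallex.get_tokenized_sentence()` calls. A rough worked example with hypothetical frames, on the assumption that the partially shown grouping and slot-merge code behaves as the context suggests:

```python
f1 = Frame(tids=["S1.2"], slots=[Slot(functor="ACT", tids=["S1.1"])],
           sentences=[["Janez", "nagovarja"]])
f2 = Frame(tids=["S2.5"], slots=[Slot(functor="ACT", tids=["S2.4"])],
           sentences=[["On", "nagovarja"]])

merged = reduce_1([f1, f2])   # both frames have the functor set {ACT}
assert merged[0].tids == ["S1.2", "S2.5"]
assert merged[0].slots[0].tids == ["S1.1", "S2.4"]
assert len(merged[0].sentences) == 2  # sentences now travel with the merge
```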