From 40f6aea2e75309780f79dddaa7c2918f5d762627 Mon Sep 17 00:00:00 2001 From: voje Date: Sun, 7 Apr 2019 21:43:42 +0200 Subject: [PATCH] senses working --- src/backend_flask/app.py | 9 +++-- src/frontend_vue/src/components/MainDispl.vue | 3 +- src/pkg/valency/valency/Frame.py | 2 +- src/pkg/valency/valency/reduce_functions.py | 33 +++++++++++++------ 4 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/backend_flask/app.py b/src/backend_flask/app.py index f4d2c4b..bdd6ebc 100644 --- a/src/backend_flask/app.py +++ b/src/backend_flask/app.py @@ -244,7 +244,7 @@ def prepare_frames(ret_frames): # sid, tid==hw for sid, tid in unique_sids.items(): # hwl = vallex.get_token(tid)["lemma"] - hwl = "derp" + hwl = frame.hw tmp_idx = len(frame.sentences) if hwl not in frame.aggr_sent: frame.aggr_sent[hwl] = [] @@ -273,15 +273,14 @@ def api_get_frames(): # entry = vallex.entries[hw] # TODO hw -> [Frame,] cur = valdb[corpus].find({"headwords": hw}) - print("N_results: " + str(cur.count())) frames = [] for ent in cur: # TODO: maybe filter by hw? - frames += frames_from_db_entry(ent) + frames += frames_from_db_entry(ent) # pre-process this step for prod TODO - # return json.dumps([x.to_json() for x in frames]) + frames = [x for x in frames if x.hw == hw] - ret_frames = RF(frames, None) + ret_frames = RF(frames, valdb[SENSEMAP_COLL]) return prepare_frames(ret_frames) diff --git a/src/frontend_vue/src/components/MainDispl.vue b/src/frontend_vue/src/components/MainDispl.vue index 7135b09..2804fef 100644 --- a/src/frontend_vue/src/components/MainDispl.vue +++ b/src/frontend_vue/src/components/MainDispl.vue @@ -169,6 +169,7 @@ export default { } this.sentences = {} for (var fi in this.frames) { + console.log(this.frames[fi].sentences) for (var si in this.frames[fi].sentences) { var sentence = this.frames[fi].sentences[si] // get ssj_id without .t123 @@ -184,7 +185,7 @@ export default { } */ for (var i in sentence) { - words.push(sentence[i][1].word) + words.push(sentence[i][1].text) if (sentence[i][1].lemma === tmp_hw && hw_idx == -1) { hw_idx = i } diff --git a/src/pkg/valency/valency/Frame.py b/src/pkg/valency/valency/Frame.py index 970318f..353ca5a 100644 --- a/src/pkg/valency/valency/Frame.py +++ b/src/pkg/valency/valency/Frame.py @@ -20,7 +20,7 @@ def frames_from_db_entry(dbent): srldict[key] += [srl] for hwtid, srlarr in srldict.items(): frames += [Frame( - hw_lemma=token_dict[hwtid]["text"], + hw_lemma=token_dict[hwtid]["lemma"], tids=[_full_tid(hwtid)], slots=[ Slot( diff --git a/src/pkg/valency/valency/reduce_functions.py b/src/pkg/valency/valency/reduce_functions.py index 846a7ac..34853e2 100644 --- a/src/pkg/valency/valency/reduce_functions.py +++ b/src/pkg/valency/valency/reduce_functions.py @@ -22,7 +22,7 @@ def sorted_by_len_tids(frames): ) -def reduce_0(frames, vallex=None): +def reduce_0(frames, valdb_sensemap=None): # new request... frames should be sorded by # functors list (basically reduce_1, just each # sentence gets its own frame) @@ -43,7 +43,7 @@ def reduce_0(frames, vallex=None): return sorted_sep_frames -def reduce_1(frames, vallex=None): +def reduce_1(frames, valdb_sensemap=None): # Combine frames with the same set of functors. # The order of functors is not important. frame_sets = [] # [set of functors, list of frames] @@ -76,12 +76,15 @@ def reduce_1(frames, vallex=None): slots_list = [] for k, e in slots.items(): slots_list.append(e) - rf = Frame(tids=tids, slots=slots_list, sentences=sentences) + # TODO does appending hw_lemma of first frame work for functor frames too? + rf = Frame(hw_lemma=fs[1][0].hw, tids=tids, slots=slots_list, sentences=sentences) rf.sort_slots() ret_frames.append(rf) return sorted_by_len_tids(ret_frames) +# deprecated +""" def reduce_3(raw_frames, vallex): # sskj simple lesk ids ssj_ids = [frame.tids[0] for frame in raw_frames] @@ -94,8 +97,11 @@ def reduce_3(raw_frames, vallex): "sense_desc": entry.get("sense_desc") }}) return frames_from_sense_ids(raw_frames, id_map) +""" +#deprecated +""" def reduce_4(raw_frames, vallex): # kmeans ids ssj_ids = [frame.tids[0] for frame in raw_frames] @@ -107,15 +113,15 @@ def reduce_4(raw_frames, vallex): "sense_id": entry["sense_id"] }}) return frames_from_sense_ids(raw_frames, id_map) +""" -def reduce_5(raw_frames, vallex): - USER_SENSE_COLL = "v2_sense_map" +def reduce_5(raw_frames, valdb_sensemap): headword = raw_frames[0].hw ssj_ids_full = [frame.tids[0] for frame in raw_frames] # v2_sense_map stores only sentence half of ssj_id ssj_ids = [".".join(ssj_id.split(".")[:-1]) for ssj_id in ssj_ids_full] - db_results = list(vallex.db[USER_SENSE_COLL].find({ + db_results = list(valdb_sensemap.find({ "ssj_id": {"$in": ssj_ids}, "hw": headword, })) @@ -173,8 +179,10 @@ def frames_from_sense_ids(raw_frames, id_map): for sense_id, frames in id_dict.items(): tids = [] reduced_slots = [] + sentences = [] for frame in frames: - tids.extend(frame.tids) + tids += frame.tids + sentences += frame.sentences for slot in frame.slots: # if functor not in reduced slots, # add new slot; else increase count @@ -190,7 +198,12 @@ def frames_from_sense_ids(raw_frames, id_map): tids=slot.tids, count=1 )) - reduced_frame = Frame(tids, slots=reduced_slots) + reduced_frame = Frame( + hw_lemma="derp", + tids=tids, + slots=reduced_slots, + sentences=sentences + ) id_map_entry = ( id_map.get(tids[0]) or id_map.get(".".join(tids[0].split(".")[:-1])) @@ -222,14 +235,14 @@ reduce_functions = { "simple_name": "združeni stavki" }, "reduce_3": { - "f": reduce_3, + "f": None, # deprecated "desc": "Združevanje stavčnih vzorcev na osnovi pomenov povedi v SSKJ. " "Pomeni so dodeljeni s pomočjo algoritma Simple Lesk.", "simple_name": "SSKJ_pomeni" }, "reduce_4": { - "f": reduce_4, + "f": None, # deprecated "desc": "Združevanje stavčnih vzorcev na osnovi pomenov povedi " "s pomočjo algoritma K-Means. Število predvidenih pomenov "