senses working

This commit is contained in:
voje 2019-04-07 21:43:42 +02:00
parent e4730c40e1
commit 40f6aea2e7
4 changed files with 30 additions and 17 deletions

View File

@ -244,7 +244,7 @@ def prepare_frames(ret_frames):
# sid, tid==hw # sid, tid==hw
for sid, tid in unique_sids.items(): for sid, tid in unique_sids.items():
# hwl = vallex.get_token(tid)["lemma"] # hwl = vallex.get_token(tid)["lemma"]
hwl = "derp" hwl = frame.hw
tmp_idx = len(frame.sentences) tmp_idx = len(frame.sentences)
if hwl not in frame.aggr_sent: if hwl not in frame.aggr_sent:
frame.aggr_sent[hwl] = [] frame.aggr_sent[hwl] = []
@ -273,15 +273,14 @@ def api_get_frames():
# entry = vallex.entries[hw] # TODO hw -> [Frame,] # entry = vallex.entries[hw] # TODO hw -> [Frame,]
cur = valdb[corpus].find({"headwords": hw}) cur = valdb[corpus].find({"headwords": hw})
print("N_results: " + str(cur.count()))
frames = [] frames = []
for ent in cur: for ent in cur:
# TODO: maybe filter by hw? # TODO: maybe filter by hw?
frames += frames_from_db_entry(ent) frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
# return json.dumps([x.to_json() for x in frames]) frames = [x for x in frames if x.hw == hw]
ret_frames = RF(frames, None) ret_frames = RF(frames, valdb[SENSEMAP_COLL])
return prepare_frames(ret_frames) return prepare_frames(ret_frames)

View File

@ -169,6 +169,7 @@ export default {
} }
this.sentences = {} this.sentences = {}
for (var fi in this.frames) { for (var fi in this.frames) {
console.log(this.frames[fi].sentences)
for (var si in this.frames[fi].sentences) { for (var si in this.frames[fi].sentences) {
var sentence = this.frames[fi].sentences[si] var sentence = this.frames[fi].sentences[si]
// get ssj_id without .t123 // get ssj_id without .t123
@ -184,7 +185,7 @@ export default {
} }
*/ */
for (var i in sentence) { for (var i in sentence) {
words.push(sentence[i][1].word) words.push(sentence[i][1].text)
if (sentence[i][1].lemma === tmp_hw && hw_idx == -1) { if (sentence[i][1].lemma === tmp_hw && hw_idx == -1) {
hw_idx = i hw_idx = i
} }

View File

@ -20,7 +20,7 @@ def frames_from_db_entry(dbent):
srldict[key] += [srl] srldict[key] += [srl]
for hwtid, srlarr in srldict.items(): for hwtid, srlarr in srldict.items():
frames += [Frame( frames += [Frame(
hw_lemma=token_dict[hwtid]["text"], hw_lemma=token_dict[hwtid]["lemma"],
tids=[_full_tid(hwtid)], tids=[_full_tid(hwtid)],
slots=[ slots=[
Slot( Slot(

View File

@ -22,7 +22,7 @@ def sorted_by_len_tids(frames):
) )
def reduce_0(frames, vallex=None): def reduce_0(frames, valdb_sensemap=None):
# new request... frames should be sorted by # new request... frames should be sorted by
# functors list (basically reduce_1, just each # functors list (basically reduce_1, just each
# sentence gets its own frame) # sentence gets its own frame)
@ -43,7 +43,7 @@ def reduce_0(frames, vallex=None):
return sorted_sep_frames return sorted_sep_frames
def reduce_1(frames, vallex=None): def reduce_1(frames, valdb_sensemap=None):
# Combine frames with the same set of functors. # Combine frames with the same set of functors.
# The order of functors is not important. # The order of functors is not important.
frame_sets = [] # [set of functors, list of frames] frame_sets = [] # [set of functors, list of frames]
@ -76,12 +76,15 @@ def reduce_1(frames, vallex=None):
slots_list = [] slots_list = []
for k, e in slots.items(): for k, e in slots.items():
slots_list.append(e) slots_list.append(e)
rf = Frame(tids=tids, slots=slots_list, sentences=sentences) # TODO does appending hw_lemma of first frame work for functor frames too?
rf = Frame(hw_lemma=fs[1][0].hw, tids=tids, slots=slots_list, sentences=sentences)
rf.sort_slots() rf.sort_slots()
ret_frames.append(rf) ret_frames.append(rf)
return sorted_by_len_tids(ret_frames) return sorted_by_len_tids(ret_frames)
# deprecated
"""
def reduce_3(raw_frames, vallex): def reduce_3(raw_frames, vallex):
# sskj simple lesk ids # sskj simple lesk ids
ssj_ids = [frame.tids[0] for frame in raw_frames] ssj_ids = [frame.tids[0] for frame in raw_frames]
@ -94,8 +97,11 @@ def reduce_3(raw_frames, vallex):
"sense_desc": entry.get("sense_desc") "sense_desc": entry.get("sense_desc")
}}) }})
return frames_from_sense_ids(raw_frames, id_map) return frames_from_sense_ids(raw_frames, id_map)
"""
#deprecated
"""
def reduce_4(raw_frames, vallex): def reduce_4(raw_frames, vallex):
# kmeans ids # kmeans ids
ssj_ids = [frame.tids[0] for frame in raw_frames] ssj_ids = [frame.tids[0] for frame in raw_frames]
@ -107,15 +113,15 @@ def reduce_4(raw_frames, vallex):
"sense_id": entry["sense_id"] "sense_id": entry["sense_id"]
}}) }})
return frames_from_sense_ids(raw_frames, id_map) return frames_from_sense_ids(raw_frames, id_map)
"""
def reduce_5(raw_frames, vallex): def reduce_5(raw_frames, valdb_sensemap):
USER_SENSE_COLL = "v2_sense_map"
headword = raw_frames[0].hw headword = raw_frames[0].hw
ssj_ids_full = [frame.tids[0] for frame in raw_frames] ssj_ids_full = [frame.tids[0] for frame in raw_frames]
# v2_sense_map stores only sentence half of ssj_id # v2_sense_map stores only sentence half of ssj_id
ssj_ids = [".".join(ssj_id.split(".")[:-1]) for ssj_id in ssj_ids_full] ssj_ids = [".".join(ssj_id.split(".")[:-1]) for ssj_id in ssj_ids_full]
db_results = list(vallex.db[USER_SENSE_COLL].find({ db_results = list(valdb_sensemap.find({
"ssj_id": {"$in": ssj_ids}, "ssj_id": {"$in": ssj_ids},
"hw": headword, "hw": headword,
})) }))
@ -173,8 +179,10 @@ def frames_from_sense_ids(raw_frames, id_map):
for sense_id, frames in id_dict.items(): for sense_id, frames in id_dict.items():
tids = [] tids = []
reduced_slots = [] reduced_slots = []
sentences = []
for frame in frames: for frame in frames:
tids.extend(frame.tids) tids += frame.tids
sentences += frame.sentences
for slot in frame.slots: for slot in frame.slots:
# if functor not in reduced slots, # if functor not in reduced slots,
# add new slot; else increase count # add new slot; else increase count
@ -190,7 +198,12 @@ def frames_from_sense_ids(raw_frames, id_map):
tids=slot.tids, tids=slot.tids,
count=1 count=1
)) ))
reduced_frame = Frame(tids, slots=reduced_slots) reduced_frame = Frame(
hw_lemma="derp",
tids=tids,
slots=reduced_slots,
sentences=sentences
)
id_map_entry = ( id_map_entry = (
id_map.get(tids[0]) or id_map.get(tids[0]) or
id_map.get(".".join(tids[0].split(".")[:-1])) id_map.get(".".join(tids[0].split(".")[:-1]))
@ -222,14 +235,14 @@ reduce_functions = {
"simple_name": "združeni stavki" "simple_name": "združeni stavki"
}, },
"reduce_3": { "reduce_3": {
"f": reduce_3, "f": None, # deprecated
"desc": "desc":
"Združevanje stavčnih vzorcev na osnovi pomenov povedi v SSKJ. " "Združevanje stavčnih vzorcev na osnovi pomenov povedi v SSKJ. "
"Pomeni so dodeljeni s pomočjo algoritma Simple Lesk.", "Pomeni so dodeljeni s pomočjo algoritma Simple Lesk.",
"simple_name": "SSKJ_pomeni" "simple_name": "SSKJ_pomeni"
}, },
"reduce_4": { "reduce_4": {
"f": reduce_4, "f": None, # deprecated
"desc": "desc":
"Združevanje stavčnih vzorcev na osnovi pomenov povedi " "Združevanje stavčnih vzorcev na osnovi pomenov povedi "
"s pomočjo algoritma K-Means. Število predvidenih pomenov " "s pomočjo algoritma K-Means. Število predvidenih pomenov "