senses working

This commit is contained in:
voje 2019-04-07 21:43:42 +02:00
parent e4730c40e1
commit 40f6aea2e7
4 changed files with 30 additions and 17 deletions

View File

@ -244,7 +244,7 @@ def prepare_frames(ret_frames):
# sid, tid==hw
for sid, tid in unique_sids.items():
# hwl = vallex.get_token(tid)["lemma"]
hwl = "derp"
hwl = frame.hw
tmp_idx = len(frame.sentences)
if hwl not in frame.aggr_sent:
frame.aggr_sent[hwl] = []
@ -273,15 +273,14 @@ def api_get_frames():
# entry = vallex.entries[hw] # TODO hw -> [Frame,]
cur = valdb[corpus].find({"headwords": hw})
print("N_results: " + str(cur.count()))
frames = []
for ent in cur:
# TODO: maybe filter by hw?
frames += frames_from_db_entry(ent)
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
# return json.dumps([x.to_json() for x in frames])
frames = [x for x in frames if x.hw == hw]
ret_frames = RF(frames, None)
ret_frames = RF(frames, valdb[SENSEMAP_COLL])
return prepare_frames(ret_frames)

View File

@ -169,6 +169,7 @@ export default {
}
this.sentences = {}
for (var fi in this.frames) {
console.log(this.frames[fi].sentences)
for (var si in this.frames[fi].sentences) {
var sentence = this.frames[fi].sentences[si]
// get ssj_id without .t123
@ -184,7 +185,7 @@ export default {
}
*/
for (var i in sentence) {
words.push(sentence[i][1].word)
words.push(sentence[i][1].text)
if (sentence[i][1].lemma === tmp_hw && hw_idx == -1) {
hw_idx = i
}

View File

@ -20,7 +20,7 @@ def frames_from_db_entry(dbent):
srldict[key] += [srl]
for hwtid, srlarr in srldict.items():
frames += [Frame(
hw_lemma=token_dict[hwtid]["text"],
hw_lemma=token_dict[hwtid]["lemma"],
tids=[_full_tid(hwtid)],
slots=[
Slot(

View File

@ -22,7 +22,7 @@ def sorted_by_len_tids(frames):
)
def reduce_0(frames, vallex=None):
def reduce_0(frames, valdb_sensemap=None):
# new request... frames should be sorted by
# functors list (basically reduce_1, just each
# sentence gets its own frame)
@ -43,7 +43,7 @@ def reduce_0(frames, vallex=None):
return sorted_sep_frames
def reduce_1(frames, vallex=None):
def reduce_1(frames, valdb_sensemap=None):
# Combine frames with the same set of functors.
# The order of functors is not important.
frame_sets = [] # [set of functors, list of frames]
@ -76,12 +76,15 @@ def reduce_1(frames, vallex=None):
slots_list = []
for k, e in slots.items():
slots_list.append(e)
rf = Frame(tids=tids, slots=slots_list, sentences=sentences)
# TODO does appending hw_lemma of first frame work for functor frames too?
rf = Frame(hw_lemma=fs[1][0].hw, tids=tids, slots=slots_list, sentences=sentences)
rf.sort_slots()
ret_frames.append(rf)
return sorted_by_len_tids(ret_frames)
# deprecated
"""
def reduce_3(raw_frames, vallex):
# sskj simple lesk ids
ssj_ids = [frame.tids[0] for frame in raw_frames]
@ -94,8 +97,11 @@ def reduce_3(raw_frames, vallex):
"sense_desc": entry.get("sense_desc")
}})
return frames_from_sense_ids(raw_frames, id_map)
"""
#deprecated
"""
def reduce_4(raw_frames, vallex):
# kmeans ids
ssj_ids = [frame.tids[0] for frame in raw_frames]
@ -107,15 +113,15 @@ def reduce_4(raw_frames, vallex):
"sense_id": entry["sense_id"]
}})
return frames_from_sense_ids(raw_frames, id_map)
"""
def reduce_5(raw_frames, vallex):
USER_SENSE_COLL = "v2_sense_map"
def reduce_5(raw_frames, valdb_sensemap):
headword = raw_frames[0].hw
ssj_ids_full = [frame.tids[0] for frame in raw_frames]
# v2_sense_map stores only sentence half of ssj_id
ssj_ids = [".".join(ssj_id.split(".")[:-1]) for ssj_id in ssj_ids_full]
db_results = list(vallex.db[USER_SENSE_COLL].find({
db_results = list(valdb_sensemap.find({
"ssj_id": {"$in": ssj_ids},
"hw": headword,
}))
@ -173,8 +179,10 @@ def frames_from_sense_ids(raw_frames, id_map):
for sense_id, frames in id_dict.items():
tids = []
reduced_slots = []
sentences = []
for frame in frames:
tids.extend(frame.tids)
tids += frame.tids
sentences += frame.sentences
for slot in frame.slots:
# if functor not in reduced slots,
# add new slot; else increase count
@ -190,7 +198,12 @@ def frames_from_sense_ids(raw_frames, id_map):
tids=slot.tids,
count=1
))
reduced_frame = Frame(tids, slots=reduced_slots)
reduced_frame = Frame(
hw_lemma="derp",
tids=tids,
slots=reduced_slots,
sentences=sentences
)
id_map_entry = (
id_map.get(tids[0]) or
id_map.get(".".join(tids[0].split(".")[:-1]))
@ -222,14 +235,14 @@ reduce_functions = {
"simple_name": "združeni stavki"
},
"reduce_3": {
"f": reduce_3,
"f": None, # deprecated
"desc":
"Združevanje stavčnih vzorcev na osnovi pomenov povedi v SSKJ. "
"Pomeni so dodeljeni s pomočjo algoritma Simple Lesk.",
"simple_name": "SSKJ_pomeni"
},
"reduce_4": {
"f": reduce_4,
"f": None, # deprecated
"desc":
"Združevanje stavčnih vzorcev na osnovi pomenov povedi "
"s pomočjo algoritma K-Means. Število predvidenih pomenov "