forked from kristjan/cjvt-valency
senses working
This commit is contained in:
parent
e4730c40e1
commit
40f6aea2e7
|
@ -244,7 +244,7 @@ def prepare_frames(ret_frames):
|
||||||
# sid, tid==hw
|
# sid, tid==hw
|
||||||
for sid, tid in unique_sids.items():
|
for sid, tid in unique_sids.items():
|
||||||
# hwl = vallex.get_token(tid)["lemma"]
|
# hwl = vallex.get_token(tid)["lemma"]
|
||||||
hwl = "derp"
|
hwl = frame.hw
|
||||||
tmp_idx = len(frame.sentences)
|
tmp_idx = len(frame.sentences)
|
||||||
if hwl not in frame.aggr_sent:
|
if hwl not in frame.aggr_sent:
|
||||||
frame.aggr_sent[hwl] = []
|
frame.aggr_sent[hwl] = []
|
||||||
|
@ -273,15 +273,14 @@ def api_get_frames():
|
||||||
|
|
||||||
# entry = vallex.entries[hw] # TODO hw -> [Frame,]
|
# entry = vallex.entries[hw] # TODO hw -> [Frame,]
|
||||||
cur = valdb[corpus].find({"headwords": hw})
|
cur = valdb[corpus].find({"headwords": hw})
|
||||||
print("N_results: " + str(cur.count()))
|
|
||||||
frames = []
|
frames = []
|
||||||
for ent in cur:
|
for ent in cur:
|
||||||
# TODO: maybe filter by hw?
|
# TODO: maybe filter by hw?
|
||||||
frames += frames_from_db_entry(ent)
|
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
|
||||||
|
|
||||||
# return json.dumps([x.to_json() for x in frames])
|
frames = [x for x in frames if x.hw == hw]
|
||||||
|
|
||||||
ret_frames = RF(frames, None)
|
ret_frames = RF(frames, valdb[SENSEMAP_COLL])
|
||||||
return prepare_frames(ret_frames)
|
return prepare_frames(ret_frames)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -169,6 +169,7 @@ export default {
|
||||||
}
|
}
|
||||||
this.sentences = {}
|
this.sentences = {}
|
||||||
for (var fi in this.frames) {
|
for (var fi in this.frames) {
|
||||||
|
console.log(this.frames[fi].sentences)
|
||||||
for (var si in this.frames[fi].sentences) {
|
for (var si in this.frames[fi].sentences) {
|
||||||
var sentence = this.frames[fi].sentences[si]
|
var sentence = this.frames[fi].sentences[si]
|
||||||
// get ssj_id without .t123
|
// get ssj_id without .t123
|
||||||
|
@ -184,7 +185,7 @@ export default {
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
for (var i in sentence) {
|
for (var i in sentence) {
|
||||||
words.push(sentence[i][1].word)
|
words.push(sentence[i][1].text)
|
||||||
if (sentence[i][1].lemma === tmp_hw && hw_idx == -1) {
|
if (sentence[i][1].lemma === tmp_hw && hw_idx == -1) {
|
||||||
hw_idx = i
|
hw_idx = i
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@ def frames_from_db_entry(dbent):
|
||||||
srldict[key] += [srl]
|
srldict[key] += [srl]
|
||||||
for hwtid, srlarr in srldict.items():
|
for hwtid, srlarr in srldict.items():
|
||||||
frames += [Frame(
|
frames += [Frame(
|
||||||
hw_lemma=token_dict[hwtid]["text"],
|
hw_lemma=token_dict[hwtid]["lemma"],
|
||||||
tids=[_full_tid(hwtid)],
|
tids=[_full_tid(hwtid)],
|
||||||
slots=[
|
slots=[
|
||||||
Slot(
|
Slot(
|
||||||
|
|
|
@ -22,7 +22,7 @@ def sorted_by_len_tids(frames):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def reduce_0(frames, vallex=None):
|
def reduce_0(frames, valdb_sensemap=None):
|
||||||
# new request... frames should be sorded by
|
# new request... frames should be sorded by
|
||||||
# functors list (basically reduce_1, just each
|
# functors list (basically reduce_1, just each
|
||||||
# sentence gets its own frame)
|
# sentence gets its own frame)
|
||||||
|
@ -43,7 +43,7 @@ def reduce_0(frames, vallex=None):
|
||||||
return sorted_sep_frames
|
return sorted_sep_frames
|
||||||
|
|
||||||
|
|
||||||
def reduce_1(frames, vallex=None):
|
def reduce_1(frames, valdb_sensemap=None):
|
||||||
# Combine frames with the same set of functors.
|
# Combine frames with the same set of functors.
|
||||||
# The order of functors is not important.
|
# The order of functors is not important.
|
||||||
frame_sets = [] # [set of functors, list of frames]
|
frame_sets = [] # [set of functors, list of frames]
|
||||||
|
@ -76,12 +76,15 @@ def reduce_1(frames, vallex=None):
|
||||||
slots_list = []
|
slots_list = []
|
||||||
for k, e in slots.items():
|
for k, e in slots.items():
|
||||||
slots_list.append(e)
|
slots_list.append(e)
|
||||||
rf = Frame(tids=tids, slots=slots_list, sentences=sentences)
|
# TODO does appending hw_lemma of first frame work for functor frames too?
|
||||||
|
rf = Frame(hw_lemma=fs[1][0].hw, tids=tids, slots=slots_list, sentences=sentences)
|
||||||
rf.sort_slots()
|
rf.sort_slots()
|
||||||
ret_frames.append(rf)
|
ret_frames.append(rf)
|
||||||
return sorted_by_len_tids(ret_frames)
|
return sorted_by_len_tids(ret_frames)
|
||||||
|
|
||||||
|
|
||||||
|
# deprecated
|
||||||
|
"""
|
||||||
def reduce_3(raw_frames, vallex):
|
def reduce_3(raw_frames, vallex):
|
||||||
# sskj simple lesk ids
|
# sskj simple lesk ids
|
||||||
ssj_ids = [frame.tids[0] for frame in raw_frames]
|
ssj_ids = [frame.tids[0] for frame in raw_frames]
|
||||||
|
@ -94,8 +97,11 @@ def reduce_3(raw_frames, vallex):
|
||||||
"sense_desc": entry.get("sense_desc")
|
"sense_desc": entry.get("sense_desc")
|
||||||
}})
|
}})
|
||||||
return frames_from_sense_ids(raw_frames, id_map)
|
return frames_from_sense_ids(raw_frames, id_map)
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
#deprecated
|
||||||
|
"""
|
||||||
def reduce_4(raw_frames, vallex):
|
def reduce_4(raw_frames, vallex):
|
||||||
# kmeans ids
|
# kmeans ids
|
||||||
ssj_ids = [frame.tids[0] for frame in raw_frames]
|
ssj_ids = [frame.tids[0] for frame in raw_frames]
|
||||||
|
@ -107,15 +113,15 @@ def reduce_4(raw_frames, vallex):
|
||||||
"sense_id": entry["sense_id"]
|
"sense_id": entry["sense_id"]
|
||||||
}})
|
}})
|
||||||
return frames_from_sense_ids(raw_frames, id_map)
|
return frames_from_sense_ids(raw_frames, id_map)
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def reduce_5(raw_frames, vallex):
|
def reduce_5(raw_frames, valdb_sensemap):
|
||||||
USER_SENSE_COLL = "v2_sense_map"
|
|
||||||
headword = raw_frames[0].hw
|
headword = raw_frames[0].hw
|
||||||
ssj_ids_full = [frame.tids[0] for frame in raw_frames]
|
ssj_ids_full = [frame.tids[0] for frame in raw_frames]
|
||||||
# v2_sense_map stores only sentence half of ssj_id
|
# v2_sense_map stores only sentence half of ssj_id
|
||||||
ssj_ids = [".".join(ssj_id.split(".")[:-1]) for ssj_id in ssj_ids_full]
|
ssj_ids = [".".join(ssj_id.split(".")[:-1]) for ssj_id in ssj_ids_full]
|
||||||
db_results = list(vallex.db[USER_SENSE_COLL].find({
|
db_results = list(valdb_sensemap.find({
|
||||||
"ssj_id": {"$in": ssj_ids},
|
"ssj_id": {"$in": ssj_ids},
|
||||||
"hw": headword,
|
"hw": headword,
|
||||||
}))
|
}))
|
||||||
|
@ -173,8 +179,10 @@ def frames_from_sense_ids(raw_frames, id_map):
|
||||||
for sense_id, frames in id_dict.items():
|
for sense_id, frames in id_dict.items():
|
||||||
tids = []
|
tids = []
|
||||||
reduced_slots = []
|
reduced_slots = []
|
||||||
|
sentences = []
|
||||||
for frame in frames:
|
for frame in frames:
|
||||||
tids.extend(frame.tids)
|
tids += frame.tids
|
||||||
|
sentences += frame.sentences
|
||||||
for slot in frame.slots:
|
for slot in frame.slots:
|
||||||
# if functor not in reduced slots,
|
# if functor not in reduced slots,
|
||||||
# add new slot; else increase count
|
# add new slot; else increase count
|
||||||
|
@ -190,7 +198,12 @@ def frames_from_sense_ids(raw_frames, id_map):
|
||||||
tids=slot.tids,
|
tids=slot.tids,
|
||||||
count=1
|
count=1
|
||||||
))
|
))
|
||||||
reduced_frame = Frame(tids, slots=reduced_slots)
|
reduced_frame = Frame(
|
||||||
|
hw_lemma="derp",
|
||||||
|
tids=tids,
|
||||||
|
slots=reduced_slots,
|
||||||
|
sentences=sentences
|
||||||
|
)
|
||||||
id_map_entry = (
|
id_map_entry = (
|
||||||
id_map.get(tids[0]) or
|
id_map.get(tids[0]) or
|
||||||
id_map.get(".".join(tids[0].split(".")[:-1]))
|
id_map.get(".".join(tids[0].split(".")[:-1]))
|
||||||
|
@ -222,14 +235,14 @@ reduce_functions = {
|
||||||
"simple_name": "združeni stavki"
|
"simple_name": "združeni stavki"
|
||||||
},
|
},
|
||||||
"reduce_3": {
|
"reduce_3": {
|
||||||
"f": reduce_3,
|
"f": None, # deprecated
|
||||||
"desc":
|
"desc":
|
||||||
"Združevanje stavčnih vzorcev na osnovi pomenov povedi v SSKJ. "
|
"Združevanje stavčnih vzorcev na osnovi pomenov povedi v SSKJ. "
|
||||||
"Pomeni so dodeljeni s pomočjo algoritma Simple Lesk.",
|
"Pomeni so dodeljeni s pomočjo algoritma Simple Lesk.",
|
||||||
"simple_name": "SSKJ_pomeni"
|
"simple_name": "SSKJ_pomeni"
|
||||||
},
|
},
|
||||||
"reduce_4": {
|
"reduce_4": {
|
||||||
"f": reduce_4,
|
"f": None, # deprecated
|
||||||
"desc":
|
"desc":
|
||||||
"Združevanje stavčnih vzorcev na osnovi pomenov povedi "
|
"Združevanje stavčnih vzorcev na osnovi pomenov povedi "
|
||||||
"s pomočjo algoritma K-Means. Število predvidenih pomenov "
|
"s pomočjo algoritma K-Means. Število predvidenih pomenov "
|
||||||
|
|
Loading…
Reference in New Issue
Block a user