fixed functor-frames

This commit is contained in:
voje 2019-04-14 00:26:15 +02:00
parent 1b46ac9c22
commit 63041cc55b
6 changed files with 68 additions and 37 deletions

View File

@ -10,8 +10,8 @@ MAKE_ROOT = $(shell pwd)
# (for each kres.xml file there is a kres.json file with srl tags) # (for each kres.xml file there is a kres.json file with srl tags)
SSJ_FILE = "$(MAKE_ROOT)/data/samples/ssj_example/ssj500k-sl.body.sample.xml" SSJ_FILE = "$(MAKE_ROOT)/data/samples/ssj_example/ssj500k-sl.body.sample.xml"
KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_example" KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_example"
# KRES_SRL_FOLDER = "/home/kristjan/kres_srl/final_json/" # t420 KRES_SRL_FOLDER = "/home/kristjan/kres_srl/final_json/" # t420
KRES_SRL_FOLDER = "/home/voje/work_data/final_json" # work-pc # KRES_SRL_FOLDER = "/home/voje/work_data/final_json" # work-pc
OUTPUT = "db" OUTPUT = "db"
OUTDIR = "/home/voje/workdir/test_out" OUTDIR = "/home/voje/workdir/test_out"

View File

@ -37,6 +37,7 @@ $ make python-env
$ make python-env-install $ make python-env-install
# run the code # run the code
# beforehand, set the paths to the data files in the Makefile
$ make fill-database $ make fill-database
``` ```
@ -49,7 +50,10 @@ Relies heavily on the database. Set that up first.
$ make python-env $ make python-env
# development: # development:
$ make backend-dev-init # run the first time, to prepare the db, then kill # run the first time, to prepare the db, then kill
# it runs for a few minutes; when it finishes, there should be a new collection in the db
$ make backend-dev-init
$ make backend-dev # debug with this one $ make backend-dev # debug with this one
# production # production

View File

@ -235,28 +235,6 @@ def api_token():
# FRAMES ----------------------------. # FRAMES ----------------------------.
def prepare_frames(ret_frames):
    """Fill ``aggr_sent`` on every frame and return the frames as a JSON string.

    For each frame, ``frame.aggr_sent`` is rebuilt as ``{frame.hw: [indexes]}``
    with one index per distinct sentence id found in ``frame.tids``. A
    sentence id is a token id with its last dot-separated component removed.

    The previous implementation stored ``len(frame.sentences)`` for every
    sentence, i.e. the same out-of-range value repeated. The position of the
    token inside ``frame.tids`` is stored instead.
    NOTE(review): this assumes ``frame.sentences`` is index-aligned with
    ``frame.tids`` -- confirm against the code that builds the frames.

    Args:
        ret_frames: iterable of frame objects exposing ``tids``, ``hw`` and
            ``to_json()``; it is iterated twice.

    Returns:
        A JSON string of the form ``{"frames": [...]}``.
    """
    for frame in ret_frames:
        # sid -> index of the last tid belonging to that sentence; a later
        # token of the same sentence overwrites an earlier one, which keeps
        # the one-entry-per-sentence behaviour of the original dict.
        idx_by_sid = {
            ".".join(tid.split(".")[:-1]): idx
            for idx, tid in enumerate(frame.tids)
        }
        frame.aggr_sent = {}
        if idx_by_sid:
            # All sentences are filed under the frame's own headword.
            frame.aggr_sent[frame.hw] = list(idx_by_sid.values())

    # Serialize a deep copy of each frame's JSON representation.
    json_ret = {"frames": [DC(frame.to_json()) for frame in ret_frames]}
    return json.dumps(json_ret)
# input: hw, reduct_function # input: hw, reduct_function
@app.route("/api/frames") @app.route("/api/frames")
def api_get_frames(): def api_get_frames():
@ -280,8 +258,34 @@ def api_get_frames():
frames = [x for x in frames if x.hw == hw] frames = [x for x in frames if x.hw == hw]
ret_frames = RF(frames, valdb[SENSEMAP_COLL]) ret_frames = RF(frames, valdb[SENSEMAP_COLL])
return prepare_frames(ret_frames)
json_ret = {"frames": []}
for frame in ret_frames:
json_ret["frames"].append(frame.to_json())
return json.dumps(json_ret)
# return prepare_frames(ret_frames)
def _aggregate_by_hw(ret_frames):
def _tid_to_lemma(tid, sentence):
# slow and hackish
for pair in sentence:
if pair[0] == tid:
return pair[1]["lemma"]
return None
# append sentences
for frame in ret_frames:
# unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids}
frame.aggr_sent = {} # map of headword: [sentence indexes]
# sid, tid==hw
for i, tid in enumerate(frame.tids):
# hwl = vallex.get_token(tid)["lemma"]
hwl = _tid_to_lemma(tid, frame.sentences[i])
if hwl not in frame.aggr_sent:
frame.aggr_sent[hwl] = []
frame.aggr_sent[hwl].append(i)
return ret_frames
# input: functor, reduce_function # input: functor, reduce_function
@app.route("/api/functor-frames") @app.route("/api/functor-frames")
@ -302,12 +306,17 @@ def api_get_functor_frames():
for ent in cur: for ent in cur:
frames += frames_from_db_entry(ent) # pre-process this step for prod TODO frames += frames_from_db_entry(ent) # pre-process this step for prod TODO
for f in frames: # filter by relevant functor
print(f.to_json()) frames = [x for x in frames if functor in x.get_functors()]
# raw_frames = vallex.functors_index[functor] # TODO # raw_frames = vallex.functors_index[functor] # TODO
ret_frames = RF(frames, valdb[SENSEMAP_COLL]) ret_frames = RF(frames, valdb[SENSEMAP_COLL])
return prepare_frames(ret_frames) ret_frames = _aggregate_by_hw(ret_frames)
json_ret = {"frames": []}
for frame in ret_frames:
json_ret["frames"].append(DC(frame.to_json()))
return json.dumps(json_ret)
# FRAMES ----------------------------^ # FRAMES ----------------------------^

View File

@ -3632,12 +3632,14 @@
"balanced-match": { "balanced-match": {
"version": "1.0.0", "version": "1.0.0",
"bundled": true, "bundled": true,
"dev": true "dev": true,
"optional": true
}, },
"brace-expansion": { "brace-expansion": {
"version": "1.1.11", "version": "1.1.11",
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"optional": true,
"requires": { "requires": {
"balanced-match": "^1.0.0", "balanced-match": "^1.0.0",
"concat-map": "0.0.1" "concat-map": "0.0.1"
@ -3652,17 +3654,20 @@
"code-point-at": { "code-point-at": {
"version": "1.1.0", "version": "1.1.0",
"bundled": true, "bundled": true,
"dev": true "dev": true,
"optional": true
}, },
"concat-map": { "concat-map": {
"version": "0.0.1", "version": "0.0.1",
"bundled": true, "bundled": true,
"dev": true "dev": true,
"optional": true
}, },
"console-control-strings": { "console-control-strings": {
"version": "1.1.0", "version": "1.1.0",
"bundled": true, "bundled": true,
"dev": true "dev": true,
"optional": true
}, },
"core-util-is": { "core-util-is": {
"version": "1.0.2", "version": "1.0.2",
@ -3779,7 +3784,8 @@
"inherits": { "inherits": {
"version": "2.0.3", "version": "2.0.3",
"bundled": true, "bundled": true,
"dev": true "dev": true,
"optional": true
}, },
"ini": { "ini": {
"version": "1.3.5", "version": "1.3.5",
@ -3791,6 +3797,7 @@
"version": "1.0.0", "version": "1.0.0",
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"optional": true,
"requires": { "requires": {
"number-is-nan": "^1.0.0" "number-is-nan": "^1.0.0"
} }
@ -3805,6 +3812,7 @@
"version": "3.0.4", "version": "3.0.4",
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"optional": true,
"requires": { "requires": {
"brace-expansion": "^1.1.7" "brace-expansion": "^1.1.7"
} }
@ -3812,12 +3820,14 @@
"minimist": { "minimist": {
"version": "0.0.8", "version": "0.0.8",
"bundled": true, "bundled": true,
"dev": true "dev": true,
"optional": true
}, },
"minipass": { "minipass": {
"version": "2.2.4", "version": "2.2.4",
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"optional": true,
"requires": { "requires": {
"safe-buffer": "^5.1.1", "safe-buffer": "^5.1.1",
"yallist": "^3.0.0" "yallist": "^3.0.0"
@ -3836,6 +3846,7 @@
"version": "0.5.1", "version": "0.5.1",
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"optional": true,
"requires": { "requires": {
"minimist": "0.0.8" "minimist": "0.0.8"
} }
@ -3916,7 +3927,8 @@
"number-is-nan": { "number-is-nan": {
"version": "1.0.1", "version": "1.0.1",
"bundled": true, "bundled": true,
"dev": true "dev": true,
"optional": true
}, },
"object-assign": { "object-assign": {
"version": "4.1.1", "version": "4.1.1",
@ -3928,6 +3940,7 @@
"version": "1.4.0", "version": "1.4.0",
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"optional": true,
"requires": { "requires": {
"wrappy": "1" "wrappy": "1"
} }
@ -4049,6 +4062,7 @@
"version": "1.0.2", "version": "1.0.2",
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"optional": true,
"requires": { "requires": {
"code-point-at": "^1.0.0", "code-point-at": "^1.0.0",
"is-fullwidth-code-point": "^1.0.0", "is-fullwidth-code-point": "^1.0.0",

View File

@ -169,7 +169,7 @@ export default {
} }
this.sentences = {} this.sentences = {}
for (var fi in this.frames) { for (var fi in this.frames) {
console.log(this.frames[fi].sentences) // console.log(this.frames[fi].sentences)
for (var si in this.frames[fi].sentences) { for (var si in this.frames[fi].sentences) {
var sentence = this.frames[fi].sentences[si] var sentence = this.frames[fi].sentences[si]
// get ssj_id without .t123 // get ssj_id without .t123

View File

@ -50,6 +50,10 @@ class Frame():
self.sentences = sentences self.sentences = sentences
self.aggr_sent = None # Dictionary { hw: self.sentences idx } self.aggr_sent = None # Dictionary { hw: self.sentences idx }
def get_functors(self):
    """Return the ``functor`` of every slot in ``self.slots``, in slot order."""
    functors = []
    for slot in self.slots:
        functors.append(slot.functor)
    return functors
def to_json(self): def to_json(self):
ret = { ret = {
"hw": self.hw, "hw": self.hw,