diff --git a/Makefile b/Makefile index 566fd01..5096bf8 100644 --- a/Makefile +++ b/Makefile @@ -10,8 +10,8 @@ MAKE_ROOT = $(shell pwd) # (for each kres.xml file there is a kres.json file with srl tags) SSJ_FILE = "$(MAKE_ROOT)/data/samples/ssj_example/ssj500k-sl.body.sample.xml" KRES_FOLDER = "$(MAKE_ROOT)/data/samples/kres_example" -# KRES_SRL_FOLDER = "/home/kristjan/kres_srl/final_json/" # t420 -KRES_SRL_FOLDER = "/home/voje/work_data/final_json" # work-pc +KRES_SRL_FOLDER = "/home/kristjan/kres_srl/final_json/" # t420 +# KRES_SRL_FOLDER = "/home/voje/work_data/final_json" # work-pc OUTPUT = "db" OUTDIR = "/home/voje/workdir/test_out" diff --git a/README.md b/README.md index df65013..ea9b10a 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ $ make python-env $ make python-env-install # run the code +# beforehand, set the data files in Makefile $ make fill-database ``` @@ -49,7 +50,10 @@ Relies heavily on the database. Set that up first. $ make python-env # development: -$ make backend-dev-init # run the first time, to prepare the db, then kill +# run the first time, to prepare the db, then kill +# it runs a few minutes, there should be a new collection in the db when finished +$ make backend-dev-init + $ make backend-dev # debug with this one # production diff --git a/src/backend_flask/app.py b/src/backend_flask/app.py index fb05a2c..62024d0 100644 --- a/src/backend_flask/app.py +++ b/src/backend_flask/app.py @@ -235,28 +235,6 @@ def api_token(): # FRAMES ----------------------------. -def prepare_frames(ret_frames): - # append sentences - for frame in ret_frames: - unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids} - # frame.sentences = [] - frame.aggr_sent = {} - # sid, tid==hw - for sid, tid in unique_sids.items(): - # hwl = vallex.get_token(tid)["lemma"] - hwl = frame.hw - tmp_idx = len(frame.sentences) - if hwl not in frame.aggr_sent: - frame.aggr_sent[hwl] = [] - frame.aggr_sent[hwl].append(tmp_idx) - # return (n-frames, rendered template) - # json frames - json_ret = {"frames": []} - for frame in ret_frames: - json_ret["frames"].append(DC(frame.to_json())) - return json.dumps(json_ret) - - # input: hw, reduct_function @app.route("/api/frames") def api_get_frames(): @@ -280,8 +258,34 @@ def api_get_frames(): frames = [x for x in frames if x.hw == hw] ret_frames = RF(frames, valdb[SENSEMAP_COLL]) - return prepare_frames(ret_frames) + json_ret = {"frames": []} + for frame in ret_frames: + json_ret["frames"].append(frame.to_json()) + return json.dumps(json_ret) + # return prepare_frames(ret_frames) + +def _aggregate_by_hw(ret_frames): + + def _tid_to_lemma(tid, sentence): + # slow and hackish + for pair in sentence: + if pair[0] == tid: + return pair[1]["lemma"] + return None + + # append sentences + for frame in ret_frames: + # unique_sids = {".".join(x.split(".")[:-1]): x for x in frame.tids} + frame.aggr_sent = {} # map of headword: [sentence indexes] + # sid, tid==hw + for i, tid in enumerate(frame.tids): + # hwl = vallex.get_token(tid)["lemma"] + hwl = _tid_to_lemma(tid, frame.sentences[i]) + if hwl not in frame.aggr_sent: + frame.aggr_sent[hwl] = [] + frame.aggr_sent[hwl].append(i) + return ret_frames # input: functor, reduce_function @app.route("/api/functor-frames") @@ -302,12 +306,17 @@ def api_get_functor_frames(): for ent in cur: frames += frames_from_db_entry(ent) # pre-process this step for prod TODO - for f in frames: - print(f.to_json()) + # filter by relevant functor + frames = [x for x in frames if functor in x.get_functors()] # raw_frames = vallex.functors_index[functor] # TODO ret_frames = RF(frames, valdb[SENSEMAP_COLL]) - return prepare_frames(ret_frames) + ret_frames = _aggregate_by_hw(ret_frames) + + json_ret = {"frames": []} + for frame in ret_frames: + json_ret["frames"].append(DC(frame.to_json())) + return json.dumps(json_ret) # FRAMES ----------------------------^ diff --git a/src/frontend_vue/package-lock.json b/src/frontend_vue/package-lock.json index a2a9f2f..aedaf92 100644 --- a/src/frontend_vue/package-lock.json +++ b/src/frontend_vue/package-lock.json @@ -3632,12 +3632,14 @@ "balanced-match": { "version": "1.0.0", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "brace-expansion": { "version": "1.1.11", "bundled": true, "dev": true, + "optional": true, "requires": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" @@ -3652,17 +3654,20 @@ "code-point-at": { "version": "1.1.0", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "concat-map": { "version": "0.0.1", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "console-control-strings": { "version": "1.1.0", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "core-util-is": { "version": "1.0.2", @@ -3779,7 +3784,8 @@ "inherits": { "version": "2.0.3", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "ini": { "version": "1.3.5", @@ -3791,6 +3797,7 @@ "version": "1.0.0", "bundled": true, "dev": true, + "optional": true, "requires": { "number-is-nan": "^1.0.0" } @@ -3805,6 +3812,7 @@ "version": "3.0.4", "bundled": true, "dev": true, + "optional": true, "requires": { "brace-expansion": "^1.1.7" } @@ -3812,12 +3820,14 @@ "minimist": { "version": "0.0.8", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "minipass": { "version": "2.2.4", "bundled": true, "dev": true, + "optional": true, "requires": { "safe-buffer": "^5.1.1", "yallist": "^3.0.0" @@ -3836,6 +3846,7 @@ "version": "0.5.1", "bundled": true, "dev": true, + "optional": true, "requires": { "minimist": "0.0.8" } @@ -3916,7 +3927,8 @@ "number-is-nan": { "version": "1.0.1", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "object-assign": { "version": "4.1.1", @@ -3928,6 +3940,7 @@ "version": "1.4.0", "bundled": true, "dev": true, + "optional": true, "requires": { "wrappy": "1" } @@ -4049,6 +4062,7 @@ "version": "1.0.2", "bundled": true, "dev": true, + "optional": true, "requires": { "code-point-at": "^1.0.0", "is-fullwidth-code-point": "^1.0.0", diff --git a/src/frontend_vue/src/components/MainDispl.vue b/src/frontend_vue/src/components/MainDispl.vue index 2804fef..d007925 100644 --- a/src/frontend_vue/src/components/MainDispl.vue +++ b/src/frontend_vue/src/components/MainDispl.vue @@ -169,7 +169,7 @@ export default { } this.sentences = {} for (var fi in this.frames) { - console.log(this.frames[fi].sentences) + // console.log(this.frames[fi].sentences) for (var si in this.frames[fi].sentences) { var sentence = this.frames[fi].sentences[si] // get ssj_id without .t123 diff --git a/src/pkg/valency/valency/Frame.py b/src/pkg/valency/valency/Frame.py index e02fe9f..26e099b 100644 --- a/src/pkg/valency/valency/Frame.py +++ b/src/pkg/valency/valency/Frame.py @@ -50,6 +50,10 @@ class Frame(): self.sentences = sentences self.aggr_sent = None # Dictionary { hw: self.sentences idx } + def get_functors(self): + return [slot.functor for slot in self.slots] + + def to_json(self): ret = { "hw": self.hw,