diff --git a/src/pkg/valency/valency/Frame.py b/src/pkg/valency/valency/Frame.py
index dc1148e..89b7116 100644
--- a/src/pkg/valency/valency/Frame.py
+++ b/src/pkg/valency/valency/Frame.py
@@ -37,7 +37,8 @@ def frames_from_db_entry(dbent):
     return frames

 class Frame():
-    def __init__(self, tids, deep_links=None, slots=None, hw_lemma=None, sentences=None):
+    def __init__(self, tids, deep_links=None, slots=None,
+                 hw_lemma=None, sentences=None, sentence_count=None):
         self.hw = hw_lemma
         self.tids = tids  # list of tokens with the same hw_lemma
         # Each tid = "S123.t123";
@@ -50,6 +51,8 @@ class Frame():
         self.sense_info = {}
         self.sentences = sentences
         self.aggr_sent = None  # Dictionary { hw: self.sentences idx }
+        self.sentence_count = sentence_count  # paging, optimization
+

     def get_functors(self):
         return [slot.functor for slot in self.slots]
@@ -62,7 +65,8 @@ class Frame():
             "slots": [slot.to_json() for slot in self.slots],
             "sentences": self.sentences,
             "aggr_sent": self.aggr_sent,
-            "sense_info": self.sense_info
+            "sense_info": self.sense_info,
+            "sentence_count": self.sentence_count
         }
         return ret

diff --git a/src/pkg/valency/valency/frame.py b/src/pkg/valency/valency/frame.py
deleted file mode 100644
index ea7c0c5..0000000
--- a/src/pkg/valency/valency/frame.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import logging
-
-log = logging.getLogger(__name__)
-
-
-class Frame():
-    def __init__(self, tids, deep_links=None, slots=None, hw=None):
-        self.hw = hw
-        self.tids = tids  # list of tokens with the same hw_lemma
-        # Each tid = "S123.t123";
-        # you can get sentence with vallex.get_sentence(S123)
-        self.slots = []
-        if slots is None:
-            self.slots = self.init_slots(deep_links)
-        else:
-            self.slots = slots
-        self.sense_info = {}
-        self.sentences = None  # Used for passing to view in app.py, get_frames
-        self.aggr_sent = None  # Dictionary { hw: self.sentences idx }
-
-    def to_json(self):
-        ret = {
-            "hw": self.hw,
-            "tids": self.tids,
-            "slots": [slot.to_json() for slot in self.slots],
-            "sentences": self.sentences,
-            "aggr_sent": self.aggr_sent,
-            "sense_info": self.sense_info
-        }
-        return ret
-
-    def init_slots(self, deep):
-        slots = []
-        for link in deep:
-            slots.append(Slot(
-                functor=link["functor"],
-                tids=[link["to"]]
-            ))
-        return slots
-
-    def sort_slots(self):
-        # ACT, PAT, alphabetically
-        srt1 = [
-            x for x in self.slots
-            if (x.functor == "ACT" or
-                x.functor == "PAT")
-        ]
-        srt1 = sorted(srt1, key=lambda x: x.functor)
-        srt2 = [
-            x for x in self.slots
-            if (x.functor != "ACT" and
-                x.functor != "PAT")
-        ]
-        srt2 = sorted(srt2, key=lambda x: x.functor)
-        self.slots = (srt1 + srt2)
-
-    def to_string(self):
-        ret = "Frame:\n"
-        ret += "sense_info: {}\n".format(str(self.sense_info))
-        ret += "tids: ["
-        for t in self.tids:
-            ret += (str(t) + ", ")
-        ret += "]\n"
-        if self.slots is not None:
-            ret += "slots:\n"
-            for sl in self.slots:
-                ret += (sl.to_string() + "\n")
-        return ret
-
-
-class Slot():
-    # Each slot is identified by its functor (ACT, PAT, ...)
-    # It consists of different tokens.
-    def __init__(self, functor, tids=None, count=None):
-        self.functor = functor
-        self.tids = tids or []  # combining multiple sentences vertically
-        self.count = count or 1
-
-    def to_string(self):
-        ret = "---- Slot:\n"
-        ret += "functor: {}\n".format(self.functor)
-        ret += "tids: ["
-        for t in self.tids:
-            ret += (str(t) + ", ")
-        ret += "]\n"
-        ret += "]\n"
-        ret += "----\n"
-        return ret
-
-    def to_json(self):
-        ret = {
-            "functor": self.functor,
-            "tids": self.tids,
-            "count": self.count
-        }
-        return ret
diff --git a/src/pkg/valency/valency/reduce_functions.py b/src/pkg/valency/valency/reduce_functions.py
index 50bd5e1..bf718ef 100644
--- a/src/pkg/valency/valency/reduce_functions.py
+++ b/src/pkg/valency/valency/reduce_functions.py
@@ -9,6 +9,7 @@ import logging
 log = logging.getLogger(__name__)

 SENSE_UNDEFINED = "nedefinirano"
+SENTENCE_LIMIT = 10

 ## TIDI: use frame.py
 ## TODO: build a list of [Frame] with lists of [Slot]
@@ -70,7 +71,10 @@ def reduce_1(frames, valdb_sensemap=None):
         for functor in fs[0]:
             slots[functor] = Slot(functor=functor)
         # Reduce slots from all frames. (Merge ACT from all frames, ...)
+        sentence_count = len(fs[1])
         for frame in fs[1]:
+            if len(tids) >= SENTENCE_LIMIT:
+                break
             tids += frame.tids
             sentences += frame.sentences
             for sl in frame.slots:
@@ -78,8 +82,13 @@
         slots_list = []
         for k, e in slots.items():
             slots_list.append(e)
-        # TODO does appending hw_lemma of first frame work for functor frames too?
-        rf = Frame(hw_lemma=fs[1][0].hw, tids=tids, slots=slots_list, sentences=sentences)
+        rf = Frame(
+            hw_lemma=fs[1][0].hw,
+            tids=tids,
+            slots=slots_list,
+            sentences=sentences,
+            sentence_count=sentence_count
+        )
         rf.sort_slots()
         ret_frames.append(rf)
     return sorted_by_len_tids(ret_frames)
@@ -182,7 +191,11 @@ def frames_from_sense_ids(raw_frames, id_map):
         tids = []
         reduced_slots = []
         sentences = []
+
+        sentence_count = len(frames)
         for frame in frames:
+            if len(tids) >= SENTENCE_LIMIT:
+                break
             tids += frame.tids
             sentences += frame.sentences
             for slot in frame.slots:
@@ -204,7 +217,8 @@
             hw_lemma="derp",
             tids=tids,
             slots=reduced_slots,
-            sentences=sentences
+            sentences=sentences,
+            sentence_count=sentence_count,
         )
         id_map_entry = (
             id_map.get(tids[0]) or
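Illustrative sketch (not part of the diff; MiniFrame and reduce_group are simplified stand-ins, not the real Frame/Slot classes or reduce functions): it shows the intent of the change above, where a reduced frame keeps at most SENTENCE_LIMIT frames' worth of sentences while sentence_count records the full total, so a paging UI can still report how many sentences exist beyond the merged sample.

# Sketch only: simplified stand-ins to show how SENTENCE_LIMIT caps the merged
# sentences while sentence_count preserves the true total for paging.
SENTENCE_LIMIT = 10


class MiniFrame:
    def __init__(self, tids, sentences, sentence_count=None):
        self.tids = tids
        self.sentences = sentences
        self.sentence_count = sentence_count


def reduce_group(frames):
    # Merge a group of single-sentence frames into one reduced frame,
    # keeping at most SENTENCE_LIMIT of them but remembering the total.
    tids, sentences = [], []
    sentence_count = len(frames)
    for frame in frames:
        if len(tids) >= SENTENCE_LIMIT:
            break
        tids += frame.tids
        sentences += frame.sentences
    return MiniFrame(tids, sentences, sentence_count=sentence_count)


if __name__ == "__main__":
    group = [MiniFrame(["S{}.t1".format(i)], ["sentence {}".format(i)]) for i in range(25)]
    reduced = reduce_group(group)
    print(len(reduced.sentences))   # 10 (capped by SENTENCE_LIMIT)
    print(reduced.sentence_count)   # 25 (full total, available for paging)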