import logging

log = logging.getLogger(__name__)


def frames_from_db_entry(dbent):
    """Build one Frame per headword token referenced by the entry's SRL links.

    Args:
        dbent: Mapping describing one sentence. The keys read here are
            ``"sid"`` (string sentence id), ``"tokens"`` (list of dicts with
            at least ``"tid"`` and ``"lemma"``) and, optionally,
            ``"srl_links"`` (list of dicts with ``"from"``, ``"to"`` and
            ``"afun"``).

    Returns:
        A list of Frame objects, one per distinct ``"from"`` token, in order
        of first appearance in ``"srl_links"``. An empty list is returned
        when the entry has no ``"srl_links"`` key.

    Raises:
        KeyError: If a link's ``"from"`` id has no matching token, or a
            required key is missing from the entry.
    """
    # Nothing to build without links; bail out before touching other keys.
    if "srl_links" not in dbent:
        return []

    def _full_tid(tid):
        # Sentence-qualified token id, i.e. "<sid>.<tid>".
        return ".".join([dbent["sid"], str(tid)])

    token_dict = {str(tok["tid"]): tok for tok in dbent["tokens"]}

    # Group links by the token they start from (the frame's headword).
    links_by_head = {}
    for link in dbent["srl_links"]:
        links_by_head.setdefault(str(link["from"]), []).append(link)

    frames = []
    for head_tid, links in links_by_head.items():
        frames.append(Frame(
            hw_lemma=token_dict[head_tid]["lemma"],
            tids=[_full_tid(head_tid)],
            slots=[
                Slot(functor=link["afun"], tids=[_full_tid(link["to"])])
                for link in links
            ],
            # A single sentence, stored as (full tid, token) pairs. Built
            # anew for every frame so frames never share a mutable list.
            sentences=[
                [(_full_tid(tok["tid"]), tok) for tok in dbent["tokens"]],
            ],
        ))
    return frames


class Frame():
    """A headword together with its argument slots and source sentences."""

    def __init__(self, tids, deep_links=None, slots=None,
                 hw_lemma=None, sentences=None):
        """Create a frame.

        Args:
            tids: List of token ids of the headword occurrences.
            deep_links: Optional iterable of link dicts (``"afun"``,
                ``"to"``); only used to derive slots when ``slots`` is None.
            slots: Optional ready-made list of Slot objects.
            hw_lemma: Lemma of the headword.
            sentences: Sentences this frame was extracted from.
        """
        self.hw = hw_lemma
        self.tids = tids  # list of tokens with the same hw_lemma
        # Each tid = "S123.t123";
        # you can get sentence with vallex.get_sentence(S123)
        if slots is None:
            slots = self.init_slots(deep_links)
        self.slots = slots
        self.sense_info = {}
        self.sentences = sentences
        self.aggr_sent = None  # Dictionary { hw: self.sentences idx }

    def get_functors(self):
        """Return the functor of every slot, in slot order."""
        return [slot.functor for slot in self.slots]

    def to_json(self):
        """Return a dict representation of the frame (slots are converted)."""
        return {
            "hw": self.hw,
            "tids": self.tids,
            "slots": [slot.to_json() for slot in self.slots],
            "sentences": self.sentences,
            "aggr_sent": self.aggr_sent,
            "sense_info": self.sense_info,
        }

    def init_slots(self, deep):
        """Create one Slot per link in ``deep``.

        A missing (``None``) or empty ``deep`` yields an empty list, so a
        frame can be constructed without any links or slots.
        """
        return [
            Slot(functor=link["afun"], tids=[link["to"]])
            for link in (deep or ())
        ]

    def sort_slots(self):
        """Order slots in place: ACT, PAT first, then the rest alphabetically."""
        # False sorts before True, so ACT/PAT slots form the leading group;
        # each group is then ordered by functor. The sort is stable.
        self.slots = sorted(
            self.slots,
            key=lambda slot: (slot.functor not in ("ACT", "PAT"), slot.functor),
        )

    def to_string(self):
        """Return a multi-line, human-readable dump of the frame."""
        parts = [
            "Frame:\n",
            "sense_info: {}\n".format(str(self.sense_info)),
            "tids: [",
        ]
        parts.extend(str(tid) + ", " for tid in self.tids)
        parts.append("]\n")
        if self.slots is not None:
            parts.append("slots:\n")
            parts.extend(slot.to_string() + "\n" for slot in self.slots)
        return "".join(parts)


class Slot():
    """One argument slot of a frame.

    Each slot is identified by its functor (ACT, PAT, ...).
    It consists of different tokens.
    """

    def __init__(self, functor, tids=None, count=None):
        """Create a slot.

        Args:
            functor: Functor label of the slot.
            tids: Token ids filling the slot; defaults to an empty list.
            count: Occurrence count; any falsy value (None, 0) becomes 1.
        """
        self.functor = functor
        self.tids = tids or []  # combining multiple sentences vertically
        self.count = count or 1

    def to_string(self):
        """Return a multi-line, human-readable dump of the slot."""
        parts = [
            "---- Slot:\n",
            "functor: {}\n".format(self.functor),
            "tids: [",
        ]
        parts.extend(str(tid) + ", " for tid in self.tids)
        # The bracket is closed exactly once.
        parts.append("]\n")
        parts.append("----\n")
        return "".join(parts)

    def to_json(self):
        """Return a dict representation of the slot."""
        return {
            "functor": self.functor,
            "tids": self.tids,
            "count": self.count,
        }