from collections import defaultdict

from match import StructureMatch
from representation_assigner import RepresentationAssigner
from progress_bar import progress


class MatchStore:
    """In-memory collection of ``StructureMatch`` objects, one per match key.

    ``determine_colocation_dispersions`` unpacks every key as
    ``(structure_id, (component_id, lemma), ...)``, so keys are expected to
    be sequences of that shape.
    """

    def __init__(self, args):
        """Create an empty store.

        Args:
            args: Options object; only ``args.min_freq`` is read here and
                kept as ``min_frequency``.
        """
        # key -> StructureMatch, kept in first-seen order.
        self.data = {}
        self.min_frequency = args.min_freq
        # Populated by determine_colocation_dispersions().
        self.dispersions = {}

    def _add_match(self, key, structure, match):
        """Append ``match`` to the entry for ``key``, creating it on demand."""
        entry = self.data.get(key)
        if entry is None:
            # New entries are labelled with a 1-based running number,
            # rendered as a string.
            entry = StructureMatch(str(len(self.data) + 1), structure)
            self.data[key] = entry
        entry.append(match)

    def get(self, key, n):
        """Return item ``n`` of the entry stored under ``key``.

        Raises:
            KeyError: If ``key`` has no entry.
        """
        entry = self.data[key]
        return entry[n]

    def add_matches(self, matches):
        """Store every match from a ``structure -> iterable`` mapping.

        Each item of an iterable is indexed as ``item[0]`` (the match) and
        ``item[1]`` (the key it is stored under).
        """
        for structure, found in matches.items():
            for item in found:
                match_key = item[1]
                match = item[0]
                self._add_match(match_key, structure, match)

    def get_matches_for(self, structure):
        """Yield the stored entries whose ``structure`` equals ``structure``."""
        for entry in self.data.values():
            if entry.structure != structure:
                continue
            yield entry

    def set_representations(self, word_renderer):
        """Run ``RepresentationAssigner.set_representations`` on each entry.

        Iteration is wrapped in ``progress`` with the label
        ``"representations"``.
        """
        tracked = progress(self.data.items(), "representations")
        for _key, entry in tracked:
            RepresentationAssigner.set_representations(entry, word_renderer)

    def determine_colocation_dispersions(self):
        """Count stored keys per ``(structure_id, component_id, lemma)``.

        The result replaces ``self.dispersions`` as a plain ``dict``.
        """
        counts = defaultdict(int)
        for key in self.data:
            # First element is the structure id; the rest are
            # (component_id, lemma) pairs.
            structure_id, *word_tups = key
            for component_id, lemma in word_tups:
                counts[(structure_id, component_id, lemma)] += 1
        self.dispersions = dict(counts)