# luscenje_struktur/src/match_store.py

from collections import defaultdict

from match import StructureMatch
from representation_assigner import RepresentationAssigner

try:
    from tqdm import tqdm
except ImportError:
    tqdm = lambda x: x

class MatchStore:
    """Keyed collection of structure matches plus per-component dispersion counts.

    ``data`` maps a match key to the ``StructureMatch`` that gathers every
    match reported under that key. ``dispersions`` is filled in by
    ``determine_colocation_dispersions``.
    """

    def __init__(self, args):
        """Create an empty store.

        Only ``args.min_freq`` is read; it is kept as ``min_frequency``.
        """
        self.min_frequency = args.min_freq
        self.data = {}
        self.dispersions = {}

    def _add_match(self, key, structure, match):
        """Append ``match`` to the entry for ``key``, creating the entry if needed.

        A new entry is a ``StructureMatch`` whose first argument is the
        1-based creation index of the entry, rendered as a string.
        """
        store = self.data
        if key not in store:
            match_id = str(len(store) + 1)
            store[key] = StructureMatch(match_id, structure)
        store[key].append(match)

    def get(self, key, n):
        """Return item ``n`` of the entry stored under ``key``."""
        entry = self.data[key]
        return entry[n]

    def add_matches(self, matches):
        """Register every match from a ``{structure: [item, ...]}`` mapping.

        For each item, ``item[0]`` is the match and ``item[1]`` is the key
        it is stored under.
        """
        for structure, found in matches.items():
            for item in found:
                key = item[1]
                match = item[0]
                self._add_match(key, structure, match)

    def get_matches_for(self, structure):
        """Lazily yield the stored entries whose ``structure`` is ``structure``."""
        for stored in self.data.values():
            if stored.structure != structure:
                continue

            yield stored

    def set_representations(self, word_renderer):
        """Run ``RepresentationAssigner.set_representations`` on every entry.

        The iteration is wrapped in ``tqdm``.
        """
        for stored in tqdm(self.data.values()):
            RepresentationAssigner.set_representations(stored, word_renderer)

    def determine_colocation_dispersions(self):
        """Count, per (structure id, component id, lemma), the keys containing it.

        Each key in ``data`` is unpacked as a structure id followed by any
        number of ``(component_id, lemma)`` pairs. The result replaces
        ``self.dispersions`` as a plain ``dict``.
        """
        counts = defaultdict(int)
        for key in self.data:
            structure_id, *components = key
            for component_id, lemma in components:
                counts[(structure_id, component_id, lemma)] += 1
        self.dispersions = dict(counts)
HUGE refactor, creating lots of modules, no code changes though! 2019-06-15 16:55:35 +00:00			`from collections import defaultdict`

			`from match import StructureMatch`
			`from representation_assigner import RepresentationAssigner`

			`try:`
			`from tqdm import tqdm`
			`except ImportError:`
			`tqdm = lambda x: x`

			`class MatchStore:`
			`def __init__(self, args):`
			`self.data = {}`
			`self.min_frequency = args.min_freq`
			`self.dispersions = {}`

			`def _add_match(self, key, structure, match):`
			`if key not in self.data:`
			`self.data[key] = StructureMatch(str(len(self.data) + 1), structure)`
			`self.data[key].append(match)`

			`def get(self, key, n):`
			`return self.data[key][n]`

			`def add_matches(self, matches):`
			`for structure, nms in matches.items():`
			`for nm in nms:`
			`self._add_match(nm[1], structure, nm[0])`

			`def get_matches_for(self, structure):`
			`for _cid_tup, sm in self.data.items():`
			`if sm.structure != structure:`
			`continue`

			`yield sm`

			`def set_representations(self, word_renderer):`
			`for _1, sm in tqdm(self.data.items()):`
			`RepresentationAssigner.set_representations(sm, word_renderer)`

			`def determine_colocation_dispersions(self):`
			`dispersions = defaultdict(int)`
			`for (structure_id, *word_tups) in self.data.keys():`
			`for component_id, lemma in word_tups:`
			`dispersions[(structure_id, component_id, lemma)] += 1`
			`self.dispersions = dict(dispersions)`