From 8c20295adf7bd83798922a376c278ffd0c40987b Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Thu, 27 Jun 2019 22:04:33 +0200 Subject: [PATCH] Adding dispersions to sqlite, finished moving to it. --- src/match.py | 4 ++-- src/match_store.py | 20 +++++++++----------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/match.py b/src/match.py index 5e241d7..294029d 100644 --- a/src/match.py +++ b/src/match.py @@ -2,7 +2,7 @@ from word import Word class StructureMatch: def __init__(self, match_id, structure): - self.match_id = match_id + self.match_id = str(match_id) self.structure = structure self.matches = [] @@ -10,7 +10,7 @@ class StructureMatch: @staticmethod def from_db(db, colocation_id, structure): - result = StructureMatch(str(colocation_id), structure) + result = StructureMatch(colocation_id, structure) for match_id in db.execute("SELECT mid_match_id FROM ColocationMatches WHERE mid_colocation_id=?", (colocation_id,)): to_add = {} diff --git a/src/match_store.py b/src/match_store.py index e305847..2a712f2 100644 --- a/src/match_store.py +++ b/src/match_store.py @@ -1,4 +1,5 @@ from collections import defaultdict +from ast import literal_eval from match import StructureMatch from representation_assigner import RepresentationAssigner @@ -7,14 +8,13 @@ from progress_bar import progress class MatchStore: def __init__(self, args, db): self.db = db - self.data = {} self.min_frequency = args.min_freq self.dispersions = {} self.match_num = 0 self.db.init("""CREATE TABLE Colocations ( colocation_id INTEGER PRIMARY KEY, - structure_id INTEGER, + structure_id varchar(8), key varchar(256)) """) self.db.init("""CREATE TABLE Matches ( @@ -73,24 +73,20 @@ class MatchStore: self.match_num += 1 - if key not in self.data: - self.data[key] = StructureMatch(str(len(self.data) + 1), structure) - self.data[key].append(match) - def add_matches(self, matches): - for structure, nms in matches.items(): + for structure, nms in progress(matches.items(), 'adding-matches'): for nm in nms: self._add_match(nm[1], structure, nm[0]) def get_matches_for(self, structure): - print(structure.id) for cid in self.db.execute("SELECT colocation_id FROM Colocations WHERE structure_id=?", (structure.id,)): yield StructureMatch.from_db(self.db, cid[0], structure) def set_representations(self, word_renderer, structures): + structures_dict = {s.id: s for s in structures} for cid, sid in progress(self.db.execute("SELECT colocation_id, structure_id FROM Colocations"), "representations"): - structure = structures[sid - 1] + structure = structures_dict[sid] match = StructureMatch.from_db(self.db, cid, structure) RepresentationAssigner.set_representations(match, word_renderer) for component_id, text in match.representations.items(): @@ -101,7 +97,9 @@ class MatchStore: def determine_colocation_dispersions(self): dispersions = defaultdict(int) - for (structure_id, *word_tups) in self.data.keys(): + for structure_id, word_tups_str in self.db.execute("SELECT structure_id, key FROM Colocations"): + word_tups = literal_eval(word_tups_str) for component_id, lemma in word_tups: - dispersions[(structure_id, component_id, lemma)] += 1 + dispersions[(str(structure_id), component_id, lemma)] += 1 + self.dispersions = dict(dispersions)