From 3ea62ed24251f33a1c28558d9cd94dff5a713dd3 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Wed, 21 Aug 2019 12:49:03 +0200 Subject: [PATCH] dispersions now loaded into database and stored/loaded. --- src/match_store.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/match_store.py b/src/match_store.py index 29aaba1..9835b91 100644 --- a/src/match_store.py +++ b/src/match_store.py @@ -11,6 +11,7 @@ class MatchStore: self.dispersions = {} self.match_num = 0 + self.db.init("""CREATE TABLE Colocations ( colocation_id INTEGER PRIMARY KEY, structure_id varchar(8), @@ -36,13 +37,19 @@ class MatchStore: text varchar(32), FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id)) """) + self.db.init("""CREATE TABLE Dispersions ( + structure_id varchar(64), + component_id varchar(64), + lemma varchar(128), + dispersion INTEGER) + """) self.db.init("CREATE INDEX key_sid_c ON Colocations (key, structure_id)") self.db.init("CREATE INDEX sid_c ON Colocations (structure_id)") self.db.init("CREATE INDEX mmid_cm ON ColocationMatches (mid_colocation_id)") self.db.init("CREATE INDEX mid_m ON Matches (match_id)") self.db.init("CREATE INDEX col_r ON Representations (colocation_id)") - + self.db.init("CREATE INDEX disp_key ON Dispersions (structure_id, component_id, lemma)") def _add_match(self, key, structure, match): structure_id, key_str = key[0], str(key[1:]) @@ -96,6 +103,13 @@ class MatchStore: def determine_colocation_dispersions(self): + step_name = 'dispersions' + wc_done = self.db.execute("SELECT count(*) FROM StepsDone WHERE step=?", (step_name, )).fetchone() + + if wc_done[0] == 1: + self.load_dispersions() + return + dispersions = defaultdict(int) for structure_id, word_tups_str in progress(self.db.execute("SELECT structure_id, key FROM Colocations"), "dispersion"): word_tups = literal_eval(word_tups_str) @@ -103,3 +117,19 @@ class MatchStore: dispersions[(str(structure_id), component_id, lemma)] += 1 self.dispersions = dict(dispersions) + print("Storing dispersions...") + self.store_dispersions() + + self.db.execute("INSERT INTO StepsDone (step) VALUES (?)", (step_name, )) + self.db.commit() + + + def store_dispersions(self): + for (structure_id, component_id, lemma), disp in self.dispersions.items(): + self.db.execute("INSERT INTO Dispersions (structure_id, component_id, lemma, dispersion) VALUES (?, ?, ?, ?)", + (structure_id, component_id, lemma, disp)) + + def load_dispersions(self): + self.dispersions = {} + for structure_id, component_id, lemma, dispersion in progress(self.db.execute("SELECT * FROM Dispersions"), "load-dispersions"): + self.dispersions[structure_id, component_id, lemma] = dispersion