From b5e281bdf457ac9ad29db2c707d3829087ee42e9 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Thu, 27 Jun 2019 17:16:27 +0200 Subject: [PATCH] adding indexes for speed and set_representations via database --- src/match_store.py | 35 ++++++++++++++++++----------------- src/wani.py | 2 +- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/match_store.py b/src/match_store.py index 1194172..e305847 100644 --- a/src/match_store.py +++ b/src/match_store.py @@ -10,6 +10,7 @@ class MatchStore: self.data = {} self.min_frequency = args.min_freq self.dispersions = {} + self.match_num = 0 self.db.init("""CREATE TABLE Colocations ( colocation_id INTEGER PRIMARY KEY, @@ -37,6 +38,12 @@ class MatchStore: FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id)) """) + self.db.init("CREATE INDEX key_sid_c ON Colocations (key, structure_id)") + self.db.init("CREATE INDEX sid_c ON Colocations (structure_id)") + self.db.init("CREATE INDEX mmid_cm ON ColocationMatches (mid_colocation_id)") + self.db.init("CREATE INDEX mid_m ON Matches (match_id)") + self.db.init("CREATE INDEX col_r ON Representations (colocation_id)") + def _add_match(self, key, structure, match): structure_id, key_str = key[0], str(key[1:]) @@ -49,15 +56,12 @@ class MatchStore: cid = self.db.execute("SELECT colocation_id FROM Colocations WHERE key=? AND structure_id=?", (key_str, structure_id)).fetchone() - mid = self.db.execute("SELECT max(match_id) + 1 FROM Matches").fetchone() - mid = 0 if mid[0] is None else mid[0] - for component_id, word in match.items(): self.db.execute(""" INSERT INTO Matches (match_id, component_id, word_lemma, word_text, word_msd, word_id) VALUES (:match_id, :component_id, :word_lemma, :word_text, :word_msd, :word_id)""", { "component_id": component_id, - "match_id": mid, + "match_id": self.match_num, "word_lemma": word.lemma, "word_msd": word.msd, "word_text": word.text, @@ -65,7 +69,9 @@ class MatchStore: }) self.db.execute("INSERT INTO ColocationMatches (mid_colocation_id, mid_match_id) VALUES (?,?)", - (cid[0], mid)) + (cid[0], self.match_num)) + + self.match_num += 1 if key not in self.data: self.data[key] = StructureMatch(str(len(self.data) + 1), structure) @@ -82,20 +88,15 @@ class MatchStore: (structure.id,)): yield StructureMatch.from_db(self.db, cid[0], structure) - # for _cid_tup, sm in self.data.items(): - # if sm.structure != structure: - # continue - - # # print(sm.matches, sm.match_id, sm.representations) - # yield sm - - def set_representations(self, word_renderer): - for _1, sm in progress(self.data.items(), "representations"): - RepresentationAssigner.set_representations(sm, word_renderer) - for component_id, text in sm.representations.items(): + def set_representations(self, word_renderer, structures): + for cid, sid in progress(self.db.execute("SELECT colocation_id, structure_id FROM Colocations"), "representations"): + structure = structures[sid - 1] + match = StructureMatch.from_db(self.db, cid, structure) + RepresentationAssigner.set_representations(match, word_renderer) + for component_id, text in match.representations.items(): self.db.execute(""" INSERT INTO Representations (colocation_id, component_id, text) - VALUES (?,?,?)""", (sm.match_id, component_id, text)) + VALUES (?,?,?)""", (match.match_id, component_id, text)) def determine_colocation_dispersions(self): diff --git a/src/wani.py b/src/wani.py index 6a5e604..08b68a8 100644 --- a/src/wani.py +++ b/src/wani.py @@ -92,7 +92,7 @@ def main(args): # figure out representations! if args.out or args.out_no_stat: - match_store.set_representations(word_stats) + match_store.set_representations(word_stats, structures) Writer.make_output_writer(args, max_num_components, match_store, word_stats).write_out( structures, match_store)