Add database indexes for speed and compute set_representations via the database

This commit is contained in:
Ozbolt Menegatti 2019-06-27 17:16:27 +02:00
parent 188763c06a
commit b5e281bdf4
2 changed files with 19 additions and 18 deletions

View File

@ -10,6 +10,7 @@ class MatchStore:
self.data = {} self.data = {}
self.min_frequency = args.min_freq self.min_frequency = args.min_freq
self.dispersions = {} self.dispersions = {}
self.match_num = 0
self.db.init("""CREATE TABLE Colocations ( self.db.init("""CREATE TABLE Colocations (
colocation_id INTEGER PRIMARY KEY, colocation_id INTEGER PRIMARY KEY,
@ -37,6 +38,12 @@ class MatchStore:
FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id)) FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id))
""") """)
self.db.init("CREATE INDEX key_sid_c ON Colocations (key, structure_id)")
self.db.init("CREATE INDEX sid_c ON Colocations (structure_id)")
self.db.init("CREATE INDEX mmid_cm ON ColocationMatches (mid_colocation_id)")
self.db.init("CREATE INDEX mid_m ON Matches (match_id)")
self.db.init("CREATE INDEX col_r ON Representations (colocation_id)")
def _add_match(self, key, structure, match): def _add_match(self, key, structure, match):
structure_id, key_str = key[0], str(key[1:]) structure_id, key_str = key[0], str(key[1:])
@ -49,15 +56,12 @@ class MatchStore:
cid = self.db.execute("SELECT colocation_id FROM Colocations WHERE key=? AND structure_id=?", cid = self.db.execute("SELECT colocation_id FROM Colocations WHERE key=? AND structure_id=?",
(key_str, structure_id)).fetchone() (key_str, structure_id)).fetchone()
mid = self.db.execute("SELECT max(match_id) + 1 FROM Matches").fetchone()
mid = 0 if mid[0] is None else mid[0]
for component_id, word in match.items(): for component_id, word in match.items():
self.db.execute(""" self.db.execute("""
INSERT INTO Matches (match_id, component_id, word_lemma, word_text, word_msd, word_id) INSERT INTO Matches (match_id, component_id, word_lemma, word_text, word_msd, word_id)
VALUES (:match_id, :component_id, :word_lemma, :word_text, :word_msd, :word_id)""", { VALUES (:match_id, :component_id, :word_lemma, :word_text, :word_msd, :word_id)""", {
"component_id": component_id, "component_id": component_id,
"match_id": mid, "match_id": self.match_num,
"word_lemma": word.lemma, "word_lemma": word.lemma,
"word_msd": word.msd, "word_msd": word.msd,
"word_text": word.text, "word_text": word.text,
@ -65,7 +69,9 @@ class MatchStore:
}) })
self.db.execute("INSERT INTO ColocationMatches (mid_colocation_id, mid_match_id) VALUES (?,?)", self.db.execute("INSERT INTO ColocationMatches (mid_colocation_id, mid_match_id) VALUES (?,?)",
(cid[0], mid)) (cid[0], self.match_num))
self.match_num += 1
if key not in self.data: if key not in self.data:
self.data[key] = StructureMatch(str(len(self.data) + 1), structure) self.data[key] = StructureMatch(str(len(self.data) + 1), structure)
@ -82,20 +88,15 @@ class MatchStore:
(structure.id,)): (structure.id,)):
yield StructureMatch.from_db(self.db, cid[0], structure) yield StructureMatch.from_db(self.db, cid[0], structure)
# for _cid_tup, sm in self.data.items(): def set_representations(self, word_renderer, structures):
# if sm.structure != structure: for cid, sid in progress(self.db.execute("SELECT colocation_id, structure_id FROM Colocations"), "representations"):
# continue structure = structures[sid - 1]
match = StructureMatch.from_db(self.db, cid, structure)
# # print(sm.matches, sm.match_id, sm.representations) RepresentationAssigner.set_representations(match, word_renderer)
# yield sm for component_id, text in match.representations.items():
def set_representations(self, word_renderer):
for _1, sm in progress(self.data.items(), "representations"):
RepresentationAssigner.set_representations(sm, word_renderer)
for component_id, text in sm.representations.items():
self.db.execute(""" self.db.execute("""
INSERT INTO Representations (colocation_id, component_id, text) INSERT INTO Representations (colocation_id, component_id, text)
VALUES (?,?,?)""", (sm.match_id, component_id, text)) VALUES (?,?,?)""", (match.match_id, component_id, text))
def determine_colocation_dispersions(self): def determine_colocation_dispersions(self):

View File

@ -92,7 +92,7 @@ def main(args):
# figure out representations! # figure out representations!
if args.out or args.out_no_stat: if args.out or args.out_no_stat:
match_store.set_representations(word_stats) match_store.set_representations(word_stats, structures)
Writer.make_output_writer(args, max_num_components, match_store, word_stats).write_out( Writer.make_output_writer(args, max_num_components, match_store, word_stats).write_out(
structures, match_store) structures, match_store)