Add indexes for speed and compute set_representations via the database
This commit is contained in:
parent
188763c06a
commit
b5e281bdf4
|
@ -10,6 +10,7 @@ class MatchStore:
|
|||
self.data = {}
|
||||
self.min_frequency = args.min_freq
|
||||
self.dispersions = {}
|
||||
self.match_num = 0
|
||||
|
||||
self.db.init("""CREATE TABLE Colocations (
|
||||
colocation_id INTEGER PRIMARY KEY,
|
||||
|
@ -37,6 +38,12 @@ class MatchStore:
|
|||
FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id))
|
||||
""")
|
||||
|
||||
self.db.init("CREATE INDEX key_sid_c ON Colocations (key, structure_id)")
|
||||
self.db.init("CREATE INDEX sid_c ON Colocations (structure_id)")
|
||||
self.db.init("CREATE INDEX mmid_cm ON ColocationMatches (mid_colocation_id)")
|
||||
self.db.init("CREATE INDEX mid_m ON Matches (match_id)")
|
||||
self.db.init("CREATE INDEX col_r ON Representations (colocation_id)")
|
||||
|
||||
|
||||
def _add_match(self, key, structure, match):
|
||||
structure_id, key_str = key[0], str(key[1:])
|
||||
|
@ -49,15 +56,12 @@ class MatchStore:
|
|||
cid = self.db.execute("SELECT colocation_id FROM Colocations WHERE key=? AND structure_id=?",
|
||||
(key_str, structure_id)).fetchone()
|
||||
|
||||
mid = self.db.execute("SELECT max(match_id) + 1 FROM Matches").fetchone()
|
||||
mid = 0 if mid[0] is None else mid[0]
|
||||
|
||||
for component_id, word in match.items():
|
||||
self.db.execute("""
|
||||
INSERT INTO Matches (match_id, component_id, word_lemma, word_text, word_msd, word_id)
|
||||
VALUES (:match_id, :component_id, :word_lemma, :word_text, :word_msd, :word_id)""", {
|
||||
"component_id": component_id,
|
||||
"match_id": mid,
|
||||
"match_id": self.match_num,
|
||||
"word_lemma": word.lemma,
|
||||
"word_msd": word.msd,
|
||||
"word_text": word.text,
|
||||
|
@ -65,7 +69,9 @@ class MatchStore:
|
|||
})
|
||||
|
||||
self.db.execute("INSERT INTO ColocationMatches (mid_colocation_id, mid_match_id) VALUES (?,?)",
|
||||
(cid[0], mid))
|
||||
(cid[0], self.match_num))
|
||||
|
||||
self.match_num += 1
|
||||
|
||||
if key not in self.data:
|
||||
self.data[key] = StructureMatch(str(len(self.data) + 1), structure)
|
||||
|
@ -82,20 +88,15 @@ class MatchStore:
|
|||
(structure.id,)):
|
||||
yield StructureMatch.from_db(self.db, cid[0], structure)
|
||||
|
||||
# for _cid_tup, sm in self.data.items():
|
||||
# if sm.structure != structure:
|
||||
# continue
|
||||
|
||||
# # print(sm.matches, sm.match_id, sm.representations)
|
||||
# yield sm
|
||||
|
||||
def set_representations(self, word_renderer):
|
||||
for _1, sm in progress(self.data.items(), "representations"):
|
||||
RepresentationAssigner.set_representations(sm, word_renderer)
|
||||
for component_id, text in sm.representations.items():
|
||||
def set_representations(self, word_renderer, structures):
|
||||
for cid, sid in progress(self.db.execute("SELECT colocation_id, structure_id FROM Colocations"), "representations"):
|
||||
structure = structures[sid - 1]
|
||||
match = StructureMatch.from_db(self.db, cid, structure)
|
||||
RepresentationAssigner.set_representations(match, word_renderer)
|
||||
for component_id, text in match.representations.items():
|
||||
self.db.execute("""
|
||||
INSERT INTO Representations (colocation_id, component_id, text)
|
||||
VALUES (?,?,?)""", (sm.match_id, component_id, text))
|
||||
VALUES (?,?,?)""", (match.match_id, component_id, text))
|
||||
|
||||
|
||||
def determine_colocation_dispersions(self):
|
||||
|
|
|
@ -92,7 +92,7 @@ def main(args):
|
|||
|
||||
# figure out representations!
|
||||
if args.out or args.out_no_stat:
|
||||
match_store.set_representations(word_stats)
|
||||
match_store.set_representations(word_stats, structures)
|
||||
|
||||
Writer.make_output_writer(args, max_num_components, match_store, word_stats).write_out(
|
||||
structures, match_store)
|
||||
|
|
Loading…
Reference in New Issue
Block a user