Add database indexes for speed and make set_representations read colocations from the database
This commit is contained in:
parent
188763c06a
commit
b5e281bdf4
|
@ -10,6 +10,7 @@ class MatchStore:
|
||||||
self.data = {}
|
self.data = {}
|
||||||
self.min_frequency = args.min_freq
|
self.min_frequency = args.min_freq
|
||||||
self.dispersions = {}
|
self.dispersions = {}
|
||||||
|
self.match_num = 0
|
||||||
|
|
||||||
self.db.init("""CREATE TABLE Colocations (
|
self.db.init("""CREATE TABLE Colocations (
|
||||||
colocation_id INTEGER PRIMARY KEY,
|
colocation_id INTEGER PRIMARY KEY,
|
||||||
|
@ -37,6 +38,12 @@ class MatchStore:
|
||||||
FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id))
|
FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id))
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
self.db.init("CREATE INDEX key_sid_c ON Colocations (key, structure_id)")
|
||||||
|
self.db.init("CREATE INDEX sid_c ON Colocations (structure_id)")
|
||||||
|
self.db.init("CREATE INDEX mmid_cm ON ColocationMatches (mid_colocation_id)")
|
||||||
|
self.db.init("CREATE INDEX mid_m ON Matches (match_id)")
|
||||||
|
self.db.init("CREATE INDEX col_r ON Representations (colocation_id)")
|
||||||
|
|
||||||
|
|
||||||
def _add_match(self, key, structure, match):
|
def _add_match(self, key, structure, match):
|
||||||
structure_id, key_str = key[0], str(key[1:])
|
structure_id, key_str = key[0], str(key[1:])
|
||||||
|
@ -49,15 +56,12 @@ class MatchStore:
|
||||||
cid = self.db.execute("SELECT colocation_id FROM Colocations WHERE key=? AND structure_id=?",
|
cid = self.db.execute("SELECT colocation_id FROM Colocations WHERE key=? AND structure_id=?",
|
||||||
(key_str, structure_id)).fetchone()
|
(key_str, structure_id)).fetchone()
|
||||||
|
|
||||||
mid = self.db.execute("SELECT max(match_id) + 1 FROM Matches").fetchone()
|
|
||||||
mid = 0 if mid[0] is None else mid[0]
|
|
||||||
|
|
||||||
for component_id, word in match.items():
|
for component_id, word in match.items():
|
||||||
self.db.execute("""
|
self.db.execute("""
|
||||||
INSERT INTO Matches (match_id, component_id, word_lemma, word_text, word_msd, word_id)
|
INSERT INTO Matches (match_id, component_id, word_lemma, word_text, word_msd, word_id)
|
||||||
VALUES (:match_id, :component_id, :word_lemma, :word_text, :word_msd, :word_id)""", {
|
VALUES (:match_id, :component_id, :word_lemma, :word_text, :word_msd, :word_id)""", {
|
||||||
"component_id": component_id,
|
"component_id": component_id,
|
||||||
"match_id": mid,
|
"match_id": self.match_num,
|
||||||
"word_lemma": word.lemma,
|
"word_lemma": word.lemma,
|
||||||
"word_msd": word.msd,
|
"word_msd": word.msd,
|
||||||
"word_text": word.text,
|
"word_text": word.text,
|
||||||
|
@ -65,7 +69,9 @@ class MatchStore:
|
||||||
})
|
})
|
||||||
|
|
||||||
self.db.execute("INSERT INTO ColocationMatches (mid_colocation_id, mid_match_id) VALUES (?,?)",
|
self.db.execute("INSERT INTO ColocationMatches (mid_colocation_id, mid_match_id) VALUES (?,?)",
|
||||||
(cid[0], mid))
|
(cid[0], self.match_num))
|
||||||
|
|
||||||
|
self.match_num += 1
|
||||||
|
|
||||||
if key not in self.data:
|
if key not in self.data:
|
||||||
self.data[key] = StructureMatch(str(len(self.data) + 1), structure)
|
self.data[key] = StructureMatch(str(len(self.data) + 1), structure)
|
||||||
|
@ -82,20 +88,15 @@ class MatchStore:
|
||||||
(structure.id,)):
|
(structure.id,)):
|
||||||
yield StructureMatch.from_db(self.db, cid[0], structure)
|
yield StructureMatch.from_db(self.db, cid[0], structure)
|
||||||
|
|
||||||
# for _cid_tup, sm in self.data.items():
|
def set_representations(self, word_renderer, structures):
|
||||||
# if sm.structure != structure:
|
for cid, sid in progress(self.db.execute("SELECT colocation_id, structure_id FROM Colocations"), "representations"):
|
||||||
# continue
|
structure = structures[sid - 1]
|
||||||
|
match = StructureMatch.from_db(self.db, cid, structure)
|
||||||
# # print(sm.matches, sm.match_id, sm.representations)
|
RepresentationAssigner.set_representations(match, word_renderer)
|
||||||
# yield sm
|
for component_id, text in match.representations.items():
|
||||||
|
|
||||||
def set_representations(self, word_renderer):
|
|
||||||
for _1, sm in progress(self.data.items(), "representations"):
|
|
||||||
RepresentationAssigner.set_representations(sm, word_renderer)
|
|
||||||
for component_id, text in sm.representations.items():
|
|
||||||
self.db.execute("""
|
self.db.execute("""
|
||||||
INSERT INTO Representations (colocation_id, component_id, text)
|
INSERT INTO Representations (colocation_id, component_id, text)
|
||||||
VALUES (?,?,?)""", (sm.match_id, component_id, text))
|
VALUES (?,?,?)""", (match.match_id, component_id, text))
|
||||||
|
|
||||||
|
|
||||||
def determine_colocation_dispersions(self):
|
def determine_colocation_dispersions(self):
|
||||||
|
|
|
@ -92,7 +92,7 @@ def main(args):
|
||||||
|
|
||||||
# figure out representations!
|
# figure out representations!
|
||||||
if args.out or args.out_no_stat:
|
if args.out or args.out_no_stat:
|
||||||
match_store.set_representations(word_stats)
|
match_store.set_representations(word_stats, structures)
|
||||||
|
|
||||||
Writer.make_output_writer(args, max_num_components, match_store, word_stats).write_out(
|
Writer.make_output_writer(args, max_num_components, match_store, word_stats).write_out(
|
||||||
structures, match_store)
|
structures, match_store)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user