Dispersions are now stored in the database and loaded from it once the step is done.

This commit is contained in:
Ozbolt Menegatti 2019-08-21 12:49:03 +02:00
parent dedc031696
commit 3ea62ed242

View File

@ -11,6 +11,7 @@ class MatchStore:
self.dispersions = {}
self.match_num = 0
self.db.init("""CREATE TABLE Colocations (
colocation_id INTEGER PRIMARY KEY,
structure_id varchar(8),
@ -36,13 +37,19 @@ class MatchStore:
text varchar(32),
FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id))
""")
self.db.init("""CREATE TABLE Dispersions (
structure_id varchar(64),
component_id varchar(64),
lemma varchar(128),
dispersion INTEGER)
""")
self.db.init("CREATE INDEX key_sid_c ON Colocations (key, structure_id)")
self.db.init("CREATE INDEX sid_c ON Colocations (structure_id)")
self.db.init("CREATE INDEX mmid_cm ON ColocationMatches (mid_colocation_id)")
self.db.init("CREATE INDEX mid_m ON Matches (match_id)")
self.db.init("CREATE INDEX col_r ON Representations (colocation_id)")
self.db.init("CREATE INDEX disp_key ON Dispersions (structure_id, component_id, lemma)")
def _add_match(self, key, structure, match):
structure_id, key_str = key[0], str(key[1:])
@ -96,6 +103,13 @@ class MatchStore:
def determine_colocation_dispersions(self):
step_name = 'dispersions'
wc_done = self.db.execute("SELECT count(*) FROM StepsDone WHERE step=?", (step_name, )).fetchone()
if wc_done[0] == 1:
self.load_dispersions()
return
dispersions = defaultdict(int)
for structure_id, word_tups_str in progress(self.db.execute("SELECT structure_id, key FROM Colocations"), "dispersion"):
word_tups = literal_eval(word_tups_str)
@ -103,3 +117,19 @@ class MatchStore:
dispersions[(str(structure_id), component_id, lemma)] += 1
self.dispersions = dict(dispersions)
print("Storing dispersions...")
self.store_dispersions()
self.db.execute("INSERT INTO StepsDone (step) VALUES (?)", (step_name, ))
self.db.commit()
def store_dispersions(self):
    """Insert every entry of ``self.dispersions`` into the Dispersions table.

    Each key of ``self.dispersions`` is unpacked as a
    ``(structure_id, component_id, lemma)`` triple and its value is written
    to the ``dispersion`` column, one INSERT per entry. This method does not
    commit.
    """
    insert_sql = "INSERT INTO Dispersions (structure_id, component_id, lemma, dispersion) VALUES (?, ?, ?, ?)"
    for key, count in self.dispersions.items():
        structure_id, component_id, lemma = key
        row = (structure_id, component_id, lemma, count)
        self.db.execute(insert_sql, row)
def load_dispersions(self):
    """Replace ``self.dispersions`` with the contents of the Dispersions table.

    The dictionary is reset first, then every row is read and stored under
    the key ``(structure_id, component_id, lemma)`` with the row's
    ``dispersion`` as the value.
    """
    # ``SELECT *`` is unpacked positionally, so this depends on the column
    # order declared in the CREATE TABLE Dispersions statement.
    # NOTE(review): component_id is declared varchar there; if the computed
    # keys use non-string component ids, loaded keys may differ in type from
    # freshly computed ones -- confirm against the database backend.
    self.dispersions = {}
    rows = self.db.execute("SELECT * FROM Dispersions")
    for row in progress(rows, "load-dispersions"):
        structure_id, component_id, lemma, dispersion = row
        self.dispersions[(structure_id, component_id, lemma)] = dispersion