Dispersions are now stored in the database and loaded back from it when they have already been computed.
This commit is contained in:
parent
dedc031696
commit
3ea62ed242
|
@ -11,6 +11,7 @@ class MatchStore:
|
||||||
self.dispersions = {}
|
self.dispersions = {}
|
||||||
self.match_num = 0
|
self.match_num = 0
|
||||||
|
|
||||||
|
|
||||||
self.db.init("""CREATE TABLE Colocations (
|
self.db.init("""CREATE TABLE Colocations (
|
||||||
colocation_id INTEGER PRIMARY KEY,
|
colocation_id INTEGER PRIMARY KEY,
|
||||||
structure_id varchar(8),
|
structure_id varchar(8),
|
||||||
|
@ -36,13 +37,19 @@ class MatchStore:
|
||||||
text varchar(32),
|
text varchar(32),
|
||||||
FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id))
|
FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id))
|
||||||
""")
|
""")
|
||||||
|
self.db.init("""CREATE TABLE Dispersions (
|
||||||
|
structure_id varchar(64),
|
||||||
|
component_id varchar(64),
|
||||||
|
lemma varchar(128),
|
||||||
|
dispersion INTEGER)
|
||||||
|
""")
|
||||||
|
|
||||||
self.db.init("CREATE INDEX key_sid_c ON Colocations (key, structure_id)")
|
self.db.init("CREATE INDEX key_sid_c ON Colocations (key, structure_id)")
|
||||||
self.db.init("CREATE INDEX sid_c ON Colocations (structure_id)")
|
self.db.init("CREATE INDEX sid_c ON Colocations (structure_id)")
|
||||||
self.db.init("CREATE INDEX mmid_cm ON ColocationMatches (mid_colocation_id)")
|
self.db.init("CREATE INDEX mmid_cm ON ColocationMatches (mid_colocation_id)")
|
||||||
self.db.init("CREATE INDEX mid_m ON Matches (match_id)")
|
self.db.init("CREATE INDEX mid_m ON Matches (match_id)")
|
||||||
self.db.init("CREATE INDEX col_r ON Representations (colocation_id)")
|
self.db.init("CREATE INDEX col_r ON Representations (colocation_id)")
|
||||||
|
self.db.init("CREATE INDEX disp_key ON Dispersions (structure_id, component_id, lemma)")
|
||||||
|
|
||||||
def _add_match(self, key, structure, match):
|
def _add_match(self, key, structure, match):
|
||||||
structure_id, key_str = key[0], str(key[1:])
|
structure_id, key_str = key[0], str(key[1:])
|
||||||
|
@ -96,6 +103,13 @@ class MatchStore:
|
||||||
|
|
||||||
|
|
||||||
def determine_colocation_dispersions(self):
|
def determine_colocation_dispersions(self):
|
||||||
|
step_name = 'dispersions'
|
||||||
|
wc_done = self.db.execute("SELECT count(*) FROM StepsDone WHERE step=?", (step_name, )).fetchone()
|
||||||
|
|
||||||
|
if wc_done[0] == 1:
|
||||||
|
self.load_dispersions()
|
||||||
|
return
|
||||||
|
|
||||||
dispersions = defaultdict(int)
|
dispersions = defaultdict(int)
|
||||||
for structure_id, word_tups_str in progress(self.db.execute("SELECT structure_id, key FROM Colocations"), "dispersion"):
|
for structure_id, word_tups_str in progress(self.db.execute("SELECT structure_id, key FROM Colocations"), "dispersion"):
|
||||||
word_tups = literal_eval(word_tups_str)
|
word_tups = literal_eval(word_tups_str)
|
||||||
|
@ -103,3 +117,19 @@ class MatchStore:
|
||||||
dispersions[(str(structure_id), component_id, lemma)] += 1
|
dispersions[(str(structure_id), component_id, lemma)] += 1
|
||||||
|
|
||||||
self.dispersions = dict(dispersions)
|
self.dispersions = dict(dispersions)
|
||||||
|
print("Storing dispersions...")
|
||||||
|
self.store_dispersions()
|
||||||
|
|
||||||
|
self.db.execute("INSERT INTO StepsDone (step) VALUES (?)", (step_name, ))
|
||||||
|
self.db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def store_dispersions(self):
|
||||||
|
for (structure_id, component_id, lemma), disp in self.dispersions.items():
|
||||||
|
self.db.execute("INSERT INTO Dispersions (structure_id, component_id, lemma, dispersion) VALUES (?, ?, ?, ?)",
|
||||||
|
(structure_id, component_id, lemma, disp))
|
||||||
|
|
||||||
|
def load_dispersions(self):
|
||||||
|
self.dispersions = {}
|
||||||
|
for structure_id, component_id, lemma, dispersion in progress(self.db.execute("SELECT * FROM Dispersions"), "load-dispersions"):
|
||||||
|
self.dispersions[structure_id, component_id, lemma] = dispersion
|
||||||
|
|
Loading…
Reference in New Issue
Block a user