|
|
|
@ -11,6 +11,7 @@ class MatchStore:
|
|
|
|
|
self.dispersions = {}
|
|
|
|
|
self.match_num = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.db.init("""CREATE TABLE Colocations (
|
|
|
|
|
colocation_id INTEGER PRIMARY KEY,
|
|
|
|
|
structure_id varchar(8),
|
|
|
|
@ -36,13 +37,19 @@ class MatchStore:
|
|
|
|
|
text varchar(32),
|
|
|
|
|
FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id))
|
|
|
|
|
""")
|
|
|
|
|
self.db.init("""CREATE TABLE Dispersions (
|
|
|
|
|
structure_id varchar(64),
|
|
|
|
|
component_id varchar(64),
|
|
|
|
|
lemma varchar(128),
|
|
|
|
|
dispersion INTEGER)
|
|
|
|
|
""")
|
|
|
|
|
|
|
|
|
|
self.db.init("CREATE INDEX key_sid_c ON Colocations (key, structure_id)")
|
|
|
|
|
self.db.init("CREATE INDEX sid_c ON Colocations (structure_id)")
|
|
|
|
|
self.db.init("CREATE INDEX mmid_cm ON ColocationMatches (mid_colocation_id)")
|
|
|
|
|
self.db.init("CREATE INDEX mid_m ON Matches (match_id)")
|
|
|
|
|
self.db.init("CREATE INDEX col_r ON Representations (colocation_id)")
|
|
|
|
|
|
|
|
|
|
self.db.init("CREATE INDEX disp_key ON Dispersions (structure_id, component_id, lemma)")
|
|
|
|
|
|
|
|
|
|
def _add_match(self, key, structure, match):
|
|
|
|
|
structure_id, key_str = key[0], str(key[1:])
|
|
|
|
@ -96,6 +103,13 @@ class MatchStore:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def determine_colocation_dispersions(self):
|
|
|
|
|
step_name = 'dispersions'
|
|
|
|
|
wc_done = self.db.execute("SELECT count(*) FROM StepsDone WHERE step=?", (step_name, )).fetchone()
|
|
|
|
|
|
|
|
|
|
if wc_done[0] == 1:
|
|
|
|
|
self.load_dispersions()
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
dispersions = defaultdict(int)
|
|
|
|
|
for structure_id, word_tups_str in progress(self.db.execute("SELECT structure_id, key FROM Colocations"), "dispersion"):
|
|
|
|
|
word_tups = literal_eval(word_tups_str)
|
|
|
|
@ -103,3 +117,19 @@ class MatchStore:
|
|
|
|
|
dispersions[(str(structure_id), component_id, lemma)] += 1
|
|
|
|
|
|
|
|
|
|
self.dispersions = dict(dispersions)
|
|
|
|
|
print("Storing dispersions...")
|
|
|
|
|
self.store_dispersions()
|
|
|
|
|
|
|
|
|
|
self.db.execute("INSERT INTO StepsDone (step) VALUES (?)", (step_name, ))
|
|
|
|
|
self.db.commit()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def store_dispersions(self):
    """Insert every entry of ``self.dispersions`` into the Dispersions table.

    Each key of ``self.dispersions`` is unpacked as a
    ``(structure_id, component_id, lemma)`` triple and written as one row,
    with the mapped value going into the ``dispersion`` column.  This method
    only issues the INSERT statements; it does not call ``commit`` itself.
    """
    insert_sql = "INSERT INTO Dispersions (structure_id, component_id, lemma, dispersion) VALUES (?, ?, ?, ?)"
    for key, count in self.dispersions.items():
        # Unpacking here keeps the three-part key requirement explicit.
        sid, cid, lemma = key
        self.db.execute(insert_sql, (sid, cid, lemma, count))
|
|
|
|
|
|
|
|
|
|
def load_dispersions(self):
    """Rebuild ``self.dispersions`` from the rows of the Dispersions table.

    The resulting dict maps ``(structure_id, component_id, lemma)`` tuples to
    the stored ``dispersion`` value.  Whatever ``self.dispersions`` held
    before is replaced once all rows have been read.
    """
    # Name the columns explicitly: with ``SELECT *`` the four-way unpacking
    # below would depend on the physical column order of the table and would
    # break (or silently mis-assign fields) if the schema ever changed.
    rows = self.db.execute(
        "SELECT structure_id, component_id, lemma, dispersion FROM Dispersions"
    )
    self.dispersions = {
        (structure_id, component_id, lemma): dispersion
        for structure_id, component_id, lemma, dispersion in progress(rows, "load-dispersions")
    }
|
|
|
|
|