sql-join-test #1
|
@ -9,6 +9,7 @@ class MatchStore:
|
||||||
def __init__(self, args, db):
|
def __init__(self, args, db):
|
||||||
self.db = db
|
self.db = db
|
||||||
self.dispersions = {}
|
self.dispersions = {}
|
||||||
|
self.min_freq = args.min_freq
|
||||||
|
|
||||||
|
|
||||||
self.db.init("""CREATE TABLE Colocations (
|
self.db.init("""CREATE TABLE Colocations (
|
||||||
|
@ -117,6 +118,10 @@ class MatchStore:
|
||||||
|
|
||||||
self.db.step_is_done(step_name)
|
self.db.step_is_done(step_name)
|
||||||
|
|
||||||
|
def has_colocation_id_enough_frequency(self, colocation_id):
    """Return whether a colocation has at least ``self.min_freq`` matches.

    Counts the rows of ``ColocationMatches`` whose ``mid_colocation_id``
    equals ``colocation_id`` and compares that count with ``self.min_freq``.

    The inner ``SELECT ... LIMIT ?`` yields at most ``self.min_freq`` rows,
    so the database can stop scanning as soon as the threshold is reached.
    The earlier ``MIN(MAX(COUNT(*), ?), ?)`` form gave the same answer but
    had to count every matching row before clamping the result.

    Args:
        colocation_id: value matched against ``mid_colocation_id``.

    Returns:
        ``True`` when at least ``self.min_freq`` matching rows exist,
        otherwise ``False``.
    """
    threshold = self.min_freq
    if threshold <= 0:
        # A row count is never negative, so a non-positive threshold is
        # always satisfied; skip the query entirely.
        return True

    row = self.db.execute(
        "SELECT COUNT(*) FROM ("
        "SELECT 1 FROM ColocationMatches WHERE mid_colocation_id=? LIMIT ?"
        ")",
        (colocation_id, threshold),
    ).fetchone()
    return row[0] >= threshold
|
||||||
|
|
||||||
def determine_colocation_dispersions(self):
|
def determine_colocation_dispersions(self):
|
||||||
step_name = 'dispersions'
|
step_name = 'dispersions'
|
||||||
if self.db.is_step_done(step_name):
|
if self.db.is_step_done(step_name):
|
||||||
|
@ -124,7 +129,10 @@ class MatchStore:
|
||||||
return
|
return
|
||||||
|
|
||||||
dispersions = defaultdict(int)
|
dispersions = defaultdict(int)
|
||||||
for structure_id, word_tups_str in progress(self.db.execute("SELECT structure_id, key FROM Colocations"), "dispersion"):
|
for colocation_id, structure_id, word_tups_str in progress(self.db.execute("SELECT colocation_id, structure_id, key FROM Colocations"), "dispersion"):
|
||||||
|
if not self.has_colocation_id_enough_frequency(colocation_id):
|
||||||
|
continue
|
||||||
|
|
||||||
word_tups = literal_eval(word_tups_str)
|
word_tups = literal_eval(word_tups_str)
|
||||||
for component_id, lemma in word_tups:
|
for component_id, lemma in word_tups:
|
||||||
dispersions[(str(structure_id), component_id, lemma)] += 1
|
dispersions[(str(structure_id), component_id, lemma)] += 1
|
||||||
|
|
|
@ -77,7 +77,7 @@ class Writer:
|
||||||
rows = []
|
rows = []
|
||||||
components = structure.components
|
components = structure.components
|
||||||
|
|
||||||
for match in colocation_ids.get_matches_for(structure):
|
for match in progress(colocation_ids.get_matches_for(structure), "Writing matches: {}".format(structure.id)):
|
||||||
if len(match) < self.min_frequency:
|
if len(match) < self.min_frequency:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user