Added dispersions to SQLite; finished moving to it.
This commit is contained in:
parent
b5e281bdf4
commit
8c20295adf
|
@ -2,7 +2,7 @@ from word import Word
|
||||||
|
|
||||||
class StructureMatch:
|
class StructureMatch:
|
||||||
def __init__(self, match_id, structure):
|
def __init__(self, match_id, structure):
|
||||||
self.match_id = match_id
|
self.match_id = str(match_id)
|
||||||
self.structure = structure
|
self.structure = structure
|
||||||
|
|
||||||
self.matches = []
|
self.matches = []
|
||||||
|
@ -10,7 +10,7 @@ class StructureMatch:
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_db(db, colocation_id, structure):
|
def from_db(db, colocation_id, structure):
|
||||||
result = StructureMatch(str(colocation_id), structure)
|
result = StructureMatch(colocation_id, structure)
|
||||||
for match_id in db.execute("SELECT mid_match_id FROM ColocationMatches WHERE mid_colocation_id=?", (colocation_id,)):
|
for match_id in db.execute("SELECT mid_match_id FROM ColocationMatches WHERE mid_colocation_id=?", (colocation_id,)):
|
||||||
to_add = {}
|
to_add = {}
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from ast import literal_eval
|
||||||
|
|
||||||
from match import StructureMatch
|
from match import StructureMatch
|
||||||
from representation_assigner import RepresentationAssigner
|
from representation_assigner import RepresentationAssigner
|
||||||
|
@ -7,14 +8,13 @@ from progress_bar import progress
|
||||||
class MatchStore:
|
class MatchStore:
|
||||||
def __init__(self, args, db):
|
def __init__(self, args, db):
|
||||||
self.db = db
|
self.db = db
|
||||||
self.data = {}
|
|
||||||
self.min_frequency = args.min_freq
|
self.min_frequency = args.min_freq
|
||||||
self.dispersions = {}
|
self.dispersions = {}
|
||||||
self.match_num = 0
|
self.match_num = 0
|
||||||
|
|
||||||
self.db.init("""CREATE TABLE Colocations (
|
self.db.init("""CREATE TABLE Colocations (
|
||||||
colocation_id INTEGER PRIMARY KEY,
|
colocation_id INTEGER PRIMARY KEY,
|
||||||
structure_id INTEGER,
|
structure_id varchar(8),
|
||||||
key varchar(256))
|
key varchar(256))
|
||||||
""")
|
""")
|
||||||
self.db.init("""CREATE TABLE Matches (
|
self.db.init("""CREATE TABLE Matches (
|
||||||
|
@ -73,24 +73,20 @@ class MatchStore:
|
||||||
|
|
||||||
self.match_num += 1
|
self.match_num += 1
|
||||||
|
|
||||||
if key not in self.data:
|
|
||||||
self.data[key] = StructureMatch(str(len(self.data) + 1), structure)
|
|
||||||
self.data[key].append(match)
|
|
||||||
|
|
||||||
def add_matches(self, matches):
|
def add_matches(self, matches):
|
||||||
for structure, nms in matches.items():
|
for structure, nms in progress(matches.items(), 'adding-matches'):
|
||||||
for nm in nms:
|
for nm in nms:
|
||||||
self._add_match(nm[1], structure, nm[0])
|
self._add_match(nm[1], structure, nm[0])
|
||||||
|
|
||||||
def get_matches_for(self, structure):
|
def get_matches_for(self, structure):
|
||||||
print(structure.id)
|
|
||||||
for cid in self.db.execute("SELECT colocation_id FROM Colocations WHERE structure_id=?",
|
for cid in self.db.execute("SELECT colocation_id FROM Colocations WHERE structure_id=?",
|
||||||
(structure.id,)):
|
(structure.id,)):
|
||||||
yield StructureMatch.from_db(self.db, cid[0], structure)
|
yield StructureMatch.from_db(self.db, cid[0], structure)
|
||||||
|
|
||||||
def set_representations(self, word_renderer, structures):
|
def set_representations(self, word_renderer, structures):
|
||||||
|
structures_dict = {s.id: s for s in structures}
|
||||||
for cid, sid in progress(self.db.execute("SELECT colocation_id, structure_id FROM Colocations"), "representations"):
|
for cid, sid in progress(self.db.execute("SELECT colocation_id, structure_id FROM Colocations"), "representations"):
|
||||||
structure = structures[sid - 1]
|
structure = structures_dict[sid]
|
||||||
match = StructureMatch.from_db(self.db, cid, structure)
|
match = StructureMatch.from_db(self.db, cid, structure)
|
||||||
RepresentationAssigner.set_representations(match, word_renderer)
|
RepresentationAssigner.set_representations(match, word_renderer)
|
||||||
for component_id, text in match.representations.items():
|
for component_id, text in match.representations.items():
|
||||||
|
@ -101,7 +97,9 @@ class MatchStore:
|
||||||
|
|
||||||
def determine_colocation_dispersions(self):
|
def determine_colocation_dispersions(self):
|
||||||
dispersions = defaultdict(int)
|
dispersions = defaultdict(int)
|
||||||
for (structure_id, *word_tups) in self.data.keys():
|
for structure_id, word_tups_str in self.db.execute("SELECT structure_id, key FROM Colocations"):
|
||||||
|
word_tups = literal_eval(word_tups_str)
|
||||||
for component_id, lemma in word_tups:
|
for component_id, lemma in word_tups:
|
||||||
dispersions[(structure_id, component_id, lemma)] += 1
|
dispersions[(str(structure_id), component_id, lemma)] += 1
|
||||||
|
|
||||||
self.dispersions = dict(dispersions)
|
self.dispersions = dict(dispersions)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user