Added dispersions to SQLite; finished moving to it.

This commit is contained in:
Ozbolt Menegatti 2019-06-27 22:04:33 +02:00
parent b5e281bdf4
commit 8c20295adf
2 changed files with 11 additions and 13 deletions

View File

@ -2,7 +2,7 @@ from word import Word
class StructureMatch:
def __init__(self, match_id, structure):
self.match_id = match_id
self.match_id = str(match_id)
self.structure = structure
self.matches = []
@ -10,7 +10,7 @@ class StructureMatch:
@staticmethod
def from_db(db, colocation_id, structure):
result = StructureMatch(str(colocation_id), structure)
result = StructureMatch(colocation_id, structure)
for match_id in db.execute("SELECT mid_match_id FROM ColocationMatches WHERE mid_colocation_id=?", (colocation_id,)):
to_add = {}

View File

@ -1,4 +1,5 @@
from collections import defaultdict
from ast import literal_eval
from match import StructureMatch
from representation_assigner import RepresentationAssigner
@ -7,14 +8,13 @@ from progress_bar import progress
class MatchStore:
def __init__(self, args, db):
self.db = db
self.data = {}
self.min_frequency = args.min_freq
self.dispersions = {}
self.match_num = 0
self.db.init("""CREATE TABLE Colocations (
colocation_id INTEGER PRIMARY KEY,
structure_id INTEGER,
structure_id varchar(8),
key varchar(256))
""")
self.db.init("""CREATE TABLE Matches (
@ -73,24 +73,20 @@ class MatchStore:
self.match_num += 1
if key not in self.data:
self.data[key] = StructureMatch(str(len(self.data) + 1), structure)
self.data[key].append(match)
def add_matches(self, matches):
for structure, nms in matches.items():
for structure, nms in progress(matches.items(), 'adding-matches'):
for nm in nms:
self._add_match(nm[1], structure, nm[0])
def get_matches_for(self, structure):
print(structure.id)
for cid in self.db.execute("SELECT colocation_id FROM Colocations WHERE structure_id=?",
(structure.id,)):
yield StructureMatch.from_db(self.db, cid[0], structure)
def set_representations(self, word_renderer, structures):
structures_dict = {s.id: s for s in structures}
for cid, sid in progress(self.db.execute("SELECT colocation_id, structure_id FROM Colocations"), "representations"):
structure = structures[sid - 1]
structure = structures_dict[sid]
match = StructureMatch.from_db(self.db, cid, structure)
RepresentationAssigner.set_representations(match, word_renderer)
for component_id, text in match.representations.items():
@ -101,7 +97,9 @@ class MatchStore:
def determine_colocation_dispersions(self):
dispersions = defaultdict(int)
for (structure_id, *word_tups) in self.data.keys():
for structure_id, word_tups_str in self.db.execute("SELECT structure_id, key FROM Colocations"):
word_tups = literal_eval(word_tups_str)
for component_id, lemma in word_tups:
dispersions[(structure_id, component_id, lemma)] += 1
dispersions[(str(structure_id), component_id, lemma)] += 1
self.dispersions = dict(dispersions)