Adding dispersions to sqlite, finished moving to it.
This commit is contained in:
parent
b5e281bdf4
commit
8c20295adf
|
@ -2,7 +2,7 @@ from word import Word
|
|||
|
||||
class StructureMatch:
|
||||
def __init__(self, match_id, structure):
|
||||
self.match_id = match_id
|
||||
self.match_id = str(match_id)
|
||||
self.structure = structure
|
||||
|
||||
self.matches = []
|
||||
|
@ -10,7 +10,7 @@ class StructureMatch:
|
|||
|
||||
@staticmethod
|
||||
def from_db(db, colocation_id, structure):
|
||||
result = StructureMatch(str(colocation_id), structure)
|
||||
result = StructureMatch(colocation_id, structure)
|
||||
for match_id in db.execute("SELECT mid_match_id FROM ColocationMatches WHERE mid_colocation_id=?", (colocation_id,)):
|
||||
to_add = {}
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from collections import defaultdict
|
||||
from ast import literal_eval
|
||||
|
||||
from match import StructureMatch
|
||||
from representation_assigner import RepresentationAssigner
|
||||
|
@ -7,14 +8,13 @@ from progress_bar import progress
|
|||
class MatchStore:
|
||||
def __init__(self, args, db):
|
||||
self.db = db
|
||||
self.data = {}
|
||||
self.min_frequency = args.min_freq
|
||||
self.dispersions = {}
|
||||
self.match_num = 0
|
||||
|
||||
self.db.init("""CREATE TABLE Colocations (
|
||||
colocation_id INTEGER PRIMARY KEY,
|
||||
structure_id INTEGER,
|
||||
structure_id varchar(8),
|
||||
key varchar(256))
|
||||
""")
|
||||
self.db.init("""CREATE TABLE Matches (
|
||||
|
@ -73,24 +73,20 @@ class MatchStore:
|
|||
|
||||
self.match_num += 1
|
||||
|
||||
if key not in self.data:
|
||||
self.data[key] = StructureMatch(str(len(self.data) + 1), structure)
|
||||
self.data[key].append(match)
|
||||
|
||||
def add_matches(self, matches):
|
||||
for structure, nms in matches.items():
|
||||
for structure, nms in progress(matches.items(), 'adding-matches'):
|
||||
for nm in nms:
|
||||
self._add_match(nm[1], structure, nm[0])
|
||||
|
||||
def get_matches_for(self, structure):
|
||||
print(structure.id)
|
||||
for cid in self.db.execute("SELECT colocation_id FROM Colocations WHERE structure_id=?",
|
||||
(structure.id,)):
|
||||
yield StructureMatch.from_db(self.db, cid[0], structure)
|
||||
|
||||
def set_representations(self, word_renderer, structures):
|
||||
structures_dict = {s.id: s for s in structures}
|
||||
for cid, sid in progress(self.db.execute("SELECT colocation_id, structure_id FROM Colocations"), "representations"):
|
||||
structure = structures[sid - 1]
|
||||
structure = structures_dict[sid]
|
||||
match = StructureMatch.from_db(self.db, cid, structure)
|
||||
RepresentationAssigner.set_representations(match, word_renderer)
|
||||
for component_id, text in match.representations.items():
|
||||
|
@ -101,7 +97,9 @@ class MatchStore:
|
|||
|
||||
def determine_colocation_dispersions(self):
|
||||
dispersions = defaultdict(int)
|
||||
for (structure_id, *word_tups) in self.data.keys():
|
||||
for structure_id, word_tups_str in self.db.execute("SELECT structure_id, key FROM Colocations"):
|
||||
word_tups = literal_eval(word_tups_str)
|
||||
for component_id, lemma in word_tups:
|
||||
dispersions[(structure_id, component_id, lemma)] += 1
|
||||
dispersions[(str(structure_id), component_id, lemma)] += 1
|
||||
|
||||
self.dispersions = dict(dispersions)
|
||||
|
|
Loading…
Reference in New Issue
Block a user