Incorporating database also in MatchStore
This commit is contained in:
parent
c25844a335
commit
188763c06a
23
src/match.py
23
src/match.py
|
@ -1,3 +1,5 @@
|
|||
from word import Word
|
||||
|
||||
class StructureMatch:
|
||||
def __init__(self, match_id, structure):
|
||||
self.match_id = match_id
|
||||
|
@ -5,6 +7,25 @@ class StructureMatch:
|
|||
|
||||
self.matches = []
|
||||
self.representations = {}
|
||||
|
||||
@staticmethod
def from_db(db, colocation_id, structure):
    """Rebuild a StructureMatch for one colocation from the database.

    Args:
        db: database handle whose ``execute(sql, params)`` returns an
            iterable of row tuples.
        colocation_id: id of the colocation to load; its string form
            becomes the ``match_id`` of the returned object.
        structure: structure object handed through to the StructureMatch
            constructor unchanged.

    Returns:
        A StructureMatch whose ``matches`` holds one dict per stored match
        (``str(component_id)`` -> Word) and whose ``representations`` maps
        ``str(component_id)`` to the stored text.
    """
    result = StructureMatch(str(colocation_id), structure)

    # Read all match ids before issuing the per-match queries below.
    # Iterating a live result set while running further statements on the
    # same handle silently truncates the outer loop if the wrapper reuses a
    # single cursor (NOTE(review): confirm how the db wrapper hands out
    # cursors; materialising is safe either way).
    match_rows = list(db.execute(
        "SELECT mid_match_id FROM ColocationMatches WHERE mid_colocation_id=?",
        (colocation_id,)))

    # Each row is a one-column tuple; unpack it so the id is passed as an
    # explicit parameter instead of reusing the row object as the params.
    for (match_id,) in match_rows:
        to_add = {}

        for component_id, word_lemma, word_text, word_msd, word_id in db.execute("""
                SELECT component_id, word_lemma, word_text, word_msd, word_id
                FROM Matches WHERE match_id=?""", (match_id,)):
            # The trailing False is passed through to Word as before; its
            # meaning is defined by Word, which is not visible here.
            to_add[str(component_id)] = Word(word_lemma, word_msd, word_id, word_text, False)

        result.matches.append(to_add)

    for component_id, text in db.execute(
            "SELECT component_id, text FROM Representations WHERE colocation_id=?",
            (colocation_id,)):
        result.representations[str(component_id)] = text

    return result
|
||||
|
||||
def distinct_forms(self):
|
||||
dm = set()
|
||||
|
@ -17,4 +38,4 @@ class StructureMatch:
|
|||
self.matches.append(match)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.matches)
|
||||
return len(self.matches)
|
||||
|
|
|
@ -5,34 +5,98 @@ from representation_assigner import RepresentationAssigner
|
|||
from progress_bar import progress
|
||||
|
||||
class MatchStore:
|
||||
def __init__(self, args):
|
||||
def __init__(self, args, db):
|
||||
self.db = db
|
||||
self.data = {}
|
||||
self.min_frequency = args.min_freq
|
||||
self.dispersions = {}
|
||||
|
||||
self.db.init("""CREATE TABLE Colocations (
|
||||
colocation_id INTEGER PRIMARY KEY,
|
||||
structure_id INTEGER,
|
||||
key varchar(256))
|
||||
""")
|
||||
self.db.init("""CREATE TABLE Matches (
|
||||
match_id INTEGER,
|
||||
component_id INTEGER NOT NULL,
|
||||
word_lemma varchar(32) NOT NULL,
|
||||
word_id varchar(32) NOT NULL,
|
||||
word_msd varchar(16) NOT NULL,
|
||||
word_text varchar(32) NOT NULL)
|
||||
""")
|
||||
self.db.init("""CREATE TABLE ColocationMatches (
|
||||
mid_match_id INTEGER,
|
||||
mid_colocation_id INTEGER,
|
||||
FOREIGN KEY(mid_colocation_id) REFERENCES Colocations(colocation_id),
|
||||
FOREIGN KEY(mid_match_id) REFERENCES Matches(match_id))
|
||||
""")
|
||||
self.db.init("""CREATE TABLE Representations (
|
||||
colocation_id INTEGER,
|
||||
component_id INTEGER,
|
||||
text varchar(32),
|
||||
FOREIGN KEY(colocation_id) REFERENCES Colocations(colocation_id))
|
||||
""")
|
||||
|
||||
|
||||
def _add_match(self, key, structure, match):
    """Record one match in the database and in the in-memory store.

    Args:
        key: sequence whose first element is the structure id; the string
            form of the remaining elements is stored as the colocation key.
        structure: structure object used when a new in-memory
            StructureMatch has to be created for ``key``.
        match: mapping of component id to a word object exposing
            ``lemma``, ``msd``, ``text`` and ``id``.
    """
    structure_id = key[0]
    key_str = str(key[1:])

    def find_colocation():
        # Returns the (colocation_id,) row for this key, or None.
        return self.db.execute(
            "SELECT colocation_id FROM Colocations WHERE key=? AND structure_id=?",
            (key_str, structure_id)).fetchone()

    colocation_row = find_colocation()
    if colocation_row is None:
        # Unknown colocation: create it, then read back the id it received.
        self.db.execute("INSERT INTO Colocations (structure_id, key) VALUES (?,?)",
                        (structure_id, key_str))
        colocation_row = find_colocation()

    # The next match id is one past the current maximum; an empty table
    # yields NULL, in which case numbering starts at 0.
    next_id_row = self.db.execute("SELECT max(match_id) + 1 FROM Matches").fetchone()
    match_id = next_id_row[0] if next_id_row[0] is not None else 0

    insert_word_sql = """
        INSERT INTO Matches (match_id, component_id, word_lemma, word_text, word_msd, word_id)
        VALUES (:match_id, :component_id, :word_lemma, :word_text, :word_msd, :word_id)"""
    for component_id, word in match.items():
        row_values = {
            "component_id": component_id,
            "match_id": match_id,
            "word_lemma": word.lemma,
            "word_msd": word.msd,
            "word_text": word.text,
            "word_id": word.id,
        }
        self.db.execute(insert_word_sql, row_values)

    # Link the freshly stored match to its colocation.
    self.db.execute("INSERT INTO ColocationMatches (mid_colocation_id, mid_match_id) VALUES (?,?)",
                    (colocation_row[0], match_id))

    # Keep the in-memory view in step with the database.
    if key not in self.data:
        self.data[key] = StructureMatch(str(len(self.data) + 1), structure)
    self.data[key].append(match)
|
||||
|
||||
def get(self, key, n):
    """Return item ``n`` of the in-memory entry stored under ``key``.

    Raises whatever ``self.data[key]`` or the subsequent indexing raises
    (e.g. KeyError for an unknown key when ``self.data`` is a dict).
    """
    entry = self.data[key]
    return entry[n]
|
||||
|
||||
def add_matches(self, matches):
    """Store every match found for every structure.

    Args:
        matches: mapping of structure -> iterable of entries, where
            ``entry[0]`` is the match and ``entry[1]`` is its key.
    """
    for structure, found in matches.items():
        for entry in found:
            key = entry[1]
            match = entry[0]
            self._add_match(key, structure, match)
|
||||
|
||||
def get_matches_for(self, structure):
    """Yield a StructureMatch for every stored colocation of ``structure``.

    Colocations are looked up in the database by ``structure.id`` and each
    one is rebuilt through ``StructureMatch.from_db``.

    Args:
        structure: structure object; its ``id`` selects the colocations and
            the object itself is passed on to ``StructureMatch.from_db``.

    Yields:
        One StructureMatch per matching row of the Colocations table.
    """
    # Fetch all colocation ids first: from_db runs further queries on the
    # same handle, and this generator may be suspended between rows, so the
    # outer result set must not be a live cursor while that happens.
    colocation_rows = list(self.db.execute(
        "SELECT colocation_id FROM Colocations WHERE structure_id=?",
        (structure.id,)))

    for (colocation_id,) in colocation_rows:
        yield StructureMatch.from_db(self.db, colocation_id, structure)
|
||||
|
||||
def set_representations(self, word_renderer):
    """Assign representations to every in-memory match and persist them.

    For each stored StructureMatch, RepresentationAssigner fills in its
    representations, and each (component id, text) pair is then inserted
    into the Representations table.

    Args:
        word_renderer: passed through unchanged to
            ``RepresentationAssigner.set_representations``.
    """
    insert_sql = """
        INSERT INTO Representations (colocation_id, component_id, text)
        VALUES (?,?,?)"""

    for _key, structure_match in progress(self.data.items(), "representations"):
        RepresentationAssigner.set_representations(structure_match, word_renderer)

        # NOTE(review): the in-memory match_id is written as colocation_id;
        # this presumably relies on both counters advancing in step --
        # verify against how Colocations rows are created.
        for component_id, text in structure_match.representations.items():
            self.db.execute(insert_sql, (structure_match.match_id, component_id, text))
|
||||
|
||||
|
||||
def determine_colocation_dispersions(self):
|
||||
dispersions = defaultdict(int)
|
||||
|
|
|
@ -39,7 +39,7 @@ def main(args):
|
|||
structures, lemma_msds, max_num_components = build_structures(args)
|
||||
|
||||
database = Database(args)
|
||||
match_store = MatchStore(args)
|
||||
match_store = MatchStore(args, database)
|
||||
word_stats = WordStats(lemma_msds, database)
|
||||
|
||||
if args.parallel:
|
||||
|
|
Loading…
Reference in New Issue
Block a user