num-words now part of database

This commit is contained in:
Ozbolt Menegatti 2019-07-03 13:08:32 +02:00
parent ea92b44d71
commit ec02242f47

View File

@ -7,9 +7,7 @@ class WordStats:
def __init__(self, lemma_features, db):
self.lemma_features = lemma_features
self.db = db
self.all_words = 0
self.memoized_msd_merges = {}
self.all_words = None
self.db.init("""CREATE TABLE UniqWords (
uw_id INTEGER PRIMARY KEY,
@ -19,6 +17,7 @@ class WordStats:
frequency int
)""")
self.db.init("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
self.db.init("CREATE TABLE NumWords (id INTEGER PRIMARY KEY, n INTEGER)")
self.db.init("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
self.db.init("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
@ -34,10 +33,12 @@ class WordStats:
self.db.execute("""INSERT INTO UniqWords (lemma, msd, text, frequency)
VALUES (:lemma, :msd, :text, 1)""", params)
self.db.commit()
self.all_words += len(words)
self.db.execute("INSERT INTO NumWords (n) VALUES (?)", (len(words),))
def num_all_words(self):
    """Return the total number of words recorded in the NumWords table.

    The total is the sum of the ``n`` column over all rows. It is computed
    on the first call and cached in ``self.all_words``; later calls return
    the cached value without querying the database again, so rows inserted
    after the first call are not reflected.

    Returns:
        int: the summed word count, or 0 when NumWords has no rows.
    """
    if self.all_words is None:
        cur = self.db.execute("SELECT sum(n) FROM NumWords")
        row = cur.fetchone()
        # SQL sum() over zero rows yields NULL (None in Python); calling
        # int(None) would raise TypeError, so treat an empty table as 0.
        total = row[0] if row is not None else None
        self.all_words = int(total) if total is not None else 0
    return self.all_words
def generate_renders(self):