num-words now part of database

This commit is contained in:
Ozbolt Menegatti 2019-07-03 13:08:32 +02:00
parent ea92b44d71
commit ec02242f47

View File

@ -7,9 +7,7 @@ class WordStats:
def __init__(self, lemma_features, db): def __init__(self, lemma_features, db):
self.lemma_features = lemma_features self.lemma_features = lemma_features
self.db = db self.db = db
self.all_words = None
self.all_words = 0
self.memoized_msd_merges = {}
self.db.init("""CREATE TABLE UniqWords ( self.db.init("""CREATE TABLE UniqWords (
uw_id INTEGER PRIMARY KEY, uw_id INTEGER PRIMARY KEY,
@ -19,6 +17,7 @@ class WordStats:
frequency int frequency int
)""") )""")
self.db.init("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)") self.db.init("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
self.db.init("CREATE TABLE NumWords (id INTEGER PRIMARY KEY, n INTEGER)")
self.db.init("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)") self.db.init("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
self.db.init("CREATE INDEX lemma_on_uw ON UniqWords (lemma)") self.db.init("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
@ -34,10 +33,12 @@ class WordStats:
self.db.execute("""INSERT INTO UniqWords (lemma, msd, text, frequency) self.db.execute("""INSERT INTO UniqWords (lemma, msd, text, frequency)
VALUES (:lemma, :msd, :text, 1)""", params) VALUES (:lemma, :msd, :text, 1)""", params)
self.db.commit() self.db.execute("INSERT INTO NumWords (n) VALUES (?)", (len(words),))
self.all_words += len(words)
def num_all_words(self): def num_all_words(self):
if self.all_words is None:
cur = self.db.execute("SELECT sum(n) FROM NumWords")
self.all_words = int(cur.fetchone()[0])
return self.all_words return self.all_words
def generate_renders(self): def generate_renders(self):