num-words now part of database
This commit is contained in:
parent
ea92b44d71
commit
ec02242f47
|
@ -7,9 +7,7 @@ class WordStats:
|
|||
def __init__(self, lemma_features, db):
|
||||
self.lemma_features = lemma_features
|
||||
self.db = db
|
||||
|
||||
self.all_words = 0
|
||||
self.memoized_msd_merges = {}
|
||||
self.all_words = None
|
||||
|
||||
self.db.init("""CREATE TABLE UniqWords (
|
||||
uw_id INTEGER PRIMARY KEY,
|
||||
|
@ -19,6 +17,7 @@ class WordStats:
|
|||
frequency int
|
||||
)""")
|
||||
self.db.init("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
|
||||
self.db.init("CREATE TABLE NumWords (id INTEGER PRIMARY KEY, n INTEGER)")
|
||||
|
||||
self.db.init("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
|
||||
self.db.init("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
|
||||
|
@ -34,10 +33,12 @@ class WordStats:
|
|||
self.db.execute("""INSERT INTO UniqWords (lemma, msd, text, frequency)
|
||||
VALUES (:lemma, :msd, :text, 1)""", params)
|
||||
|
||||
self.db.commit()
|
||||
self.all_words += len(words)
|
||||
self.db.execute("INSERT INTO NumWords (n) VALUES (?)", (len(words),))
|
||||
|
||||
def num_all_words(self):
    """Return the sum of all ``n`` values stored in the NumWords table.

    The value is queried through ``self.db`` on the first call and then
    cached in ``self.all_words``; later calls return the cached value
    without touching the database.

    Returns:
        int: the summed word count, or 0 when NumWords holds no rows.
    """
    if self.all_words is None:
        cur = self.db.execute("SELECT sum(n) FROM NumWords")
        total = cur.fetchone()[0]
        # SQL sum() yields NULL (None in Python) when there are no rows;
        # int(None) would raise TypeError, so treat that case as zero.
        self.all_words = 0 if total is None else int(total)
    return self.all_words
|
||||
|
||||
def generate_renders(self):
|
||||
|
|
Loading…
Reference in New Issue
Block a user