num-words (the total word count) is now stored in the database
This commit is contained in:
parent
ea92b44d71
commit
ec02242f47
|
@ -7,9 +7,7 @@ class WordStats:
|
||||||
def __init__(self, lemma_features, db):
|
def __init__(self, lemma_features, db):
|
||||||
self.lemma_features = lemma_features
|
self.lemma_features = lemma_features
|
||||||
self.db = db
|
self.db = db
|
||||||
|
self.all_words = None
|
||||||
self.all_words = 0
|
|
||||||
self.memoized_msd_merges = {}
|
|
||||||
|
|
||||||
self.db.init("""CREATE TABLE UniqWords (
|
self.db.init("""CREATE TABLE UniqWords (
|
||||||
uw_id INTEGER PRIMARY KEY,
|
uw_id INTEGER PRIMARY KEY,
|
||||||
|
@ -19,6 +17,7 @@ class WordStats:
|
||||||
frequency int
|
frequency int
|
||||||
)""")
|
)""")
|
||||||
self.db.init("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
|
self.db.init("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
|
||||||
|
self.db.init("CREATE TABLE NumWords (id INTEGER PRIMARY KEY, n INTEGER)")
|
||||||
|
|
||||||
self.db.init("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
|
self.db.init("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
|
||||||
self.db.init("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
|
self.db.init("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
|
||||||
|
@ -34,10 +33,12 @@ class WordStats:
|
||||||
self.db.execute("""INSERT INTO UniqWords (lemma, msd, text, frequency)
|
self.db.execute("""INSERT INTO UniqWords (lemma, msd, text, frequency)
|
||||||
VALUES (:lemma, :msd, :text, 1)""", params)
|
VALUES (:lemma, :msd, :text, 1)""", params)
|
||||||
|
|
||||||
self.db.commit()
|
self.db.execute("INSERT INTO NumWords (n) VALUES (?)", (len(words),))
|
||||||
self.all_words += len(words)
|
|
||||||
|
|
||||||
def num_all_words(self):
|
def num_all_words(self):
|
||||||
|
if self.all_words is None:
|
||||||
|
cur = self.db.execute("SELECT sum(n) FROM NumWords")
|
||||||
|
self.all_words = int(cur.fetchone()[0])
|
||||||
return self.all_words
|
return self.all_words
|
||||||
|
|
||||||
def generate_renders(self):
|
def generate_renders(self):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user