adding separate database class
This commit is contained in:
parent
fa8a5e55f8
commit
c25844a335
23
src/database.py
Normal file
23
src/database.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
import sqlite3
|
||||
import os
|
||||
|
||||
class Database:
    """Thin wrapper around a single ``sqlite3`` connection.

    The connection targets the file named by ``args.db``; when ``args.db``
    is ``None`` a private in-memory database is used instead. ``self.new``
    records whether the database had to be created from scratch, which is
    what ``init`` uses to decide if schema statements should run.
    """

    # Special name sqlite3 interprets as "in-memory database", not a path.
    _IN_MEMORY = ":memory:"

    def __init__(self, args):
        """Open (and optionally recreate) the database described by ``args``.

        Args:
            args: object exposing ``db`` (path to the database file, or
                ``None`` for an in-memory database) and ``keep_db`` (when
                false, an existing database file is deleted first).
        """
        in_memory = args.db is None or args.db == self._IN_MEMORY
        filename = self._IN_MEMORY if in_memory else args.db

        # The in-memory sentinel is not a filesystem path: never probe or
        # delete a file that merely happens to be called ":memory:".
        if not in_memory and not args.keep_db:
            try:
                os.remove(filename)
            except FileNotFoundError:
                # Nothing to recreate; the file will be created on connect.
                pass

        # An in-memory database always starts empty, so it is always new.
        self.new = in_memory or not os.path.exists(filename)
        self.db = sqlite3.connect(filename)

    def execute(self, *args, **kwargs):
        """Forward to ``sqlite3.Connection.execute`` and return its cursor."""
        return self.db.execute(*args, **kwargs)

    def init(self, *args, **kwargs):
        """Run a setup statement only when the database is new.

        Takes the same arguments as ``execute``. Returns the cursor from
        ``execute`` for a new database, or ``None`` when the statement was
        skipped because an existing database file is being reused.
        """
        if not self.new:
            return None
        return self.execute(*args, **kwargs)

    def commit(self):
        """Commit the current transaction on the underlying connection."""
        self.db.commit()

    def close(self):
        """Close the underlying connection, releasing the database file."""
        self.db.close()
|
|
@ -16,6 +16,7 @@ from match_store import MatchStore
|
|||
from word_stats import WordStats
|
||||
from writer import Writer
|
||||
from loader import load_files
|
||||
from database import Database
|
||||
|
||||
|
||||
def match_file(words, structures):
|
||||
|
@ -37,8 +38,9 @@ def match_file(words, structures):
|
|||
def main(args):
|
||||
structures, lemma_msds, max_num_components = build_structures(args)
|
||||
|
||||
database = Database(args)
|
||||
match_store = MatchStore(args)
|
||||
word_stats = WordStats(lemma_msds)
|
||||
word_stats = WordStats(lemma_msds, database)
|
||||
|
||||
if args.parallel:
|
||||
num_parallel = int(args.parallel)
|
||||
|
@ -139,6 +141,11 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--sort-reversed',
|
||||
help="Sort in reversed ored", action='store_true')
|
||||
|
||||
parser.add_argument('--db',
|
||||
help="Database file to use (instead of memory)", default=None)
|
||||
parser.add_argument('--keep-db',
|
||||
help="Does not recreate new database file", action='store_true')
|
||||
|
||||
parser.add_argument('--pc-tag',
|
||||
help='Tag for separators, usually pc or c', default="pc")
|
||||
parser.add_argument('--parallel',
|
||||
|
|
|
@ -1,34 +1,30 @@
|
|||
from collections import defaultdict, Counter
|
||||
|
||||
from progress_bar import progress
|
||||
import sqlite3
|
||||
|
||||
|
||||
class WordStats:
|
||||
def __init__(self, lemma_features):
|
||||
def __init__(self, lemma_features, db):
|
||||
self.lemma_features = lemma_features
|
||||
self.db = db
|
||||
|
||||
self.all_words = 0
|
||||
self.memoized_msd_merges = {}
|
||||
|
||||
with open("sqlite.db", 'w') as fp:
|
||||
fp.write("")
|
||||
|
||||
self.db = sqlite3.connect('sqlite.db')
|
||||
self.db.execute("""CREATE TABLE UniqWords (
|
||||
self.db.init("""CREATE TABLE UniqWords (
|
||||
uw_id INTEGER PRIMARY KEY,
|
||||
lemma varchar(64),
|
||||
msd varchar(16),
|
||||
text varchar(64),
|
||||
frequency int
|
||||
)""")
|
||||
self.db.execute("CREATE TABLE CommonMsd (lemma varchar(64), msd varchar(16))")
|
||||
self.db.execute("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
|
||||
self.db.init("CREATE TABLE CommonMsd (lemma varchar(64), msd varchar(16))")
|
||||
self.db.init("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
|
||||
|
||||
self.db.execute("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
|
||||
self.db.execute("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
|
||||
self.db.execute("CREATE INDEX lemma_on_cm ON CommonMsd (lemma)")
|
||||
self.db.execute("CREATE INDEX lemma_msd0_on_wc ON WordCount (lemma, msd0)")
|
||||
self.db.init("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
|
||||
self.db.init("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
|
||||
self.db.init("CREATE INDEX lemma_on_cm ON CommonMsd (lemma)")
|
||||
self.db.init("CREATE INDEX lemma_msd0_on_wc ON WordCount (lemma, msd0)")
|
||||
|
||||
def add_words(self, words):
|
||||
for w in progress(words, "adding-words"):
|
||||
|
|
Loading…
Reference in New Issue
Block a user