adding separate database class

This commit is contained in:
Ozbolt Menegatti 2019-06-27 12:37:23 +02:00
parent fa8a5e55f8
commit c25844a335
3 changed files with 40 additions and 14 deletions

23
src/database.py Normal file
View File

@ -0,0 +1,23 @@
import sqlite3
import os
class Database:
    """Small wrapper around a ``sqlite3`` connection that tracks freshness.

    The wrapper records in ``self.new`` whether the underlying database was
    just created, so that schema-creation statements can be issued through
    :meth:`init` and are silently skipped when an existing database file is
    being reused.
    """

    # Special name understood by sqlite3.connect(); it is NOT a file path.
    _IN_MEMORY = ":memory:"

    def __init__(self, args):
        """Open the database selected by the command-line arguments.

        Args:
            args: Object with two attributes: ``db`` (path of the database
                file, or ``None`` for an in-memory database) and ``keep_db``
                (when false, an existing database file is deleted first).
        """
        if args.db is None or args.db == self._IN_MEMORY:
            # An in-memory database never touches the filesystem and always
            # starts empty.  Checking/removing a path called ":memory:" would
            # delete an unrelated file of that name, or wrongly mark the
            # database as not new and skip schema creation.
            filename = self._IN_MEMORY
            self.new = True
        else:
            filename = args.db
            if not args.keep_db:
                try:
                    os.remove(filename)
                except FileNotFoundError:
                    # Nothing to recreate; the file simply does not exist yet.
                    pass
            # Evaluated after the optional removal: true when connect() below
            # is about to create the file.
            self.new = not os.path.exists(filename)

        self.db = sqlite3.connect(filename)

    def execute(self, *args, **kwargs):
        """Forward to ``sqlite3.Connection.execute`` and return its result."""
        return self.db.execute(*args, **kwargs)

    def init(self, *args, **kwargs):
        """Run a statement only when the database is new.

        Same arguments as :meth:`execute`.  Returns the result of
        :meth:`execute` for a new database, and ``None`` when the statement
        is skipped because an existing database file is being reused.
        """
        if self.new:
            return self.execute(*args, **kwargs)
        return None

    def commit(self):
        """Commit the current transaction on the underlying connection."""
        self.db.commit()

View File

@ -16,6 +16,7 @@ from match_store import MatchStore
from word_stats import WordStats
from writer import Writer
from loader import load_files
from database import Database
def match_file(words, structures):
@ -37,8 +38,9 @@ def match_file(words, structures):
def main(args):
structures, lemma_msds, max_num_components = build_structures(args)
database = Database(args)
match_store = MatchStore(args)
word_stats = WordStats(lemma_msds)
word_stats = WordStats(lemma_msds, database)
if args.parallel:
num_parallel = int(args.parallel)
@ -139,6 +141,11 @@ if __name__ == '__main__':
parser.add_argument('--sort-reversed',
help="Sort in reversed ored", action='store_true')
parser.add_argument('--db',
help="Database file to use (instead of memory)", default=None)
parser.add_argument('--keep-db',
help="Does not recreate new database file", action='store_true')
parser.add_argument('--pc-tag',
help='Tag for separators, usually pc or c', default="pc")
parser.add_argument('--parallel',

View File

@ -1,34 +1,30 @@
from collections import defaultdict, Counter
from progress_bar import progress
import sqlite3
class WordStats:
def __init__(self, lemma_features):
def __init__(self, lemma_features, db):
self.lemma_features = lemma_features
self.db = db
self.all_words = 0
self.memoized_msd_merges = {}
with open("sqlite.db", 'w') as fp:
fp.write("")
self.db = sqlite3.connect('sqlite.db')
self.db.execute("""CREATE TABLE UniqWords (
self.db.init("""CREATE TABLE UniqWords (
uw_id INTEGER PRIMARY KEY,
lemma varchar(64),
msd varchar(16),
text varchar(64),
frequency int
)""")
self.db.execute("CREATE TABLE CommonMsd (lemma varchar(64), msd varchar(16))")
self.db.execute("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
self.db.init("CREATE TABLE CommonMsd (lemma varchar(64), msd varchar(16))")
self.db.init("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
self.db.execute("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
self.db.execute("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
self.db.execute("CREATE INDEX lemma_on_cm ON CommonMsd (lemma)")
self.db.execute("CREATE INDEX lemma_msd0_on_wc ON WordCount (lemma, msd0)")
self.db.init("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
self.db.init("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
self.db.init("CREATE INDEX lemma_on_cm ON CommonMsd (lemma)")
self.db.init("CREATE INDEX lemma_msd0_on_wc ON WordCount (lemma, msd0)")
def add_words(self, words):
for w in progress(words, "adding-words"):