adding separate database class
This commit is contained in:
parent
fa8a5e55f8
commit
c25844a335
23
src/database.py
Normal file
23
src/database.py
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
import sqlite3
|
||||||
|
import os
|
||||||
|
|
||||||
|
class Database:
    """Thin wrapper around a SQLite connection used for storing statistics.

    The connection is opened on the file named by ``args.db`` or, when no
    file is given, on a private in-memory database.

    Attributes:
        new: True when the database starts out empty (in-memory, or the
            file did not exist when the connection was opened), so schema
            statements passed to ``init`` must be run.
        db: The underlying ``sqlite3.Connection``.
    """

    def __init__(self, args):
        """Open the database described by the parsed command-line ``args``.

        Args:
            args: Object with a ``db`` attribute (path of the database file,
                or None for an in-memory database) and a ``keep_db``
                attribute (when false, an existing database file is deleted
                so that the run starts from scratch).
        """
        in_memory = args.db is None or args.db == ":memory:"
        filename = ":memory:" if in_memory else args.db

        if in_memory:
            # ":memory:" is a special SQLite name, not a path: never touch
            # the filesystem for it, and the database always starts empty.
            self.new = True
        else:
            if not args.keep_db:
                # Remove directly instead of checking for existence first,
                # which avoids a check-then-act race.
                try:
                    os.remove(filename)
                except FileNotFoundError:
                    pass

            # Must be evaluated before connect(), which creates the file.
            self.new = not os.path.exists(filename)

        self.db = sqlite3.connect(filename)

    def execute(self, *args, **kwargs):
        """Run a statement on the connection and return the resulting cursor.

        All arguments are forwarded unchanged to
        ``sqlite3.Connection.execute``.
        """
        return self.db.execute(*args, **kwargs)

    def init(self, *args, **kwargs):
        """Run a schema/setup statement, but only on a new database.

        Same as ``execute``, except that the statement is skipped (and None
        is returned) when an existing database file is being reused.
        """
        if not self.new:
            return None
        return self.execute(*args, **kwargs)

    def commit(self):
        """Commit the current transaction on the underlying connection."""
        self.db.commit()
|
|
@ -16,6 +16,7 @@ from match_store import MatchStore
|
||||||
from word_stats import WordStats
|
from word_stats import WordStats
|
||||||
from writer import Writer
|
from writer import Writer
|
||||||
from loader import load_files
|
from loader import load_files
|
||||||
|
from database import Database
|
||||||
|
|
||||||
|
|
||||||
def match_file(words, structures):
|
def match_file(words, structures):
|
||||||
|
@ -37,8 +38,9 @@ def match_file(words, structures):
|
||||||
def main(args):
|
def main(args):
|
||||||
structures, lemma_msds, max_num_components = build_structures(args)
|
structures, lemma_msds, max_num_components = build_structures(args)
|
||||||
|
|
||||||
|
database = Database(args)
|
||||||
match_store = MatchStore(args)
|
match_store = MatchStore(args)
|
||||||
word_stats = WordStats(lemma_msds)
|
word_stats = WordStats(lemma_msds, database)
|
||||||
|
|
||||||
if args.parallel:
|
if args.parallel:
|
||||||
num_parallel = int(args.parallel)
|
num_parallel = int(args.parallel)
|
||||||
|
@ -139,6 +141,11 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('--sort-reversed',
|
parser.add_argument('--sort-reversed',
|
||||||
help="Sort in reversed ored", action='store_true')
|
help="Sort in reversed ored", action='store_true')
|
||||||
|
|
||||||
|
parser.add_argument('--db',
|
||||||
|
help="Database file to use (instead of memory)", default=None)
|
||||||
|
parser.add_argument('--keep-db',
|
||||||
|
help="Does not recreate new database file", action='store_true')
|
||||||
|
|
||||||
parser.add_argument('--pc-tag',
|
parser.add_argument('--pc-tag',
|
||||||
help='Tag for separators, usually pc or c', default="pc")
|
help='Tag for separators, usually pc or c', default="pc")
|
||||||
parser.add_argument('--parallel',
|
parser.add_argument('--parallel',
|
||||||
|
|
|
@ -1,34 +1,30 @@
|
||||||
from collections import defaultdict, Counter
|
from collections import defaultdict, Counter
|
||||||
|
|
||||||
from progress_bar import progress
|
from progress_bar import progress
|
||||||
import sqlite3
|
|
||||||
|
|
||||||
|
|
||||||
class WordStats:
|
class WordStats:
|
||||||
def __init__(self, lemma_features):
|
def __init__(self, lemma_features, db):
|
||||||
self.lemma_features = lemma_features
|
self.lemma_features = lemma_features
|
||||||
|
self.db = db
|
||||||
|
|
||||||
self.all_words = 0
|
self.all_words = 0
|
||||||
self.memoized_msd_merges = {}
|
self.memoized_msd_merges = {}
|
||||||
|
|
||||||
with open("sqlite.db", 'w') as fp:
|
self.db.init("""CREATE TABLE UniqWords (
|
||||||
fp.write("")
|
|
||||||
|
|
||||||
self.db = sqlite3.connect('sqlite.db')
|
|
||||||
self.db.execute("""CREATE TABLE UniqWords (
|
|
||||||
uw_id INTEGER PRIMARY KEY,
|
uw_id INTEGER PRIMARY KEY,
|
||||||
lemma varchar(64),
|
lemma varchar(64),
|
||||||
msd varchar(16),
|
msd varchar(16),
|
||||||
text varchar(64),
|
text varchar(64),
|
||||||
frequency int
|
frequency int
|
||||||
)""")
|
)""")
|
||||||
self.db.execute("CREATE TABLE CommonMsd (lemma varchar(64), msd varchar(16))")
|
self.db.init("CREATE TABLE CommonMsd (lemma varchar(64), msd varchar(16))")
|
||||||
self.db.execute("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
|
self.db.init("CREATE TABLE WordCount (lemma varchar(64), msd0 char, frequency int)")
|
||||||
|
|
||||||
self.db.execute("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
|
self.db.init("CREATE INDEX lemma_msd_text_on_uw ON UniqWords (lemma, msd, text)")
|
||||||
self.db.execute("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
|
self.db.init("CREATE INDEX lemma_on_uw ON UniqWords (lemma)")
|
||||||
self.db.execute("CREATE INDEX lemma_on_cm ON CommonMsd (lemma)")
|
self.db.init("CREATE INDEX lemma_on_cm ON CommonMsd (lemma)")
|
||||||
self.db.execute("CREATE INDEX lemma_msd0_on_wc ON WordCount (lemma, msd0)")
|
self.db.init("CREATE INDEX lemma_msd0_on_wc ON WordCount (lemma, msd0)")
|
||||||
|
|
||||||
def add_words(self, words):
|
def add_words(self, words):
|
||||||
for w in progress(words, "adding-words"):
|
for w in progress(words, "adding-words"):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user