|
|
|
@ -13,15 +13,23 @@ def is_root_id(id_):
|
|
|
|
|
return len(id_.split('.')) == 3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_files(args):
|
|
|
|
|
def load_files(args, database):
|
|
|
|
|
filenames = args.input
|
|
|
|
|
skip_id_check = args.skip_id_check
|
|
|
|
|
do_msd_translate = not args.no_msd_translate
|
|
|
|
|
|
|
|
|
|
database.init("CREATE TABLE Files ( filename varchar(2048) )")
|
|
|
|
|
|
|
|
|
|
for idx, fname in enumerate(filenames):
|
|
|
|
|
print("FILE ", fname, "{}/{}".format(idx, len(filenames)))
|
|
|
|
|
extension = pathlib.Path(fname).suffix
|
|
|
|
|
|
|
|
|
|
# Skip this file if one with the same name has already been loaded.
|
|
|
|
|
loaded = database.execute("SELECT * FROM Files WHERE filename=?", (fname, )).fetchone()
|
|
|
|
|
if loaded is not None:
|
|
|
|
|
print("ALREADY LOADED")
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
if extension == ".xml":
|
|
|
|
|
et = load_xml(fname)
|
|
|
|
|
yield file_sentence_generator(et, skip_id_check, do_msd_translate, args.pc_tag)
|
|
|
|
@ -32,6 +40,10 @@ def load_files(args):
|
|
|
|
|
# else:
|
|
|
|
|
# raise NotImplementedError("Unknown file extension: {}".format(extension))
|
|
|
|
|
|
|
|
|
|
database.execute("INSERT INTO Files (filename) VALUES (?)", (fname, ))
|
|
|
|
|
database.commit()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def lines_gz(filename):
|
|
|
|
|
with gzip.open(filename, 'r') as fp:
|
|
|
|
|
for line in progress(fp, 'load-gz'):
|
|
|
|
|