simplifying progress, because I will remove the parallel stuff
This commit is contained in:
parent
577983427e
commit
a07d14011d
|
@ -18,7 +18,8 @@ def load_files(args):
|
||||||
skip_id_check = args.skip_id_check
|
skip_id_check = args.skip_id_check
|
||||||
do_msd_translate = not args.no_msd_translate
|
do_msd_translate = not args.no_msd_translate
|
||||||
|
|
||||||
for fname in progress(filenames, "files", outfile=True):
|
for idx, fname in enumerate(filenames):
|
||||||
|
print("FILE ", fname, "{}/{}".format(idx, len(filenames)))
|
||||||
extension = pathlib.Path(fname).suffix
|
extension = pathlib.Path(fname).suffix
|
||||||
|
|
||||||
if extension == ".xml":
|
if extension == ".xml":
|
||||||
|
@ -49,7 +50,7 @@ def load_gz(filename):
|
||||||
result.extend(words.values())
|
result.extend(words.values())
|
||||||
|
|
||||||
with gzip.open(filename, 'r') as fp:
|
with gzip.open(filename, 'r') as fp:
|
||||||
for line in progress(fp, 'load-gz', infile=True):
|
for line in progress(fp, 'load-gz'):
|
||||||
line_str = line.decode('utf8').strip()
|
line_str = line.decode('utf8').strip()
|
||||||
line_fixed = line_str.replace(',', '\t').replace('\t\t\t', '\t,\t')
|
line_fixed = line_str.replace(',', '\t').replace('\t\t\t', '\t,\t')
|
||||||
line_split = line_fixed.split("\t")
|
line_split = line_fixed.split("\t")
|
||||||
|
@ -85,7 +86,7 @@ def load_xml(filename):
|
||||||
def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag):
|
def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag):
|
||||||
words = {}
|
words = {}
|
||||||
sentences = list(et.iter('s'))
|
sentences = list(et.iter('s'))
|
||||||
for sentence in progress(sentences, "load-text", infile=True):
|
for sentence in progress(sentences, "load-text"):
|
||||||
for w in sentence.iter("w"):
|
for w in sentence.iter("w"):
|
||||||
words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
||||||
for pc in sentence.iter(pc_tag):
|
for pc in sentence.iter(pc_tag):
|
||||||
|
|
|
@ -1,41 +1,32 @@
|
||||||
|
import time
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
except ImportError:
|
except ImportError:
|
||||||
tqdm = None
|
tqdm = None
|
||||||
|
|
||||||
|
|
||||||
|
REPORT_ON = 0.3
|
||||||
|
|
||||||
class Progress:
|
class Progress:
|
||||||
def __init__(self):
|
def __call__(self, iterable, description):
|
||||||
self.hide_inner = False
|
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, iterable, description, infile=False, outfile=False):
|
|
||||||
show_progress = True
|
|
||||||
if True in (infile, outfile):
|
|
||||||
assert False in (infile, outfile)
|
|
||||||
show_progress = outfile == self.hide_inner
|
|
||||||
|
|
||||||
if not show_progress:
|
|
||||||
yield from iterable
|
|
||||||
return
|
|
||||||
|
|
||||||
if tqdm is None:
|
if tqdm is None:
|
||||||
iterlist = list(iterable)
|
try:
|
||||||
proc = -1
|
ln = len(iterable)
|
||||||
for n, el in enumerate(iterlist):
|
except TypeError:
|
||||||
nxt_proc = int(n / len(iterlist) * 100)
|
ln = -1
|
||||||
if nxt_proc > proc:
|
|
||||||
print("\r{}: {:02d}% ({}/{})".format(description, nxt_proc, n, len(iterlist)), end="")
|
last_report = time.time() - REPORT_ON
|
||||||
proc = nxt_proc
|
for n, el in enumerate(iterable):
|
||||||
|
now = time.time()
|
||||||
|
if now - last_report > REPORT_ON:
|
||||||
|
print("\r{}: {}/{}".format(description, n, ln), end="")
|
||||||
|
last_report = now
|
||||||
yield el
|
yield el
|
||||||
print("")
|
print("")
|
||||||
else:
|
else:
|
||||||
yield from tqdm(iterable, desc=description)
|
yield from tqdm(iterable, desc=description)
|
||||||
|
|
||||||
|
|
||||||
def init(self, args):
|
|
||||||
self.hide_inner = args.hide_inner_progress
|
|
||||||
|
|
||||||
|
|
||||||
progress = Progress()
|
progress = Progress()
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ from database import Database
|
||||||
def match_file(words, structures):
|
def match_file(words, structures):
|
||||||
matches = {s: [] for s in structures}
|
matches = {s: [] for s in structures}
|
||||||
|
|
||||||
for s in progress(structures, "matching", infile=True):
|
for s in progress(structures, "matching"):
|
||||||
for w in words:
|
for w in words:
|
||||||
mhere = s.match(w)
|
mhere = s.match(w)
|
||||||
for match in mhere:
|
for match in mhere:
|
||||||
|
@ -153,11 +153,9 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
parser.add_argument('--match-to-file', help='Do not use!')
|
parser.add_argument('--match-to-file', help='Do not use!')
|
||||||
parser.add_argument('--pickled-structures', help='Do not use!', action='store_true')
|
parser.add_argument('--pickled-structures', help='Do not use!', action='store_true')
|
||||||
parser.add_argument('--hide-inner-progress', help='Do not use!', action='store_true')
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
||||||
progress.init(args)
|
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
main(args)
|
main(args)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user