Simplify the progress helper, because the parallel stuff is going to be removed

This commit is contained in:
Ozbolt Menegatti 2019-07-03 10:23:18 +02:00
parent 577983427e
commit a07d14011d
3 changed files with 21 additions and 31 deletions

View File

@ -18,7 +18,8 @@ def load_files(args):
skip_id_check = args.skip_id_check skip_id_check = args.skip_id_check
do_msd_translate = not args.no_msd_translate do_msd_translate = not args.no_msd_translate
for fname in progress(filenames, "files", outfile=True): for idx, fname in enumerate(filenames):
print("FILE ", fname, "{}/{}".format(idx, len(filenames)))
extension = pathlib.Path(fname).suffix extension = pathlib.Path(fname).suffix
if extension == ".xml": if extension == ".xml":
@ -49,7 +50,7 @@ def load_gz(filename):
result.extend(words.values()) result.extend(words.values())
with gzip.open(filename, 'r') as fp: with gzip.open(filename, 'r') as fp:
for line in progress(fp, 'load-gz', infile=True): for line in progress(fp, 'load-gz'):
line_str = line.decode('utf8').strip() line_str = line.decode('utf8').strip()
line_fixed = line_str.replace(',', '\t').replace('\t\t\t', '\t,\t') line_fixed = line_str.replace(',', '\t').replace('\t\t\t', '\t,\t')
line_split = line_fixed.split("\t") line_split = line_fixed.split("\t")
@ -85,7 +86,7 @@ def load_xml(filename):
def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag): def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag):
words = {} words = {}
sentences = list(et.iter('s')) sentences = list(et.iter('s'))
for sentence in progress(sentences, "load-text", infile=True): for sentence in progress(sentences, "load-text"):
for w in sentence.iter("w"): for w in sentence.iter("w"):
words[w.get('id')] = Word.from_xml(w, do_msd_translate) words[w.get('id')] = Word.from_xml(w, do_msd_translate)
for pc in sentence.iter(pc_tag): for pc in sentence.iter(pc_tag):

View File

@ -1,41 +1,32 @@
import time
try: try:
from tqdm import tqdm from tqdm import tqdm
except ImportError: except ImportError:
tqdm = None tqdm = None
REPORT_ON = 0.3
class Progress: class Progress:
def __init__(self): def __call__(self, iterable, description):
self.hide_inner = False
def __call__(self, iterable, description, infile=False, outfile=False):
show_progress = True
if True in (infile, outfile):
assert False in (infile, outfile)
show_progress = outfile == self.hide_inner
if not show_progress:
yield from iterable
return
if tqdm is None: if tqdm is None:
iterlist = list(iterable) try:
proc = -1 ln = len(iterable)
for n, el in enumerate(iterlist): except TypeError:
nxt_proc = int(n / len(iterlist) * 100) ln = -1
if nxt_proc > proc:
print("\r{}: {:02d}% ({}/{})".format(description, nxt_proc, n, len(iterlist)), end="") last_report = time.time() - REPORT_ON
proc = nxt_proc for n, el in enumerate(iterable):
now = time.time()
if now - last_report > REPORT_ON:
print("\r{}: {}/{}".format(description, n, ln), end="")
last_report = now
yield el yield el
print("") print("")
else: else:
yield from tqdm(iterable, desc=description) yield from tqdm(iterable, desc=description)
def init(self, args):
self.hide_inner = args.hide_inner_progress
progress = Progress() progress = Progress()

View File

@ -22,7 +22,7 @@ from database import Database
def match_file(words, structures): def match_file(words, structures):
matches = {s: [] for s in structures} matches = {s: [] for s in structures}
for s in progress(structures, "matching", infile=True): for s in progress(structures, "matching"):
for w in words: for w in words:
mhere = s.match(w) mhere = s.match(w)
for match in mhere: for match in mhere:
@ -153,11 +153,9 @@ if __name__ == '__main__':
parser.add_argument('--match-to-file', help='Do not use!') parser.add_argument('--match-to-file', help='Do not use!')
parser.add_argument('--pickled-structures', help='Do not use!', action='store_true') parser.add_argument('--pickled-structures', help='Do not use!', action='store_true')
parser.add_argument('--hide-inner-progress', help='Do not use!', action='store_true')
args = parser.parse_args() args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper()) logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
progress.init(args)
start = time.time() start = time.time()
main(args) main(args)