Simplify the progress helper, because the parallel code is going to be removed

This commit is contained in:
2019-07-03 10:23:18 +02:00
parent 577983427e
commit a07d14011d
3 changed files with 21 additions and 31 deletions

View File

@@ -18,7 +18,8 @@ def load_files(args):
skip_id_check = args.skip_id_check
do_msd_translate = not args.no_msd_translate
for fname in progress(filenames, "files", outfile=True):
for idx, fname in enumerate(filenames):
print("FILE ", fname, "{}/{}".format(idx, len(filenames)))
extension = pathlib.Path(fname).suffix
if extension == ".xml":
@@ -49,7 +50,7 @@ def load_gz(filename):
result.extend(words.values())
with gzip.open(filename, 'r') as fp:
for line in progress(fp, 'load-gz', infile=True):
for line in progress(fp, 'load-gz'):
line_str = line.decode('utf8').strip()
line_fixed = line_str.replace(',', '\t').replace('\t\t\t', '\t,\t')
line_split = line_fixed.split("\t")
@@ -85,7 +86,7 @@ def load_xml(filename):
def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag):
words = {}
sentences = list(et.iter('s'))
for sentence in progress(sentences, "load-text", infile=True):
for sentence in progress(sentences, "load-text"):
for w in sentence.iter("w"):
words[w.get('id')] = Word.from_xml(w, do_msd_translate)
for pc in sentence.iter(pc_tag):