Simplify the progress() calls (drop the infile/outfile arguments), because the parallel-processing code is going to be removed
This commit is contained in:
@@ -18,7 +18,8 @@ def load_files(args):
|
||||
skip_id_check = args.skip_id_check
|
||||
do_msd_translate = not args.no_msd_translate
|
||||
|
||||
for fname in progress(filenames, "files", outfile=True):
|
||||
for idx, fname in enumerate(filenames):
|
||||
print("FILE ", fname, "{}/{}".format(idx, len(filenames)))
|
||||
extension = pathlib.Path(fname).suffix
|
||||
|
||||
if extension == ".xml":
|
||||
@@ -49,7 +50,7 @@ def load_gz(filename):
|
||||
result.extend(words.values())
|
||||
|
||||
with gzip.open(filename, 'r') as fp:
|
||||
for line in progress(fp, 'load-gz', infile=True):
|
||||
for line in progress(fp, 'load-gz'):
|
||||
line_str = line.decode('utf8').strip()
|
||||
line_fixed = line_str.replace(',', '\t').replace('\t\t\t', '\t,\t')
|
||||
line_split = line_fixed.split("\t")
|
||||
@@ -85,7 +86,7 @@ def load_xml(filename):
|
||||
def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag):
|
||||
words = {}
|
||||
sentences = list(et.iter('s'))
|
||||
for sentence in progress(sentences, "load-text", infile=True):
|
||||
for sentence in progress(sentences, "load-text"):
|
||||
for w in sentence.iter("w"):
|
||||
words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
||||
for pc in sentence.iter(pc_tag):
|
||||
|
||||
Reference in New Issue
Block a user