New progress bar

This commit is contained in:
Ozbolt Menegatti 2019-06-17 17:30:51 +02:00
parent 3552f14b81
commit 70b05e8637
7 changed files with 70 additions and 21 deletions

View File

@ -63,6 +63,9 @@ class OutNoStatFormatter(Formatter):
def group(self): def group(self):
return True return True
def __str__(self):
    """Return the short name of this formatter ("out-no-stat")."""
    return "out-no-stat"
class AllFormatter(Formatter): class AllFormatter(Formatter):
def header_repeat(self): def header_repeat(self):
@ -81,6 +84,9 @@ class AllFormatter(Formatter):
def group(self): def group(self):
return False return False
def __str__(self):
    """Return the short name of this formatter ("all")."""
    return "all"
class StatsFormatter(Formatter): class StatsFormatter(Formatter):
def additional_init(self): def additional_init(self):
self.stats = None self.stats = None
@ -160,6 +166,9 @@ class StatsFormatter(Formatter):
def group(self): def group(self):
return True return True
def __str__(self):
    """Return the short name of this formatter ("stat")."""
    return "stat"
class OutFormatter(Formatter): class OutFormatter(Formatter):
def additional_init(self): def additional_init(self):
@ -187,4 +196,7 @@ class OutFormatter(Formatter):
self.f2.set_structure(structure) self.f2.set_structure(structure)
def new_match(self, match): def new_match(self, match):
self.f2.new_match(match) self.f2.new_match(match)
def __str__(self):
    """Return the short name of this formatter ("out")."""
    return "out"

View File

@ -3,8 +3,7 @@ import logging
import re import re
import sys import sys
from tqdm import tqdm from progress_bar import progress
from word import Word from word import Word
@ -23,7 +22,6 @@ def load_files(args):
def load_xml(filename): def load_xml(filename):
logging.info("LOADING XML: {}".format(filename))
with open(filename, 'r') as fp: with open(filename, 'r') as fp:
content = fp.read() content = fp.read()
@ -35,7 +33,7 @@ def load_xml(filename):
def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag): def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag):
words = {} words = {}
sentences = list(et.iter('s')) sentences = list(et.iter('s'))
for sentence in tqdm(sentences, desc="load-text"): for sentence in progress(sentences, "load-text", infile=True):
for w in sentence.iter("w"): for w in sentence.iter("w"):
words[w.get('id')] = Word(w, do_msd_translate) words[w.get('id')] = Word(w, do_msd_translate)
for pc in sentence.iter(pc_tag): for pc in sentence.iter(pc_tag):

View File

@ -2,11 +2,7 @@ from collections import defaultdict
from match import StructureMatch from match import StructureMatch
from representation_assigner import RepresentationAssigner from representation_assigner import RepresentationAssigner
from progress_bar import progress
try:
from tqdm import tqdm
except ImportError:
tqdm = lambda x: x
class MatchStore: class MatchStore:
def __init__(self, args): def __init__(self, args):
@ -35,7 +31,7 @@ class MatchStore:
yield sm yield sm
def set_representations(self, word_renderer): def set_representations(self, word_renderer):
for _1, sm in tqdm(self.data.items()): for _1, sm in progress(self.data.items(), "representations"):
RepresentationAssigner.set_representations(sm, word_renderer) RepresentationAssigner.set_representations(sm, word_renderer)
def determine_colocation_dispersions(self): def determine_colocation_dispersions(self):

42
src/progress_bar.py Normal file
View File

@ -0,0 +1,42 @@
try:
from tqdm import tqdm
except ImportError:
tqdm = None
class Progress:
    """Callable that yields the items of an iterable while showing progress.

    Uses ``tqdm`` when it could be imported; otherwise falls back to a plain
    percentage line printed to standard output.

    Each call may be tagged as an "infile" (inner) or "outfile" (outer) loop.
    Depending on ``self.infile`` only one of the two kinds is displayed;
    untagged loops are always displayed.
    """

    def __init__(self):
        # True  -> show loops tagged ``infile``, hide loops tagged ``outfile``.
        # False -> the opposite.  Configured from the CLI arguments by init().
        self.infile = False

    def __call__(self, iterable, description, infile=False, outfile=False):
        """Yield every element of *iterable*, reporting progress if enabled.

        Args:
            iterable: Items to iterate over. In the fallback (no tqdm) mode it
                is materialized into a list so its length is known.
            description: Label shown in front of the progress indicator.
            infile: Mark this loop as an inner loop; it is shown only when
                ``self.infile`` is true.
            outfile: Mark this loop as an outer loop; it is shown only when
                ``self.infile`` is false.

        Yields:
            The elements of *iterable*, unchanged and in order.
        """
        hidden = (infile and not self.infile) or (outfile and self.infile)
        if hidden:
            yield from iterable
            return

        if tqdm is not None:
            yield from tqdm(iterable, desc=description)
            return

        # Fallback without tqdm: the length is needed to compute a percentage.
        items = list(iterable)
        total = len(items)
        last_percent = -1
        for done, element in enumerate(items):
            # ``done`` elements have been fully consumed at this point.
            last_percent = self._report(description, done, total, last_percent)
            yield element
        if total:
            # Report completion so the line ends at 100% (total/total).
            self._report(description, total, total, last_percent)
        print("")

    @staticmethod
    def _report(description, done, total, last_percent):
        """Print the progress line if the whole percentage increased.

        Returns the percentage that is now displayed.
        """
        # Integer arithmetic avoids float rounding (e.g. 29/100*100 -> 28).
        percent = done * 100 // total
        if percent <= last_percent:
            return last_percent
        # flush=True: the line has no trailing newline, so a line-buffered
        # stdout would otherwise not display it until the loop finishes.
        print("\r{}: {:02d}% ({}/{})".format(description, percent, done, total),
              end="", flush=True)
        return percent

    def init(self, args):
        """Configure which loops are shown from the parsed CLI arguments.

        Inner-loop progress is enabled unless ``args.hide_inner_progress``
        is set.
        """
        self.infile = not args.hide_inner_progress
# Shared module-level instance; other modules use it via
# `from progress_bar import progress` and call it around their loops.
progress = Progress()

View File

@ -9,11 +9,7 @@ import subprocess
import concurrent.futures import concurrent.futures
import tempfile import tempfile
try: from progress_bar import progress
from tqdm import tqdm
except ImportError:
tqdm = lambda x: x
from word import Word from word import Word
from syntactic_structure import build_structures from syntactic_structure import build_structures
from match_store import MatchStore from match_store import MatchStore
@ -22,11 +18,10 @@ from writer import Writer
from loader import load_files from loader import load_files
def match_file(words, structures): def match_file(words, structures):
matches = {s: [] for s in structures} matches = {s: [] for s in structures}
for s in structures: for s in progress(structures, "matching", infile=True):
for w in words: for w in words:
mhere = s.match(w) mhere = s.match(w)
for match in mhere: for match in mhere:
@ -77,7 +72,7 @@ def main(args):
word_stats.add_words(words) word_stats.add_words(words)
else: else:
for words in tqdm(load_files(args)): for words in progress(load_files(args), "files", outfile=True):
matches = match_file(words, structures) matches = match_file(words, structures)
# just save to temporary file, used for children of a parallel process # just save to temporary file, used for children of a parallel process
# MUST NOT have more than one file # MUST NOT have more than one file
@ -151,9 +146,11 @@ if __name__ == '__main__':
parser.add_argument('--match-to-file', help='Do not use!') parser.add_argument('--match-to-file', help='Do not use!')
parser.add_argument('--pickled-structures', help='Do not use!', action='store_true') parser.add_argument('--pickled-structures', help='Do not use!', action='store_true')
parser.add_argument('--hide-inner-progress', help='Do not use!', action='store_true')
args = parser.parse_args() args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper()) logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
progress.init(args)
start = time.time() start = time.time()
main(args) main(args)

View File

@ -1,5 +1,8 @@
from collections import defaultdict, Counter from collections import defaultdict, Counter
from progress_bar import progress
class WordStats: class WordStats:
def __init__(self, lemma_features): def __init__(self, lemma_features):
self.raw_data = defaultdict(lambda: defaultdict(lambda: defaultdict(int))) self.raw_data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
@ -22,7 +25,7 @@ class WordStats:
def generate_renders(self): def generate_renders(self):
num_words = defaultdict(int) num_words = defaultdict(int)
for lemma, ld in self.raw_data.items(): for lemma, ld in progress(self.raw_data.items(), "lemma-render"):
self.rendered_words[lemma] = {} self.rendered_words[lemma] = {}
freq_words = defaultdict(int) freq_words = defaultdict(int)
common_msd = "*" * 10 common_msd = "*" * 10

View File

@ -1,4 +1,5 @@
import logging import logging
from progress_bar import progress
from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsFormatter from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsFormatter
@ -122,7 +123,7 @@ class Writer:
fp = fp_open() fp = fp_open()
self.write_header(fp) self.write_header(fp)
for s in structures: for s in progress(structures, "writing:{}".format(self.formatter)):
if self.multiple_output: if self.multiple_output:
fp = fp_open(s.id) fp = fp_open(s.id)
self.write_header(fp) self.write_header(fp)