New progress bar
This commit is contained in:
parent
3552f14b81
commit
70b05e8637
|
@ -63,6 +63,9 @@ class OutNoStatFormatter(Formatter):
|
|||
|
||||
def group(self):
|
||||
return True
|
||||
|
||||
def __str__(self):
|
||||
return "out-no-stat"
|
||||
|
||||
class AllFormatter(Formatter):
|
||||
def header_repeat(self):
|
||||
|
@ -81,6 +84,9 @@ class AllFormatter(Formatter):
|
|||
def group(self):
|
||||
return False
|
||||
|
||||
def __str__(self):
|
||||
return "all"
|
||||
|
||||
class StatsFormatter(Formatter):
|
||||
def additional_init(self):
|
||||
self.stats = None
|
||||
|
@ -160,6 +166,9 @@ class StatsFormatter(Formatter):
|
|||
|
||||
def group(self):
|
||||
return True
|
||||
|
||||
def __str__(self):
|
||||
return "stat"
|
||||
|
||||
class OutFormatter(Formatter):
|
||||
def additional_init(self):
|
||||
|
@ -187,4 +196,7 @@ class OutFormatter(Formatter):
|
|||
self.f2.set_structure(structure)
|
||||
|
||||
def new_match(self, match):
|
||||
self.f2.new_match(match)
|
||||
self.f2.new_match(match)
|
||||
|
||||
def __str__(self):
|
||||
return "out"
|
|
@ -3,8 +3,7 @@ import logging
|
|||
import re
|
||||
import sys
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from progress_bar import progress
|
||||
from word import Word
|
||||
|
||||
|
||||
|
@ -23,7 +22,6 @@ def load_files(args):
|
|||
|
||||
|
||||
def load_xml(filename):
|
||||
logging.info("LOADING XML: {}".format(filename))
|
||||
with open(filename, 'r') as fp:
|
||||
content = fp.read()
|
||||
|
||||
|
@ -35,7 +33,7 @@ def load_xml(filename):
|
|||
def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag):
|
||||
words = {}
|
||||
sentences = list(et.iter('s'))
|
||||
for sentence in tqdm(sentences, desc="load-text"):
|
||||
for sentence in progress(sentences, "load-text", infile=True):
|
||||
for w in sentence.iter("w"):
|
||||
words[w.get('id')] = Word(w, do_msd_translate)
|
||||
for pc in sentence.iter(pc_tag):
|
||||
|
|
|
@ -2,11 +2,7 @@ from collections import defaultdict
|
|||
|
||||
from match import StructureMatch
|
||||
from representation_assigner import RepresentationAssigner
|
||||
|
||||
try:
|
||||
from tqdm import tqdm
|
||||
except ImportError:
|
||||
tqdm = lambda x: x
|
||||
from progress_bar import progress
|
||||
|
||||
class MatchStore:
|
||||
def __init__(self, args):
|
||||
|
@ -35,7 +31,7 @@ class MatchStore:
|
|||
yield sm
|
||||
|
||||
def set_representations(self, word_renderer):
|
||||
for _1, sm in tqdm(self.data.items()):
|
||||
for _1, sm in progress(self.data.items(), "representations"):
|
||||
RepresentationAssigner.set_representations(sm, word_renderer)
|
||||
|
||||
def determine_colocation_dispersions(self):
|
||||
|
|
42
src/progress_bar.py
Normal file
42
src/progress_bar.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
try:
    from tqdm import tqdm
except ImportError:
    # tqdm is optional: without it Progress falls back to a plain-text counter.
    tqdm = None


class Progress:
    """Wrap an iterable and report progress while it is being consumed.

    Calling an instance returns a generator that yields the items of the
    wrapped iterable unchanged.  Depending on the ``infile``/``outfile`` tags
    and on the ``self.infile`` switch, the items are either passed through
    silently, shown with a ``tqdm`` bar (when tqdm is importable), or shown
    with a simple percentage line written to standard output.
    """

    def __init__(self):
        # When False, loops tagged ``infile=True`` are not displayed; when
        # True, loops tagged ``outfile=True`` are not displayed instead.
        self.infile = False

    def __call__(self, iterable, description, infile=False, outfile=False):
        """Yield every item of *iterable*, optionally displaying progress.

        Args:
            iterable: the items to iterate over.
            description: label shown in front of the progress display.
            infile: tag this loop so it is displayed only while
                ``self.infile`` is True.
            outfile: tag this loop so it is displayed only while
                ``self.infile`` is False.

        Yields:
            The items of *iterable*, in order and unmodified.
        """
        hidden = (infile and not self.infile) or (outfile and self.infile)
        if hidden:
            yield from iterable
            return

        if tqdm is not None:
            yield from tqdm(iterable, desc=description)
            return

        yield from self._plain(iterable, description)

    def _plain(self, iterable, description):
        """Fallback display used when tqdm is unavailable.

        The iterable is materialised first because the total count is needed
        to compute a percentage.
        """
        items = list(iterable)
        total = len(items)
        last_percent = -1
        try:
            for done, item in enumerate(items):
                # Integer arithmetic avoids float rounding dropping a step.
                percent = done * 100 // total
                # Only redraw when the whole-number percentage changes.
                if percent > last_percent:
                    self._report(description, percent, done, total)
                    last_percent = percent
                yield item
            if total:
                # Report completion; inside the loop ``done`` never reaches
                # ``total``, so without this the display would stop short.
                self._report(description, 100, total, total)
        finally:
            # Terminate the "\r"-rewritten status line, also when the
            # consumer stops iterating early.
            print("")

    @staticmethod
    def _report(description, percent, done, total):
        """Rewrite the current status line in place."""
        # flush=True: the line has no trailing newline, so a line-buffered
        # stdout would otherwise hold it back until the loop finishes.
        print(
            "\r{}: {:02d}% ({}/{})".format(description, percent, done, total),
            end="",
            flush=True,
        )

    def init(self, args):
        """Configure the display from parsed command-line arguments.

        Reads ``args.hide_inner_progress``; inner (``infile``-tagged) loops
        are displayed only when that flag is false.
        """
        self.infile = not args.hide_inner_progress


# Shared instance imported by the other modules.
progress = Progress()
|
||||
|
13
src/wani.py
13
src/wani.py
|
@ -9,11 +9,7 @@ import subprocess
|
|||
import concurrent.futures
|
||||
import tempfile
|
||||
|
||||
try:
|
||||
from tqdm import tqdm
|
||||
except ImportError:
|
||||
tqdm = lambda x: x
|
||||
|
||||
from progress_bar import progress
|
||||
from word import Word
|
||||
from syntactic_structure import build_structures
|
||||
from match_store import MatchStore
|
||||
|
@ -22,11 +18,10 @@ from writer import Writer
|
|||
from loader import load_files
|
||||
|
||||
|
||||
|
||||
def match_file(words, structures):
|
||||
matches = {s: [] for s in structures}
|
||||
|
||||
for s in structures:
|
||||
for s in progress(structures, "matching", infile=True):
|
||||
for w in words:
|
||||
mhere = s.match(w)
|
||||
for match in mhere:
|
||||
|
@ -77,7 +72,7 @@ def main(args):
|
|||
word_stats.add_words(words)
|
||||
|
||||
else:
|
||||
for words in tqdm(load_files(args)):
|
||||
for words in progress(load_files(args), "files", outfile=True):
|
||||
matches = match_file(words, structures)
|
||||
# just save to temporary file, used for children of a parallel process
|
||||
# MUST NOT have more than one file
|
||||
|
@ -151,9 +146,11 @@ if __name__ == '__main__':
|
|||
|
||||
parser.add_argument('--match-to-file', help='Do not use!')
|
||||
parser.add_argument('--pickled-structures', help='Do not use!', action='store_true')
|
||||
parser.add_argument('--hide-inner-progress', help='Do not use!', action='store_true')
|
||||
|
||||
args = parser.parse_args()
|
||||
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
||||
progress.init(args)
|
||||
|
||||
start = time.time()
|
||||
main(args)
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
from collections import defaultdict, Counter
|
||||
|
||||
from progress_bar import progress
|
||||
|
||||
|
||||
class WordStats:
|
||||
def __init__(self, lemma_features):
|
||||
self.raw_data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
|
||||
|
@ -22,7 +25,7 @@ class WordStats:
|
|||
|
||||
def generate_renders(self):
|
||||
num_words = defaultdict(int)
|
||||
for lemma, ld in self.raw_data.items():
|
||||
for lemma, ld in progress(self.raw_data.items(), "lemma-render"):
|
||||
self.rendered_words[lemma] = {}
|
||||
freq_words = defaultdict(int)
|
||||
common_msd = "*" * 10
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import logging
|
||||
from progress_bar import progress
|
||||
|
||||
from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsFormatter
|
||||
|
||||
|
@ -122,7 +123,7 @@ class Writer:
|
|||
fp = fp_open()
|
||||
self.write_header(fp)
|
||||
|
||||
for s in structures:
|
||||
for s in progress(structures, "writing:{}".format(self.formatter)):
|
||||
if self.multiple_output:
|
||||
fp = fp_open(s.id)
|
||||
self.write_header(fp)
|
||||
|
|
Loading…
Reference in New Issue
Block a user