New progress bar
This commit is contained in:
parent
3552f14b81
commit
70b05e8637
|
@ -63,6 +63,9 @@ class OutNoStatFormatter(Formatter):
|
||||||
|
|
||||||
def group(self):
|
def group(self):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "out-no-stat"
|
||||||
|
|
||||||
class AllFormatter(Formatter):
|
class AllFormatter(Formatter):
|
||||||
def header_repeat(self):
|
def header_repeat(self):
|
||||||
|
@ -81,6 +84,9 @@ class AllFormatter(Formatter):
|
||||||
def group(self):
|
def group(self):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "all"
|
||||||
|
|
||||||
class StatsFormatter(Formatter):
|
class StatsFormatter(Formatter):
|
||||||
def additional_init(self):
|
def additional_init(self):
|
||||||
self.stats = None
|
self.stats = None
|
||||||
|
@ -160,6 +166,9 @@ class StatsFormatter(Formatter):
|
||||||
|
|
||||||
def group(self):
|
def group(self):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "stat"
|
||||||
|
|
||||||
class OutFormatter(Formatter):
|
class OutFormatter(Formatter):
|
||||||
def additional_init(self):
|
def additional_init(self):
|
||||||
|
@ -187,4 +196,7 @@ class OutFormatter(Formatter):
|
||||||
self.f2.set_structure(structure)
|
self.f2.set_structure(structure)
|
||||||
|
|
||||||
def new_match(self, match):
|
def new_match(self, match):
|
||||||
self.f2.new_match(match)
|
self.f2.new_match(match)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "out"
|
|
@ -3,8 +3,7 @@ import logging
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from tqdm import tqdm
|
from progress_bar import progress
|
||||||
|
|
||||||
from word import Word
|
from word import Word
|
||||||
|
|
||||||
|
|
||||||
|
@ -23,7 +22,6 @@ def load_files(args):
|
||||||
|
|
||||||
|
|
||||||
def load_xml(filename):
|
def load_xml(filename):
|
||||||
logging.info("LOADING XML: {}".format(filename))
|
|
||||||
with open(filename, 'r') as fp:
|
with open(filename, 'r') as fp:
|
||||||
content = fp.read()
|
content = fp.read()
|
||||||
|
|
||||||
|
@ -35,7 +33,7 @@ def load_xml(filename):
|
||||||
def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag):
|
def file_sentence_generator(et, skip_id_check, do_msd_translate, pc_tag):
|
||||||
words = {}
|
words = {}
|
||||||
sentences = list(et.iter('s'))
|
sentences = list(et.iter('s'))
|
||||||
for sentence in tqdm(sentences, desc="load-text"):
|
for sentence in progress(sentences, "load-text", infile=True):
|
||||||
for w in sentence.iter("w"):
|
for w in sentence.iter("w"):
|
||||||
words[w.get('id')] = Word(w, do_msd_translate)
|
words[w.get('id')] = Word(w, do_msd_translate)
|
||||||
for pc in sentence.iter(pc_tag):
|
for pc in sentence.iter(pc_tag):
|
||||||
|
|
|
@ -2,11 +2,7 @@ from collections import defaultdict
|
||||||
|
|
||||||
from match import StructureMatch
|
from match import StructureMatch
|
||||||
from representation_assigner import RepresentationAssigner
|
from representation_assigner import RepresentationAssigner
|
||||||
|
from progress_bar import progress
|
||||||
try:
|
|
||||||
from tqdm import tqdm
|
|
||||||
except ImportError:
|
|
||||||
tqdm = lambda x: x
|
|
||||||
|
|
||||||
class MatchStore:
|
class MatchStore:
|
||||||
def __init__(self, args):
|
def __init__(self, args):
|
||||||
|
@ -35,7 +31,7 @@ class MatchStore:
|
||||||
yield sm
|
yield sm
|
||||||
|
|
||||||
def set_representations(self, word_renderer):
|
def set_representations(self, word_renderer):
|
||||||
for _1, sm in tqdm(self.data.items()):
|
for _1, sm in progress(self.data.items(), "representations"):
|
||||||
RepresentationAssigner.set_representations(sm, word_renderer)
|
RepresentationAssigner.set_representations(sm, word_renderer)
|
||||||
|
|
||||||
def determine_colocation_dispersions(self):
|
def determine_colocation_dispersions(self):
|
||||||
|
|
42
src/progress_bar.py
Normal file
42
src/progress_bar.py
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
"""Progress reporting for long-running loops.

Wraps an iterable in a tqdm bar when tqdm is installed and falls back to a
plain-text indicator otherwise.
"""
import sys

try:
    from tqdm import tqdm
except ImportError:
    # tqdm is optional; Progress then draws its own plain-text indicator.
    tqdm = None


class Progress:
    """Callable that yields from an iterable while reporting progress.

    Call sites may tag a loop with ``infile=True`` or ``outfile=True``.
    Loops tagged ``infile`` are displayed only while ``self.infile`` is true,
    loops tagged ``outfile`` only while it is false, and untagged loops are
    always displayed.
    """

    def __init__(self):
        # Whether loops tagged ``infile=True`` are displayed; see init().
        self.infile = False

    def __call__(self, iterable, description, infile=False, outfile=False):
        """Yield every element of ``iterable``, showing progress if enabled.

        Args:
            iterable: The elements to pass through unchanged and in order.
            description: Label shown in front of the progress indicator.
            infile: Tag this loop as one shown only when ``self.infile`` is
                true.
            outfile: Tag this loop as one shown only when ``self.infile`` is
                false.

        Yields:
            The elements of ``iterable``.
        """
        hidden = (infile and not self.infile) or (outfile and self.infile)
        if hidden:
            yield from iterable
        elif tqdm is not None:
            yield from tqdm(iterable, desc=description)
        else:
            yield from self._plain_progress(iterable, description)

    @staticmethod
    def _plain_progress(iterable, description):
        """Yield from ``iterable`` while drawing a one-line text indicator.

        Sized iterables get ``NN% (done/total)``, redrawn whenever the whole
        percentage changes.  Unsized iterables (e.g. generators) are consumed
        lazily instead of being copied into a list first, and show a bare
        count that is redrawn after every element.
        """
        try:
            total = len(iterable)
        except TypeError:
            total = None

        def percent_of(done):
            # Integer arithmetic keeps the value exact (29 of 100 is 29%);
            # an empty sized iterable counts as already complete.
            return done * 100 // total if total else 100

        def draw(done):
            if total is None:
                text = "{}: {}".format(description, done)
            else:
                text = "{}: {:02d}% ({}/{})".format(
                    description, percent_of(done), done, total)
            # Written to stderr, where tqdm draws by default, so progress
            # never mixes with data on stdout.  Flushed because the line has
            # no trailing newline and would otherwise sit in the buffer.
            print("\r" + text, end="", file=sys.stderr, flush=True)

        done = 0
        draw(done)
        try:
            for element in iterable:
                yield element
                # Count an element only once the consumer asks for the next
                # one, so the final redraw reads 100% (total/total).
                done += 1
                if total is None or percent_of(done) != percent_of(done - 1):
                    draw(done)
        finally:
            # Terminate the indicator line even if the consumer stops early.
            print("", file=sys.stderr, flush=True)

    def init(self, args):
        """Configure visibility from parsed command-line arguments.

        Args:
            args: Object with a boolean ``hide_inner_progress`` attribute;
                ``self.infile`` is set to its negation.
        """
        self.infile = not args.hide_inner_progress


progress = Progress()
|
||||||
|
|
13
src/wani.py
13
src/wani.py
|
@ -9,11 +9,7 @@ import subprocess
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
try:
|
from progress_bar import progress
|
||||||
from tqdm import tqdm
|
|
||||||
except ImportError:
|
|
||||||
tqdm = lambda x: x
|
|
||||||
|
|
||||||
from word import Word
|
from word import Word
|
||||||
from syntactic_structure import build_structures
|
from syntactic_structure import build_structures
|
||||||
from match_store import MatchStore
|
from match_store import MatchStore
|
||||||
|
@ -22,11 +18,10 @@ from writer import Writer
|
||||||
from loader import load_files
|
from loader import load_files
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def match_file(words, structures):
|
def match_file(words, structures):
|
||||||
matches = {s: [] for s in structures}
|
matches = {s: [] for s in structures}
|
||||||
|
|
||||||
for s in structures:
|
for s in progress(structures, "matching", infile=True):
|
||||||
for w in words:
|
for w in words:
|
||||||
mhere = s.match(w)
|
mhere = s.match(w)
|
||||||
for match in mhere:
|
for match in mhere:
|
||||||
|
@ -77,7 +72,7 @@ def main(args):
|
||||||
word_stats.add_words(words)
|
word_stats.add_words(words)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
for words in tqdm(load_files(args)):
|
for words in progress(load_files(args), "files", outfile=True):
|
||||||
matches = match_file(words, structures)
|
matches = match_file(words, structures)
|
||||||
# just save to temporary file, used for children of a parallel process
|
# just save to temporary file, used for children of a parallel process
|
||||||
# MUST NOT have more than one file
|
# MUST NOT have more than one file
|
||||||
|
@ -151,9 +146,11 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
parser.add_argument('--match-to-file', help='Do not use!')
|
parser.add_argument('--match-to-file', help='Do not use!')
|
||||||
parser.add_argument('--pickled-structures', help='Do not use!', action='store_true')
|
parser.add_argument('--pickled-structures', help='Do not use!', action='store_true')
|
||||||
|
parser.add_argument('--hide-inner-progress', help='Do not use!', action='store_true')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
||||||
|
progress.init(args)
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
main(args)
|
main(args)
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
from collections import defaultdict, Counter
|
from collections import defaultdict, Counter
|
||||||
|
|
||||||
|
from progress_bar import progress
|
||||||
|
|
||||||
|
|
||||||
class WordStats:
|
class WordStats:
|
||||||
def __init__(self, lemma_features):
|
def __init__(self, lemma_features):
|
||||||
self.raw_data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
|
self.raw_data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
|
||||||
|
@ -22,7 +25,7 @@ class WordStats:
|
||||||
|
|
||||||
def generate_renders(self):
|
def generate_renders(self):
|
||||||
num_words = defaultdict(int)
|
num_words = defaultdict(int)
|
||||||
for lemma, ld in self.raw_data.items():
|
for lemma, ld in progress(self.raw_data.items(), "lemma-render"):
|
||||||
self.rendered_words[lemma] = {}
|
self.rendered_words[lemma] = {}
|
||||||
freq_words = defaultdict(int)
|
freq_words = defaultdict(int)
|
||||||
common_msd = "*" * 10
|
common_msd = "*" * 10
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import logging
|
import logging
|
||||||
|
from progress_bar import progress
|
||||||
|
|
||||||
from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsFormatter
|
from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsFormatter
|
||||||
|
|
||||||
|
@ -122,7 +123,7 @@ class Writer:
|
||||||
fp = fp_open()
|
fp = fp_open()
|
||||||
self.write_header(fp)
|
self.write_header(fp)
|
||||||
|
|
||||||
for s in structures:
|
for s in progress(structures, "writing:{}".format(self.formatter)):
|
||||||
if self.multiple_output:
|
if self.multiple_output:
|
||||||
fp = fp_open(s.id)
|
fp = fp_open(s.id)
|
||||||
self.write_header(fp)
|
self.write_header(fp)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user