Added --count-files option for more verbose output

This commit is contained in:
Ozbolt Menegatti 2019-02-12 12:19:21 +01:00
parent 2d373ab477
commit 31483c79ff

13
wani.py
View File

@@ -745,12 +745,16 @@ def load_files(args):
skip_id_check = args.skip_id_check skip_id_check = args.skip_id_check
do_msd_translate = not args.no_msd_translate do_msd_translate = not args.no_msd_translate
for fname in filenames: for n, fname in enumerate(filenames):
yield load_tei_file(fname, skip_id_check, do_msd_translate, args.pc_tag) if args.count_files:
status = " :: {} / {}".format(n, len(filenames))
else:
status = ""
yield load_tei_file(fname, skip_id_check, do_msd_translate, args.pc_tag, status)
def load_tei_file(filename, skip_id_check, do_msd_translate, pc_tag): def load_tei_file(filename, skip_id_check, do_msd_translate, pc_tag, status):
logging.info("LOADING FILE: {}".format(filename)) logging.info("LOADING FILE: {}{}".format(filename, status))
with open(filename, 'r') as fp: with open(filename, 'r') as fp:
xmlstring = re.sub(' xmlns="[^"]+"', '', fp.read(), count=1) xmlstring = re.sub(' xmlns="[^"]+"', '', fp.read(), count=1)
@@ -971,6 +975,7 @@ if __name__ == '__main__':
parser.add_argument('--without-rep', help='Will not write representtaions in output', action='store_true') parser.add_argument('--without-rep', help='Will not write representtaions in output', action='store_true')
parser.add_argument('--group', help='Group collocations with same collocation ID', action='store_true') parser.add_argument('--group', help='Group collocations with same collocation ID', action='store_true')
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info") parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
parser.add_argument('--count-files', help="Count files: more verbose output", action='store_true')
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true') parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')
parser.add_argument('--pc-tag', help='Tag for separators, usually pc or c', default="pc") parser.add_argument('--pc-tag', help='Tag for separators, usually pc or c', default="pc")