|
|
|
@ -745,12 +745,16 @@ def load_files(args):
|
|
|
|
|
skip_id_check = args.skip_id_check
|
|
|
|
|
do_msd_translate = not args.no_msd_translate
|
|
|
|
|
|
|
|
|
|
for fname in filenames:
|
|
|
|
|
yield load_tei_file(fname, skip_id_check, do_msd_translate, args.pc_tag)
|
|
|
|
|
for n, fname in enumerate(filenames):
|
|
|
|
|
if args.count_files:
|
|
|
|
|
status = " :: {} / {}".format(n, len(filenames))
|
|
|
|
|
else:
|
|
|
|
|
status = ""
|
|
|
|
|
yield load_tei_file(fname, skip_id_check, do_msd_translate, args.pc_tag, status)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_tei_file(filename, skip_id_check, do_msd_translate, pc_tag):
|
|
|
|
|
logging.info("LOADING FILE: {}".format(filename))
|
|
|
|
|
def load_tei_file(filename, skip_id_check, do_msd_translate, pc_tag, status):
|
|
|
|
|
logging.info("LOADING FILE: {}{}".format(filename, status))
|
|
|
|
|
|
|
|
|
|
with open(filename, 'r') as fp:
|
|
|
|
|
xmlstring = re.sub(' xmlns="[^"]+"', '', fp.read(), count=1)
|
|
|
|
@ -971,6 +975,7 @@ if __name__ == '__main__':
|
|
|
|
|
# Optional command-line flags. The help strings are user-facing (printed by
# --help), so they are kept free of typos.
parser.add_argument('--without-rep', help='Will not write representations in output', action='store_true')
parser.add_argument('--group', help='Group collocations with same collocation ID', action='store_true')
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
# When set, load_files() appends " :: <index> / <total>" to every
# "LOADING FILE" log line emitted by load_tei_file().
parser.add_argument('--count-files', help="Count files: more verbose output", action='store_true')
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')

parser.add_argument('--pc-tag', help='Tag for separators, usually pc or c', default="pc")
|
|
|
|
|