Adding changeable pc tag (for when the tag is c and not pc)

This commit is contained in:
Ozbolt Menegatti 2019-02-12 12:08:30 +01:00
parent c1e85255c7
commit 2d373ab477

View File

@ -746,10 +746,10 @@ def load_files(args):
do_msd_translate = not args.no_msd_translate do_msd_translate = not args.no_msd_translate
for fname in filenames: for fname in filenames:
yield load_tei_file(fname, skip_id_check, do_msd_translate) yield load_tei_file(fname, skip_id_check, do_msd_translate, args.pc_tag)
def load_tei_file(filename, skip_id_check, do_msd_translate): def load_tei_file(filename, skip_id_check, do_msd_translate, pc_tag):
logging.info("LOADING FILE: {}".format(filename)) logging.info("LOADING FILE: {}".format(filename))
with open(filename, 'r') as fp: with open(filename, 'r') as fp:
@ -760,7 +760,7 @@ def load_tei_file(filename, skip_id_check, do_msd_translate):
words = {} words = {}
for w in et.iter("w"): for w in et.iter("w"):
words[w.get('id')] = Word(w, do_msd_translate) words[w.get('id')] = Word(w, do_msd_translate)
for pc in et.iter("pc"): for pc in et.iter(pc_tag):
words[pc.get('id')] = Word.pcWord(pc, do_msd_translate) words[pc.get('id')] = Word.pcWord(pc, do_msd_translate)
for l in et.iter("link"): for l in et.iter("link"):
@ -973,6 +973,8 @@ if __name__ == '__main__':
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info") parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true') parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')
parser.add_argument('--pc-tag', help='Tag for separators, usually pc or c', default="pc")
args = parser.parse_args() args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper()) logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())