Adding changeable pc tag (when it is c and not pc)
This commit is contained in:
parent
c1e85255c7
commit
2d373ab477
8
wani.py
8
wani.py
|
@ -746,10 +746,10 @@ def load_files(args):
|
|||
do_msd_translate = not args.no_msd_translate
|
||||
|
||||
for fname in filenames:
|
||||
yield load_tei_file(fname, skip_id_check, do_msd_translate)
|
||||
yield load_tei_file(fname, skip_id_check, do_msd_translate, args.pc_tag)
|
||||
|
||||
|
||||
def load_tei_file(filename, skip_id_check, do_msd_translate):
|
||||
def load_tei_file(filename, skip_id_check, do_msd_translate, pc_tag):
|
||||
logging.info("LOADING FILE: {}".format(filename))
|
||||
|
||||
with open(filename, 'r') as fp:
|
||||
|
@ -760,7 +760,7 @@ def load_tei_file(filename, skip_id_check, do_msd_translate):
|
|||
words = {}
|
||||
for w in et.iter("w"):
|
||||
words[w.get('id')] = Word(w, do_msd_translate)
|
||||
for pc in et.iter("pc"):
|
||||
for pc in et.iter(pc_tag):
|
||||
words[pc.get('id')] = Word.pcWord(pc, do_msd_translate)
|
||||
|
||||
for l in et.iter("link"):
|
||||
|
@ -973,6 +973,8 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
|
||||
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')
|
||||
|
||||
parser.add_argument('--pc-tag', help='Tag for separators, usually pc or c', default="pc")
|
||||
|
||||
args = parser.parse_args()
|
||||
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user