"""Fix invalid XML ids that start with a digit.

The input file is copied to the output file line by line.  On each line:

* every ``xml:id="..."`` value that begins with a digit gets an ``s``
  prepended, and
* every ``#`` reference that is followed by a digit gets the same ``s``
  inserted after the ``#``, so references keep pointing at the renamed ids.

Usage::

    python fix_xml_ids.py -infile IN.xml -outfile OUT.xml [-encoding utf-8]
"""
# Provenance: added as scripts/fix_xml_ids.py by commit
# 8e4133d45e6ab4c539ffb5e8def34a2193f40ee6 (Cyprian Laskowski, 2020-11-19),
# "IssueID #1487: added helper script for fixing xml ids".

import argparse
import re

# Opening of an xml:id attribute whose value begins with a digit.  The digit
# sits in a lookahead so it is kept and only the prefix is inserted.
_ID_START = re.compile(r'xml:id="(?=\d)')

# A '#' that introduces a reference to a digit-initial id.  The lookbehind
# skips numeric character references such as "&#160;"; the lookahead leaves
# references to ids that are not renamed (e.g. "#abc") untouched.
_REF_START = re.compile(r'(?<!&)#(?=\d)')


def fix_line(line: str) -> str:
    """Return *line* with digit-initial ids and their references prefixed.

    Only text on this single line is considered; the function does no XML
    parsing, so a ``#`` followed by a digit is rewritten wherever it occurs
    (except directly after ``&``).
    """
    line = _ID_START.sub('xml:id="s', line)
    return _REF_START.sub('#s', line)


def main(argv=None) -> None:
    """Parse the command line and rewrite the input file into the output file.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    arg_parser = argparse.ArgumentParser(description='Fix invalid XML ids.')
    # Both paths are required: without them open() would fail on None with
    # an unhelpful TypeError instead of a usage message.
    arg_parser.add_argument('-infile', type=str, required=True,
                            help='Input file')
    arg_parser.add_argument('-outfile', type=str, required=True,
                            help='Output file')
    arg_parser.add_argument('-encoding', type=str, default='utf-8',
                            help='Text encoding of both files '
                                 '(default: utf-8)')
    arguments = arg_parser.parse_args(argv)

    # The input is opened first so that a missing/unreadable input does not
    # leave a freshly truncated output file behind.  The context manager
    # closes both files even if reading or writing fails part-way.
    with open(arguments.infile, 'r',
              encoding=arguments.encoding) as input_file, \
            open(arguments.outfile, 'w',
                 encoding=arguments.encoding) as output_file:
        for line in input_file:
            output_file.write(fix_line(line))


if __name__ == '__main__':
    main()