"""Rewrite XML ids that start with a digit so that they become valid names.

An XML id may not begin with a digit.  This script prefixes every such
``xml:id`` value with ``s`` and applies the same prefix to ``#`` references
that point at a digit-leading id, copying everything else through unchanged.

Usage::

    python script.py -infile INPUT.xml -outfile OUTPUT.xml
"""

import argparse
import codecs  # no longer used here; kept so the file's import set is unchanged
import re

# Opening of an xml:id attribute whose value starts with a digit.  The
# lookahead keeps the digit itself out of the match so it is preserved.
_XML_ID_RE = re.compile(r'xml:id="(?=\d)')

# A '#' reference to a digit-leading id.  The lookbehind skips numeric
# character references such as '&#123;', which must not be rewritten.
_REFERENCE_RE = re.compile(r'(?<!&)#(?=\d)')

_PREFIX = 's'


def fix_ids(line):
    """Return *line* with digit-leading ids and references to them prefixed.

    Only ids that actually start with a digit are touched, so references to
    ids that were already valid stay intact and running the function on its
    own output changes nothing.
    """
    line = _XML_ID_RE.sub('xml:id="' + _PREFIX, line)
    return _REFERENCE_RE.sub('#' + _PREFIX, line)


def main(argv=None):
    """Parse the command line and convert the input file into the output file.

    *argv* is the list of command-line arguments; ``None`` means
    ``sys.argv[1:]``, which is argparse's default.
    """
    arg_parser = argparse.ArgumentParser(description='Fix invalid XML ids.')
    # Both paths are mandatory: without them there is nothing to open.
    arg_parser.add_argument('-infile', type=str, required=True,
                            help='Input file')
    arg_parser.add_argument('-outfile', type=str, required=True,
                            help='Output file')
    arguments = arg_parser.parse_args(argv)

    # The input is opened first so that a bad input path does not truncate
    # the output file.  Both handles are closed even if processing fails.
    with open(arguments.infile, 'r') as source, \
            open(arguments.outfile, 'w') as target:
        for line in source:
            target.write(fix_ids(line))


if __name__ == '__main__':
    main()