#!/usr/bin/python3
"""Strip Lexonomy-specific markup from an XML dictionary file.

Usage:
    delexonomise.py INPUT_XML OUTPUT_XML

The root element of INPUT_XML is renamed to ``dictionary``, the tree is
pretty-printed as UTF-8, and every ``xmlns:lxnm`` namespace declaration and
``lxnm:``-prefixed attribute is removed from the serialised text before it is
written to OUTPUT_XML.
"""

import io
import re
import sys

import lxml.etree as lxml

# Both patterns are applied to the serialised XML text, not to the parsed
# tree.  '.' never matches a newline, so each match stays within one line of
# the pretty-printed output.
# NOTE(review): being purely textual, the patterns would also strip matching
# text that appears inside element content, not only real attributes.
_XMLNS_DECLARATION = re.compile(r' xmlns:lxnm=".*?"')
_LXNM_ATTRIBUTE = re.compile(r' lxnm:.+?=".*?"')


def strip_lexonomy_markup(xml_text):
    """Return *xml_text* with lxnm namespace declarations and attributes removed."""
    without_declaration = _XMLNS_DECLARATION.sub('', xml_text)
    return _LXNM_ATTRIBUTE.sub('', without_declaration)


def delexonomise(input_file_name, output_file_name):
    """Convert the XML file *input_file_name* and write it to *output_file_name*.

    The document is serialised into memory instead of a fixed ``/tmp`` path,
    so concurrent runs cannot clobber each other and nothing is left behind
    if an error occurs.
    """
    tree = lxml.parse(input_file_name)
    tree.getroot().tag = 'dictionary'

    serialised = io.BytesIO()
    tree.write(serialised, encoding='UTF-8', pretty_print=True)
    # The tree was serialised as UTF-8, so decode and re-encode explicitly
    # rather than relying on the locale's default encoding.
    xml_text = serialised.getvalue().decode('utf-8')

    with open(output_file_name, 'w', encoding='utf-8') as output_file:
        output_file.write(strip_lexonomy_markup(xml_text))


def main(argv=None):
    """Run the command-line interface; *argv* defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit('usage: {} INPUT_XML OUTPUT_XML'.format(argv[0]))
    delexonomise(argv[1], argv[2])


if __name__ == '__main__':
    main()