Redmine #1104: added script for removing Lexonomy meta stuff from XML

This commit is contained in:
Cyprian Laskowski 2021-03-16 15:35:41 +01:00
parent 006b553eda
commit 140f18ff88

27
scripts/delexonomise.py Normal file
View File

@ -0,0 +1,27 @@
#!/usr/bin/python3
import lxml.etree as lxml
import sys
import codecs
import re
import os
# Matches the declaration of the ``lxnm`` namespace prefix, including the
# single space that precedes it inside a start tag.
LXNM_NAMESPACE_DECL = re.compile(r' xmlns:lxnm="[^"]*"')

# Matches one ``lxnm:``-prefixed attribute (leading space, name, quoted value).
# The name is bounded by whitespace/'=' and the value by the closing quote so
# a match can never run on into a neighbouring attribute.
LXNM_ATTRIBUTE = re.compile(r' lxnm:[^\s=]+="[^"]*"')


def strip_lexonomy_markup(xml_text):
    """Return *xml_text* without ``lxnm`` namespace declarations/attributes.

    This is a purely textual substitution on serialised XML: every
    `` xmlns:lxnm="..."`` declaration and every `` lxnm:NAME="..."``
    attribute is deleted; everything else is returned unchanged.

    NOTE(review): because the match is textual, character data that happens
    to contain text of the same shape would be removed as well (the original
    line-by-line regexes behaved the same way).
    """
    without_declaration = LXNM_NAMESPACE_DECL.sub('', xml_text)
    return LXNM_ATTRIBUTE.sub('', without_declaration)


def delexonomise(input_file_name, output_file_name):
    """Rewrite an XML file with a ``dictionary`` root and no ``lxnm`` markup.

    The input is parsed, its root element is renamed to ``dictionary``, the
    tree is pretty-printed as UTF-8, the ``lxnm`` namespace declaration and
    ``lxnm:`` attributes are stripped, and the result is written to
    *output_file_name* as UTF-8.
    """
    tree = lxml.parse(input_file_name)
    tree.getroot().tag = 'dictionary'

    # Serialise in memory rather than through a fixed /tmp/temp.xml, which
    # would collide between concurrent runs and was left behind on errors.
    serialised = lxml.tostring(tree, encoding='UTF-8', pretty_print=True)

    # The bytes above are UTF-8, so decode and write with that encoding
    # explicitly instead of relying on the locale's default.
    cleaned = strip_lexonomy_markup(serialised.decode('utf-8'))
    with open(output_file_name, 'w', encoding='utf-8') as output_file:
        output_file.write(cleaned)


def main(argv):
    """Command-line entry point: ``delexonomise.py INPUT_XML OUTPUT_XML``."""
    if len(argv) < 3:
        # Replace the bare IndexError with an actionable message.
        sys.exit('usage: {} INPUT_XML OUTPUT_XML'.format(argv[0]))
    delexonomise(argv[1], argv[2])


if __name__ == '__main__':
    main(sys.argv)