#!/usr/bin/python3
"""Remove Lexonomy metadata from an XML dictionary export (Redmine #1104).

Usage: delexonomise.py INPUT_XML OUTPUT_XML

The root element of INPUT_XML is renamed to ``dictionary``, the document is
pretty-printed, and every ``xmlns:lxnm="..."`` namespace declaration and
``lxnm:...="..."`` attribute is deleted from the serialised text before it is
written to OUTPUT_XML as UTF-8.
"""

import codecs
import os
import re
import sys

# Both patterns run on the serialised XML text and match on the literal
# ``lxnm`` prefix. ``.`` does not match a newline, so a match never spans
# more than one line of the document.
_LXNM_NAMESPACE_RE = re.compile(r' xmlns:lxnm=".*?"')
_LXNM_ATTRIBUTE_RE = re.compile(r' lxnm:.+?=".*?"')


def strip_lexonomy_markup(xml_text: str) -> str:
    """Return *xml_text* with Lexonomy declarations and attributes removed.

    Namespace declarations (`` xmlns:lxnm="..."``) are removed first, then
    any `` lxnm:NAME="..."`` attribute. Text without such markup is returned
    unchanged.
    """
    without_namespace = _LXNM_NAMESPACE_RE.sub('', xml_text)
    return _LXNM_ATTRIBUTE_RE.sub('', without_namespace)


def delexonomise(input_file_name: str, output_file_name: str) -> None:
    """Convert the Lexonomy export *input_file_name* into *output_file_name*.

    The document is serialised in memory rather than through a shared,
    fixed-name temporary file, so parallel runs cannot overwrite each
    other's intermediate data and nothing is left behind on failure.
    """
    # Imported here so the text-only helper above stays usable (and
    # testable) on machines where lxml is not installed.
    import lxml.etree as lxml

    tree = lxml.parse(input_file_name)
    tree.getroot().tag = 'dictionary'
    serialised = lxml.tostring(tree, encoding='UTF-8', pretty_print=True)

    # The bytes are UTF-8 by construction; decode and re-encode explicitly
    # instead of relying on the locale's default encoding.
    cleaned = strip_lexonomy_markup(serialised.decode('utf-8'))
    with open(output_file_name, 'w', encoding='utf-8') as output_file:
        output_file.write(cleaned)


def main(argv=None) -> int:
    """Run the command-line interface and return a process exit status.

    *argv* is the argument list without the program name; it defaults to
    ``sys.argv[1:]``. Returns 2 when fewer than two arguments are given,
    0 on success. Arguments beyond the first two are ignored.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        program = os.path.basename(sys.argv[0]) or 'delexonomise.py'
        print('usage: {} INPUT_XML OUTPUT_XML'.format(program),
              file=sys.stderr)
        return 2
    delexonomise(args[0], args[1])
    return 0


if __name__ == '__main__':
    sys.exit(main())