You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

49 lines
1.0 KiB

import xml.etree.ElementTree as ElementTree
import sys
# Command-line interface: <input.xml> <output.xml>
FILE_IN = sys.argv[1]
FILE_OUT = sys.argv[2]

# Let the XML parser open the file itself: it reads raw bytes and honours the
# encoding declared in the document (or its BOM), instead of decoding the file
# with the platform's default text encoding first.
xml_tree = ElementTree.parse(FILE_IN).getroot()
SEARCH_FOR = ["u1"]  # NOTE(review): not referenced in the visible part of this script
ID = "ozbo_id"  # should be the same in all files!
out_xml = ElementTree.Element('top')  # NOTE(review): not referenced in the visible part of this script
ctr = 0  # next numeric ID to hand out; advanced only by add_ctr()


def add_ctr(el, previous=False):
    """Stamp *el* with a numeric ID stored in its ``ID`` attribute.

    Args:
        el: Element whose ``attrib`` mapping receives the ID (as a string).
        previous: When true, reuse the most recently issued ID instead of
            issuing a new one. Note that this is the last ID issued overall
            (``ctr - 1``), whichever element received it.

    The module-level counter ``ctr`` is advanced only when a fresh ID is
    issued, so reused IDs do not create gaps in the numbering.
    """
    global ctr
    # Reuse is only meaningful once at least one ID exists; otherwise fall
    # through and issue a fresh one rather than writing the dangling ID "-1".
    if previous and ctr > 0:
        el.attrib[ID] = str(ctr - 1)
        return
    el.attrib[ID] = str(ctr)
    ctr += 1
# Sentinel meaning "no <w1>/<w2> has been seen since the last reset".
empty = ElementTree.Element('')
last_w1 = empty
last_w2 = empty

# Visit every descendant element and stamp the w1/w2/w3 elements with an ID.
for el in xml_tree.findall(".//*"):
    if el.tag == "w3":
        # w3 elements always receive a fresh ID.
        add_ctr(el)
    elif el.tag == "w2":
        # Reuse the latest ID only when a real predecessor exists and carries
        # the same text. The identity check matters because the sentinel's
        # text is None, which would otherwise match any text-less element.
        add_ctr(el, last_w2 is not empty and el.text == last_w2.text)
        last_w2 = el
    elif el.tag == "w1":
        add_ctr(el, last_w1 is not empty and el.text == last_w1.text)
        last_w1 = el
    elif el.tag == "S":
        # reset last_w1 / last_w2 so nothing is matched across an <S> element
        # (presumably a sentence boundary -- confirm against the data format)
        last_w1 = empty
        last_w2 = empty

# tostring() with an explicit encoding returns bytes, hence binary mode.
with open(FILE_OUT, "wb") as fp:
    fp.write(ElementTree.tostring(xml_tree, encoding='utf8', method='xml'))