49 lines
1.0 KiB
XML
49 lines
1.0 KiB
XML
|
import xml.etree.ElementTree as ElementTree
|
||
|
import sys
|
||
|
|
||
|
# Command line: <script> INPUT_XML OUTPUT_XML
if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("usage: %s INPUT_XML OUTPUT_XML" % sys.argv[0])

FILE_IN = sys.argv[1]
FILE_OUT = sys.argv[2]

# Read raw bytes so the XML parser decodes the document itself (honouring
# the encoding named in the XML declaration) rather than having the text
# layer decode it with the platform's locale encoding first.
with open(FILE_IN, "rb") as fp:
    xml_tree = ElementTree.XML(fp.read())
|
||
|
|
||
|
SEARCH_FOR = ["u1"]  # NOTE(review): not referenced in the visible code
ID = "ozbo_id"  # should be the same in all files!

out_xml = ElementTree.Element('top')  # NOTE(review): never used in the visible code

# Next fresh ID to hand out; advanced by add_ctr().
ctr = 0


def add_ctr(el, previous=False):
    """Stamp ``el`` with an ``ID`` attribute taken from the running counter.

    Args:
        el: Element whose ``attrib`` mapping receives the ID (as a string).
        previous: If true, reuse the most recently issued ID (``ctr - 1``)
            and leave the counter untouched; otherwise issue ``ctr`` and
            advance the counter by one.

    If ``previous`` is requested before any ID has been issued there is
    nothing to reuse, so a fresh ID is issued rather than writing "-1".
    """
    global ctr
    if previous and ctr > 0:
        el.attrib[ID] = str(ctr - 1)
        return
    # NOTE(review): the counter advances only when a fresh ID is issued, so
    # consecutive repeats all share one ID - confirm this matches intent.
    el.attrib[ID] = str(ctr)
    ctr += 1
|
||
|
|
||
|
# Sentinel meaning "no w1/w2 seen yet in the current <S>".
empty = ElementTree.Element('')
last_w1 = empty
last_w2 = empty


def _repeats_last(el, last):
    """Return True if ``el`` has the same text as the remembered element.

    The sentinel is checked by identity: its ``text`` is None, so a plain
    text comparison would wrongly treat a text-less element as a repeat of
    "nothing".
    """
    return last is not empty and el.text == last.text


# Walk every descendant of the root in document order and assign IDs.
for el in xml_tree.findall(".//*"):
    if el.tag == "w3":
        # w3 elements always receive a fresh ID.
        add_ctr(el)

    elif el.tag == "w2":
        # A w2 whose text equals the previous w2's text reuses an ID.
        add_ctr(el, _repeats_last(el, last_w2))
        last_w2 = el

    elif el.tag == "w1":
        # Same rule for w1, tracked independently of w2.
        add_ctr(el, _repeats_last(el, last_w1))
        last_w1 = el

    # reset last_w1 / last_w2
    elif el.tag == "S":
        last_w1 = empty
        last_w2 = empty

# tostring() with an explicit encoding returns bytes, hence binary mode.
with open(FILE_OUT, "wb") as fp:
    fp.write(ElementTree.tostring(xml_tree, encoding='utf8', method='xml'))
|
||
|
|