"""Stamp the ``w1``/``w2``/``w3`` elements of an XML file with an ID attribute.

Usage::

    python <this_script> FILE_IN FILE_OUT

FILE_IN is parsed, every ``w1``/``w2``/``w3`` descendant of the root receives
an ``ozbo_id`` attribute, and the resulting tree is serialized to FILE_OUT.
"""
import sys
import xml.etree.ElementTree as ElementTree

SEARCH_FOR = ["u1"]  # not referenced anywhere in this script
ID = "ozbo_id"  # should be the same in all files!

# Next fresh ID to hand out; advanced only by add_ctr().
ctr = 0

# Marks "no w1/w2 seen yet in the current <S>".  A dedicated object never
# compares equal to an element's text (a str or None), so a text-less element
# can no longer be mistaken for a repetition of a non-existent predecessor.
_NO_PREVIOUS = object()


def add_ctr(el, previous=False):
    """Write the ID attribute onto *el*.

    Args:
        el: element whose ``attrib`` mapping receives the ``ID`` key.
        previous: when true, reuse the most recently issued ID (``ctr - 1``)
            and leave the counter untouched; otherwise issue a fresh ID and
            advance the counter.
    """
    global ctr
    if previous:
        # NOTE(review): this is the ID issued last for *any* tag, which is not
        # necessarily the ID of the previous element with the same tag --
        # confirm that matches the expected input layout.
        el.attrib[ID] = str(ctr - 1)
    else:
        el.attrib[ID] = str(ctr)
        ctr += 1


def assign_ids(root):
    """Assign IDs to every ``w1``/``w2``/``w3`` descendant of *root*.

    Descendants are visited in document order (the root itself is skipped):

    * ``w3`` always gets a fresh ID.
    * ``w1`` / ``w2`` reuse the most recently issued ID when their text equals
      the text of the previous element with the same tag seen since the last
      ``S`` element; otherwise they get a fresh ID.
    * ``S`` forgets the remembered ``w1`` and ``w2`` texts.

    Args:
        root: root element of the parsed document; modified in place.

    Returns:
        The same *root* element, for convenience.
    """
    last_w1_text = _NO_PREVIOUS
    last_w2_text = _NO_PREVIOUS
    for el in root.findall(".//*"):
        tag = el.tag
        if tag == "w3":
            add_ctr(el)
        elif tag == "w2":
            add_ctr(el, el.text == last_w2_text)
            last_w2_text = el.text
        elif tag == "w1":
            add_ctr(el, el.text == last_w1_text)
            last_w1_text = el.text
        elif tag == "S":
            # reset last_w1 / last_w2
            last_w1_text = _NO_PREVIOUS
            last_w2_text = _NO_PREVIOUS
    return root


def main(argv=None):
    """Run the script: read ``argv[1]``, number its elements, write ``argv[2]``.

    Args:
        argv: argument vector including the program name; defaults to
            ``sys.argv``.  Arguments beyond the second are ignored.

    Returns:
        Process exit status: 0 on success, 2 when arguments are missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        prog = argv[0] if argv else "script"
        print(f"usage: {prog} FILE_IN FILE_OUT", file=sys.stderr)
        return 2
    file_in, file_out = argv[1], argv[2]

    # Parse from bytes so the XML parser honours the document's own encoding
    # declaration instead of relying on the platform's default text encoding.
    xml_tree = ElementTree.parse(file_in).getroot()
    assign_ids(xml_tree)

    with open(file_out, "wb") as fp:
        fp.write(ElementTree.tostring(xml_tree, encoding='utf8', method='xml'))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))