|
|
|
@ -21,7 +21,6 @@ for word in root.iterfind('.//w'):
|
|
|
|
|
good_s.add(".".join(word.attrib['id'].split('.')[:-1]))
|
|
|
|
|
|
|
|
|
|
for div in root.iterfind('.//div'):
|
|
|
|
|
print("div ", div.attrib['id'])
|
|
|
|
|
good_d = False
|
|
|
|
|
|
|
|
|
|
for par in div.iterfind('./p'):
|
|
|
|
@ -29,9 +28,7 @@ for div in root.iterfind('.//div'):
|
|
|
|
|
for s in par.iterfind('./s'):
|
|
|
|
|
if s.attrib['id'] not in good_s:
|
|
|
|
|
par.remove(s)
|
|
|
|
|
print("remove", s.attrib)
|
|
|
|
|
else:
|
|
|
|
|
print("keep", s.attrib)
|
|
|
|
|
good_p = True
|
|
|
|
|
|
|
|
|
|
if not good_p:
|
|
|
|
|