xml_schemas/scripts/get_sample.py

21 lines
469 B
Python

#!/usr/bin/python3
import lxml.etree as lxml
import sys
import random
# Usage: get_sample.py INPUT_XML SAMPLE_SIZE OUTPUT_XML
#
# Reads INPUT_XML, keeps a uniformly random subset of SAMPLE_SIZE direct
# <entry> children of the root element, removes the remaining <entry>
# children, and writes the resulting tree to OUTPUT_XML encoded as UTF-8.
# If SAMPLE_SIZE is at least the number of entries, every entry is kept.
if len(sys.argv) < 4:
    sys.exit('usage: get_sample.py INPUT_XML SAMPLE_SIZE OUTPUT_XML')
input_file_name = sys.argv[1]
sample_size = int(sys.argv[2])
output_file_name = sys.argv[3]
if sample_size < 0:
    # A negative size used to keep "all but |size|" entries by accident of
    # slice semantics; reject it instead of silently mis-sampling.
    sys.exit('SAMPLE_SIZE must be a non-negative integer')

tree = lxml.parse(input_file_name)
root = tree.getroot()

# Evaluate the XPath once. The result is a snapshot of the matching
# elements, so removing entries from the tree while looping over it is safe.
entries = root.xpath('entry')

# random.sample raises ValueError when asked for more items than exist, so
# clamp the request. A set makes the per-entry membership test O(1).
kept_indexes = set(
    random.sample(range(len(entries)), min(sample_size, len(entries)))
)

for index, entry in enumerate(entries):
    if index not in kept_indexes:
        root.remove(entry)

tree.write(output_file_name, encoding='UTF-8')