21 lines
469 B
Python
21 lines
469 B
Python
#!/usr/bin/python3
|
|
|
|
import lxml.etree as lxml
|
|
import sys
|
|
import random
|
|
|
|
def pick_sample_indexes(entry_count, sample_size):
    """Return a random set of entry positions to keep.

    Args:
        entry_count: Number of candidate entries (positions 0..entry_count-1).
        sample_size: How many positions to keep. Values larger than
            ``entry_count`` keep every position.

    Returns:
        A set of ``min(sample_size, entry_count)`` distinct positions chosen
        uniformly at random.

    Raises:
        ValueError: If ``sample_size`` is negative.
    """
    if sample_size < 0:
        raise ValueError('sample_size must be non-negative')
    # random.sample() raises when asked for more items than exist, so cap the
    # request; asking for "too many" simply means "keep everything".
    keep_count = min(sample_size, entry_count)
    # A set gives O(1) membership tests in the removal loop.
    return set(random.sample(range(entry_count), keep_count))


def main(argv):
    """Write a random sample of the input document's ``entry`` elements.

    Parses the XML file named by ``argv[1]``, keeps ``int(argv[2])`` randomly
    chosen ``entry`` children of the root element (removing the others), and
    writes the resulting tree to the file named by ``argv[3]`` as UTF-8.

    Args:
        argv: Command-line argument list in ``sys.argv`` layout.
    """
    if len(argv) < 4:
        sys.exit('usage: {} INPUT_XML SAMPLE_SIZE OUTPUT_XML'.format(argv[0]))

    input_file_name = argv[1]
    sample_size = int(argv[2])
    output_file_name = argv[3]

    tree = lxml.parse(input_file_name)
    root = tree.getroot()

    # Evaluate the XPath once; the result is a plain list, so removing
    # elements from the tree while walking it is safe.
    entries = root.xpath('entry')
    kept_indexes = pick_sample_indexes(len(entries), sample_size)

    for index, entry in enumerate(entries):
        if index not in kept_indexes:
            root.remove(entry)

    tree.write(output_file_name, encoding='UTF-8')


if __name__ == '__main__':
    main(sys.argv)