You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
cjvt-valency/src/backend_flask/get_sentence_ids.py

19 lines
564 B

import json
import os
# Directory that is scanned for ``*.json`` input files.
input_dir = "/media/luka/Portable Disk/Datasets/gigafida_jos/final_json"
# Path of the aggregated JSON file written by ``main()``.
output_file = "../../all_sentences.json"


def collect_sentence_ids(directory, show_progress=True):
    """Map each JSON file in *directory* to the list of its top-level keys.

    Every directory entry whose name ends with ``.json`` is parsed.  The
    result key is the part of the file name before the first ``-``; the
    value is the list of top-level keys of that file's JSON object
    (presumably sentence ids -- confirm against the data set).

    Entries are processed in sorted name order so that, if two file names
    share the same prefix, the one that wins (the later one) is
    deterministic rather than dependent on ``os.listdir`` ordering.

    Args:
        directory: Path of the directory to scan (not recursive).
        show_progress: When true, print one progress line per directory
            entry, as a real percentage that ends at 100.00 %.

    Returns:
        dict mapping file-name prefix -> list of top-level JSON keys.

    Raises:
        OSError: If the directory or one of its files cannot be read.
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
        AttributeError: If a file's top-level JSON value is not an object.
    """
    filenames = sorted(os.listdir(directory))
    total = len(filenames)
    results = {}
    for done, filename in enumerate(filenames, start=1):
        if filename.endswith(".json"):
            path = os.path.join(directory, filename)
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(path, encoding="utf-8") as json_file:
                data = json.load(json_file)
            results[filename.split('-')[0]] = list(data.keys())
        if show_progress:
            # Scale to a real percentage; counting from 1 makes the final
            # line read 100.00 %.
            print('Progress: %.2f %%' % (100 * done / total))
    return results


def main():
    """Aggregate the keys of all files in ``input_dir`` into ``output_file``."""
    results = collect_sentence_ids(input_dir)
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump(results, f)


if __name__ == "__main__":
    main()