Loading/Saving to temporary file

This commit is contained in:
Ozbolt Menegatti 2019-02-09 13:40:57 +01:00
parent 518fe5e113
commit 25f3918170

25
wani.py
View File

@ -5,6 +5,7 @@ from collections import defaultdict
import sys import sys
import logging import logging
import argparse import argparse
import pickle
import time import time
from msd_translate import MSD_TRANSLATE from msd_translate import MSD_TRANSLATE
@ -925,21 +926,23 @@ class ColocationIds:
def main(input_file, structures_file, args): def main(input_file, structures_file, args):
t = time.time()
writer = Writer(args) writer = Writer(args)
structures = build_structures(structures_file) structures = build_structures(structures_file)
for s in structures: for s in structures:
logging.debug(str(s)) logging.debug(str(s))
if args.temporary_load:
logging.info("Loading temporary file: {}".format(args.temporary_load))
with open(args.temporary_load, "rb") as fp:
words = pickle.load(fp)
else:
words = load_corpus(args) words = load_corpus(args)
# useful for faster debugging... if args.temporary_save is not None:
# import pickle logging.info("Saving to temporary file: {}".format(args.temporary_save))
# with open("words.p", "wb") as fp: with open(args.temporary_save, "wb") as fp:
# pickle.dump(words, fp) pickle.dump(words, fp)
# with open("words.p", "rb") as fp: return
# words = pickle.load(fp)
logging.info("MATCHES...") logging.info("MATCHES...")
matches = {s.id: [] for s in structures} matches = {s.id: [] for s in structures}
@ -960,7 +963,6 @@ def main(input_file, structures_file, args):
writer.write_out(matches, structures, colocation_ids) writer.write_out(matches, structures, colocation_ids)
logging.info("TIME: {}".format(time.time() - t))
logging.debug([(k, len(v)) for k, v in matches.items()]) logging.debug([(k, len(v)) for k, v in matches.items()])
logging.debug(sum(len(v) for _, v in matches.items())) logging.debug(sum(len(v) for _, v in matches.items()))
@ -977,7 +979,12 @@ if __name__ == '__main__':
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info") parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true') parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')
parser.add_argument('--temporary-save', help='Save corpus given as input to a temporary file for faster loading')
parser.add_argument('--temporary-load', help='Load corpus from a temporary file')
args = parser.parse_args() args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper()) logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
start = time.time()
main(args.input, args.structures, args) main(args.input, args.structures, args)
logging.info("TIME: {}".format(time.time() - start))