From 25f39181700b0873e07ebf907c87c2271bfda298 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Sat, 9 Feb 2019 13:40:57 +0100 Subject: [PATCH] Loading/Saving to temporary file --- wani.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/wani.py b/wani.py index bb87325..17db942 100644 --- a/wani.py +++ b/wani.py @@ -5,6 +5,7 @@ from collections import defaultdict import sys import logging import argparse +import pickle import time from msd_translate import MSD_TRANSLATE @@ -925,21 +926,23 @@ class ColocationIds: def main(input_file, structures_file, args): - t = time.time() writer = Writer(args) - structures = build_structures(structures_file) for s in structures: logging.debug(str(s)) - words = load_corpus(args) + if args.temporary_load: + logging.info("Loading temporary file: {}".format(args.temporary_load)) + with open(args.temporary_load, "rb") as fp: + words = pickle.load(fp) + else: + words = load_corpus(args) - # useful for faster debugging... - # import pickle - # with open("words.p", "wb") as fp: - # pickle.dump(words, fp) - # with open("words.p", "rb") as fp: - # words = pickle.load(fp) + if args.temporary_save is not None: + logging.info("Saving to temporary file: {}".format(args.temporary_save)) + with open(args.temporary_save, "wb") as fp: + pickle.dump(words, fp) + return logging.info("MATCHES...") matches = {s.id: [] for s in structures} @@ -960,7 +963,6 @@ def main(input_file, structures_file, args): writer.write_out(matches, structures, colocation_ids) - logging.info("TIME: {}".format(time.time() - t)) logging.debug([(k, len(v)) for k, v in matches.items()]) logging.debug(sum(len(v) for _, v in matches.items())) @@ -977,7 +979,12 @@ if __name__ == '__main__': parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info") parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true') + parser.add_argument('--temporary-save', help='Save corpus given as input to a temporary file for faster loading') + parser.add_argument('--temporary-load', help='Load corpus from a temporary file') + args = parser.parse_args() logging.basicConfig(stream=sys.stderr, level=args.verbose.upper()) + start = time.time() main(args.input, args.structures, args) + logging.info("TIME: {}".format(time.time() - start))