Loading/Saving to temporary file
This commit is contained in:
parent
518fe5e113
commit
25f3918170
27
wani.py
27
wani.py
|
@ -5,6 +5,7 @@ from collections import defaultdict
|
||||||
import sys
|
import sys
|
||||||
import logging
|
import logging
|
||||||
import argparse
|
import argparse
|
||||||
|
import pickle
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from msd_translate import MSD_TRANSLATE
|
from msd_translate import MSD_TRANSLATE
|
||||||
|
@ -925,21 +926,23 @@ class ColocationIds:
|
||||||
|
|
||||||
|
|
||||||
def main(input_file, structures_file, args):
|
def main(input_file, structures_file, args):
|
||||||
t = time.time()
|
|
||||||
writer = Writer(args)
|
writer = Writer(args)
|
||||||
|
|
||||||
structures = build_structures(structures_file)
|
structures = build_structures(structures_file)
|
||||||
for s in structures:
|
for s in structures:
|
||||||
logging.debug(str(s))
|
logging.debug(str(s))
|
||||||
|
|
||||||
words = load_corpus(args)
|
if args.temporary_load:
|
||||||
|
logging.info("Loading temporary file: {}".format(args.temporary_load))
|
||||||
|
with open(args.temporary_load, "rb") as fp:
|
||||||
|
words = pickle.load(fp)
|
||||||
|
else:
|
||||||
|
words = load_corpus(args)
|
||||||
|
|
||||||
# useful for faster debugging...
|
if args.temporary_save is not None:
|
||||||
# import pickle
|
logging.info("Saving to temporary file: {}".format(args.temporary_save))
|
||||||
# with open("words.p", "wb") as fp:
|
with open(args.temporary_save, "wb") as fp:
|
||||||
# pickle.dump(words, fp)
|
pickle.dump(words, fp)
|
||||||
# with open("words.p", "rb") as fp:
|
return
|
||||||
# words = pickle.load(fp)
|
|
||||||
|
|
||||||
logging.info("MATCHES...")
|
logging.info("MATCHES...")
|
||||||
matches = {s.id: [] for s in structures}
|
matches = {s.id: [] for s in structures}
|
||||||
|
@ -960,7 +963,6 @@ def main(input_file, structures_file, args):
|
||||||
|
|
||||||
writer.write_out(matches, structures, colocation_ids)
|
writer.write_out(matches, structures, colocation_ids)
|
||||||
|
|
||||||
logging.info("TIME: {}".format(time.time() - t))
|
|
||||||
logging.debug([(k, len(v)) for k, v in matches.items()])
|
logging.debug([(k, len(v)) for k, v in matches.items()])
|
||||||
logging.debug(sum(len(v) for _, v in matches.items()))
|
logging.debug(sum(len(v) for _, v in matches.items()))
|
||||||
|
|
||||||
|
@ -977,7 +979,12 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
|
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
|
||||||
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')
|
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')
|
||||||
|
|
||||||
|
parser.add_argument('--temporary-save', help='Save corpus given as input to a temporary file for faster loading')
|
||||||
|
parser.add_argument('--temporary-load', help='Load corpus from a temporary file')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
main(args.input, args.structures, args)
|
main(args.input, args.structures, args)
|
||||||
|
logging.info("TIME: {}".format(time.time() - start))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user