Loading/saving the corpus to a temporary file
This commit is contained in:
		
							parent
							
								
									518fe5e113
								
							
						
					
					
						commit
						25f3918170
					
				
							
								
								
									
										27
									
								
								wani.py
									
									
									
									
									
								
							
							
						
						
									
										27
									
								
								wani.py
									
									
									
									
									
								
							| @ -5,6 +5,7 @@ from collections import defaultdict | ||||
| import sys | ||||
| import logging | ||||
| import argparse | ||||
| import pickle | ||||
| import time | ||||
| 
 | ||||
| from msd_translate import MSD_TRANSLATE | ||||
| @ -925,21 +926,23 @@ class ColocationIds: | ||||
| 
 | ||||
| 
 | ||||
| def main(input_file, structures_file, args): | ||||
|     t = time.time() | ||||
|     writer = Writer(args) | ||||
| 
 | ||||
|     structures = build_structures(structures_file) | ||||
|     for s in structures: | ||||
|         logging.debug(str(s)) | ||||
| 
 | ||||
|     words = load_corpus(args) | ||||
|     if args.temporary_load: | ||||
|         logging.info("Loading temporary file: {}".format(args.temporary_load)) | ||||
|         with open(args.temporary_load, "rb") as fp: | ||||
|             words = pickle.load(fp) | ||||
|     else:  | ||||
|         words = load_corpus(args) | ||||
| 
 | ||||
|     # useful for faster debugging... | ||||
|     # import pickle | ||||
|     # with open("words.p", "wb") as fp: | ||||
|     #     pickle.dump(words, fp) | ||||
|     # with open("words.p", "rb") as fp: | ||||
|     #     words = pickle.load(fp) | ||||
|     if args.temporary_save is not None: | ||||
|         logging.info("Saving to temporary file: {}".format(args.temporary_save)) | ||||
|         with open(args.temporary_save, "wb") as fp: | ||||
|             pickle.dump(words, fp) | ||||
|             return | ||||
| 
 | ||||
|     logging.info("MATCHES...") | ||||
|     matches = {s.id: [] for s in structures} | ||||
| @ -960,7 +963,6 @@ def main(input_file, structures_file, args): | ||||
| 
 | ||||
|     writer.write_out(matches, structures, colocation_ids) | ||||
| 
 | ||||
|     logging.info("TIME: {}".format(time.time() - t)) | ||||
|     logging.debug([(k, len(v)) for k, v in matches.items()]) | ||||
|     logging.debug(sum(len(v) for _, v in matches.items())) | ||||
| 
 | ||||
| @ -977,7 +979,12 @@ if __name__ == '__main__': | ||||
|     parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info") | ||||
|     parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true') | ||||
| 
 | ||||
|     parser.add_argument('--temporary-save', help='Save corpus given as input to a temporary file for faster loading') | ||||
|     parser.add_argument('--temporary-load', help='Load corpus from a temporary file') | ||||
| 
 | ||||
|     args = parser.parse_args() | ||||
|     logging.basicConfig(stream=sys.stderr, level=args.verbose.upper()) | ||||
| 
 | ||||
|     start = time.time() | ||||
|     main(args.input, args.structures, args) | ||||
|     logging.info("TIME: {}".format(time.time() - start)) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user