Removing parallel stuff
parent d771137dc7
commit ea92b44d71

src/wani.py (48 changed lines)
@@ -5,6 +5,7 @@ import logging
 import argparse
 import pickle
 import time
+import gc
 import subprocess
 import concurrent.futures
 import tempfile
@@ -42,50 +43,16 @@ def main(args):
     match_store = MatchStore(args, database)
     word_stats = WordStats(lemma_msds, database)
 
-    if args.parallel:
-        num_parallel = int(args.parallel)
-
-        # make temporary directory to hold temporary files
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            cmd = sys.argv
-            for inpt in args.input:
-                if inpt in cmd:
-                    cmd.remove(inpt)
-
-            # remove "--parallel X"
-            pidx = cmd.index('--parallel')
-            del cmd[pidx]
-            del cmd[pidx]
-
-            def func(n):
-                cmdn = [sys.executable] + cmd + [args.input[n],
-                                                 "--match-to-file", "{}/{}.p".format(tmpdirname, n)]
-                subprocess.check_call(cmdn)
-                return n
-
-            # use ThreadPoolExecuter to run subprocesses in parallel using py threads
-            with concurrent.futures.ThreadPoolExecutor(max_workers=num_parallel) as executor:
-                # fancy interface to wait for threads to finish
-                for id_input in executor.map(func, [i for i, _ in enumerate(args.input)]):
-                    with open("{}/{}.p".format(tmpdirname, id_input), "rb") as fp:
-                        words, matches = pickle.load(fp)
-
-                    match_store.add_matches(matches)
-                    word_stats.add_words(words)
-
-    else:
     for words in load_files(args):
         matches = match_file(words, structures)
-            # just save to temporary file, used for children of a parallel process
-            # MUST NOT have more than one file
-            if args.match_to_file is not None:
-                with open(args.match_to_file, "wb") as fp:
-                    pickle.dump((words, matches), fp)
-                    return
-            else:
         match_store.add_matches(matches)
         word_stats.add_words(words)
 
+        # force a bit of garbage collection
+        del words
+        del matches
+        gc.collect()
+
     # get word renders for lemma/msd
     word_stats.generate_renders()
     match_store.determine_colocation_dispersions()
@@ -148,10 +115,7 @@ if __name__ == '__main__':
 
     parser.add_argument('--pc-tag',
                         help='Tag for separators, usually pc or c', default="pc")
-    parser.add_argument('--parallel',
-                        help='Run in multiple processes, should speed things up')
 
-    parser.add_argument('--match-to-file', help='Do not use!')
 
     args = parser.parse_args()
     logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
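The replacement path processes the input files one at a time and explicitly drops the per-file data before the next file is loaded; the `del` statements plus `gc.collect()` encourage Python to release the large `words`/`matches` structures promptly, so peak memory stays close to the size of a single input. A minimal, self-contained sketch of that pattern follows, with `load_inputs`/`process_one` as hypothetical stand-ins for the project's `load_files`/`match_file` pipeline.

# Illustrative sketch only: the sequential, gc-assisted loop the commit
# switches to. load_inputs() and process_one() are hypothetical stand-ins.
import gc


def load_inputs():
    # Pretend each "file" is just a list of tokens.
    yield ["a", "b", "c"]
    yield ["d", "e"]


def process_one(words):
    return [w.upper() for w in words]


def main():
    total = 0
    for words in load_inputs():
        matches = process_one(words)
        total += len(matches)

        # Drop the per-file data and nudge the garbage collector so memory
        # from one file is freed before the next one is loaded.
        del words
        del matches
        gc.collect()
    print(total)


if __name__ == "__main__":
    main()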