Removing parallel stuff
This commit is contained in:
parent
d771137dc7
commit
ea92b44d71
48
src/wani.py
48
src/wani.py
|
@ -5,6 +5,7 @@ import logging
|
|||
import argparse
|
||||
import pickle
|
||||
import time
|
||||
import gc
|
||||
import subprocess
|
||||
import concurrent.futures
|
||||
import tempfile
|
||||
|
@ -42,50 +43,16 @@ def main(args):
|
|||
match_store = MatchStore(args, database)
|
||||
word_stats = WordStats(lemma_msds, database)
|
||||
|
||||
if args.parallel:
|
||||
num_parallel = int(args.parallel)
|
||||
|
||||
# make temporary directory to hold temporary files
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
cmd = sys.argv
|
||||
for inpt in args.input:
|
||||
if inpt in cmd:
|
||||
cmd.remove(inpt)
|
||||
|
||||
# remove "--parallel X"
|
||||
pidx = cmd.index('--parallel')
|
||||
del cmd[pidx]
|
||||
del cmd[pidx]
|
||||
|
||||
def func(n):
|
||||
cmdn = [sys.executable] + cmd + [args.input[n],
|
||||
"--match-to-file", "{}/{}.p".format(tmpdirname, n)]
|
||||
subprocess.check_call(cmdn)
|
||||
return n
|
||||
|
||||
# use ThreadPoolExecutor to run subprocesses in parallel using Python threads
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_parallel) as executor:
|
||||
# fancy interface to wait for threads to finish
|
||||
for id_input in executor.map(func, [i for i, _ in enumerate(args.input)]):
|
||||
with open("{}/{}.p".format(tmpdirname, id_input), "rb") as fp:
|
||||
words, matches = pickle.load(fp)
|
||||
|
||||
match_store.add_matches(matches)
|
||||
word_stats.add_words(words)
|
||||
|
||||
else:
|
||||
for words in load_files(args):
|
||||
matches = match_file(words, structures)
|
||||
# just save to temporary file, used for children of a parallel process
|
||||
# MUST NOT have more than one file
|
||||
if args.match_to_file is not None:
|
||||
with open(args.match_to_file, "wb") as fp:
|
||||
pickle.dump((words, matches), fp)
|
||||
return
|
||||
else:
|
||||
match_store.add_matches(matches)
|
||||
word_stats.add_words(words)
|
||||
|
||||
# force a bit of garbage collection
|
||||
del words
|
||||
del matches
|
||||
gc.collect()
|
||||
|
||||
# get word renders for lemma/msd
|
||||
word_stats.generate_renders()
|
||||
match_store.determine_colocation_dispersions()
|
||||
|
@ -148,10 +115,7 @@ if __name__ == '__main__':
|
|||
|
||||
parser.add_argument('--pc-tag',
|
||||
help='Tag for separators, usually pc or c', default="pc")
|
||||
parser.add_argument('--parallel',
|
||||
help='Run in multiple processes, should speed things up')
|
||||
|
||||
parser.add_argument('--match-to-file', help='Do not use!')
|
||||
|
||||
args = parser.parse_args()
|
||||
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
||||
|
|
Loading…
Reference in New Issue
Block a user