Removing parallel stuff

This commit is contained in:
Ozbolt Menegatti 2019-07-03 13:06:59 +02:00
parent d771137dc7
commit ea92b44d71

View File

@@ -5,6 +5,7 @@ import logging
import argparse import argparse
import pickle import pickle
import time import time
import gc
import subprocess import subprocess
import concurrent.futures import concurrent.futures
import tempfile import tempfile
@@ -42,49 +43,15 @@ def main(args):
match_store = MatchStore(args, database) match_store = MatchStore(args, database)
word_stats = WordStats(lemma_msds, database) word_stats = WordStats(lemma_msds, database)
if args.parallel: for words in load_files(args):
num_parallel = int(args.parallel) matches = match_file(words, structures)
match_store.add_matches(matches)
word_stats.add_words(words)
# make temporary directory to hold temporary files # force a bit of garbage collection
with tempfile.TemporaryDirectory() as tmpdirname: del words
cmd = sys.argv del matches
for inpt in args.input: gc.collect()
if inpt in cmd:
cmd.remove(inpt)
# remove "--parallel X"
pidx = cmd.index('--parallel')
del cmd[pidx]
del cmd[pidx]
def func(n):
cmdn = [sys.executable] + cmd + [args.input[n],
"--match-to-file", "{}/{}.p".format(tmpdirname, n)]
subprocess.check_call(cmdn)
return n
# use ThreadPoolExecuter to run subprocesses in parallel using py threads
with concurrent.futures.ThreadPoolExecutor(max_workers=num_parallel) as executor:
# fancy interface to wait for threads to finish
for id_input in executor.map(func, [i for i, _ in enumerate(args.input)]):
with open("{}/{}.p".format(tmpdirname, id_input), "rb") as fp:
words, matches = pickle.load(fp)
match_store.add_matches(matches)
word_stats.add_words(words)
else:
for words in load_files(args):
matches = match_file(words, structures)
# just save to temporary file, used for children of a parallel process
# MUST NOT have more than one file
if args.match_to_file is not None:
with open(args.match_to_file, "wb") as fp:
pickle.dump((words, matches), fp)
return
else:
match_store.add_matches(matches)
word_stats.add_words(words)
# get word renders for lemma/msd # get word renders for lemma/msd
word_stats.generate_renders() word_stats.generate_renders()
@@ -148,10 +115,7 @@ if __name__ == '__main__':
parser.add_argument('--pc-tag', parser.add_argument('--pc-tag',
help='Tag for separators, usually pc or c', default="pc") help='Tag for separators, usually pc or c', default="pc")
parser.add_argument('--parallel',
help='Run in multiple processes, should speed things up')
parser.add_argument('--match-to-file', help='Do not use!')
args = parser.parse_args() args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper()) logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())