forked from kristjan/cjvt-srl-tagging
added multiprocessing to parse_all.py
This commit is contained in:
parent
17cb0677a7
commit
3881c74613
|
@ -7,6 +7,7 @@ import sys
|
|||
import cProfile
|
||||
import configparser
|
||||
import logging
|
||||
from multiprocessing import Pool
|
||||
|
||||
SSJ500K_2_1 = 27829 # number of sentences
|
||||
par = Parser()
|
||||
|
@ -16,7 +17,10 @@ config = configparser.ConfigParser()
|
|||
config.read("tools.cfg")
|
||||
INDIR = Path(config["tools"]["kres_orig"])
|
||||
OUTDIR = Path(config["tools"]["kres_tsv"])
|
||||
LOGFILE = Path(config["tools"]["logfile"]).absolute().resolve()
|
||||
|
||||
LOGFILE = Path(config["tools"]["logfile"]).absolute()
|
||||
LOGFILE.touch(exist_ok=True)
|
||||
LOGFILE.resolve()
|
||||
|
||||
logging.basicConfig(filename=str(LOGFILE), level=logging.INFO)
|
||||
|
||||
|
@ -32,8 +36,11 @@ logging.info("parsing kres")
|
|||
# kres_file = "../data/kres_example/F0019343.xml.parsed.xml"
|
||||
OUTDIR.mkdir(exist_ok=True)
|
||||
|
||||
infiles = [x for x in INDIR.iterdir() if x.is_file()]
|
||||
for i, kres_file in enumerate(infiles):
|
||||
infiles = list(enumerate([x for x in INDIR.iterdir() if x.is_file()]))
|
||||
|
||||
def handle_file(infile):
|
||||
i = infile[0]
|
||||
kres_file = infile[1]
|
||||
|
||||
logging.info("Processing file ({}/{}): {}".format(i+1, len(infiles), str(kres_file)))
|
||||
res_dict = par.parse_tei(kres_file)
|
||||
|
@ -44,4 +51,11 @@ for i, kres_file in enumerate(infiles):
|
|||
|
||||
with (OUTDIR / kres_file.name).with_suffix(".tsv").open("wb+") as fp:
|
||||
fp.write(kres_out_str.encode("utf-8"))
|
||||
|
||||
with Pool(3) as p:
|
||||
p.map(handle_file, infiles)
|
||||
|
||||
|
||||
logging.info("end parsing kres")
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user