added number of cores to config

master
voje 5 years ago
parent a61ec8770a
commit d45b6d9f47

@ -17,6 +17,7 @@ config = configparser.ConfigParser()
config.read("tools.cfg") config.read("tools.cfg")
INDIR = Path(config["tools"]["kres_orig"]) INDIR = Path(config["tools"]["kres_orig"])
OUTDIR = Path(config["tools"]["kres_tsv"]) OUTDIR = Path(config["tools"]["kres_tsv"])
CPU_CORES = int(config["tools"]["cpu_cores"])
LOGFILE = Path(config["tools"]["logfile"]).absolute() LOGFILE = Path(config["tools"]["logfile"]).absolute()
LOGFILE.touch(exist_ok=True) LOGFILE.touch(exist_ok=True)
@ -32,27 +33,38 @@ ssj_dict = par.parse_tei(ssj_file)
print("end parsing ssj") print("end parsing ssj")
""" """
logging.info("parsing kres")
# kres_file = "../data/kres_example/F0019343.xml.parsed.xml" # kres_file = "../data/kres_example/F0019343.xml.parsed.xml"
OUTDIR.mkdir(exist_ok=True) OUTDIR.mkdir(exist_ok=True)
infiles = list(enumerate([x for x in INDIR.iterdir() if x.is_file()])) infiles = list(enumerate([x for x in INDIR.iterdir() if x.is_file()]))
logging.info("Parsing kres: {} files.".format(len(infiles)))
def handle_file(infile):
    """Convert one kres input file into a ``.tsv`` file under ``OUTDIR``.

    The input is parsed with ``par.parse_tei`` and every resulting sentence
    is serialized with ``par.to_conll_2009_SRL``; the concatenated output is
    written UTF-8 encoded to ``OUTDIR/<input name>`` with a ``.tsv`` suffix.
    Inputs whose output file already exists are skipped, so an interrupted
    run can be resumed.

    Args:
        infile: An ``(index, path)`` pair, as produced by ``enumerate`` over
            the input files. The index is only used for progress logging.

    Returns:
        True if the output file already existed or was written successfully,
        False if parsing or serialization raised an exception.
    """
    i = infile[0]
    kres_file = infile[1]
    outfile = (OUTDIR / kres_file.name).with_suffix(".tsv")

    if outfile.is_file():
        logging.info("Skipping existing file: {}.".format(str(kres_file)))
        return True

    try:
        res_dict = par.parse_tei(kres_file)
        kres_out_str = "".join(
            par.to_conll_2009_SRL(sentence) for sentence in res_dict.values()
        )
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still stop the worker, and log the
        # traceback so the reason for the failure is not lost.
        logging.exception("Failed processing file: {}".format(str(kres_file)))
        return False

    with outfile.open("wb+") as fp:
        fp.write(kres_out_str.encode("utf-8"))
    logging.info("Processed file ({}/{}): {}".format(i+1, len(infiles), str(kres_file)))
    return True
with Pool(3) as p: with Pool(CPU_CORES) as p:
p.map(handle_file, infiles) p.map(handle_file, infiles)

@ -4,4 +4,5 @@ kres_tsv = ../data/kres_out/1_tsv
kres_srl = ../data/kres_out/2_srl kres_srl = ../data/kres_out/2_srl
kres_json = ../data/kres_out/final_json kres_json = ../data/kres_out/final_json
logfile = ../progress.log logfile = ../progress.log
cpu_cores = 5
debug = False debug = False

Loading…
Cancel
Save