added number of cores to config
This commit is contained in:
parent
a61ec8770a
commit
d45b6d9f47
@ -17,6 +17,7 @@ config = configparser.ConfigParser()
|
|||||||
config.read("tools.cfg")
|
config.read("tools.cfg")
|
||||||
INDIR = Path(config["tools"]["kres_orig"])
|
INDIR = Path(config["tools"]["kres_orig"])
|
||||||
OUTDIR = Path(config["tools"]["kres_tsv"])
|
OUTDIR = Path(config["tools"]["kres_tsv"])
|
||||||
|
CPU_CORES = int(config["tools"]["cpu_cores"])
|
||||||
|
|
||||||
LOGFILE = Path(config["tools"]["logfile"]).absolute()
|
LOGFILE = Path(config["tools"]["logfile"]).absolute()
|
||||||
LOGFILE.touch(exist_ok=True)
|
LOGFILE.touch(exist_ok=True)
|
||||||
@ -32,27 +33,38 @@ ssj_dict = par.parse_tei(ssj_file)
|
|||||||
print("end parsing ssj")
|
print("end parsing ssj")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
logging.info("parsing kres")
|
|
||||||
# kres_file = "../data/kres_example/F0019343.xml.parsed.xml"
|
# kres_file = "../data/kres_example/F0019343.xml.parsed.xml"
|
||||||
OUTDIR.mkdir(exist_ok=True)
|
OUTDIR.mkdir(exist_ok=True)
|
||||||
|
|
||||||
infiles = list(enumerate([x for x in INDIR.iterdir() if x.is_file()]))
|
infiles = list(enumerate([x for x in INDIR.iterdir() if x.is_file()]))
|
||||||
|
logging.info("Parsing kres: {} files.".format(len(infiles)))
|
||||||
|
|
||||||
def handle_file(infile):
|
def handle_file(infile):
|
||||||
i = infile[0]
|
i = infile[0]
|
||||||
kres_file = infile[1]
|
kres_file = infile[1]
|
||||||
|
outfile = (OUTDIR / kres_file.name).with_suffix(".tsv")
|
||||||
|
|
||||||
logging.info("Processing file ({}/{}): {}".format(i+1, len(infiles), str(kres_file)))
|
if outfile.is_file():
|
||||||
|
logging.info("Skipping existing file: {}.".format(str(kres_file)))
|
||||||
|
return True
|
||||||
|
|
||||||
|
try:
|
||||||
res_dict = par.parse_tei(kres_file)
|
res_dict = par.parse_tei(kres_file)
|
||||||
kres_out_str = ""
|
kres_out_str = ""
|
||||||
|
|
||||||
for _, sentence in res_dict.items():
|
for _, sentence in res_dict.items():
|
||||||
kres_out_str += par.to_conll_2009_SRL(sentence)
|
kres_out_str += par.to_conll_2009_SRL(sentence)
|
||||||
|
except:
|
||||||
|
logging.info("Failed processing file: {}".format(str(kres_file)))
|
||||||
|
return False
|
||||||
|
|
||||||
with (OUTDIR / kres_file.name).with_suffix(".tsv").open("wb+") as fp:
|
|
||||||
|
with outfile.open("wb+") as fp:
|
||||||
fp.write(kres_out_str.encode("utf-8"))
|
fp.write(kres_out_str.encode("utf-8"))
|
||||||
|
logging.info("Processed file ({}/{}): {}".format(i+1, len(infiles), str(kres_file)))
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
with Pool(3) as p:
|
with Pool(CPU_CORES) as p:
|
||||||
p.map(handle_file, infiles)
|
p.map(handle_file, infiles)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,4 +4,5 @@ kres_tsv = ../data/kres_out/1_tsv
|
|||||||
kres_srl = ../data/kres_out/2_srl
|
kres_srl = ../data/kres_out/2_srl
|
||||||
kres_json = ../data/kres_out/final_json
|
kres_json = ../data/kres_out/final_json
|
||||||
logfile = ../progress.log
|
logfile = ../progress.log
|
||||||
|
cpu_cores = 5
|
||||||
debug = False
|
debug = False
|
||||||
|
Loading…
Reference in New Issue
Block a user