|
|
|
@ -4,6 +4,7 @@ import configparser
|
|
|
|
|
import json
|
|
|
|
|
import sys
|
|
|
|
|
import logging
|
|
|
|
|
from multiprocessing import Pool
|
|
|
|
|
|
|
|
|
|
# parse config
|
|
|
|
|
config = configparser.ConfigParser()
|
|
|
|
@ -12,10 +13,13 @@ ORIGPATH = Path(config["tools"]["kres_orig"])
|
|
|
|
|
# Directory of SRL input files and directory for the generated JSON output,
# both read from the [tools] section of the config.
INPATH = Path(config["tools"]["kres_srl"])
OUTPATH = Path(config["tools"]["kres_json"])
# Debug mode is enabled only by the exact string "True".
DEBUG = config["tools"]["debug"] == "True"
# Number of worker processes handed to the multiprocessing Pool.
CPU_CORES = int(config["tools"]["cpu_cores"])

# Make sure the log file exists before resolving its path. Path.resolve()
# returns a new path object instead of modifying LOGFILE in place, so the
# result has to be assigned back.
LOGFILE = Path(config["tools"]["logfile"]).absolute()
LOGFILE.touch(exist_ok=True)
LOGFILE = LOGFILE.resolve()

# Configure the root logger exactly once; a repeated basicConfig() call is
# ignored once the root logger already has a handler.
logging.basicConfig(filename=str(LOGFILE), level=logging.INFO)
|
|
|
|
|
|
|
|
|
|
def get_origfile(filename):
|
|
|
|
|
for origfile in ORIGPATH.iterdir():
|
|
|
|
@ -55,12 +59,10 @@ def get_dep_rel(token):
|
|
|
|
|
}
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Shared parser instance and output directory for the generated JSON files.
par = Parser()
OUTPATH.mkdir(exist_ok=True)

# Log message fixed: the file extension was misspelled as ".josn".
logging.info("Start generating .json files.")
|
|
|
|
|
for infile in [x for x in INPATH.iterdir() if x.is_file()]:
|
|
|
|
|
def handle_file(infile_tpl):
|
|
|
|
|
i = infile_tpl[0]
|
|
|
|
|
infile = infile_tpl[1]
|
|
|
|
|
outfile = (OUTPATH / infile.name).with_suffix(".json")
|
|
|
|
|
origfile = get_origfile(infile)
|
|
|
|
|
orig_dict = par.parse_tei(origfile)
|
|
|
|
|
|
|
|
|
@ -94,8 +96,19 @@ for infile in [x for x in INPATH.iterdir() if x.is_file()]:
|
|
|
|
|
print()
|
|
|
|
|
print()
|
|
|
|
|
|
|
|
|
|
outfile = (OUTPATH / infile.name).with_suffix(".json")
|
|
|
|
|
with outfile.open("w") as fp:
|
|
|
|
|
json.dump(outdata, fp)
|
|
|
|
|
logging.info("SRL relations written to: ", outfile)
|
|
|
|
|
logging.info("SRL relations written to: {}".format(outfile))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# main
# The parser stays at module level because handle_file() reads the global
# `par`; worker processes that re-import this module need it defined too.
par = Parser()
OUTPATH.mkdir(exist_ok=True)

# Guard the pool start-up so that worker processes which re-import this
# module (spawn start method) do not try to create a pool of their own.
if __name__ == "__main__":
    # Pair every regular file in INPATH with a running index; handle_file()
    # expects an (index, path) tuple.
    infiles = list(enumerate(x for x in INPATH.iterdir() if x.is_file()))
    logging.info("Generating JSON SRL files from %d tsv files.", len(infiles))

    # Distribute the files over CPU_CORES worker processes.
    with Pool(CPU_CORES) as p:
        p.map(handle_file, infiles)

    logging.info("Finished generating .json files.")
|
|
|
|
|