"""Driver script: run the TEI parser over every file of the original corpus.

Reads paths and settings from the ``[tools]`` section of
``tools.cfg.ssj500k2.3``, configures file logging, loads the JSON body
file and then calls :func:`handle_file` for every file found under
``ORIGPATH``.
"""
import configparser
import json
import logging
import os
from pathlib import Path

from tools.parser.parser import Parser

# parse config
config = configparser.ConfigParser()
config.read("tools.cfg.ssj500k2.3")
tools_cfg = config["tools"]

ORIGPATH = Path(tools_cfg["ssj500k_orig_folder"])
JSONPATH = Path(tools_cfg["ssj500k_json"]) / "ssj500k-sl.body.json"
OUTPATH = Path(tools_cfg["ssj500k_tei"])
INTERNAL_DATA = Path(tools_cfg["internal_data"])
DEBUG = tools_cfg["debug"] == "True"
CPU_CORES = int(tools_cfg["cpu_cores"])

# The log file must exist before it is resolved and handed to logging.
LOGFILE = Path(tools_cfg["logfile"]).absolute()
LOGFILE.touch(exist_ok=True)
LOGFILE = LOGFILE.resolve()  # the original discarded this result
logging.basicConfig(filename=str(LOGFILE), level=logging.INFO)

par = Parser()

# parents=True so a nested, not-yet-existing output path does not abort the run.
OUTPATH.mkdir(parents=True, exist_ok=True)

# Explicit encoding: do not depend on the platform's locale default.
with open(JSONPATH, "r", encoding="utf-8") as jf:
    jsondata = json.load(jf)

logging.info("Generating TEI with annotated SRL.")


def handle_file(file, jsondata):
    """Parse and minimize a single TEI file of the original corpus.

    Args:
        file: File name, or path relative to ``ORIGPATH``, of the TEI file.
        jsondata: Data loaded from ``JSONPATH``; passed through unchanged to
            ``Parser.minimize_tei``.

    Returns:
        None. The minimized result is not written anywhere in the visible
        code.
    """
    teifile = ORIGPATH / file
    # Intended output location (same relative path under OUTPATH); it is not
    # used yet in the visible code.
    resfile = OUTPATH / file

    # NOTE(review): this result is overwritten right below. The call is kept
    # in case parse_tei has side effects on the shared parser -- confirm and
    # drop it if it has none.
    orig_dict = par.parse_tei(teifile)
    orig_dict = par.minimize_tei(teifile, jsondata)


for subdir, _dirs, filenames in os.walk(ORIGPATH):
    # os.walk also descends into subdirectories; keep the directory part so
    # that nested files resolve to their real location under ORIGPATH.
    rel_dir = Path(subdir).relative_to(ORIGPATH)
    for filename in filenames:
        handle_file(rel_dir / filename, jsondata)