48 lines
1.2 KiB
Python
48 lines
1.2 KiB
Python
# parse config
|
|
import configparser
|
|
import json
|
|
import logging
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from tools.parser.parser import Parser
|
|
|
|
# --- Configuration -------------------------------------------------------
# All settings are read from the [tools] section of the config file.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed; fail here with a clear message instead
# of with a KeyError on the first lookup below.
if not config.read("tools.cfg.ssj500k2.3"):
    raise FileNotFoundError("Could not read config file: tools.cfg.ssj500k2.3")

ORIGPATH = Path(config["tools"]["ssj500k_orig_folder"])
JSONPATH = Path(config["tools"]["ssj500k_json"]) / "ssj500k-sl.body.json"
OUTPATH = Path(config["tools"]["ssj500k_tei"])
INTERNAL_DATA = Path(config["tools"]["internal_data"])
# Only the exact string "True" enables debug mode.
DEBUG = config["tools"]["debug"] == "True"
CPU_CORES = int(config["tools"]["cpu_cores"])

# --- Logging -------------------------------------------------------------
LOGFILE = Path(config["tools"]["logfile"]).absolute()
LOGFILE.touch(exist_ok=True)
# Path.resolve() returns a new path object; rebind it so the call has an effect.
LOGFILE = LOGFILE.resolve()

logging.basicConfig(filename=str(LOGFILE), level=logging.INFO)

# --- Shared state --------------------------------------------------------
par = Parser()
# parents=True so a nested output directory can be created in one go.
OUTPATH.mkdir(parents=True, exist_ok=True)

# Explicit UTF-8 so decoding does not depend on the platform's locale.
with open(JSONPATH, encoding="utf-8") as jf:
    jsondata = json.load(jf)

logging.info("Generating TEI with annotated SRL.")
|
|
|
|
def handle_file(file, jsondata):
    """Run the TEI parser steps for a single source file.

    Args:
        file: Path of the TEI file, relative to ``ORIGPATH``.
        jsondata: Passed through unchanged to ``par.minimize_tei``.

    Returns:
        Whatever ``par.minimize_tei`` returns for this file. Previously the
        value was computed and then dropped.
    """
    teifile = ORIGPATH / file

    # NOTE(review): the result of parse_tei used to be overwritten
    # immediately. The call is kept in case it has side effects that
    # minimize_tei relies on -- confirm, and drop it if it does not.
    par.parse_tei(teifile)

    # NOTE(review): nothing is written under OUTPATH in this function;
    # presumably the parser or a later step handles output -- verify.
    return par.minimize_tei(teifile, jsondata)
|
|
|
|
origfiles = []
# Process every file found under ORIGPATH, including nested directories.
for subdir, dirs, files in os.walk(ORIGPATH):
    for file in files:
        # os.walk yields bare file names, and handle_file joins its argument
        # onto ORIGPATH. Pass the path relative to ORIGPATH so files in
        # subdirectories resolve correctly. For files directly inside
        # ORIGPATH this is just the file name, as before.
        relative_file = os.path.relpath(os.path.join(subdir, file), ORIGPATH)
        handle_file(relative_file, jsondata)
|