|
|
|
@ -3,12 +3,7 @@ from parser.parser import Parser
|
|
|
|
|
import configparser
|
|
|
|
|
import json
|
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
# defaults
|
|
|
|
|
ORIGPATH = Path("../data/kres_example") # we need the IDs
|
|
|
|
|
INPATH = Path("../data/kres_example_srl")
|
|
|
|
|
OUTPATH = Path("../data/kres_example_json")
|
|
|
|
|
DEBUG = False
|
|
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
# parse config
|
|
|
|
|
config = configparser.ConfigParser()
|
|
|
|
@ -16,8 +11,11 @@ config.read("tools.cfg")
|
|
|
|
|
ORIGPATH = Path(config["tools"]["kres_orig"])
|
|
|
|
|
INPATH = Path(config["tools"]["kres_srl"])
|
|
|
|
|
OUTPATH = Path(config["tools"]["kres_json"])
|
|
|
|
|
LOGFILE = Path(config["tools"]["logfile"])
|
|
|
|
|
DEBUG = config["tools"]["debug"] == "True"
|
|
|
|
|
|
|
|
|
|
# Honor the DEBUG flag: at INFO level the logging.debug() calls guarded by
# `if DEBUG:` would be filtered out and never reach the log file.
logging.basicConfig(filename=LOGFILE, level=logging.DEBUG if DEBUG else logging.INFO)
|
|
|
|
|
|
|
|
|
|
def get_origfile(filename):
|
|
|
|
|
for origfile in ORIGPATH.iterdir():
|
|
|
|
|
if filename.name.split('.')[0] == origfile.name.split('.')[0]:
|
|
|
|
@ -46,8 +44,7 @@ def match_sentence_id(sentence, orig_dict):
|
|
|
|
|
raise KeyError
|
|
|
|
|
|
|
|
|
|
def get_dep_rel(token):
|
|
|
|
|
if DEBUG:
|
|
|
|
|
print(token)
|
|
|
|
|
logging.debug(token)
|
|
|
|
|
for i, field in enumerate(token[14:]):
|
|
|
|
|
if field != "_":
|
|
|
|
|
return {
|
|
|
|
@ -61,7 +58,7 @@ def get_dep_rel(token):
|
|
|
|
|
par = Parser()
|
|
|
|
|
OUTPATH.mkdir(exist_ok=True)
|
|
|
|
|
|
|
|
|
|
# Announce the start of the conversion (message typo ".josn" corrected).
print("Start generating .json files.")
|
|
|
|
|
# Record the start of the conversion (message typo ".josn" corrected).
logging.info("Start generating .json files.")
|
|
|
|
|
for infile in [x for x in INPATH.iterdir() if x.is_file()]:
|
|
|
|
|
origfile = get_origfile(infile)
|
|
|
|
|
orig_dict = par.parse_tei(origfile)
|
|
|
|
@ -99,5 +96,5 @@ for infile in [x for x in INPATH.iterdir() if x.is_file()]:
|
|
|
|
|
outfile = (OUTPATH / infile.name).with_suffix(".json")
|
|
|
|
|
with outfile.open("w") as fp:
|
|
|
|
|
json.dump(outdata, fp)
|
|
|
|
|
print("SRL relations written to: ", outfile)
|
|
|
|
|
print("Finished generating .json files.")
|
|
|
|
|
# logging uses lazy %-formatting: extra positional args need a matching
# placeholder, otherwise the record fails to format and the path is lost.
logging.info("SRL relations written to: %s", outfile)
|
|
|
|
|
logging.info("Finished generating .json files.")
|
|
|
|
|