forked from kristjan/cjvt-srl-tagging
added logger
This commit is contained in:
parent
12f3994115
commit
fd0f9794f1
|
@ -3,12 +3,7 @@ from parser.parser import Parser
|
|||
import configparser
|
||||
import json
|
||||
import sys
|
||||
|
||||
# defaults
|
||||
ORIGPATH = Path("../data/kres_example") # we need the IDs
|
||||
INPATH = Path("../data/kres_example_srl")
|
||||
OUTPATH = Path("../data/kres_example_json")
|
||||
DEBUG = False
|
||||
import logging
|
||||
|
||||
# parse config
|
||||
config = configparser.ConfigParser()
|
||||
|
@ -16,8 +11,11 @@ config.read("tools.cfg")
|
|||
ORIGPATH = Path(config["tools"]["kres_orig"])
|
||||
INPATH = Path(config["tools"]["kres_srl"])
|
||||
OUTPATH = Path(config["tools"]["kres_json"])
|
||||
LOGFILE = Path(config["tools"]["logfile"])
|
||||
DEBUG = config["tools"]["debug"] == "True"
|
||||
|
||||
logging.basicConfig(filename=LOGFILE, level=logging.INFO)
|
||||
|
||||
def get_origfile(filename):
|
||||
for origfile in ORIGPATH.iterdir():
|
||||
if filename.name.split('.')[0] == origfile.name.split('.')[0]:
|
||||
|
@ -46,8 +44,7 @@ def match_sentence_id(sentence, orig_dict):
|
|||
raise KeyError
|
||||
|
||||
def get_dep_rel(token):
|
||||
if DEBUG:
|
||||
print(token)
|
||||
logging.debug(token)
|
||||
for i, field in enumerate(token[14:]):
|
||||
if field != "_":
|
||||
return {
|
||||
|
@ -61,7 +58,7 @@ def get_dep_rel(token):
|
|||
par = Parser()
|
||||
OUTPATH.mkdir(exist_ok=True)
|
||||
|
||||
print("Start generating .json files.")
|
||||
logging.info("Start generating .json files.")
|
||||
for infile in [x for x in INPATH.iterdir() if x.is_file()]:
|
||||
origfile = get_origfile(infile)
|
||||
orig_dict = par.parse_tei(origfile)
|
||||
|
@ -99,5 +96,5 @@ for infile in [x for x in INPATH.iterdir() if x.is_file()]:
|
|||
outfile = (OUTPATH / infile.name).with_suffix(".json")
|
||||
with outfile.open("w") as fp:
|
||||
json.dump(outdata, fp)
|
||||
print("SRL relations written to: ", outfile)
|
||||
print("Finished generating .json files.")
|
||||
logging.info("SRL relations written to: %s", outfile)
|
||||
logging.info("Finished generating .json files.")
|
||||
|
|
|
@ -6,10 +6,7 @@ import re
|
|||
import sys
|
||||
import cProfile
|
||||
import configparser
|
||||
|
||||
# some defaults
|
||||
INDIR = Path("../data/kres_example")
|
||||
OUTDIR = Path("../data/kres_example_tsv")
|
||||
import logging
|
||||
|
||||
SSJ500K_2_1 = 27829 # number of sentences
|
||||
par = Parser()
|
||||
|
@ -19,6 +16,9 @@ config = configparser.ConfigParser()
|
|||
config.read("tools.cfg")
|
||||
INDIR = Path(config["tools"]["kres_orig"])
|
||||
OUTDIR = Path(config["tools"]["kres_tsv"])
|
||||
LOGFILE = Path(config["tools"]["logfile"])
|
||||
|
||||
logging.basicConfig(filename=LOGFILE, level=logging.INFO)
|
||||
|
||||
"""
|
||||
print("parsing ssj")
|
||||
|
@ -28,14 +28,14 @@ ssj_dict = par.parse_tei(ssj_file)
|
|||
print("end parsing ssj")
|
||||
"""
|
||||
|
||||
print("parsing kres")
|
||||
logging.info("parsing kres")
|
||||
# kres_file = "../data/kres_example/F0019343.xml.parsed.xml"
|
||||
OUTDIR.mkdir(exist_ok=True)
|
||||
|
||||
infiles = [x for x in INDIR.iterdir() if x.is_file()]
|
||||
for i, kres_file in enumerate(infiles):
|
||||
|
||||
print("Processing file ({}/{}): {}".format(i+1, len(infiles), str(kres_file)))
|
||||
logging.info("Processing file ({}/{}): {}".format(i+1, len(infiles), str(kres_file)))
|
||||
res_dict = par.parse_tei(kres_file)
|
||||
kres_out_str = ""
|
||||
|
||||
|
@ -44,4 +44,4 @@ for i, kres_file in enumerate(infiles):
|
|||
|
||||
with (OUTDIR / kres_file.name).with_suffix(".tsv").open("wb+") as fp:
|
||||
fp.write(kres_out_str.encode("utf-8"))
|
||||
print("end parsing kres")
|
||||
logging.info("end parsing kres")
|
||||
|
|
|
@ -3,4 +3,5 @@ kres_orig = /kres_mount/kres_parsed/tei
|
|||
kres_tsv = ../data/kres_out/1_tsv
|
||||
kres_srl = ../data/kres_out/_srl
|
||||
kres_json = ../data/kres_out/final_json
|
||||
logfile = ../progress.log
|
||||
debug = False
|
||||
|
|
Loading…
Reference in New Issue
Block a user