From 60ac569f40247c2fcb4409d5af9df1cbbbc07aff Mon Sep 17 00:00:00 2001 From: voje Date: Thu, 28 Feb 2019 08:20:21 +0100 Subject: [PATCH] ready to go --- .gitignore | 6 +++--- Makefile | 3 ++- README.md | 9 ++++++++- tools/gen_json.py | 3 +++ tools/parse_all.py | 5 +++-- tools/tools.cfg | 6 +++--- 6 files changed, 22 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index b0df98f..3c5a087 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ *.pyc *.pickle +*.log -data/*/*.xml -data/*/*.tsv -data/*/*.json +data/kres_out/ +data/kres_example/ diff --git a/Makefile b/Makefile index 3e77ab7..e890601 100644 --- a/Makefile +++ b/Makefile @@ -19,4 +19,5 @@ env: cd dockerfiles; cd python-java; $(MAKE) clean: - rm tools/fillpred_model/model.pickle \ No newline at end of file + rm tools/fillpred_model/model.pickle + rm data/kres_out/* -rf \ No newline at end of file diff --git a/README.md b/README.md index 50a01e8..b4ce5da 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,9 @@ Check out `./tools/srl-20131216/README.md`. ## Scripts Check all possible xml tags (that occur after the tag. -'cat F0006347.xml.parsed.xml | grep -A 999999999999 -e '' | grep -o -e '<[^" "]*' | sort | uniq' +``` bash +cat F0006347.xml.parsed.xml | grep -A 999999999999 -e '' | grep -o -e '<[^" "]*' | sort | uniq +``` ## Tools * Parser for reading both `SSJ500k 2.1 TEI xml` and `Kres F....xml.parsed.xml"` files found in `./tools/parser/parser.py`. @@ -26,6 +28,11 @@ $ cd ./cjvt-srl-tagging $ make ``` +If you want to run it on a server overnight, you might want to use `nohup`, so you can close the ssh connection without closing the process. +``` +$ nohup make > tagging.log & +``` + # Makefile The Makefile follows certain steps: 1. Create a fillpred model. 
diff --git a/tools/gen_json.py b/tools/gen_json.py index e966087..4c489a3 100644 --- a/tools/gen_json.py +++ b/tools/gen_json.py @@ -61,6 +61,7 @@ def get_dep_rel(token): par = Parser() OUTPATH.mkdir(exist_ok=True) +print("Start generating .json files.") for infile in [x for x in INPATH.iterdir() if x.is_file()]: origfile = get_origfile(infile) orig_dict = par.parse_tei(origfile) @@ -98,3 +99,5 @@ for infile in [x for x in INPATH.iterdir() if x.is_file()]: outfile = (OUTPATH / infile.name).with_suffix(".json") with outfile.open("w") as fp: json.dump(outdata, fp) + print("SRL relations written to: ", outfile) +print("Finished generating .json files.") \ No newline at end of file diff --git a/tools/parse_all.py b/tools/parse_all.py index 4039701..6b76782 100644 --- a/tools/parse_all.py +++ b/tools/parse_all.py @@ -32,9 +32,10 @@ print("parsing kres") # kres_file = "../data/kres_example/F0019343.xml.parsed.xml" OUTDIR.mkdir(exist_ok=True) -for kres_file in [x for x in INDIR.iterdir() if x.is_file()]: +infiles = [x for x in INDIR.iterdir() if x.is_file()] +for i, kres_file in enumerate(infiles): - print("Processing file: " + str(kres_file)) + print("Processing file ({}/{}): {}".format(i+1, len(infiles), str(kres_file))) res_dict = par.parse_tei(kres_file) kres_out_str = "" diff --git a/tools/tools.cfg b/tools/tools.cfg index 2cc3fcd..201a880 100644 --- a/tools/tools.cfg +++ b/tools/tools.cfg @@ -1,6 +1,6 @@ [tools] kres_orig = ../data/kres_example -kres_tsv = ../data/kres_example_tsv -kres_srl = ../data/kres_example_srl -kres_json = ../data/kres_example_json +kres_tsv = ../data/kres_out/1_tsv +kres_srl = ../data/kres_out/_srl +kres_json = ../data/kres_out/final_json debug = False \ No newline at end of file