From 577c8418d20be4367277a82bb5954f49a51ad201 Mon Sep 17 00:00:00 2001 From: voje Date: Wed, 27 Feb 2019 16:58:04 +0100 Subject: [PATCH] tmp --- Makefile | 6 +-- data/kres_example_json/F0006347.srl.json | 1 + data/kres_example_json/F0012782.srl.json | 1 + data/kres_example_json/F0019343.srl.json | 1 + data/kres_example_json/F0025741.srl.json | 1 + data/kres_example_json/F0032377.srl.json | 1 + data/kres_example_json/F0039402.srl.json | 1 + tools/gen_json.py | 67 +++++++++++++++++++----- tools/parse_all.py | 4 +- tools/parser/parser.py | 7 +-- 10 files changed, 67 insertions(+), 23 deletions(-) create mode 100644 data/kres_example_json/F0006347.srl.json create mode 100644 data/kres_example_json/F0012782.srl.json create mode 100644 data/kres_example_json/F0019343.srl.json create mode 100644 data/kres_example_json/F0025741.srl.json create mode 100644 data/kres_example_json/F0032377.srl.json create mode 100644 data/kres_example_json/F0039402.srl.json diff --git a/Makefile b/Makefile index ad7e8a0..0092763 100644 --- a/Makefile +++ b/Makefile @@ -2,17 +2,17 @@ all: json_files -json_files: #TODO srl_tagged_files +json_files: srl_tagged_files cd tools; python3 gen_json.py srl_tagged_files: tsv_files # cd tools/srl-20131216; ./scripts/parse_srl_only_mod.sh; cd - cd tools/srl-20131216; ./tag_all.sh ../../data/kres_example_tsv ../../data/kres_example_srl -tsv_files: fillpred_model/model.pickle +tsv_files: tools/fillpred_model/model.pickle cd tools; python3 parse_all.py -fillpred_model/model.pickle: +tools/fillpred_model/model.pickle: cd tools/fillpred_model; $(MAKE) env: diff --git a/data/kres_example_json/F0006347.srl.json b/data/kres_example_json/F0006347.srl.json new file mode 100644 index 0000000..d6f2deb --- /dev/null +++ b/data/kres_example_json/F0006347.srl.json @@ -0,0 +1 @@ +{"F0006347.50.1": [{"dep": "3", "from": "6", "arg": "CAUSE"}, {"dep": "5", "from": "6", "arg": "PAT"}, {"dep": "11", "from": "12", "arg": "ACT"}, {"dep": "16", "from": "12", "arg": "LOC"}], "F0006347.50.0": [], "F0006347.50.2": [{"dep": "5", "from": "14", "arg": "TIME"}, {"dep": "12", "from": "14", "arg": "ACT"}, {"dep": "15", "from": "14", "arg": "MWPRED"}, {"dep": "18", "from": "20", "arg": "ACT"}, {"dep": "19", "from": "20", "arg": "TIME"}, {"dep": "23", "from": "20", "arg": "PAT"}, {"dep": "26", "from": "13", "arg": "REC"}, {"dep": "29", "from": "30", "arg": "MANN"}, {"dep": "31", "from": "30", "arg": "PAT"}], "F0006347.50.3": [{"dep": "14", "from": "19", "arg": "PAT"}, {"dep": "16", "from": "19", "arg": "LOC"}, {"dep": "30", "from": "29", "arg": "PAT"}, {"dep": "32", "from": "29", "arg": "PAT"}, {"dep": "42", "from": "43", "arg": "DUR"}, {"dep": "45", "from": "43", "arg": "PAT"}, {"dep": "48", "from": "43", "arg": "LOC"}, {"dep": "56", "from": "57", "arg": "TIME"}, {"dep": "57", "from": "55", "arg": "MODAL"}, {"dep": "62", "from": "57", "arg": "PAT"}, {"dep": "67", "from": "66", "arg": "TIME"}, {"dep": "73", "from": "66", "arg": "ACT"}]} \ No newline at end of file diff --git a/data/kres_example_json/F0012782.srl.json b/data/kres_example_json/F0012782.srl.json new file mode 100644 index 0000000..619b588 --- /dev/null +++ b/data/kres_example_json/F0012782.srl.json @@ -0,0 +1 @@ +{"F0012782.9.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "MWPRED"}, {"dep": "6", "from": "4", "arg": "TIME"}], "F0012782.5.0": [], "F0012782.6.0": [{"dep": "56", "from": "54", "arg": "MEANS"}], "F0012782.10.0": [], "F0012782.11.0": [], "F0012782.8.0": [], "F0012782.7.0": [{"dep": "3", "from": "7", "arg": "COND"}, {"dep": "6", "from": "7", "arg": "ACT"}, {"dep": "8", "from": "7", "arg": "MANN"}]} \ No newline at end of file diff --git a/data/kres_example_json/F0019343.srl.json b/data/kres_example_json/F0019343.srl.json new file mode 100644 index 0000000..b41da3e --- /dev/null +++ b/data/kres_example_json/F0019343.srl.json @@ -0,0 +1 @@ +{"F0019343.130.0": [], "F0019343.87.0": [], "F0019343.73.0": [], "F0019343.41.0": [], "F0019343.29.0": [], "F0019343.43.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "PAT"}], "F0019343.12.0": [{"dep": "2", "from": "4", "arg": "GOAL"}, {"dep": "5", "from": "4", "arg": "ACT"}], "F0019343.31.0": [{"dep": "10", "from": "9", "arg": "RESLT"}, {"dep": "13", "from": "9", "arg": "LOC"}], "F0019343.86.0": [], "F0019343.100.0": [], "F0019343.77.0": [], "F0019343.75.0": [], "F0019343.35.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "PAT"}], "F0019343.37.0": [{"dep": "1", "from": "5", "arg": "ACT"}], "F0019343.6.0": [], "F0019343.79.0": [], "F0019343.109.5": [], "F0019343.21.0": [{"dep": "25", "from": "26", "arg": "ACT"}, {"dep": "28", "from": "26", "arg": "REG"}], "F0019343.74.0": [], "F0019343.131.2": [], "F0019343.20.0": [], "F0019343.102.0": [{"dep": "13", "from": "10", "arg": "LOC"}], "F0019343.103.0": [{"dep": "1", "from": "8", "arg": "ACT"}, {"dep": "6", "from": "8", "arg": "REG"}, {"dep": "9", "from": "8", "arg": "REG"}, {"dep": "37", "from": "38", "arg": "ACT"}, {"dep": "39", "from": "38", "arg": "TIME"}, {"dep": "40", "from": "38", "arg": "PAT"}], "F0019343.109.0": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "9", "from": "13", "arg": "TIME"}, {"dep": "11", "from": "13", "arg": "CAUSE"}, {"dep": "14", "from": "13", "arg": "PAT"}, {"dep": "20", "from": "18", "arg": "PAT"}, {"dep": "28", "from": "26", "arg": "MEANS"}], "F0019343.16.0": [], "F0019343.17.0": [{"dep": "6", "from": "7", "arg": "PAT"}, {"dep": "8", "from": "7", "arg": "MODAL"}, {"dep": "9", "from": "8", "arg": "ACT"}, {"dep": "12", "from": "8", "arg": "REG"}], "F0019343.8.0": [{"dep": "1", "from": "14", "arg": "ACT"}, {"dep": "4", "from": "14", "arg": "ORIG"}, {"dep": "10", "from": "14", "arg": "ORIG"}], "F0019343.67.0": [{"dep": "2", "from": "1", "arg": "PAT"}, {"dep": "4", "from": "1", "arg": "LOC"}, {"dep": "8", "from": "12", "arg": "GOAL"}, {"dep": "15", "from": "12", "arg": "GOAL"}], "F0019343.111.0": [{"dep": "5", "from": "6", "arg": "PAT"}, {"dep": "7", "from": "8", "arg": "ACT"}, {"dep": "8", "from": "6", "arg": "MODAL"}, {"dep": "9", "from": "8", "arg": "TIME"}, {"dep": "18", "from": "16", "arg": "LOC"}], "F0019343.95.0": [], "F0019343.106.1": [{"dep": "10", "from": "11", "arg": "ACT"}, {"dep": "12", "from": "11", "arg": "TIME"}, {"dep": "13", "from": "11", "arg": "PAT"}, {"dep": "24", "from": "22", "arg": "REG"}, {"dep": "29", "from": "22", "arg": "PAT"}], "F0019343.39.0": [], "F0019343.63.0": [], "F0019343.33.0": [{"dep": "22", "from": "20", "arg": "REG"}], "F0019343.105.1": [], "F0019343.114.0": [], "F0019343.58.0": [], "F0019343.118.0": [], "F0019343.101.1": [], "F0019343.131.1": [], "F0019343.129.1": [{"dep": "5", "from": "16", "arg": "COND"}, {"dep": "8", "from": "5", "arg": "ORIG"}, {"dep": "11", "from": "5", "arg": "REG"}, {"dep": "15", "from": "16", "arg": "ACT"}, {"dep": "17", "from": "16", "arg": "RESLT"}, {"dep": "18", "from": "16", "arg": "MWPRED"}, {"dep": "21", "from": "18", "arg": "LOC"}, {"dep": "23", "from": "24", "arg": "MANN"}, {"dep": "26", "from": "24", "arg": "REG"}], "F0019343.123.1": [{"dep": "5", "from": "8", "arg": "ACT"}, {"dep": "11", "from": "8", "arg": "ORIG"}], "F0019343.56.0": [], "F0019343.32.0": [], "F0019343.14.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "MWPRED"}, {"dep": "6", "from": "4", "arg": "TIME"}], "F0019343.61.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "ACT"}], "F0019343.51.0": [{"dep": "3", "from": "10", "arg": "ACT"}, {"dep": "9", "from": "10", "arg": "LOC"}, {"dep": "12", "from": "10", "arg": "ACT"}], "F0019343.10.0": [], "F0019343.76.0": [], "F0019343.9.0": [{"dep": "2", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "7", "arg": "PAT"}], "F0019343.126.0": [], "F0019343.109.3": [], "F0019343.40.0": [{"dep": "32", "from": "30", "arg": "REG"}], "F0019343.109.4": [], "F0019343.122.0": [], "F0019343.30.0": [{"dep": "8", "from": "6", "arg": "PAT"}, {"dep": "54", "from": "53", "arg": "ACT"}], "F0019343.28.0": [{"dep": "6", "from": "5", "arg": "ACT"}, {"dep": "23", "from": "21", "arg": "REG"}], "F0019343.107.0": [{"dep": "3", "from": "14", "arg": "CAUSE"}, {"dep": "10", "from": "14", "arg": "TIME"}, {"dep": "15", "from": "14", "arg": "TIME"}, {"dep": "21", "from": "22", "arg": "ACT"}, {"dep": "23", "from": "22", "arg": "FREQ"}, {"dep": "28", "from": "26", "arg": "REG"}, {"dep": "50", "from": "43", "arg": "MANN"}], "F0019343.46.0": [], "F0019343.105.0": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "8", "from": "7", "arg": "PAT"}], "F0019343.128.0": [], "F0019343.109.2": [], "F0019343.119.0": [{"dep": "10", "from": "11", "arg": "ACT"}, {"dep": "15", "from": "11", "arg": "PAT"}], "F0019343.5.0": [{"dep": "9", "from": "10", "arg": "PAT"}, {"dep": "11", "from": "10", "arg": "MODAL"}, {"dep": "12", "from": "11", "arg": "ACT"}, {"dep": "39", "from": "37", "arg": "LOC"}], "F0019343.47.0": [], "F0019343.125.0": [{"dep": "18", "from": "19", "arg": "DUR"}, {"dep": "23", "from": "21", "arg": "PAT"}, {"dep": "27", "from": "31", "arg": "LOC"}, {"dep": "34", "from": "31", "arg": "ACT"}, {"dep": "48", "from": "49", "arg": "MANN"}, {"dep": "53", "from": "57", "arg": "REC"}, {"dep": "55", "from": "57", "arg": "ACT"}, {"dep": "56", "from": "57", "arg": "MANN"}], "F0019343.36.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "ACT"}], "F0019343.55.0": [], "F0019343.109.7": [], "F0019343.106.0": [{"dep": "4", "from": "14", "arg": "PAT"}, {"dep": "15", "from": "14", "arg": "RESLT"}, {"dep": "16", "from": "14", "arg": "PAT"}], "F0019343.52.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "PAT"}], "F0019343.93.0": [], "F0019343.7.0": [{"dep": "3", "from": "1", "arg": "ACT"}, {"dep": "5", "from": "1", "arg": "PAT"}, {"dep": "8", "from": "9", "arg": "PAT"}, {"dep": "10", "from": "9", "arg": "MODAL"}, {"dep": "11", "from": "10", "arg": "ACT"}, {"dep": "14", "from": "10", "arg": "REG"}, {"dep": "38", "from": "36", "arg": "LOC"}], "F0019343.65.0": [], "F0019343.98.0": [], "F0019343.101.0": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "5", "from": "3", "arg": "PAT"}, {"dep": "16", "from": "14", "arg": "TIME"}, {"dep": "18", "from": "14", "arg": "REG"}, {"dep": "26", "from": "14", "arg": "LOC"}], "F0019343.42.0": [{"dep": "3", "from": "10", "arg": "ACT"}, {"dep": "9", "from": "10", "arg": "LOC"}, {"dep": "12", "from": "10", "arg": "QUANT"}], "F0019343.50.0": [], "F0019343.108.0": [], "F0019343.99.0": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "9", "from": "7", "arg": "PAT"}], "F0019343.116.0": [], "F0019343.15.0": [], "F0019343.68.0": [], "F0019343.57.0": [{"dep": "32", "from": "30", "arg": "REG"}], "F0019343.48.0": [], "F0019343.49.0": [{"dep": "32", "from": "30", "arg": "REG"}], "F0019343.131.0": [], "F0019343.97.0": [{"dep": "2", "from": "12", "arg": "PAT"}, {"dep": "14", "from": "12", "arg": "AIM"}], "F0019343.11.0": [], "F0019343.123.0": [{"dep": "2", "from": "4", "arg": "ACT"}, {"dep": "5", "from": "4", "arg": "PAT"}], "F0019343.24.0": [], "F0019343.110.0": [], "F0019343.82.0": [], "F0019343.38.0": [], "F0019343.127.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "MWPRED"}, {"dep": "6", "from": "4", "arg": "TIME"}, {"dep": "10", "from": "16", "arg": "TIME"}, {"dep": "15", "from": "16", "arg": "MANN"}], "F0019343.19.0": [], "F0019343.104.0": [], "F0019343.117.1": [], "F0019343.25.0": [{"dep": "9", "from": "6", "arg": "ACT"}], "F0019343.109.1": [], "F0019343.90.0": [], "F0019343.129.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "5", "from": "3", "arg": "PAT"}], "F0019343.64.0": [], "F0019343.54.0": [], "F0019343.34.0": [{"dep": "3", "from": "10", "arg": "ACT"}, {"dep": "9", "from": "10", "arg": "LOC"}, {"dep": "12", "from": "10", "arg": "QUANT"}], "F0019343.60.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "PAT"}], "F0019343.44.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "ACT"}], "F0019343.120.0": [], "F0019343.18.0": [], "F0019343.59.0": [{"dep": "3", "from": "10", "arg": "ACT"}, {"dep": "9", "from": "10", "arg": "LOC"}, {"dep": "12", "from": "10", "arg": "QUANT"}], "F0019343.113.0": [{"dep": "10", "from": "12", "arg": "ACT"}, {"dep": "15", "from": "12", "arg": "PAT"}, {"dep": "20", "from": "21", "arg": "PAT"}, {"dep": "24", "from": "25", "arg": "PAT"}, {"dep": "27", "from": "25", "arg": "LOC"}], "F0019343.115.0": [{"dep": "2", "from": "4", "arg": "ACT"}, {"dep": "7", "from": "4", "arg": "PAT"}, {"dep": "14", "from": "13", "arg": "ACT"}, {"dep": "15", "from": "13", "arg": "PAT"}, {"dep": "20", "from": "23", "arg": "LOC"}, {"dep": "24", "from": "23", "arg": "PAT"}], "F0019343.109.6": [], "F0019343.23.0": [], "F0019343.22.0": [], "F0019343.112.0": [], "F0019343.26.0": [{"dep": "15", "from": "25", "arg": "PAT"}, {"dep": "19", "from": "25", "arg": "TIME"}, {"dep": "21", "from": "25", "arg": "CAUSE"}, {"dep": "23", "from": "25", "arg": "TIME"}, {"dep": "26", "from": "25", "arg": "ACT"}, {"dep": "30", "from": "33", "arg": "PAT"}, {"dep": "32", "from": "33", "arg": "ACT"}, {"dep": "33", "from": "25", "arg": "TIME"}, {"dep": "35", "from": "33", "arg": "LOC"}], "F0019343.121.0": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "8", "from": "9", "arg": "MANN"}, {"dep": "10", "from": "9", "arg": "PAT"}, {"dep": "13", "from": "9", "arg": "TIME"}, {"dep": "22", "from": "21", "arg": "ACT"}, {"dep": "25", "from": "21", "arg": "LOC"}], "F0019343.124.0": [{"dep": "19", "from": "17", "arg": "REG"}, {"dep": "37", "from": "36", "arg": "PAT"}], "F0019343.117.0": [{"dep": "12", "from": "11", "arg": "RESLT"}, {"dep": "15", "from": "11", "arg": "LOC"}, {"dep": "22", "from": "23", "arg": "MANN"}, {"dep": "25", "from": "23", "arg": "CAUSE"}]} \ No newline at end of file diff --git a/data/kres_example_json/F0025741.srl.json b/data/kres_example_json/F0025741.srl.json new file mode 100644 index 0000000..7f7a30a --- /dev/null +++ b/data/kres_example_json/F0025741.srl.json @@ -0,0 +1 @@ +{"F0025741.69.1": [], "F0025741.54.2": [{"dep": "12", "from": "13", "arg": "PAT"}, {"dep": "15", "from": "13", "arg": "ACT"}, {"dep": "28", "from": "33", "arg": "ACT"}, {"dep": "32", "from": "33", "arg": "PAT"}], "F0025741.34.0": [], "F0025741.56.2": [{"dep": "3", "from": "1", "arg": "ACT"}, {"dep": "8", "from": "9", "arg": "PAT"}, {"dep": "10", "from": "9", "arg": "ACT"}, {"dep": "14", "from": "15", "arg": "PAT"}, {"dep": "18", "from": "15", "arg": "DUR"}], "F0025741.52.0": [{"dep": "3", "from": "5", "arg": "DUR"}, {"dep": "4", "from": "5", "arg": "PAT"}, {"dep": "7", "from": "5", "arg": "RESLT"}], "F0025741.64.3": [{"dep": "1", "from": "3", "arg": "TIME"}, {"dep": "5", "from": "3", "arg": "PAT"}, {"dep": "11", "from": "13", "arg": "REC"}, {"dep": "14", "from": "13", "arg": "PAT"}, {"dep": "16", "from": "13", "arg": "FREQ"}], "F0025741.36.0": [], "F0025741.63.0": [], "F0025741.56.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "5", "from": "6", "arg": "MANN"}, {"dep": "7", "from": "6", "arg": "PAT"}, {"dep": "13", "from": "16", "arg": "TIME"}, {"dep": "15", "from": "16", "arg": "ORIG"}, {"dep": "18", "from": "16", "arg": "RESLT"}], "F0025741.53.0": [{"dep": "2", "from": "9", "arg": "ACT"}, {"dep": "11", "from": "12", "arg": "CAUSE"}, {"dep": "12", "from": "9", "arg": "MWPRED"}, {"dep": "14", "from": "12", "arg": "CAUSE"}], "F0025741.54.4": [{"dep": "4", "from": "5", "arg": "PAT"}, {"dep": "6", "from": "5", "arg": "MODAL"}, {"dep": "8", "from": "6", "arg": "MEANS"}], "F0025741.68.5": [{"dep": "2", "from": "4", "arg": "ACT"}, {"dep": "7", "from": "4", "arg": "TIME"}, {"dep": "9", "from": "4", "arg": "PAT"}, {"dep": "13", "from": "4", "arg": "LOC"}], "F0025741.53.1": [], "F0025741.66.3": [], "F0025741.73.3": [{"dep": "1", "from": "2", "arg": "ACT"}, {"dep": "10", "from": "16", "arg": "ACT"}, {"dep": "14", "from": "16", "arg": "PAT"}], "F0025741.64.2": [{"dep": "1", "from": "5", "arg": "ACT"}, {"dep": "4", "from": "5", "arg": "DUR"}, {"dep": "7", "from": "5", "arg": "PAT"}, {"dep": "9", "from": "11", "arg": "TIME"}, {"dep": "13", "from": "11", "arg": "RESLT"}, {"dep": "15", "from": "11", "arg": "PAT"}], "F0025741.52.3": [{"dep": "2", "from": "13", "arg": "TIME"}, {"dep": "4", "from": "13", "arg": "MEANS"}, {"dep": "8", "from": "13", "arg": "TIME"}, {"dep": "11", "from": "13", "arg": "TIME"}, {"dep": "12", "from": "13", "arg": "PAT"}, {"dep": "15", "from": "13", "arg": "ACT"}, {"dep": "16", "from": "13", "arg": "ACT"}, {"dep": "18", "from": "16", "arg": "CAUSE"}], "F0025741.77.7": [{"dep": "15", "from": "16", "arg": "MANN"}, {"dep": "17", "from": "16", "arg": "TIME"}], "F0025741.73.4": [], "F0025741.50.0": [], "F0025741.77.4": [], "F0025741.59.0": [], "F0025741.82.0": [], "F0025741.85.3": [{"dep": "11", "from": "9", "arg": "PAT"}], "F0025741.54.7": [{"dep": "5", "from": "9", "arg": "MANN"}, {"dep": "8", "from": "9", "arg": "DUR"}, {"dep": "12", "from": "9", "arg": "PAT"}, {"dep": "16", "from": "17", "arg": "TIME"}, {"dep": "17", "from": "15", "arg": "MODAL"}, {"dep": "20", "from": "17", "arg": "REC"}], "F0025741.85.0": [], "F0025741.58.0": [], "F0025741.52.4": [{"dep": "2", "from": "5", "arg": "LOC"}, {"dep": "6", "from": "5", "arg": "ACT"}, {"dep": "19", "from": "24", "arg": "PAT"}, {"dep": "23", "from": "24", "arg": "PAT"}, {"dep": "27", "from": "24", "arg": "PAT"}, {"dep": "37", "from": "35", "arg": "LOC"}], "F0025741.83.1": [{"dep": "6", "from": "9", "arg": "CAUSE"}, {"dep": "7", "from": "9", "arg": "PAT"}, {"dep": "13", "from": "9", "arg": "DUR"}], "F0025741.66.2": [{"dep": "5", "from": "1", "arg": "PAT"}, {"dep": "10", "from": "12", "arg": "PAT"}, {"dep": "11", "from": "12", "arg": "MANN"}, {"dep": "13", "from": "12", "arg": "ACT"}], "F0025741.77.0": [{"dep": "2", "from": "5", "arg": "TIME"}, {"dep": "4", "from": "5", "arg": "RESLT"}, {"dep": "10", "from": "7", "arg": "RESLT"}, {"dep": "15", "from": "13", "arg": "PAT"}], "F0025741.41.0": [], "F0025741.66.0": [], "F0025741.66.1": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "ACT"}], "F0025741.87.0": [{"dep": "9", "from": "12", "arg": "ACT"}, {"dep": "14", "from": "12", "arg": "PAT"}], "F0025741.81.0": [], "F0025741.90.0": [{"dep": "5", "from": "6", "arg": "PAT"}, {"dep": "8", "from": "6", "arg": "GOAL"}, {"dep": "18", "from": "17", "arg": "PAT"}, {"dep": "24", "from": "23", "arg": "PAT"}, {"dep": "32", "from": "34", "arg": "PAT"}, {"dep": "48", "from": "45", "arg": "PAT"}], "F0025741.56.4": [], "F0025741.65.0": [], "F0025741.77.2": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "6", "from": "3", "arg": "PAT"}], "F0025741.56.1": [], "F0025741.63.2": [{"dep": "2", "from": "4", "arg": "REG"}], "F0025741.87.1": [{"dep": "2", "from": "5", "arg": "GOAL"}, {"dep": "4", "from": "5", "arg": "MANN"}, {"dep": "7", "from": "5", "arg": "PAT"}], "F0025741.79.0": [], "F0025741.77.3": [{"dep": "1", "from": "7", "arg": "ACT"}, {"dep": "9", "from": "7", "arg": "SOURCE"}, {"dep": "12", "from": "13", "arg": "PAT"}, {"dep": "14", "from": "13", "arg": "ACT"}], "F0025741.68.6": [{"dep": "3", "from": "9", "arg": "ACT"}, {"dep": "11", "from": "9", "arg": "TIME"}, {"dep": "14", "from": "9", "arg": "ACT"}, {"dep": "27", "from": "28", "arg": "REC"}], "F0025741.85.4": [{"dep": "10", "from": "11", "arg": "TIME"}, {"dep": "13", "from": "11", "arg": "PAT"}], "F0025741.64.0": [], "F0025741.90.2": [{"dep": "2", "from": "3", "arg": "REG"}, {"dep": "5", "from": "3", "arg": "PAT"}, {"dep": "11", "from": "13", "arg": "MANN"}, {"dep": "15", "from": "18", "arg": "MANN"}, {"dep": "17", "from": "18", "arg": "MEANS"}, {"dep": "19", "from": "18", "arg": "DUR"}], "F0025741.55.0": [], "F0025741.64.4": [{"dep": "9", "from": "8", "arg": "ACT"}, {"dep": "17", "from": "16", "arg": "PAT"}, {"dep": "22", "from": "21", "arg": "PAT"}, {"dep": "26", "from": "27", "arg": "ACT"}, {"dep": "28", "from": "27", "arg": "PAT"}], "F0025741.42.0": [], "F0025741.73.2": [], "F0025741.38.0": [{"dep": "12", "from": "13", "arg": "DUR"}, {"dep": "15", "from": "13", "arg": "PAT"}], "F0025741.40.0": [{"dep": "5", "from": "6", "arg": "MANN"}, {"dep": "7", "from": "6", "arg": "ACT"}], "F0025741.44.0": [{"dep": "2", "from": "1", "arg": "ACT"}], "F0025741.85.1": [], "F0025741.71.0": [], "F0025741.52.1": [{"dep": "8", "from": "13", "arg": "GOAL"}, {"dep": "12", "from": "13", "arg": "GOAL"}, {"dep": "14", "from": "13", "arg": "PAT"}, {"dep": "22", "from": "23", "arg": "LOC"}, {"dep": "25", "from": "23", "arg": "PAT"}], "F0025741.68.0": [{"dep": "2", "from": "10", "arg": "PAT"}, {"dep": "4", "from": "10", "arg": "ACT"}, {"dep": "13", "from": "10", "arg": "PAT"}, {"dep": "21", "from": "22", "arg": "ACT"}, {"dep": "23", "from": "22", "arg": "RESLT"}], "F0025741.90.1": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "MODAL"}, {"dep": "5", "from": "4", "arg": "RESLT"}], "F0025741.45.0": [], "F0025741.83.0": [{"dep": "12", "from": "8", "arg": "DUR"}], "F0025741.77.5": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "5", "from": "3", "arg": "PAT"}], "F0025741.39.0": [], "F0025741.64.1": [{"dep": "2", "from": "7", "arg": "DUR"}, {"dep": "4", "from": "7", "arg": "GOAL"}, {"dep": "10", "from": "7", "arg": "LOC"}], "F0025741.77.6": [{"dep": "7", "from": "9", "arg": "FREQ"}, {"dep": "11", "from": "9", "arg": "MANN"}], "F0025741.69.0": [{"dep": "3", "from": "10", "arg": "ACT"}], "F0025741.74.0": [], "F0025741.60.0": [], "F0025741.86.0": [], "F0025741.43.0": [{"dep": "7", "from": "21", "arg": "ACT"}, {"dep": "20", "from": "21", "arg": "MANN"}, {"dep": "22", "from": "21", "arg": "PAT"}, {"dep": "32", "from": "30", "arg": "LOC"}, {"dep": "36", "from": "30", "arg": "ACT"}], "F0025741.71.1": [], "F0025741.49.0": [], "F0025741.37.0": [], "F0025741.76.0": [{"dep": "3", "from": "12", "arg": "ACT"}, {"dep": "11", "from": "12", "arg": "REG"}, {"dep": "13", "from": "12", "arg": "PAT"}], "F0025741.54.3": [{"dep": "3", "from": "10", "arg": "COND"}, {"dep": "8", "from": "10", "arg": "ACT"}, {"dep": "14", "from": "10", "arg": "REC"}], "F0025741.52.2": [], "F0025741.68.4": [{"dep": "3", "from": "11", "arg": "TIME"}, {"dep": "7", "from": "11", "arg": "ACT"}, {"dep": "10", "from": "11", "arg": "TIME"}, {"dep": "14", "from": "11", "arg": "CAUSE"}], "F0025741.68.3": [{"dep": "1", "from": "3", "arg": "MANN"}, {"dep": "4", "from": "8", "arg": "ACT"}, {"dep": "6", "from": "8", "arg": "TIME"}, {"dep": "8", "from": "3", "arg": "MODAL"}, {"dep": "9", "from": "8", "arg": "PAT"}, {"dep": "19", "from": "21", "arg": "GOAL"}, {"dep": "22", "from": "21", "arg": "MANN"}, {"dep": "23", "from": "21", "arg": "PAT"}], "F0025741.63.1": [{"dep": "6", "from": "9", "arg": "PAT"}, {"dep": "8", "from": "9", "arg": "PAT"}, {"dep": "14", "from": "12", "arg": "PAT"}], "F0025741.68.2": [{"dep": "23", "from": "31", "arg": "PAT"}, {"dep": "27", "from": "31", "arg": "TIME"}, {"dep": "29", "from": "31", "arg": "ACT"}], "F0025741.54.0": [{"dep": "2", "from": "4", "arg": "LOC"}, {"dep": "5", "from": "4", "arg": "MODAL"}, {"dep": "6", "from": "5", "arg": "PAT"}, {"dep": "17", "from": "20", "arg": "PAT"}, {"dep": "19", "from": "20", "arg": "MANN"}, {"dep": "22", "from": "20", "arg": "GOAL"}, {"dep": "38", "from": "36", "arg": "PAT"}, {"dep": "45", "from": "44", "arg": "PAT"}], "F0025741.80.0": [], "F0025741.78.0": [{"dep": "3", "from": "5", "arg": "CAUSE"}, {"dep": "12", "from": "18", "arg": "PAT"}, {"dep": "14", "from": "18", "arg": "EVENT"}, {"dep": "15", "from": "18", "arg": "MANN"}, {"dep": "17", "from": "18", "arg": "MANN"}], "F0025741.48.0": [], "F0025741.54.5": [{"dep": "3", "from": "4", "arg": "LOC"}, {"dep": "13", "from": "12", "arg": "MODAL"}, {"dep": "15", "from": "13", "arg": "PAT"}], "F0025741.72.0": [], "F0025741.77.1": [], "F0025741.61.0": [{"dep": "5", "from": "8", "arg": "MANN"}, {"dep": "10", "from": "8", "arg": "PAT"}], "F0025741.68.7": [{"dep": "1", "from": "10", "arg": "ACT"}, {"dep": "7", "from": "10", "arg": "TIME"}], "F0025741.46.0": [], "F0025741.66.4": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "ACT"}, {"dep": "7", "from": "11", "arg": "PAT"}, {"dep": "10", "from": "11", "arg": "LOC"}], "F0025741.66.8": [{"dep": "4", "from": "5", "arg": "TIME"}, {"dep": "6", "from": "5", "arg": "ACT"}], "F0025741.35.0": [], "F0025741.75.0": [], "F0025741.73.1": [{"dep": "2", "from": "7", "arg": "LOC"}, {"dep": "5", "from": "7", "arg": "ACT"}, {"dep": "6", "from": "7", "arg": "MANN"}, {"dep": "10", "from": "7", "arg": "PAT"}], "F0025741.88.0": [], "F0025741.57.0": [{"dep": "3", "from": "4", "arg": "PAT"}, {"dep": "8", "from": "4", "arg": "QUANT"}], "F0025741.83.2": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "5", "from": "3", "arg": "LOC"}], "F0025741.54.6": [{"dep": "1", "from": "2", "arg": "ACT"}, {"dep": "3", "from": "2", "arg": "LOC"}], "F0025741.84.0": [], "F0025741.33.0": [], "F0025741.89.3": [{"dep": "7", "from": "5", "arg": "PAT"}, {"dep": "16", "from": "12", "arg": "PAT"}, {"dep": "21", "from": "19", "arg": "PAT"}], "F0025741.89.1": [{"dep": "2", "from": "1", "arg": "MODAL"}, {"dep": "4", "from": "2", "arg": "RESLT"}, {"dep": "11", "from": "10", "arg": "MODAL"}, {"dep": "12", "from": "11", "arg": "PAT"}, {"dep": "14", "from": "18", "arg": "ACT"}, {"dep": "16", "from": "14", "arg": "PAT"}], "F0025741.54.1": [{"dep": "29", "from": "31", "arg": "RESLT"}, {"dep": "30", "from": "31", "arg": "RESLT"}, {"dep": "32", "from": "31", "arg": "PAT"}, {"dep": "44", "from": "31", "arg": "PAT"}], "F0025741.89.2": [{"dep": "2", "from": "7", "arg": "MEANS"}, {"dep": "8", "from": "7", "arg": "MODAL"}], "F0025741.66.5": [{"dep": "18", "from": "20", "arg": "ACT"}, {"dep": "19", "from": "20", "arg": "MANN"}, {"dep": "23", "from": "20", "arg": "ORIG"}], "F0025741.67.0": [], "F0025741.89.0": [{"dep": "3", "from": "1", "arg": "PAT"}, {"dep": "7", "from": "6", "arg": "ACT"}, {"dep": "12", "from": "10", "arg": "PAT"}], "F0025741.70.0": [{"dep": "2", "from": "4", "arg": "MANN"}, {"dep": "5", "from": "4", "arg": "GOAL"}], "F0025741.47.0": [{"dep": "2", "from": "5", "arg": "TIME"}, {"dep": "4", "from": "5", "arg": "PAT"}, {"dep": "7", "from": "5", "arg": "QUANT"}], "F0025741.78.1": [{"dep": "3", "from": "7", "arg": "COND"}, {"dep": "6", "from": "7", "arg": "LOC"}, {"dep": "9", "from": "7", "arg": "PAT"}, {"dep": "18", "from": "21", "arg": "PAT"}, {"dep": "19", "from": "21", "arg": "FREQ"}, {"dep": "20", "from": "21", "arg": "MANN"}], "F0025741.85.2": [{"dep": "3", "from": "1", "arg": "MANN"}, {"dep": "6", "from": "1", "arg": "LOC"}, {"dep": "10", "from": "1", "arg": "PAT"}], "F0025741.66.6": [{"dep": "3", "from": "6", "arg": "ACT"}, {"dep": "9", "from": "6", "arg": "PAT"}, {"dep": "22", "from": "27", "arg": "ACT"}, {"dep": "24", "from": "27", "arg": "TIME"}, {"dep": "28", "from": "27", "arg": "PAT"}], "F0025741.73.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "6", "from": "3", "arg": "PAT"}, {"dep": "9", "from": "3", "arg": "LOC"}], "F0025741.68.1": [{"dep": "1", "from": "6", "arg": "ACT"}, {"dep": "8", "from": "6", "arg": "PAT"}], "F0025741.62.0": [{"dep": "2", "from": "4", "arg": "REG"}, {"dep": "5", "from": "4", "arg": "RESLT"}, {"dep": "7", "from": "4", "arg": "ACT"}], "F0025741.66.7": [{"dep": "31", "from": "29", "arg": "MANN"}, {"dep": "35", "from": "33", "arg": "ACT"}], "F0025741.56.3": [{"dep": "11", "from": "16", "arg": "TIME"}, {"dep": "14", "from": "16", "arg": "REG"}, {"dep": "15", "from": "16", "arg": "MANN"}, {"dep": "17", "from": "16", "arg": "ACT"}, {"dep": "24", "from": "25", "arg": "MANN"}, {"dep": "26", "from": "25", "arg": "PAT"}]} \ No newline at end of file diff --git a/data/kres_example_json/F0032377.srl.json b/data/kres_example_json/F0032377.srl.json new file mode 100644 index 0000000..50f3ec1 --- /dev/null +++ b/data/kres_example_json/F0032377.srl.json @@ -0,0 +1 @@ +{"F0032377.38.0": [], "F0032377.32.4": [], "F0032377.29.0": [], "F0032377.33.0": [{"dep": "10", "from": "11", "arg": "GOAL"}, {"dep": "12", "from": "11", "arg": "ACT"}], "F0032377.25.0": [{"dep": "5", "from": "4", "arg": "PAT"}, {"dep": "11", "from": "13", "arg": "REC"}, {"dep": "14", "from": "13", "arg": "PAT"}, {"dep": "16", "from": "13", "arg": "LOC"}, {"dep": "19", "from": "23", "arg": "LOC"}, {"dep": "21", "from": "23", "arg": "TIME"}, {"dep": "22", "from": "23", "arg": "PAT"}, {"dep": "24", "from": "23", "arg": "ACT"}], "F0032377.32.0": [], "F0032377.35.1": [{"dep": "1", "from": "8", "arg": "ACT"}, {"dep": "7", "from": "8", "arg": "CAUSE"}, {"dep": "9", "from": "8", "arg": "PAT"}, {"dep": "12", "from": "13", "arg": "TIME"}], "F0032377.33.1": [{"dep": "2", "from": "3", "arg": "LOC"}, {"dep": "9", "from": "10", "arg": "ACT"}, {"dep": "13", "from": "10", "arg": "REG"}, {"dep": "17", "from": "19", "arg": "ACT"}, {"dep": "21", "from": "19", "arg": "GOAL"}, {"dep": "27", "from": "26", "arg": "PAT"}, {"dep": "33", "from": "34", "arg": "MANN"}, {"dep": "36", "from": "34", "arg": "PAT"}], "F0032377.32.5": [{"dep": "6", "from": "5", "arg": "MANN"}, {"dep": "13", "from": "16", "arg": "PAT"}, {"dep": "15", "from": "16", "arg": "TIME"}, {"dep": "18", "from": "16", "arg": "RESLT"}], "F0032377.33.2": [{"dep": "3", "from": "1", "arg": "PAT"}, {"dep": "8", "from": "5", "arg": "PAT"}], "F0032377.26.3": [{"dep": "1", "from": "3", "arg": "TIME"}, {"dep": "9", "from": "8", "arg": "ACT"}, {"dep": "13", "from": "14", "arg": "TIME"}, {"dep": "17", "from": "18", "arg": "LOC"}, {"dep": "18", "from": "14", "arg": "PAT"}], "F0032377.37.0": [], "F0032377.25.2": [{"dep": "1", "from": "3", "arg": "REC"}, {"dep": "6", "from": "9", "arg": "PAT"}, {"dep": "8", "from": "9", "arg": "ACT"}, {"dep": "11", "from": "9", "arg": "LOC"}, {"dep": "18", "from": "15", "arg": "RESLT"}], "F0032377.36.0": [], "F0032377.32.1": [{"dep": "1", "from": "10", "arg": "ACT"}, {"dep": "5", "from": "10", "arg": "MANN"}, {"dep": "12", "from": "10", "arg": "GOAL"}], "F0032377.26.1": [{"dep": "1", "from": "4", "arg": "PAT"}, {"dep": "3", "from": "4", "arg": "TIME"}, {"dep": "6", "from": "4", "arg": "GOAL"}, {"dep": "9", "from": "13", "arg": "TIME"}, {"dep": "12", "from": "13", "arg": "DUR"}, {"dep": "15", "from": "13", "arg": "PAT"}, {"dep": "20", "from": "18", "arg": "PAT"}], "F0032377.24.0": [{"dep": "1", "from": "2", "arg": "MANN"}, {"dep": "4", "from": "2", "arg": "ACT"}], "F0032377.31.0": [], "F0032377.30.0": [], "F0032377.23.0": [], "F0032377.32.3": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "PAT"}, {"dep": "8", "from": "9", "arg": "PAT"}, {"dep": "11", "from": "9", "arg": "PAT"}], "F0032377.26.0": [{"dep": "1", "from": "4", "arg": "ACT"}, {"dep": "10", "from": "11", "arg": "QUANT"}, {"dep": "13", "from": "11", "arg": "PAT"}, {"dep": "18", "from": "19", "arg": "TIME"}], "F0032377.27.0": [], "F0032377.34.0": [], "F0032377.28.0": [{"dep": "4", "from": "3", "arg": "PAT"}], "F0032377.32.2": [{"dep": "1", "from": "5", "arg": "TIME"}, {"dep": "2", "from": "5", "arg": "REC"}, {"dep": "3", "from": "5", "arg": "PAT"}, {"dep": "7", "from": "5", "arg": "ACT"}, {"dep": "11", "from": "12", "arg": "REC"}, {"dep": "13", "from": "12", "arg": "ACT"}], "F0032377.26.2": [{"dep": "2", "from": "4", "arg": "ACT"}, {"dep": "5", "from": "4", "arg": "ACT"}], "F0032377.35.2": [{"dep": "8", "from": "7", "arg": "ACT"}, {"dep": "12", "from": "14", "arg": "REC"}, {"dep": "13", "from": "14", "arg": "MANN"}, {"dep": "15", "from": "14", "arg": "PAT"}], "F0032377.35.0": [{"dep": "9", "from": "12", "arg": "LOC"}, {"dep": "15", "from": "12", "arg": "PAT"}, {"dep": "20", "from": "21", "arg": "TIME"}, {"dep": "23", "from": "21", "arg": "GOAL"}], "F0032377.25.1": []} \ No newline at end of file diff --git a/data/kres_example_json/F0039402.srl.json b/data/kres_example_json/F0039402.srl.json new file mode 100644 index 0000000..6cde334 --- /dev/null +++ b/data/kres_example_json/F0039402.srl.json @@ -0,0 +1 @@ +{"F0039402.11.0": [{"dep": "1", "from": "5", "arg": "ACT"}, {"dep": "7", "from": "5", "arg": "TIME"}], "F0039402.10.0": [{"dep": "2", "from": "8", "arg": "ACT"}, {"dep": "11", "from": "8", "arg": "PAT"}, {"dep": "16", "from": "14", "arg": "RESLT"}], "F0039402.12.1": [{"dep": "1", "from": "9", "arg": "ACT"}, {"dep": "5", "from": "9", "arg": "TIME"}, {"dep": "8", "from": "9", "arg": "PAT"}, {"dep": "11", "from": "9", "arg": "PAT"}], "F0039402.15.0": [{"dep": "2", "from": "10", "arg": "MANN"}, {"dep": "7", "from": "10", "arg": "TIME"}, {"dep": "11", "from": "10", "arg": "ACT"}, {"dep": "13", "from": "10", "arg": "RESLT"}, {"dep": "20", "from": "22", "arg": "PAT"}, {"dep": "24", "from": "22", "arg": "TIME"}], "F0039402.14.2": [{"dep": "9", "from": "17", "arg": "ACT"}, {"dep": "13", "from": "17", "arg": "PAT"}, {"dep": "16", "from": "17", "arg": "LOC"}, {"dep": "20", "from": "17", "arg": "LOC"}, {"dep": "32", "from": "31", "arg": "ACT"}], "F0039402.7.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "ACT"}, {"dep": "22", "from": "24", "arg": "TIME"}, {"dep": "25", "from": "24", "arg": "PAT"}], "F0039402.5.2": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "6", "from": "3", "arg": "ACT"}, {"dep": "16", "from": "25", "arg": "ACT"}, {"dep": "20", "from": "25", "arg": "COND"}, {"dep": "27", "from": "25", "arg": "PAT"}], "F0039402.15.2": [], "F0039402.8.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "11", "from": "9", "arg": "MEANS"}, {"dep": "31", "from": "33", "arg": "PAT"}, {"dep": "34", "from": "33", "arg": "ACT"}], "F0039402.15.1": [{"dep": "7", "from": "8", "arg": "MANN"}, {"dep": "29", "from": "30", "arg": "ACT"}, {"dep": "33", "from": "30", "arg": "LOC"}], "F0039402.14.1": [], "F0039402.12.2": [{"dep": "1", "from": "8", "arg": "ACT"}, {"dep": "7", "from": "8", "arg": "GOAL"}, {"dep": "10", "from": "8", "arg": "PAT"}, {"dep": "15", "from": "18", "arg": "ACT"}, {"dep": "19", "from": "18", "arg": "MODAL"}, {"dep": "20", "from": "19", "arg": "PAT"}, {"dep": "33", "from": "37", "arg": "CAUSE"}, {"dep": "38", "from": "37", "arg": "ACT"}], "F0039402.6.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "10", "from": "7", "arg": "PAT"}], "F0039402.5.1": [], "F0039402.14.0": [{"dep": "1", "from": "8", "arg": "ACT"}, {"dep": "4", "from": "8", "arg": "PAT"}, {"dep": "12", "from": "14", "arg": "ACT"}, {"dep": "15", "from": "14", "arg": "PAT"}], "F0039402.12.0": [], "F0039402.5.0": [], "F0039402.13.1": [{"dep": "13", "from": "16", "arg": "ACT"}, {"dep": "19", "from": "26", "arg": "PAT"}, {"dep": "25", "from": "26", "arg": "TIME"}, {"dep": "31", "from": "32", "arg": "ACT"}, {"dep": "35", "from": "32", "arg": "PAT"}], "F0039402.9.0": [{"dep": "15", "from": "14", "arg": "ACT"}, {"dep": "18", "from": "14", "arg": "TIME"}], "F0039402.13.0": [{"dep": "2", "from": "8", "arg": "EVENT"}, {"dep": "4", "from": "8", "arg": "ACT"}, {"dep": "6", "from": "8", "arg": "REC"}, {"dep": "7", "from": "8", "arg": "MANN"}, {"dep": "10", "from": "8", "arg": "PAT"}, {"dep": "17", "from": "18", "arg": "MANN"}, {"dep": "19", "from": "18", "arg": "PAT"}, {"dep": "26", "from": "24", "arg": "PAT"}, {"dep": "28", "from": "30", "arg": "MANN"}, {"dep": "31", "from": "30", "arg": "PAT"}]} \ No newline at end of file diff --git a/tools/gen_json.py b/tools/gen_json.py index 0c7e701..72e7068 100644 --- a/tools/gen_json.py +++ b/tools/gen_json.py @@ -1,9 +1,11 @@ from pathlib import Path from parser.parser import Parser +import json ORIGPATH = Path("../data/kres_example") # we need the IDs INPATH = Path("../data/kres_example_srl") OUTPATH = Path("../data/kres_example_json") +DEBUG = False def get_origfile(filename): for origfile in ORIGPATH.iterdir(): @@ -13,36 +15,77 @@ def get_origfile(filename): def extract_sentences(line_reader): acc = [] - for line in [x.decode("utf-8").split('\t') for x in line_reader]: - if line[0] == '\n': + # last char in line is \n, remove it + for line in [x.decode("utf-8")[:-1].split('\t') for x in line_reader]: + if len(line) == 1: # empty line tmp = acc acc = [] yield tmp else: acc.append(line) -def match_sentence_id(string, rd): - str1 = " ".join([token[1] for token in sentence_arr]) - for k, e in rd.items(): - str2 = " ".join(token[2] for token in dict_entry["tokens"]) - if str1 == str2 +def to_sentence(sentence_arr): + return " ".join([token[1] for token in sentence_arr]) + +def match_sentence_id(sentence, orig_dict): + for k, e in orig_dict.items(): + orig_sentence = " ".join(token[2] for token in e["tokens"]) + if sentence == orig_sentence: return k raise KeyError +def get_dep_rel(token): + if DEBUG: + print(token) + for i, field in enumerate(token[14:]): + if field != "_": + return { + "arg": field, + "from": i, # i-th predicate in sentence + "dep": token[0], + } + return None + if __name__ == "__main__": par = Parser() + OUTPATH.mkdir(exist_ok=True) for infile in [x for x in INPATH.iterdir() if x.is_file()]: origfile = get_origfile(infile) - rd = par.parse_tei(origfile) + orig_dict = par.parse_tei(origfile) fp = infile.open("rb") + outdata = {} for sentence_arr in extract_sentences(fp.readlines()): - sid = match_sentence_id(sentence_arr, rd) - print(sid) - # OK, we got the sentence id, now generate the predicate map! + # tsv dropped sentence ids, match the ID, using original data + sid = match_sentence_id(to_sentence(sentence_arr), orig_dict) + outdata[sid] = [] - outfile = (OUTPATH / infile.name).with_suffix(".json") \ No newline at end of file + # find all predicate indices in the sentence + predicates = [] + for token in sentence_arr: + if token[12] == "Y": + predicates += [token[0]] # idx + + deprel = get_dep_rel(token) + if deprel is not None: + outdata[sid].append(deprel) + + # deprel["from"] points to n-th predicate + # replace with predicate's token index + for deprel in outdata[sid]: + deprel["from"] = predicates[deprel["from"]] + + if DEBUG: + print(to_sentence(sentence_arr)) + print(outdata[sid]) + print(sid) + print() + print() + + outfile = (OUTPATH / infile.name).with_suffix(".json") + # print(outdata) + json.dump(outdata, outfile.open("w")) diff --git a/tools/parse_all.py b/tools/parse_all.py index 01a867f..f6f67dc 100644 --- a/tools/parse_all.py +++ b/tools/parse_all.py @@ -32,12 +32,10 @@ if __name__ == "__main__": print("Processing file: " + str(kres_file)) res_dict = par.parse_tei(kres_file) - longest_sent = max([len(e["tokens"]) for k, e in res_dict.items()]) - print("Longest sentence: ", longest_sent) kres_out_str = "" for _, sentence in res_dict.items(): - kres_out_str += par.to_conll_2009_SRL(sentence, longest_sent) + kres_out_str += par.to_conll_2009_SRL(sentence) with (kres_out_dir / kres_file.name).with_suffix(".tsv").open("wb+") as fp: fp.write(kres_out_str.encode("utf-8")) diff --git a/tools/parser/parser.py b/tools/parser/parser.py index 3ae75e7..92c226c 100644 --- a/tools/parser/parser.py +++ b/tools/parser/parser.py @@ -114,7 +114,7 @@ class Parser: return res_dict - def to_conll_2009_SRL(self, sentence_entry, napreds=9): + def to_conll_2009_SRL(self, sentence_entry): def fillpred(tsv_row): mrow = build_model_row(tsv_row) @@ -122,8 +122,6 @@ class Parser: y = self.fillpred_model.predict([x]) return y[0] # bool - apreds_string = '\t'.join(["_" for x in range(napreds)]) - # works with kres, with parsed links out_str = "" for token in sentence_entry["tokens"]: @@ -136,7 +134,7 @@ class Parser: [t_id] + [form for x in range(7)] + ["0", "0", "modra", "modra", "_", "_"] + - [apreds_string, "\n"] + ["\n"] ) continue @@ -165,7 +163,6 @@ class Parser: sentence_entry["links"][t_id][0], # pdeprel "_", # fillpred "_", # pred - apreds_string, "\n", ] fprd = fillpred(row_list)