diff --git a/.gitignore b/.gitignore index a67a708..b0df98f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ data/*/*.xml data/*/*.tsv +data/*/*.json diff --git a/Makefile b/Makefile index 0251da3..3e77ab7 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: tsv_files srl_tagged_files json_files env +.PHONY: tsv_files srl_tagged_files json_files env clean all: json_files @@ -17,3 +17,6 @@ tools/fillpred_model/model.pickle: env: cd dockerfiles; cd python-java; $(MAKE) + +clean: + rm tools/fillpred_model/model.pickle \ No newline at end of file diff --git a/data/kres_example_json/F0006347.srl.json b/data/kres_example_json/F0006347.srl.json deleted file mode 100644 index d6f2deb..0000000 --- a/data/kres_example_json/F0006347.srl.json +++ /dev/null @@ -1 +0,0 @@ -{"F0006347.50.1": [{"dep": "3", "from": "6", "arg": "CAUSE"}, {"dep": "5", "from": "6", "arg": "PAT"}, {"dep": "11", "from": "12", "arg": "ACT"}, {"dep": "16", "from": "12", "arg": "LOC"}], "F0006347.50.0": [], "F0006347.50.2": [{"dep": "5", "from": "14", "arg": "TIME"}, {"dep": "12", "from": "14", "arg": "ACT"}, {"dep": "15", "from": "14", "arg": "MWPRED"}, {"dep": "18", "from": "20", "arg": "ACT"}, {"dep": "19", "from": "20", "arg": "TIME"}, {"dep": "23", "from": "20", "arg": "PAT"}, {"dep": "26", "from": "13", "arg": "REC"}, {"dep": "29", "from": "30", "arg": "MANN"}, {"dep": "31", "from": "30", "arg": "PAT"}], "F0006347.50.3": [{"dep": "14", "from": "19", "arg": "PAT"}, {"dep": "16", "from": "19", "arg": "LOC"}, {"dep": "30", "from": "29", "arg": "PAT"}, {"dep": "32", "from": "29", "arg": "PAT"}, {"dep": "42", "from": "43", "arg": "DUR"}, {"dep": "45", "from": "43", "arg": "PAT"}, {"dep": "48", "from": "43", "arg": "LOC"}, {"dep": "56", "from": "57", "arg": "TIME"}, {"dep": "57", "from": "55", "arg": "MODAL"}, {"dep": "62", "from": "57", "arg": "PAT"}, {"dep": "67", "from": "66", "arg": "TIME"}, {"dep": "73", "from": "66", "arg": "ACT"}]} \ No newline at end of file diff --git a/data/kres_example_json/F0012782.srl.json b/data/kres_example_json/F0012782.srl.json deleted file mode 100644 index 619b588..0000000 --- a/data/kres_example_json/F0012782.srl.json +++ /dev/null @@ -1 +0,0 @@ -{"F0012782.9.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "MWPRED"}, {"dep": "6", "from": "4", "arg": "TIME"}], "F0012782.5.0": [], "F0012782.6.0": [{"dep": "56", "from": "54", "arg": "MEANS"}], "F0012782.10.0": [], "F0012782.11.0": [], "F0012782.8.0": [], "F0012782.7.0": [{"dep": "3", "from": "7", "arg": "COND"}, {"dep": "6", "from": "7", "arg": "ACT"}, {"dep": "8", "from": "7", "arg": "MANN"}]} \ No newline at end of file diff --git a/data/kres_example_json/F0019343.srl.json b/data/kres_example_json/F0019343.srl.json deleted file mode 100644 index b41da3e..0000000 --- a/data/kres_example_json/F0019343.srl.json +++ /dev/null @@ -1 +0,0 @@ -{"F0019343.130.0": [], "F0019343.87.0": [], "F0019343.73.0": [], "F0019343.41.0": [], "F0019343.29.0": [], "F0019343.43.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "PAT"}], "F0019343.12.0": [{"dep": "2", "from": "4", "arg": "GOAL"}, {"dep": "5", "from": "4", "arg": "ACT"}], "F0019343.31.0": [{"dep": "10", "from": "9", "arg": "RESLT"}, {"dep": "13", "from": "9", "arg": "LOC"}], "F0019343.86.0": [], "F0019343.100.0": [], "F0019343.77.0": [], "F0019343.75.0": [], "F0019343.35.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "PAT"}], "F0019343.37.0": [{"dep": "1", "from": "5", "arg": "ACT"}], "F0019343.6.0": [], "F0019343.79.0": [], "F0019343.109.5": [], "F0019343.21.0": [{"dep": "25", "from": "26", "arg": "ACT"}, {"dep": "28", "from": "26", "arg": "REG"}], "F0019343.74.0": [], "F0019343.131.2": [], "F0019343.20.0": [], "F0019343.102.0": [{"dep": "13", "from": "10", "arg": "LOC"}], "F0019343.103.0": [{"dep": "1", "from": "8", "arg": "ACT"}, {"dep": "6", "from": "8", "arg": "REG"}, {"dep": "9", "from": "8", "arg": "REG"}, {"dep": "37", "from": "38", "arg": "ACT"}, {"dep": "39", "from": "38", "arg": "TIME"}, {"dep": "40", "from": "38", "arg": "PAT"}], "F0019343.109.0": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "9", "from": "13", "arg": "TIME"}, {"dep": "11", "from": "13", "arg": "CAUSE"}, {"dep": "14", "from": "13", "arg": "PAT"}, {"dep": "20", "from": "18", "arg": "PAT"}, {"dep": "28", "from": "26", "arg": "MEANS"}], "F0019343.16.0": [], "F0019343.17.0": [{"dep": "6", "from": "7", "arg": "PAT"}, {"dep": "8", "from": "7", "arg": "MODAL"}, {"dep": "9", "from": "8", "arg": "ACT"}, {"dep": "12", "from": "8", "arg": "REG"}], "F0019343.8.0": [{"dep": "1", "from": "14", "arg": "ACT"}, {"dep": "4", "from": "14", "arg": "ORIG"}, {"dep": "10", "from": "14", "arg": "ORIG"}], "F0019343.67.0": [{"dep": "2", "from": "1", "arg": "PAT"}, {"dep": "4", "from": "1", "arg": "LOC"}, {"dep": "8", "from": "12", "arg": "GOAL"}, {"dep": "15", "from": "12", "arg": "GOAL"}], "F0019343.111.0": [{"dep": "5", "from": "6", "arg": "PAT"}, {"dep": "7", "from": "8", "arg": "ACT"}, {"dep": "8", "from": "6", "arg": "MODAL"}, {"dep": "9", "from": "8", "arg": "TIME"}, {"dep": "18", "from": "16", "arg": "LOC"}], "F0019343.95.0": [], "F0019343.106.1": [{"dep": "10", "from": "11", "arg": "ACT"}, {"dep": "12", "from": "11", "arg": "TIME"}, {"dep": "13", "from": "11", "arg": "PAT"}, {"dep": "24", "from": "22", "arg": "REG"}, {"dep": "29", "from": "22", "arg": "PAT"}], "F0019343.39.0": [], "F0019343.63.0": [], "F0019343.33.0": [{"dep": "22", "from": "20", "arg": "REG"}], "F0019343.105.1": [], "F0019343.114.0": [], "F0019343.58.0": [], "F0019343.118.0": [], "F0019343.101.1": [], "F0019343.131.1": [], "F0019343.129.1": [{"dep": "5", "from": "16", "arg": "COND"}, {"dep": "8", "from": "5", "arg": "ORIG"}, {"dep": "11", "from": "5", "arg": "REG"}, {"dep": "15", "from": "16", "arg": "ACT"}, {"dep": "17", "from": "16", "arg": "RESLT"}, {"dep": "18", "from": "16", "arg": "MWPRED"}, {"dep": "21", "from": "18", "arg": "LOC"}, {"dep": "23", "from": "24", "arg": "MANN"}, {"dep": "26", "from": "24", "arg": "REG"}], "F0019343.123.1": [{"dep": "5", "from": "8", "arg": "ACT"}, {"dep": "11", "from": "8", "arg": "ORIG"}], "F0019343.56.0": [], "F0019343.32.0": [], "F0019343.14.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "MWPRED"}, {"dep": "6", "from": "4", "arg": "TIME"}], "F0019343.61.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "ACT"}], "F0019343.51.0": [{"dep": "3", "from": "10", "arg": "ACT"}, {"dep": "9", "from": "10", "arg": "LOC"}, {"dep": "12", "from": "10", "arg": "ACT"}], "F0019343.10.0": [], "F0019343.76.0": [], "F0019343.9.0": [{"dep": "2", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "7", "arg": "PAT"}], "F0019343.126.0": [], "F0019343.109.3": [], "F0019343.40.0": [{"dep": "32", "from": "30", "arg": "REG"}], "F0019343.109.4": [], "F0019343.122.0": [], "F0019343.30.0": [{"dep": "8", "from": "6", "arg": "PAT"}, {"dep": "54", "from": "53", "arg": "ACT"}], "F0019343.28.0": [{"dep": "6", "from": "5", "arg": "ACT"}, {"dep": "23", "from": "21", "arg": "REG"}], "F0019343.107.0": [{"dep": "3", "from": "14", "arg": "CAUSE"}, {"dep": "10", "from": "14", "arg": "TIME"}, {"dep": "15", "from": "14", "arg": "TIME"}, {"dep": "21", "from": "22", "arg": "ACT"}, {"dep": "23", "from": "22", "arg": "FREQ"}, {"dep": "28", "from": "26", "arg": "REG"}, {"dep": "50", "from": "43", "arg": "MANN"}], "F0019343.46.0": [], "F0019343.105.0": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "8", "from": "7", "arg": "PAT"}], "F0019343.128.0": [], "F0019343.109.2": [], "F0019343.119.0": [{"dep": "10", "from": "11", "arg": "ACT"}, {"dep": "15", "from": "11", "arg": "PAT"}], "F0019343.5.0": [{"dep": "9", "from": "10", "arg": "PAT"}, {"dep": "11", "from": "10", "arg": "MODAL"}, {"dep": "12", "from": "11", "arg": "ACT"}, {"dep": "39", "from": "37", "arg": "LOC"}], "F0019343.47.0": [], "F0019343.125.0": [{"dep": "18", "from": "19", "arg": "DUR"}, {"dep": "23", "from": "21", "arg": "PAT"}, {"dep": "27", "from": "31", "arg": "LOC"}, {"dep": "34", "from": "31", "arg": "ACT"}, {"dep": "48", "from": "49", "arg": "MANN"}, {"dep": "53", "from": "57", "arg": "REC"}, {"dep": "55", "from": "57", "arg": "ACT"}, {"dep": "56", "from": "57", "arg": "MANN"}], "F0019343.36.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "ACT"}], "F0019343.55.0": [], "F0019343.109.7": [], "F0019343.106.0": [{"dep": "4", "from": "14", "arg": "PAT"}, {"dep": "15", "from": "14", "arg": "RESLT"}, {"dep": "16", "from": "14", "arg": "PAT"}], "F0019343.52.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "PAT"}], "F0019343.93.0": [], "F0019343.7.0": [{"dep": "3", "from": "1", "arg": "ACT"}, {"dep": "5", "from": "1", "arg": "PAT"}, {"dep": "8", "from": "9", "arg": "PAT"}, {"dep": "10", "from": "9", "arg": "MODAL"}, {"dep": "11", "from": "10", "arg": "ACT"}, {"dep": "14", "from": "10", "arg": "REG"}, {"dep": "38", "from": "36", "arg": "LOC"}], "F0019343.65.0": [], "F0019343.98.0": [], "F0019343.101.0": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "5", "from": "3", "arg": "PAT"}, {"dep": "16", "from": "14", "arg": "TIME"}, {"dep": "18", "from": "14", "arg": "REG"}, {"dep": "26", "from": "14", "arg": "LOC"}], "F0019343.42.0": [{"dep": "3", "from": "10", "arg": "ACT"}, {"dep": "9", "from": "10", "arg": "LOC"}, {"dep": "12", "from": "10", "arg": "QUANT"}], "F0019343.50.0": [], "F0019343.108.0": [], "F0019343.99.0": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "9", "from": "7", "arg": "PAT"}], "F0019343.116.0": [], "F0019343.15.0": [], "F0019343.68.0": [], "F0019343.57.0": [{"dep": "32", "from": "30", "arg": "REG"}], "F0019343.48.0": [], "F0019343.49.0": [{"dep": "32", "from": "30", "arg": "REG"}], "F0019343.131.0": [], "F0019343.97.0": [{"dep": "2", "from": "12", "arg": "PAT"}, {"dep": "14", "from": "12", "arg": "AIM"}], "F0019343.11.0": [], "F0019343.123.0": [{"dep": "2", "from": "4", "arg": "ACT"}, {"dep": "5", "from": "4", "arg": "PAT"}], "F0019343.24.0": [], "F0019343.110.0": [], "F0019343.82.0": [], "F0019343.38.0": [], "F0019343.127.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "MWPRED"}, {"dep": "6", "from": "4", "arg": "TIME"}, {"dep": "10", "from": "16", "arg": "TIME"}, {"dep": "15", "from": "16", "arg": "MANN"}], "F0019343.19.0": [], "F0019343.104.0": [], "F0019343.117.1": [], "F0019343.25.0": [{"dep": "9", "from": "6", "arg": "ACT"}], "F0019343.109.1": [], "F0019343.90.0": [], "F0019343.129.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "5", "from": "3", "arg": "PAT"}], "F0019343.64.0": [], "F0019343.54.0": [], "F0019343.34.0": [{"dep": "3", "from": "10", "arg": "ACT"}, {"dep": "9", "from": "10", "arg": "LOC"}, {"dep": "12", "from": "10", "arg": "QUANT"}], "F0019343.60.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "PAT"}], "F0019343.44.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "ACT"}], "F0019343.120.0": [], "F0019343.18.0": [], "F0019343.59.0": [{"dep": "3", "from": "10", "arg": "ACT"}, {"dep": "9", "from": "10", "arg": "LOC"}, {"dep": "12", "from": "10", "arg": "QUANT"}], "F0019343.113.0": [{"dep": "10", "from": "12", "arg": "ACT"}, {"dep": "15", "from": "12", "arg": "PAT"}, {"dep": "20", "from": "21", "arg": "PAT"}, {"dep": "24", "from": "25", "arg": "PAT"}, {"dep": "27", "from": "25", "arg": "LOC"}], "F0019343.115.0": [{"dep": "2", "from": "4", "arg": "ACT"}, {"dep": "7", "from": "4", "arg": "PAT"}, {"dep": "14", "from": "13", "arg": "ACT"}, {"dep": "15", "from": "13", "arg": "PAT"}, {"dep": "20", "from": "23", "arg": "LOC"}, {"dep": "24", "from": "23", "arg": "PAT"}], "F0019343.109.6": [], "F0019343.23.0": [], "F0019343.22.0": [], "F0019343.112.0": [], "F0019343.26.0": [{"dep": "15", "from": "25", "arg": "PAT"}, {"dep": "19", "from": "25", "arg": "TIME"}, {"dep": "21", "from": "25", "arg": "CAUSE"}, {"dep": "23", "from": "25", "arg": "TIME"}, {"dep": "26", "from": "25", "arg": "ACT"}, {"dep": "30", "from": "33", "arg": "PAT"}, {"dep": "32", "from": "33", "arg": "ACT"}, {"dep": "33", "from": "25", "arg": "TIME"}, {"dep": "35", "from": "33", "arg": "LOC"}], "F0019343.121.0": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "8", "from": "9", "arg": "MANN"}, {"dep": "10", "from": "9", "arg": "PAT"}, {"dep": "13", "from": "9", "arg": "TIME"}, {"dep": "22", "from": "21", "arg": "ACT"}, {"dep": "25", "from": "21", "arg": "LOC"}], "F0019343.124.0": [{"dep": "19", "from": "17", "arg": "REG"}, {"dep": "37", "from": "36", "arg": "PAT"}], "F0019343.117.0": [{"dep": "12", "from": "11", "arg": "RESLT"}, {"dep": "15", "from": "11", "arg": "LOC"}, {"dep": "22", "from": "23", "arg": "MANN"}, {"dep": "25", "from": "23", "arg": "CAUSE"}]} \ No newline at end of file diff --git a/data/kres_example_json/F0025741.srl.json b/data/kres_example_json/F0025741.srl.json deleted file mode 100644 index 7f7a30a..0000000 --- a/data/kres_example_json/F0025741.srl.json +++ /dev/null @@ -1 +0,0 @@ -{"F0025741.69.1": [], "F0025741.54.2": [{"dep": "12", "from": "13", "arg": "PAT"}, {"dep": "15", "from": "13", "arg": "ACT"}, {"dep": "28", "from": "33", "arg": "ACT"}, {"dep": "32", "from": "33", "arg": "PAT"}], "F0025741.34.0": [], "F0025741.56.2": [{"dep": "3", "from": "1", "arg": "ACT"}, {"dep": "8", "from": "9", "arg": "PAT"}, {"dep": "10", "from": "9", "arg": "ACT"}, {"dep": "14", "from": "15", "arg": "PAT"}, {"dep": "18", "from": "15", "arg": "DUR"}], "F0025741.52.0": [{"dep": "3", "from": "5", "arg": "DUR"}, {"dep": "4", "from": "5", "arg": "PAT"}, {"dep": "7", "from": "5", "arg": "RESLT"}], "F0025741.64.3": [{"dep": "1", "from": "3", "arg": "TIME"}, {"dep": "5", "from": "3", "arg": "PAT"}, {"dep": "11", "from": "13", "arg": "REC"}, {"dep": "14", "from": "13", "arg": "PAT"}, {"dep": "16", "from": "13", "arg": "FREQ"}], "F0025741.36.0": [], "F0025741.63.0": [], "F0025741.56.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "5", "from": "6", "arg": "MANN"}, {"dep": "7", "from": "6", "arg": "PAT"}, {"dep": "13", "from": "16", "arg": "TIME"}, {"dep": "15", "from": "16", "arg": "ORIG"}, {"dep": "18", "from": "16", "arg": "RESLT"}], "F0025741.53.0": [{"dep": "2", "from": "9", "arg": "ACT"}, {"dep": "11", "from": "12", "arg": "CAUSE"}, {"dep": "12", "from": "9", "arg": "MWPRED"}, {"dep": "14", "from": "12", "arg": "CAUSE"}], "F0025741.54.4": [{"dep": "4", "from": "5", "arg": "PAT"}, {"dep": "6", "from": "5", "arg": "MODAL"}, {"dep": "8", "from": "6", "arg": "MEANS"}], "F0025741.68.5": [{"dep": "2", "from": "4", "arg": "ACT"}, {"dep": "7", "from": "4", "arg": "TIME"}, {"dep": "9", "from": "4", "arg": "PAT"}, {"dep": "13", "from": "4", "arg": "LOC"}], "F0025741.53.1": [], "F0025741.66.3": [], "F0025741.73.3": [{"dep": "1", "from": "2", "arg": "ACT"}, {"dep": "10", "from": "16", "arg": "ACT"}, {"dep": "14", "from": "16", "arg": "PAT"}], "F0025741.64.2": [{"dep": "1", "from": "5", "arg": "ACT"}, {"dep": "4", "from": "5", "arg": "DUR"}, {"dep": "7", "from": "5", "arg": "PAT"}, {"dep": "9", "from": "11", "arg": "TIME"}, {"dep": "13", "from": "11", "arg": "RESLT"}, {"dep": "15", "from": "11", "arg": "PAT"}], "F0025741.52.3": [{"dep": "2", "from": "13", "arg": "TIME"}, {"dep": "4", "from": "13", "arg": "MEANS"}, {"dep": "8", "from": "13", "arg": "TIME"}, {"dep": "11", "from": "13", "arg": "TIME"}, {"dep": "12", "from": "13", "arg": "PAT"}, {"dep": "15", "from": "13", "arg": "ACT"}, {"dep": "16", "from": "13", "arg": "ACT"}, {"dep": "18", "from": "16", "arg": "CAUSE"}], "F0025741.77.7": [{"dep": "15", "from": "16", "arg": "MANN"}, {"dep": "17", "from": "16", "arg": "TIME"}], "F0025741.73.4": [], "F0025741.50.0": [], "F0025741.77.4": [], "F0025741.59.0": [], "F0025741.82.0": [], "F0025741.85.3": [{"dep": "11", "from": "9", "arg": "PAT"}], "F0025741.54.7": [{"dep": "5", "from": "9", "arg": "MANN"}, {"dep": "8", "from": "9", "arg": "DUR"}, {"dep": "12", "from": "9", "arg": "PAT"}, {"dep": "16", "from": "17", "arg": "TIME"}, {"dep": "17", "from": "15", "arg": "MODAL"}, {"dep": "20", "from": "17", "arg": "REC"}], "F0025741.85.0": [], "F0025741.58.0": [], "F0025741.52.4": [{"dep": "2", "from": "5", "arg": "LOC"}, {"dep": "6", "from": "5", "arg": "ACT"}, {"dep": "19", "from": "24", "arg": "PAT"}, {"dep": "23", "from": "24", "arg": "PAT"}, {"dep": "27", "from": "24", "arg": "PAT"}, {"dep": "37", "from": "35", "arg": "LOC"}], "F0025741.83.1": [{"dep": "6", "from": "9", "arg": "CAUSE"}, {"dep": "7", "from": "9", "arg": "PAT"}, {"dep": "13", "from": "9", "arg": "DUR"}], "F0025741.66.2": [{"dep": "5", "from": "1", "arg": "PAT"}, {"dep": "10", "from": "12", "arg": "PAT"}, {"dep": "11", "from": "12", "arg": "MANN"}, {"dep": "13", "from": "12", "arg": "ACT"}], "F0025741.77.0": [{"dep": "2", "from": "5", "arg": "TIME"}, {"dep": "4", "from": "5", "arg": "RESLT"}, {"dep": "10", "from": "7", "arg": "RESLT"}, {"dep": "15", "from": "13", "arg": "PAT"}], "F0025741.41.0": [], "F0025741.66.0": [], "F0025741.66.1": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "ACT"}], "F0025741.87.0": [{"dep": "9", "from": "12", "arg": "ACT"}, {"dep": "14", "from": "12", "arg": "PAT"}], "F0025741.81.0": [], "F0025741.90.0": [{"dep": "5", "from": "6", "arg": "PAT"}, {"dep": "8", "from": "6", "arg": "GOAL"}, {"dep": "18", "from": "17", "arg": "PAT"}, {"dep": "24", "from": "23", "arg": "PAT"}, {"dep": "32", "from": "34", "arg": "PAT"}, {"dep": "48", "from": "45", "arg": "PAT"}], "F0025741.56.4": [], "F0025741.65.0": [], "F0025741.77.2": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "6", "from": "3", "arg": "PAT"}], "F0025741.56.1": [], "F0025741.63.2": [{"dep": "2", "from": "4", "arg": "REG"}], "F0025741.87.1": [{"dep": "2", "from": "5", "arg": "GOAL"}, {"dep": "4", "from": "5", "arg": "MANN"}, {"dep": "7", "from": "5", "arg": "PAT"}], "F0025741.79.0": [], "F0025741.77.3": [{"dep": "1", "from": "7", "arg": "ACT"}, {"dep": "9", "from": "7", "arg": "SOURCE"}, {"dep": "12", "from": "13", "arg": "PAT"}, {"dep": "14", "from": "13", "arg": "ACT"}], "F0025741.68.6": [{"dep": "3", "from": "9", "arg": "ACT"}, {"dep": "11", "from": "9", "arg": "TIME"}, {"dep": "14", "from": "9", "arg": "ACT"}, {"dep": "27", "from": "28", "arg": "REC"}], "F0025741.85.4": [{"dep": "10", "from": "11", "arg": "TIME"}, {"dep": "13", "from": "11", "arg": "PAT"}], "F0025741.64.0": [], "F0025741.90.2": [{"dep": "2", "from": "3", "arg": "REG"}, {"dep": "5", "from": "3", "arg": "PAT"}, {"dep": "11", "from": "13", "arg": "MANN"}, {"dep": "15", "from": "18", "arg": "MANN"}, {"dep": "17", "from": "18", "arg": "MEANS"}, {"dep": "19", "from": "18", "arg": "DUR"}], "F0025741.55.0": [], "F0025741.64.4": [{"dep": "9", "from": "8", "arg": "ACT"}, {"dep": "17", "from": "16", "arg": "PAT"}, {"dep": "22", "from": "21", "arg": "PAT"}, {"dep": "26", "from": "27", "arg": "ACT"}, {"dep": "28", "from": "27", "arg": "PAT"}], "F0025741.42.0": [], "F0025741.73.2": [], "F0025741.38.0": [{"dep": "12", "from": "13", "arg": "DUR"}, {"dep": "15", "from": "13", "arg": "PAT"}], "F0025741.40.0": [{"dep": "5", "from": "6", "arg": "MANN"}, {"dep": "7", "from": "6", "arg": "ACT"}], "F0025741.44.0": [{"dep": "2", "from": "1", "arg": "ACT"}], "F0025741.85.1": [], "F0025741.71.0": [], "F0025741.52.1": [{"dep": "8", "from": "13", "arg": "GOAL"}, {"dep": "12", "from": "13", "arg": "GOAL"}, {"dep": "14", "from": "13", "arg": "PAT"}, {"dep": "22", "from": "23", "arg": "LOC"}, {"dep": "25", "from": "23", "arg": "PAT"}], "F0025741.68.0": [{"dep": "2", "from": "10", "arg": "PAT"}, {"dep": "4", "from": "10", "arg": "ACT"}, {"dep": "13", "from": "10", "arg": "PAT"}, {"dep": "21", "from": "22", "arg": "ACT"}, {"dep": "23", "from": "22", "arg": "RESLT"}], "F0025741.90.1": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "MODAL"}, {"dep": "5", "from": "4", "arg": "RESLT"}], "F0025741.45.0": [], "F0025741.83.0": [{"dep": "12", "from": "8", "arg": "DUR"}], "F0025741.77.5": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "5", "from": "3", "arg": "PAT"}], "F0025741.39.0": [], "F0025741.64.1": [{"dep": "2", "from": "7", "arg": "DUR"}, {"dep": "4", "from": "7", "arg": "GOAL"}, {"dep": "10", "from": "7", "arg": "LOC"}], "F0025741.77.6": [{"dep": "7", "from": "9", "arg": "FREQ"}, {"dep": "11", "from": "9", "arg": "MANN"}], "F0025741.69.0": [{"dep": "3", "from": "10", "arg": "ACT"}], "F0025741.74.0": [], "F0025741.60.0": [], "F0025741.86.0": [], "F0025741.43.0": [{"dep": "7", "from": "21", "arg": "ACT"}, {"dep": "20", "from": "21", "arg": "MANN"}, {"dep": "22", "from": "21", "arg": "PAT"}, {"dep": "32", "from": "30", "arg": "LOC"}, {"dep": "36", "from": "30", "arg": "ACT"}], "F0025741.71.1": [], "F0025741.49.0": [], "F0025741.37.0": [], "F0025741.76.0": [{"dep": "3", "from": "12", "arg": "ACT"}, {"dep": "11", "from": "12", "arg": "REG"}, {"dep": "13", "from": "12", "arg": "PAT"}], "F0025741.54.3": [{"dep": "3", "from": "10", "arg": "COND"}, {"dep": "8", "from": "10", "arg": "ACT"}, {"dep": "14", "from": "10", "arg": "REC"}], "F0025741.52.2": [], "F0025741.68.4": [{"dep": "3", "from": "11", "arg": "TIME"}, {"dep": "7", "from": "11", "arg": "ACT"}, {"dep": "10", "from": "11", "arg": "TIME"}, {"dep": "14", "from": "11", "arg": "CAUSE"}], "F0025741.68.3": [{"dep": "1", "from": "3", "arg": "MANN"}, {"dep": "4", "from": "8", "arg": "ACT"}, {"dep": "6", "from": "8", "arg": "TIME"}, {"dep": "8", "from": "3", "arg": "MODAL"}, {"dep": "9", "from": "8", "arg": "PAT"}, {"dep": "19", "from": "21", "arg": "GOAL"}, {"dep": "22", "from": "21", "arg": "MANN"}, {"dep": "23", "from": "21", "arg": "PAT"}], "F0025741.63.1": [{"dep": "6", "from": "9", "arg": "PAT"}, {"dep": "8", "from": "9", "arg": "PAT"}, {"dep": "14", "from": "12", "arg": "PAT"}], "F0025741.68.2": [{"dep": "23", "from": "31", "arg": "PAT"}, {"dep": "27", "from": "31", "arg": "TIME"}, {"dep": "29", "from": "31", "arg": "ACT"}], "F0025741.54.0": [{"dep": "2", "from": "4", "arg": "LOC"}, {"dep": "5", "from": "4", "arg": "MODAL"}, {"dep": "6", "from": "5", "arg": "PAT"}, {"dep": "17", "from": "20", "arg": "PAT"}, {"dep": "19", "from": "20", "arg": "MANN"}, {"dep": "22", "from": "20", "arg": "GOAL"}, {"dep": "38", "from": "36", "arg": "PAT"}, {"dep": "45", "from": "44", "arg": "PAT"}], "F0025741.80.0": [], "F0025741.78.0": [{"dep": "3", "from": "5", "arg": "CAUSE"}, {"dep": "12", "from": "18", "arg": "PAT"}, {"dep": "14", "from": "18", "arg": "EVENT"}, {"dep": "15", "from": "18", "arg": "MANN"}, {"dep": "17", "from": "18", "arg": "MANN"}], "F0025741.48.0": [], "F0025741.54.5": [{"dep": "3", "from": "4", "arg": "LOC"}, {"dep": "13", "from": "12", "arg": "MODAL"}, {"dep": "15", "from": "13", "arg": "PAT"}], "F0025741.72.0": [], "F0025741.77.1": [], "F0025741.61.0": [{"dep": "5", "from": "8", "arg": "MANN"}, {"dep": "10", "from": "8", "arg": "PAT"}], "F0025741.68.7": [{"dep": "1", "from": "10", "arg": "ACT"}, {"dep": "7", "from": "10", "arg": "TIME"}], "F0025741.46.0": [], "F0025741.66.4": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "ACT"}, {"dep": "7", "from": "11", "arg": "PAT"}, {"dep": "10", "from": "11", "arg": "LOC"}], "F0025741.66.8": [{"dep": "4", "from": "5", "arg": "TIME"}, {"dep": "6", "from": "5", "arg": "ACT"}], "F0025741.35.0": [], "F0025741.75.0": [], "F0025741.73.1": [{"dep": "2", "from": "7", "arg": "LOC"}, {"dep": "5", "from": "7", "arg": "ACT"}, {"dep": "6", "from": "7", "arg": "MANN"}, {"dep": "10", "from": "7", "arg": "PAT"}], "F0025741.88.0": [], "F0025741.57.0": [{"dep": "3", "from": "4", "arg": "PAT"}, {"dep": "8", "from": "4", "arg": "QUANT"}], "F0025741.83.2": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "5", "from": "3", "arg": "LOC"}], "F0025741.54.6": [{"dep": "1", "from": "2", "arg": "ACT"}, {"dep": "3", "from": "2", "arg": "LOC"}], "F0025741.84.0": [], "F0025741.33.0": [], "F0025741.89.3": [{"dep": "7", "from": "5", "arg": "PAT"}, {"dep": "16", "from": "12", "arg": "PAT"}, {"dep": "21", "from": "19", "arg": "PAT"}], "F0025741.89.1": [{"dep": "2", "from": "1", "arg": "MODAL"}, {"dep": "4", "from": "2", "arg": "RESLT"}, {"dep": "11", "from": "10", "arg": "MODAL"}, {"dep": "12", "from": "11", "arg": "PAT"}, {"dep": "14", "from": "18", "arg": "ACT"}, {"dep": "16", "from": "14", "arg": "PAT"}], "F0025741.54.1": [{"dep": "29", "from": "31", "arg": "RESLT"}, {"dep": "30", "from": "31", "arg": "RESLT"}, {"dep": "32", "from": "31", "arg": "PAT"}, {"dep": "44", "from": "31", "arg": "PAT"}], "F0025741.89.2": [{"dep": "2", "from": "7", "arg": "MEANS"}, {"dep": "8", "from": "7", "arg": "MODAL"}], "F0025741.66.5": [{"dep": "18", "from": "20", "arg": "ACT"}, {"dep": "19", "from": "20", "arg": "MANN"}, {"dep": "23", "from": "20", "arg": "ORIG"}], "F0025741.67.0": [], "F0025741.89.0": [{"dep": "3", "from": "1", "arg": "PAT"}, {"dep": "7", "from": "6", "arg": "ACT"}, {"dep": "12", "from": "10", "arg": "PAT"}], "F0025741.70.0": [{"dep": "2", "from": "4", "arg": "MANN"}, {"dep": "5", "from": "4", "arg": "GOAL"}], "F0025741.47.0": [{"dep": "2", "from": "5", "arg": "TIME"}, {"dep": "4", "from": "5", "arg": "PAT"}, {"dep": "7", "from": "5", "arg": "QUANT"}], "F0025741.78.1": [{"dep": "3", "from": "7", "arg": "COND"}, {"dep": "6", "from": "7", "arg": "LOC"}, {"dep": "9", "from": "7", "arg": "PAT"}, {"dep": "18", "from": "21", "arg": "PAT"}, {"dep": "19", "from": "21", "arg": "FREQ"}, {"dep": "20", "from": "21", "arg": "MANN"}], "F0025741.85.2": [{"dep": "3", "from": "1", "arg": "MANN"}, {"dep": "6", "from": "1", "arg": "LOC"}, {"dep": "10", "from": "1", "arg": "PAT"}], "F0025741.66.6": [{"dep": "3", "from": "6", "arg": "ACT"}, {"dep": "9", "from": "6", "arg": "PAT"}, {"dep": "22", "from": "27", "arg": "ACT"}, {"dep": "24", "from": "27", "arg": "TIME"}, {"dep": "28", "from": "27", "arg": "PAT"}], "F0025741.73.0": [{"dep": "2", "from": "3", "arg": "ACT"}, {"dep": "6", "from": "3", "arg": "PAT"}, {"dep": "9", "from": "3", "arg": "LOC"}], "F0025741.68.1": [{"dep": "1", "from": "6", "arg": "ACT"}, {"dep": "8", "from": "6", "arg": "PAT"}], "F0025741.62.0": [{"dep": "2", "from": "4", "arg": "REG"}, {"dep": "5", "from": "4", "arg": "RESLT"}, {"dep": "7", "from": "4", "arg": "ACT"}], "F0025741.66.7": [{"dep": "31", "from": "29", "arg": "MANN"}, {"dep": "35", "from": "33", "arg": "ACT"}], "F0025741.56.3": [{"dep": "11", "from": "16", "arg": "TIME"}, {"dep": "14", "from": "16", "arg": "REG"}, {"dep": "15", "from": "16", "arg": "MANN"}, {"dep": "17", "from": "16", "arg": "ACT"}, {"dep": "24", "from": "25", "arg": "MANN"}, {"dep": "26", "from": "25", "arg": "PAT"}]} \ No newline at end of file diff --git a/data/kres_example_json/F0032377.srl.json b/data/kres_example_json/F0032377.srl.json deleted file mode 100644 index 50f3ec1..0000000 --- a/data/kres_example_json/F0032377.srl.json +++ /dev/null @@ -1 +0,0 @@ -{"F0032377.38.0": [], "F0032377.32.4": [], "F0032377.29.0": [], "F0032377.33.0": [{"dep": "10", "from": "11", "arg": "GOAL"}, {"dep": "12", "from": "11", "arg": "ACT"}], "F0032377.25.0": [{"dep": "5", "from": "4", "arg": "PAT"}, {"dep": "11", "from": "13", "arg": "REC"}, {"dep": "14", "from": "13", "arg": "PAT"}, {"dep": "16", "from": "13", "arg": "LOC"}, {"dep": "19", "from": "23", "arg": "LOC"}, {"dep": "21", "from": "23", "arg": "TIME"}, {"dep": "22", "from": "23", "arg": "PAT"}, {"dep": "24", "from": "23", "arg": "ACT"}], "F0032377.32.0": [], "F0032377.35.1": [{"dep": "1", "from": "8", "arg": "ACT"}, {"dep": "7", "from": "8", "arg": "CAUSE"}, {"dep": "9", "from": "8", "arg": "PAT"}, {"dep": "12", "from": "13", "arg": "TIME"}], "F0032377.33.1": [{"dep": "2", "from": "3", "arg": "LOC"}, {"dep": "9", "from": "10", "arg": "ACT"}, {"dep": "13", "from": "10", "arg": "REG"}, {"dep": "17", "from": "19", "arg": "ACT"}, {"dep": "21", "from": "19", "arg": "GOAL"}, {"dep": "27", "from": "26", "arg": "PAT"}, {"dep": "33", "from": "34", "arg": "MANN"}, {"dep": "36", "from": "34", "arg": "PAT"}], "F0032377.32.5": [{"dep": "6", "from": "5", "arg": "MANN"}, {"dep": "13", "from": "16", "arg": "PAT"}, {"dep": "15", "from": "16", "arg": "TIME"}, {"dep": "18", "from": "16", "arg": "RESLT"}], "F0032377.33.2": [{"dep": "3", "from": "1", "arg": "PAT"}, {"dep": "8", "from": "5", "arg": "PAT"}], "F0032377.26.3": [{"dep": "1", "from": "3", "arg": "TIME"}, {"dep": "9", "from": "8", "arg": "ACT"}, {"dep": "13", "from": "14", "arg": "TIME"}, {"dep": "17", "from": "18", "arg": "LOC"}, {"dep": "18", "from": "14", "arg": "PAT"}], "F0032377.37.0": [], "F0032377.25.2": [{"dep": "1", "from": "3", "arg": "REC"}, {"dep": "6", "from": "9", "arg": "PAT"}, {"dep": "8", "from": "9", "arg": "ACT"}, {"dep": "11", "from": "9", "arg": "LOC"}, {"dep": "18", "from": "15", "arg": "RESLT"}], "F0032377.36.0": [], "F0032377.32.1": [{"dep": "1", "from": "10", "arg": "ACT"}, {"dep": "5", "from": "10", "arg": "MANN"}, {"dep": "12", "from": "10", "arg": "GOAL"}], "F0032377.26.1": [{"dep": "1", "from": "4", "arg": "PAT"}, {"dep": "3", "from": "4", "arg": "TIME"}, {"dep": "6", "from": "4", "arg": "GOAL"}, {"dep": "9", "from": "13", "arg": "TIME"}, {"dep": "12", "from": "13", "arg": "DUR"}, {"dep": "15", "from": "13", "arg": "PAT"}, {"dep": "20", "from": "18", "arg": "PAT"}], "F0032377.24.0": [{"dep": "1", "from": "2", "arg": "MANN"}, {"dep": "4", "from": "2", "arg": "ACT"}], "F0032377.31.0": [], "F0032377.30.0": [], "F0032377.23.0": [], "F0032377.32.3": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "4", "from": "3", "arg": "PAT"}, {"dep": "8", "from": "9", "arg": "PAT"}, {"dep": "11", "from": "9", "arg": "PAT"}], "F0032377.26.0": [{"dep": "1", "from": "4", "arg": "ACT"}, {"dep": "10", "from": "11", "arg": "QUANT"}, {"dep": "13", "from": "11", "arg": "PAT"}, {"dep": "18", "from": "19", "arg": "TIME"}], "F0032377.27.0": [], "F0032377.34.0": [], "F0032377.28.0": [{"dep": "4", "from": "3", "arg": "PAT"}], "F0032377.32.2": [{"dep": "1", "from": "5", "arg": "TIME"}, {"dep": "2", "from": "5", "arg": "REC"}, {"dep": "3", "from": "5", "arg": "PAT"}, {"dep": "7", "from": "5", "arg": "ACT"}, {"dep": "11", "from": "12", "arg": "REC"}, {"dep": "13", "from": "12", "arg": "ACT"}], "F0032377.26.2": [{"dep": "2", "from": "4", "arg": "ACT"}, {"dep": "5", "from": "4", "arg": "ACT"}], "F0032377.35.2": [{"dep": "8", "from": "7", "arg": "ACT"}, {"dep": "12", "from": "14", "arg": "REC"}, {"dep": "13", "from": "14", "arg": "MANN"}, {"dep": "15", "from": "14", "arg": "PAT"}], "F0032377.35.0": [{"dep": "9", "from": "12", "arg": "LOC"}, {"dep": "15", "from": "12", "arg": "PAT"}, {"dep": "20", "from": "21", "arg": "TIME"}, {"dep": "23", "from": "21", "arg": "GOAL"}], "F0032377.25.1": []} \ No newline at end of file diff --git a/data/kres_example_json/F0039402.srl.json b/data/kres_example_json/F0039402.srl.json deleted file mode 100644 index 6cde334..0000000 --- a/data/kres_example_json/F0039402.srl.json +++ /dev/null @@ -1 +0,0 @@ -{"F0039402.11.0": [{"dep": "1", "from": "5", "arg": "ACT"}, {"dep": "7", "from": "5", "arg": "TIME"}], "F0039402.10.0": [{"dep": "2", "from": "8", "arg": "ACT"}, {"dep": "11", "from": "8", "arg": "PAT"}, {"dep": "16", "from": "14", "arg": "RESLT"}], "F0039402.12.1": [{"dep": "1", "from": "9", "arg": "ACT"}, {"dep": "5", "from": "9", "arg": "TIME"}, {"dep": "8", "from": "9", "arg": "PAT"}, {"dep": "11", "from": "9", "arg": "PAT"}], "F0039402.15.0": [{"dep": "2", "from": "10", "arg": "MANN"}, {"dep": "7", "from": "10", "arg": "TIME"}, {"dep": "11", "from": "10", "arg": "ACT"}, {"dep": "13", "from": "10", "arg": "RESLT"}, {"dep": "20", "from": "22", "arg": "PAT"}, {"dep": "24", "from": "22", "arg": "TIME"}], "F0039402.14.2": [{"dep": "9", "from": "17", "arg": "ACT"}, {"dep": "13", "from": "17", "arg": "PAT"}, {"dep": "16", "from": "17", "arg": "LOC"}, {"dep": "20", "from": "17", "arg": "LOC"}, {"dep": "32", "from": "31", "arg": "ACT"}], "F0039402.7.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "6", "from": "5", "arg": "ACT"}, {"dep": "22", "from": "24", "arg": "TIME"}, {"dep": "25", "from": "24", "arg": "PAT"}], "F0039402.5.2": [{"dep": "1", "from": "3", "arg": "ACT"}, {"dep": "6", "from": "3", "arg": "ACT"}, {"dep": "16", "from": "25", "arg": "ACT"}, {"dep": "20", "from": "25", "arg": "COND"}, {"dep": "27", "from": "25", "arg": "PAT"}], "F0039402.15.2": [], "F0039402.8.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "11", "from": "9", "arg": "MEANS"}, {"dep": "31", "from": "33", "arg": "PAT"}, {"dep": "34", "from": "33", "arg": "ACT"}], "F0039402.15.1": [{"dep": "7", "from": "8", "arg": "MANN"}, {"dep": "29", "from": "30", "arg": "ACT"}, {"dep": "33", "from": "30", "arg": "LOC"}], "F0039402.14.1": [], "F0039402.12.2": [{"dep": "1", "from": "8", "arg": "ACT"}, {"dep": "7", "from": "8", "arg": "GOAL"}, {"dep": "10", "from": "8", "arg": "PAT"}, {"dep": "15", "from": "18", "arg": "ACT"}, {"dep": "19", "from": "18", "arg": "MODAL"}, {"dep": "20", "from": "19", "arg": "PAT"}, {"dep": "33", "from": "37", "arg": "CAUSE"}, {"dep": "38", "from": "37", "arg": "ACT"}], "F0039402.6.0": [{"dep": "3", "from": "5", "arg": "ACT"}, {"dep": "10", "from": "7", "arg": "PAT"}], "F0039402.5.1": [], "F0039402.14.0": [{"dep": "1", "from": "8", "arg": "ACT"}, {"dep": "4", "from": "8", "arg": "PAT"}, {"dep": "12", "from": "14", "arg": "ACT"}, {"dep": "15", "from": "14", "arg": "PAT"}], "F0039402.12.0": [], "F0039402.5.0": [], "F0039402.13.1": [{"dep": "13", "from": "16", "arg": "ACT"}, {"dep": "19", "from": "26", "arg": "PAT"}, {"dep": "25", "from": "26", "arg": "TIME"}, {"dep": "31", "from": "32", "arg": "ACT"}, {"dep": "35", "from": "32", "arg": "PAT"}], "F0039402.9.0": [{"dep": "15", "from": "14", "arg": "ACT"}, {"dep": "18", "from": "14", "arg": "TIME"}], "F0039402.13.0": [{"dep": "2", "from": "8", "arg": "EVENT"}, {"dep": "4", "from": "8", "arg": "ACT"}, {"dep": "6", "from": "8", "arg": "REC"}, {"dep": "7", "from": "8", "arg": "MANN"}, {"dep": "10", "from": "8", "arg": "PAT"}, {"dep": "17", "from": "18", "arg": "MANN"}, {"dep": "19", "from": "18", "arg": "PAT"}, {"dep": "26", "from": "24", "arg": "PAT"}, {"dep": "28", "from": "30", "arg": "MANN"}, {"dep": "31", "from": "30", "arg": "PAT"}]} \ No newline at end of file diff --git a/tools/fillpred_model/step1.py b/tools/fillpred_model/step1.py index 4808eb9..45efc8a 100644 --- a/tools/fillpred_model/step1.py +++ b/tools/fillpred_model/step1.py @@ -51,4 +51,4 @@ if __name__ == "__main__": print(i, df.shape) print(ndf.head()) - ndf.to_pickle(OUTFILE) + ndf.to_pickle(Path(OUTFILE)) diff --git a/tools/fillpred_model/step2.py b/tools/fillpred_model/step2.py index dcd1737..264f849 100644 --- a/tools/fillpred_model/step2.py +++ b/tools/fillpred_model/step2.py @@ -27,4 +27,6 @@ if __name__ == "__main__": clf_full = DecisionTreeClassifier() clf_full.fit(X, y) - pickle.dump(clf_full, open(OUTFILE, "wb")) + with open(OUTFILE, "wb") as fp: + pickle.dump(clf_full, fp) + diff --git a/tools/gen_json.py b/tools/gen_json.py index b8ba508..e966087 100644 --- a/tools/gen_json.py +++ b/tools/gen_json.py @@ -1,6 +1,8 @@ from pathlib import Path from parser.parser import Parser import configparser +import json +import sys # defaults ORIGPATH = Path("../data/kres_example") # we need the IDs @@ -14,7 +16,7 @@ config.read("tools.cfg") ORIGPATH = Path(config["tools"]["kres_orig"]) INPATH = Path(config["tools"]["kres_srl"]) OUTPATH = Path(config["tools"]["kres_json"]) -DEBUG = bool(config["tools"]["debug"]) +DEBUG = config["tools"]["debug"] == "True" def get_origfile(filename): for origfile in ORIGPATH.iterdir(): @@ -63,36 +65,36 @@ for infile in [x for x in INPATH.iterdir() if x.is_file()]: origfile = get_origfile(infile) orig_dict = par.parse_tei(origfile) - fp = infile.open("rb") - outdata = {} - for sentence_arr in extract_sentences(fp.readlines()): - # tsv dropped sentence ids, match the ID, using original data - sid = match_sentence_id(to_sentence(sentence_arr), orig_dict) + with infile.open("rb") as fp: + outdata = {} + for sentence_arr in extract_sentences(fp.readlines()): + # tsv dropped sentence ids, match the ID, using original data + sid = match_sentence_id(to_sentence(sentence_arr), orig_dict) - outdata[sid] = [] + outdata[sid] = [] - # find all predicate indices in the sentence - predicates = [] - for token in sentence_arr: - if token[12] == "Y": - predicates += [token[0]] # idx + # find all predicate indices in the sentence + predicates = [] + for token in sentence_arr: + if token[12] == "Y": + predicates += [token[0]] # idx - deprel = get_dep_rel(token) - if deprel is not None: - outdata[sid].append(deprel) + deprel = get_dep_rel(token) + if deprel is not None: + outdata[sid].append(deprel) - # deprel["from"] points to n-th predicate - # replace with predicate's token index - for deprel in outdata[sid]: - deprel["from"] = predicates[deprel["from"]] + # deprel["from"] points to n-th predicate + # replace with predicate's token index + for deprel in outdata[sid]: + deprel["from"] = predicates[deprel["from"]] - if DEBUG: - print(to_sentence(sentence_arr)) - print(outdata[sid]) - print(sid) - print() - print() + if DEBUG: + print(to_sentence(sentence_arr)) + print(outdata[sid]) + print(sid) + print() + print() outfile = (OUTPATH / infile.name).with_suffix(".json") - # print(outdata) - json.dump(outdata, outfile.open("w")) + with outfile.open("w") as fp: + json.dump(outdata, fp) diff --git a/tools/parse_all.py b/tools/parse_all.py index 1300afa..4039701 100644 --- a/tools/parse_all.py +++ b/tools/parse_all.py @@ -36,14 +36,11 @@ for kres_file in [x for x in INDIR.iterdir() if x.is_file()]: print("Processing file: " + str(kres_file)) res_dict = par.parse_tei(kres_file) - longest_sent = max([len(e["tokens"]) for k, e in res_dict.items()]) - print("Longest sentence: ", longest_sent) kres_out_str = "" for _, sentence in res_dict.items(): - kres_out_str += par.to_conll_2009_SRL(sentence, longest_sent) + kres_out_str += par.to_conll_2009_SRL(sentence) with (OUTDIR / kres_file.name).with_suffix(".tsv").open("wb+") as fp: fp.write(kres_out_str.encode("utf-8")) - fp.close() print("end parsing kres") diff --git a/tools/tools.cfg b/tools/tools.cfg index f2ed069..2cc3fcd 100644 --- a/tools/tools.cfg +++ b/tools/tools.cfg @@ -2,5 +2,5 @@ kres_orig = ../data/kres_example kres_tsv = ../data/kres_example_tsv kres_srl = ../data/kres_example_srl -kres_json = ../data/kres/example_json +kres_json = ../data/kres_example_json debug = False \ No newline at end of file