forked from kristjan/cjvt-srl-tagging
fixed paths
This commit is contained in:
@@ -51,4 +51,4 @@ if __name__ == "__main__":
|
||||
print(i, df.shape)
|
||||
|
||||
print(ndf.head())
|
||||
ndf.to_pickle(OUTFILE)
|
||||
ndf.to_pickle(Path(OUTFILE))
|
||||
|
||||
@@ -27,4 +27,6 @@ if __name__ == "__main__":
|
||||
clf_full = DecisionTreeClassifier()
|
||||
clf_full.fit(X, y)
|
||||
|
||||
pickle.dump(clf_full, open(OUTFILE, "wb"))
|
||||
with open(OUTFILE, "wb") as fp:
|
||||
pickle.dump(clf_full, fp)
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from pathlib import Path
|
||||
from parser.parser import Parser
|
||||
import configparser
|
||||
import json
|
||||
import sys
|
||||
|
||||
# defaults
|
||||
ORIGPATH = Path("../data/kres_example") # we need the IDs
|
||||
@@ -14,7 +16,7 @@ config.read("tools.cfg")
|
||||
ORIGPATH = Path(config["tools"]["kres_orig"])
|
||||
INPATH = Path(config["tools"]["kres_srl"])
|
||||
OUTPATH = Path(config["tools"]["kres_json"])
|
||||
DEBUG = bool(config["tools"]["debug"])
|
||||
DEBUG = config["tools"]["debug"] == "True"
|
||||
|
||||
def get_origfile(filename):
|
||||
for origfile in ORIGPATH.iterdir():
|
||||
@@ -63,36 +65,36 @@ for infile in [x for x in INPATH.iterdir() if x.is_file()]:
|
||||
origfile = get_origfile(infile)
|
||||
orig_dict = par.parse_tei(origfile)
|
||||
|
||||
fp = infile.open("rb")
|
||||
outdata = {}
|
||||
for sentence_arr in extract_sentences(fp.readlines()):
|
||||
# tsv dropped sentence ids, match the ID, using original data
|
||||
sid = match_sentence_id(to_sentence(sentence_arr), orig_dict)
|
||||
with infile.open("rb") as fp:
|
||||
outdata = {}
|
||||
for sentence_arr in extract_sentences(fp.readlines()):
|
||||
# tsv dropped sentence ids, match the ID, using original data
|
||||
sid = match_sentence_id(to_sentence(sentence_arr), orig_dict)
|
||||
|
||||
outdata[sid] = []
|
||||
outdata[sid] = []
|
||||
|
||||
# find all predicate indices in the sentence
|
||||
predicates = []
|
||||
for token in sentence_arr:
|
||||
if token[12] == "Y":
|
||||
predicates += [token[0]] # idx
|
||||
# find all predicate indices in the sentence
|
||||
predicates = []
|
||||
for token in sentence_arr:
|
||||
if token[12] == "Y":
|
||||
predicates += [token[0]] # idx
|
||||
|
||||
deprel = get_dep_rel(token)
|
||||
if deprel is not None:
|
||||
outdata[sid].append(deprel)
|
||||
deprel = get_dep_rel(token)
|
||||
if deprel is not None:
|
||||
outdata[sid].append(deprel)
|
||||
|
||||
# deprel["from"] points to n-th predicate
|
||||
# replace with predicate's token index
|
||||
for deprel in outdata[sid]:
|
||||
deprel["from"] = predicates[deprel["from"]]
|
||||
# deprel["from"] points to n-th predicate
|
||||
# replace with predicate's token index
|
||||
for deprel in outdata[sid]:
|
||||
deprel["from"] = predicates[deprel["from"]]
|
||||
|
||||
if DEBUG:
|
||||
print(to_sentence(sentence_arr))
|
||||
print(outdata[sid])
|
||||
print(sid)
|
||||
print()
|
||||
print()
|
||||
if DEBUG:
|
||||
print(to_sentence(sentence_arr))
|
||||
print(outdata[sid])
|
||||
print(sid)
|
||||
print()
|
||||
print()
|
||||
|
||||
outfile = (OUTPATH / infile.name).with_suffix(".json")
|
||||
# print(outdata)
|
||||
json.dump(outdata, outfile.open("w"))
|
||||
with outfile.open("w") as fp:
|
||||
json.dump(outdata, fp)
|
||||
|
||||
@@ -36,14 +36,11 @@ for kres_file in [x for x in INDIR.iterdir() if x.is_file()]:
|
||||
|
||||
print("Processing file: " + str(kres_file))
|
||||
res_dict = par.parse_tei(kres_file)
|
||||
longest_sent = max([len(e["tokens"]) for k, e in res_dict.items()])
|
||||
print("Longest sentence: ", longest_sent)
|
||||
kres_out_str = ""
|
||||
|
||||
for _, sentence in res_dict.items():
|
||||
kres_out_str += par.to_conll_2009_SRL(sentence, longest_sent)
|
||||
kres_out_str += par.to_conll_2009_SRL(sentence)
|
||||
|
||||
with (OUTDIR / kres_file.name).with_suffix(".tsv").open("wb+") as fp:
|
||||
fp.write(kres_out_str.encode("utf-8"))
|
||||
fp.close()
|
||||
print("end parsing kres")
|
||||
|
||||
@@ -2,5 +2,5 @@
|
||||
kres_orig = ../data/kres_example
|
||||
kres_tsv = ../data/kres_example_tsv
|
||||
kres_srl = ../data/kres_example_srl
|
||||
kres_json = ../data/kres/example_json
|
||||
kres_json = ../data/kres_example_json
|
||||
debug = False
|
||||
Reference in New Issue
Block a user