added formatter for mate-tools parse_full.sh
This commit is contained in:
parent
b3a39d74ef
commit
f9f7fae76a
|
@ -17,7 +17,11 @@ if __name__ == "__main__":
|
|||
# kres_file = "../data/kres_example/F0019343.xml.parsed.xml"
|
||||
kres_dir = "../data/kres_example/"
|
||||
for kres_file in os.listdir(kres_dir):
|
||||
out_file = ""
|
||||
res_dict = parser.parse_tei(join(kres_dir, kres_file))
|
||||
for _, sentence in res_dict.items():
|
||||
parser.to_conll09(sentence)
|
||||
out_file += parser.to_conll_2009_full(sentence)
|
||||
with open(join(kres_dir, kres_file + ".tsv"), "wb+") as fp:
|
||||
fp.write(out_file.encode("utf-8"))
|
||||
fp.close()
|
||||
print("end parsing kres")
|
||||
|
|
BIN
tools/parser/__pycache__/__init__.cpython-37.pyc
Normal file
BIN
tools/parser/__pycache__/__init__.cpython-37.pyc
Normal file
Binary file not shown.
BIN
tools/parser/__pycache__/msdmap.cpython-37.pyc
Normal file
BIN
tools/parser/__pycache__/msdmap.cpython-37.pyc
Normal file
Binary file not shown.
BIN
tools/parser/__pycache__/parser.cpython-37.pyc
Normal file
BIN
tools/parser/__pycache__/parser.cpython-37.pyc
Normal file
Binary file not shown.
|
@ -90,6 +90,7 @@ def parse_tei(filepath):
|
|||
parse_links(s) if guess_corpus == "KRES" else None
|
||||
)
|
||||
}
|
||||
fp.close()
|
||||
return res_dict
|
||||
|
||||
|
||||
|
@ -108,7 +109,7 @@ def parse_links(s_el):
|
|||
return res_links
|
||||
|
||||
|
||||
def to_conll09(sentence_entry):
|
||||
def to_conll_2009_SRL(sentence_entry):
|
||||
|
||||
def fillpred(pos, feat):
|
||||
if False:
|
||||
|
@ -154,3 +155,16 @@ def to_conll09(sentence_entry):
|
|||
out_str += "\n"
|
||||
print(out_str)
|
||||
return out_str
|
||||
|
||||
|
||||
def to_conll_2009_full(sentence_entry):
    """Serialize one sentence as tab-separated ``ID<TAB>FORM`` lines.

    Only two columns are emitted: the token id (``token[1]``) and the
    token form (``token[2]``).

    Args:
        sentence_entry: mapping with a ``"tokens"`` key holding indexable
            token records.

    Returns:
        A string with one line per token, followed by a single empty line
        that closes the sentence.
    """
    rows = [
        "{}\t{}\n".format(tok[1], tok[2])  # id, form
        for tok in sentence_entry["tokens"]
    ]
    # The trailing empty line marks the end of this sentence's block.
    rows.append("\n")
    return "".join(rows)
|
Loading…
Reference in New Issue
Block a user