adding per-file parsing, for parallel use

bug_fix
voje 5 years ago
parent 19945a9dd9
commit cce83045e8

@ -94,6 +94,7 @@ class Parser():
def sentence_generator(self):
# Using generators so we don't copy a whole corpus around in memory.
# Might be too complicated. Try per-file generator instead.
if self.corpus == "kres":
# some logging output
@ -109,7 +110,18 @@ class Parser():
else:
yield from self.parse_xml_file(self.ssj_file)
def kres_to_json_file(self, in_xml_file, out_json_file):
    """Parse one XML file and dump its sentence entries to a JSON file.

    Every item produced by ``self.parse_xml_file(in_xml_file)`` is unpacked
    as a 2-tuple; the first element is discarded and the second (the
    sentence entry) is collected. The collected entries are written as a
    single JSON list.

    Args:
        in_xml_file: Passed unchanged to ``self.parse_xml_file``.
        out_json_file: Object exposing ``.open("w")`` (presumably a
            ``pathlib.Path`` -- confirm against callers).
    """
    # Use this instance's parser and the real method name; the previous
    # code referenced an undefined global ``parser`` and a misspelled
    # ``parser_xml_file``.
    out_buffer = [
        sentence_entry
        for _, sentence_entry in self.parse_xml_file(in_xml_file)
    ]
    # Write to the path that was passed in (was the undefined ``outfile``).
    with out_json_file.open("w") as fp:
        json.dump(out_buffer, fp)
def parse_xml_file(self, xml_file):
# for separate srl links, it will guess the srl file based on
# self.kres_srl_folder
srl_from_json = {}
if self.corpus == "kres":
# in case of kres, read the SRL links from a separate json file

Loading…
Cancel
Save