adding per-file parsing, for parallel use
This commit is contained in:
parent
19945a9dd9
commit
cce83045e8
|
@ -94,6 +94,7 @@ class Parser():
|
|||
|
||||
def sentence_generator(self):
|
||||
# Using generators so we don't copy a whole corpus around in memory.
|
||||
# Might be too complicated. Try per-file generator instead.
|
||||
if self.corpus == "kres":
|
||||
|
||||
# some logging output
|
||||
|
@ -109,7 +110,18 @@ class Parser():
|
|||
else:
|
||||
yield from self.parse_xml_file(self.ssj_file)
|
||||
|
||||
def kres_to_json_file(self, in_xml_file, out_json_file):
    """Parse one XML file and write its sentence entries to a JSON file.

    Every item yielded by ``self.parse_xml_file(in_xml_file)`` is unpacked
    as a pair; the second element (the sentence entry) is collected and
    the first is discarded. The collected entries are written to
    ``out_json_file`` as a single JSON array.

    Args:
        in_xml_file: Input file, passed through unchanged to
            ``self.parse_xml_file``.
        out_json_file: Destination path (``str`` or ``os.PathLike``);
            the file is created or overwritten.
    """
    # NOTE(review): the first element of each yielded pair is ignored here;
    # presumably it is an id or index -- confirm against parse_xml_file.
    out_buffer = [
        sentence_entry
        for _, sentence_entry in self.parse_xml_file(in_xml_file)
    ]

    # Builtin open() accepts both plain strings and pathlib.Path objects.
    with open(out_json_file, "w") as fp:
        json.dump(out_buffer, fp)
|
||||
|
||||
def parse_xml_file(self, xml_file):
|
||||
# for separate srl links, it will guess the srl file based on
|
||||
# self.kres_srl_folder
|
||||
|
||||
srl_from_json = {}
|
||||
if self.corpus == "kres":
|
||||
# in case of kres, read the SRL links from a separate json file
|
||||
|
|
Loading…
Reference in New Issue
Block a user