added more logging

This commit is contained in:
voje 2019-04-14 04:18:52 +02:00
parent 2b7339ac5a
commit c17361fbda

View File

@ -30,6 +30,10 @@ class Parser():
"missing_srl": [] "missing_srl": []
} }
# for logging output
self.n_kres_files = -1
self.nth_kres_file = -1
def parse_jos_links(self, sent_el): def parse_jos_links(self, sent_el):
if self.corpus == "kres": if self.corpus == "kres":
return self.parse_jos_links_kres(sent_el) return self.parse_jos_links_kres(sent_el)
@ -91,8 +95,16 @@ class Parser():
def sentence_generator(self): def sentence_generator(self):
# Using generators so we don't copy a whole corpus around in memory. # Using generators so we don't copy a whole corpus around in memory.
if self.corpus == "kres": if self.corpus == "kres":
# some logging output
if self.n_kres_files == -1:
self.n_kres_files = len(list(Path(self.kres_folder).glob('*')))
for xml_file in self.kres_folder.iterdir(): for xml_file in self.kres_folder.iterdir():
# self.parse_xml_file(xml_file) # self.parse_xml_file(xml_file)
self.nth_kres_file += 1
self.logger.info("{} ({}/{})".format(
xml_file, self.nth_kres_file, self.n_kres_files))
yield from self.parse_xml_file(xml_file) yield from self.parse_xml_file(xml_file)
else: else:
yield from self.parse_xml_file(self.ssj_file) yield from self.parse_xml_file(self.ssj_file)