From c17361fbda194d845cbac674065e40490ebc04cc Mon Sep 17 00:00:00 2001
From: voje
Date: Sun, 14 Apr 2019 04:18:52 +0200
Subject: [PATCH] added more logging

---
Review notes (placed after the "---" marker, so they are not part of the
commit message):

- Hunk 1 adds two attributes that the second hunk uses for progress
  logging. Both start at -1; for n_kres_files the second hunk treats -1
  as "total not computed yet".
- Hunk 2 computes n_kres_files the first time the "kres" branch of
  sentence_generator() runs and reuses the stored value afterwards.
- nth_kres_file starts at -1 and is incremented before each log call, so
  the logged position is zero-based: the first file is reported as
  "(0/N)".
- Within these hunks nth_kres_file is never reset, so a second pass over
  sentence_generator() on the same instance would keep counting past N.
  TODO confirm whether it is reset elsewhere in Parser.py.
- The total is counted with Path(self.kres_folder).glob('*') while the
  loop iterates self.kres_folder.iterdir(); presumably both enumerate the
  same entries -- verify.
- NOTE(review): line breaks and indentation in this copy were restored
  from a whitespace-collapsed version of the patch; check the context
  lines against corpusparser/Parser.py before applying.

 corpusparser/Parser.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/corpusparser/Parser.py b/corpusparser/Parser.py
index d908d04..75e8b7b 100644
--- a/corpusparser/Parser.py
+++ b/corpusparser/Parser.py
@@ -30,6 +30,10 @@ class Parser():
             "missing_srl": []
         }
 
+        # for logging output
+        self.n_kres_files = -1
+        self.nth_kres_file = -1
+
     def parse_jos_links(self, sent_el):
         if self.corpus == "kres":
             return self.parse_jos_links_kres(sent_el)
@@ -91,8 +95,16 @@ class Parser():
     def sentence_generator(self):
         # Using generators so we don't copy a whole corpu around in memory.
         if self.corpus == "kres":
+
+            # some logging output
+            if self.n_kres_files == -1:
+                self.n_kres_files = len(list(Path(self.kres_folder).glob('*')))
+
             for xml_file in self.kres_folder.iterdir():
                 # self.parse_xml_file(xml_file)
+                self.nth_kres_file += 1
+                self.logger.info("{} ({}/{})".format(
+                    xml_file, self.nth_kres_file, self.n_kres_files))
                 yield from self.parse_xml_file(xml_file)
         else:
             yield from self.parse_xml_file(self.ssj_file)