Removing old logging.debug calls makes matching much faster :)
This commit is contained in:
parent
2c8a9f0ed0
commit
08c8050f3f
11
wani.py
11
wani.py
|
@ -634,7 +634,6 @@ class Component:
|
||||||
|
|
||||||
# if more than one match found for particular component
|
# if more than one match found for particular component
|
||||||
elif len(cmatch) > 1:
|
elif len(cmatch) > 1:
|
||||||
logging.debug("MULTIPLE: {}, {}".format(self.idx, cmatch))
|
|
||||||
# if more than one match in multiple components, NOPE!
|
# if more than one match in multiple components, NOPE!
|
||||||
if len(to_ret) > 1:
|
if len(to_ret) > 1:
|
||||||
logging.warning("Strange multiple match: {}".format(
|
logging.warning("Strange multiple match: {}".format(
|
||||||
|
@ -652,7 +651,6 @@ class Component:
|
||||||
for tr in to_ret:
|
for tr in to_ret:
|
||||||
tr.update(cmatch[0])
|
tr.update(cmatch[0])
|
||||||
|
|
||||||
logging.debug("MA: {}".format(str(to_ret)))
|
|
||||||
return to_ret
|
return to_ret
|
||||||
|
|
||||||
def _match_self(self, word):
|
def _match_self(self, word):
|
||||||
|
@ -667,8 +665,6 @@ class Component:
|
||||||
else:
|
else:
|
||||||
matched = self.restriction.match(word)
|
matched = self.restriction.match(word)
|
||||||
|
|
||||||
logging.debug("SELF MATCH({}: {} -> {}".format(self.idx, word.text, matched))
|
|
||||||
|
|
||||||
# recurse to next
|
# recurse to next
|
||||||
if not matched:
|
if not matched:
|
||||||
return None
|
return None
|
||||||
|
@ -682,13 +678,11 @@ class Component:
|
||||||
# need to get all links that match
|
# need to get all links that match
|
||||||
for next, link, order in self.next_element:
|
for next, link, order in self.next_element:
|
||||||
next_links = word.get_links(link)
|
next_links = word.get_links(link)
|
||||||
logging.debug("FIND LINKS FOR: {} -> {}: #{}".format(self.idx, next.idx, len(next_links)))
|
|
||||||
to_ret.append([])
|
to_ret.append([])
|
||||||
|
|
||||||
# good flag
|
# good flag
|
||||||
good = next.status != ComponentStatus.Required
|
good = next.status != ComponentStatus.Required
|
||||||
for next_word in next_links:
|
for next_word in next_links:
|
||||||
logging.debug("link: {}: {} -> {}".format(link, word.id, next_word.id))
|
|
||||||
if not order.match(word, next_word):
|
if not order.match(word, next_word):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -707,7 +701,6 @@ class Component:
|
||||||
|
|
||||||
# if none matched, nothing found!
|
# if none matched, nothing found!
|
||||||
if not good:
|
if not good:
|
||||||
logging.debug("BAD")
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return to_ret
|
return to_ret
|
||||||
|
@ -1208,7 +1201,6 @@ def match_file(words, structures):
|
||||||
for s in tqdm(structures):
|
for s in tqdm(structures):
|
||||||
for w in words:
|
for w in words:
|
||||||
mhere = s.match(w)
|
mhere = s.match(w)
|
||||||
logging.debug(" GOT: {}".format(len(mhere)))
|
|
||||||
for match in mhere:
|
for match in mhere:
|
||||||
colocation_id = [(idx, w.lemma) for idx, w in match.items()]
|
colocation_id = [(idx, w.lemma) for idx, w in match.items()]
|
||||||
colocation_id = [s.id] + list(sorted(colocation_id, key=lambda x:x[0]))
|
colocation_id = [s.id] + list(sorted(colocation_id, key=lambda x:x[0]))
|
||||||
|
@ -1284,9 +1276,6 @@ def main(input_file, structures_file, args):
|
||||||
if args.all:
|
if args.all:
|
||||||
Writer.make_all_writer(args).write_out(structures, colocation_ids)
|
Writer.make_all_writer(args).write_out(structures, colocation_ids)
|
||||||
|
|
||||||
logging.debug([(k, len(v)) for k, v in matches.items()])
|
|
||||||
logging.debug(sum(len(v) for _, v in matches.items()))
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
parser = argparse.ArgumentParser(description='Extract structures from a parsed corpus.')
|
parser = argparse.ArgumentParser(description='Extract structures from a parsed corpus.')
|
||||||
parser.add_argument('structures', help='Structures definitions in xml file')
|
parser.add_argument('structures', help='Structures definitions in xml file')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user