|
|
|
@ -634,7 +634,6 @@ class Component:
|
|
|
|
|
|
|
|
|
|
# if more than one match found for particular component
|
|
|
|
|
elif len(cmatch) > 1:
|
|
|
|
|
logging.debug("MULTIPLE: {}, {}".format(self.idx, cmatch))
|
|
|
|
|
# if more than one match in multiple components, NOPE!
|
|
|
|
|
if len(to_ret) > 1:
|
|
|
|
|
logging.warning("Strange multiple match: {}".format(
|
|
|
|
@ -652,7 +651,6 @@ class Component:
|
|
|
|
|
for tr in to_ret:
|
|
|
|
|
tr.update(cmatch[0])
|
|
|
|
|
|
|
|
|
|
logging.debug("MA: {}".format(str(to_ret)))
|
|
|
|
|
return to_ret
|
|
|
|
|
|
|
|
|
|
def _match_self(self, word):
|
|
|
|
@ -667,8 +665,6 @@ class Component:
|
|
|
|
|
else:
|
|
|
|
|
matched = self.restriction.match(word)
|
|
|
|
|
|
|
|
|
|
logging.debug("SELF MATCH({}: {} -> {}".format(self.idx, word.text, matched))
|
|
|
|
|
|
|
|
|
|
# recurse to next
|
|
|
|
|
if not matched:
|
|
|
|
|
return None
|
|
|
|
@ -682,13 +678,11 @@ class Component:
|
|
|
|
|
# need to get all links that match
|
|
|
|
|
for next, link, order in self.next_element:
|
|
|
|
|
next_links = word.get_links(link)
|
|
|
|
|
logging.debug("FIND LINKS FOR: {} -> {}: #{}".format(self.idx, next.idx, len(next_links)))
|
|
|
|
|
to_ret.append([])
|
|
|
|
|
|
|
|
|
|
# good flag
|
|
|
|
|
good = next.status != ComponentStatus.Required
|
|
|
|
|
for next_word in next_links:
|
|
|
|
|
logging.debug("link: {}: {} -> {}".format(link, word.id, next_word.id))
|
|
|
|
|
if not order.match(word, next_word):
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
@ -707,7 +701,6 @@ class Component:
|
|
|
|
|
|
|
|
|
|
# if none matched, nothing found!
|
|
|
|
|
if not good:
|
|
|
|
|
logging.debug("BAD")
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
return to_ret
|
|
|
|
@ -1208,7 +1201,6 @@ def match_file(words, structures):
|
|
|
|
|
for s in tqdm(structures):
|
|
|
|
|
for w in words:
|
|
|
|
|
mhere = s.match(w)
|
|
|
|
|
logging.debug(" GOT: {}".format(len(mhere)))
|
|
|
|
|
for match in mhere:
|
|
|
|
|
colocation_id = [(idx, w.lemma) for idx, w in match.items()]
|
|
|
|
|
colocation_id = [s.id] + list(sorted(colocation_id, key=lambda x:x[0]))
|
|
|
|
@ -1284,9 +1276,6 @@ def main(input_file, structures_file, args):
|
|
|
|
|
if args.all:
|
|
|
|
|
Writer.make_all_writer(args).write_out(structures, colocation_ids)
|
|
|
|
|
|
|
|
|
|
logging.debug([(k, len(v)) for k, v in matches.items()])
|
|
|
|
|
logging.debug(sum(len(v) for _, v in matches.items()))
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
parser = argparse.ArgumentParser(description='Extract structures from a parsed corpus.')
|
|
|
|
|
parser.add_argument('structures', help='Structures definitions in xml file')
|
|
|
|
|