diff --git a/wani.py b/wani.py index b459829..850d1c1 100644 --- a/wani.py +++ b/wani.py @@ -201,12 +201,17 @@ class Restriction: class Component: - def __init__(self, name): - self.name = name if name is not None else "" + def __init__(self, name, idx): + assert(idx is not None) + + self.name = name if name is not None else "" # for printing... + self.idx = idx self.restriction = None - self.next_element = None + self.next_element = [] self.level = None + self.iter_ctr = 0 + def word_to_str(self, word): if self.level == ComponentLevel.Lemma: return word.lemma, word.msd @@ -215,17 +220,20 @@ class Component: else: raise RuntimeError("Unreachable") - def has_next(self): - return self.next_element is not None + def __iter__(self): + self.iter_ctr = 0 + return self - def get_next(self): - return self.next_element[0] + def __next__(self): + if self.iter_ctr < len(self.next_element): + to_ret = self.next_element[self.iter_ctr] + self.iter_ctr += 1 + return to_ret + else: + raise StopIteration - def link_label(self): - return self.next_element[1] - - def set_next(self, next_component, link_label): - self.next_element = (next_component, link_label) + def add_next(self, next_component, link_label): + self.next_element.append((next_component, link_label)) def set_restriction(self, restrictions_tag): if restrictions_tag.tag == "restriction": @@ -243,10 +251,21 @@ class Component: else: raise RuntimeError("Unreachable") + def find_next(self, deps, comps, restrs): + for d in deps: + if d[0] == self.idx: + _, idx, dep_label = d + + next_component = Component(comps[idx], idx) + next_component.set_restriction(restrs[idx]) + + self.add_next(next_component, dep_label) + next_component.find_next(deps, comps, restrs) + def __str__(self): - el = "(N.{:7s} {})".format(self.name, str(self.restriction)) - if self.has_next(): - el += " -- {} -->\n{}".format(self.link_label(), str(self.get_next())) + el = "({:10} {})".format(self.name, str(self.restriction)) + for next, link in self: + el += "\n{:10} -- {:10} --> {}".format(self.name, link, str(next)) return el def __repr__(self): @@ -268,24 +287,27 @@ class Component: if matched: to_ret = [self.word_to_str(word)] - # already matched everything! - if not self.has_next(): - return to_ret + for next, link in self: + # need to get all links that match + for next_word in word.get_links(link): + match = next.match(next_word) + # if matches, return + if match is not None: + to_ret.extend(match) + break - # need to get all links that match - for next_word in word.get_links(self.link_label()): - match = self.get_next().match(next_word) - # if matches, return - if match is not None: - to_ret.extend(match) - return to_ret + # if none matched, nothing found! + else: + return None + + return to_ret # return None... class SyntacticStructure: def __init__(self): - self.root_component = Component('root') + self.root_component = Component("", 'root') self.id = None self.lbs = None @@ -300,27 +322,18 @@ class SyntacticStructure: assert(system.get('type') == 'JOS') - deps = { dep.get('from'): (dep.get('to'), dep.get('label')) for dep in dependencies } + deps = [ (dep.get('from'), dep.get('to'), dep.get('label')) for dep in dependencies ] comps = { comp.get('cid'): comp.get('name') for comp in components } restrs = { r.get('cid'): r.getchildren()[0] for r in restrictions } - current_component = st.root_component - idx = 'root' + st.root_component.find_next(deps, comps, restrs) + st.root_component = list(st.root_component)[0][0] # get first next - while idx in deps: - idx, dep_label = deps[idx] - - next_component = Component(comps[idx]) - next_component.set_restriction(restrs[idx]) - - current_component.set_next(next_component, dep_label) - current_component = next_component - - st.root_component = st.root_component.get_next() return st def __str__(self): - return "{} LBS {}\n------\n{}".format(self.id, self.lbs, str(self.root_component)) + arrow = "root -- modra --> " + return "{} LBS {}\n------\n{}{}".format(self.id, self.lbs, arrow, str(self.root_component)) def match(self, word): return self.root_component.match(word)