diff --git a/wani.py b/wani.py index e9c7f06..2e14537 100644 --- a/wani.py +++ b/wani.py @@ -177,10 +177,10 @@ class Restriction: restriction_type = restriction_tag.get('type') if restriction_type == "morphology": self.type = RestrictionType.Morphology - self.matcher = build_morphology_regex(restriction_tag.getchildren()) + self.matcher = build_morphology_regex(list(restriction_tag)) elif restriction_type == "lexis": self.type = RestrictionType.Lexis - self.matcher = build_lexis_regex(restriction_tag.getchildren()) + self.matcher = build_lexis_regex(list(restriction_tag)) else: raise NotImplementedError() @@ -318,14 +318,14 @@ class SyntacticStructure: st.id = xml.get('id') st.lbs = xml.get('LBS') - components, system = xml.getchildren() - dependencies, restrictions = system.getchildren() + components, system = list(xml) + dependencies, restrictions = list(system) assert(system.get('type') == 'JOS') deps = [ (dep.get('from'), dep.get('to'), dep.get('label')) for dep in dependencies ] comps = { comp.get('cid'): comp.get('name') for comp in components } - restrs = { r.get('cid'): r.getchildren()[0] for r in restrictions } + restrs = { r.get('cid'): next(iter(r)) for r in restrictions } st.root_component.find_next(deps, comps, restrs) st.root_component = list(st.root_component)[0][0] # get first next @@ -376,6 +376,7 @@ def load_corpus(filename): xmlstring = xmlstring.replace(' xml:', ' ') et = ElementTree.XML(xmlstring) + root_words = set() words = {} for w in et.iter("w"): words[w.get('id')] = Word(w) @@ -394,10 +395,15 @@ def load_corpus(filename): # catch modra links from root elif lfrom[-1] == '0' and l.get('afun') == 'modra': root_words.add(l.get('dep')) - pass + else: # strange errors, just skip... pass + + no_root_words = [w for k, w in words.items() if k in root_words] + missing = root_words - set(w.id for w in no_root_words) + # what should i do with this I forgot :( + return list(words.values())