Removing getchildren() and adding root_words (don't know why yet; will remove if I don't remember)

This commit is contained in:
Ozbolt Menegatti 2019-01-08 21:17:15 +01:00
parent aeb2770966
commit 106db9394e

18
wani.py
View File

@ -177,10 +177,10 @@ class Restriction:
restriction_type = restriction_tag.get('type') restriction_type = restriction_tag.get('type')
if restriction_type == "morphology": if restriction_type == "morphology":
self.type = RestrictionType.Morphology self.type = RestrictionType.Morphology
self.matcher = build_morphology_regex(restriction_tag.getchildren()) self.matcher = build_morphology_regex(list(restriction_tag))
elif restriction_type == "lexis": elif restriction_type == "lexis":
self.type = RestrictionType.Lexis self.type = RestrictionType.Lexis
self.matcher = build_lexis_regex(restriction_tag.getchildren()) self.matcher = build_lexis_regex(list(restriction_tag))
else: else:
raise NotImplementedError() raise NotImplementedError()
@ -318,14 +318,14 @@ class SyntacticStructure:
st.id = xml.get('id') st.id = xml.get('id')
st.lbs = xml.get('LBS') st.lbs = xml.get('LBS')
components, system = xml.getchildren() components, system = list(xml)
dependencies, restrictions = system.getchildren() dependencies, restrictions = list(system)
assert(system.get('type') == 'JOS') assert(system.get('type') == 'JOS')
deps = [ (dep.get('from'), dep.get('to'), dep.get('label')) for dep in dependencies ] deps = [ (dep.get('from'), dep.get('to'), dep.get('label')) for dep in dependencies ]
comps = { comp.get('cid'): comp.get('name') for comp in components } comps = { comp.get('cid'): comp.get('name') for comp in components }
restrs = { r.get('cid'): r.getchildren()[0] for r in restrictions } restrs = { r.get('cid'): next(iter(r)) for r in restrictions }
st.root_component.find_next(deps, comps, restrs) st.root_component.find_next(deps, comps, restrs)
st.root_component = list(st.root_component)[0][0] # get first next st.root_component = list(st.root_component)[0][0] # get first next
@ -376,6 +376,7 @@ def load_corpus(filename):
xmlstring = xmlstring.replace(' xml:', ' ') xmlstring = xmlstring.replace(' xml:', ' ')
et = ElementTree.XML(xmlstring) et = ElementTree.XML(xmlstring)
root_words = set()
words = {} words = {}
for w in et.iter("w"): for w in et.iter("w"):
words[w.get('id')] = Word(w) words[w.get('id')] = Word(w)
@ -394,10 +395,15 @@ def load_corpus(filename):
# catch modra links from root # catch modra links from root
elif lfrom[-1] == '0' and l.get('afun') == 'modra': elif lfrom[-1] == '0' and l.get('afun') == 'modra':
root_words.add(l.get('dep')) root_words.add(l.get('dep'))
pass
else: else:
# strange errors, just skip... # strange errors, just skip...
pass pass
no_root_words = [w for k, w in words.items() if k in root_words]
missing = root_words - set(w.id for w in no_root_words)
# what should i do with this I forgot :(
return list(words.values()) return list(words.values())