Removing getchildren() and adding root_words (I don't know why yet; will remove it if I don't remember)
This commit is contained in:
parent
aeb2770966
commit
106db9394e
18
wani.py
18
wani.py
|
@ -177,10 +177,10 @@ class Restriction:
|
||||||
restriction_type = restriction_tag.get('type')
|
restriction_type = restriction_tag.get('type')
|
||||||
if restriction_type == "morphology":
|
if restriction_type == "morphology":
|
||||||
self.type = RestrictionType.Morphology
|
self.type = RestrictionType.Morphology
|
||||||
self.matcher = build_morphology_regex(restriction_tag.getchildren())
|
self.matcher = build_morphology_regex(list(restriction_tag))
|
||||||
elif restriction_type == "lexis":
|
elif restriction_type == "lexis":
|
||||||
self.type = RestrictionType.Lexis
|
self.type = RestrictionType.Lexis
|
||||||
self.matcher = build_lexis_regex(restriction_tag.getchildren())
|
self.matcher = build_lexis_regex(list(restriction_tag))
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
@ -318,14 +318,14 @@ class SyntacticStructure:
|
||||||
st.id = xml.get('id')
|
st.id = xml.get('id')
|
||||||
st.lbs = xml.get('LBS')
|
st.lbs = xml.get('LBS')
|
||||||
|
|
||||||
components, system = xml.getchildren()
|
components, system = list(xml)
|
||||||
dependencies, restrictions = system.getchildren()
|
dependencies, restrictions = list(system)
|
||||||
|
|
||||||
assert(system.get('type') == 'JOS')
|
assert(system.get('type') == 'JOS')
|
||||||
|
|
||||||
deps = [ (dep.get('from'), dep.get('to'), dep.get('label')) for dep in dependencies ]
|
deps = [ (dep.get('from'), dep.get('to'), dep.get('label')) for dep in dependencies ]
|
||||||
comps = { comp.get('cid'): comp.get('name') for comp in components }
|
comps = { comp.get('cid'): comp.get('name') for comp in components }
|
||||||
restrs = { r.get('cid'): r.getchildren()[0] for r in restrictions }
|
restrs = { r.get('cid'): next(iter(r)) for r in restrictions }
|
||||||
|
|
||||||
st.root_component.find_next(deps, comps, restrs)
|
st.root_component.find_next(deps, comps, restrs)
|
||||||
st.root_component = list(st.root_component)[0][0] # get first next
|
st.root_component = list(st.root_component)[0][0] # get first next
|
||||||
|
@ -376,6 +376,7 @@ def load_corpus(filename):
|
||||||
xmlstring = xmlstring.replace(' xml:', ' ')
|
xmlstring = xmlstring.replace(' xml:', ' ')
|
||||||
et = ElementTree.XML(xmlstring)
|
et = ElementTree.XML(xmlstring)
|
||||||
|
|
||||||
|
root_words = set()
|
||||||
words = {}
|
words = {}
|
||||||
for w in et.iter("w"):
|
for w in et.iter("w"):
|
||||||
words[w.get('id')] = Word(w)
|
words[w.get('id')] = Word(w)
|
||||||
|
@ -394,10 +395,15 @@ def load_corpus(filename):
|
||||||
# catch modra links from root
|
# catch modra links from root
|
||||||
elif lfrom[-1] == '0' and l.get('afun') == 'modra':
|
elif lfrom[-1] == '0' and l.get('afun') == 'modra':
|
||||||
root_words.add(l.get('dep'))
|
root_words.add(l.get('dep'))
|
||||||
pass
|
|
||||||
else:
|
else:
|
||||||
# strange errors, just skip...
|
# strange errors, just skip...
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
no_root_words = [w for k, w in words.items() if k in root_words]
|
||||||
|
missing = root_words - set(w.id for w in no_root_words)
|
||||||
|
# TODO: I forgot what no_root_words and missing were meant for; use them or remove them.
|
||||||
|
|
||||||
return list(words.values())
|
return list(words.values())
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user