redid representation storage (as in prev commit: to make it easier to use)

find_next does not collect representations; there is no separate
class to parse representation features.
This commit is contained in:
Ozbolt Menegatti 2019-05-13 09:52:29 +02:00
parent 19067e4135
commit 6eefd9c9f6

125
wani.py
View File

@ -132,6 +132,10 @@ class Rendition(Enum):
Lexis = 2 Lexis = 2
Unknown = 3 Unknown = 3
class StructureSelection(Enum):
All = 0
Msd = 1
class Order(Enum): class Order(Enum):
FromTo = 0 FromTo = 0
ToFrom = 1 ToFrom = 1
@ -165,20 +169,53 @@ class Order(Enum):
raise NotImplementedError("Should not be here: Order match") raise NotImplementedError("Should not be here: Order match")
class ComponentRendition: class ComponentRendition:
def __init__(self, rendition=Rendition.Unknown, lexis=None): def __init__(self):
assert(type(rendition) is Rendition) self.more = None
self.lexis = lexis self.rendition = Rendition.Unknown
self.rendition = rendition
def render(self, word): def _set_rendition(self, r):
assert(self.rendition is Rendition.Unknown)
self.rendition = r
def _set_more(self, m):
assert(self.more is None and m is not None)
self.more = m
def add_feature(self, feature):
if 'rendition' in feature:
if feature['rendition'] == "lemma":
self._set_rendition(Rendition.Lemma)
elif feature['rendition'] == "word_form":
self._set_rendition(Rendition.WordForm)
elif feature['rendition'] == "lexis":
self._set_rendition(Rendition.Lexis)
self._set_more(feature['string'])
else:
raise NotImplementedError("Representation rendition: {}".format(feature))
elif 'selection' in feature:
if feature['selection'] == "msd":
self._set_more(StructureSelection.Msd)
elif feature['selection'] == "all":
self._set_more(StructureSelection.All)
else:
raise NotImplementedError("Representation selection: {}".format(feature))
else:
return None
def render(self, words):
if self.rendition == Rendition.Lemma: if self.rendition == Rendition.Lemma:
return word.lemma return words[0].lemma
elif self.rendition == Rendition.WordForm:
return word.text
elif self.rendition == Rendition.Lexis: elif self.rendition == Rendition.Lexis:
return self.lexis return self.more
elif self.rendition == Rendition.Unknown: elif self.rendition == Rendition.Unknown:
return None return None
elif self.rendition == Rendition.WordForm:
# check more!
return words[0].text
else: else:
raise RuntimeError("Unknown rendition: {}".format(self.rendition)) raise RuntimeError("Unknown rendition: {}".format(self.rendition))
@ -186,34 +223,6 @@ class ComponentRendition:
return str(self.rendition) return str(self.rendition)
class StructureSelection(Enum):
All = 0
Msd = 1
class ComponentRepresentation:
@staticmethod
def new(s):
if 'rendition' in s:
if s['rendition'] == "lemma":
return ComponentRendition(Rendition.Lemma)
elif s['rendition'] == "word_form":
return ComponentRendition(Rendition.WordForm)
elif s['rendition'] == "lexis":
assert(s['string'] is not None)
return ComponentRendition(Rendition.Lexis, s['string'])
else:
raise NotImplementedError("Rendition: {}".format(s))
elif 'selection' in s:
if s['selection'] == "msd":
return StructureSelection.Msd
elif s['selection'] == "all":
return StructureSelection.All
else:
raise NotImplementedError("Selection: {}".format(s))
else:
return None
class ComponentStatus(Enum): class ComponentStatus(Enum):
Optional = 0 Optional = 0
Required = 1 Required = 1
@ -357,13 +366,13 @@ class Component:
self.idx = idx self.idx = idx
self.restriction = None self.restriction = None
self.next_element = [] self.next_element = []
self.rendition = ComponentRendition() self.representation = ComponentRendition()
self.selection = {} self.selection = {}
self.iter_ctr = 0 self.iter_ctr = 0
def render_word(self, word): def render_word(self, word):
return self.rendition.render(word) return self.representation.render(word)
def add_next(self, next_component, link_label, order): def add_next(self, next_component, link_label, order):
self.next_element.append((next_component, link_label, Order.new(order))) self.next_element.append((next_component, link_label, Order.new(order)))
@ -382,31 +391,15 @@ class Component:
raise RuntimeError("Unreachable") raise RuntimeError("Unreachable")
def set_representation(self, representation): def set_representation(self, representation):
cr = None # for r in representation:
# print(ElementTree.tostring(r).decode('ascii').replace('\n', ''))
# print("--")
if len(representation) > 0: if len(representation) > 0:
self.representation = []
for feature in representation: for feature in representation:
f = ComponentRepresentation.new(dict(feature.attrib)) self.representation.add_feature(feature)
if type(f) is None:
logging.warning("Unknown representation in component {}, skipping...".format(self.idx), file=sys.stderr)
continue
if type(f) is StructureSelection:
assert(cr is None)
cr = f
elif type(f) is ComponentRendition:
self.rendition = f
elif type(f) is dict:
self.selection.update(f)
else:
raise RuntimeError("Unreachable: {}".format(f))
return cr
def find_next(self, deps, comps, restrs, reprs): def find_next(self, deps, comps, restrs, reprs):
representation = StructureSelection.All
to_ret = [] to_ret = []
for d in deps: for d in deps:
if d[0] == self.idx: if d[0] == self.idx:
@ -414,23 +407,22 @@ class Component:
next_component = Component(comps[idx]) next_component = Component(comps[idx])
next_component.set_restriction(restrs[idx]) next_component.set_restriction(restrs[idx])
r1 = next_component.set_representation(reprs[idx]) next_component.set_representation(reprs[idx])
to_ret.append(next_component) to_ret.append(next_component)
self.add_next(next_component, dep_label, order) self.add_next(next_component, dep_label, order)
others, r2 = next_component.find_next(deps, comps, restrs, reprs) others = next_component.find_next(deps, comps, restrs, reprs)
to_ret.extend(others) to_ret.extend(others)
return to_ret, representation return to_ret
def name_str(self): def name_str(self):
return "_" if self.name is None else self.name return "_" if self.name is None else self.name
def __str__(self): def __str__(self):
n = self.name_str() n = self.name_str()
return "{:s}) {:7s}:{} [{}] :{}".format( return "{:s}) {:7s}:{} [{}] :{}".format(
self.idx, n, self.status, self.restriction, self.rendition) self.idx, n, self.status, self.restriction, self.representation)
def tree(self): def tree(self):
el = [] el = []
@ -582,7 +574,7 @@ class SyntacticStructure:
raise NotImplementedError("Unknown definition: {} in structure {}".format(el.tag, st.id)) raise NotImplementedError("Unknown definition: {} in structure {}".format(el.tag, st.id))
fake_root_component = Component({'cid': '#', 'type': 'other'}) fake_root_component = Component({'cid': '#', 'type': 'other'})
st.components, st.selection = fake_root_component.find_next(deps, comps, restrs, forms) st.components = fake_root_component.find_next(deps, comps, restrs, forms)
return st return st
def add_representation(self, n, rep_el, forms): def add_representation(self, n, rep_el, forms):
@ -969,7 +961,6 @@ class ColocationIds:
if group: if group:
break break
def match_file(words, structures): def match_file(words, structures):
matches = {s.id: [] for s in structures} matches = {s.id: [] for s in structures}