From 6eefd9c9f6fb373f93955af80f41c0d3e0e392ba Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Mon, 13 May 2019 09:52:29 +0200 Subject: [PATCH] redid representation storage, (as prev commit: to make it easier to use) find_next does not collect representations, no separate class to parse representation features, --- wani.py | 125 ++++++++++++++++++++++++++------------------------------ 1 file changed, 58 insertions(+), 67 deletions(-) diff --git a/wani.py b/wani.py index 72630f2..f4dc1e7 100644 --- a/wani.py +++ b/wani.py @@ -132,6 +132,10 @@ class Rendition(Enum): Lexis = 2 Unknown = 3 +class StructureSelection(Enum): + All = 0 + Msd = 1 + class Order(Enum): FromTo = 0 ToFrom = 1 @@ -165,20 +169,53 @@ class Order(Enum): raise NotImplementedError("Should not be here: Order match") class ComponentRendition: - def __init__(self, rendition=Rendition.Unknown, lexis=None): - assert(type(rendition) is Rendition) - self.lexis = lexis - self.rendition = rendition + def __init__(self): + self.more = None + self.rendition = Rendition.Unknown + + def _set_rendition(self, r): + assert(self.rendition is Rendition.Unknown) + self.rendition = r + + def _set_more(self, m): + assert(self.more is None and m is not None) + self.more = m + + def add_feature(self, feature): + if 'rendition' in feature: + if feature['rendition'] == "lemma": + self._set_rendition(Rendition.Lemma) + elif feature['rendition'] == "word_form": + self._set_rendition(Rendition.WordForm) + elif feature['rendition'] == "lexis": + self._set_rendition(Rendition.Lexis) + self._set_more(feature['string']) + else: + raise NotImplementedError("Representation rendition: {}".format(feature)) + + elif 'selection' in feature: + if feature['selection'] == "msd": + self._set_more(StructureSelection.Msd) + elif feature['selection'] == "all": + self._set_more(StructureSelection.All) + else: + raise NotImplementedError("Representation selection: {}".format(feature)) - def render(self, word): + else: + return None + + def
render(self, words): if self.rendition == Rendition.Lemma: - return word.lemma - elif self.rendition == Rendition.WordForm: - return word.text + return words[0].lemma elif self.rendition == Rendition.Lexis: - return self.lexis + return self.more elif self.rendition == Rendition.Unknown: return None + + elif self.rendition == Rendition.WordForm: + # check more! + return words[0].text + else: raise RuntimeError("Unknown rendition: {}".format(self.rendition)) @@ -186,34 +223,6 @@ class ComponentRendition: return str(self.rendition) -class StructureSelection(Enum): - All = 0 - Msd = 1 - -class ComponentRepresentation: - @staticmethod - def new(s): - if 'rendition' in s: - if s['rendition'] == "lemma": - return ComponentRendition(Rendition.Lemma) - elif s['rendition'] == "word_form": - return ComponentRendition(Rendition.WordForm) - elif s['rendition'] == "lexis": - assert(s['string'] is not None) - return ComponentRendition(Rendition.Lexis, s['string']) - else: - raise NotImplementedError("Rendition: {}".format(s)) - elif 'selection' in s: - if s['selection'] == "msd": - return StructureSelection.Msd - elif s['selection'] == "all": - return StructureSelection.All - else: - raise NotImplementedError("Selection: {}".format(s)) - else: - return None - - class ComponentStatus(Enum): Optional = 0 Required = 1 @@ -357,13 +366,13 @@ class Component: self.idx = idx self.restriction = None self.next_element = [] - self.rendition = ComponentRendition() + self.representation = ComponentRendition() self.selection = {} self.iter_ctr = 0 def render_word(self, word): - return self.rendition.render(word) + return self.representation.render(word) def add_next(self, next_component, link_label, order): self.next_element.append((next_component, link_label, Order.new(order))) @@ -382,31 +391,15 @@ class Component: raise RuntimeError("Unreachable") def set_representation(self, representation): - cr = None - if len(representation) > 0: - self.representation = [] + # for r in representation: 
+ # print(ElementTree.tostring(r).decode('ascii').replace('\n', '')) + # print("--") + if len(representation) > 0: for feature in representation: - f = ComponentRepresentation.new(dict(feature.attrib)) - - if type(f) is None: - logging.warning("Unknown representation in component {}, skipping...".format(self.idx), file=sys.stderr) - continue - if type(f) is StructureSelection: - assert(cr is None) - cr = f - elif type(f) is ComponentRendition: - self.rendition = f - elif type(f) is dict: - self.selection.update(f) - else: - raise RuntimeError("Unreachable: {}".format(f)) - - return cr + self.representation.add_feature(feature) def find_next(self, deps, comps, restrs, reprs): - representation = StructureSelection.All - to_ret = [] for d in deps: if d[0] == self.idx: @@ -414,23 +407,22 @@ class Component: next_component = Component(comps[idx]) next_component.set_restriction(restrs[idx]) - r1 = next_component.set_representation(reprs[idx]) + next_component.set_representation(reprs[idx]) to_ret.append(next_component) self.add_next(next_component, dep_label, order) - others, r2 = next_component.find_next(deps, comps, restrs, reprs) + others = next_component.find_next(deps, comps, restrs, reprs) to_ret.extend(others) - return to_ret, representation + return to_ret def name_str(self): return "_" if self.name is None else self.name - def __str__(self): n = self.name_str() return "{:s}) {:7s}:{} [{}] :{}".format( - self.idx, n, self.status, self.restriction, self.rendition) + self.idx, n, self.status, self.restriction, self.representation) def tree(self): el = [] @@ -582,7 +574,7 @@ class SyntacticStructure: raise NotImplementedError("Unknown definition: {} in structure {}".format(el.tag, st.id)) fake_root_component = Component({'cid': '#', 'type': 'other'}) - st.components, st.selection = fake_root_component.find_next(deps, comps, restrs, forms) + st.components = fake_root_component.find_next(deps, comps, restrs, forms) return st def add_representation(self, n, rep_el, 
forms): @@ -969,7 +961,6 @@ class ColocationIds: if group: break - def match_file(words, structures): matches = {s.id: [] for s in structures}