From b4b93022fef63c19d1986dfb889e05543e5fea82 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Sun, 12 May 2019 22:13:22 +0200 Subject: [PATCH] Updating for new representations, for now only parsing --- wani.py | 67 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/wani.py b/wani.py index 6de5a76..0cee15b 100644 --- a/wani.py +++ b/wani.py @@ -129,7 +129,8 @@ class RestrictionType(Enum): class Rendition(Enum): Lemma = 0 WordForm = 1 - Unknown = 2 + Lexis = 2 + Unknown = 3 class Order(Enum): FromTo = 0 @@ -164,8 +165,9 @@ class Order(Enum): raise NotImplementedError("Should not be here: Order match") class ComponentRendition: - def __init__(self, rendition=Rendition.Unknown): - self.word_form = {} + def __init__(self, rendition=Rendition.Unknown, lexis=None): + assert(type(rendition) is Rendition) + self.lexis = lexis self.rendition = rendition def render(self, word): @@ -173,6 +175,8 @@ class ComponentRendition: return word.lemma elif self.rendition == Rendition.WordForm: return word.text + elif self.rendition == Rendition.Lexis: + return self.lexis elif self.rendition == Rendition.Unknown: return None else: @@ -182,27 +186,30 @@ class ComponentRendition: return str(self.rendition) -# dont know... class StructureSelection(Enum): All = 0 - Frequency = 1 + Msd = 1 class ComponentRepresentation: + @staticmethod def new(s): if 'rendition' in s: if s['rendition'] == "lemma": return ComponentRendition(Rendition.Lemma) elif s['rendition'] == "word_form": return ComponentRendition(Rendition.WordForm) + elif s['rendition'] == "lexis": + assert(s['string'] is not None) + return ComponentRendition(Rendition.Lexis, s['string']) else: raise NotImplementedError("Rendition: {}".format(s)) elif 'selection' in s: - if s['selection'] == "frequency": - return StructureSelection.Frequency + if s['selection'] == "msd": + return StructureSelection.Msd elif s['selection'] == "all": return StructureSelection.All else: - return {s['selection']: s['value']} + raise NotImplementedError("Selection: {}".format(s)) else: return None @@ -376,7 +383,7 @@ class Component: def set_representation(self, representation): cr = None - if representation is not None: + if len(representation) > 0: self.representation = [] for feature in representation: @@ -414,9 +421,6 @@ class Component: others, r2 = next_component.find_next(deps, comps, restrs, reprs) to_ret.extend(others) - if StructureSelection.Frequency in (r1, r2): - representation = StructureSelection.Frequency - return to_ret, representation def name_str(self): @@ -566,7 +570,7 @@ class SyntacticStructure: for comp in definitions: n = comp.get('cid') restrs[n] = None - forms[n] = None + forms[n] = [] for el in comp: if el.tag.startswith("restriction"): @@ -575,29 +579,29 @@ class SyntacticStructure: elif el.tag.startswith("representation"): st.add_representation(n, el, forms) else: - raise NotImplementedError("definition??") + raise NotImplementedError("Unknown definition: {} in structure {}".format(el.tag, st.id)) fake_root_component = Component({'cid': '#', 'type': 'other'}) st.components, st.selection = fake_root_component.find_next(deps, comps, restrs, forms) return st - def add_representation(self, n, el, forms): - if el.tag == "representation": - els = [el] - elif el.tag == "representation_and": - els = list(el) - else: - raise NotImplementedError("Unknown representation tag: {}".format(el.tag)) - - for el in els: - if el.get('basic') == 'form': - assert(forms[n] is None) - forms[n] = el - elif el.get('basic') == "agreement": + def add_representation(self, n, rep_el, forms): + if rep_el.tag == "representation_and": + rep_el = rep_el[0] + logging.warning("Only using first reprentation in representation_and in structure {}".format(self.id)) + + assert(rep_el.tag == "representation") + for el in rep_el: + assert(el.tag == "feature") + if 'rendition' in el.attrib: + forms[n].append(el) + elif 'selection' in el.attrib and el.attrib["selection"] != "agreement": + forms[n].append(el) + elif 'selection' in el.attrib: self.add_agreement(n, el) else: - logging.warning("Strange representation (basic={}) in structure {}. Skipping" - .format(el.get('basic'), self.id)) + logging.warning("Strange representation feature in structure {}. Skipping" + .format(self.id)) continue def add_agreement(self, n, el): @@ -605,12 +609,13 @@ class SyntacticStructure: n1 = n n2 = el.get('head')[4:] - agreement_str = next(iter(el)).get('agreement') + agreement_str = el.get('msd') + assert(agreement_str is not None) self.agreements.append({ 'n1': n1, 'n2': n2, - 'match': agreement_str.split('|')}) + 'match': agreement_str.split('+')}) def __str__(self): comp_str = "\n".join(str(comp) for comp in self.components)