|
|
|
@ -132,6 +132,10 @@ class Rendition(Enum):
|
|
|
|
|
Lexis = 2
|
|
|
|
|
Unknown = 3
|
|
|
|
|
|
|
|
|
|
class StructureSelection(Enum):
    """Selection kinds that a representation feature can request.

    ``add_feature`` maps the feature value ``"all"`` to ``All`` and
    ``"msd"`` to ``Msd``.
    """

    All = 0  # chosen for the selection value "all"
    Msd = 1  # chosen for the selection value "msd"
|
|
|
|
|
|
|
|
|
|
class Order(Enum):
|
|
|
|
|
FromTo = 0
|
|
|
|
|
ToFrom = 1
|
|
|
|
@ -165,20 +169,53 @@ class Order(Enum):
|
|
|
|
|
raise NotImplementedError("Should not be here: Order match")
|
|
|
|
|
|
|
|
|
|
class ComponentRendition:
|
|
|
|
|
def __init__(self, rendition=Rendition.Unknown, lexis=None):
    """Create a rendition of the given kind.

    Args:
        rendition: a ``Rendition`` member; the exact type is asserted.
        lexis: value stored unchanged on ``self.lexis`` (``None`` by default).
    """
    # Exact type check: only genuine Rendition members are accepted.
    assert type(rendition) is Rendition
    self.rendition = rendition
    self.lexis = lexis
|
|
|
|
|
def __init__(self):
    """Start with an unknown rendition and no extra payload."""
    # ``rendition`` stays Unknown and ``more`` stays None until the
    # one-shot setters (_set_rendition / _set_more) fill them in.
    self.rendition = Rendition.Unknown
    self.more = None
|
|
|
|
|
|
|
|
|
|
def _set_rendition(self, r):
    """Store ``r`` as the rendition kind.

    The current rendition must still be ``Rendition.Unknown``; this is
    asserted, so a second assignment fails with ``AssertionError``.
    """
    current = self.rendition
    assert current is Rendition.Unknown
    self.rendition = r
|
|
|
|
|
|
|
|
|
|
def _set_more(self, m):
|
|
|
|
|
assert(self.more is None and m is not None)
|
|
|
|
|
self.more = m
|
|
|
|
|
|
|
|
|
|
def add_feature(self, feature):
|
|
|
|
|
if 'rendition' in feature:
|
|
|
|
|
if feature['rendition'] == "lemma":
|
|
|
|
|
self._set_rendition(Rendition.Lemma)
|
|
|
|
|
elif feature['rendition'] == "word_form":
|
|
|
|
|
self._set_rendition(Rendition.WordForm)
|
|
|
|
|
elif feature['rendition'] == "lexis":
|
|
|
|
|
self._set_rendition(Rendition.Lexis)
|
|
|
|
|
self._set_more(feature['string'])
|
|
|
|
|
else:
|
|
|
|
|
raise NotImplementedError("Representation rendition: {}".format(feature))
|
|
|
|
|
|
|
|
|
|
elif 'selection' in feature:
|
|
|
|
|
if feature['selection'] == "msd":
|
|
|
|
|
self._set_more(StructureSelection.Msd)
|
|
|
|
|
elif feature['selection'] == "all":
|
|
|
|
|
self._set_more(StructureSelection.All)
|
|
|
|
|
else:
|
|
|
|
|
raise NotImplementedError("Representation selection: {}".format(feature))
|
|
|
|
|
|
|
|
|
|
def render(self, word):
|
|
|
|
|
else:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
def render(self, words):
|
|
|
|
|
if self.rendition == Rendition.Lemma:
|
|
|
|
|
return word.lemma
|
|
|
|
|
elif self.rendition == Rendition.WordForm:
|
|
|
|
|
return word.text
|
|
|
|
|
return words[0].lemma
|
|
|
|
|
elif self.rendition == Rendition.Lexis:
|
|
|
|
|
return self.lexis
|
|
|
|
|
return self.more
|
|
|
|
|
elif self.rendition == Rendition.Unknown:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
elif self.rendition == Rendition.WordForm:
|
|
|
|
|
# check more!
|
|
|
|
|
return words[0].text
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
raise RuntimeError("Unknown rendition: {}".format(self.rendition))
|
|
|
|
|
|
|
|
|
@ -186,34 +223,6 @@ class ComponentRendition:
|
|
|
|
|
return str(self.rendition)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class StructureSelection(Enum):
    """Selection kinds returned for a ``selection`` feature.

    ``ComponentRepresentation.new`` returns ``All`` for the value
    ``"all"`` and ``Msd`` for the value ``"msd"``.
    """

    All = 0  # returned for the selection value "all"
    Msd = 1  # returned for the selection value "msd"
|
|
|
|
|
|
|
|
|
|
class ComponentRepresentation:
    """Factory that turns a feature mapping into a representation object."""

    @staticmethod
    def new(s):
        """Build the object described by the mapping ``s``.

        A ``'rendition'`` key takes precedence over a ``'selection'`` key.

        Returns:
            A ``ComponentRendition`` for rendition ``"lemma"``,
            ``"word_form"`` or ``"lexis"`` (the latter also carries
            ``s['string']``); a ``StructureSelection`` member for
            selection ``"msd"`` or ``"all"``; ``None`` when ``s`` has
            neither key.

        Raises:
            NotImplementedError: for any other rendition or selection value.
        """
        if 'rendition' in s:
            kind = s['rendition']
            if kind == "lemma":
                return ComponentRendition(Rendition.Lemma)
            if kind == "word_form":
                return ComponentRendition(Rendition.WordForm)
            if kind == "lexis":
                text = s['string']
                # A lexis rendition needs its literal string.
                assert text is not None
                return ComponentRendition(Rendition.Lexis, text)
            raise NotImplementedError("Rendition: {}".format(s))

        if 'selection' not in s:
            return None

        choice = s['selection']
        if choice == "msd":
            return StructureSelection.Msd
        if choice == "all":
            return StructureSelection.All
        raise NotImplementedError("Selection: {}".format(s))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ComponentStatus(Enum):
|
|
|
|
|
Optional = 0
|
|
|
|
|
Required = 1
|
|
|
|
@ -357,13 +366,13 @@ class Component:
|
|
|
|
|
self.idx = idx
|
|
|
|
|
self.restriction = None
|
|
|
|
|
self.next_element = []
|
|
|
|
|
self.rendition = ComponentRendition()
|
|
|
|
|
self.representation = ComponentRendition()
|
|
|
|
|
self.selection = {}
|
|
|
|
|
|
|
|
|
|
self.iter_ctr = 0
|
|
|
|
|
|
|
|
|
|
def render_word(self, word):
|
|
|
|
|
return self.rendition.render(word)
|
|
|
|
|
return self.representation.render(word)
|
|
|
|
|
|
|
|
|
|
def add_next(self, next_component, link_label, order):
|
|
|
|
|
self.next_element.append((next_component, link_label, Order.new(order)))
|
|
|
|
@ -382,31 +391,15 @@ class Component:
|
|
|
|
|
raise RuntimeError("Unreachable")
|
|
|
|
|
|
|
|
|
|
def set_representation(self, representation):
|
|
|
|
|
cr = None
|
|
|
|
|
if len(representation) > 0:
|
|
|
|
|
self.representation = []
|
|
|
|
|
# for r in representation:
|
|
|
|
|
# print(ElementTree.tostring(r).decode('ascii').replace('\n', ''))
|
|
|
|
|
# print("--")
|
|
|
|
|
|
|
|
|
|
if len(representation) > 0:
|
|
|
|
|
for feature in representation:
|
|
|
|
|
f = ComponentRepresentation.new(dict(feature.attrib))
|
|
|
|
|
|
|
|
|
|
if type(f) is None:
|
|
|
|
|
logging.warning("Unknown representation in component {}, skipping...".format(self.idx), file=sys.stderr)
|
|
|
|
|
continue
|
|
|
|
|
if type(f) is StructureSelection:
|
|
|
|
|
assert(cr is None)
|
|
|
|
|
cr = f
|
|
|
|
|
elif type(f) is ComponentRendition:
|
|
|
|
|
self.rendition = f
|
|
|
|
|
elif type(f) is dict:
|
|
|
|
|
self.selection.update(f)
|
|
|
|
|
else:
|
|
|
|
|
raise RuntimeError("Unreachable: {}".format(f))
|
|
|
|
|
|
|
|
|
|
return cr
|
|
|
|
|
self.representation.add_feature(feature)
|
|
|
|
|
|
|
|
|
|
def find_next(self, deps, comps, restrs, reprs):
|
|
|
|
|
representation = StructureSelection.All
|
|
|
|
|
|
|
|
|
|
to_ret = []
|
|
|
|
|
for d in deps:
|
|
|
|
|
if d[0] == self.idx:
|
|
|
|
@ -414,23 +407,22 @@ class Component:
|
|
|
|
|
|
|
|
|
|
next_component = Component(comps[idx])
|
|
|
|
|
next_component.set_restriction(restrs[idx])
|
|
|
|
|
r1 = next_component.set_representation(reprs[idx])
|
|
|
|
|
next_component.set_representation(reprs[idx])
|
|
|
|
|
to_ret.append(next_component)
|
|
|
|
|
|
|
|
|
|
self.add_next(next_component, dep_label, order)
|
|
|
|
|
others, r2 = next_component.find_next(deps, comps, restrs, reprs)
|
|
|
|
|
others = next_component.find_next(deps, comps, restrs, reprs)
|
|
|
|
|
to_ret.extend(others)
|
|
|
|
|
|
|
|
|
|
return to_ret, representation
|
|
|
|
|
return to_ret
|
|
|
|
|
|
|
|
|
|
def name_str(self):
    """Return ``self.name``, or the placeholder ``"_"`` when it is ``None``."""
    if self.name is None:
        return "_"
    return self.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
|
n = self.name_str()
|
|
|
|
|
return "{:s}) {:7s}:{} [{}] :{}".format(
|
|
|
|
|
self.idx, n, self.status, self.restriction, self.rendition)
|
|
|
|
|
self.idx, n, self.status, self.restriction, self.representation)
|
|
|
|
|
|
|
|
|
|
def tree(self):
|
|
|
|
|
el = []
|
|
|
|
@ -582,7 +574,7 @@ class SyntacticStructure:
|
|
|
|
|
raise NotImplementedError("Unknown definition: {} in structure {}".format(el.tag, st.id))
|
|
|
|
|
|
|
|
|
|
fake_root_component = Component({'cid': '#', 'type': 'other'})
|
|
|
|
|
st.components, st.selection = fake_root_component.find_next(deps, comps, restrs, forms)
|
|
|
|
|
st.components = fake_root_component.find_next(deps, comps, restrs, forms)
|
|
|
|
|
return st
|
|
|
|
|
|
|
|
|
|
def add_representation(self, n, rep_el, forms):
|
|
|
|
@ -969,7 +961,6 @@ class ColocationIds:
|
|
|
|
|
if group:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def match_file(words, structures):
|
|
|
|
|
matches = {s.id: [] for s in structures}
|
|
|
|
|
|
|
|
|
|