|
|
|
@ -129,7 +129,8 @@ class RestrictionType(Enum):
|
|
|
|
|
class Rendition(Enum):
    """Ways in which a matched word can be rendered as text.

    Every member name and value is unique; defining the same member name
    twice makes ``Enum`` raise ``TypeError`` when the class is created.
    """

    Lemma = 0     # render the word's lemma
    WordForm = 1  # render the word's surface text
    Lexis = 2     # render a fixed string supplied with the rendition
    Unknown = 3   # no rendition specified; used as the default
|
|
|
|
|
|
|
|
|
|
class Order(Enum):
|
|
|
|
|
FromTo = 0
|
|
|
|
@ -164,8 +165,9 @@ class Order(Enum):
|
|
|
|
|
raise NotImplementedError("Should not be here: Order match")
|
|
|
|
|
|
|
|
|
|
class ComponentRendition:
|
|
|
|
|
def __init__(self, rendition=Rendition.Unknown):
|
|
|
|
|
self.word_form = {}
|
|
|
|
|
def __init__(self, rendition=Rendition.Unknown, lexis=None):
|
|
|
|
|
assert(type(rendition) is Rendition)
|
|
|
|
|
self.lexis = lexis
|
|
|
|
|
self.rendition = rendition
|
|
|
|
|
|
|
|
|
|
def render(self, word):
|
|
|
|
@ -173,6 +175,8 @@ class ComponentRendition:
|
|
|
|
|
return word.lemma
|
|
|
|
|
elif self.rendition == Rendition.WordForm:
|
|
|
|
|
return word.text
|
|
|
|
|
elif self.rendition == Rendition.Lexis:
|
|
|
|
|
return self.lexis
|
|
|
|
|
elif self.rendition == Rendition.Unknown:
|
|
|
|
|
return None
|
|
|
|
|
else:
|
|
|
|
@ -182,27 +186,30 @@ class ComponentRendition:
|
|
|
|
|
return str(self.rendition)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# dont know...
|
|
|
|
|
class StructureSelection(Enum):
    """Structure-level selection modes produced by ``ComponentRepresentation.new``.

    Each member has its own value. Two members sharing a value would turn
    the second one into an alias of the first, making them
    indistinguishable in identity and membership checks.
    """

    All = 0
    # Kept because other code in this file still refers to it.
    Frequency = 1
    Msd = 2
|
|
|
|
|
|
|
|
|
|
class ComponentRepresentation:
    """Factory that turns one representation feature into a behaviour object."""

    @staticmethod
    def new(s):
        """Build the representation object described by ``s``.

        ``s`` is a mapping of feature attributes (presumably the attribute
        dict of a ``feature`` element -- confirm against the caller).

        Returns:
            * a ``ComponentRendition`` when ``s`` has a ``'rendition'`` key
              (``"lemma"``, ``"word_form"`` or ``"lexis"``; the latter also
              reads ``s['string']``),
            * a ``StructureSelection`` member when ``s['selection']`` is
              ``"frequency"``, ``"msd"`` or ``"all"``,
            * ``{s['selection']: s['value']}`` for any other selection,
            * ``None`` when ``s`` has neither key.

        Raises:
            NotImplementedError: for an unrecognised ``'rendition'`` value.
        """
        if 'rendition' in s:
            rendition = s['rendition']
            if rendition == "lemma":
                return ComponentRendition(Rendition.Lemma)
            if rendition == "word_form":
                return ComponentRendition(Rendition.WordForm)
            if rendition == "lexis":
                # A fixed-string rendition is meaningless without the string.
                assert(s['string'] is not None)
                return ComponentRendition(Rendition.Lexis, s['string'])
            raise NotImplementedError("Rendition: {}".format(s))

        if 'selection' in s:
            selection = s['selection']
            if selection == "frequency":
                return StructureSelection.Frequency
            if selection == "msd":
                return StructureSelection.Msd
            if selection == "all":
                return StructureSelection.All
            # Any other selection is passed through as a one-entry mapping
            # from the selection name to its requested value.
            return {selection: s['value']}

        return None
|
|
|
|
|
|
|
|
|
@ -376,7 +383,7 @@ class Component:
|
|
|
|
|
|
|
|
|
|
def set_representation(self, representation):
|
|
|
|
|
cr = None
|
|
|
|
|
if representation is not None:
|
|
|
|
|
if len(representation) > 0:
|
|
|
|
|
self.representation = []
|
|
|
|
|
|
|
|
|
|
for feature in representation:
|
|
|
|
@ -414,9 +421,6 @@ class Component:
|
|
|
|
|
others, r2 = next_component.find_next(deps, comps, restrs, reprs)
|
|
|
|
|
to_ret.extend(others)
|
|
|
|
|
|
|
|
|
|
if StructureSelection.Frequency in (r1, r2):
|
|
|
|
|
representation = StructureSelection.Frequency
|
|
|
|
|
|
|
|
|
|
return to_ret, representation
|
|
|
|
|
|
|
|
|
|
def name_str(self):
|
|
|
|
@ -566,7 +570,7 @@ class SyntacticStructure:
|
|
|
|
|
for comp in definitions:
|
|
|
|
|
n = comp.get('cid')
|
|
|
|
|
restrs[n] = None
|
|
|
|
|
forms[n] = None
|
|
|
|
|
forms[n] = []
|
|
|
|
|
|
|
|
|
|
for el in comp:
|
|
|
|
|
if el.tag.startswith("restriction"):
|
|
|
|
@ -575,29 +579,29 @@ class SyntacticStructure:
|
|
|
|
|
elif el.tag.startswith("representation"):
|
|
|
|
|
st.add_representation(n, el, forms)
|
|
|
|
|
else:
|
|
|
|
|
raise NotImplementedError("definition??")
|
|
|
|
|
raise NotImplementedError("Unknown definition: {} in structure {}".format(el.tag, st.id))
|
|
|
|
|
|
|
|
|
|
fake_root_component = Component({'cid': '#', 'type': 'other'})
|
|
|
|
|
st.components, st.selection = fake_root_component.find_next(deps, comps, restrs, forms)
|
|
|
|
|
return st
|
|
|
|
|
|
|
|
|
|
def add_representation(self, n, rep_el, forms):
    """Record the representation features of component ``n``.

    Args:
        n: key of the component in ``forms``.
        rep_el: a ``representation`` element, or a ``representation_and``
            element whose first child is used (a warning is logged and the
            remaining children are ignored).
        forms: mapping from component key to a list; rendition features and
            non-agreement selection features are appended to ``forms[n]``.

    Features whose ``selection`` attribute is ``"agreement"`` are handed to
    ``self.add_agreement`` instead. Features with neither a ``rendition``
    nor a ``selection`` attribute are skipped with a warning.

    Raises:
        AssertionError: if the element used is not a ``representation`` or
            one of its children is not a ``feature``.
    """
    if rep_el.tag == "representation_and":
        rep_el = rep_el[0]
        logging.warning("Only using first reprentation in representation_and in structure {}".format(self.id))

    assert(rep_el.tag == "representation")
    for el in rep_el:
        assert(el.tag == "feature")
        attributes = el.attrib
        if 'rendition' in attributes:
            forms[n].append(el)
        elif 'selection' not in attributes:
            logging.warning("Strange representation feature in structure {}. Skipping"
                            .format(self.id))
        elif attributes["selection"] == "agreement":
            # Agreements relate two components, so they are stored on the
            # structure rather than in the per-component feature list.
            self.add_agreement(n, el)
        else:
            forms[n].append(el)
|
|
|
|
|
|
|
|
|
|
def add_agreement(self, n, el):
|
|
|
|
@ -605,12 +609,13 @@ class SyntacticStructure:
|
|
|
|
|
|
|
|
|
|
n1 = n
|
|
|
|
|
n2 = el.get('head')[4:]
|
|
|
|
|
agreement_str = next(iter(el)).get('agreement')
|
|
|
|
|
agreement_str = el.get('msd')
|
|
|
|
|
assert(agreement_str is not None)
|
|
|
|
|
|
|
|
|
|
self.agreements.append({
|
|
|
|
|
'n1': n1,
|
|
|
|
|
'n2': n2,
|
|
|
|
|
'match': agreement_str.split('|')})
|
|
|
|
|
'match': agreement_str.split('+')})
|
|
|
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
|
comp_str = "\n".join(str(comp) for comp in self.components)
|
|
|
|
|