|
|
|
@ -3,30 +3,7 @@ from model.translation import from_container_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Example(Editable):
|
|
|
|
|
@staticmethod
def add_clusters(entry):
    """Give every unclustered example in ``entry`` its own cluster number.

    Iterates over ``entry.senses`` and each sense's ``examples``. An example
    whose ``get_cluster()`` returns ``-1`` is considered unclustered; one
    that returns ``None`` is ignored; any other value is recorded as a
    cluster number that is already taken. Every unclustered example then
    receives, through ``set_cluster``, the smallest integer >= 1 that is not
    taken yet (numbers handed out here become taken as well).

    Returns nothing; the examples are modified in place.
    """
    unclustered = []
    # A set, because membership is probed repeatedly while searching for
    # free numbers below.
    taken = set()

    # gather all taken cluster numbers and all examples without clusters
    for sense in entry.senses:
        for example in sense.examples:
            cluster = example.get_cluster()
            if cluster == -1:
                unclustered.append(example)
            elif cluster is not None:
                taken.add(cluster)

    # The candidate only ever grows, so the search for the next free number
    # resumes where the previous one stopped.
    candidate = 1
    for example in unclustered:
        while candidate in taken:
            candidate += 1
        taken.add(candidate)
        example.set_cluster(candidate)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, example_xml):
|
|
|
|
|
def __init__(self, example_xml, cluster_info):
|
|
|
|
|
self.translations = from_container_list(example_xml.querySelectorAll("translationContainer"))
|
|
|
|
|
|
|
|
|
|
inner_xml = example_xml.querySelector("corpusExample")
|
|
|
|
@ -34,7 +11,7 @@ class Example(Editable):
|
|
|
|
|
self.inner = CorpusExample(inner_xml)
|
|
|
|
|
else:
|
|
|
|
|
inner_xml = example_xml.querySelector("multiwordExample")
|
|
|
|
|
self.inner = MultiwordExample(inner_xml)
|
|
|
|
|
self.inner = MultiwordExample(inner_xml, cluster_info)
|
|
|
|
|
|
|
|
|
|
all_components = [ComponentLexeme(el) for el in inner_xml.childNodes]
|
|
|
|
|
self.components = [comp for comp in all_components if comp.isValid()]
|
|
|
|
@ -57,14 +34,11 @@ class Example(Editable):
|
|
|
|
|
def get_cluster(self):
    """Return the result of ``get_cluster()`` on the wrapped ``self.inner``."""
    wrapped = self.inner
    return wrapped.get_cluster()
|
|
|
|
|
|
|
|
|
|
def get_valid_cluster(self):
    """Return the result of ``get_valid_cluster()`` on the wrapped ``self.inner``."""
    wrapped = self.inner
    return wrapped.get_valid_cluster()
|
|
|
|
|
|
|
|
|
|
def set_cluster(self, cluster):
    """Store ``cluster`` in the ``cluster`` attribute of the wrapped ``self.inner``."""
    wrapped = self.inner
    wrapped.cluster = cluster
|
|
|
|
|
|
|
|
|
|
def is_collocation(self):
|
|
|
|
|
return type(self.inner) is CorpusExample
|
|
|
|
|
return self.get_view_type() == 2
|
|
|
|
|
|
|
|
|
|
def get_view_type(self):
|
|
|
|
|
# as per the bosses, these are the rules for different colors
|
|
|
|
@ -92,28 +66,40 @@ class CorpusExample:
|
|
|
|
|
def get_cluster(self):
    """Always return ``None``: this example type reports no cluster."""
    no_cluster = None
    return no_cluster
|
|
|
|
|
|
|
|
|
|
def get_valid_cluster(self):
    """Always return ``None``: this example type reports no valid cluster."""
    no_cluster = None
    return no_cluster
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class MultiwordExample:
|
|
|
|
|
def __init__(self, example_xml):
|
|
|
|
|
def __init__(self, example_xml, cluster_info):
|
|
|
|
|
self.other_attributes = {}
|
|
|
|
|
for oth_attr in ["lexical_unit_id", "structure_id", "structureName", "audio", "frequency", "logDice"]:
|
|
|
|
|
if example_xml.hasAttribute(oth_attr):
|
|
|
|
|
self.other_attributes[oth_attr] = example_xml.getAttribute(oth_attr)
|
|
|
|
|
|
|
|
|
|
self.cluster_valid = False
|
|
|
|
|
self.cluster = -1
|
|
|
|
|
if example_xml.hasAttribute("cluster"):
|
|
|
|
|
self.cluster_valid = True
|
|
|
|
|
self.cluster = int(example_xml.getAttribute("cluster"))
|
|
|
|
|
self.cluster = self._determine_cluster_number(example_xml, cluster_info)
|
|
|
|
|
|
|
|
|
|
if example_xml.hasAttribute("type"):
|
|
|
|
|
self.type = example_xml.getAttribute(oth_attr)
|
|
|
|
|
self.type = example_xml.getAttribute("type")
|
|
|
|
|
else:
|
|
|
|
|
self.type = None
|
|
|
|
|
|
|
|
|
|
def _determine_cluster_number(self, example_xml, cluster_info):
|
|
|
|
|
# since cluster numbers can be fairly fragmented, this is defragmentation
|
|
|
|
|
# we do need to get info about sense examples to do this, that is why we have cluster_info
|
|
|
|
|
if cluster_info is None:
|
|
|
|
|
cluster_mappings, cluster_begin = {}, 0
|
|
|
|
|
else:
|
|
|
|
|
cluster_mappings, cluster_begin = cluster_info
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if not example_xml.hasAttribute("cluster"):
|
|
|
|
|
cluster = len(cluster_mappings) + cluster_begin
|
|
|
|
|
cluster_mappings[cluster] = cluster
|
|
|
|
|
else:
|
|
|
|
|
cluster = int(example_xml.getAttribute("cluster"))
|
|
|
|
|
if cluster not in cluster_mappings:
|
|
|
|
|
cluster_mappings[cluster] = len(cluster_mappings) + cluster_begin
|
|
|
|
|
|
|
|
|
|
return cluster_mappings[cluster]
|
|
|
|
|
|
|
|
|
|
def export(self, doc):
|
|
|
|
|
result = doc.createElement("multiwordExample")
|
|
|
|
|
|
|
|
|
@ -131,9 +117,6 @@ class MultiwordExample:
|
|
|
|
|
def get_cluster(self):
    """Return the value stored in ``self.cluster``."""
    current = self.cluster
    return current
|
|
|
|
|
|
|
|
|
|
def get_valid_cluster(self):
    """Return ``self.cluster`` when ``self.cluster_valid`` is truthy, else ``None``."""
    if not self.cluster_valid:
        return None
    return self.cluster
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ComponentLexeme(Editable):
|
|
|
|
|
def __init__(self, xml):
|
|
|
|
|