from model.editable import Editable
from model.translation import from_container_list


def _read_attributes(xml, names):
    """Return a dict of those attributes in *names* that are present on *xml*.

    Attributes that the element does not carry are skipped, so the result
    only contains values that can be written back unchanged on export.
    """
    return {
        name: xml.getAttribute(name)
        for name in names
        if xml.hasAttribute(name)
    }


class Example(Editable):
    """One example container: its translations, inner element and components."""

    @staticmethod
    def add_clusters(entry):
        """Assign a free cluster number to every example whose cluster is -1.

        Walks all examples of all senses of *entry*. Examples reporting a
        cluster of ``None`` are ignored; examples reporting ``-1`` receive the
        smallest positive integers not already used by another example.
        """
        unclustered = []
        taken = set()

        # Gather all taken cluster numbers and all examples without clusters.
        for sense in entry.senses:
            for example in sense.examples:
                cluster = example.get_cluster()
                if cluster == -1:
                    unclustered.append(example)
                elif cluster is not None:
                    taken.add(cluster)

        # The candidate only ever grows, so each number is handed out once.
        candidate = 1
        for example in unclustered:
            while candidate in taken:
                candidate += 1
            taken.add(candidate)
            example.set_cluster(candidate)

    def __init__(self, example_xml):
        """Build the example from an example container DOM element.

        The inner element is a ``corpusExample`` if one is found, otherwise
        the ``multiwordExample`` element is used. Child nodes of the inner
        element become components; components with empty text are dropped.
        """
        self.translations = from_container_list(
            example_xml.querySelectorAll("translationContainer"))

        inner_xml = example_xml.querySelector("corpusExample")
        if inner_xml is not None:
            self.inner = CorpusExample(inner_xml)
        else:
            inner_xml = example_xml.querySelector("multiwordExample")
            self.inner = MultiwordExample(inner_xml)

        all_components = [ComponentLexeme(el) for el in inner_xml.childNodes]
        self.components = [comp for comp in all_components if comp.isValid()]

    def export(self, doc):
        """Return an ``exampleContainer`` element created through *doc*."""
        result = doc.createElement("exampleContainer")
        inner = self.inner.export(doc)

        # TODO: bad quick fix
        for comp in self.components:
            inner.appendChild(comp.export(doc))

        result.appendChild(inner)
        return result

    def text(self):
        """Return the component texts joined by single spaces."""
        return " ".join([comp.text for comp in self.components])

    def get_cluster(self):
        """Return the inner example's cluster (``None`` for corpus examples)."""
        return self.inner.get_cluster()

    def get_valid_cluster(self):
        """Return the inner example's cluster only if it came from the XML."""
        return self.inner.get_valid_cluster()

    def set_cluster(self, cluster):
        """Store *cluster* on the inner example.

        This does not touch ``cluster_valid``, so for a multiword example a
        number assigned here is only exported if the source XML already had
        a ``cluster`` attribute.
        """
        self.inner.cluster = cluster

    def is_collocation(self):
        """Return True when the inner example is a ``CorpusExample``."""
        # NOTE(review): the name suggests this should test for a multiword
        # example instead - confirm against callers before changing.
        return isinstance(self.inner, CorpusExample)


class CorpusExample:
    """Inner ``corpusExample`` element; carries attributes but no cluster."""

    def __init__(self, example_xml):
        """Remember the optional attributes present on *example_xml*."""
        self.other_attributes = _read_attributes(
            example_xml,
            ("example_id", "modified", "lexical_unit_id", "audio"))

    def export(self, doc):
        """Return a ``corpusExample`` element with the stored attributes."""
        result = doc.createElement("corpusExample")
        for key, value in self.other_attributes.items():
            result.setAttribute(key, value)
        return result

    def get_cluster(self):
        """Corpus examples have no cluster; always return ``None``."""
        return None

    def get_valid_cluster(self):
        """Corpus examples have no cluster; always return ``None``."""
        return None


class MultiwordExample:
    """Inner ``multiwordExample`` element with an optional cluster number."""

    def __init__(self, example_xml):
        """Read the optional attributes and the cluster from *example_xml*.

        ``cluster`` is -1 and ``cluster_valid`` is False unless the element
        has a ``cluster`` attribute, which is then parsed as an integer.
        """
        self.other_attributes = _read_attributes(
            example_xml,
            ("type", "lexical_unit_id", "structure_id", "structureName",
             "audio", "frequency", "logDice"))

        self.cluster_valid = False
        self.cluster = -1
        if example_xml.hasAttribute("cluster"):
            self.cluster_valid = True
            self.cluster = int(example_xml.getAttribute("cluster"))

    def export(self, doc):
        """Return a ``multiwordExample`` element with the stored attributes.

        The ``cluster`` attribute is written only when ``cluster_valid`` is
        set, i.e. when the source element carried one.
        """
        result = doc.createElement("multiwordExample")
        for key, value in self.other_attributes.items():
            result.setAttribute(key, value)

        if self.cluster_valid:
            result.setAttribute("cluster", str(self.cluster))

        return result

    def get_cluster(self):
        """Return the current cluster number (-1 when none is assigned)."""
        return self.cluster

    def get_valid_cluster(self):
        """Return the cluster if it came from the XML, otherwise ``None``."""
        return self.cluster if self.cluster_valid else None


class ComponentLexeme(Editable):
    """One child node of an example: plain text or an element with a role."""

    def __init__(self, xml):
        """Build the component from a DOM text node or element node.

        Text nodes get ``role`` set to ``None``. Element nodes keep their
        ``role`` attribute and any of the known optional attributes. The
        text is stripped of surrounding whitespace in both cases.
        """
        self.other_attributes = {}

        if xml.nodeName == "#text":
            self.text = xml.data
            self.role = None
        else:
            self.text = xml.textContent
            self.role = xml.getAttribute("role")
            # Only element nodes are asked for attributes.
            self.other_attributes = _read_attributes(
                xml, ("lexical_unit_lexeme_id", "slolex", "kol"))

        self.text = self.text.strip()

    def isValid(self):
        """Return True when the stripped text is non-empty."""
        return len(self.text) > 0

    def export(self, doc):
        """Return a text node (no role) or a ``comp`` element (with role)."""
        if self.role is None:
            return doc.createTextNode(self.text)

        result = doc.createElement("comp")
        result.setAttribute("role", self.role)
        result.textContent = self.text

        for key, value in self.other_attributes.items():
            result.setAttribute(key, value)

        return result