from model.example import Example
from model.translation import Translation
from model.editable import Editable
from model.tags import TAGS


class Sense(Editable):
    """One sense of an entry, built from its XML element.

    Attributes set by the constructor:
        labels: list of ``(type, value)`` tuples, one per ``label`` element.
        definition: text of the first definition, or ``""`` if there is none.
        examples: list of ``Example`` objects, one per example container.
        translations: list of lists of ``Translation`` objects; the inner
            list at index ``i`` holds the translations of cluster ``i + 1``.
    """

    def __init__(self, sense_xml):
        """Read labels, definition, examples and translations from sense_xml.

        Args:
            sense_xml: element exposing ``querySelector`` and
                ``querySelectorAll``; the elements it returns are read via
                ``getAttribute`` and ``textContent``.
        """
        definition = sense_xml.querySelector("definitionList definition")

        self.labels = []
        for tag_xml in sense_xml.querySelectorAll("sense > labelList label"):
            t_type = tag_xml.getAttribute("type")
            t_value = tag_xml.textContent
            if t_type not in TAGS:
                # Unknown label type: fall back to the first key of TAGS.
                # keys() is not subscriptable on Python 3 (it is a view),
                # so the first key is taken through an iterator instead.
                t_type = next(iter(TAGS.keys()))
            self.labels.append((t_type, t_value))

        self.definition = definition.textContent if definition else ""

        self.examples = [
            Example(example_xml)
            for example_xml in sense_xml.querySelectorAll(
                "exampleContainerList exampleContainer"
            )
        ]

        # First pass: build every translation and find the highest cluster
        # number, so the right number of buckets can be allocated.
        translations = []
        max_num_cluster = 0
        for translation_xml in sense_xml.querySelectorAll(
            "translationContainerList translationContainer"
        ):
            num_cluster = int(translation_xml.getAttribute("cluster"))
            max_num_cluster = max(max_num_cluster, num_cluster)
            translations.append((num_cluster, Translation(translation_xml)))

        # Second pass: place each translation into the bucket of its cluster.
        # NOTE(review): the "- 1" assumes cluster numbers start at 1 —
        # confirm against the XML schema.
        self.translations = [[] for _ in range(max_num_cluster)]
        for cluster_num, translation in translations:
            self.translations[cluster_num - 1].append(translation)

    def merge_labels(self):
        """Return the label values joined with ", "; label types are ignored."""
        return ", ".join(val for _, val in self.labels)