Better, but not yet 100% working clusters, now with structure name.
This commit is contained in:
parent
be90c9dc89
commit
3844c7025b
|
@ -31,9 +31,8 @@ class Entry(Editable):
|
||||||
|
|
||||||
self.labels = import_label_list("head labelList label", entry_xml)
|
self.labels = import_label_list("head labelList label", entry_xml)
|
||||||
|
|
||||||
Sense.reset_cluster_count()
|
self.senses = [Sense(sense_xml, i) for i, sense_xml in
|
||||||
self.senses = [Sense(sense_xml) for sense_xml in
|
enumerate(entry_xml.querySelectorAll("body senseList sense"))]
|
||||||
entry_xml.querySelectorAll("body senseList sense")]
|
|
||||||
|
|
||||||
def get_measure_text(self):
|
def get_measure_text(self):
|
||||||
return self.measure["text"] if "text" in self.measure else ""
|
return self.measure["text"] if "text" in self.measure else ""
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
from model.editable import Editable
|
from model.editable import Editable
|
||||||
from model.translation import from_container_list
|
from model.translation import from_container_list
|
||||||
|
from model.example_clusters import ExampleClusters
|
||||||
|
|
||||||
|
|
||||||
class Example(Editable):
|
class Example(Editable):
|
||||||
def __init__(self, example_xml, cluster_info):
|
def __init__(self, example_xml):
|
||||||
self.translations = from_container_list(example_xml.querySelectorAll("translationContainer"))
|
self.translations = from_container_list(example_xml.querySelectorAll("translationContainer"))
|
||||||
|
|
||||||
inner_xml = example_xml.querySelector("corpusExample")
|
inner_xml = example_xml.querySelector("corpusExample")
|
||||||
|
@ -11,7 +12,7 @@ class Example(Editable):
|
||||||
self.inner = CorpusExample(inner_xml)
|
self.inner = CorpusExample(inner_xml)
|
||||||
else:
|
else:
|
||||||
inner_xml = example_xml.querySelector("multiwordExample")
|
inner_xml = example_xml.querySelector("multiwordExample")
|
||||||
self.inner = MultiwordExample(inner_xml, cluster_info)
|
self.inner = MultiwordExample(inner_xml)
|
||||||
|
|
||||||
all_components = [ComponentLexeme(el) for el in inner_xml.childNodes]
|
all_components = [ComponentLexeme(el) for el in inner_xml.childNodes]
|
||||||
self.components = [comp for comp in all_components if comp.isValid()]
|
self.components = [comp for comp in all_components if comp.isValid()]
|
||||||
|
@ -37,6 +38,9 @@ class Example(Editable):
|
||||||
def set_cluster(self, cluster):
|
def set_cluster(self, cluster):
|
||||||
self.inner.cluster = cluster
|
self.inner.cluster = cluster
|
||||||
|
|
||||||
|
def get_structure(self):
|
||||||
|
return self.inner.get_structure()
|
||||||
|
|
||||||
def is_collocation(self):
|
def is_collocation(self):
|
||||||
return self.get_view_type() == 2
|
return self.get_view_type() == 2
|
||||||
|
|
||||||
|
@ -66,39 +70,32 @@ class CorpusExample:
|
||||||
def get_cluster(self):
|
def get_cluster(self):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_structure(self):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class MultiwordExample:
|
class MultiwordExample:
|
||||||
def __init__(self, example_xml, cluster_info):
|
def __init__(self, example_xml):
|
||||||
self.other_attributes = {}
|
self.other_attributes = {}
|
||||||
for oth_attr in ["lexical_unit_id", "structure_id", "structureName", "audio", "frequency", "logDice"]:
|
for oth_attr in ["lexical_unit_id", "structure_id", "structureName", "audio", "frequency", "logDice"]:
|
||||||
if example_xml.hasAttribute(oth_attr):
|
if example_xml.hasAttribute(oth_attr):
|
||||||
self.other_attributes[oth_attr] = example_xml.getAttribute(oth_attr)
|
self.other_attributes[oth_attr] = example_xml.getAttribute(oth_attr)
|
||||||
|
|
||||||
self.cluster = self._determine_cluster_number(example_xml, cluster_info)
|
self.cluster = MultiwordExample._determine_cluster_number(example_xml)
|
||||||
|
|
||||||
if example_xml.hasAttribute("type"):
|
if example_xml.hasAttribute("type"):
|
||||||
self.type = example_xml.getAttribute("type")
|
self.type = example_xml.getAttribute("type")
|
||||||
else:
|
else:
|
||||||
self.type = None
|
self.type = None
|
||||||
|
|
||||||
def _determine_cluster_number(self, example_xml, cluster_info):
|
|
||||||
# since cluster numbers can be fairly fragmented, this is defragmentation
|
|
||||||
# we do need to get info about sense examples to do this, that is why we have cluster_info
|
|
||||||
if cluster_info is None:
|
|
||||||
cluster_mappings, cluster_begin = {}, 0
|
|
||||||
else:
|
|
||||||
cluster_mappings, cluster_begin = cluster_info
|
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _determine_cluster_number(example_xml):
|
||||||
if not example_xml.hasAttribute("cluster"):
|
if not example_xml.hasAttribute("cluster"):
|
||||||
cluster = len(cluster_mappings) + cluster_begin
|
return ExampleClusters.first_empty_cluster()
|
||||||
cluster_mappings[cluster] = cluster
|
|
||||||
else:
|
else:
|
||||||
cluster = int(example_xml.getAttribute("cluster"))
|
cluster = int(example_xml.getAttribute("cluster"))
|
||||||
if cluster not in cluster_mappings:
|
ExampleClusters.register_cluster(cluster)
|
||||||
cluster_mappings[cluster] = len(cluster_mappings) + cluster_begin
|
return cluster
|
||||||
|
|
||||||
return cluster_mappings[cluster]
|
|
||||||
|
|
||||||
def export(self, doc):
|
def export(self, doc):
|
||||||
result = doc.createElement("multiwordExample")
|
result = doc.createElement("multiwordExample")
|
||||||
|
@ -115,6 +112,12 @@ class MultiwordExample:
|
||||||
|
|
||||||
def get_cluster(self):
|
def get_cluster(self):
|
||||||
return self.cluster
|
return self.cluster
|
||||||
|
|
||||||
|
def get_structure(self):
|
||||||
|
if "structureName" in self.other_attributes:
|
||||||
|
return self.other_attributes["structureName"]
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class ComponentLexeme(Editable):
|
class ComponentLexeme(Editable):
|
||||||
|
|
65
src/model/example_clusters.py
Normal file
65
src/model/example_clusters.py
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
class ExampleClusters:
    """Registry of example cluster indexes and per-sense/structure cluster lists.

    Two pieces of state are kept:

    * ``idxs``  -- set of every cluster index handed out or registered so far.
    * ``lists`` -- maps a "<sense index>-<structure name>" key to the sorted
      list of cluster indexes used by examples under that key.

    The static methods at the bottom delegate to the module-level instance
    ``ec`` so callers can use ``ExampleClusters.<method>(...)`` directly.
    """

    def __init__(self):
        self.idxs = set()
        self.lists = {}

    @staticmethod
    def get_key(sense, example):
        """Return the grouping key "<sense.original_idx>-<structure name>".

        The structure name is whatever ``example.get_structure()`` returns; it
        is formatted as-is, so an example without a structure yields a key
        ending in "-None".
        """
        sense_idx = sense.original_idx
        structure_name = example.get_structure()
        return "{}-{}".format(sense_idx, structure_name)

    def _first_empty_cluster(self):
        """Reserve and return the smallest non-negative index not yet in use."""
        idx = 0
        # Fixed: the original tested ``self.clusters``, an attribute that is
        # never created; the set of used indexes is ``self.idxs``.
        while idx in self.idxs:
            idx += 1
        self.idxs.add(idx)
        return idx

    def _register_cluster(self, idx):
        """Mark ``idx`` as used so ``_first_empty_cluster`` never returns it."""
        # set.add is already a no-op for an index that is present.
        self.idxs.add(idx)

    def _rebuild_lists(self, model):
        """Recompute ``lists`` from every example of every sense in ``model``.

        Walks ``model.entry.senses`` and each sense's ``examples``, grouping
        the examples' cluster indexes by ``get_key``.  Examples whose
        ``get_cluster()`` is None carry no cluster and are skipped, which also
        keeps None out of the sort below.
        """
        collected = {}

        for sense in model.entry.senses:
            for example in sense.examples:
                cluster = example.get_cluster()
                if cluster is None:
                    continue

                key = ExampleClusters.get_key(sense, example)
                if key not in collected:
                    collected[key] = set()
                collected[key].add(cluster)

        rebuilt = {}
        for key in collected:
            # The explicit identity key is kept from the original code.
            # NOTE(review): presumably there to force value-based ordering
            # under Transcrypt -- confirm before removing.
            rebuilt[key] = sorted(collected[key], key=lambda x: x)
        self.lists = rebuilt

    def _get_list(self, sense, example):
        """Return the sorted cluster list for the example's key, or []."""
        key = ExampleClusters.get_key(sense, example)
        if key in self.lists:
            return self.lists[key]
        return []

    @staticmethod
    def first_empty_cluster():
        """Delegate to ``ec._first_empty_cluster()``."""
        return ec._first_empty_cluster()

    @staticmethod
    def register_cluster(idx):
        """Delegate to ``ec._register_cluster(idx)``."""
        return ec._register_cluster(idx)

    @staticmethod
    def rebuild_lists(model):
        """Delegate to ``ec._rebuild_lists(model)``."""
        return ec._rebuild_lists(model)

    @staticmethod
    def get_list(sense, example):
        """Delegate to ``ec._get_list(sense, example)``."""
        return ec._get_list(sense, example)


# Shared instance used by the static delegating methods above.
ec = ExampleClusters()
|
|
@ -1,6 +1,8 @@
|
||||||
from model.entry import Entry
|
from model.entry import Entry
|
||||||
from model.example import Example
|
from model.example import Example
|
||||||
from browser import window
|
from browser import window
|
||||||
|
from model.example_clusters import ExampleClusters
|
||||||
|
|
||||||
|
|
||||||
class Model:
|
class Model:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -48,6 +50,9 @@ class Model:
|
||||||
self.modal = modal
|
self.modal = modal
|
||||||
self.modal_shown = True
|
self.modal_shown = True
|
||||||
|
|
||||||
|
def pre_view(self):
|
||||||
|
# this should go into "after data change" or something
|
||||||
|
ExampleClusters.rebuild_lists(self)
|
||||||
|
|
||||||
def import_xml(self, xml_text):
|
def import_xml(self, xml_text):
|
||||||
parser = __new__(DOMParser())
|
parser = __new__(DOMParser())
|
||||||
|
|
|
@ -4,12 +4,9 @@ from model.editable import Editable
|
||||||
from model.tags import import_label_list
|
from model.tags import import_label_list
|
||||||
|
|
||||||
|
|
||||||
cluster_min = 0
|
|
||||||
|
|
||||||
|
|
||||||
class Sense(Editable):
|
class Sense(Editable):
|
||||||
def __init__(self, sense_xml):
|
def __init__(self, sense_xml, idx):
|
||||||
global cluster_min
|
self.original_idx = idx
|
||||||
|
|
||||||
self.definition = {}
|
self.definition = {}
|
||||||
for definition in sense_xml.querySelectorAll("definitionList definition"):
|
for definition in sense_xml.querySelectorAll("definitionList definition"):
|
||||||
|
@ -20,14 +17,8 @@ class Sense(Editable):
|
||||||
self.translations = from_container_list(
|
self.translations = from_container_list(
|
||||||
sense_xml.querySelectorAll("translationContainerList translationContainer"))
|
sense_xml.querySelectorAll("translationContainerList translationContainer"))
|
||||||
|
|
||||||
cluster_mappings = {}
|
self.examples = [Example(example_xml) for example_xml in
|
||||||
cluster_info = (cluster_mappings, cluster_min)
|
|
||||||
|
|
||||||
self.examples = [Example(example_xml, cluster_info) for example_xml in
|
|
||||||
sense_xml.querySelectorAll("exampleContainerList exampleContainer")]
|
sense_xml.querySelectorAll("exampleContainerList exampleContainer")]
|
||||||
|
|
||||||
# set limit for example cluster
|
|
||||||
cluster_min = max(cluster_mappings.values()) + 1
|
|
||||||
|
|
||||||
def merge_labels(self):
|
def merge_labels(self):
|
||||||
return ", ".join(val for _, val in self.labels)
|
return ", ".join(val for _, val in self.labels)
|
||||||
|
@ -40,9 +31,4 @@ class Sense(Editable):
|
||||||
result.add(cluster)
|
result.add(cluster)
|
||||||
|
|
||||||
return sorted(result, key=lambda x: x)
|
return sorted(result, key=lambda x: x)
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def reset_cluster_count():
|
|
||||||
global cluster_min
|
|
||||||
cluster_min = 0
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
from export import export_to_xml
|
from export import export_to_xml
|
||||||
|
from model.example_clusters import ExampleClusters
|
||||||
from browser import window
|
from browser import window
|
||||||
from lib.snabbdom import h
|
from lib.snabbdom import h
|
||||||
import message
|
import message
|
||||||
|
@ -12,7 +13,7 @@ def show_toggle_cluster_buttons(sense, example):
|
||||||
|
|
||||||
base_tag = "input.cluster-list-button"
|
base_tag = "input.cluster-list-button"
|
||||||
result = []
|
result = []
|
||||||
for opt in sense.example_clusters():
|
for opt in ExampleClusters.get_list(sense, example):
|
||||||
tag = base_tag
|
tag = base_tag
|
||||||
if opt == cls:
|
if opt == cls:
|
||||||
tag += ".cluster-button-checked"
|
tag += ".cluster-button-checked"
|
||||||
|
|
|
@ -19,6 +19,8 @@ class View:
|
||||||
|
|
||||||
def view(self, model):
|
def view(self, model):
|
||||||
self.model = model
|
self.model = model
|
||||||
|
self.model.pre_view()
|
||||||
|
|
||||||
new_vdom = self._view()
|
new_vdom = self._view()
|
||||||
patch(self.vdom, new_vdom)
|
patch(self.vdom, new_vdom)
|
||||||
self.vdom = new_vdom
|
self.vdom = new_vdom
|
||||||
|
|
Loading…
Reference in New Issue
Block a user