Clusters work better but are not yet 100% working; examples now also carry the structure name.

This commit is contained in:
Ozbolt Menegatti 2020-02-02 22:04:02 +01:00
parent be90c9dc89
commit 3844c7025b
7 changed files with 101 additions and 40 deletions

View File

@ -31,9 +31,8 @@ class Entry(Editable):
self.labels = import_label_list("head labelList label", entry_xml)
Sense.reset_cluster_count()
self.senses = [Sense(sense_xml) for sense_xml in
entry_xml.querySelectorAll("body senseList sense")]
self.senses = [Sense(sense_xml, i) for i, sense_xml in
enumerate(entry_xml.querySelectorAll("body senseList sense"))]
def get_measure_text(self):
return self.measure["text"] if "text" in self.measure else ""

View File

@ -1,9 +1,10 @@
from model.editable import Editable
from model.translation import from_container_list
from model.example_clusters import ExampleClusters
class Example(Editable):
def __init__(self, example_xml, cluster_info):
def __init__(self, example_xml):
self.translations = from_container_list(example_xml.querySelectorAll("translationContainer"))
inner_xml = example_xml.querySelector("corpusExample")
@ -11,7 +12,7 @@ class Example(Editable):
self.inner = CorpusExample(inner_xml)
else:
inner_xml = example_xml.querySelector("multiwordExample")
self.inner = MultiwordExample(inner_xml, cluster_info)
self.inner = MultiwordExample(inner_xml)
all_components = [ComponentLexeme(el) for el in inner_xml.childNodes]
self.components = [comp for comp in all_components if comp.isValid()]
@ -37,6 +38,9 @@ class Example(Editable):
def set_cluster(self, cluster):
self.inner.cluster = cluster
def get_structure(self):
return self.inner.get_structure()
def is_collocation(self):
return self.get_view_type() == 2
@ -66,39 +70,32 @@ class CorpusExample:
def get_cluster(self):
return None
def get_structure(self):
return None
class MultiwordExample:
def __init__(self, example_xml, cluster_info):
def __init__(self, example_xml):
self.other_attributes = {}
for oth_attr in ["lexical_unit_id", "structure_id", "structureName", "audio", "frequency", "logDice"]:
if example_xml.hasAttribute(oth_attr):
self.other_attributes[oth_attr] = example_xml.getAttribute(oth_attr)
self.cluster = self._determine_cluster_number(example_xml, cluster_info)
self.cluster = MultiwordExample._determine_cluster_number(example_xml)
if example_xml.hasAttribute("type"):
self.type = example_xml.getAttribute("type")
else:
self.type = None
def _determine_cluster_number(self, example_xml, cluster_info):
# since cluster numbers can be fairly fragmented, this is defragmentation
# we do need to get info about sense examples to do this, that is why we have cluster_info
if cluster_info is None:
cluster_mappings, cluster_begin = {}, 0
else:
cluster_mappings, cluster_begin = cluster_info
@staticmethod
def _determine_cluster_number(example_xml):
if not example_xml.hasAttribute("cluster"):
cluster = len(cluster_mappings) + cluster_begin
cluster_mappings[cluster] = cluster
return ExampleClusters.first_empty_cluster()
else:
cluster = int(example_xml.getAttribute("cluster"))
if cluster not in cluster_mappings:
cluster_mappings[cluster] = len(cluster_mappings) + cluster_begin
return cluster_mappings[cluster]
ExampleClusters.register_cluster(cluster)
return cluster
def export(self, doc):
result = doc.createElement("multiwordExample")
@ -115,6 +112,12 @@ class MultiwordExample:
def get_cluster(self):
return self.cluster
def get_structure(self):
if "structureName" in self.other_attributes:
return self.other_attributes["structureName"]
else:
return None
class ComponentLexeme(Editable):

View File

@ -0,0 +1,65 @@
class ExampleClusters:
    """Registry of example cluster indices and per-key lists of clusters.

    One shared instance (``ec``, created right after the class) backs the
    static ``first_empty_cluster`` / ``register_cluster`` / ``rebuild_lists``
    / ``get_list`` entry points, so callers never hold an instance themselves.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Every cluster index that has been registered or handed out so far.
        self.idxs = set()
        # Maps a key built by get_key() to a sorted list of cluster values.
        self.lists = {}

    @staticmethod
    def get_key(sense, example):
        """Return the grouping key "<sense.original_idx>-<structure>".

        ``example.get_structure()`` may return None, in which case the key
        ends in the text "None".
        """
        sense_idx = sense.original_idx
        structure_name = example.get_structure()
        return "{}-{}".format(sense_idx, structure_name)

    def _first_empty_cluster(self):
        """Reserve and return the smallest non-negative index not yet used."""
        idx = 0
        # Fixed: the original tested `self.clusters`, an attribute that is
        # never created, so the first call raised AttributeError. The set of
        # used indices is `self.idxs`.
        while idx in self.idxs:
            idx += 1
        self.idxs.add(idx)
        return idx

    def _register_cluster(self, idx):
        """Mark ``idx`` as used. Registering the same index twice is harmless."""
        self.idxs.add(idx)

    def _rebuild_lists(self, model):
        """Recompute ``self.lists`` from every example in ``model.entry.senses``.

        Examples are grouped by get_key(sense, example); each group keeps the
        distinct values returned by ``example.get_cluster()``, sorted ascending.
        """
        self.lists = {}
        for sense in model.entry.senses:
            for example in sense.examples:
                key = ExampleClusters.get_key(sense, example)
                if key not in self.lists:
                    self.lists[key] = set()
                # NOTE(review): get_cluster() can return None (CorpusExample
                # does); such values are stored as-is, exactly as before.
                self.lists[key].add(example.get_cluster())

        for key in self.lists.keys():
            # The identity key function is kept on purpose: presumably it
            # forces a numeric (not lexicographic) sort once compiled to
            # JavaScript - TODO confirm against the Transcrypt runtime.
            self.lists[key] = sorted(self.lists[key], key=lambda x: x)

    def _get_list(self, sense, example):
        """Return the cluster list stored for this sense/example key, or []."""
        key = ExampleClusters.get_key(sense, example)
        if key in self.lists:
            return self.lists[key]
        else:
            return []

    @staticmethod
    def first_empty_cluster():
        """Reserve and return the lowest unused index on the shared instance."""
        return ec._first_empty_cluster()

    @staticmethod
    def register_cluster(idx):
        """Mark ``idx`` as used on the shared instance."""
        return ec._register_cluster(idx)

    @staticmethod
    def rebuild_lists(model):
        """Rebuild the per-key cluster lists of the shared instance."""
        return ec._rebuild_lists(model)

    @staticmethod
    def get_list(sense, example):
        """Return the shared instance's cluster list for this sense/example."""
        return ec._get_list(sense, example)


# Shared instance used by the static entry points above.
ec = ExampleClusters()

View File

@ -1,6 +1,8 @@
from model.entry import Entry
from model.example import Example
from browser import window
from model.example_clusters import ExampleClusters
class Model:
def __init__(self):
@ -48,6 +50,9 @@ class Model:
self.modal = modal
self.modal_shown = True
def pre_view(self):
# this should go into "after data change" or something
ExampleClusters.rebuild_lists(self)
def import_xml(self, xml_text):
parser = __new__(DOMParser())

View File

@ -4,12 +4,9 @@ from model.editable import Editable
from model.tags import import_label_list
cluster_min = 0
class Sense(Editable):
def __init__(self, sense_xml):
global cluster_min
def __init__(self, sense_xml, idx):
self.original_idx = idx
self.definition = {}
for definition in sense_xml.querySelectorAll("definitionList definition"):
@ -20,14 +17,8 @@ class Sense(Editable):
self.translations = from_container_list(
sense_xml.querySelectorAll("translationContainerList translationContainer"))
cluster_mappings = {}
cluster_info = (cluster_mappings, cluster_min)
self.examples = [Example(example_xml, cluster_info) for example_xml in
self.examples = [Example(example_xml) for example_xml in
sense_xml.querySelectorAll("exampleContainerList exampleContainer")]
# set limit for example cluster
cluster_min = max(cluster_mappings.values()) + 1
def merge_labels(self):
return ", ".join(val for _, val in self.labels)
@ -40,9 +31,4 @@ class Sense(Editable):
result.add(cluster)
return sorted(result, key=lambda x: x)
@staticmethod
def reset_cluster_count():
global cluster_min
cluster_min = 0

View File

@ -1,4 +1,5 @@
from export import export_to_xml
from model.example_clusters import ExampleClusters
from browser import window
from lib.snabbdom import h
import message
@ -12,7 +13,7 @@ def show_toggle_cluster_buttons(sense, example):
base_tag = "input.cluster-list-button"
result = []
for opt in sense.example_clusters():
for opt in ExampleClusters.get_list(sense, example):
tag = base_tag
if opt == cls:
tag += ".cluster-button-checked"

View File

@ -19,6 +19,8 @@ class View:
def view(self, model):
self.model = model
self.model.pre_view()
new_vdom = self._view()
patch(self.vdom, new_vdom)
self.vdom = new_vdom