Centralized tag import/export in tags.py. Also added the Slovene-to-English tag mapping (slo2eng) and its inverse, as per 1088

This commit is contained in:
Ozbolt Menegatti 2020-01-05 10:38:53 +01:00
parent 8820b5700f
commit 17153d78b6
5 changed files with 102 additions and 40 deletions

View File

@ -1,4 +1,5 @@
from browser import document from browser import document
from model.tags import export_tag
def export_to_xml(model): def export_to_xml(model):
@ -146,6 +147,8 @@ def export_translation(doc, translation):
def _export_label_list(doc, lst): def _export_label_list(doc, lst):
result = doc.createElement("labelList") result = doc.createElement("labelList")
for key, value in lst: for key, value in lst:
key, value = export_tag(key, value)
label_el = doc.createElement("label") label_el = doc.createElement("label")
label_el.textContent = value label_el.textContent = value
label_el.setAttribute('type', key) label_el.setAttribute('type', key)

View File

@ -1,6 +1,6 @@
from model.sense import Sense from model.sense import Sense
from model.editable import Editable from model.editable import Editable
from model.tags import TAGS from model.tags import import_label_list
class Entry(Editable): class Entry(Editable):
def __init__(self, entry_xml): def __init__(self, entry_xml):
@ -28,17 +28,7 @@ class Entry(Editable):
self.measure["type"] = measure.getAttribute("type") self.measure["type"] = measure.getAttribute("type")
self.measure["text"] = measure.textContent self.measure["text"] = measure.textContent
self.labels = [] self.labels = import_label_list("head labelList label", entry_xml)
for tag_xml in entry_xml.querySelectorAll("head labelList label"):
t_type = tag_xml.getAttribute("type")
t_value = tag_xml.textContent
if t_type not in TAGS:
# using some default
t_type = TAGS.keys()[0]
self.labels.append((t_type, t_value))
self.senses = [Sense(sense_xml) for sense_xml in self.senses = [Sense(sense_xml) for sense_xml in
entry_xml.querySelectorAll("body senseList sense")] entry_xml.querySelectorAll("body senseList sense")]

View File

@ -1,26 +1,15 @@
from model.example import Example from model.example import Example
from model.translation import Translation from model.translation import Translation
from model.editable import Editable from model.editable import Editable
from model.tags import TAGS from model.tags import import_label_list
class Sense(Editable): class Sense(Editable):
def __init__(self, sense_xml): def __init__(self, sense_xml):
definition = sense_xml.querySelector("definitionList definition") definition = sense_xml.querySelector("definitionList definition")
self.labels = []
for tag_xml in sense_xml.querySelectorAll("sense > labelList label"):
t_type = tag_xml.getAttribute("type")
t_value = tag_xml.textContent
if t_type not in TAGS:
# using some default
t_type = TAGS.keys()[0]
self.labels.append((t_type, t_value))
self.definition = definition.textContent if definition else "" self.definition = definition.textContent if definition else ""
self.labels = import_label_list("sense > labelList label", sense_xml)
self.examples = [Example(example_xml) for example_xml in self.examples = [Example(example_xml) for example_xml in
sense_xml.querySelectorAll("exampleContainerList exampleContainer")] sense_xml.querySelectorAll("exampleContainerList exampleContainer")]

View File

@ -5,3 +5,87 @@ TAGS = {
"slovnične": ["v pridevniški rabi", "v primerniku", "v presežniku", "v samostalniški rabi", "v ednini", "v dvojini", "v množini", "števno", "neštevno", "v 3. osebi", "v velelniku", "z veliko začetnico"], "slovnične": ["v pridevniški rabi", "v primerniku", "v presežniku", "v samostalniški rabi", "v ednini", "v dvojini", "v množini", "števno", "neštevno", "v 3. osebi", "v velelniku", "z veliko začetnico"],
"skupne": ["tudi", "predvsem", "običajno", "včasih", "redko"] "skupne": ["tudi", "predvsem", "običajno", "včasih", "redko"]
} }
# Slovene tag-category key -> English name written as the label type on export.
SLO2ENG_TAGS = dict([
    ("podrocje", "domain"),
    ("stilne-časovne", "style-time"),
    ("stilne-registrske", "style-register"),
    ("stilne-konotacijske", "style-connotation"),
    ("stilne-pragmatične", "style-pragmatics"),
    ("slovnične", "grammar"),
    ("skupne", "general"),
])

# Inverse lookup: English name -> Slovene tag-category key.
ENG2SLO_TAGS = dict(zip(SLO2ENG_TAGS.values(), SLO2ENG_TAGS.keys()))

# (Slovene section name, English suffix) pairs; export_tag appends the suffix
# to "style-" for values found after the matching section entry.
STYLE_SECTIONS = [
    ("registrske", "register"),
    ("konotacijske", "connotation"),
    ("pragmatične", "pragmatics"),
    ("časovne", "time"),
]
def import_label_list(selector, xml_element):
    """Read all labels matching `selector` below `xml_element`.

    Each matching element contributes one (type, value) tuple, built from its
    "type" attribute and its text content and normalised by import_tag.
    Returns the tuples as a list, in the order querySelectorAll yields them.
    """
    return [
        import_tag(label_el.getAttribute("type"), label_el.textContent)
        for label_el in xml_element.querySelectorAll(selector)
    ]
def import_tag(key, value):
    """Convert a label read from the input XML into the editor's (key, value).

    Key handling:
      * a key already present in SLO2ENG_TAGS (Slovene) is kept as is;
      * "stilne" and "style" both become "stilne";
      * a key present in ENG2SLO_TAGS (English) is translated, and a
        translated "stilne-xxx" key is collapsed to plain "stilne";
      * any other key is logged and replaced by the first key of TAGS.

    Value handling: "--" markers are removed and surrounding whitespace is
    stripped. If the cleaned value equals an entry of any TAGS list, either
    directly or once prefixed with "-- ", that TAGS entry is returned;
    otherwise the cleaned value is returned (a manually entered value).

    Returns:
        A (key, value) tuple.
    """
    if key in SLO2ENG_TAGS:
        # Slovene key (older data): import unchanged.
        pass
    elif key in ("stilne", "style"):
        key = "stilne"
    elif key in ENG2SLO_TAGS:
        # Translate; "stilne-xxx" collapses to "stilne" (keys without a dash
        # are returned whole by split).
        key = ENG2SLO_TAGS[key].split("-")[0]
    else:
        # NOTE(review): `window` has to be in scope at module level; the
        # top of this file is not visible here, so confirm the import.
        window.console.log("Uknown tag :(", key, value)
        # Fall back to the first TAGS key. Dict views are not subscriptable
        # in Python 3, so take the first key through an iterator.
        key = next(iter(TAGS))

    # This should not happen, but stray "--" markers may be left in the XML.
    value = value.replace("--", "").strip()
    prefixed = "-- " + value
    for options in TAGS.values():
        for possible_value in options:
            if possible_value == value or possible_value == prefixed:
                return key, possible_value

    # Not a known value, so it must have been entered manually.
    return key, value
def export_tag(key, value):
    """Convert an editor (key, value) label into the form written to XML.

    For the "stilne" key the exported key is "style-<section>", where the
    section is the last STYLE_SECTIONS entry that appears in TAGS["stilne"]
    before the value. If the value is not listed in TAGS["stilne"] (for
    example a manually entered value), or no section entry precedes it, the
    key is plain "style". Every other key is translated through
    SLO2ENG_TAGS.

    The value has "--" markers removed and surrounding whitespace stripped.

    Returns:
        A (key, value) tuple.

    Raises:
        KeyError: if `key` is neither "stilne" nor a key of SLO2ENG_TAGS.
    """
    if key == "stilne":
        arr = TAGS["stilne"]
        key = "style"
        # list.index raises ValueError instead of returning -1, so map a
        # missing value to -1 explicitly and keep the generic "style" key.
        try:
            value_idx = arr.index(value)
        except ValueError:
            value_idx = -1
        if value_idx >= 0:
            # Walk the sections from last to first; the first one found
            # before the value is the section the value belongs to.
            for sec_slo, sec_eng in reversed(STYLE_SECTIONS):
                if sec_slo not in arr:
                    # Section entry absent from TAGS["stilne"]: skip it
                    # rather than fail the whole export.
                    continue
                if arr.index(sec_slo) < value_idx:
                    key = "style-" + sec_eng
                    break
    else:
        key = SLO2ENG_TAGS[key]

    value = value.replace("--", "").strip()
    return key, value

View File

@ -1,25 +1,21 @@
from model.tags import TAGS from model.tags import import_label_list
from model.editable import Editable from model.editable import Editable
class Translation(Editable): class Translation(Editable):
def __init__(self, translation_xml): def __init__(self, translation_xml):
translation = translation_xml.querySelector("translation") translation = translation_xml.querySelector("translation")
self.translation = translation.textContent
self.source = translation.getAttribute("source") if translation.hasAttribute("source") else "" if translation:
self.targetLang = translation.getAttribute("targetLang") if translation.hasAttribute("targetLang") else "" self.translation = translation.textContent
self.source = translation.getAttribute("source") if translation.hasAttribute("source") else ""
self.targetLang = translation.getAttribute("targetLang") if translation.hasAttribute("targetLang") else ""
else:
self.translation = ""
self.source = ""
self.targetLang = ""
explanation = translation_xml.querySelector("explanation") explanation = translation_xml.querySelector("explanation")
self.explanation = explanation.textContent if explanation else "" self.explanation = explanation.textContent if explanation else ""
self.tags = import_label_list("labelList label", translation_xml)
self.tags = []
for tag_xml in translation_xml.querySelectorAll("labelList label"):
t_type = tag_xml.getAttribute("type")
t_value = tag_xml.textContent
if t_type not in TAGS:
console.log("Bad tag: ({})->({})".format(t_type, t_value))
continue
self.tags.append((t_type, t_value))