lexonomy_custom_editor/src/export.py

340 lines
12 KiB
Python
Raw Permalink Normal View History

2019-11-11 22:04:45 +00:00
from browser import document
from model.tags import export_tag
2020-10-05 10:41:07 +00:00
from model.translation import Translation
2019-11-11 22:04:45 +00:00
def export_to_xml(model):
    """Serialize the entry held by *model* to an XML string.

    The entry (``model.entry``) is first converted to an XML document by
    ``export_entry`` and then rendered with a browser ``XMLSerializer``.

    NOTE(review): ``__new__`` and ``XMLSerializer`` are not defined in this
    file; presumably they are Transcrypt/browser globals -- confirm.
    """
    entry_document = export_entry(model.entry)
    return __new__(XMLSerializer()).serializeToString(entry_document)
2019-11-11 22:04:45 +00:00
def export_entry(entry):
    """Build an ``<entry>`` XML document from *entry* and return the document.

    The document has a ``<head>`` (status, headword with lemma and homonymy,
    optional lexicalUnit, grammar, optional measureList, variantList,
    relatedEntryList, labelList, comment) followed by a ``<body>`` holding a
    ``<senseList>`` with one exported sense per item of ``entry.senses``.

    NOTE(review): ``__new__`` and ``DOMParser`` are not defined in this file;
    presumably they are Transcrypt/browser globals -- confirm.
    """
    dom_parser = __new__(DOMParser())
    doc = dom_parser.parseFromString("<entry />", "text/xml")
    root = doc.firstChild

    def attach(parent, tag):
        # Create <tag>, append it to parent and hand the new element back.
        node = doc.createElement(tag)
        parent.appendChild(node)
        return node

    # ---- head ----
    head = attach(root, "head")
    attach(head, "status").textContent = entry.status

    headword = attach(head, "headword")
    lemma = attach(headword, "lemma")
    lemma.textContent = entry.headword
    if entry.headword_type is not None:
        lemma.setAttribute("type", entry.headword_type)
    if entry.headword_audio is not None:
        lemma.setAttribute("audio", entry.headword_audio)

    homonymy = attach(headword, "homonymy")
    for h_feature in entry.homonymy:
        feature = attach(homonymy, "homonymyFeature")
        feature.textContent = h_feature.value
        # Item access on purpose: `name` is reserved on the Python side, so
        # attribute access would become `py_name` in the generated JS.
        feature.setAttribute("name", h_feature["name"])

    # Explicit len() checks: `if {}` does not behave correctly in Transcrypt.
    lexical_unit = entry.lexical_unit
    if len(lexical_unit) > 0 and len(lexical_unit['lexemes']) > 0:
        lexemes = lexical_unit["lexemes"]
        lexunit = doc.createElement("lexicalUnit")
        if lexical_unit["id"]:
            lexunit.setAttribute("id", lexical_unit["id"])
        lexunit.setAttribute("type", lexical_unit['type'])

        # With more than one lexeme, each lexeme is wrapped in a <component>.
        wrap_in_component = len(lexemes) > 1
        for lexeme in lexemes:
            lexeme_xml = doc.createElement("lexeme")
            if lexeme["id"]:
                lexeme_xml.setAttribute("lexical_unit_lexeme_id", lexeme["id"])
            lexeme_xml.textContent = lexeme["text"]
            holder = attach(lexunit, 'component') if wrap_in_component else lexunit
            holder.appendChild(lexeme_xml)
        head.appendChild(lexunit)

    grammar = attach(head, "grammar")
    attach(grammar, "category").textContent = entry.grammar

    if len(entry.measure) > 0:
        measure_list = attach(head, "measureList")
        measure = attach(measure_list, "measure")
        measure.setAttribute("source", entry.measure["source"])
        measure.setAttribute("type", entry.measure["type"])
        measure.textContent = entry.measure["text"]

    variant_list = attach(head, "variantList")
    for variant_text in entry.variants:
        attach(variant_list, "variant").textContent = variant_text

    related_list = attach(head, "relatedEntryList")
    for related_text in entry.related_entries:
        attach(related_list, "relatedEntry").textContent = related_text

    head.appendChild(_export_label_list(doc, entry.labels))
    attach(head, "comment").textContent = entry.comment

    # ---- body ----
    body = attach(root, "body")
    sense_list = attach(body, "senseList")
    for sense in entry.senses:
        sense_list.appendChild(export_sense(doc, sense))

    return doc
2020-07-15 11:17:58 +00:00
2019-11-11 22:04:45 +00:00
def export_sense(doc, sense):
    """Return a ``<sense>`` element created in *doc* for *sense*.

    Children, in order: labelList, definitionList, translationContainerList
    and exampleContainerList. The ``id`` attribute is written only when
    ``sense.id`` is not None. Every exported example additionally receives
    its own translationContainerList.
    """
    sense_xml = doc.createElement("sense")
    sense_xml.appendChild(_export_label_list(doc, sense.labels))
    if sense.id is not None:
        sense_xml.setAttribute("id", sense.id)

    # Definitions
    definitions_xml = doc.createElement("definitionList")
    sense_xml.appendChild(definitions_xml)
    for item in sense.definitions:
        item_xml = doc.createElement("definition")
        item_xml.setAttribute("type", item["type"])
        item_xml.textContent = item["value"]
        definitions_xml.appendChild(item_xml)

    # Translations belonging directly to the sense
    sense_translations_xml = doc.createElement("translationContainerList")
    export_translation_list(doc, sense, sense_translations_xml)
    sense_xml.appendChild(sense_translations_xml)

    # Examples, each with its own translations
    examples_xml = doc.createElement("exampleContainerList")
    sense_xml.appendChild(examples_xml)
    for example in sense.examples:
        example_xml = example.export(doc)
        example_translations_xml = doc.createElement("translationContainerList")
        export_translation_list(doc, example, example_translations_xml)
        example_xml.appendChild(example_translations_xml)
        examples_xml.appendChild(example_xml)

    return sense_xml
def export_translation_list(doc, py_parent, xml_parent):
    """Append every translation of *py_parent* to *xml_parent*.

    ``py_parent.translations`` is iterated as a sequence of clusters. Each
    translation is exported with ``export_translation`` and tagged with a
    ``cluster`` attribute holding the 1-based position of its cluster.
    Empty clusters produce no output but still occupy a position.
    """
    for position, cluster in enumerate(py_parent.translations):
        if len(cluster) == 0:
            continue
        cluster_label = str(position + 1)
        for translation in cluster:
            container = export_translation(doc, translation)
            container.setAttribute("cluster", cluster_label)
            xml_parent.appendChild(container)
2020-07-15 11:17:58 +00:00
2019-11-11 22:04:45 +00:00
def export_translation(doc, translation):
    """Return a ``<translationContainer>`` element for *translation*.

    The container holds a labelList built from ``translation.tags``, a
    ``<translation>`` element (text, ``targetLang`` and, when truthy,
    ``audio`` and ``source`` attributes) and, if the translation has any
    explanations, an explanationList.
    """
    container = doc.createElement("translationContainer")
    container.appendChild(_export_label_list(doc, translation.tags))

    text_xml = doc.createElement("translation")
    text_xml.setAttribute("targetLang", translation.targetLang)
    text_xml.textContent = translation.translation
    if translation.audio:
        text_xml.setAttribute("audio", translation.audio)
    if translation.source:
        text_xml.setAttribute("source", translation.source)
    container.appendChild(text_xml)

    explanations = translation.explanationList
    if len(explanations) > 0:
        container.appendChild(_export_explanation_list(doc, explanations))

    return container
2020-07-15 11:17:58 +00:00
2020-08-10 15:38:08 +00:00
def _export_explanation_list(doc, lst):
    """Return an ``<explanationList>`` element holding every item of *lst*.

    Each item is exported through its own ``export(doc)`` method and the
    results are appended in the order they appear in *lst*.
    """
    container = doc.createElement('explanationList')
    for item in lst:
        exported = item.export(doc)
        container.appendChild(exported)
    return container
def _export_label_list(doc, lst):
    """Return a ``<labelList>`` element with one ``<label>`` per pair in *lst*.

    Every ``(key, value)`` pair is first passed through ``export_tag``; the
    resulting key becomes the label's ``type`` attribute and the resulting
    value becomes its text.
    """
    label_list = doc.createElement("labelList")
    for raw_key, raw_value in lst:
        label_type, label_text = export_tag(raw_key, raw_value)
        label = doc.createElement("label")
        label.setAttribute('type', label_type)
        label.textContent = label_text
        label_list.appendChild(label)
    return label_list
2020-09-29 11:07:05 +00:00
def _original_xml_query_selector(selector, entry, doc, parent_selector=None):
    """Return the element matching *selector* in ``entry.original_xml``.

    If nothing matches, a new element is created in *doc*, appended to the
    element matched by *parent_selector* and returned instead. The new
    element's tag is the part of *selector* after its last space.

    Parameters:
        selector: selector string; expected to contain at least one space
            (``"parent child"``), since its last token is used as tag name.
        entry: object exposing ``original_xml`` with a ``querySelector``.
        doc: document used to create the missing element.
        parent_selector: selector of the element that receives a newly
            created node. Defaults to *selector* without its last token.
    """
    # The default cannot be written in the signature: Python evaluates
    # defaults at definition time, where `selector` does not exist yet.
    if parent_selector is None:
        parent_selector = selector.rsplit(' ', 1)[0]

    query = entry.original_xml.querySelector(selector)
    if query is None:
        query = doc.createElement(selector.rsplit(' ', 1)[1])
        entry.original_xml.querySelector(parent_selector).appendChild(query)
    return query
2020-10-05 13:54:01 +00:00
def export_example_to_entry_xml(example, other_examples = None):
    """Build a stand-alone ``<entry>`` XML document out of *example*.

    The example's components are joined into a compound lemma and listed as
    lexemes of an ``MWE`` lexical unit. The first translation of the first
    translation cluster is folded into the single generated sense: its
    labels go to the sense (or stay with the translation when their value is
    "kontrastivno" or "približek"), and its "slo" explanations become
    ``indicator`` definitions. The remaining translations are exported
    through ``export_translation_list``.

    Side effects on *example*: explanations converted to definitions are
    removed from the first translation's ``explanationList``, and the first
    translation is dropped from ``example.translations[0]``.

    Parameters:
        example: the example to convert.
        other_examples: optional iterable of examples exported into an
            ``exampleContainerList`` of the generated sense; when None, no
            such list is created.

    Returns:
        The XML document that was built.

    NOTE(review): ``__new__`` and ``DOMParser`` are not defined in this file;
    presumably they are Transcrypt/browser globals -- confirm.
    """
    parser = __new__(DOMParser())
    doc = parser.parseFromString("<entry />", "text/xml")
    entry_xml = doc.firstChild

    # ---- head ----
    head = doc.createElement("head")
    entry_xml.appendChild(head)

    status = doc.createElement("status")
    head.appendChild(status)

    headword = doc.createElement("headword")
    head.appendChild(headword)

    lemma = doc.createElement("lemma")
    lemma.textContent = " ".join(comp.text for comp in example.components)
    lemma.setAttribute("type", "compound")
    if example.inner.other_attributes['audio'] is not None:
        lemma.setAttribute('audio', example.inner.other_attributes['audio'])
    headword.appendChild(lemma)

    homonymy = doc.createElement("homonymy")
    headword.appendChild(homonymy)

    lexical_unit = doc.createElement("lexicalUnit")
    lexical_unit.setAttribute("type", "MWE")
    head.appendChild(lexical_unit)

    # `!= None` is kept deliberately (not `is not None`).
    # NOTE(review): under Transcrypt the loose comparison presumably also
    # covers a missing (undefined) value -- confirm before tightening.
    # NOTE(review): the reason for the 3-component limit is not visible here.
    if example.inner.other_attributes['structure_id'] != None and len(example.components) <= 3:
        lexical_unit.setAttribute("id", example.inner.other_attributes['structure_id'])

    for comp in example.components:
        lexeme = doc.createElement("lexeme")
        lexeme.textContent = comp.text
        comp_xml = doc.createElement("component")
        lexical_unit.appendChild(comp_xml)
        comp_xml.appendChild(lexeme)

    grammar = doc.createElement("grammar")
    category = doc.createElement("category")
    grammar.appendChild(category)
    head.appendChild(grammar)

    variant_list = doc.createElement("variantList")
    head.appendChild(variant_list)

    related_entry_list = doc.createElement("relatedEntryList")
    head.appendChild(related_entry_list)

    label_list = doc.createElement("labelList")
    head.appendChild(label_list)

    comment = doc.createElement("comment")
    head.appendChild(comment)

    # ---- body with a single sense ----
    body = doc.createElement("body")
    entry_xml.appendChild(body)

    sense_list = doc.createElement("senseList")
    body.appendChild(sense_list)

    sense = doc.createElement("sense")
    sense_list.appendChild(sense)

    sense_label_list = doc.createElement("labelList")
    sense.appendChild(sense_label_list)

    has_first_translation = len(example.translations) > 0 and len(example.translations[0]) > 0
    first_translation = example.translations[0][0] if has_first_translation else Translation()
    first_translation_is_valid = False

    # Labels go to the sense unless their value is "kontrastivno" or
    # "približek"; those stay with the translation.
    translation_label_list = doc.createElement("labelList")
    for key, value in first_translation.tags:
        key, value = export_tag(key, value)
        label_el = doc.createElement("label")
        keep_with_translation = value == "kontrastivno" or value == "približek"
        target_list = translation_label_list if keep_with_translation else sense_label_list
        label_el.textContent = value
        label_el.setAttribute('type', key)
        target_list.appendChild(label_el)

    # Explanations in "slo" become definitions of the sense and are removed
    # from the translation. Iterate over a copy: removing from the list that
    # is being iterated would skip the element following each removed one.
    definition_list = doc.createElement("definitionList")
    sense.appendChild(definition_list)
    for explanation in list(first_translation.explanationList):
        if explanation.language == "slo":
            definition = doc.createElement("definition")
            definition.setAttribute("type", "indicator")
            definition.textContent = explanation.value
            definition_list.appendChild(definition)
            first_translation.explanationList.remove(explanation)

    translation_container_list = doc.createElement("translationContainerList")
    sense.appendChild(translation_container_list)

    translation_container = doc.createElement("translationContainer")
    # Ask the DOM node whether it has labels; len() of an element does not
    # report its number of children.
    if translation_label_list.hasChildNodes():
        translation_container.appendChild(translation_label_list)

    # Value comparison, not identity: `is not ""` is not a reliable
    # emptiness test.
    if first_translation.translation != "":
        translation = doc.createElement("translation")
        translation_container.appendChild(translation)

        translation.textContent = first_translation.translation
        translation.setAttribute("targetLang", first_translation.targetLang)

        if first_translation.audio:
            translation.setAttribute("audio", first_translation.audio)

        if first_translation.source:
            translation.setAttribute("source", first_translation.source)
        first_translation_is_valid = True

    if len(first_translation.explanationList) > 0:
        explanation_list = _export_explanation_list(doc, first_translation.explanationList)
        translation_container.appendChild(explanation_list)
        first_translation_is_valid = True

    # The container is exported only if it carries a translation text or at
    # least one remaining explanation.
    if first_translation_is_valid:
        translation_container_list.appendChild(translation_container)

    # Drop the translation handled above so that it is not exported twice.
    # Guarded so that an empty `translations` list is never indexed.
    if has_first_translation:
        example.translations[0] = example.translations[0][1:]

    export_translation_list(doc, example, translation_container_list)

    if other_examples is not None:
        example_container_list = doc.createElement("exampleContainerList")
        sense.appendChild(example_container_list)
        for other_example in other_examples:
            example_container = other_example.export(doc)
            other_translation_list = doc.createElement("translationContainerList")
            export_translation_list(doc, other_example, other_translation_list)
            example_container.appendChild(other_translation_list)
            example_container_list.appendChild(example_container)

    return doc