From bd4e64f818821c12b95efd5a9673b0c101e368e5 Mon Sep 17 00:00:00 2001 From: matic_t Date: Tue, 29 Sep 2020 04:07:28 -0700 Subject: [PATCH] lexicalUnit from single lexeme to multiple lexeme support --- src/export.py | 16 +++++++++------- src/model/entry.py | 17 ++++++++++++----- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/export.py b/src/export.py index 7ffdafc..b296e23 100644 --- a/src/export.py +++ b/src/export.py @@ -46,15 +46,17 @@ def export_entry(entry): # if({}) works uncorrectly in transcrypt - if len(entry.lexical_unit) > 0: + if len(entry.lexical_unit) > 0 and len(entry.lexical_unit['lexemes']) > 0: lexunit = doc.createElement("lexicalUnit") lexunit.setAttribute("id", entry.lexical_unit["id"]) - lexunit.setAttribute("type", "single") - lexeme = doc.createElement("lexeme") - lexeme.setAttribute("lexical_unit_lexeme_id", entry.lexical_unit["id"]) - lexeme.textContent = entry.lexical_unit["text"] - - lexunit.appendChild(lexeme) + lexunit.setAttribute("type", entry.lexical_unit['type']) + for lexeme in entry.lexical_unit["lexemes"]: + component = doc.createElement('component') + lexeme_xml = doc.createElement("lexeme") + component.appendChild(lexeme_xml) + lexeme_xml.setAttribute("lexical_unit_lexeme_id", lexeme["id"]) + lexeme_xml.textContent = lexeme["text"] + lexunit.appendChild(component) head.appendChild(lexunit) diff --git a/src/model/entry.py b/src/model/entry.py index 5e60489..c3c6fa9 100644 --- a/src/model/entry.py +++ b/src/model/entry.py @@ -42,11 +42,18 @@ class Entry(Data): self.variants = [v.textContent for v in entry_xml.querySelectorAll("head variantList variant")] self.homonymy = [{"value": v.textContent, "name": v.getAttribute("name")} for v in entry_xml.querySelectorAll("head headword homonymy homonymyFeature ")] self.related_entries = [re.textContent for re in entry_xml.querySelectorAll("head relatedEntryList relatedEntry")] - - lex_unit = entry_xml.querySelector("lexical_unit lexeme,lexicalUnit lexeme") - if lex_unit: - self.lexical_unit['id'] = lex_unit.getAttribute("lexical_unit_lexeme_id") - self.lexical_unit['text'] = lex_unit.textContent + lex_units = entry_xml.querySelectorAll("lexical_unit lexeme,lexicalUnit lexeme") + lex_unit_parent = entry_xml.querySelector("lexicalUnit") + self.lexical_unit['lexemes'] = [] + self.lexical_unit['id'] = lex_unit_parent.getAttribute('id') if lex_unit_parent and lex_unit_parent.hasAttribute( + "id") else None + self.lexical_unit['type'] = lex_unit_parent.getAttribute("type") if lex_unit_parent and lex_unit_parent.hasAttribute( + "type") else "single" + for unit in lex_units: + lexical_unit = {} + lexical_unit['id'] = unit.getAttribute("lexical_unit_lexeme_id") + lexical_unit['text'] = unit.textContent + self.lexical_unit['lexemes'].append(lexical_unit) measure = entry_xml.querySelector("measureList measure") if measure: