From 8489bf826468e23d731202455033ee068dc7a33b Mon Sep 17 00:00:00 2001 From: Luka Date: Tue, 1 Mar 2022 09:55:53 +0100 Subject: [PATCH] Added complete.xml file --- src/create_tei.py | 23 +++++++++++++++++++---- svala2tei.py | 7 ++++++- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/create_tei.py b/src/create_tei.py index f0cc260..d184d48 100644 --- a/src/create_tei.py +++ b/src/create_tei.py @@ -121,11 +121,25 @@ def build_tei_etrees(documents): elements.append(document.as_xml()) return elements +def build_complete_tei(etree_source, etree_target, etree_links): + root = etree.Element('text') + group = etree.Element('group') + group.append(list(etree_source[0])[1]) + group.append(list(etree_target[0])[1]) + # link_text = etree.Element('text') + # link_body = etree.Element('body') + # link_body.append(etree_links) + # link_text.append(link_body) + group.append(etree_links) + root.append(group) + + return root def build_links(all_edges): - root = etree.Element('TEI') - root.set('xmlns', 'http://www.tei-c.org/ns/1.0') - set_xml_attr(root, 'lang', 'sl') + root = etree.Element('text') + body = etree.Element('body') + # root.set('xmlns', 'http://www.tei-c.org/ns/1.0') + # set_xml_attr(root, 'lang', 'sl') # elements = [] for document_edges in all_edges: @@ -148,7 +162,8 @@ def build_links(all_edges): set_xml_attr(p, 'paragraph_id', '.'.join(random_id.split('.')[:2])) d.append(p) set_xml_attr(d, 'document_id', random_id.split('.')[0]) - root.append(d) + body.append(d) + root.append(body) return root diff --git a/svala2tei.py b/svala2tei.py index ca5c556..1c38d24 100644 --- a/svala2tei.py +++ b/svala2tei.py @@ -9,7 +9,7 @@ from xml.etree import ElementTree from lxml import etree from src.create_tei import construct_tei_etrees, construct_tei_documents_from_list, construct_sentence_from_list, \ - construct_paragraph_from_list, TeiDocument, build_tei_etrees, build_links + construct_paragraph_from_list, TeiDocument, build_tei_etrees, build_links, build_complete_tei logging.basicConfig(level=logging.INFO) @@ -245,6 +245,8 @@ def process_file(et, args): # TODO FIX THIS etree_links = build_links(document_edges) + complete_etree = build_complete_tei(etree_source, etree_target, etree_links) + with open(os.path.join(args.results_folder, f"source.xml"), 'w') as sf: sf.write(etree.tostring(etree_source[0], pretty_print=True, encoding='utf-8').decode()) @@ -254,6 +256,9 @@ def process_file(et, args): with open(os.path.join(args.results_folder, f"links.xml"), 'w') as tf: tf.write(etree.tostring(etree_links, pretty_print=True, encoding='utf-8').decode()) + with open(os.path.join(args.results_folder, f"complete.xml"), 'w') as tf: + tf.write(etree.tostring(complete_etree, pretty_print=True, encoding='utf-8').decode()) + with open(os.path.join(args.results_folder, f"links.json"), 'w') as jf: json.dump(document_edges, jf, ensure_ascii=False, indent=" ")