Added complete.xml file

This commit is contained in:
Luka 2022-03-01 09:55:53 +01:00
parent 95b71cd6a3
commit 8489bf8264
2 changed files with 25 additions and 5 deletions

View File

@ -121,11 +121,25 @@ def build_tei_etrees(documents):
elements.append(document.as_xml()) elements.append(document.as_xml())
return elements return elements
def build_complete_tei(etree_source, etree_target, etree_links):
    """Combine the source, target and link trees into a single tree.

    The result has the shape ``<text><group>SRC TGT LINKS</group></text>``
    where SRC and TGT are the second child of the first source / target
    tree and LINKS is ``etree_links``.

    Args:
        etree_source: Sequence of element trees; only the first is used.
        etree_target: Sequence of element trees; only the first is used.
        etree_links: Root element of the links tree.

    Returns:
        The newly created ``text`` root element.

    Raises:
        IndexError: If either sequence is empty or its first tree has
            fewer than two children.
    """
    import copy

    root = etree.Element('text')
    group = etree.Element('group')

    # NOTE(review): index 1 presumably selects the <text> element that follows
    # a header element - confirm against the trees the callers pass in.
    source_text = etree_source[0][1]
    target_text = etree_target[0][1]

    # An lxml element has exactly one parent, so appending it to another tree
    # *moves* it out of its original tree. Append deep copies instead, so the
    # caller's trees stay intact and can still be serialized on their own.
    group.append(copy.deepcopy(source_text))
    group.append(copy.deepcopy(target_text))
    group.append(copy.deepcopy(etree_links))

    root.append(group)
    return root
def build_links(all_edges): def build_links(all_edges):
root = etree.Element('TEI') root = etree.Element('text')
root.set('xmlns', 'http://www.tei-c.org/ns/1.0') body = etree.Element('body')
set_xml_attr(root, 'lang', 'sl') # root.set('xmlns', 'http://www.tei-c.org/ns/1.0')
# set_xml_attr(root, 'lang', 'sl')
# elements = [] # elements = []
for document_edges in all_edges: for document_edges in all_edges:
@ -148,7 +162,8 @@ def build_links(all_edges):
set_xml_attr(p, 'paragraph_id', '.'.join(random_id.split('.')[:2])) set_xml_attr(p, 'paragraph_id', '.'.join(random_id.split('.')[:2]))
d.append(p) d.append(p)
set_xml_attr(d, 'document_id', random_id.split('.')[0]) set_xml_attr(d, 'document_id', random_id.split('.')[0])
root.append(d) body.append(d)
root.append(body)
return root return root

View File

@ -9,7 +9,7 @@ from xml.etree import ElementTree
from lxml import etree from lxml import etree
from src.create_tei import construct_tei_etrees, construct_tei_documents_from_list, construct_sentence_from_list, \ from src.create_tei import construct_tei_etrees, construct_tei_documents_from_list, construct_sentence_from_list, \
construct_paragraph_from_list, TeiDocument, build_tei_etrees, build_links construct_paragraph_from_list, TeiDocument, build_tei_etrees, build_links, build_complete_tei
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@ -245,6 +245,8 @@ def process_file(et, args):
# TODO FIX THIS # TODO FIX THIS
etree_links = build_links(document_edges) etree_links = build_links(document_edges)
complete_etree = build_complete_tei(etree_source, etree_target, etree_links)
with open(os.path.join(args.results_folder, f"source.xml"), 'w') as sf: with open(os.path.join(args.results_folder, f"source.xml"), 'w') as sf:
sf.write(etree.tostring(etree_source[0], pretty_print=True, encoding='utf-8').decode()) sf.write(etree.tostring(etree_source[0], pretty_print=True, encoding='utf-8').decode())
@ -254,6 +256,9 @@ def process_file(et, args):
with open(os.path.join(args.results_folder, f"links.xml"), 'w') as tf: with open(os.path.join(args.results_folder, f"links.xml"), 'w') as tf:
tf.write(etree.tostring(etree_links, pretty_print=True, encoding='utf-8').decode()) tf.write(etree.tostring(etree_links, pretty_print=True, encoding='utf-8').decode())
with open(os.path.join(args.results_folder, f"complete.xml"), 'w') as tf:
tf.write(etree.tostring(complete_etree, pretty_print=True, encoding='utf-8').decode())
with open(os.path.join(args.results_folder, f"links.json"), 'w') as jf: with open(os.path.join(args.results_folder, f"links.json"), 'w') as jf:
json.dump(document_edges, jf, ensure_ascii=False, indent=" ") json.dump(document_edges, jf, ensure_ascii=False, indent=" ")