From 08c291b5db4522eee9fe5d3e34d5a5be7b203bd4 Mon Sep 17 00:00:00 2001
From: Cyprian Laskowski
Date: Mon, 15 Mar 2021 16:00:28 +0100
Subject: [PATCH] Redmine #1487: looked for min other id in structures

---
 package/structure_assignment/pipeline.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/package/structure_assignment/pipeline.py b/package/structure_assignment/pipeline.py
index 11a1cfc..3fff914 100644
--- a/package/structure_assignment/pipeline.py
+++ b/package/structure_assignment/pipeline.py
@@ -135,11 +135,22 @@ class Pipeline:
 
         wani_main(namespace)
 
+    def _find_min_other_id(self, key):
+        try:
+            root = lxml.parse(self.file_map[key])
+            other_ids = [int(oid) for oid in root.xpath('syntactic_structure[@type="other"]/@id')]
+            min_id = min(other_ids)
+        except:
+            min_id = 109 # This is the current value in structures.xml, and is not expected to change. Ugly, but code shouldn't reach here ...
+        return min_id
+
     def do_assign_multiple_first(self):
-        assign_multiple(self.file_map['tei-multiple'], self.file_map['mwes-1'], self.file_map['tei-multiple-ids-1'])
+        min_other_id = self._find_min_other_id('structures-old')
+        assign_multiple(self.file_map['tei-multiple'], self.file_map['mwes-1'], self.file_map['tei-multiple-ids-1'], min_other_id)
 
     def do_assign_multiple_second(self):
-        assign_multiple(self.file_map['tei-multiple'], self.file_map['mwes-2'], self.file_map['tei-multiple-ids-2'])
+        min_other_id = self._find_min_other_id('structures-new')
+        assign_multiple(self.file_map['tei-multiple'], self.file_map['mwes-2'], self.file_map['tei-multiple-ids-2'], min_other_id)
 
     def do_create_structures(self):
         input_file_name = self.file_map['structures-old']
@@ -172,5 +183,4 @@ class Pipeline:
         shutil.copyfile(self.file_map[file_key], file_name)
 
     def cleanup(self):
-        print(self.tmp_directory)
         shutil.rmtree(self.tmp_directory, True)