diff --git a/scripts/create_xml.py b/scripts/create_xml.py index 4ab72a7..4c79b41 100644 --- a/scripts/create_xml.py +++ b/scripts/create_xml.py @@ -382,7 +382,7 @@ def create_sentence_output(sentence, headword_id, corpus, sent_id): headword_id = str(headword_id) parent_node = etree.Element('corpusExample') parent_node.set('corpusName', corpus) - parent_node.set('id', sent_id) + parent_node.set('exampleId', sent_id) # parent_node.text = 'AAA' # parent_node.prefix = 'BBB' # parent_node.tail = 'CCC' @@ -911,10 +911,10 @@ def write_xml(headword_category, collection_ssj, collection_gigafida, RF, mongo, # .filter(Feature.name == 'aspect').all() else: frequency = None - lexeme_id = 0 - lexical_unit_id = 0 - lexical_unit_lexeme_id = 0 - lexical_unit_type_name = '' + lexeme_id = None + lexical_unit_id = None + lexical_unit_lexeme_id = None + lexical_unit_type_name = None sense_ids = [] features = [] @@ -928,11 +928,14 @@ def write_xml(headword_category, collection_ssj, collection_gigafida, RF, mongo, lemma.text = headword_text lexical_unit = lxml.SubElement(head, 'lexicalUnit') - lexical_unit.set('id', str(lexical_unit_id)) - lexical_unit_type_name = 'single' if lexical_unit_type_name == 'single_lexeme_unit' else lexical_unit_type_name - lexical_unit.set('type', lexical_unit_type_name) + if lexical_unit_id is not None: + lexical_unit.set('id', str(lexical_unit_id)) + if lexical_unit_type_name is not None: + lexical_unit_type_name = 'single' if lexical_unit_type_name == 'single_lexeme_unit' else lexical_unit_type_name + lexical_unit.set('type', lexical_unit_type_name) lexeme = lxml.SubElement(lexical_unit, 'lexeme') - lexeme.set('lexical_unit_lexeme_id', str(lexical_unit_lexeme_id)) + if lexical_unit_lexeme_id is not None: + lexeme.set('lexical_unit_lexeme_id', str(lexical_unit_lexeme_id)) lexeme.text = headword_text grammar = lxml.SubElement(head, 'grammar') @@ -947,8 +950,6 @@ def write_xml(headword_category, collection_ssj, collection_gigafida, RF, mongo, ssj_frequency = ssj_frequency_dict[(headword_text, features[0].value)] if (headword_text, features[0].value) in ssj_frequency_dict else None if args.language == 'sl': grammarFeature.set('name', 'vid') - if len(features) > 1: - print(features) grammarFeature.text = ASPECT_MAP[features[0].value] else: grammarFeature.set('name', 'aspect')