Redmine #1835: added back schema validation
This commit is contained in:
parent
f5d4a009ea
commit
f7b9aaf210
|
@ -1,20 +1,10 @@
|
|||
# Scripts: file names of the helper scripts, one constant per script.
MWE_EXTRACTION_SCRIPT_NAME = 'wani.py'
STRUCTURE_ASSIGNMENT_SCRIPT_NAME = 'assign_structures.py'
STRUCTURE_CREATION_SCRIPT_NAME = 'create_structures.py'
DICTIONARY_MERGE_SCRIPT_NAME = 'merge_dictionaries.py'
|
||||
|
||||
# Resources: relative paths to the translation dictionary and the two XSD
# schemas (structures and monolingual dictionaries).
TRANSLATION_FILE_NAME = '../resources/dict.xml'
STRUCTURE_SCHEMA_FILE_NAME = '../resources/structures.xsd'
DICTIONARY_SCHEMA_FILE_NAME = '../resources/monolingual_dictionaries.xsd'
|
||||
|
||||
# temporary outputs
|
||||
FILE_MAP = {'strings-list': 'strings.txt',
|
||||
'obeliks-tokenised': 'obeliks_raw.conllu',
|
||||
'obeliks-tweaked': 'obeliks_tweaked.conllu',
|
||||
'classla-parsed': 'classla_raw.conllu',
|
||||
'classla-translated': 'classla_translated.conllu',
|
||||
'dict': 'dict.xml',
|
||||
'structure-schema': 'structures.xsd',
|
||||
'tei-initial': 'tei_initial.xml',
|
||||
'tei-single': 'tei_single.xml',
|
||||
'tei-single-ids': 'tei_single_with_ids.xml',
|
||||
|
@ -27,7 +17,8 @@ FILE_MAP = {'strings-list': 'strings.txt',
|
|||
'structures-new': 'structures_new.xml',
|
||||
'dictionary-single': 'dictionary_single.xml',
|
||||
'dictionary-multiple': 'dictionary_multiple.xml',
|
||||
'dictionary': 'dictionary.xml'
|
||||
'dictionary': 'dictionary.xml',
|
||||
'dictionary-schema': 'monolingual_dictionaries.xsd'
|
||||
}
|
||||
|
||||
NLP_CONFIG_MAP = {
|
||||
|
|
|
@ -2,6 +2,7 @@ import os
|
|||
import shutil
|
||||
import tempfile
|
||||
from types import SimpleNamespace
|
||||
import lxml.etree as lxml
|
||||
|
||||
import obeliks
|
||||
|
||||
|
@ -152,8 +153,24 @@ class Pipeline:
|
|||
output_file_name = self.file_map['dictionary']
|
||||
merge_dictionaries(single_file_name, multiple_file_name, output_file_name)
|
||||
|
||||
def _do_validate(self, schema_file_name, xml_file_name):
    """Validate an XML file against an XML Schema file.

    Args:
        schema_file_name: Path of the schema document handed to ``lxml.parse``.
        xml_file_name: Path of the XML document to be checked.

    Raises:
        Whatever ``XMLSchema.assertValid`` raises when the document does not
        conform to the schema; parse errors from ``lxml.parse`` propagate too.
    """
    # Build the validator first so a broken schema is reported before the
    # document itself is even opened.
    schema_document = lxml.parse(schema_file_name)
    validator = lxml.XMLSchema(schema_document)

    document = lxml.parse(xml_file_name)
    validator.assertValid(document)
|
||||
|
||||
def do_validate_structures(self):
    """Validate the 'structures-new' file against the 'structure-schema' file.

    Both paths are looked up in ``self.file_map``; the check itself is
    delegated to ``self._do_validate``.
    """
    paths = self.file_map
    self._do_validate(paths['structure-schema'], paths['structures-new'])
|
||||
|
||||
def do_validate_dictionary(self):
    """Validate the 'dictionary' file against the 'dictionary-schema' file.

    Both paths are looked up in ``self.file_map``; the check itself is
    delegated to ``self._do_validate``.
    """
    paths = self.file_map
    self._do_validate(paths['dictionary-schema'], paths['dictionary'])
|
||||
|
||||
def export_file(self, file_name, file_key):
    """Copy the file registered under ``file_key`` to ``file_name``.

    Args:
        file_name: Destination path for the copy.
        file_key: Key into ``self.file_map`` naming the file to copy.
    """
    source_path = self.file_map[file_key]
    shutil.copyfile(source_path, file_name)
|
||||
|
||||
def cleanup(self):
    """Print the temporary directory path, then delete that directory tree.

    Deletion errors are ignored, so calling this on a directory that is
    already gone does not raise.
    """
    tmp_dir = self.tmp_directory
    print(tmp_dir)
    shutil.rmtree(tmp_dir, ignore_errors=True)
|
||||
|
|
Loading…
Reference in New Issue
Block a user