You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

39 lines
1.5 KiB

# Script file names (bare names, no directory component).
MWE_EXTRACTION_SCRIPT_NAME = "wani.py"
STRUCTURE_ASSIGNMENT_SCRIPT_NAME = "assign_structures.py"
STRUCTURE_CREATION_SCRIPT_NAME = "create_structures.py"
DICTIONARY_MERGE_SCRIPT_NAME = "merge_dictionaries.py"
# Resource files, given as relative paths into ../resources/.
# NOTE(review): what the paths are resolved against is not visible here --
# confirm against the code that opens them.
TRANSLATION_FILE_NAME = "../resources/dict.xml"
STRUCTURE_SCHEMA_FILE_NAME = "../resources/structures.xsd"
DICTIONARY_SCHEMA_FILE_NAME = "../resources/monolingual_dictionaries.xsd"
# Temporary outputs: maps a short key to the file name used for that output.
FILE_MAP = {
    # plain-text string list
    "strings-list": "strings.txt",
    # CoNLL-U files
    "obeliks-tokenised": "obeliks_raw.conllu",
    "obeliks-tweaked": "obeliks_tweaked.conllu",
    "classla-parsed": "classla_raw.conllu",
    "classla-translated": "classla_translated.conllu",
    # TEI XML files
    "tei-initial": "tei_initial.xml",
    "tei-single": "tei_single.xml",
    "tei-single-ids": "tei_single_with_ids.xml",
    "tei-multiple": "tei_multiple.xml",
    "tei-multiple-ids-1": "tei_multiple_with_ids1.xml",
    "tei-multiple-ids-2": "tei_multiple_with_ids2.xml",
    # MWE CSV files
    "mwes-1": "mwes1.csv",
    "mwes-2": "mwes2.csv",
    # structure XML files
    "structures-old": "structures.xml",
    "structures-new": "structures_new.xml",
    # dictionary XML files
    "dictionary-single": "dictionary_single.xml",
    "dictionary-multiple": "dictionary_multiple.xml",
    "dictionary": "dictionary.xml",
}
# NLP configuration options.
# NOTE(review): the keys look like CLASSLA pipeline options -- confirm
# against the code that consumes this mapping.
NLP_CONFIG_MAP = dict(
    type="standard_jos",
    processors="tokenize,pos,lemma,depparse",
    tokenize_pretokenized=True,
    pos_use_lexicon=True,
)