diff --git a/conversion_utils/jos_msds_and_properties.py b/conversion_utils/jos_msds_and_properties.py
index b019535..20c61ff 100644
--- a/conversion_utils/jos_msds_and_properties.py
+++ b/conversion_utils/jos_msds_and_properties.py
@@ -57,11 +57,11 @@ class Specifications:
     def add_category(self, category):
         self.categories.append(category)
 
-    def find_category_by_char(self, char, language):
-        return next((category for category in self.categories if category.char_pair.get(language) == char), None)
+    def find_category_by_code(self, char, language):
+        return next((category for category in self.categories if category.codes.get(language) == char), None)
 
     def find_category_by_name(self, name, language):
-        return next((category for category in self.categories if category.string_pair.get(language) == name), None)
+        return next((category for category in self.categories if category.names.get(language) == name), None)
 
     def __str__(self):
         return 'categories:{categories}'.format(categories=self.categories)
@@ -82,9 +82,9 @@ class Pair:
 
 class Category:
 
-    def __init__(self, string_pair, char_pair, *features):
-        self.string_pair = string_pair
-        self.char_pair = char_pair
+    def __init__(self, names, codes, *features):
+        self.names = names
+        self.codes = codes
         self.features = list(features)
 
     def add_feature(self, feature):
@@ -94,17 +94,17 @@ class Category:
         return next((feature for feature in self.features if feature.position == position), None)
 
     def find_feature_by_name(self, name, language):
-        return next((feature for feature in self.features if feature.string_pair.get(language) == name), None)
+        return next((feature for feature in self.features if feature.names.get(language) == name), None)
 
     def __str__(self):
-        return 'strings:{strings}, chars:{chars}, features:{features}'.\
-            format(strings=self.string_pair, chars=self.char_pair, features=self.features)
+        return 'names:{names}, codes:{codes}, features:{features}'.\
+            format(names=self.names, codes=self.codes, features=self.features)
 
 
 class Feature:
 
-    def __init__(self, string_pair, position, lexeme_level_flag, *values):
-        self.string_pair = string_pair
+    def __init__(self, names, position, lexeme_level_flag, *values):
+        self.names = names
         self.position = position
         self.lexeme_level_flag = lexeme_level_flag
         self.values = list(values)
@@ -113,25 +113,25 @@ class Feature:
         self.values.append(value)
 
     def find_value_by_char(self, char, language):
-        return next((value for value in self.values if value.char_pair.get(language) == char), None)
+        return next((value for value in self.values if value.codes.get(language) == char), None)
 
     def find_value_by_name(self, name, language):
-        return next((value for value in self.values if value.string_pair.get(language) == name), None)
+        return next((value for value in self.values if value.names.get(language) == name), None)
 
     def __str__(self):
-        return 'strings:{strings}, position:{position}, level:{level}, values:{values}'.\
-            format(strings=self.string_pair, position=self.position, level='level' if self.lexeme_level_flag else 'form', values=self.values)
+        return 'names:{names}, position:{position}, level:{level}, values:{values}'.\
+            format(names=self.names, position=self.position, level='level' if self.lexeme_level_flag else 'form', values=self.values)
 
 
 class Value:
 
-    def __init__(self, string_pair, char_pair):
-        self.char_pair = char_pair
-        self.string_pair = string_pair
+    def __init__(self, names, codes):
+        self.codes = codes
+        self.names = names
 
     def __str__(self):
-        return 'chars:{chars}, strings:{strings}'.\
-            format(chars=self.char_pair, strings=self.strings_pair)
+        return 'codes:{codes}, names:{names}'.\
+            format(codes=self.codes, names=self.names)
 
 
 class SpecificationsBuilder:
@@ -143,22 +143,22 @@ class SpecificationsBuilder:
         for div_element in div_elements:
             if (re.match(r'^msd\..-sl', get_xml_id(div_element))):
                 category_element = xpath_find(div_element, 'tei:table/tei:row[@role="type"]')[0]
-                category_string_pair = self.get_cell_pair(category_element, 'value')
-                category_char_pair = self.get_cell_pair(category_element, 'code')
-                category = Category(category_string_pair, category_char_pair)
+                category_names = self.get_cell_pair(category_element, 'value')
+                category_codes = self.get_cell_pair(category_element, 'code')
+                category = Category(category_names, category_codes)
                 specifications.add_category(category)
                 feature_elements = xpath_find(div_element, 'tei:table/tei:row[@role="attribute"]')
                 for feature_element in feature_elements:
-                    feature_string_pair = self.get_cell_pair(feature_element, 'name')
+                    feature_names = self.get_cell_pair(feature_element, 'name')
                     feature_position = int(self.get_cell(feature_element, 'position'))
-                    lexeme_level_flag = feature_position in LEXEME_FEATURE_MAP[category_string_pair.get('en')]
-                    feature = Feature(feature_string_pair, feature_position, lexeme_level_flag)
+                    lexeme_level_flag = feature_position in LEXEME_FEATURE_MAP[category_names.get('en')]
+                    feature = Feature(feature_names, feature_position, lexeme_level_flag)
                     category.add_feature(feature)
                     value_elements = xpath_find(feature_element, 'tei:cell[@role="values"]/tei:table/tei:row[@role="value"]')
                     for value_element in value_elements:
-                        value_char_pair = self.get_cell_pair(value_element, 'name')
-                        value_string_pair = self.get_cell_pair(value_element, 'code')
-                        value = Value(value_char_pair, value_string_pair)
+                        value_names = self.get_cell_pair(value_element, 'name')
+                        value_codes = self.get_cell_pair(value_element, 'code')
+                        value = Value(value_names, value_codes)
                         feature.add_value(value)
         return specifications
 
@@ -204,8 +204,8 @@ class Converter:
     def msd_to_properties(self, msd, lemma, properties_language):
         category_char = msd.code[0].lower()
         value_chars = msd.code[1:]
-        category = self.specifications.find_category_by_char(category_char, msd.language)
-        category_name = category.string_pair.get(properties_language)
+        category = self.specifications.find_category_by_code(category_char, msd.language)
+        category_name = category.names.get(properties_language)
         feature_value_list = []
         lexeme_feature_map = {}
         form_feature_map = {}
@@ -213,8 +213,8 @@ class Converter:
             if (value_char != '-'):
                 feature = category.find_feature_by_position(index)
                 value = feature.find_value_by_char(value_char, msd.language)
-                feature_name = feature.string_pair.get(properties_language)
-                feature_value = value.string_pair.get(properties_language)
+                feature_name = feature.names.get(properties_language)
+                feature_value = value.names.get(properties_language)
                 level_exception_flag = (category_name, feature.position, lemma) in LEVEL_EXCEPTIONS
                 lexeme_level_flag = feature.lexeme_level_flag if not level_exception_flag else not feature.lexeme_level_flag
                 feature_value_list.append((feature, value))
@@ -226,14 +226,14 @@ class Converter:
 
     def properties_to_msd(self, properties, msd_language):
         category = self.specifications.find_category_by_name(properties.category, properties.language)
-        category_char = category.char_pair.get(msd_language).upper()
+        category_char = category.codes.get(msd_language).upper()
         feature_map = properties.lexeme_feature_map.copy()
         feature_map.update(properties.form_feature_map.copy())
         position_map = {}
         for (name, value) in feature_map.items():
             feature = category.find_feature_by_name(name, properties.language)
             value = feature.find_value_by_name(value, properties.language)
-            position_map[feature.position] = value.char_pair.get(msd_language)
+            position_map[feature.position] = value.codes.get(msd_language)
         msd_code = category_char
         i = 0
         for position in sorted(position_map.keys()):