Improved language handling
This commit is contained in:
parent
88f678fa1f
commit
f14879f619
|
@ -4,8 +4,6 @@ import sys
|
|||
|
||||
from conversion_utils.utils import xpath_find, get_xml_id
|
||||
|
||||
LANGUAGES = ['en', 'sl']
|
||||
|
||||
LEXEME_FEATURE_MAP = {'noun':{1,2},
|
||||
'verb':{1,2},
|
||||
'adjective':{1},
|
||||
|
@ -59,13 +57,26 @@ class Specifications:
|
|||
def add_category(self, category):
|
||||
self.categories.append(category)
|
||||
|
||||
def find_category(self, char, language_index):
|
||||
return next((category for category in self.categories if category.char_pair[language_index] == char), None)
|
||||
def find_category(self, char, language):
    """Return the first category whose code for ``language`` equals ``char``.

    Categories are scanned in the order they were added; ``None`` is
    returned when no category matches.
    """
    for candidate in self.categories:
        if candidate.char_pair.get(language) == char:
            return candidate
    return None
|
||||
|
||||
def __str__(self):
|
||||
return 'categories:{categories}'.format(categories=self.categories)
|
||||
|
||||
|
||||
class Pair:
    """Hold the 'en' and 'sl' variants of a single string.

    The two variants are exposed as the attributes ``en`` and ``sl`` and can
    also be looked up by language code through :meth:`get`.
    """

    # Language codes that correspond to the instance attributes of the same
    # name; ``get`` refuses anything else.
    _LANGUAGES = ('en', 'sl')

    def __init__(self, en, sl):
        """Store the ``en`` and ``sl`` variants."""
        self.en = en
        self.sl = sl

    def get(self, language):
        """Return the variant stored for ``language``.

        Args:
            language: language code, either ``'en'`` or ``'sl'``.

        Raises:
            AttributeError: if ``language`` is not a supported code.
        """
        # A bare getattr would also resolve methods and dunder attributes
        # (e.g. 'get', '__class__'), turning a bad language code into a
        # silently wrong value instead of an error.
        if language not in self._LANGUAGES:
            raise AttributeError(
                'unsupported language {language!r}; expected one of {languages}'.format(
                    language=language, languages=self._LANGUAGES))
        return getattr(self, language)

    def __str__(self):
        return 'en:{en}, sl:{sl}'.format(en=self.en, sl=self.sl)
|
||||
|
||||
|
||||
class Category:
|
||||
|
||||
def __init__(self, string_pair, char_pair, *features):
|
||||
|
@ -95,8 +106,8 @@ class Feature:
|
|||
def add_value(self, value):
|
||||
self.values.append(value)
|
||||
|
||||
def find_value(self, char, language_index):
|
||||
return next((value for value in self.values if value.char_pair[language_index] == char), None)
|
||||
def find_value(self, char, language):
    """Return the first value whose code for ``language`` equals ``char``.

    Values are scanned in the order they were added; ``None`` is returned
    when no value matches.
    """
    for candidate in self.values:
        if candidate.char_pair.get(language) == char:
            return candidate
    return None
|
||||
|
||||
def __str__(self):
|
||||
return 'strings:{strings}, position:{position}, level:{level}, values:{values}'.\
|
||||
|
@ -131,7 +142,7 @@ class SpecificationsBuilder:
|
|||
for feature_element in feature_elements:
|
||||
feature_string_pair = self.get_cell_pair(feature_element, 'name')
|
||||
feature_position = int(self.get_cell(feature_element, 'position'))
|
||||
lexeme_level_flag = feature_position in LEXEME_FEATURE_MAP[category_string_pair[0]]
|
||||
lexeme_level_flag = feature_position in LEXEME_FEATURE_MAP[category_string_pair.get('en')]
|
||||
feature = Feature(feature_string_pair, feature_position, lexeme_level_flag)
|
||||
category.add_feature(feature)
|
||||
value_elements = xpath_find(feature_element, 'tei:cell[@role="values"]/tei:table/tei:row[@role="value"]')
|
||||
|
@ -150,20 +161,20 @@ class SpecificationsBuilder:
|
|||
return text
|
||||
|
||||
def get_cell_pair(self, row, role):
|
||||
return (self.get_cell(row, role, 'en'), self.get_cell(row, role, 'sl'))
|
||||
return Pair(self.get_cell(row, role, 'en'), self.get_cell(row, role, 'sl'))
|
||||
|
||||
|
||||
class Properties:
|
||||
|
||||
def __init__(self, lemma, category, feature_value_list, language_index):
|
||||
self.language = LANGUAGES[language_index]
|
||||
self.category = category.string_pair[language_index]
|
||||
def __init__(self, lemma, category, feature_value_list, language):
|
||||
self.language = language
|
||||
self.category = category.string_pair.get(language)
|
||||
self.lexeme_feature_map = {}
|
||||
self.form_feature_map = {}
|
||||
for (feature, value) in feature_value_list:
|
||||
feature_name = feature.string_pair[language_index]
|
||||
feature_value = value.string_pair[language_index]
|
||||
level_exception_flag = (category.string_pair[0], feature.position, lemma) in LEVEL_EXCEPTIONS
|
||||
feature_name = feature.string_pair.get(language)
|
||||
feature_value = value.string_pair.get(language)
|
||||
level_exception_flag = (category.string_pair.get('en'), feature.position, lemma) in LEVEL_EXCEPTIONS
|
||||
lexeme_level_flag = feature.lexeme_level_flag if not level_exception_flag else not feature.lexeme_level_flag
|
||||
if (lexeme_level_flag):
|
||||
self.lexeme_feature_map[feature_name] = feature_value
|
||||
|
@ -188,15 +199,13 @@ class Msd:
|
|||
class MsdToProperties:
|
||||
|
||||
def convert(self, specifications, msd, lemma, properties_language):
|
||||
msd_language_index = LANGUAGES.index(msd.language)
|
||||
properties_language_index = LANGUAGES.index(properties_language)
|
||||
category_char = msd.code[0].lower()
|
||||
value_chars = msd.code[1:]
|
||||
category = specifications.find_category(category_char, msd_language_index)
|
||||
category = specifications.find_category(category_char, msd.language)
|
||||
feature_value_list = []
|
||||
for (index, value_char) in enumerate(value_chars, start=1):
|
||||
if (value_char != '-'):
|
||||
feature = category.find_feature(index)
|
||||
value = feature.find_value(value_char, msd_language_index)
|
||||
value = feature.find_value(value_char, msd.language)
|
||||
feature_value_list.append((feature, value))
|
||||
return Properties(lemma, category, feature_value_list, properties_language_index)
|
||||
return Properties(lemma, category, feature_value_list, properties_language)
|
||||
|
|
Loading…
Reference in New Issue
Block a user