diff --git a/conversion_utils/jos_msds_and_properties.py b/conversion_utils/jos_msds_and_properties.py index 0bddd2b..2751ec1 100644 --- a/conversion_utils/jos_msds_and_properties.py +++ b/conversion_utils/jos_msds_and_properties.py @@ -4,7 +4,7 @@ import sys from conversion_utils.utils import xpath_find, get_xml_id -LANGUAGE_INDEX_MAP = {'en':0, 'sl':1} +LANGUAGES = ['en', 'sl'] LEXEME_FEATURE_MAP = {'noun':{1,2}, 'verb':{1,2}, @@ -156,7 +156,7 @@ class SpecificationsBuilder: class Properties: def __init__(self, lemma, category, feature_value_list, language_index): - self.language = next(l for (l, i) in LANGUAGE_INDEX_MAP.items() if i == language_index) + self.language = LANGUAGES[language_index] self.category = category.string_pair[language_index] self.lexeme_feature_map = {} self.form_feature_map = {} @@ -175,13 +175,23 @@ class Properties: format(language=self.language, category=self.category, lexeme_features=str(self.lexeme_feature_map), form_features=str(self.form_feature_map)) +class Msd: + + def __init__(self, code, language): + self.code = code + self.language = language + + def __str__(self): + return 'code={code}, language={language}'.format(code=self.code, language=self.language) + + class MsdToProperties: - def convert(self, specifications, lemma, msd, msd_language, properties_language): - msd_language_index = LANGUAGE_INDEX_MAP[msd_language] - properties_language_index = LANGUAGE_INDEX_MAP[properties_language] - category_char = msd[0].lower() - value_chars = msd[1:] + def convert(self, specifications, msd, lemma, properties_language): + msd_language_index = LANGUAGES.index(msd.language) + properties_language_index = LANGUAGES.index(properties_language) + category_char = msd.code[0].lower() + value_chars = msd.code[1:] category = specifications.find_category(category_char, msd_language_index) feature_value_list = [] for (index, value_char) in enumerate(value_chars, start=1): diff --git a/tests/test_jos_msd_to_properties.py b/tests/test_jos_msd_to_properties.py index 7307e0f..4b33e4d 100644 --- a/tests/test_jos_msd_to_properties.py +++ b/tests/test_jos_msd_to_properties.py @@ -2,7 +2,7 @@ import os.path import lxml.etree as lxml import unittest -from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, MsdToProperties +from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, MsdToProperties, Msd class JosMsdToPropertiesTestCase(unittest.TestCase): @@ -13,49 +13,49 @@ class JosMsdToPropertiesTestCase(unittest.TestCase): self.converter = MsdToProperties() def test_en_en(self): - properties = self.converter.convert(self.specifications, 'miza', 'Ncfpd', 'en', 'en') + properties = self.converter.convert(self.specifications, Msd('Ncfpd', 'en'), 'miza', 'en') self.assertEqual(properties.language, 'en') self.assertEqual(properties.category, 'noun') self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'}) self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'}) def test_en_sl(self): - properties = self.converter.convert(self.specifications, 'miza', 'Ncfpd', 'en', 'sl') + properties = self.converter.convert(self.specifications, Msd('Ncfpd', 'en'), 'miza', 'sl') self.assertEqual(properties.language, 'sl') self.assertEqual(properties.category, 'samostalnik') self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'}) self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'}) def test_sl_en(self): - properties = self.converter.convert(self.specifications, 'miza', 'Sozmd', 'sl', 'en') + properties = self.converter.convert(self.specifications, Msd('Sozmd', 'sl'), 'miza', 'en') self.assertEqual(properties.language, 'en') self.assertEqual(properties.category, 'noun') self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'}) self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'}) def test_sl_sl(self): - properties = self.converter.convert(self.specifications, 'miza', 'Sozmd', 'sl', 'sl') + properties = self.converter.convert(self.specifications, Msd('Sozmd', 'sl'), 'miza', 'sl') self.assertEqual(properties.language, 'sl') self.assertEqual(properties.category, 'samostalnik') self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'}) self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'}) def test_exception_feature_level(self): # testing level exception: ('pronoun', 8, 'ti') - properties = self.converter.convert(self.specifications, 'ti', 'Pp2-sd--y', 'en', 'sl') + properties = self.converter.convert(self.specifications, Msd('Pp2-sd--y', 'en'), 'ti', 'sl') self.assertEqual(properties.language, 'sl') self.assertEqual(properties.category, 'zaimek') self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga'}) self.assertEqual(properties.form_feature_map, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'}) def test_normal_feature_level(self): # invalid msd, but useful for testing contrast with previous test - properties = self.converter.convert(self.specifications, 'vi', 'Pp2-pd--y', 'en', 'sl') + properties = self.converter.convert(self.specifications, Msd('Pp2-pd--y', 'en'), 'vi', 'sl') self.assertEqual(properties.language, 'sl') self.assertEqual(properties.category, 'zaimek') self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'}) self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'}) def test_featureless(self): - properties = self.converter.convert(self.specifications, 'in', 'V', 'sl', 'en') + properties = self.converter.convert(self.specifications, Msd('V', 'sl'), 'in', 'en') self.assertEqual(properties.language, 'en') self.assertEqual(properties.category, 'conjunction') self.assertEqual(properties.lexeme_feature_map, {})