Merged converters into one

This commit is contained in:
Cyprian Laskowski 2021-09-10 15:15:04 +02:00
parent 5c5b2c20cc
commit 2f7f90d073
3 changed files with 21 additions and 27 deletions

View File

@@ -196,12 +196,12 @@ class Msd:
return 'code={code}, language={language}'.format(code=self.code, language=self.language) return 'code={code}, language={language}'.format(code=self.code, language=self.language)
class MsdToProperties: class Converter:
def __init__(self, specifications): def __init__(self, specifications):
self.specifications = specifications self.specifications = specifications
def convert(self, msd, lemma, properties_language): def msd_to_properties(self, msd, lemma, properties_language):
category_char = msd.code[0].lower() category_char = msd.code[0].lower()
value_chars = msd.code[1:] value_chars = msd.code[1:]
category = self.specifications.find_category_by_char(category_char, msd.language) category = self.specifications.find_category_by_char(category_char, msd.language)
@@ -224,13 +224,7 @@ class MsdToProperties:
form_feature_map[feature_name] = feature_value form_feature_map[feature_name] = feature_value
return Properties(category_name, lexeme_feature_map, form_feature_map, properties_language) return Properties(category_name, lexeme_feature_map, form_feature_map, properties_language)
def properties_to_msd(self, properties, msd_language):
class PropertiesToMsd:
def __init__(self, specifications):
self.specifications = specifications
def convert(self, properties, msd_language):
category = self.specifications.find_category_by_name(properties.category, properties.language) category = self.specifications.find_category_by_name(properties.category, properties.language)
category_char = category.char_pair.get(msd_language).upper() category_char = category.char_pair.get(msd_language).upper()
feature_map = properties.lexeme_feature_map.copy() feature_map = properties.lexeme_feature_map.copy()

View File

@@ -2,7 +2,7 @@ import os.path
import lxml.etree as lxml import lxml.etree as lxml
import unittest import unittest
from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, MsdToProperties, Msd from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, Converter, Msd
class JosMsdToPropertiesTestCase(unittest.TestCase): class JosMsdToPropertiesTestCase(unittest.TestCase):
@@ -10,52 +10,52 @@ class JosMsdToPropertiesTestCase(unittest.TestCase):
specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml') specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml')
builder = SpecificationsBuilder() builder = SpecificationsBuilder()
specifications = builder.build(specifications_file_name) specifications = builder.build(specifications_file_name)
self.converter = MsdToProperties(specifications) self.converter = Converter(specifications)
def test_en_en(self): def test_en_en(self):
properties = self.converter.convert(Msd('Ncfpd', 'en'), 'miza', 'en') properties = self.converter.msd_to_properties(Msd('Ncfpd', 'en'), 'miza', 'en')
self.assertEqual(properties.language, 'en') self.assertEqual(properties.language, 'en')
self.assertEqual(properties.category, 'noun') self.assertEqual(properties.category, 'noun')
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'}) self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'}) self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
def test_en_sl(self): def test_en_sl(self):
properties = self.converter.convert(Msd('Ncfpd', 'en'), 'miza', 'sl') properties = self.converter.msd_to_properties(Msd('Ncfpd', 'en'), 'miza', 'sl')
self.assertEqual(properties.language, 'sl') self.assertEqual(properties.language, 'sl')
self.assertEqual(properties.category, 'samostalnik') self.assertEqual(properties.category, 'samostalnik')
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'}) self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'})
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'}) self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
def test_sl_en(self): def test_sl_en(self):
properties = self.converter.convert(Msd('Sozmd', 'sl'), 'miza', 'en') properties = self.converter.msd_to_properties(Msd('Sozmd', 'sl'), 'miza', 'en')
self.assertEqual(properties.language, 'en') self.assertEqual(properties.language, 'en')
self.assertEqual(properties.category, 'noun') self.assertEqual(properties.category, 'noun')
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'}) self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'}) self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
def test_sl_sl(self): def test_sl_sl(self):
properties = self.converter.convert(Msd('Sozmd', 'sl'), 'miza', 'sl') properties = self.converter.msd_to_properties(Msd('Sozmd', 'sl'), 'miza', 'sl')
self.assertEqual(properties.language, 'sl') self.assertEqual(properties.language, 'sl')
self.assertEqual(properties.category, 'samostalnik') self.assertEqual(properties.category, 'samostalnik')
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'}) self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'})
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'}) self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
def test_exception_feature_level(self): # testing level exception: ('pronoun', 8, 'ti') def test_exception_feature_level(self): # testing level exception: ('pronoun', 8, 'ti')
properties = self.converter.convert(Msd('Pp2-sd--y', 'en'), 'ti', 'sl') properties = self.converter.msd_to_properties(Msd('Pp2-sd--y', 'en'), 'ti', 'sl')
self.assertEqual(properties.language, 'sl') self.assertEqual(properties.language, 'sl')
self.assertEqual(properties.category, 'zaimek') self.assertEqual(properties.category, 'zaimek')
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga'}) self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga'})
self.assertEqual(properties.form_feature_map, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'}) self.assertEqual(properties.form_feature_map, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'})
def test_normal_feature_level(self): # invalid msd, but useful for testing contrast with previous test def test_normal_feature_level(self): # invalid msd, but useful for testing contrast with previous test
properties = self.converter.convert(Msd('Pp2-pd--y', 'en'), 'vi', 'sl') properties = self.converter.msd_to_properties(Msd('Pp2-pd--y', 'en'), 'vi', 'sl')
self.assertEqual(properties.language, 'sl') self.assertEqual(properties.language, 'sl')
self.assertEqual(properties.category, 'zaimek') self.assertEqual(properties.category, 'zaimek')
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'}) self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'})
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'}) self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
def test_featureless(self): def test_featureless(self):
properties = self.converter.convert(Msd('V', 'sl'), 'in', 'en') properties = self.converter.msd_to_properties(Msd('V', 'sl'), 'in', 'en')
self.assertEqual(properties.language, 'en') self.assertEqual(properties.language, 'en')
self.assertEqual(properties.category, 'conjunction') self.assertEqual(properties.category, 'conjunction')
self.assertEqual(properties.lexeme_feature_map, {}) self.assertEqual(properties.lexeme_feature_map, {})

View File

@@ -2,7 +2,7 @@ import os.path
import lxml.etree as lxml import lxml.etree as lxml
import unittest import unittest
from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, PropertiesToMsd, Properties from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, Converter, Properties
class JosPropertiesToMsdTestCase(unittest.TestCase): class JosPropertiesToMsdTestCase(unittest.TestCase):
@@ -10,40 +10,40 @@ class JosPropertiesToMsdTestCase(unittest.TestCase):
specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml') specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml')
builder = SpecificationsBuilder() builder = SpecificationsBuilder()
specifications = builder.build(specifications_file_name) specifications = builder.build(specifications_file_name)
self.converter = PropertiesToMsd(specifications) self.converter = Converter(specifications)
def test_en_en(self): def test_en_en(self):
msd = self.converter.convert(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'en') msd = self.converter.properties_to_msd(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'en')
self.assertEqual(msd.language, 'en') self.assertEqual(msd.language, 'en')
self.assertEqual(msd.code, 'Ncfdn') self.assertEqual(msd.code, 'Ncfdn')
def test_en_sl(self): def test_en_sl(self):
msd = self.converter.convert(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'sl') msd = self.converter.properties_to_msd(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'sl')
self.assertEqual(msd.language, 'sl') self.assertEqual(msd.language, 'sl')
self.assertEqual(msd.code, 'Sozdi') self.assertEqual(msd.code, 'Sozdi')
def test_sl_en(self): def test_sl_en(self):
msd = self.converter.convert(Properties('samostalnik', {'vrsta':'občno_ime', 'spol':'ženski'}, {'število':'dvojina', 'sklon':'imenovalnik'}, 'sl'), 'en') msd = self.converter.properties_to_msd(Properties('samostalnik', {'vrsta':'občno_ime', 'spol':'ženski'}, {'število':'dvojina', 'sklon':'imenovalnik'}, 'sl'), 'en')
self.assertEqual(msd.language, 'en') self.assertEqual(msd.language, 'en')
self.assertEqual(msd.code, 'Ncfdn') self.assertEqual(msd.code, 'Ncfdn')
def test_sl_sl(self): def test_sl_sl(self):
msd = self.converter.convert(Properties('samostalnik', {'vrsta':'občno_ime', 'spol':'ženski'}, {'število':'dvojina', 'sklon':'imenovalnik'}, 'sl'), 'sl') msd = self.converter.properties_to_msd(Properties('samostalnik', {'vrsta':'občno_ime', 'spol':'ženski'}, {'število':'dvojina', 'sklon':'imenovalnik'}, 'sl'), 'sl')
self.assertEqual(msd.language, 'sl') self.assertEqual(msd.language, 'sl')
self.assertEqual(msd.code, 'Sozdi') self.assertEqual(msd.code, 'Sozdi')
def test_exception_feature_level(self): def test_exception_feature_level(self):
msd = self.converter.convert(Properties('zaimek', {'vrsta':'osebni', 'oseba':'druga'}, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'}, 'sl'), 'en') msd = self.converter.properties_to_msd(Properties('zaimek', {'vrsta':'osebni', 'oseba':'druga'}, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'}, 'sl'), 'en')
self.assertEqual(msd.language, 'en') self.assertEqual(msd.language, 'en')
self.assertEqual(msd.code, 'Pp2-sd--y') self.assertEqual(msd.code, 'Pp2-sd--y')
def test_normal_feature_level(self): def test_normal_feature_level(self):
msd = self.converter.convert(Properties('zaimek', {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'}, {'število':'množina', 'sklon':'dajalnik'}, 'sl'), 'en') msd = self.converter.properties_to_msd(Properties('zaimek', {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'}, {'število':'množina', 'sklon':'dajalnik'}, 'sl'), 'en')
self.assertEqual(msd.language, 'en') self.assertEqual(msd.language, 'en')
self.assertEqual(msd.code, 'Pp2-pd--y') self.assertEqual(msd.code, 'Pp2-pd--y')
def test_featureless(self): def test_featureless(self):
msd = self.converter.convert(Properties('conjunction', {}, {}, 'en'), 'sl') msd = self.converter.properties_to_msd(Properties('conjunction', {}, {}, 'en'), 'sl')
self.assertEqual(msd.language, 'sl') self.assertEqual(msd.language, 'sl')
self.assertEqual(msd.code, 'V') self.assertEqual(msd.code, 'V')