Merged converters into one
This commit is contained in:
parent
5c5b2c20cc
commit
2f7f90d073
|
@ -196,12 +196,12 @@ class Msd:
|
||||||
return 'code={code}, language={language}'.format(code=self.code, language=self.language)
|
return 'code={code}, language={language}'.format(code=self.code, language=self.language)
|
||||||
|
|
||||||
|
|
||||||
class MsdToProperties:
|
class Converter:
|
||||||
|
|
||||||
def __init__(self, specifications):
|
def __init__(self, specifications):
|
||||||
self.specifications = specifications
|
self.specifications = specifications
|
||||||
|
|
||||||
def convert(self, msd, lemma, properties_language):
|
def msd_to_properties(self, msd, lemma, properties_language):
|
||||||
category_char = msd.code[0].lower()
|
category_char = msd.code[0].lower()
|
||||||
value_chars = msd.code[1:]
|
value_chars = msd.code[1:]
|
||||||
category = self.specifications.find_category_by_char(category_char, msd.language)
|
category = self.specifications.find_category_by_char(category_char, msd.language)
|
||||||
|
@ -224,13 +224,7 @@ class MsdToProperties:
|
||||||
form_feature_map[feature_name] = feature_value
|
form_feature_map[feature_name] = feature_value
|
||||||
return Properties(category_name, lexeme_feature_map, form_feature_map, properties_language)
|
return Properties(category_name, lexeme_feature_map, form_feature_map, properties_language)
|
||||||
|
|
||||||
|
def properties_to_msd(self, properties, msd_language):
|
||||||
class PropertiesToMsd:
|
|
||||||
|
|
||||||
def __init__(self, specifications):
|
|
||||||
self.specifications = specifications
|
|
||||||
|
|
||||||
def convert(self, properties, msd_language):
|
|
||||||
category = self.specifications.find_category_by_name(properties.category, properties.language)
|
category = self.specifications.find_category_by_name(properties.category, properties.language)
|
||||||
category_char = category.char_pair.get(msd_language).upper()
|
category_char = category.char_pair.get(msd_language).upper()
|
||||||
feature_map = properties.lexeme_feature_map.copy()
|
feature_map = properties.lexeme_feature_map.copy()
|
||||||
|
|
|
@ -2,7 +2,7 @@ import os.path
|
||||||
import lxml.etree as lxml
|
import lxml.etree as lxml
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, MsdToProperties, Msd
|
from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, Converter, Msd
|
||||||
|
|
||||||
class JosMsdToPropertiesTestCase(unittest.TestCase):
|
class JosMsdToPropertiesTestCase(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -10,52 +10,52 @@ class JosMsdToPropertiesTestCase(unittest.TestCase):
|
||||||
specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml')
|
specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml')
|
||||||
builder = SpecificationsBuilder()
|
builder = SpecificationsBuilder()
|
||||||
specifications = builder.build(specifications_file_name)
|
specifications = builder.build(specifications_file_name)
|
||||||
self.converter = MsdToProperties(specifications)
|
self.converter = Converter(specifications)
|
||||||
|
|
||||||
def test_en_en(self):
|
def test_en_en(self):
|
||||||
properties = self.converter.convert(Msd('Ncfpd', 'en'), 'miza', 'en')
|
properties = self.converter.msd_to_properties(Msd('Ncfpd', 'en'), 'miza', 'en')
|
||||||
self.assertEqual(properties.language, 'en')
|
self.assertEqual(properties.language, 'en')
|
||||||
self.assertEqual(properties.category, 'noun')
|
self.assertEqual(properties.category, 'noun')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
|
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
|
||||||
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
|
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
|
||||||
|
|
||||||
def test_en_sl(self):
|
def test_en_sl(self):
|
||||||
properties = self.converter.convert(Msd('Ncfpd', 'en'), 'miza', 'sl')
|
properties = self.converter.msd_to_properties(Msd('Ncfpd', 'en'), 'miza', 'sl')
|
||||||
self.assertEqual(properties.language, 'sl')
|
self.assertEqual(properties.language, 'sl')
|
||||||
self.assertEqual(properties.category, 'samostalnik')
|
self.assertEqual(properties.category, 'samostalnik')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'})
|
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'})
|
||||||
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
||||||
|
|
||||||
def test_sl_en(self):
|
def test_sl_en(self):
|
||||||
properties = self.converter.convert(Msd('Sozmd', 'sl'), 'miza', 'en')
|
properties = self.converter.msd_to_properties(Msd('Sozmd', 'sl'), 'miza', 'en')
|
||||||
self.assertEqual(properties.language, 'en')
|
self.assertEqual(properties.language, 'en')
|
||||||
self.assertEqual(properties.category, 'noun')
|
self.assertEqual(properties.category, 'noun')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
|
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
|
||||||
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
|
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
|
||||||
|
|
||||||
def test_sl_sl(self):
|
def test_sl_sl(self):
|
||||||
properties = self.converter.convert(Msd('Sozmd', 'sl'), 'miza', 'sl')
|
properties = self.converter.msd_to_properties(Msd('Sozmd', 'sl'), 'miza', 'sl')
|
||||||
self.assertEqual(properties.language, 'sl')
|
self.assertEqual(properties.language, 'sl')
|
||||||
self.assertEqual(properties.category, 'samostalnik')
|
self.assertEqual(properties.category, 'samostalnik')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'})
|
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'})
|
||||||
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
||||||
|
|
||||||
def test_exception_feature_level(self): # testing level exception: ('pronoun', 8, 'ti')
|
def test_exception_feature_level(self): # testing level exception: ('pronoun', 8, 'ti')
|
||||||
properties = self.converter.convert(Msd('Pp2-sd--y', 'en'), 'ti', 'sl')
|
properties = self.converter.msd_to_properties(Msd('Pp2-sd--y', 'en'), 'ti', 'sl')
|
||||||
self.assertEqual(properties.language, 'sl')
|
self.assertEqual(properties.language, 'sl')
|
||||||
self.assertEqual(properties.category, 'zaimek')
|
self.assertEqual(properties.category, 'zaimek')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga'})
|
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga'})
|
||||||
self.assertEqual(properties.form_feature_map, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'})
|
self.assertEqual(properties.form_feature_map, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'})
|
||||||
|
|
||||||
def test_normal_feature_level(self): # invalid msd, but useful for testing contrast with previous test
|
def test_normal_feature_level(self): # invalid msd, but useful for testing contrast with previous test
|
||||||
properties = self.converter.convert(Msd('Pp2-pd--y', 'en'), 'vi', 'sl')
|
properties = self.converter.msd_to_properties(Msd('Pp2-pd--y', 'en'), 'vi', 'sl')
|
||||||
self.assertEqual(properties.language, 'sl')
|
self.assertEqual(properties.language, 'sl')
|
||||||
self.assertEqual(properties.category, 'zaimek')
|
self.assertEqual(properties.category, 'zaimek')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'})
|
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'})
|
||||||
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
||||||
|
|
||||||
def test_featureless(self):
|
def test_featureless(self):
|
||||||
properties = self.converter.convert(Msd('V', 'sl'), 'in', 'en')
|
properties = self.converter.msd_to_properties(Msd('V', 'sl'), 'in', 'en')
|
||||||
self.assertEqual(properties.language, 'en')
|
self.assertEqual(properties.language, 'en')
|
||||||
self.assertEqual(properties.category, 'conjunction')
|
self.assertEqual(properties.category, 'conjunction')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {})
|
self.assertEqual(properties.lexeme_feature_map, {})
|
||||||
|
|
|
@ -2,7 +2,7 @@ import os.path
|
||||||
import lxml.etree as lxml
|
import lxml.etree as lxml
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, PropertiesToMsd, Properties
|
from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, Converter, Properties
|
||||||
|
|
||||||
class JosPropertiesToMsdTestCase(unittest.TestCase):
|
class JosPropertiesToMsdTestCase(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -10,40 +10,40 @@ class JosPropertiesToMsdTestCase(unittest.TestCase):
|
||||||
specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml')
|
specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml')
|
||||||
builder = SpecificationsBuilder()
|
builder = SpecificationsBuilder()
|
||||||
specifications = builder.build(specifications_file_name)
|
specifications = builder.build(specifications_file_name)
|
||||||
self.converter = PropertiesToMsd(specifications)
|
self.converter = Converter(specifications)
|
||||||
|
|
||||||
def test_en_en(self):
|
def test_en_en(self):
|
||||||
msd = self.converter.convert(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'en')
|
msd = self.converter.properties_to_msd(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'en')
|
||||||
self.assertEqual(msd.language, 'en')
|
self.assertEqual(msd.language, 'en')
|
||||||
self.assertEqual(msd.code, 'Ncfdn')
|
self.assertEqual(msd.code, 'Ncfdn')
|
||||||
|
|
||||||
def test_en_sl(self):
|
def test_en_sl(self):
|
||||||
msd = self.converter.convert(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'sl')
|
msd = self.converter.properties_to_msd(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'sl')
|
||||||
self.assertEqual(msd.language, 'sl')
|
self.assertEqual(msd.language, 'sl')
|
||||||
self.assertEqual(msd.code, 'Sozdi')
|
self.assertEqual(msd.code, 'Sozdi')
|
||||||
|
|
||||||
def test_sl_en(self):
|
def test_sl_en(self):
|
||||||
msd = self.converter.convert(Properties('samostalnik', {'vrsta':'občno_ime', 'spol':'ženski'}, {'število':'dvojina', 'sklon':'imenovalnik'}, 'sl'), 'en')
|
msd = self.converter.properties_to_msd(Properties('samostalnik', {'vrsta':'občno_ime', 'spol':'ženski'}, {'število':'dvojina', 'sklon':'imenovalnik'}, 'sl'), 'en')
|
||||||
self.assertEqual(msd.language, 'en')
|
self.assertEqual(msd.language, 'en')
|
||||||
self.assertEqual(msd.code, 'Ncfdn')
|
self.assertEqual(msd.code, 'Ncfdn')
|
||||||
|
|
||||||
def test_sl_sl(self):
|
def test_sl_sl(self):
|
||||||
msd = self.converter.convert(Properties('samostalnik', {'vrsta':'občno_ime', 'spol':'ženski'}, {'število':'dvojina', 'sklon':'imenovalnik'}, 'sl'), 'sl')
|
msd = self.converter.properties_to_msd(Properties('samostalnik', {'vrsta':'občno_ime', 'spol':'ženski'}, {'število':'dvojina', 'sklon':'imenovalnik'}, 'sl'), 'sl')
|
||||||
self.assertEqual(msd.language, 'sl')
|
self.assertEqual(msd.language, 'sl')
|
||||||
self.assertEqual(msd.code, 'Sozdi')
|
self.assertEqual(msd.code, 'Sozdi')
|
||||||
|
|
||||||
def test_exception_feature_level(self):
|
def test_exception_feature_level(self):
|
||||||
msd = self.converter.convert(Properties('zaimek', {'vrsta':'osebni', 'oseba':'druga'}, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'}, 'sl'), 'en')
|
msd = self.converter.properties_to_msd(Properties('zaimek', {'vrsta':'osebni', 'oseba':'druga'}, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'}, 'sl'), 'en')
|
||||||
self.assertEqual(msd.language, 'en')
|
self.assertEqual(msd.language, 'en')
|
||||||
self.assertEqual(msd.code, 'Pp2-sd--y')
|
self.assertEqual(msd.code, 'Pp2-sd--y')
|
||||||
|
|
||||||
def test_normal_feature_level(self):
|
def test_normal_feature_level(self):
|
||||||
msd = self.converter.convert(Properties('zaimek', {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'}, {'število':'množina', 'sklon':'dajalnik'}, 'sl'), 'en')
|
msd = self.converter.properties_to_msd(Properties('zaimek', {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'}, {'število':'množina', 'sklon':'dajalnik'}, 'sl'), 'en')
|
||||||
self.assertEqual(msd.language, 'en')
|
self.assertEqual(msd.language, 'en')
|
||||||
self.assertEqual(msd.code, 'Pp2-pd--y')
|
self.assertEqual(msd.code, 'Pp2-pd--y')
|
||||||
|
|
||||||
def test_featureless(self):
|
def test_featureless(self):
|
||||||
msd = self.converter.convert(Properties('conjunction', {}, {}, 'en'), 'sl')
|
msd = self.converter.properties_to_msd(Properties('conjunction', {}, {}, 'en'), 'sl')
|
||||||
self.assertEqual(msd.language, 'sl')
|
self.assertEqual(msd.language, 'sl')
|
||||||
self.assertEqual(msd.code, 'V')
|
self.assertEqual(msd.code, 'V')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user