Included msd language in msd object
This commit is contained in:
parent
5f5b8a4fe4
commit
88f678fa1f
|
@ -4,7 +4,7 @@ import sys
|
||||||
|
|
||||||
from conversion_utils.utils import xpath_find, get_xml_id
|
from conversion_utils.utils import xpath_find, get_xml_id
|
||||||
|
|
||||||
LANGUAGE_INDEX_MAP = {'en':0, 'sl':1}
|
LANGUAGES = ['en', 'sl']
|
||||||
|
|
||||||
LEXEME_FEATURE_MAP = {'noun':{1,2},
|
LEXEME_FEATURE_MAP = {'noun':{1,2},
|
||||||
'verb':{1,2},
|
'verb':{1,2},
|
||||||
|
@ -156,7 +156,7 @@ class SpecificationsBuilder:
|
||||||
class Properties:
|
class Properties:
|
||||||
|
|
||||||
def __init__(self, lemma, category, feature_value_list, language_index):
|
def __init__(self, lemma, category, feature_value_list, language_index):
|
||||||
self.language = next(l for (l, i) in LANGUAGE_INDEX_MAP.items() if i == language_index)
|
self.language = LANGUAGES[language_index]
|
||||||
self.category = category.string_pair[language_index]
|
self.category = category.string_pair[language_index]
|
||||||
self.lexeme_feature_map = {}
|
self.lexeme_feature_map = {}
|
||||||
self.form_feature_map = {}
|
self.form_feature_map = {}
|
||||||
|
@ -175,13 +175,23 @@ class Properties:
|
||||||
format(language=self.language, category=self.category, lexeme_features=str(self.lexeme_feature_map), form_features=str(self.form_feature_map))
|
format(language=self.language, category=self.category, lexeme_features=str(self.lexeme_feature_map), form_features=str(self.form_feature_map))
|
||||||
|
|
||||||
|
|
||||||
|
class Msd:
|
||||||
|
|
||||||
|
def __init__(self, code, language):
|
||||||
|
self.code = code
|
||||||
|
self.language = language
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return 'code={code}, language={language}'.format(code=self.code, language=self.language)
|
||||||
|
|
||||||
|
|
||||||
class MsdToProperties:
|
class MsdToProperties:
|
||||||
|
|
||||||
def convert(self, specifications, lemma, msd, msd_language, properties_language):
|
def convert(self, specifications, msd, lemma, properties_language):
|
||||||
msd_language_index = LANGUAGE_INDEX_MAP[msd_language]
|
msd_language_index = LANGUAGES.index(msd.language)
|
||||||
properties_language_index = LANGUAGE_INDEX_MAP[properties_language]
|
properties_language_index = LANGUAGES.index(properties_language)
|
||||||
category_char = msd[0].lower()
|
category_char = msd.code[0].lower()
|
||||||
value_chars = msd[1:]
|
value_chars = msd.code[1:]
|
||||||
category = specifications.find_category(category_char, msd_language_index)
|
category = specifications.find_category(category_char, msd_language_index)
|
||||||
feature_value_list = []
|
feature_value_list = []
|
||||||
for (index, value_char) in enumerate(value_chars, start=1):
|
for (index, value_char) in enumerate(value_chars, start=1):
|
||||||
|
|
|
@ -2,7 +2,7 @@ import os.path
|
||||||
import lxml.etree as lxml
|
import lxml.etree as lxml
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, MsdToProperties
|
from conversion_utils.jos_msds_and_properties import SpecificationsBuilder, MsdToProperties, Msd
|
||||||
|
|
||||||
class JosMsdToPropertiesTestCase(unittest.TestCase):
|
class JosMsdToPropertiesTestCase(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -13,49 +13,49 @@ class JosMsdToPropertiesTestCase(unittest.TestCase):
|
||||||
self.converter = MsdToProperties()
|
self.converter = MsdToProperties()
|
||||||
|
|
||||||
def test_en_en(self):
|
def test_en_en(self):
|
||||||
properties = self.converter.convert(self.specifications, 'miza', 'Ncfpd', 'en', 'en')
|
properties = self.converter.convert(self.specifications, Msd('Ncfpd', 'en'), 'miza', 'en')
|
||||||
self.assertEqual(properties.language, 'en')
|
self.assertEqual(properties.language, 'en')
|
||||||
self.assertEqual(properties.category, 'noun')
|
self.assertEqual(properties.category, 'noun')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
|
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
|
||||||
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
|
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
|
||||||
|
|
||||||
def test_en_sl(self):
|
def test_en_sl(self):
|
||||||
properties = self.converter.convert(self.specifications, 'miza', 'Ncfpd', 'en', 'sl')
|
properties = self.converter.convert(self.specifications, Msd('Ncfpd', 'en'), 'miza', 'sl')
|
||||||
self.assertEqual(properties.language, 'sl')
|
self.assertEqual(properties.language, 'sl')
|
||||||
self.assertEqual(properties.category, 'samostalnik')
|
self.assertEqual(properties.category, 'samostalnik')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'})
|
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'})
|
||||||
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
||||||
|
|
||||||
def test_sl_en(self):
|
def test_sl_en(self):
|
||||||
properties = self.converter.convert(self.specifications, 'miza', 'Sozmd', 'sl', 'en')
|
properties = self.converter.convert(self.specifications, Msd('Sozmd', 'sl'), 'miza', 'en')
|
||||||
self.assertEqual(properties.language, 'en')
|
self.assertEqual(properties.language, 'en')
|
||||||
self.assertEqual(properties.category, 'noun')
|
self.assertEqual(properties.category, 'noun')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
|
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
|
||||||
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
|
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
|
||||||
|
|
||||||
def test_sl_sl(self):
|
def test_sl_sl(self):
|
||||||
properties = self.converter.convert(self.specifications, 'miza', 'Sozmd', 'sl', 'sl')
|
properties = self.converter.convert(self.specifications, Msd('Sozmd', 'sl'), 'miza', 'sl')
|
||||||
self.assertEqual(properties.language, 'sl')
|
self.assertEqual(properties.language, 'sl')
|
||||||
self.assertEqual(properties.category, 'samostalnik')
|
self.assertEqual(properties.category, 'samostalnik')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'})
|
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'občno_ime', 'spol':'ženski'})
|
||||||
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
||||||
|
|
||||||
def test_exception_feature_level(self): # testing level exception: ('pronoun', 8, 'ti')
|
def test_exception_feature_level(self): # testing level exception: ('pronoun', 8, 'ti')
|
||||||
properties = self.converter.convert(self.specifications, 'ti', 'Pp2-sd--y', 'en', 'sl')
|
properties = self.converter.convert(self.specifications, Msd('Pp2-sd--y', 'en'), 'ti', 'sl')
|
||||||
self.assertEqual(properties.language, 'sl')
|
self.assertEqual(properties.language, 'sl')
|
||||||
self.assertEqual(properties.category, 'zaimek')
|
self.assertEqual(properties.category, 'zaimek')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga'})
|
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga'})
|
||||||
self.assertEqual(properties.form_feature_map, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'})
|
self.assertEqual(properties.form_feature_map, {'število':'ednina', 'sklon':'dajalnik', 'naslonskost':'klitična'})
|
||||||
|
|
||||||
def test_normal_feature_level(self): # invalid msd, but useful for testing contrast with previous test
|
def test_normal_feature_level(self): # invalid msd, but useful for testing contrast with previous test
|
||||||
properties = self.converter.convert(self.specifications, 'vi', 'Pp2-pd--y', 'en', 'sl')
|
properties = self.converter.convert(self.specifications, Msd('Pp2-pd--y', 'en'), 'vi', 'sl')
|
||||||
self.assertEqual(properties.language, 'sl')
|
self.assertEqual(properties.language, 'sl')
|
||||||
self.assertEqual(properties.category, 'zaimek')
|
self.assertEqual(properties.category, 'zaimek')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'})
|
self.assertEqual(properties.lexeme_feature_map, {'vrsta':'osebni', 'oseba':'druga', 'naslonskost':'klitična'})
|
||||||
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
self.assertEqual(properties.form_feature_map, {'število':'množina', 'sklon':'dajalnik'})
|
||||||
|
|
||||||
def test_featureless(self):
|
def test_featureless(self):
|
||||||
properties = self.converter.convert(self.specifications, 'in', 'V', 'sl', 'en')
|
properties = self.converter.convert(self.specifications, Msd('V', 'sl'), 'in', 'en')
|
||||||
self.assertEqual(properties.language, 'en')
|
self.assertEqual(properties.language, 'en')
|
||||||
self.assertEqual(properties.category, 'conjunction')
|
self.assertEqual(properties.category, 'conjunction')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {})
|
self.assertEqual(properties.lexeme_feature_map, {})
|
||||||
|
|
Loading…
Reference in New Issue
Block a user