Made msd and feature-level checking optional, added docstrings

This commit is contained in:
Cyprian Laskowski 2022-09-15 11:01:05 +02:00
parent 4ca67ec8cc
commit d7be39d894
3 changed files with 104 additions and 17 deletions

View File

@ -230,7 +230,10 @@ class Msd:
return isinstance(obj, Msd) and self.code == obj.code and self.language == obj.language
class CustomException(Exception):
    """Base class for the exceptions defined in this module."""


class MsdException(CustomException):
    """Raised when an Msd code is not in the standard JOS set and validity is required."""


# Backward-compatible alias: the exception for unknown MSDs used to be called
# ConverterException. Keeping the old name bound means existing
# `except ConverterException:` / `raise ConverterException(...)` code keeps working.
ConverterException = MsdException
class Converter:
@ -253,17 +256,46 @@ class Converter:
except:
exit('Could not parse specifications xml file provided.')
def msd_to_properties(self, msd, language, lemma=None):
"""Convert Msd to Properties (possibly in the other language).
def is_valid_msd(self, msd):
    """Tell whether the Msd code belongs to the standard JOS set.

    The set of known codes is looked up in self.specifications.codes_map under
    the Msd's own language, and msd.code is tested for membership in it.

    Parameters:
    msd(Msd): the MSD to check

    Returns:
    bool: True if the code is known for msd.language, False otherwise
    """
    known_codes = self.specifications.codes_map[msd.language]
    return msd.code in known_codes
def check_valid_msd(self, msd, require_valid_flag):
    """Complain about an Msd whose code is not valid; do nothing for a valid one.

    Validity is decided by self.is_valid_msd(msd).

    Parameters:
    msd(Msd): the MSD to check
    require_valid_flag(boolean): if true, an invalid MSD raises a MsdException;
    otherwise a '[WARN]' line is printed and execution continues

    Raises:
    MsdException: if the MSD is invalid and require_valid_flag is true
    """
    # Guard clause: nothing to report for a valid code.
    if self.is_valid_msd(msd):
        return
    message = 'The msd {} is unknown'.format(msd.code)
    if require_valid_flag:
        raise MsdException(message)
    print('[WARN] ' + message)
def msd_to_properties(self, msd, language, lemma=None, require_valid_flag=False, warn_level_flag=False):
"""Convert Msd to Properties.
The language of the generated Properties is specified and can differ from the Msd language.
If require_valid_flag is True, a MsdException is raised if the MSD is not in the standard
JOS set. Otherwise only a warning is given.
If you care about accurate level information (i.e., which properties are lexeme-level and
which are form-level), note that some features depend on the particular lemma. For such
features, if lemma is not provided and warn_level_flag is True, a warning will be given.
If a MSD has dashes in place of letters for certain features, they are skipped, so that
these features are not included in the generated Properties object.
Parameters:
msd(Msd): the JOS MSD to convert
language(str): the language for the Properties object to be generated: "en" (English) or "sl" (Slovene)
lemma(str): the lemma of the word form with the MSD
require_valid_flag(boolean): whether to raise a MsdException or only warn if a non-standard MSD is provided
warn_level_flag(boolean): whether to warn when the level of a property cannot be determined with certainty
Returns:
Properties: the result of the conversion of the Msd in the language requested
The level (lexeme vs form) of certain reflexive msd features
depends on the lemma, so set the lemma if you need accurate
level information.
"""
if (msd.code not in self.specifications.codes_map[msd.language]):
raise ConverterException('The msd {} is unknown'.format(msd.code))
self.check_valid_msd(msd, require_valid_flag)
category_char = msd.code[0].lower()
value_chars = msd.code[1:]
category = self.specifications.find_category_by_code(category_char, msd.language)
@ -277,8 +309,8 @@ class Converter:
value = feature.find_value_by_char(value_char, msd.language)
feature_name = feature.names.get(language)
feature_value = value.names.get(language)
if (lemma is None and (category_name, index) in [(le[0], le[1]) for le in LEVEL_EXCEPTIONS]):
print('[WARN] The level of feature (category={category}, position={position}) may be incorrect, as it is lemma-specific and no lemma has been specified.'
if (warn_level_flag and lemma is None and (category_name, index) in [(le[0], le[1]) for le in LEVEL_EXCEPTIONS]):
print('[WARN] The level (lexeme vs form) of feature (category={category}, position={position}) may be incorrect, as it is lemma-specific and no lemma has been specified.'
.format(category=category_name, position=index))
level_exception_flag = (category_name, feature.position, lemma) in LEVEL_EXCEPTIONS
lexeme_level_flag = feature.lexeme_level_flag if not level_exception_flag else not feature.lexeme_level_flag
@ -289,8 +321,21 @@ class Converter:
form_feature_map[feature_name] = feature_value
return Properties(category_name, lexeme_feature_map, form_feature_map, language)
def properties_to_msd(self, properties, language):
"""Convert Properties to msd (possibly in the other language)."""
def properties_to_msd(self, properties, language, require_valid_flag=False):
"""Convert Properties to Msd.
The language of the generated Msd is specified and can differ from the Properties language.
If require_valid_flag is True, a MsdException is raised if the generated MSD is not in
the standard JOS set. Otherwise only a warning is given.
Any skipped positions among the Properties are represented as dashes in the MSD.
Parameters:
properties(Properties): the properties to convert
language(str): the language for the Msd object to be returned: "en" (English) or "sl" (Slovene)
require_valid_flag(boolean): whether to raise a MsdException or only warn if a non-standard MSD is generated
"""
category = self.specifications.find_category_by_name(properties.category, properties.language)
category_char = category.codes.get(language).upper()
feature_map = properties.lexeme_feature_map.copy()
@ -308,7 +353,9 @@ class Converter:
msd_code += '-'
i += 1
msd_code += position_map[position]
return Msd(msd_code, language)
msd = Msd(msd_code, language)
self.check_valid_msd(msd, require_valid_flag)
return msd
def translate_msd(self, msd, language, require_valid_flag=False):
    """Translate an Msd into the requested language.

    The Msd is first converted to Properties in the target language with
    msd_to_properties, and those Properties are then converted back to an
    Msd in the same language with properties_to_msd.

    Parameters:
    msd(Msd): the MSD to translate
    language(str): the language of the Msd to be returned
    require_valid_flag(boolean): forwarded to both conversion steps; defaults to
    False, which is what both steps used before this parameter existed

    Returns:
    Msd: the result of properties_to_msd for the intermediate Properties
    """
    properties = self.msd_to_properties(msd, language, require_valid_flag=require_valid_flag)
    return self.properties_to_msd(properties, language, require_valid_flag=require_valid_flag)

View File

@ -1,6 +1,6 @@
import unittest
from conversion_utils.jos_msds_and_properties import Converter, Msd
from conversion_utils.jos_msds_and_properties import Converter, Msd, MsdException
class JosMsdToPropertiesTestCase(unittest.TestCase):
@ -55,3 +55,25 @@ class JosMsdToPropertiesTestCase(unittest.TestCase):
self.assertEqual(properties.category, 'punctuation')
self.assertEqual(properties.lexeme_feature_map, {})
self.assertEqual(properties.form_feature_map, {})
def test_good_msd_with_require_valid(self):
    """A known MSD converts as usual when require_valid_flag is True."""
    source_msd = Msd('Ncfpd', 'en')
    properties = self.converter.msd_to_properties(source_msd, 'en', require_valid_flag=True)
    expected_lexeme_features = {'type':'common', 'gender':'feminine'}
    expected_form_features = {'number':'plural', 'case':'dative'}
    self.assertEqual(properties.language, 'en')
    self.assertEqual(properties.category, 'noun')
    self.assertEqual(properties.lexeme_feature_map, expected_lexeme_features)
    self.assertEqual(properties.form_feature_map, expected_form_features)
def test_bad_msd(self):
    """Without require_valid_flag, an MSD with dashes is still converted; dashed positions are left out."""
    source_msd = Msd('N---d', 'en')
    properties = self.converter.msd_to_properties(source_msd, 'en')
    expected_lexeme_features = {}
    expected_form_features = {'case':'dative'}
    self.assertEqual(properties.language, 'en')
    self.assertEqual(properties.category, 'noun')
    self.assertEqual(properties.lexeme_feature_map, expected_lexeme_features)
    self.assertEqual(properties.form_feature_map, expected_form_features)
def test_bad_msd_with_require_valid(self):
    """With require_valid_flag=True, converting the MSD 'N---d' raises MsdException."""
    # assertRaises replaces the manual try/except flag: it fails with a clear
    # "MsdException not raised" message and still lets other exceptions propagate.
    with self.assertRaises(MsdException):
        self.converter.msd_to_properties(Msd('N---d', 'en'), 'en', require_valid_flag=True)

View File

@ -1,6 +1,6 @@
import unittest
from conversion_utils.jos_msds_and_properties import Converter, Properties
from conversion_utils.jos_msds_and_properties import Converter, Properties, MsdException
class JosPropertiesToMsdTestCase(unittest.TestCase):
@ -41,3 +41,21 @@ class JosPropertiesToMsdTestCase(unittest.TestCase):
msd = self.converter.properties_to_msd(Properties('punctuation', {}, {}, 'en'), 'sl')
self.assertEqual(msd.language, 'sl')
self.assertEqual(msd.code, 'U')
def test_good_msd_with_require_valid(self):
    """Complete noun properties yield the MSD 'Ncfdn' when require_valid_flag is True."""
    lexeme_features = {'type':'common', 'gender':'feminine'}
    form_features = {'number':'dual', 'case':'nominative'}
    source_properties = Properties('noun', lexeme_features, form_features, 'en')
    msd = self.converter.properties_to_msd(source_properties, 'en', require_valid_flag=True)
    self.assertEqual(msd.language, 'en')
    self.assertEqual(msd.code, 'Ncfdn')
def test_bad_msd(self):
    """Without require_valid_flag, incomplete properties still produce an MSD, with a dash for the skipped position."""
    source_properties = Properties('noun', {'type':'common'}, {'number':'dual'}, 'en')
    msd = self.converter.properties_to_msd(source_properties, 'en')
    self.assertEqual(msd.language, 'en')
    self.assertEqual(msd.code, 'Nc-d')
def test_bad_msd_with_require_valid(self):
    """With require_valid_flag=True, incomplete noun properties raise MsdException."""
    incomplete_properties = Properties('noun', {'type':'common'}, {'number':'dual'}, 'en')
    # assertRaises replaces the manual try/except flag: it fails with a clear
    # "MsdException not raised" message and still lets other exceptions propagate.
    with self.assertRaises(MsdException):
        self.converter.properties_to_msd(incomplete_properties, 'en', require_valid_flag=True)