Compare commits

..

No commits in common. "d7be39d8947ff3809f400e1f4ace6e94af4d3aea" and "2f74dfcab890f50e52426018e350faabfac11742" have entirely different histories.

7 changed files with 18 additions and 105 deletions

View File

@ -230,10 +230,7 @@ class Msd:
return isinstance(obj, Msd) and self.code == obj.code and self.language == obj.language return isinstance(obj, Msd) and self.code == obj.code and self.language == obj.language
class CustomException(Exception): class ConverterException(Exception):
pass
class MsdException(CustomException):
pass pass
class Converter: class Converter:
@ -256,46 +253,17 @@ class Converter:
except: except:
exit('Could not parse specifications xml file provided.') exit('Could not parse specifications xml file provided.')
def is_valid_msd(self, msd): def msd_to_properties(self, msd, language, lemma=None):
"""Verify if the Msd code is in the standard JOS set.""" """Convert Msd to Properties (possibly in the other language).
return msd.code in self.specifications.codes_map[msd.language]
def check_valid_msd(self, msd, require_valid_flag):
"""If the Msd code is not valid, raise an exception or give a warning."""
if (not self.is_valid_msd(msd)):
message = 'The msd {} is unknown'.format(msd.code)
if (require_valid_flag):
raise MsdException(message)
else:
print('[WARN] ' + message)
def msd_to_properties(self, msd, language, lemma=None, require_valid_flag=False, warn_level_flag=False):
"""Convert Msd to Properties.
The language of the generated Properties is specified and can differ from the Msd language.
If require_valid_flag is True, an MsdException is raised if the MSD is not in the standard
JOS set. Otherwise only a warning is given.
If you care about accurate level information (i.e., which properties are lexeme-level and
which are form-level), note that some features depend on the particular lemma. For such
features, if lemma is not provided and warn_level_flag is True, a warning will be given.
If an MSD has dashes in place of letters for certain features, they are skipped, so that
these features are not included in the generated Properties object.
Parameters:
msd(Msd): the JOS MSD to convert
language(str): the language for the Properties object to be generated: "en" (English) or "sl" (Slovene)
lemma(str): the lemma of the word form with the MSD
require_valid_flag(boolean): whether to raise an MsdException or only warn if a non-standard MSD is provided
warn_level_flag(boolean): whether to warn when the level of a property cannot be determined with certainty
Returns:
Properties: the result of the conversion of the Msd in the language requested
The level (lexeme vs form) of certain reflexive msd features
depends on the lemma, so set the lemma if you need accurate
level information.
""" """
self.check_valid_msd(msd, require_valid_flag)
if (msd.code not in self.specifications.codes_map[msd.language]):
raise ConverterException('The msd {} is unknown'.format(msd.code))
category_char = msd.code[0].lower() category_char = msd.code[0].lower()
value_chars = msd.code[1:] value_chars = msd.code[1:]
category = self.specifications.find_category_by_code(category_char, msd.language) category = self.specifications.find_category_by_code(category_char, msd.language)
@ -309,8 +277,8 @@ class Converter:
value = feature.find_value_by_char(value_char, msd.language) value = feature.find_value_by_char(value_char, msd.language)
feature_name = feature.names.get(language) feature_name = feature.names.get(language)
feature_value = value.names.get(language) feature_value = value.names.get(language)
if (warn_level_flag and lemma is None and (category_name, index) in [(le[0], le[1]) for le in LEVEL_EXCEPTIONS]): if (lemma is None and (category_name, index) in [(le[0], le[1]) for le in LEVEL_EXCEPTIONS]):
print('[WARN] The level (lexeme vs form) of feature (category={category}, position={position}) may be incorrect, as it is lemma-specific and no lemma has been specified.' print('[WARN] The level of feature (category={category}, position={position}) may be incorrect, as it is lemma-specific and no lemma has been specified.'
.format(category=category_name, position=index)) .format(category=category_name, position=index))
level_exception_flag = (category_name, feature.position, lemma) in LEVEL_EXCEPTIONS level_exception_flag = (category_name, feature.position, lemma) in LEVEL_EXCEPTIONS
lexeme_level_flag = feature.lexeme_level_flag if not level_exception_flag else not feature.lexeme_level_flag lexeme_level_flag = feature.lexeme_level_flag if not level_exception_flag else not feature.lexeme_level_flag
@ -321,21 +289,8 @@ class Converter:
form_feature_map[feature_name] = feature_value form_feature_map[feature_name] = feature_value
return Properties(category_name, lexeme_feature_map, form_feature_map, language) return Properties(category_name, lexeme_feature_map, form_feature_map, language)
def properties_to_msd(self, properties, language, require_valid_flag=False): def properties_to_msd(self, properties, language):
"""Convert Properties to Msd. """Convert Properties to msd (possibly in the other language)."""
The language of the generated Msd is specified and can differ from the Properties language.
If require_valid_flag is True, an MsdException is raised if the generated MSD is not in
the standard JOS set. Otherwise only a warning is given.
Any skipped positions among the Properties are represented as dashes in the MSD.
Parameters:
properties(Properties): the properties to convert
language(str): the language for the Msd object to be returned: "en" (English) or "sl" (Slovene)
require_valid_flag(boolean): whether to raise an MsdException or only warn if a non-standard MSD is generated
"""
category = self.specifications.find_category_by_name(properties.category, properties.language) category = self.specifications.find_category_by_name(properties.category, properties.language)
category_char = category.codes.get(language).upper() category_char = category.codes.get(language).upper()
feature_map = properties.lexeme_feature_map.copy() feature_map = properties.lexeme_feature_map.copy()
@ -353,9 +308,7 @@ class Converter:
msd_code += '-' msd_code += '-'
i += 1 i += 1
msd_code += position_map[position] msd_code += position_map[position]
msd = Msd(msd_code, language) return Msd(msd_code, language)
self.check_valid_msd(msd, require_valid_flag)
return msd
def translate_msd(self, msd, language): def translate_msd(self, msd, language):
return self.properties_to_msd(self.msd_to_properties(msd, language), language) return self.properties_to_msd(self.msd_to_properties(msd, language), language)

View File

@ -6,7 +6,7 @@ setup(name='conversion_utils',
url='https://gitea.cjvt.si/generic/conversion_utils', url='https://gitea.cjvt.si/generic/conversion_utils',
author='Cyprian Laskowski', author='Cyprian Laskowski',
author_email='cyp@cjvt.si', author_email='cyp@cjvt.si',
packages=['conversion_utils', 'conversion_utils.resources', 'conversion_utils.tests'], packages=['conversion_utils', 'conversion_utils.resources'],
install_requires=['importlib_resources'], install_requires=['importlib_resources'],
include_package_data=True, include_package_data=True,
zip_safe=True) zip_safe=True)

View File

@ -1,6 +1,6 @@
import unittest import unittest
from conversion_utils.jos_msds_and_properties import Converter, Msd, MsdException from conversion_utils.jos_msds_and_properties import Converter, Msd
class JosMsdToPropertiesTestCase(unittest.TestCase): class JosMsdToPropertiesTestCase(unittest.TestCase):
@ -55,25 +55,3 @@ class JosMsdToPropertiesTestCase(unittest.TestCase):
self.assertEqual(properties.category, 'punctuation') self.assertEqual(properties.category, 'punctuation')
self.assertEqual(properties.lexeme_feature_map, {}) self.assertEqual(properties.lexeme_feature_map, {})
self.assertEqual(properties.form_feature_map, {}) self.assertEqual(properties.form_feature_map, {})
def test_good_msd_with_require_valid(self):
properties = self.converter.msd_to_properties(Msd('Ncfpd', 'en'), 'en', require_valid_flag=True)
self.assertEqual(properties.language, 'en')
self.assertEqual(properties.category, 'noun')
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
def test_bad_msd(self):
properties = self.converter.msd_to_properties(Msd('N---d', 'en'), 'en')
self.assertEqual(properties.language, 'en')
self.assertEqual(properties.category, 'noun')
self.assertEqual(properties.lexeme_feature_map, {})
self.assertEqual(properties.form_feature_map, {'case':'dative'})
def test_bad_msd_with_require_valid(self):
try:
self.converter.msd_to_properties(Msd('N---d', 'en'), 'en', require_valid_flag=True)
fails = False
except MsdException:
fails = True
self.assertEqual(fails, True)

View File

@ -1,6 +1,6 @@
import unittest import unittest
from conversion_utils.jos_msds_and_properties import Converter, Properties, MsdException from conversion_utils.jos_msds_and_properties import Converter, Properties
class JosPropertiesToMsdTestCase(unittest.TestCase): class JosPropertiesToMsdTestCase(unittest.TestCase):
@ -41,21 +41,3 @@ class JosPropertiesToMsdTestCase(unittest.TestCase):
msd = self.converter.properties_to_msd(Properties('punctuation', {}, {}, 'en'), 'sl') msd = self.converter.properties_to_msd(Properties('punctuation', {}, {}, 'en'), 'sl')
self.assertEqual(msd.language, 'sl') self.assertEqual(msd.language, 'sl')
self.assertEqual(msd.code, 'U') self.assertEqual(msd.code, 'U')
def test_good_msd_with_require_valid(self):
msd = self.converter.properties_to_msd(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'en', require_valid_flag=True)
self.assertEqual(msd.language, 'en')
self.assertEqual(msd.code, 'Ncfdn')
def test_bad_msd(self):
msd = self.converter.properties_to_msd(Properties('noun', {'type':'common'}, {'number':'dual'}, 'en'), 'en')
self.assertEqual(msd.language, 'en')
self.assertEqual(msd.code, 'Nc-d')
def test_bad_msd_with_require_valid(self):
try:
self.converter.properties_to_msd(Properties('noun', {'type':'common'}, {'number':'dual'}, 'en'), 'en', require_valid_flag=True)
fails = False
except MsdException:
fails = True
self.assertEqual(fails, True)