Compare commits
No commits in common. "d7be39d8947ff3809f400e1f4ace6e94af4d3aea" and "2f74dfcab890f50e52426018e350faabfac11742" have entirely different histories.
d7be39d894
...
2f74dfcab8
@ -230,10 +230,7 @@ class Msd:
|
|||||||
return isinstance(obj, Msd) and self.code == obj.code and self.language == obj.language
|
return isinstance(obj, Msd) and self.code == obj.code and self.language == obj.language
|
||||||
|
|
||||||
|
|
||||||
class CustomException(Exception):
|
class ConverterException(Exception):
|
||||||
pass
|
|
||||||
|
|
||||||
class MsdException(CustomException):
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class Converter:
|
class Converter:
|
||||||
@ -256,46 +253,17 @@ class Converter:
|
|||||||
except:
|
except:
|
||||||
exit('Could not parse specifications xml file provided.')
|
exit('Could not parse specifications xml file provided.')
|
||||||
|
|
||||||
def is_valid_msd(self, msd):
|
def msd_to_properties(self, msd, language, lemma=None):
|
||||||
"""Verify if the Msd code is in the standard JOS set."""
|
"""Convert Msd to Properties (possibly in the other language).
|
||||||
return msd.code in self.specifications.codes_map[msd.language]
|
|
||||||
|
|
||||||
def check_valid_msd(self, msd, require_valid_flag):
|
|
||||||
"""If the Msd code is not valid, raise an exception or give a warning."""
|
|
||||||
if (not self.is_valid_msd(msd)):
|
|
||||||
message = 'The msd {} is unknown'.format(msd.code)
|
|
||||||
if (require_valid_flag):
|
|
||||||
raise MsdException(message)
|
|
||||||
else:
|
|
||||||
print('[WARN] ' + message)
|
|
||||||
|
|
||||||
def msd_to_properties(self, msd, language, lemma=None, require_valid_flag=False, warn_level_flag=False):
|
|
||||||
"""Convert Msd to Properties.
|
|
||||||
|
|
||||||
The language of the generated Properties is specified and can differ from the Msd language.
|
|
||||||
|
|
||||||
If require_valid_flag is True, a MsdException is raised if the MSD is not in the standard
|
|
||||||
JOS set. Otherwise only a warning is given.
|
|
||||||
|
|
||||||
If you care about accurate level information (i.e., which properties are lexeme-level and
|
|
||||||
which are form-level), note that some features depends on the particular lemma. For such
|
|
||||||
features, if lemma is not provided and warn_level_flag is True, a warning will be given.
|
|
||||||
|
|
||||||
If a MSD has dashes in place of letters for certain features, they are skipped, so that
|
|
||||||
these features are not included in the generated Properties object.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
msd(Msd): the JOS MSD to convert
|
|
||||||
language(str): the language for the Properties object to be generated: "en" (English) or "sl" (Slovene)
|
|
||||||
lemma(str): the lemma of the word form with the MSD
|
|
||||||
require_valid_flag(boolean): whether to raise a MsdException or only warn if a non-standard MSD is provided
|
|
||||||
warn_level_flag(boolean): whether to warn if cannot be sure of level of a property
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Properties: the result of the conversion of the Msd in the language requested
|
|
||||||
|
|
||||||
|
The level (lexeme vs form) of certain reflexive msd features
|
||||||
|
depends on the lemma, so set the lemma if you need accurate
|
||||||
|
level information.
|
||||||
"""
|
"""
|
||||||
self.check_valid_msd(msd, require_valid_flag)
|
|
||||||
|
if (msd.code not in self.specifications.codes_map[msd.language]):
|
||||||
|
raise ConverterException('The msd {} is unknown'.format(msd.code))
|
||||||
|
|
||||||
category_char = msd.code[0].lower()
|
category_char = msd.code[0].lower()
|
||||||
value_chars = msd.code[1:]
|
value_chars = msd.code[1:]
|
||||||
category = self.specifications.find_category_by_code(category_char, msd.language)
|
category = self.specifications.find_category_by_code(category_char, msd.language)
|
||||||
@ -309,8 +277,8 @@ class Converter:
|
|||||||
value = feature.find_value_by_char(value_char, msd.language)
|
value = feature.find_value_by_char(value_char, msd.language)
|
||||||
feature_name = feature.names.get(language)
|
feature_name = feature.names.get(language)
|
||||||
feature_value = value.names.get(language)
|
feature_value = value.names.get(language)
|
||||||
if (warn_level_flag and lemma is None and (category_name, index) in [(le[0], le[1]) for le in LEVEL_EXCEPTIONS]):
|
if (lemma is None and (category_name, index) in [(le[0], le[1]) for le in LEVEL_EXCEPTIONS]):
|
||||||
print('[WARN] The level (lexeme vs form) of feature (category={category}, position={position}) may be incorrect, as it is lemma-specific and no lemma has been specified.'
|
print('[WARN] The level of feature (category={category}, position={position}) may be incorrect, as it is lemma-specific and no lemma has been specified.'
|
||||||
.format(category=category_name, position=index))
|
.format(category=category_name, position=index))
|
||||||
level_exception_flag = (category_name, feature.position, lemma) in LEVEL_EXCEPTIONS
|
level_exception_flag = (category_name, feature.position, lemma) in LEVEL_EXCEPTIONS
|
||||||
lexeme_level_flag = feature.lexeme_level_flag if not level_exception_flag else not feature.lexeme_level_flag
|
lexeme_level_flag = feature.lexeme_level_flag if not level_exception_flag else not feature.lexeme_level_flag
|
||||||
@ -321,21 +289,8 @@ class Converter:
|
|||||||
form_feature_map[feature_name] = feature_value
|
form_feature_map[feature_name] = feature_value
|
||||||
return Properties(category_name, lexeme_feature_map, form_feature_map, language)
|
return Properties(category_name, lexeme_feature_map, form_feature_map, language)
|
||||||
|
|
||||||
def properties_to_msd(self, properties, language, require_valid_flag=False):
|
def properties_to_msd(self, properties, language):
|
||||||
"""Convert Properties to Msd.
|
"""Convert Properties to msd (possibly in the other language)."""
|
||||||
|
|
||||||
The language of the generated Msd is specified and can differ from the Properties language.
|
|
||||||
|
|
||||||
If require_valid_flag is True, a MsdException is raised if the generated MSD is not in
|
|
||||||
the standard JOS set. Otherwise only a warning is given.
|
|
||||||
|
|
||||||
Any skipped positions among the Properties are represented as dashes in the MSD.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
properties(Properties): the properties to convert
|
|
||||||
language(str): the language for the Msd object to be returned: "en" (English) or "sl" (Slovene)
|
|
||||||
require_valid_flag(boolean): whether to raise a MsdException or only warn if a non-standard MSD is generated
|
|
||||||
"""
|
|
||||||
category = self.specifications.find_category_by_name(properties.category, properties.language)
|
category = self.specifications.find_category_by_name(properties.category, properties.language)
|
||||||
category_char = category.codes.get(language).upper()
|
category_char = category.codes.get(language).upper()
|
||||||
feature_map = properties.lexeme_feature_map.copy()
|
feature_map = properties.lexeme_feature_map.copy()
|
||||||
@ -353,9 +308,7 @@ class Converter:
|
|||||||
msd_code += '-'
|
msd_code += '-'
|
||||||
i += 1
|
i += 1
|
||||||
msd_code += position_map[position]
|
msd_code += position_map[position]
|
||||||
msd = Msd(msd_code, language)
|
return Msd(msd_code, language)
|
||||||
self.check_valid_msd(msd, require_valid_flag)
|
|
||||||
return msd
|
|
||||||
|
|
||||||
def translate_msd(self, msd, language):
|
def translate_msd(self, msd, language):
|
||||||
return self.properties_to_msd(self.msd_to_properties(msd, language), language)
|
return self.properties_to_msd(self.msd_to_properties(msd, language), language)
|
||||||
|
2
setup.py
2
setup.py
@ -6,7 +6,7 @@ setup(name='conversion_utils',
|
|||||||
url='https://gitea.cjvt.si/generic/conversion_utils',
|
url='https://gitea.cjvt.si/generic/conversion_utils',
|
||||||
author='Cyprian Laskowski',
|
author='Cyprian Laskowski',
|
||||||
author_email='cyp@cjvt.si',
|
author_email='cyp@cjvt.si',
|
||||||
packages=['conversion_utils', 'conversion_utils.resources', 'conversion_utils.tests'],
|
packages=['conversion_utils', 'conversion_utils.resources'],
|
||||||
install_requires=['importlib_resources'],
|
install_requires=['importlib_resources'],
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
zip_safe=True)
|
zip_safe=True)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from conversion_utils.jos_msds_and_properties import Converter, Msd, MsdException
|
from conversion_utils.jos_msds_and_properties import Converter, Msd
|
||||||
|
|
||||||
class JosMsdToPropertiesTestCase(unittest.TestCase):
|
class JosMsdToPropertiesTestCase(unittest.TestCase):
|
||||||
|
|
||||||
@ -55,25 +55,3 @@ class JosMsdToPropertiesTestCase(unittest.TestCase):
|
|||||||
self.assertEqual(properties.category, 'punctuation')
|
self.assertEqual(properties.category, 'punctuation')
|
||||||
self.assertEqual(properties.lexeme_feature_map, {})
|
self.assertEqual(properties.lexeme_feature_map, {})
|
||||||
self.assertEqual(properties.form_feature_map, {})
|
self.assertEqual(properties.form_feature_map, {})
|
||||||
|
|
||||||
def test_good_msd_with_require_valid(self):
|
|
||||||
properties = self.converter.msd_to_properties(Msd('Ncfpd', 'en'), 'en', require_valid_flag=True)
|
|
||||||
self.assertEqual(properties.language, 'en')
|
|
||||||
self.assertEqual(properties.category, 'noun')
|
|
||||||
self.assertEqual(properties.lexeme_feature_map, {'type':'common', 'gender':'feminine'})
|
|
||||||
self.assertEqual(properties.form_feature_map, {'number':'plural', 'case':'dative'})
|
|
||||||
|
|
||||||
def test_bad_msd(self):
|
|
||||||
properties = self.converter.msd_to_properties(Msd('N---d', 'en'), 'en')
|
|
||||||
self.assertEqual(properties.language, 'en')
|
|
||||||
self.assertEqual(properties.category, 'noun')
|
|
||||||
self.assertEqual(properties.lexeme_feature_map, {})
|
|
||||||
self.assertEqual(properties.form_feature_map, {'case':'dative'})
|
|
||||||
|
|
||||||
def test_bad_msd_with_require_valid(self):
|
|
||||||
try:
|
|
||||||
self.converter.msd_to_properties(Msd('N---d', 'en'), 'en', require_valid_flag=True)
|
|
||||||
fails = False
|
|
||||||
except MsdException:
|
|
||||||
fails = True
|
|
||||||
self.assertEqual(fails, True)
|
|
@ -1,6 +1,6 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from conversion_utils.jos_msds_and_properties import Converter, Properties, MsdException
|
from conversion_utils.jos_msds_and_properties import Converter, Properties
|
||||||
|
|
||||||
class JosPropertiesToMsdTestCase(unittest.TestCase):
|
class JosPropertiesToMsdTestCase(unittest.TestCase):
|
||||||
|
|
||||||
@ -41,21 +41,3 @@ class JosPropertiesToMsdTestCase(unittest.TestCase):
|
|||||||
msd = self.converter.properties_to_msd(Properties('punctuation', {}, {}, 'en'), 'sl')
|
msd = self.converter.properties_to_msd(Properties('punctuation', {}, {}, 'en'), 'sl')
|
||||||
self.assertEqual(msd.language, 'sl')
|
self.assertEqual(msd.language, 'sl')
|
||||||
self.assertEqual(msd.code, 'U')
|
self.assertEqual(msd.code, 'U')
|
||||||
|
|
||||||
def test_good_msd_with_require_valid(self):
|
|
||||||
msd = self.converter.properties_to_msd(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'en', require_valid_flag=True)
|
|
||||||
self.assertEqual(msd.language, 'en')
|
|
||||||
self.assertEqual(msd.code, 'Ncfdn')
|
|
||||||
|
|
||||||
def test_bad_msd(self):
|
|
||||||
msd = self.converter.properties_to_msd(Properties('noun', {'type':'common'}, {'number':'dual'}, 'en'), 'en')
|
|
||||||
self.assertEqual(msd.language, 'en')
|
|
||||||
self.assertEqual(msd.code, 'Nc-d')
|
|
||||||
|
|
||||||
def test_bad_msd_with_require_valid(self):
|
|
||||||
try:
|
|
||||||
self.converter.properties_to_msd(Properties('noun', {'type':'common'}, {'number':'dual'}, 'en'), 'en', require_valid_flag=True)
|
|
||||||
fails = False
|
|
||||||
except MsdException:
|
|
||||||
fails = True
|
|
||||||
self.assertEqual(fails, True)
|
|
Loading…
Reference in New Issue
Block a user