Specifications are loaded on import

This commit is contained in:
2026-04-15 08:23:06 +02:00
parent aef9a3698f
commit bb3c673e29

View File

@@ -4,7 +4,7 @@ import lxml.etree as lxml
from collections import defaultdict
from importlib_resources import files
from enum import Enum
from enum import IntEnum
from conversion_utils.utils import xpath_find, get_xml_id
@@ -62,10 +62,10 @@ LEVEL_EXCEPTIONS = {('pronoun', 2, 'čezme'), ('zaimek', 2, 'čezme'),
('pronoun', 8, 'se'), ('zaimek', 8, 'se'),
('pronoun', 8, 'ti'), ('zaimek', 8, 'ti')}
class MsdState(Enum):
    """Classification of an Msd code against the loaded specifications."""

    # The code is listed among the complete codes.
    FULL = 1
    # The code is listed only among the partial codes.
    PARTIAL = 2
    # The code is listed in neither collection.
    UNKNOWN = 3
class MsdState(IntEnum):
    """Classification of an Msd code, ordered from least to most complete.

    Being an ``IntEnum``, members compare by value, so
    ``UNKNOWN < PARTIAL < FULL``.
    """

    # The code is listed in neither the full nor the partial collection.
    UNKNOWN = -1
    # The code is listed among the partial codes.
    PARTIAL = 1
    # The code is listed among the complete codes.
    FULL = 2
class MsdException(Exception):
    """Error raised when an Msd code does not satisfy what the caller requires."""
@@ -266,9 +266,35 @@ class UD:
class Msd:
"""JOS msd."""
def __init__(self, code, language):
class State(IntEnum):
    """How an Msd code matched the specifications, ordered by completeness.

    Members compare by integer value, so ``max()`` over a set of states
    yields the most complete one.
    """

    # Found in neither the full nor the partial code collection.
    UNKNOWN = -1
    # Found among the partial codes.
    PARTIAL = 1
    # Found among the full codes.
    FULL = 2
def __init__(self, code, language, expected_state=State.FULL, require_valid=False):
    """Store the msd code with its language and determine its state.

    Args:
        code: the msd code
        language: the language the code is written in
        expected_state: the State the caller expects the code to have
        require_valid: stored on the instance and consulted during validation
    """
    self.code, self.language = code, language
    self.expected_state, self.require_valid = expected_state, require_valid
    # Must come last: the validation reads the four attributes set above.
    self.state = self._validate_and_get_state()
def _validate_and_get_state(self):
    """Classify ``self.code`` and check it against ``self.expected_state``.

    The code is looked up in the full and the partial code collections of
    ``DEFAULT_SPECIFICATIONS`` for ``self.language``. A code may match both;
    if it matches neither, it is classified as ``State.UNKNOWN``.

    Returns:
        State: the highest-valued state the code matched.

    Raises:
        MsdException: if the expected state was not matched and
            ``self.require_valid`` is true. Otherwise a warning is printed.
    """
    states = set()
    if self.code in DEFAULT_SPECIFICATIONS.codes_map[self.language]:
        states.add(self.State.FULL)
    if self.code in DEFAULT_SPECIFICATIONS.partial_codes_map[self.language]:
        states.add(self.State.PARTIAL)
    if not states:
        states.add(self.State.UNKNOWN)

    # Resolve the state up front: this method runs while __init__ is still
    # assigning self.state, so the attribute does not exist yet and must not
    # be read in the messages below.
    state = max(states)

    if self.expected_state not in states:
        if self.require_valid:
            raise MsdException(f"Given msd '{self.code}' is '{state.name}', but expected state is '{self.expected_state.name}'.")
        if state == self.State.UNKNOWN:
            print(f"[WARN] The Msd '{self.code}' is unknown.")
        else:
            print(f"[WARN] The Msd '{self.code}' is unknown for expected state '{self.expected_state.name}'.")
    return state
def __str__(self):
return 'code={code}, language={language}'.format(code=self.code, language=self.language)
@@ -282,15 +308,7 @@ class Converter:
def __init__(self, xml_file_name=None):
if (xml_file_name is None):
resource = files('conversion_utils.resources').joinpath(JOS_SPECIFICATIONS_PICKLE_RESOURCE)
if (resource.is_file()):
try:
with resource.open('rb') as pickle_file:
self.specifications = pickle.load(pickle_file)
except:
exit('Could not parse specifications pickle file installed.')
else:
exit('No pickle installed or xml provided.')
self.specifications = DEFAULT_SPECIFICATIONS
else:
parser = SpecificationsParser()
try:
@@ -313,7 +331,7 @@ class Converter:
form_feature_map={},
language=msd.language
)
return self.properties_to_msd(properties, msd.language).code
return self.properties_to_msd(properties, msd.language, expected_state=Msd.State.PARTIAL).code
def _parse_msd_ud_conversion(self, file_name):
"""Parse file with direct conversions from English Msd to Universal Dependencies."""
@@ -336,32 +354,7 @@ class Converter:
all_rules[priority].append(current_rules)
return all_rules
def is_valid_msd(self, msd):
    """Tell whether the Msd code belongs to the standard JOS set for its language."""
    known_codes = self.specifications.codes_map[msd.language]
    return msd.code in known_codes
def get_msd_state(self, msd):
    """Classify the Msd code as full, partial or unknown.

    The full codes are consulted first; the partial codes are consulted
    only when the code is not a full one.
    """
    specs = self.specifications
    if msd.code in specs.codes_map[msd.language]:
        return MsdState.FULL
    if msd.code in specs.partial_codes_map[msd.language]:
        return MsdState.PARTIAL
    return MsdState.UNKNOWN
def check_valid_msd(self, msd, require_valid_flag, allow_partial=True):
    """Raise or warn when the Msd code is not acceptable.

    An unknown code raises MsdException when require_valid_flag is set and
    otherwise only prints a warning. A partial code raises MsdException
    whenever allow_partial is false, regardless of require_valid_flag.
    """
    state = self.get_msd_state(msd)

    if state == MsdState.UNKNOWN:
        message = f"The msd '{msd.code}' is unknown"
        if not require_valid_flag:
            print('[WARN] ' + message)
        else:
            raise MsdException(message)

    partial_rejected = state == MsdState.PARTIAL and not allow_partial
    if partial_rejected:
        raise MsdException(f"Partial msd '{msd.code}' is not allowed. Full msd is required.")
def msd_to_properties(self, msd, language, lemma=None, require_valid_flag=False, warn_level_flag=False):
def msd_to_properties(self, msd, language, lemma=None, warn_level_flag=False):
"""Convert Msd to Properties.
The language of the generated Properties is specified and can differ from the Msd language.
@@ -387,7 +380,6 @@ class Converter:
Properties: the result of the conversion of the Msd in the language requested
"""
self.check_valid_msd(msd, require_valid_flag)
category_char = msd.code[0].lower()
value_chars = msd.code[1:]
category = self.specifications.find_category_by_code(category_char, msd.language)
@@ -413,7 +405,7 @@ class Converter:
form_feature_map[feature_name] = feature_value
return Properties(category_name, lexeme_feature_map, form_feature_map, language)
def properties_to_msd(self, properties, language, require_valid_flag=False):
def properties_to_msd(self, properties, language, expected_state=Msd.State.FULL):
"""Convert Properties to Msd.
The language of the generated Msd is specified and can differ from the Properties language.
@@ -445,8 +437,7 @@ class Converter:
msd_code += '-'
i += 1
msd_code += position_map[position]
msd = Msd(msd_code, language)
self.check_valid_msd(msd, require_valid_flag)
msd = Msd(msd_code, language, expected_state=expected_state)
return msd
def msd_to_ud(self, msd, lemma):
@@ -459,7 +450,8 @@ class Converter:
lemma(str): the lemma of the word form with the MSD
"""
self.check_valid_msd(msd, False, allow_partial=False)
if msd.state != Msd.State.FULL:
raise MsdException(f"Msd must be full to be converted to UD.")
upos_category, *upos_features = self.mte_to_ud_features[msd.code].split()
final_upos = ""
@@ -496,3 +488,17 @@ class Converter:
def translate_properties(self, properties, language):
    """Translate Properties into the given language by round-tripping through an Msd."""
    # Both conversion steps target the same language.
    intermediate_msd = self.properties_to_msd(properties, language)
    return self.msd_to_properties(intermediate_msd, language)
def _load_default_specifications():
    """Load the packaged specifications pickle into ``DEFAULT_SPECIFICATIONS``.

    The pickle is looked up as a resource of ``conversion_utils.resources``.

    Raises:
        SystemExit: if the resource is missing or cannot be unpickled.
            ``SystemExit`` is raised directly rather than through ``exit()``,
            because ``exit`` is injected by the ``site`` module for interactive
            use and is not guaranteed to exist (e.g. ``python -S``).
    """
    global DEFAULT_SPECIFICATIONS
    resource = files('conversion_utils.resources').joinpath(JOS_SPECIFICATIONS_PICKLE_RESOURCE)
    if not resource.is_file():
        raise SystemExit("Default specifications not found.")
    try:
        with resource.open('rb') as pickle_file:
            # NOTE(review): unpickling executes code embedded in the file; this
            # presumes the resource is the one shipped with the package - confirm
            # it can never be replaced by untrusted input.
            DEFAULT_SPECIFICATIONS = pickle.load(pickle_file)
    except Exception as e:
        # Keep the original failure reachable through __cause__.
        raise SystemExit('Could not parse specifications pickle file installed.') from e
# Populate DEFAULT_SPECIFICATIONS as soon as the module is imported.
_load_default_specifications()