Specifications are loaded on import
This commit is contained in:
@@ -4,7 +4,7 @@ import lxml.etree as lxml
|
||||
from collections import defaultdict
|
||||
from importlib_resources import files
|
||||
|
||||
from enum import Enum
|
||||
from enum import IntEnum
|
||||
|
||||
from conversion_utils.utils import xpath_find, get_xml_id
|
||||
|
||||
@@ -62,10 +62,10 @@ LEVEL_EXCEPTIONS = {('pronoun', 2, 'čezme'), ('zaimek', 2, 'čezme'),
|
||||
('pronoun', 8, 'se'), ('zaimek', 8, 'se'),
|
||||
('pronoun', 8, 'ti'), ('zaimek', 8, 'ti')}
|
||||
|
||||
class MsdState(Enum):
    """Classification of an Msd code against the JOS specifications."""

    # The code is listed among the complete codes.
    FULL = 1
    # The code is listed only among the partial codes.
    PARTIAL = 2
    # The code is listed in neither set.
    UNKNOWN = 3
|
||||
class MsdState(IntEnum):
    """Classification of an Msd code, with integer values that can be compared."""

    # The code is not listed in the specifications.
    UNKNOWN = -1
    # The code is listed among the partial codes.
    PARTIAL = 1
    # The code is listed among the complete codes.
    FULL = 2
|
||||
|
||||
class MsdException(Exception):
    """Raised when an Msd code is rejected by validation or conversion."""
|
||||
@@ -266,9 +266,35 @@ class UD:
|
||||
class Msd:
|
||||
"""JOS msd."""
|
||||
|
||||
def __init__(self, code, language):
|
||||
class State(IntEnum):
    """How an Msd code is classified by the specifications.

    The integer values are ordered so that ``max()`` over several states
    picks FULL over PARTIAL.
    """

    # The code is in neither the full nor the partial code map.
    UNKNOWN = -1
    # The code is in the partial code map.
    PARTIAL = 1
    # The code is in the full code map.
    FULL = 2
|
||||
|
||||
def __init__(self, code, language, expected_state=State.FULL, require_valid=False):
    """Store the Msd code and classify it right away.

    Args:
        code: the Msd code
        language: key selecting the language-specific code maps of the
            default specifications
        expected_state(State): the state the code is expected to have
        require_valid(bool): requests strict validation; it is consulted
            when the expected state does not apply to the code
    """
    self.code, self.language = code, language
    self.expected_state, self.require_valid = expected_state, require_valid
    # Validation reads the attributes set above, so it has to run last.
    self.state = self._validate_and_get_state()
|
||||
|
||||
def _validate_and_get_state(self):
    """Classify ``self.code`` and compare the result with the expected state.

    The code is looked up in the default specifications for
    ``self.language``; it may be listed as a full code, a partial code,
    both, or neither.

    Returns:
        State: the highest-ranking state found for the code (FULL over
            PARTIAL), or UNKNOWN when the code is in neither map

    Raises:
        MsdException: if the expected state does not apply to the code and
            ``self.require_valid`` is set
    """
    specifications = DEFAULT_SPECIFICATIONS
    states = set()
    if self.code in specifications.codes_map[self.language]:
        states.add(self.State.FULL)
    if self.code in specifications.partial_codes_map[self.language]:
        states.add(self.State.PARTIAL)
    if not states:
        states.add(self.State.UNKNOWN)

    # Resolve the state into a local first: ``self.state`` is assigned from
    # this method's return value, so it does not exist yet at this point.
    state = max(states)

    if self.expected_state not in states:
        if self.require_valid:
            raise MsdException(f"Given msd '{self.code}' is '{state.name}', but expected state is '{self.expected_state.name}'.")
        if state == self.State.UNKNOWN:
            print(f"[WARN] The Msd '{self.code}' is unknown.")
        else:
            print(f"[WARN] The Msd '{self.code}' is unknown for expected state '{self.expected_state.name}'.")
    return state
|
||||
|
||||
def __str__(self):
    """Return a short description made of the code and the language."""
    return f'code={self.code}, language={self.language}'
|
||||
@@ -282,15 +308,7 @@ class Converter:
|
||||
|
||||
def __init__(self, xml_file_name=None):
|
||||
if (xml_file_name is None):
|
||||
resource = files('conversion_utils.resources').joinpath(JOS_SPECIFICATIONS_PICKLE_RESOURCE)
|
||||
if (resource.is_file()):
|
||||
try:
|
||||
with resource.open('rb') as pickle_file:
|
||||
self.specifications = pickle.load(pickle_file)
|
||||
except:
|
||||
exit('Could not parse specifications pickle file installed.')
|
||||
else:
|
||||
exit('No pickle installed or xml provided.')
|
||||
self.specifications = DEFAULT_SPECIFICATIONS
|
||||
else:
|
||||
parser = SpecificationsParser()
|
||||
try:
|
||||
@@ -313,7 +331,7 @@ class Converter:
|
||||
form_feature_map={},
|
||||
language=msd.language
|
||||
)
|
||||
return self.properties_to_msd(properties, msd.language).code
|
||||
return self.properties_to_msd(properties, msd.language, expected_state=Msd.State.PARTIAL).code
|
||||
|
||||
def _parse_msd_ud_conversion(self, file_name):
|
||||
"""Parse file with direct conversions from English Msd to Universal Dependencies."""
|
||||
@@ -336,32 +354,7 @@ class Converter:
|
||||
all_rules[priority].append(current_rules)
|
||||
return all_rules
|
||||
|
||||
def is_valid_msd(self, msd):
    """Tell whether the Msd code belongs to the standard JOS set."""
    known_codes = self.specifications.codes_map[msd.language]
    return msd.code in known_codes
|
||||
|
||||
def get_msd_state(self, msd):
    """Classify the Msd code as full, partial or unknown."""
    code, language = msd.code, msd.language
    specifications = self.specifications
    # The full-code map takes precedence over the partial-code map.
    if code in specifications.codes_map[language]:
        return MsdState.FULL
    if code in specifications.partial_codes_map[language]:
        return MsdState.PARTIAL
    return MsdState.UNKNOWN
|
||||
|
||||
def check_valid_msd(self, msd, require_valid_flag, allow_partial=True):
    """Raise an exception or print a warning when the Msd code is not acceptable.

    An unknown code raises MsdException when ``require_valid_flag`` is set
    and only prints a warning otherwise. A partial code raises MsdException
    when ``allow_partial`` is False.
    """
    state = self.get_msd_state(msd)
    if state == MsdState.UNKNOWN:
        text = f"The msd '{msd.code}' is unknown"
        if require_valid_flag:
            raise MsdException(text)
        print('[WARN] ' + text)
    elif state == MsdState.PARTIAL and not allow_partial:
        raise MsdException(f"Partial msd '{msd.code}' is not allowed. Full msd is required.")
|
||||
|
||||
def msd_to_properties(self, msd, language, lemma=None, require_valid_flag=False, warn_level_flag=False):
|
||||
def msd_to_properties(self, msd, language, lemma=None, warn_level_flag=False):
|
||||
"""Convert Msd to Properties.
|
||||
|
||||
The language of the generated Properties is specified and can differ from the Msd language.
|
||||
@@ -387,7 +380,6 @@ class Converter:
|
||||
Properties: the result of the conversion of the Msd in the language requested
|
||||
|
||||
"""
|
||||
self.check_valid_msd(msd, require_valid_flag)
|
||||
category_char = msd.code[0].lower()
|
||||
value_chars = msd.code[1:]
|
||||
category = self.specifications.find_category_by_code(category_char, msd.language)
|
||||
@@ -413,7 +405,7 @@ class Converter:
|
||||
form_feature_map[feature_name] = feature_value
|
||||
return Properties(category_name, lexeme_feature_map, form_feature_map, language)
|
||||
|
||||
def properties_to_msd(self, properties, language, require_valid_flag=False):
|
||||
def properties_to_msd(self, properties, language, expected_state=Msd.State.FULL):
|
||||
"""Convert Properties to Msd.
|
||||
|
||||
The language of the generated Msd is specified and can differ from the Properties language.
|
||||
@@ -445,8 +437,7 @@ class Converter:
|
||||
msd_code += '-'
|
||||
i += 1
|
||||
msd_code += position_map[position]
|
||||
msd = Msd(msd_code, language)
|
||||
self.check_valid_msd(msd, require_valid_flag)
|
||||
msd = Msd(msd_code, language, expected_state=expected_state)
|
||||
return msd
|
||||
|
||||
def msd_to_ud(self, msd, lemma):
|
||||
@@ -459,7 +450,8 @@ class Converter:
|
||||
lemma(str): the lemma of the word form with the MSD
|
||||
"""
|
||||
|
||||
self.check_valid_msd(msd, False, allow_partial=False)
|
||||
if msd.state != Msd.State.FULL:
|
||||
raise MsdException(f"Msd must be full to be converted to UD.")
|
||||
upos_category, *upos_features = self.mte_to_ud_features[msd.code].split()
|
||||
final_upos = ""
|
||||
|
||||
@@ -496,3 +488,17 @@ class Converter:
|
||||
|
||||
def translate_properties(self, properties, language):
    """Translate Properties into the given language by way of an intermediate Msd."""
    intermediate_msd = self.properties_to_msd(properties, language)
    return self.msd_to_properties(intermediate_msd, language)
|
||||
|
||||
def _load_default_specifications():
    """Load the bundled JOS specifications into ``DEFAULT_SPECIFICATIONS``.

    Reads the pickle resource located in ``conversion_utils.resources`` and
    stores the unpickled object in the module-level global.

    Raises:
        SystemExit: if the resource is missing or cannot be unpickled
    """
    global DEFAULT_SPECIFICATIONS
    resource = files('conversion_utils.resources').joinpath(JOS_SPECIFICATIONS_PICKLE_RESOURCE)
    if not resource.is_file():
        # SystemExit is raised directly instead of calling exit(): that helper
        # is injected by the site module and is not available under `python -S`.
        raise SystemExit("Default specifications not found.")
    try:
        with resource.open('rb') as pickle_file:
            # NOTE: unpickling executes code from the file, so this must only
            # ever read the resource shipped with the package.
            DEFAULT_SPECIFICATIONS = pickle.load(pickle_file)
    except Exception as error:
        # Keep the original failure attached as the cause for debugging.
        raise SystemExit('Could not parse specifications pickle file installed.') from error
|
||||
|
||||
_load_default_specifications()
|
||||
Reference in New Issue
Block a user