Specifications are loaded on import
This commit is contained in:
@@ -4,7 +4,7 @@ import lxml.etree as lxml
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from importlib_resources import files
|
from importlib_resources import files
|
||||||
|
|
||||||
from enum import Enum
|
from enum import IntEnum
|
||||||
|
|
||||||
from conversion_utils.utils import xpath_find, get_xml_id
|
from conversion_utils.utils import xpath_find, get_xml_id
|
||||||
|
|
||||||
@@ -62,10 +62,10 @@ LEVEL_EXCEPTIONS = {('pronoun', 2, 'čezme'), ('zaimek', 2, 'čezme'),
|
|||||||
('pronoun', 8, 'se'), ('zaimek', 8, 'se'),
|
('pronoun', 8, 'se'), ('zaimek', 8, 'se'),
|
||||||
('pronoun', 8, 'ti'), ('zaimek', 8, 'ti')}
|
('pronoun', 8, 'ti'), ('zaimek', 8, 'ti')}
|
||||||
|
|
||||||
class MsdState(Enum):
|
class MsdState(IntEnum):
|
||||||
FULL = 1
|
UNKNOWN = -1
|
||||||
PARTIAL = 2
|
PARTIAL = 1
|
||||||
UNKNOWN = 3
|
FULL = 2
|
||||||
|
|
||||||
class MsdException(Exception):
|
class MsdException(Exception):
|
||||||
pass
|
pass
|
||||||
@@ -266,9 +266,35 @@ class UD:
|
|||||||
class Msd:
|
class Msd:
|
||||||
"""JOS msd."""
|
"""JOS msd."""
|
||||||
|
|
||||||
def __init__(self, code, language):
|
class State(IntEnum):
    """Validation state of an Msd code.

    The members are integers so that states can be compared and the
    highest-ranked one selected with ``max``.
    """

    # The code is in neither the full nor the partial code set.
    UNKNOWN = -1
    # The code is in the partial code set for its language.
    PARTIAL = 1
    # The code is in the full code set for its language.
    FULL = 2
|
||||||
|
|
||||||
|
def __init__(self, code, language, expected_state=State.FULL, require_valid=False):
|
||||||
self.code = code
|
self.code = code
|
||||||
self.language = language
|
self.language = language
|
||||||
|
self.expected_state = expected_state
|
||||||
|
self.require_valid = require_valid
|
||||||
|
self.state = self._validate_and_get_state()
|
||||||
|
|
||||||
|
def _validate_and_get_state(self):
    """Classify ``self.code`` and check it against ``self.expected_state``.

    The code is looked up in the full and the partial code sets of
    ``DEFAULT_SPECIFICATIONS`` for ``self.language``; a code may belong to
    both. If it belongs to neither, it is classified as unknown.

    Returns:
        State: the highest-ranked state that applies to the code.

    Raises:
        MsdException: if the expected state does not apply to the code and
            ``self.require_valid`` is set. Otherwise a warning is printed.
    """
    states = set()
    if self.code in DEFAULT_SPECIFICATIONS.codes_map[self.language]:
        states.add(self.State.FULL)
    if self.code in DEFAULT_SPECIFICATIONS.partial_codes_map[self.language]:
        states.add(self.State.PARTIAL)
    if not states:
        states.add(self.State.UNKNOWN)

    # Resolve the result up front: ``self.state`` is only assigned from this
    # method's return value, so it must not be read while validating.
    state = max(states)

    if self.expected_state not in states:
        if self.require_valid:
            raise MsdException(f"Given msd '{self.code}' is '{state.name}', but expected state is '{self.expected_state.name}'.")
        if state == self.State.UNKNOWN:
            print(f"[WARN] The Msd '{self.code}' is unknown.")
        else:
            print(f"[WARN] The Msd '{self.code}' is unknown for expected state '{self.expected_state.name}'.")
    return state
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'code={code}, language={language}'.format(code=self.code, language=self.language)
|
return 'code={code}, language={language}'.format(code=self.code, language=self.language)
|
||||||
@@ -282,15 +308,7 @@ class Converter:
|
|||||||
|
|
||||||
def __init__(self, xml_file_name=None):
|
def __init__(self, xml_file_name=None):
|
||||||
if (xml_file_name is None):
|
if (xml_file_name is None):
|
||||||
resource = files('conversion_utils.resources').joinpath(JOS_SPECIFICATIONS_PICKLE_RESOURCE)
|
self.specifications = DEFAULT_SPECIFICATIONS
|
||||||
if (resource.is_file()):
|
|
||||||
try:
|
|
||||||
with resource.open('rb') as pickle_file:
|
|
||||||
self.specifications = pickle.load(pickle_file)
|
|
||||||
except:
|
|
||||||
exit('Could not parse specifications pickle file installed.')
|
|
||||||
else:
|
|
||||||
exit('No pickle installed or xml provided.')
|
|
||||||
else:
|
else:
|
||||||
parser = SpecificationsParser()
|
parser = SpecificationsParser()
|
||||||
try:
|
try:
|
||||||
@@ -313,7 +331,7 @@ class Converter:
|
|||||||
form_feature_map={},
|
form_feature_map={},
|
||||||
language=msd.language
|
language=msd.language
|
||||||
)
|
)
|
||||||
return self.properties_to_msd(properties, msd.language).code
|
return self.properties_to_msd(properties, msd.language, expected_state=Msd.State.PARTIAL).code
|
||||||
|
|
||||||
def _parse_msd_ud_conversion(self, file_name):
|
def _parse_msd_ud_conversion(self, file_name):
|
||||||
"""Parse file with direct conversions from English Msd to Universal Dependencies."""
|
"""Parse file with direct conversions from English Msd to Universal Dependencies."""
|
||||||
@@ -336,32 +354,7 @@ class Converter:
|
|||||||
all_rules[priority].append(current_rules)
|
all_rules[priority].append(current_rules)
|
||||||
return all_rules
|
return all_rules
|
||||||
|
|
||||||
def is_valid_msd(self, msd):
|
def msd_to_properties(self, msd, language, lemma=None, warn_level_flag=False):
|
||||||
"""Verify if the Msd code is in the standard JOS set."""
|
|
||||||
return msd.code in self.specifications.codes_map[msd.language]
|
|
||||||
|
|
||||||
def get_msd_state(self, msd):
    """Classify the Msd code as full, partial or unknown.

    The full code set for the Msd language is consulted first, then the
    partial one; a code found in neither is reported as unknown.
    """
    code = msd.code
    language = msd.language
    if code in self.specifications.codes_map[language]:
        return MsdState.FULL
    if code in self.specifications.partial_codes_map[language]:
        return MsdState.PARTIAL
    return MsdState.UNKNOWN
|
|
||||||
|
|
||||||
def check_valid_msd(self, msd, require_valid_flag, allow_partial=True):
    """Raise or warn when the Msd code is not acceptable.

    An unknown code raises MsdException when require_valid_flag is set and
    prints a warning otherwise. A partial code raises MsdException when
    allow_partial is False.
    """
    state = self.get_msd_state(msd)

    if state == MsdState.UNKNOWN:
        warning = f"The msd '{msd.code}' is unknown"
        if not require_valid_flag:
            print('[WARN] ' + warning)
        else:
            raise MsdException(warning)

    if not allow_partial and state == MsdState.PARTIAL:
        raise MsdException(f"Partial msd '{msd.code}' is not allowed. Full msd is required.")
|
|
||||||
|
|
||||||
def msd_to_properties(self, msd, language, lemma=None, require_valid_flag=False, warn_level_flag=False):
|
|
||||||
"""Convert Msd to Properties.
|
"""Convert Msd to Properties.
|
||||||
|
|
||||||
The language of the generated Properties is specified and can differ from the Msd language.
|
The language of the generated Properties is specified and can differ from the Msd language.
|
||||||
@@ -387,7 +380,6 @@ class Converter:
|
|||||||
Properties: the result of the conversion of the Msd in the language requested
|
Properties: the result of the conversion of the Msd in the language requested
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self.check_valid_msd(msd, require_valid_flag)
|
|
||||||
category_char = msd.code[0].lower()
|
category_char = msd.code[0].lower()
|
||||||
value_chars = msd.code[1:]
|
value_chars = msd.code[1:]
|
||||||
category = self.specifications.find_category_by_code(category_char, msd.language)
|
category = self.specifications.find_category_by_code(category_char, msd.language)
|
||||||
@@ -413,7 +405,7 @@ class Converter:
|
|||||||
form_feature_map[feature_name] = feature_value
|
form_feature_map[feature_name] = feature_value
|
||||||
return Properties(category_name, lexeme_feature_map, form_feature_map, language)
|
return Properties(category_name, lexeme_feature_map, form_feature_map, language)
|
||||||
|
|
||||||
def properties_to_msd(self, properties, language, require_valid_flag=False):
|
def properties_to_msd(self, properties, language, expected_state=Msd.State.FULL):
|
||||||
"""Convert Properties to Msd.
|
"""Convert Properties to Msd.
|
||||||
|
|
||||||
The language of the generated Msd is specified and can differ from the Properties language.
|
The language of the generated Msd is specified and can differ from the Properties language.
|
||||||
@@ -445,8 +437,7 @@ class Converter:
|
|||||||
msd_code += '-'
|
msd_code += '-'
|
||||||
i += 1
|
i += 1
|
||||||
msd_code += position_map[position]
|
msd_code += position_map[position]
|
||||||
msd = Msd(msd_code, language)
|
msd = Msd(msd_code, language, expected_state=expected_state)
|
||||||
self.check_valid_msd(msd, require_valid_flag)
|
|
||||||
return msd
|
return msd
|
||||||
|
|
||||||
def msd_to_ud(self, msd, lemma):
|
def msd_to_ud(self, msd, lemma):
|
||||||
@@ -459,7 +450,8 @@ class Converter:
|
|||||||
lemma(str): the lemma of the word form with the MSD
|
lemma(str): the lemma of the word form with the MSD
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.check_valid_msd(msd, False, allow_partial=False)
|
if msd.state != Msd.State.FULL:
|
||||||
|
raise MsdException(f"Msd must be full to be converted to UD.")
|
||||||
upos_category, *upos_features = self.mte_to_ud_features[msd.code].split()
|
upos_category, *upos_features = self.mte_to_ud_features[msd.code].split()
|
||||||
final_upos = ""
|
final_upos = ""
|
||||||
|
|
||||||
@@ -496,3 +488,17 @@ class Converter:
|
|||||||
|
|
||||||
def translate_properties(self, properties, language):
|
def translate_properties(self, properties, language):
|
||||||
return self.msd_to_properties(self.properties_to_msd(properties, language), language)
|
return self.msd_to_properties(self.properties_to_msd(properties, language), language)
|
||||||
|
|
||||||
|
def _load_default_specifications():
    """Load the packaged specifications pickle into ``DEFAULT_SPECIFICATIONS``.

    The resource named by ``JOS_SPECIFICATIONS_PICKLE_RESOURCE`` is looked up
    in the ``conversion_utils.resources`` package and unpickled into the
    module-level ``DEFAULT_SPECIFICATIONS``.

    Raises:
        SystemExit: if the resource is not a file, or if it cannot be opened
            or unpickled (the original error is attached as the cause).
    """
    global DEFAULT_SPECIFICATIONS
    resource = files('conversion_utils.resources').joinpath(JOS_SPECIFICATIONS_PICKLE_RESOURCE)

    # SystemExit is raised directly instead of calling exit(): that helper is
    # injected by the site module for interactive use and is not guaranteed
    # to exist in every runtime.
    if not resource.is_file():
        raise SystemExit("Default specifications not found.")

    try:
        with resource.open('rb') as pickle_file:
            DEFAULT_SPECIFICATIONS = pickle.load(pickle_file)
    except Exception as e:
        raise SystemExit('Could not parse specifications pickle file installed.') from e
|
||||||
|
|
||||||
|
_load_default_specifications()
|
||||||
Reference in New Issue
Block a user