diff --git a/conversion_utils/jos_msds_and_properties.py b/conversion_utils/jos_msds_and_properties.py index 604dd06..e4332b2 100644 --- a/conversion_utils/jos_msds_and_properties.py +++ b/conversion_utils/jos_msds_and_properties.py @@ -297,11 +297,24 @@ class Converter: self.specifications = parser.parse(xml_file_name) except: exit('Could not parse specifications xml file provided.') - + self.mte_to_ud_features = self._parse_msd_ud_conversion(MSD_TO_FEATURES) self.mte_to_ud_features_rules = self._parse_ud_rules(JOS_TO_UD_FEATURES_RULES) self.mte_to_upos_rules = self._parse_ud_rules(JOS_TO_UPOS_RULES) + def _get_partial_msd(self, msd): + properties = self.msd_to_properties(msd, msd.language) + category_char = msd.code[0].lower() + category = self.specifications.find_category_by_code(category_char, msd.language) + category_name = category.names.get(msd.language) + properties = Properties( + category=category_name, + lexeme_feature_map=properties.lexeme_feature_map, + form_feature_map={}, + language=msd.language + ) + return self.properties_to_msd(properties, msd.language).code + def _parse_msd_ud_conversion(self, file_name): """Parse file with direct conversions from English Msd to Universal Dependencies.""" conversion_map = defaultdict() @@ -329,13 +342,12 @@ class Converter: def get_msd_state(self, msd): """Determine if the Msd code is full, partial or unknown.""" - code_map = self.specifications.codes_map[msd.language] - if msd.code in code_map: + if msd.code in self.specifications.codes_map[msd.language]: return MsdState.FULL - for msd_code in code_map: - if msd_code.startswith(msd.code): - return MsdState.PARTIAL - return MsdState.UNKNOWN + elif msd.code in self.specifications.partial_codes_map[msd.language]: + return MsdState.PARTIAL + else: + return MsdState.UNKNOWN def check_valid_msd(self, msd, require_valid_flag, allow_partial=True): """If the Msd code is not valid, raise an exception or give a warning.""" diff --git a/conversion_utils/resources/jos_specifications.pickle b/conversion_utils/resources/jos_specifications.pickle index 6b0fdf0..1fe5f0e 100644 Binary files a/conversion_utils/resources/jos_specifications.pickle and b/conversion_utils/resources/jos_specifications.pickle differ