|
|
|
@ -53,12 +53,16 @@ LEVEL_EXCEPTIONS = {('pronoun', 2, 'čezme'), ('zaimek', 2, 'čezme'),
|
|
|
|
|
class Specifications:
    """JOS specifications with list of all word categories.

    Attributes:
        categories: List of category objects, in insertion order. Each
            category is expected to expose a ``codes`` mapping that is
            queried with ``codes.get(language)``.
        codes_map: Dict mapping a language key (``'en'`` or ``'sl'``) to the
            set of codes registered for that language via ``add_code``.
    """

    def __init__(self, *categories):
        """Create the specifications, optionally pre-filled with categories.

        The class body previously defined ``__init__`` twice; the later
        definition silently replaced the earlier one. This single constructor
        covers both: called without arguments it starts with an empty
        category list, and any positional arguments become the initial
        categories.

        Args:
            *categories: Zero or more category objects to start with.
        """
        self.categories = list(categories)
        self.codes_map = {'en': set(), 'sl': set()}

    def add_category(self, category):
        """Append ``category`` to the list of known categories."""
        self.categories.append(category)

    def add_code(self, code, language):
        """Register ``code`` in the set of codes for ``language``.

        Raises:
            KeyError: If ``language`` is not a key of ``codes_map``
                (only ``'en'`` and ``'sl'`` are created by the constructor).
        """
        self.codes_map[language].add(code)

    def find_category_by_code(self, char, language):
        """Return the first category whose code for ``language`` is ``char``.

        Categories are checked in insertion order.

        Returns:
            The first matching category, or ``None`` when no category matches.
        """
        return next(
            (
                category
                for category in self.categories
                if category.codes.get(language) == char
            ),
            None,
        )
|
|
|
|
|
|
|
|
|
@ -145,10 +149,17 @@ class SpecificationsParser:
|
|
|
|
|
|
|
|
|
|
def parse(self, file_name):
|
|
|
|
|
root = lxml.parse(file_name).getroot()
|
|
|
|
|
specifications = Specifications()
|
|
|
|
|
div_elements = xpath_find(root, 'tei:div')
|
|
|
|
|
specifications = Specifications()
|
|
|
|
|
for div_element in div_elements:
|
|
|
|
|
if (re.match(r'^msd\..-sl', get_xml_id(div_element))):
|
|
|
|
|
xml_id = get_xml_id(div_element)
|
|
|
|
|
if (xml_id == 'msd.msds-sl'):
|
|
|
|
|
msd_elements = xpath_find(div_element, 'tei:table/tei:row[@role="msd"]')
|
|
|
|
|
for msd_element in msd_elements:
|
|
|
|
|
msd_codes = self.get_cell_pair(msd_element, 'msd')
|
|
|
|
|
specifications.add_code(msd_codes.get('en').capitalize(), 'en')
|
|
|
|
|
specifications.add_code(msd_codes.get('sl').capitalize(), 'sl')
|
|
|
|
|
elif (re.match(r'^msd\..-sl', xml_id)):
|
|
|
|
|
category_element = xpath_find(div_element, 'tei:table/tei:row[@role="type"]')[0]
|
|
|
|
|
category_names = self.get_cell_pair(category_element, 'value')
|
|
|
|
|
category_codes = self.get_cell_pair(category_element, 'code')
|
|
|
|
@ -214,6 +225,9 @@ class Converter:
|
|
|
|
|
def msd_to_properties(self, msd, language, lemma=None):
|
|
|
|
|
"""Convert Msd to Properties (possibly in the other language)."""
|
|
|
|
|
|
|
|
|
|
if (msd.code not in self.specifications.codes_map[msd.language]):
|
|
|
|
|
exit('[ERROR] msd {} is unknown'.format(msd.code))
|
|
|
|
|
|
|
|
|
|
category_char = msd.code[0].lower()
|
|
|
|
|
value_chars = msd.code[1:]
|
|
|
|
|
category = self.specifications.find_category_by_code(category_char, msd.language)
|
|
|
|
|