Replaced JOS XML specifications with preprocessed pickle

This commit is contained in:
Cyprian Laskowski 2021-09-30 00:22:43 +02:00
parent eca02ebdd3
commit a088025026
9 changed files with 38 additions and 37878 deletions

1
MANIFEST.in Normal file
View File

@ -0,0 +1 @@
include conversion_utils/resources/jos_specifications.pickle

View File

@ -1,8 +1,12 @@
import lxml.etree as lxml
import re
import pickle
import importlib.resources as pkg_resources
from conversion_utils.utils import xpath_find, get_xml_id
JOS_SPECIFICATIONS_PICKLE_RESOURCE = 'jos_specifications.pickle'
## Positions of lexeme-level features for each category
LEXEME_FEATURE_MAP = {'noun':{1,2},
'verb':{1,2},
@ -219,8 +223,22 @@ class Msd:
class Converter:
"""Converter between Msd and Properties objects."""
def __init__(self, specifications):
# Keep the caller-supplied specifications object on the instance unchanged.
self.specifications = specifications
def __init__(self, xml_file_name=None):
    """Load the specifications this converter works with.

    When no file name is given, the specifications are unpickled from the
    resource bundled in the ``conversion_utils.resources`` package.
    Otherwise they are parsed from the given XML file.

    Args:
        xml_file_name: Path of a specifications XML file, or ``None`` to
            fall back to the packaged pickle.

    Raises:
        SystemExit: If the packaged pickle is missing or cannot be loaded,
            or if the XML file cannot be parsed.  The underlying error is
            attached as ``__cause__``.
    """
    # SystemExit is raised directly instead of calling exit(): that builtin
    # is injected by the ``site`` module (absent under ``python -S`` or in
    # frozen apps) and closes stdin as a side effect.
    if xml_file_name is None:
        if not pkg_resources.is_resource('conversion_utils.resources', JOS_SPECIFICATIONS_PICKLE_RESOURCE):
            raise SystemExit('No pickle installed or xml provided.')
        try:
            with pkg_resources.open_binary('conversion_utils.resources', JOS_SPECIFICATIONS_PICKLE_RESOURCE) as pickle_file:
                # NOTE(review): pickle.load executes arbitrary code; this is
                # only acceptable because the file is a resource of the
                # installed package.
                self.specifications = pickle.load(pickle_file)
        except Exception as error:
            # ``except Exception`` (not a bare except) lets KeyboardInterrupt
            # and SystemExit pass through instead of being misreported.
            raise SystemExit('Could not parse specifications pickle file installed.') from error
    else:
        parser = SpecificationsParser()
        try:
            self.specifications = parser.parse(xml_file_name)
        except Exception as error:
            raise SystemExit('Could not parse specifications xml file provided.') from error
def msd_to_properties(self, msd, language, lemma=None):
"""Convert Msd to Properties (possibly in the other language)."""

View File

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,13 @@
import pickle
import argparse
from conversion_utils.jos_msds_and_properties import SpecificationsParser
# Command line: both the XML input path and the pickle output path are mandatory.
arg_parser = argparse.ArgumentParser(
    description='Parse source TEI specifications file and save as pickle.',
)
arg_parser.add_argument(
    '-xml',
    required=True,
    type=str,
    help='input XML file',
)
arg_parser.add_argument(
    '-pickle',
    required=True,
    type=str,
    help='output pickle file',
)
arguments = arg_parser.parse_args()

# Parse the specifications once, then serialise the resulting object.
parser = SpecificationsParser()
specifications = parser.parse(arguments.xml)

with open(arguments.pickle, mode='wb') as pickle_file:
    pickle.dump(specifications, pickle_file)

View File

@ -6,5 +6,6 @@ setup(name='conversion_utils',
url='https://gitea.cjvt.si/generic/conversion_utils',
author='Cyprian Laskowski',
author_email='cyp@cjvt.si',
packages=['conversion_utils'],
packages=['conversion_utils', 'conversion_utils.resources'],
include_package_data=True,
zip_safe=True)

View File

@ -6,10 +6,7 @@ from conversion_utils.jos_msds_and_properties import SpecificationsParser, Conve
class JosMsdToPropertiesTestCase(unittest.TestCase):
def setUp(self):
# Path of msd-sl.spc.xml, resolved relative to this test module's directory.
specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml')
# Parse that XML file and build a converter from the parsed specifications.
parser = SpecificationsParser()
specifications = parser.parse(specifications_file_name)
self.converter = Converter(specifications)
# Build the converter with no arguments; as written, this assignment replaces the one above.
self.converter = Converter()
def test_en_en(self):
properties = self.converter.msd_to_properties(Msd('Ncfpd', 'en'), 'en')

View File

@ -6,10 +6,7 @@ from conversion_utils.jos_msds_and_properties import SpecificationsParser, Conve
class JosPropertiesToMsdTestCase(unittest.TestCase):
def setUp(self):
# Path of msd-sl.spc.xml, resolved relative to this test module's directory.
specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml')
# Parse that XML file and build a converter from the parsed specifications.
parser = SpecificationsParser()
specifications = parser.parse(specifications_file_name)
self.converter = Converter(specifications)
# Build the converter with no arguments; as written, this assignment replaces the one above.
self.converter = Converter()
def test_en_en(self):
msd = self.converter.properties_to_msd(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'en')