Replaced JOS XML specifications with preprocessed pickle
This commit is contained in:
parent
eca02ebdd3
commit
a088025026
1
MANIFEST.in
Normal file
1
MANIFEST.in
Normal file
|
@ -0,0 +1 @@
|
||||||
|
include conversion_utils/resources/jos_specifications.pickle
|
|
@ -1,8 +1,12 @@
|
||||||
import lxml.etree as lxml
|
import lxml.etree as lxml
|
||||||
import re
|
import re
|
||||||
|
import pickle
|
||||||
|
import importlib.resources as pkg_resources
|
||||||
|
|
||||||
from conversion_utils.utils import xpath_find, get_xml_id
|
from conversion_utils.utils import xpath_find, get_xml_id
|
||||||
|
|
||||||
|
JOS_SPECIFICATIONS_PICKLE_RESOURCE = 'jos_specifications.pickle'
|
||||||
|
|
||||||
## Positions of lexeme-level features for each category
|
## Positions of lexeme-level features for each category
|
||||||
LEXEME_FEATURE_MAP = {'noun':{1,2},
|
LEXEME_FEATURE_MAP = {'noun':{1,2},
|
||||||
'verb':{1,2},
|
'verb':{1,2},
|
||||||
|
@ -219,8 +223,22 @@ class Msd:
|
||||||
class Converter:
|
class Converter:
|
||||||
"""Converter between Msd and Properties objects."""
|
"""Converter between Msd and Properties objects."""
|
||||||
|
|
||||||
def __init__(self, xml_file_name=None):
    """Load the JOS specifications used by the conversion methods.

    When ``xml_file_name`` is None, the specifications are unpickled from
    the ``jos_specifications.pickle`` resource of the
    ``conversion_utils.resources`` package. Otherwise the given XML file
    is parsed with ``SpecificationsParser``.

    Args:
        xml_file_name: Path of a specifications XML file, or None to use
            the packaged pickle.

    Raises:
        SystemExit: If the pickle resource is not installed, or if the
            selected source cannot be loaded. The original error is
            attached as ``__cause__``.
    """
    if xml_file_name is not None:
        try:
            self.specifications = SpecificationsParser().parse(xml_file_name)
        except Exception as error:
            # ``except Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt/SystemExit propagate untouched.
            raise SystemExit('Could not parse specifications xml file provided.') from error
        return

    resource_package = 'conversion_utils.resources'
    if not pkg_resources.is_resource(resource_package, JOS_SPECIFICATIONS_PICKLE_RESOURCE):
        # Raise SystemExit directly: the ``exit`` helper is injected by the
        # ``site`` module and is not guaranteed to exist (e.g. ``python -S``).
        raise SystemExit('No pickle installed or xml provided.')

    try:
        with pkg_resources.open_binary(resource_package, JOS_SPECIFICATIONS_PICKLE_RESOURCE) as pickle_file:
            # The pickle is a resource shipped with this package; never
            # point this at data from an untrusted source.
            self.specifications = pickle.load(pickle_file)
    except Exception as error:
        raise SystemExit('Could not parse specifications pickle file installed.') from error
|
||||||
|
|
||||||
def msd_to_properties(self, msd, language, lemma=None):
|
def msd_to_properties(self, msd, language, lemma=None):
|
||||||
"""Convert Msd to Properties (possibly in the other language)."""
|
"""Convert Msd to Properties (possibly in the other language)."""
|
||||||
|
|
0
conversion_utils/resources/__init__.py
Normal file
0
conversion_utils/resources/__init__.py
Normal file
BIN
conversion_utils/resources/jos_specifications.pickle
Normal file
BIN
conversion_utils/resources/jos_specifications.pickle
Normal file
Binary file not shown.
37867
resources/msd-sl.spc.xml
37867
resources/msd-sl.spc.xml
File diff suppressed because it is too large
Load Diff
13
scripts/install_jos_specifications.py
Normal file
13
scripts/install_jos_specifications.py
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
import pickle
|
||||||
|
import argparse
|
||||||
|
from conversion_utils.jos_msds_and_properties import SpecificationsParser
|
||||||
|
|
||||||
|
def main():
    """Parse a TEI specifications XML file and save the result as a pickle.

    Reads the required ``-xml`` (input) and ``-pickle`` (output) command-line
    arguments, parses the XML with ``SpecificationsParser`` and dumps the
    resulting object to the output path.
    """
    arg_parser = argparse.ArgumentParser(description='Parse source TEI specifications file and save as pickle.')
    arg_parser.add_argument('-xml', type=str, help='input XML file', required=True)
    arg_parser.add_argument('-pickle', type=str, help='output pickle file', required=True)
    arguments = arg_parser.parse_args()

    specifications = SpecificationsParser().parse(arguments.xml)
    with open(arguments.pickle, 'wb') as pickle_file:
        pickle.dump(specifications, pickle_file)


# Guard the entry point so importing this module does not parse sys.argv
# or write any file.
if __name__ == '__main__':
    main()
|
3
setup.py
3
setup.py
|
@ -6,5 +6,6 @@ setup(name='conversion_utils',
|
||||||
url='https://gitea.cjvt.si/generic/conversion_utils',
|
url='https://gitea.cjvt.si/generic/conversion_utils',
|
||||||
author='Cyprian Laskowski',
|
author='Cyprian Laskowski',
|
||||||
author_email='cyp@cjvt.si',
|
author_email='cyp@cjvt.si',
|
||||||
packages=['conversion_utils'],
|
packages=['conversion_utils', 'conversion_utils.resources'],
|
||||||
|
include_package_data=True,
|
||||||
zip_safe=True)
|
zip_safe=True)
|
||||||
|
|
|
@ -6,10 +6,7 @@ from conversion_utils.jos_msds_and_properties import SpecificationsParser, Conve
|
||||||
class JosMsdToPropertiesTestCase(unittest.TestCase):
|
class JosMsdToPropertiesTestCase(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
    """Create the converter shared by the MSD-to-properties tests."""
    # No XML path is given, so Converter loads its packaged specifications.
    self.converter = Converter()
|
|
||||||
|
|
||||||
def test_en_en(self):
|
def test_en_en(self):
|
||||||
properties = self.converter.msd_to_properties(Msd('Ncfpd', 'en'), 'en')
|
properties = self.converter.msd_to_properties(Msd('Ncfpd', 'en'), 'en')
|
||||||
|
|
|
@ -6,10 +6,7 @@ from conversion_utils.jos_msds_and_properties import SpecificationsParser, Conve
|
||||||
class JosPropertiesToMsdTestCase(unittest.TestCase):
|
class JosPropertiesToMsdTestCase(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
    """Create the converter shared by the properties-to-MSD tests."""
    # No XML path is given, so Converter loads its packaged specifications.
    self.converter = Converter()
|
|
||||||
|
|
||||||
def test_en_en(self):
|
def test_en_en(self):
|
||||||
msd = self.converter.properties_to_msd(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'en')
|
msd = self.converter.properties_to_msd(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'en')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user