Replaced JOS XML specifications with preprocessed pickle
This commit is contained in:
parent
eca02ebdd3
commit
a088025026
1
MANIFEST.in
Normal file
1
MANIFEST.in
Normal file
|
@ -0,0 +1 @@
|
|||
include conversion_utils/resources/jos_specifications.pickle
|
|
@ -1,8 +1,12 @@
|
|||
import lxml.etree as lxml
|
||||
import re
|
||||
import pickle
|
||||
import importlib.resources as pkg_resources
|
||||
|
||||
from conversion_utils.utils import xpath_find, get_xml_id
|
||||
|
||||
JOS_SPECIFICATIONS_PICKLE_RESOURCE = 'jos_specifications.pickle'
|
||||
|
||||
## Positions of lexeme-level features for each category
|
||||
LEXEME_FEATURE_MAP = {'noun':{1,2},
|
||||
'verb':{1,2},
|
||||
|
@ -219,8 +223,22 @@ class Msd:
|
|||
class Converter:
|
||||
"""Converter between Msd and Properties objects."""
|
||||
|
||||
def __init__(self, specifications):
|
||||
self.specifications = specifications
|
||||
def __init__(self, xml_file_name=None):
    """Load the JOS specifications used by this converter.

    Args:
        xml_file_name: Path of a specifications XML file to parse with
            ``SpecificationsParser``. When ``None`` (the default), the
            specifications are unpickled from the
            ``jos_specifications.pickle`` resource of the
            ``conversion_utils.resources`` package instead.

    Raises:
        SystemExit: If no XML file is given and the pickle resource is
            missing, or if the pickle / XML file cannot be loaded. The
            underlying error is attached as ``__cause__``.
    """
    if xml_file_name is None:
        resource_package = 'conversion_utils.resources'
        if not pkg_resources.is_resource(resource_package, JOS_SPECIFICATIONS_PICKLE_RESOURCE):
            raise SystemExit('No pickle installed or xml provided.')
        try:
            with pkg_resources.open_binary(resource_package, JOS_SPECIFICATIONS_PICKLE_RESOURCE) as pickle_file:
                # NOTE(review): unpickling executes arbitrary code; this is
                # acceptable only because the resource ships inside the package.
                self.specifications = pickle.load(pickle_file)
        except Exception as error:
            # Catch Exception rather than everything so that KeyboardInterrupt
            # is not reported as a parse failure. SystemExit is raised directly
            # because the site-provided exit() helper is not guaranteed to
            # exist and closes sys.stdin as a side effect.
            raise SystemExit('Could not parse specifications pickle file installed.') from error
    else:
        # Constructed outside the try block, as before, so that only parse
        # failures are converted into the exit message below.
        parser = SpecificationsParser()
        try:
            self.specifications = parser.parse(xml_file_name)
        except Exception as error:
            raise SystemExit('Could not parse specifications xml file provided.') from error
|
||||
|
||||
def msd_to_properties(self, msd, language, lemma=None):
|
||||
"""Convert Msd to Properties (possibly in the other language)."""
|
||||
|
|
0
conversion_utils/resources/__init__.py
Normal file
0
conversion_utils/resources/__init__.py
Normal file
BIN
conversion_utils/resources/jos_specifications.pickle
Normal file
BIN
conversion_utils/resources/jos_specifications.pickle
Normal file
Binary file not shown.
37867
resources/msd-sl.spc.xml
37867
resources/msd-sl.spc.xml
File diff suppressed because it is too large
Load Diff
13
scripts/install_jos_specifications.py
Normal file
13
scripts/install_jos_specifications.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
import pickle
|
||||
import argparse
|
||||
from conversion_utils.jos_msds_and_properties import SpecificationsParser
|
||||
|
||||
def main():
    """Parse a source TEI specifications XML file and save it as a pickle.

    Reads the required ``-xml`` (input XML file) and ``-pickle`` (output
    pickle file) command-line arguments, parses the XML file with
    ``SpecificationsParser`` and dumps the result to the output path.
    """
    arg_parser = argparse.ArgumentParser(description='Parse source TEI specifications file and save as pickle.')
    arg_parser.add_argument('-xml', type=str, help='input XML file', required=True)
    arg_parser.add_argument('-pickle', type=str, help='output pickle file', required=True)
    arguments = arg_parser.parse_args()

    specifications = SpecificationsParser().parse(arguments.xml)
    with open(arguments.pickle, 'wb') as pickle_file:
        pickle.dump(specifications, pickle_file)


# Guard the entry point so that importing this module does not parse
# sys.argv or write any file.
if __name__ == '__main__':
    main()
|
3
setup.py
3
setup.py
|
@ -6,5 +6,6 @@ setup(name='conversion_utils',
|
|||
url='https://gitea.cjvt.si/generic/conversion_utils',
|
||||
author='Cyprian Laskowski',
|
||||
author_email='cyp@cjvt.si',
|
||||
packages=['conversion_utils'],
|
||||
packages=['conversion_utils', 'conversion_utils.resources'],
|
||||
include_package_data=True,
|
||||
zip_safe=True)
|
||||
|
|
|
@ -6,10 +6,7 @@ from conversion_utils.jos_msds_and_properties import SpecificationsParser, Conve
|
|||
class JosMsdToPropertiesTestCase(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml')
|
||||
parser = SpecificationsParser()
|
||||
specifications = parser.parse(specifications_file_name)
|
||||
self.converter = Converter(specifications)
|
||||
self.converter = Converter()
|
||||
|
||||
def test_en_en(self):
|
||||
properties = self.converter.msd_to_properties(Msd('Ncfpd', 'en'), 'en')
|
||||
|
|
|
@ -6,10 +6,7 @@ from conversion_utils.jos_msds_and_properties import SpecificationsParser, Conve
|
|||
class JosPropertiesToMsdTestCase(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
specifications_file_name = os.path.join(os.path.dirname(__file__), '../resources/msd-sl.spc.xml')
|
||||
parser = SpecificationsParser()
|
||||
specifications = parser.parse(specifications_file_name)
|
||||
self.converter = Converter(specifications)
|
||||
self.converter = Converter()
|
||||
|
||||
def test_en_en(self):
|
||||
msd = self.converter.properties_to_msd(Properties('noun', {'type':'common', 'gender':'feminine'}, {'number':'dual', 'case':'nominative'}, 'en'), 'en')
|
||||
|
|
Loading…
Reference in New Issue
Block a user