You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

68 lines
2.3 KiB

import pkg_resources
import pandas
import sys
# MSD (morphosyntactic description) tag mappings from Slovene ("slo") to English ("ang")
5 years ago
class Msdmap:
    """Map Slovene MSD (morphosyntactic description) tags to English.

    Two sources of data are used:

    * ``pos_slo_ang`` -- a small hard-coded part-of-speech table.
    * ``msd_table`` -- a pandas DataFrame read from the tab-separated file
      ``msd-human-sl.tbl`` that ships next to this module.
    """

    def __init__(self):
        """Build the part-of-speech table and load the MSD table from disk."""
        # http://nl.ijs.si/ME/V4/msd/html/msd.categories-sl.html
        # Each row: (Slovene name, Slovene code, English name, English code).
        self.pos_slo_ang = [
            ("samostalnik", "S", "Noun", "N"),
            ("glagol", "G", "Verb", "V"),
            ("pridevnik", "P", "Adjective", "A"),
            ("prislov", "R", "Adverb", "R"),
            ("zaimek", "Z", "Pronoun", "P"),
            ("števnik", "K", "Numeral", "M"),
            ("predlog", "D", "Adposition", "S"),
            ("veznik", "V", "Conjunction", "C"),
            ("členek", "L", "Particle", "Q"),
            ("medmet", "M", "Interjection", "I"),
            ("okrajšava", "O", "Abbreviation", "Y"),
            ("neuvrščeno", "N", "Residual", "X"),
        ]

        table_name = "msd-human-sl.tbl"
        table_path = pkg_resources.resource_filename(__name__, table_name)
        with open(table_path, "rb") as fp:
            # TODO: pandas is awfully slow --- might need optimization
            self.msd_table = pandas.read_csv(
                fp,
                # Must be passed by keyword: since pandas 2.0 every argument
                # after the path is keyword-only, so a positional "\t"
                # raises TypeError.
                sep="\t",
                names=[
                    "id",
                    "eng_msd",
                    "eng_long",
                    "slo_msd",
                    "slo_long",
                    "slo_very_long",
                ],
            )
        # NOTE: the previous ``self.msd_table.set_index('slo_msd')`` call was
        # a no-op (set_index returns a new frame and the result was thrown
        # away).  ``msd_table`` is deliberately left un-indexed so existing
        # readers of its ``slo_msd`` column keep working; fast lookups go
        # through the dict built by ``_eng_long_index`` instead.

    def _eng_long_index(self):
        """Return the cached ``slo_msd -> eng_long`` dict, building it once.

        The dict is derived from ``self.msd_table`` on first use and stored
        on the instance as ``_eng_long_by_slo_msd``.  If ``msd_table`` is
        replaced afterwards, delete that attribute to force a rebuild.
        """
        index = getattr(self, "_eng_long_by_slo_msd", None)
        if index is None:
            index = {}
            pairs = zip(self.msd_table["slo_msd"], self.msd_table["eng_long"])
            for tag, eng_long in pairs:
                # setdefault keeps the FIRST row for a duplicated tag, which
                # is what the old ``query(...)["eng_long"].values[0]`` gave.
                index.setdefault(tag, eng_long)
            self._eng_long_by_slo_msd = index
        return index

    def slo_msd_to_eng_long(self, slo_msd):
        """Return the ``eng_long`` value of the row whose ``slo_msd`` matches.

        Args:
            slo_msd: Slovene MSD tag to look up (compared for exact equality).

        Returns:
            The ``eng_long`` cell of the first matching row, or the string
            ``"No-matching-msd-found"`` when no row matches.
        """
        # A plain dict lookup replaces the string-built DataFrame.query():
        # it cannot be broken by quotes inside ``slo_msd`` and avoids
        # re-parsing a query expression on every call.
        return self._eng_long_index().get(slo_msd, "No-matching-msd-found")

    def slo_msd_to_eng_pos(self, slo_msd):
        """Return the English part-of-speech code for a Slovene MSD tag.

        Args:
            slo_msd: Slovene MSD tag; its first character is the Slovene
                part-of-speech code.

        Returns:
            The English code (last field of the matching ``pos_slo_ang`` row).

        Raises:
            ValueError: If the first character is not a known Slovene code.
            IndexError: If ``slo_msd`` is empty.
        """
        # first letter in slo_msd == slo_pos
        return self.pos_slo_ang_map(1, slo_msd[0])[3]

    def pos_slo_ang_map(self, col, query):
        """Return the first ``pos_slo_ang`` row whose field ``col`` equals ``query``.

        Args:
            col: Index of the tuple field to compare (0-3).
            query: Value to search for in that field.

        Returns:
            The full 4-tuple of the first matching row.

        Raises:
            ValueError: If no row matches.
        """
        for pos in self.pos_slo_ang:
            if pos[col] == query:
                return pos
        raise ValueError("Wrong part of speech.")
if __name__ == "__main__":
    # Manual smoke test: for a few sample Slovene MSD tags, print the tag,
    # its long English description and its English part-of-speech code,
    # followed by a blank separator line.
    mapper = Msdmap()
    for sample in ("Soser", "Ppnzmm", "Gp-d-mz"):
        print(sample)
        print(mapper.slo_msd_to_eng_long(sample))
        print(mapper.slo_msd_to_eng_pos(sample))
        print()