You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
61 lines
1.9 KiB
61 lines
1.9 KiB
import pkg_resources
|
|
import pandas
|
|
import sys
|
|
|
|
# MSD mappings from Slovene ("slo") to English ("ang")
|
|
class Msdmap():
    """Lookup helper between Slovene and English MSD tags.

    Holds a hard-coded part-of-speech table (``pos_slo_ang``) and a pandas
    table (``msd_table``) loaded from the tab-separated resource
    ``msd/msd-human-sl.tbl`` that ships next to this module.
    """

    def __init__(self):
        """Build the part-of-speech table and load the MSD table from disk."""
        # http://nl.ijs.si/ME/V4/msd/html/msd.categories-sl.html
        # Each row: (Slovene name, Slovene code, English name, English code).
        self.pos_slo_ang = [
            ("samostalnik", "S", "Noun", "N"),
            ("glagol", "G", "Verb", "V"),
            ("pridevnik", "P", "Adjective", "A"),
            ("prislov", "R", "Adverb", "R"),
            ("zaimek", "Z", "Pronoun", "P"),
            ("števnik", "K", "Numeral", "M"),
            ("predlog", "D", "Adposition", "S"),
            ("veznik", "V", "Conjunction", "C"),
            ("členek", "L", "Particle", "Q"),
            ("medmet", "M", "Interjection", "I"),
            ("okrajšava", "O", "Abbreviation", "Y"),
            ("neuvrščeno", "N", "Residual", "X"),
        ]

        table_name = "msd-human-sl.tbl"
        table_path = pkg_resources.resource_filename(__name__, "msd/" + table_name)
        # Read from the resolved package path (not the bare file name, which
        # would be looked up in the current working directory), and pass the
        # separator by keyword: recent pandas versions reject it positionally.
        self.msd_table = pandas.read_csv(
            table_path,
            sep="\t",
            names=["id", "eng_msd", "eng_long", "slo_msd", "slo_long", "slo_very_long"],
        )

    def slo_msd_to_eng_long(self, slo_msd):
        """Return the ``eng_long`` value of the first row matching *slo_msd*.

        Args:
            slo_msd: Value to look up in the ``slo_msd`` column.

        Raises:
            IndexError: If no row of the table has that ``slo_msd`` value.
        """
        matches = self.msd_table.loc[self.msd_table["slo_msd"] == slo_msd, "eng_long"]
        return matches.values[0]

    def pos_slo_ang_map(self, col, query):
        """Return the first ``pos_slo_ang`` row whose column *col* equals *query*.

        Args:
            col: Index into a row (0 = Slovene name, 1 = Slovene code,
                2 = English name, 3 = English code).
            query: Value to search for in that column.

        Raises:
            ValueError: If no row matches.
        """
        for pos in self.pos_slo_ang:
            if pos[col] == query:
                return pos
        raise ValueError("Wrong part of speech.")

    def msd_from_slo(self, msd):
        """Split a Slovene MSD string into its part of speech and attributes.

        The first character selects the part-of-speech row (matched against
        the Slovene code column); every following character except ``"-"``
        is passed to ``pos_val_map`` together with the English category name.

        Returns:
            A ``(pos_row, attributes)`` tuple.

        Raises:
            ValueError: If the first character is not a known Slovene code.
        """
        pos = self.pos_slo_ang_map(1, msd[0])
        category = pos[2]
        # NOTE(review): `pos_val_map` is not defined anywhere in this class as
        # visible here, so any MSD with a non-"-" attribute character raises
        # AttributeError unless a subclass provides it -- confirm and add it.
        attr = [
            self.pos_val_map(category, 1, m)
            for m in msd[1:]
            if m != "-"
        ]
        return (pos, attr)
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: print each sample Slovene MSD followed by the
    # English long form found in the loaded table, then a blank line.
    mapper = Msdmap()
    for sample in ("Soser", "Ppnzmm", "Gp-d-mz"):
        print(sample)
        print(mapper.slo_msd_to_eng_long(sample))
        print()
|