forked from kristjan/cjvt-srl-tagging
conll2009 output for kres
This commit is contained in:
0
tools/parser/msd/__init__.py
Normal file
0
tools/parser/msd/__init__.py
Normal file
1902
tools/parser/msd/msd-human-sl.tbl
Normal file
1902
tools/parser/msd/msd-human-sl.tbl
Normal file
File diff suppressed because it is too large
Load Diff
60
tools/parser/msd/msdmap.py
Normal file
60
tools/parser/msd/msdmap.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import pkg_resources
|
||||
import pandas
|
||||
import sys
|
||||
|
||||
# MSD tag mappings from Slovene ("slo") to English ("ang")
|
||||
class Msdmap:
    """Translate Slovene MSD tags to their English counterparts.

    Holds two lookup structures:

    * ``pos_slo_ang`` -- a small in-memory list of part-of-speech rows.
    * ``msd_table`` -- a pandas DataFrame loaded from ``msd-human-sl.tbl``
      with the columns ``id``, ``eng_msd``, ``eng_long``, ``slo_msd``,
      ``slo_long`` and ``slo_very_long``.
    """

    def __init__(self):
        """Build the part-of-speech list and load the MSD table from disk."""
        # http://nl.ijs.si/ME/V4/msd/html/msd.categories-sl.html
        # Row layout: (Slovene name, Slovene code, English name, English code).
        self.pos_slo_ang = [
            ("samostalnik", "S", "Noun", "N"),
            ("glagol", "G", "Verb", "V"),
            ("pridevnik", "P", "Adjective", "A"),
            ("prislov", "R", "Adverb", "R"),
            ("zaimek", "Z", "Pronoun", "P"),
            ("števnik", "K", "Numeral", "M"),
            ("predlog", "D", "Adposition", "S"),
            ("veznik", "V", "Conjunction", "C"),
            ("členek", "L", "Particle", "Q"),
            ("medmet", "M", "Interjection", "I"),
            ("okrajšava", "O", "Abbreviation", "Y"),
            ("neuvrščeno", "N", "Residual", "X"),
        ]

        table_name = "msd-human-sl.tbl"
        # pkg_resources resolves resource names relative to the directory of
        # the module named by __name__. The table is stored next to this
        # module, so the resource name carries no sub-directory prefix.
        table_path = pkg_resources.resource_filename(__name__, table_name)
        # Read from the resolved path (not the bare file name, which would be
        # looked up in the current working directory). The separator is given
        # by keyword because pandas >= 2.0 rejects it as a positional argument.
        self.msd_table = pandas.read_csv(
            table_path,
            sep="\t",
            names=[
                "id",
                "eng_msd",
                "eng_long",
                "slo_msd",
                "slo_long",
                "slo_very_long",
            ],
        )

    def slo_msd_to_eng_long(self, slo_msd):
        """Return the ``eng_long`` value of the first row matching *slo_msd*.

        Args:
            slo_msd: Value to look up in the ``slo_msd`` column.

        Returns:
            The ``eng_long`` cell of the first matching row.

        Raises:
            IndexError: If no row has ``slo_msd`` equal to *slo_msd*.
        """
        matches = self.msd_table[self.msd_table["slo_msd"] == slo_msd]
        return matches["eng_long"].values[0]

    def pos_slo_ang_map(self, col, query):
        """Return the first part-of-speech row whose column *col* equals *query*.

        Args:
            col: Index into a ``pos_slo_ang`` row (0-3).
            query: Value to compare against that column.

        Returns:
            The complete matching row tuple.

        Raises:
            ValueError: If no row matches.
        """
        for pos in self.pos_slo_ang:
            if pos[col] == query:
                return pos
        raise ValueError("Wrong part of speech.")

    def msd_from_slo(self, msd):
        """Split a Slovene MSD tag into its part of speech and attributes.

        The first character of *msd* is looked up as a Slovene part-of-speech
        code; every following character other than ``"-"`` is passed to
        ``pos_val_map`` together with the English category name.

        Args:
            msd: Slovene MSD tag string.

        Returns:
            A ``(pos, attr)`` tuple: the matching ``pos_slo_ang`` row and the
            list of mapped attribute values.

        Raises:
            ValueError: If the first character is not a known Slovene code.
        """
        pos = self.pos_slo_ang_map(1, msd[0])
        category = pos[2]
        # NOTE(review): pos_val_map is not defined in the visible part of this
        # class, so any tag with attribute characters raises AttributeError
        # here -- confirm where that helper is supposed to come from.
        attr = [
            self.pos_val_map(category, 1, m)
            for m in msd[1:]
            if m != "-"
        ]
        return (pos, attr)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: print each sample Slovene MSD tag followed by its
    # long English description and a blank separator line.
    mapper = Msdmap()
    for sample in ("Soser", "Ppnzmm", "Gp-d-mz"):
        print(sample)
        english = mapper.slo_msd_to_eng_long(sample)
        print(english)
        print()
|
||||
Reference in New Issue
Block a user