MSD translation is now optional
This commit is contained in:
parent
f89212f7c9
commit
40db51adf1
18
wani.py
18
wani.py
|
@ -710,9 +710,9 @@ def get_msd(comp):
|
|||
raise NotImplementedError("MSD?")
|
||||
|
||||
class Word:
|
||||
def __init__(self, xml):
|
||||
def __init__(self, xml, do_msd_translate):
|
||||
self.lemma = xml.get('lemma')
|
||||
self.msd = MSD_TRANSLATE[get_msd(xml)]
|
||||
self.msd = MSD_TRANSLATE[get_msd(xml)] if do_msd_translate else get_msd(xml)
|
||||
self.id = xml.get('id')
|
||||
self.text = xml.text
|
||||
self.links = defaultdict(list)
|
||||
|
@ -720,9 +720,9 @@ class Word:
|
|||
assert(None not in (self.id, self.lemma, self.msd))
|
||||
|
||||
@staticmethod
|
||||
def pcWord(pc):
|
||||
def pcWord(pc, do_msd_translate):
|
||||
pc.set('lemma', pc.text)
|
||||
return Word(pc)
|
||||
return Word(pc, do_msd_translate)
|
||||
|
||||
def add_link(self, link, to):
|
||||
self.links[link].append(to)
|
||||
|
@ -742,12 +742,13 @@ def is_root_id(id_):
|
|||
def load_files(args):
|
||||
filenames = args.input
|
||||
skip_id_check = args.skip_id_check
|
||||
do_msd_translate = not args.no_msd_translate
|
||||
|
||||
for fname in filenames:
|
||||
yield load_tei_file(fname, skip_id_check)
|
||||
yield load_tei_file(fname, skip_id_check, do_msd_translate)
|
||||
|
||||
|
||||
def load_tei_file(filename, skip_id_check):
|
||||
def load_tei_file(filename, skip_id_check, do_msd_translate):
|
||||
logging.info("LOADING FILE: {}".format(filename))
|
||||
|
||||
with open(filename, 'r') as fp:
|
||||
|
@ -757,9 +758,9 @@ def load_tei_file(filename, skip_id_check):
|
|||
|
||||
words = {}
|
||||
for w in et.iter("w"):
|
||||
words[w.get('id')] = Word(w)
|
||||
words[w.get('id')] = Word(w, do_msd_translate)
|
||||
for pc in et.iter("pc"):
|
||||
words[pc.get('id')] = Word.pcWord(pc)
|
||||
words[pc.get('id')] = Word.pcWord(pc, do_msd_translate)
|
||||
|
||||
for l in et.iter("link"):
|
||||
if 'dep' in l.keys():
|
||||
|
@ -963,6 +964,7 @@ if __name__ == '__main__':
|
|||
parser.add_argument('input', help='input xml file in `ssj500k form`, can list more than one', nargs='+')
|
||||
parser.add_argument('--output', help='Output file (if none given, then output to stdout)')
|
||||
|
||||
parser.add_argument('--no-msd-translate', help='MSDs are translated from slovene to english by default', action='store_true')
|
||||
parser.add_argument('--skip-id-check', help='Skips checks for ids of <w> and <pc>, if they are in correct format', action='store_true')
|
||||
parser.add_argument('--lemma-only', help='Will not write word ids, forms and msds in output', action='store_true')
|
||||
parser.add_argument('--without-rep', help='Will not write representtaions in output', action='store_true')
|
||||
|
|
Loading…
Reference in New Issue
Block a user