Resurrected changes lost in merge

This commit is contained in:
Cyprian Laskowski 2021-12-07 22:36:10 +01:00
parent d741a365ab
commit 126d00eba9

View File

@ -12,7 +12,7 @@ class Sentence:
self.no_ud = no_ud self.no_ud = no_ud
def add_item(self, token, lemma, upos, upos_other, xpos, misc): def add_item(self, token, lemma, upos, upos_other, xpos, misc):
self.items.append([token, lemma, upos, upos_other, xpos, misc == "SpaceAfter=No"]) self.items.append([token, lemma, upos, upos_other, xpos, "SpaceAfter=No" in misc.split('|')])
def add_link(self, link_ref, link_type): def add_link(self, link_ref, link_type):
self.links.append([link_ref, link_type]) self.links.append([link_ref, link_type])
@ -55,11 +55,11 @@ class Sentence:
link_grp = etree.Element('linkGrp') link_grp = etree.Element('linkGrp')
link_grp.set('corresp', '#'+xml_id) link_grp.set('corresp', '#'+xml_id)
link_grp.set('targFunc', 'head argument') link_grp.set('targFunc', 'head argument')
link_grp.set('type', 'JOS-SYN') link_grp.set('type', system.upper() + '-SYN')
for link_id, item in enumerate(self.links): for link_id, item in enumerate(self.links):
link_ref, link_type = item link_ref, link_type = item
link = etree.Element('link') link = etree.Element('link')
link.set('ana', 'jos-syn:' + link_type) link.set('ana', system + '-syn:' + link_type.replace(':','_'))
if link_ref == u'0': if link_ref == u'0':
link.set('target', '#' + xml_id + ' #' + xml_id + '.' + str(link_id + 1)) link.set('target', '#' + xml_id + ' #' + xml_id + '.' + str(link_id + 1))
else: else:
@ -71,7 +71,7 @@ class Sentence:
class Paragraph: class Paragraph:
def __init__(self, _id): def __init__(self, _id):
self._id = _id self._id = _id if _id is not None else 'no-id'
self.sentences = [] self.sentences = []
def add_sentence(self, sentence): def add_sentence(self, sentence):
@ -265,6 +265,8 @@ def convert_file(input_file_name, output_file_name):
tree.write(output_file_name, pretty_print=True, encoding='utf-8') tree.write(output_file_name, pretty_print=True, encoding='utf-8')
system = 'jos' # default (TODO: make this cleaner)
if __name__ == '__main__': if __name__ == '__main__':
import argparse import argparse
from glob import glob from glob import glob
@ -273,6 +275,7 @@ if __name__ == '__main__':
parser.add_argument('files', nargs='+', help='CoNNL-U file') parser.add_argument('files', nargs='+', help='CoNNL-U file')
parser.add_argument('-o', '--out-file', dest='out', default=None, parser.add_argument('-o', '--out-file', dest='out', default=None,
help='Write output to file instead of stdout.') help='Write output to file instead of stdout.')
parser.add_argument('-s', '--system', dest='system', default='jos', choices=['jos', 'ud'])
args = parser.parse_args() args = parser.parse_args()
@ -281,6 +284,8 @@ if __name__ == '__main__':
else: else:
f_out = sys.stdout f_out = sys.stdout
system = args.system
for arg in args.files: for arg in args.files:
filelist = glob(arg) filelist = glob(arg)
for f in filelist: for f in filelist: