Resurrected changes lost in merge

multiple_files_conllu_to_tei
Cyprian Laskowski 2 years ago
parent d741a365ab
commit 126d00eba9

@ -12,7 +12,7 @@ class Sentence:
self.no_ud = no_ud
def add_item(self, token, lemma, upos, upos_other, xpos, misc):
self.items.append([token, lemma, upos, upos_other, xpos, misc == "SpaceAfter=No"])
self.items.append([token, lemma, upos, upos_other, xpos, "SpaceAfter=No" in misc.split('|')])
def add_link(self, link_ref, link_type):
self.links.append([link_ref, link_type])
@ -55,11 +55,11 @@ class Sentence:
link_grp = etree.Element('linkGrp')
link_grp.set('corresp', '#'+xml_id)
link_grp.set('targFunc', 'head argument')
link_grp.set('type', 'JOS-SYN')
link_grp.set('type', system.upper() + '-SYN')
for link_id, item in enumerate(self.links):
link_ref, link_type = item
link = etree.Element('link')
link.set('ana', 'jos-syn:' + link_type)
link.set('ana', system + '-syn:' + link_type.replace(':','_'))
if link_ref == u'0':
link.set('target', '#' + xml_id + ' #' + xml_id + '.' + str(link_id + 1))
else:
@ -71,7 +71,7 @@ class Sentence:
class Paragraph:
def __init__(self, _id):
self._id = _id
self._id = _id if _id is not None else 'no-id'
self.sentences = []
def add_sentence(self, sentence):
@ -265,6 +265,8 @@ def convert_file(input_file_name, output_file_name):
tree.write(output_file_name, pretty_print=True, encoding='utf-8')
system = 'jos' # default (TODO: make this cleaner)
if __name__ == '__main__':
import argparse
from glob import glob
@ -273,6 +275,7 @@ if __name__ == '__main__':
parser.add_argument('files', nargs='+', help='CoNNL-U file')
parser.add_argument('-o', '--out-file', dest='out', default=None,
help='Write output to file instead of stdout.')
parser.add_argument('-s', '--system', dest='system', default='jos', choices=['jos', 'ud'])
args = parser.parse_args()
@ -281,6 +284,8 @@ if __name__ == '__main__':
else:
f_out = sys.stdout
system = args.system
for arg in args.files:
filelist = glob(arg)
for f in filelist:

Loading…
Cancel
Save