|
|
|
@ -12,7 +12,7 @@ class Sentence:
|
|
|
|
|
self.no_ud = no_ud
|
|
|
|
|
|
|
|
|
|
def add_item(self, token, lemma, upos, upos_other, xpos, misc):
|
|
|
|
|
self.items.append([token, lemma, upos, upos_other, xpos, misc == "SpaceAfter=No"])
|
|
|
|
|
self.items.append([token, lemma, upos, upos_other, xpos, "SpaceAfter=No" in misc.split('|')])
|
|
|
|
|
|
|
|
|
|
def add_link(self, link_ref, link_type):
|
|
|
|
|
self.links.append([link_ref, link_type])
|
|
|
|
@ -55,11 +55,11 @@ class Sentence:
|
|
|
|
|
link_grp = etree.Element('linkGrp')
|
|
|
|
|
link_grp.set('corresp', '#'+xml_id)
|
|
|
|
|
link_grp.set('targFunc', 'head argument')
|
|
|
|
|
link_grp.set('type', 'JOS-SYN')
|
|
|
|
|
link_grp.set('type', system.upper() + '-SYN')
|
|
|
|
|
for link_id, item in enumerate(self.links):
|
|
|
|
|
link_ref, link_type = item
|
|
|
|
|
link = etree.Element('link')
|
|
|
|
|
link.set('ana', 'jos-syn:' + link_type)
|
|
|
|
|
link.set('ana', system + '-syn:' + link_type.replace(':','_'))
|
|
|
|
|
if link_ref == u'0':
|
|
|
|
|
link.set('target', '#' + xml_id + ' #' + xml_id + '.' + str(link_id + 1))
|
|
|
|
|
else:
|
|
|
|
@ -71,7 +71,7 @@ class Sentence:
|
|
|
|
|
|
|
|
|
|
class Paragraph:
|
|
|
|
|
def __init__(self, _id):
|
|
|
|
|
self._id = _id
|
|
|
|
|
self._id = _id if _id is not None else 'no-id'
|
|
|
|
|
self.sentences = []
|
|
|
|
|
|
|
|
|
|
def add_sentence(self, sentence):
|
|
|
|
@ -265,6 +265,8 @@ def convert_file(input_file_name, output_file_name):
|
|
|
|
|
tree.write(output_file_name, pretty_print=True, encoding='utf-8')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
system = 'jos' # default (TODO: make this cleaner)
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
import argparse
|
|
|
|
|
from glob import glob
|
|
|
|
@ -273,6 +275,7 @@ if __name__ == '__main__':
|
|
|
|
|
parser.add_argument('files', nargs='+', help='CoNNL-U file')
|
|
|
|
|
parser.add_argument('-o', '--out-file', dest='out', default=None,
|
|
|
|
|
help='Write output to file instead of stdout.')
|
|
|
|
|
parser.add_argument('-s', '--system', dest='system', default='jos', choices=['jos', 'ud'])
|
|
|
|
|
|
|
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
|
|
|
@ -281,6 +284,8 @@ if __name__ == '__main__':
|
|
|
|
|
else:
|
|
|
|
|
f_out = sys.stdout
|
|
|
|
|
|
|
|
|
|
system = args.system
|
|
|
|
|
|
|
|
|
|
for arg in args.files:
|
|
|
|
|
filelist = glob(arg)
|
|
|
|
|
for f in filelist:
|
|
|
|
|