Ignore structures with @type=single and add the --new-tei option
This commit is contained in:
parent
fa4479af60
commit
361331515e
|
@ -204,23 +204,36 @@ def file_sentence_generator(et, args):
|
|||
words[sentence.get('id')] = Word.fake_root_word(sentence.get('id'))
|
||||
last_word_id = None
|
||||
|
||||
for w in sentence.iter():
|
||||
if w.tag == 'w':
|
||||
words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
||||
if use_punctuations:
|
||||
previous_glue = ''
|
||||
last_word_id = None
|
||||
elif w.tag == pc_tag:
|
||||
words[w.get('id')] = Word.pc_word(w, do_msd_translate)
|
||||
if use_punctuations:
|
||||
last_word_id = w.get('id')
|
||||
words[w.get('id')].previous_glue = previous_glue
|
||||
previous_glue = ''
|
||||
elif use_punctuations and w.tag == 'c':
|
||||
# always save previous glue
|
||||
previous_glue = w.text
|
||||
if last_word_id:
|
||||
words[last_word_id].glue += w.text
|
||||
if args.new_tei:
|
||||
for w in sentence.iter():
|
||||
if w.tag == 'w':
|
||||
words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
||||
if use_punctuations:
|
||||
previous_glue = '' if 'join' in w.attrib and w.get('join') == 'right' else ' '
|
||||
elif w.tag == pc_tag:
|
||||
words[w.get('id')] = Word.pc_word(w, do_msd_translate)
|
||||
if use_punctuations:
|
||||
words[w.get('id')].previous_glue = previous_glue
|
||||
words[w.get('id')].glue = '' if 'join' in w.attrib and w.get('join') == 'right' else ' '
|
||||
previous_glue = '' if 'join' in w.attrib and w.get('join') == 'right' else ' '
|
||||
else:
|
||||
for w in sentence.iter():
|
||||
if w.tag == 'w':
|
||||
words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
||||
if use_punctuations:
|
||||
previous_glue = ''
|
||||
last_word_id = None
|
||||
elif w.tag == pc_tag:
|
||||
words[w.get('id')] = Word.pc_word(w, do_msd_translate)
|
||||
if use_punctuations:
|
||||
last_word_id = w.get('id')
|
||||
words[w.get('id')].previous_glue = previous_glue
|
||||
previous_glue = ''
|
||||
elif use_punctuations and w.tag == 'c':
|
||||
# always save previous glue
|
||||
previous_glue = w.text
|
||||
if last_word_id:
|
||||
words[last_word_id].glue += w.text
|
||||
|
||||
# for w in sentence.iter("w"):
|
||||
# words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
||||
|
|
|
@ -115,6 +115,8 @@ def build_structures(args):
|
|||
|
||||
structures = []
|
||||
for structure in et.iter('syntactic_structure'):
|
||||
if structure.attrib['type'] == 'single':
|
||||
continue
|
||||
to_append = SyntacticStructure.from_xml(structure, no_stats)
|
||||
if to_append is None:
|
||||
continue
|
||||
|
|
3
wani.py
3
wani.py
|
@ -160,6 +160,9 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--fixed-restriction-order',
|
||||
help='If used, words have to be in the same order as components.',
|
||||
action='store_true')
|
||||
parser.add_argument('--new-tei',
|
||||
help='Attribute to be used, when using new version of tei. (default=False)',
|
||||
action='store_true')
|
||||
args = parser.parse_args()
|
||||
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user