Ignore syntactic structures with @type=single and add a --new-tei option
This commit is contained in:
parent
fa4479af60
commit
361331515e
|
@ -204,23 +204,36 @@ def file_sentence_generator(et, args):
|
||||||
words[sentence.get('id')] = Word.fake_root_word(sentence.get('id'))
|
words[sentence.get('id')] = Word.fake_root_word(sentence.get('id'))
|
||||||
last_word_id = None
|
last_word_id = None
|
||||||
|
|
||||||
for w in sentence.iter():
|
if args.new_tei:
|
||||||
if w.tag == 'w':
|
for w in sentence.iter():
|
||||||
words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
if w.tag == 'w':
|
||||||
if use_punctuations:
|
words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
||||||
previous_glue = ''
|
if use_punctuations:
|
||||||
last_word_id = None
|
previous_glue = '' if 'join' in w.attrib and w.get('join') == 'right' else ' '
|
||||||
elif w.tag == pc_tag:
|
elif w.tag == pc_tag:
|
||||||
words[w.get('id')] = Word.pc_word(w, do_msd_translate)
|
words[w.get('id')] = Word.pc_word(w, do_msd_translate)
|
||||||
if use_punctuations:
|
if use_punctuations:
|
||||||
last_word_id = w.get('id')
|
words[w.get('id')].previous_glue = previous_glue
|
||||||
words[w.get('id')].previous_glue = previous_glue
|
words[w.get('id')].glue = '' if 'join' in w.attrib and w.get('join') == 'right' else ' '
|
||||||
previous_glue = ''
|
previous_glue = '' if 'join' in w.attrib and w.get('join') == 'right' else ' '
|
||||||
elif use_punctuations and w.tag == 'c':
|
else:
|
||||||
# always save previous glue
|
for w in sentence.iter():
|
||||||
previous_glue = w.text
|
if w.tag == 'w':
|
||||||
if last_word_id:
|
words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
||||||
words[last_word_id].glue += w.text
|
if use_punctuations:
|
||||||
|
previous_glue = ''
|
||||||
|
last_word_id = None
|
||||||
|
elif w.tag == pc_tag:
|
||||||
|
words[w.get('id')] = Word.pc_word(w, do_msd_translate)
|
||||||
|
if use_punctuations:
|
||||||
|
last_word_id = w.get('id')
|
||||||
|
words[w.get('id')].previous_glue = previous_glue
|
||||||
|
previous_glue = ''
|
||||||
|
elif use_punctuations and w.tag == 'c':
|
||||||
|
# always save previous glue
|
||||||
|
previous_glue = w.text
|
||||||
|
if last_word_id:
|
||||||
|
words[last_word_id].glue += w.text
|
||||||
|
|
||||||
# for w in sentence.iter("w"):
|
# for w in sentence.iter("w"):
|
||||||
# words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
# words[w.get('id')] = Word.from_xml(w, do_msd_translate)
|
||||||
|
|
|
@ -115,6 +115,8 @@ def build_structures(args):
|
||||||
|
|
||||||
structures = []
|
structures = []
|
||||||
for structure in et.iter('syntactic_structure'):
|
for structure in et.iter('syntactic_structure'):
|
||||||
|
if structure.attrib['type'] == 'single':
|
||||||
|
continue
|
||||||
to_append = SyntacticStructure.from_xml(structure, no_stats)
|
to_append = SyntacticStructure.from_xml(structure, no_stats)
|
||||||
if to_append is None:
|
if to_append is None:
|
||||||
continue
|
continue
|
||||||
|
|
3
wani.py
3
wani.py
|
@ -160,6 +160,9 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('--fixed-restriction-order',
|
parser.add_argument('--fixed-restriction-order',
|
||||||
help='If used, words have to be in the same order as components.',
|
help='If used, words have to be in the same order as components.',
|
||||||
action='store_true')
|
action='store_true')
|
||||||
|
parser.add_argument('--new-tei',
|
||||||
|
help='Attribute to be used, when using new version of tei. (default=False)',
|
||||||
|
action='store_true')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user