Sloleks accentuation fix

This commit is contained in:
Luka 2018-09-27 14:47:04 +02:00
parent ad7d10563e
commit 96d03b5e47

View File

@@ -84,7 +84,7 @@ done_lexical_entries = 33522
import gc
with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
# myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
# LOAD NEW WORDS AND ACCENTUATE THEM
# print("HERE")
@@ -100,8 +100,8 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
continue
if word_glob_num >= word_limit:
myfile2.close()
myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
# myfile2.close()
# myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
iter_index += 1
print("Words proccesed: " + str(word_glob_num))
@@ -155,7 +155,7 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
word_index += 1
# print(etree.tostring(element, encoding="UTF-8"))
myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
# myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
myfile.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
element.clear()
lexical_entries_save_number += 1