|
|
|
@ -84,7 +84,7 @@ done_lexical_entries = 33522
|
|
|
|
|
import gc
|
|
|
|
|
|
|
|
|
|
with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
|
|
|
|
|
myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
|
|
|
|
# myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
|
|
|
|
for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
|
|
|
|
|
# LOAD NEW WORDS AND ACCENTUATE THEM
|
|
|
|
|
# print("HERE")
|
|
|
|
@ -100,8 +100,8 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
if word_glob_num >= word_limit:
|
|
|
|
|
myfile2.close()
|
|
|
|
|
myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
|
|
|
|
# myfile2.close()
|
|
|
|
|
# myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
|
|
|
|
iter_index += 1
|
|
|
|
|
print("Words proccesed: " + str(word_glob_num))
|
|
|
|
|
|
|
|
|
@ -155,7 +155,7 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
|
|
|
|
|
word_index += 1
|
|
|
|
|
|
|
|
|
|
# print(etree.tostring(element, encoding="UTF-8"))
|
|
|
|
|
myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
|
|
|
|
|
# myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
|
|
|
|
|
myfile.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
|
|
|
|
|
element.clear()
|
|
|
|
|
lexical_entries_save_number += 1
|
|
|
|
|