Sloleks accentuation fix
This commit is contained in:
parent
ad7d10563e
commit
96d03b5e47
|
@@ -84,7 +84,7 @@ done_lexical_entries = 33522
|
|||
import gc
|
||||
|
||||
with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
|
||||
myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
||||
# myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
||||
for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
|
||||
# LOAD NEW WORDS AND ACCENTUATE THEM
|
||||
# print("HERE")
|
||||
|
@@ -100,8 +100,8 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
|
|||
continue
|
||||
|
||||
if word_glob_num >= word_limit:
|
||||
myfile2.close()
|
||||
myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
||||
# myfile2.close()
|
||||
# myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
||||
iter_index += 1
|
||||
print("Words proccesed: " + str(word_glob_num))
|
||||
|
||||
|
@@ -155,7 +155,7 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
|
|||
word_index += 1
|
||||
|
||||
# print(etree.tostring(element, encoding="UTF-8"))
|
||||
myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
|
||||
# myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
|
||||
myfile.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
|
||||
element.clear()
|
||||
lexical_entries_save_number += 1
|
||||
|
|
Loading…
Reference in New Issue
Block a user