Sloleks accentuation fix
This commit is contained in:
parent
ad7d10563e
commit
96d03b5e47
|
@ -84,7 +84,7 @@ done_lexical_entries = 33522
|
||||||
import gc
|
import gc
|
||||||
|
|
||||||
with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
|
with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
|
||||||
myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
# myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
||||||
for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
|
for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
|
||||||
# LOAD NEW WORDS AND ACCENTUATE THEM
|
# LOAD NEW WORDS AND ACCENTUATE THEM
|
||||||
# print("HERE")
|
# print("HERE")
|
||||||
|
@ -100,8 +100,8 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if word_glob_num >= word_limit:
|
if word_glob_num >= word_limit:
|
||||||
myfile2.close()
|
# myfile2.close()
|
||||||
myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
# myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
||||||
iter_index += 1
|
iter_index += 1
|
||||||
print("Words proccesed: " + str(word_glob_num))
|
print("Words proccesed: " + str(word_glob_num))
|
||||||
|
|
||||||
|
@ -155,7 +155,7 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
|
||||||
word_index += 1
|
word_index += 1
|
||||||
|
|
||||||
# print(etree.tostring(element, encoding="UTF-8"))
|
# print(etree.tostring(element, encoding="UTF-8"))
|
||||||
myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
|
# myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
|
||||||
myfile.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
|
myfile.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
|
||||||
element.clear()
|
element.clear()
|
||||||
lexical_entries_save_number += 1
|
lexical_entries_save_number += 1
|
||||||
|
|
Loading…
Reference in New Issue
Block a user