Sloleks accentuation fix

2018-09-27 14:47:04 +02:00 · 2018-09-27 14:47:04 +02:00 · 96d03b5e47
commit 96d03b5e47
parent ad7d10563e
1 changed file with 4 additions and 4 deletions
--- a/sloleks_accentuation.py
+++ b/sloleks_accentuation.py
@@ -84,7 +84,7 @@ done_lexical_entries = 33522
 import gc
 with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
-    myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
+    # myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
    for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
        # LOAD NEW WORDS AND ACCENTUATE THEM
        # print("HERE")
@@ -100,8 +100,8 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
            continue
        if word_glob_num >= word_limit:
-            myfile2.close()
+            # myfile2.close()
-            myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
+            # myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
            iter_index += 1
            print("Words proccesed: " + str(word_glob_num))
@@ -155,7 +155,7 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
                        word_index += 1
        # print(etree.tostring(element, encoding="UTF-8"))
-        myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
+        # myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
        myfile.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
        element.clear()
        lexical_entries_save_number += 1