Sloleks accentuation fix
This commit is contained in:
		
parent ad7d10563e
commit 96d03b5e47
					
@@ -84,7 +84,7 @@ done_lexical_entries = 33522
 import gc

 with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
-    myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
+    # myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
     for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
         # LOAD NEW WORDS AND ACCENTUATE THEM
         # print("HERE")
@@ -100,8 +100,8 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
             continue

         if word_glob_num >= word_limit:
-            myfile2.close()
-            myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
+            # myfile2.close()
+            # myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
             iter_index += 1
             print("Words proccesed: " + str(word_glob_num))

@@ -155,7 +155,7 @@ with open("data/new_sloleks/new_sloleks.xml", "ab") as myfile:
                         word_index += 1

         # print(etree.tostring(element, encoding="UTF-8"))
-        myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
+        # myfile2.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
         myfile.write(etree.tostring(element, encoding="UTF-8", pretty_print=True))
         element.clear()
         lexical_entries_save_number += 1
		Loading…
	
		Reference in New Issue
	
	Block a user