Added fix for "gímiljênje."
This commit is contained in:
parent
96d03b5e47
commit
def19f1eb1
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -109,3 +109,5 @@ postprocessing/data_merge_analysis.py
|
||||||
data_merge_analysis.py
|
data_merge_analysis.py
|
||||||
postprocessing/sp_sloleks_data_merge.py
|
postprocessing/sp_sloleks_data_merge.py
|
||||||
sp_sloleks_data_merge.py
|
sp_sloleks_data_merge.py
|
||||||
|
postprocessing/data_merge_xml2tab.py
|
||||||
|
data_merge_xml2tab.py
|
||||||
|
|
|
@ -5,7 +5,7 @@ import re
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
import time
|
import time
|
||||||
from prepare_data import *
|
# from prepare_data import *
|
||||||
|
|
||||||
accented_vowels = ['ŕ', 'á', 'à', 'é', 'è', 'ê', 'í', 'ì', 'ó', 'ô', 'ò', 'ú', 'ù']
|
accented_vowels = ['ŕ', 'á', 'à', 'é', 'è', 'ê', 'í', 'ì', 'ó', 'ô', 'ò', 'ú', 'ù']
|
||||||
|
|
||||||
|
@ -77,15 +77,15 @@ word_index = 0
|
||||||
# iter_index = 31
|
# iter_index = 31
|
||||||
|
|
||||||
# done_lexical_entries = 33522
|
# done_lexical_entries = 33522
|
||||||
data = Data('s', shuffle_all_inputs=False)
|
# data = Data('s', shuffle_all_inputs=False)
|
||||||
# accentuated_content = data._read_content('data/new_sloleks/final_sloleks2.tab')
|
# accentuated_content = data._read_content('data/new_sloleks/final_sloleks2.tab')
|
||||||
|
|
||||||
start_timer = time.time()
|
start_timer = time.time()
|
||||||
lemmas = 0
|
lemmas = 0
|
||||||
print('Copy initialization complete')
|
print('Copy initialization complete')
|
||||||
with open("data/contextual_changes/accented_lemmas_final_sloleks2_small.xml", "ab") as myfile:
|
with open("data/contextual_changes/stressed_lemmas_sloleks2.xml", "ab") as myfile:
|
||||||
# myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
# myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')
|
||||||
for event, element in etree.iterparse('data/new_sloleks/final_sloleks2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
|
for event, element in etree.iterparse('data/contextual_changes/final_sloleks2_inhouse2S.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
|
||||||
# for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
|
# for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True):
|
||||||
# if word_glob_num >= word_limit:
|
# if word_glob_num >= word_limit:
|
||||||
# myfile2.close()
|
# myfile2.close()
|
|
@ -1204,7 +1204,7 @@ class Data:
|
||||||
else:
|
else:
|
||||||
word_list = list(word[::-1])
|
word_list = list(word[::-1])
|
||||||
for i in range(len(word_list)):
|
for i in range(len(word_list)):
|
||||||
if self._is_vowel(word_list, i, vowels):
|
if self._is_vowel(word_list, i, vowels + accented_vowels):
|
||||||
if location == vowel_index:
|
if location == vowel_index:
|
||||||
if len(np.where(y == 1)[0]) == 1:
|
if len(np.where(y == 1)[0]) == 1:
|
||||||
word_list[i] = accented_vowels[np.where(y == 1)[0][0]]
|
word_list[i] = accented_vowels[np.where(y == 1)[0][0]]
|
||||||
|
|
|
@ -41,13 +41,14 @@ letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model = d
|
||||||
'cnn/accent_classification/syllabled_letters/v2_0/20_final_epoch.h5')
|
'cnn/accent_classification/syllabled_letters/v2_0/20_final_epoch.h5')
|
||||||
|
|
||||||
data = Data('s', shuffle_all_inputs=False)
|
data = Data('s', shuffle_all_inputs=False)
|
||||||
new_content = data._read_content('data/sloleks-sl_v1.2.tbl')
|
# new_content = data._read_content('data/sloleks-sl_v1.2.tbl')
|
||||||
|
new_content = data._read_content('data/contextual_changes/small/sloleks-sl_v1.2.tbl')
|
||||||
|
|
||||||
print('Commencing accentuator!')
|
print('Commencing accentuator!')
|
||||||
|
|
||||||
rate = 100000
|
rate = 100000
|
||||||
start_timer = time.time()
|
start_timer = time.time()
|
||||||
with open("data/new_sloleks/new_sloleks2.tab", "a") as myfile:
|
with open("data/contextual_changes/small/new_sloleks2_small2.tab", "a") as myfile:
|
||||||
for index in range(0, len(new_content), rate):
|
for index in range(0, len(new_content), rate):
|
||||||
if index+rate >= len(new_content):
|
if index+rate >= len(new_content):
|
||||||
words = [[el[0], '', el[2], el[0]] for el in new_content][index:len(new_content)]
|
words = [[el[0], '', el[2], el[0]] for el in new_content][index:len(new_content)]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user