Added parts of per-vowel accentuation
This commit is contained in:
parent
dfe4b9a362
commit
0e3dd4f88c
|
@ -34,8 +34,8 @@
|
||||||
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
|
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
|
||||||
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="334">
|
<state relative-caret-position="54">
|
||||||
<caret line="76" column="44" lean-forward="false" selection-start-line="76" selection-start-column="23" selection-end-line="76" selection-end-column="44" />
|
<caret line="438" column="14" lean-forward="false" selection-start-line="438" selection-start-column="14" selection-end-line="438" selection-end-column="14" />
|
||||||
<folding>
|
<folding>
|
||||||
<element signature="e#24#63#0" expanded="true" />
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
</folding>
|
</folding>
|
||||||
|
@ -127,8 +127,11 @@
|
||||||
<find>u</find>
|
<find>u</find>
|
||||||
<find>shuffle_inputs</find>
|
<find>shuffle_inputs</find>
|
||||||
<find>num_all_vowels</find>
|
<find>num_all_vowels</find>
|
||||||
<find>create_and_save_inputs</find>
|
|
||||||
<find>load_shuffle_vector</find>
|
<find>load_shuffle_vector</find>
|
||||||
|
<find>create_and_save_inputs</find>
|
||||||
|
<find>load_extended_inputs</find>
|
||||||
|
<find>create_and_save_shuffle_vector</find>
|
||||||
|
<find>h5f</find>
|
||||||
</findStrings>
|
</findStrings>
|
||||||
</component>
|
</component>
|
||||||
<component name="Git.Settings">
|
<component name="Git.Settings">
|
||||||
|
@ -168,6 +171,7 @@
|
||||||
<foldersAlwaysOnTop value="true" />
|
<foldersAlwaysOnTop value="true" />
|
||||||
</navigator>
|
</navigator>
|
||||||
<panes>
|
<panes>
|
||||||
|
<pane id="Scope" />
|
||||||
<pane id="ProjectPane">
|
<pane id="ProjectPane">
|
||||||
<subPane>
|
<subPane>
|
||||||
<PATH>
|
<PATH>
|
||||||
|
@ -215,7 +219,6 @@
|
||||||
</subPane>
|
</subPane>
|
||||||
</pane>
|
</pane>
|
||||||
<pane id="Scratches" />
|
<pane id="Scratches" />
|
||||||
<pane id="Scope" />
|
|
||||||
</panes>
|
</panes>
|
||||||
</component>
|
</component>
|
||||||
<component name="PropertiesComponent">
|
<component name="PropertiesComponent">
|
||||||
|
@ -936,24 +939,44 @@
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
<entry file="file://$PROJECT_DIR$/theanoTest.py">
|
<entry file="file://$PROJECT_DIR$/theano_tutorial/test.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="0">
|
<state relative-caret-position="1332">
|
||||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
<caret line="76" column="9" lean-forward="false" selection-start-line="76" selection-start-column="9" selection-end-line="76" selection-end-column="9" />
|
||||||
<folding />
|
<folding>
|
||||||
|
<element signature="e#0#18#0" expanded="true" />
|
||||||
|
</folding>
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
<entry file="file://$PROJECT_DIR$/theano_tutorial/tutorial_conditions.py">
|
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="18">
|
<state relative-caret-position="54">
|
||||||
<caret line="1" column="31" lean-forward="false" selection-start-line="1" selection-start-column="31" selection-end-line="1" selection-end-column="31" />
|
<caret line="438" column="14" lean-forward="false" selection-start-line="438" selection-start-column="14" selection-end-line="438" selection-end-column="14" />
|
||||||
<folding>
|
<folding>
|
||||||
<element signature="e#55#85#0" expanded="true" />
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
</folding>
|
</folding>
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
|
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="774">
|
||||||
|
<caret line="45" column="35" lean-forward="false" selection-start-line="45" selection-start-column="33" selection-end-line="45" selection-end-column="35" />
|
||||||
|
<folding>
|
||||||
|
<element signature="e#0#12#0" expanded="true" />
|
||||||
|
</folding>
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
|
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression_loop.py">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="828">
|
||||||
|
<caret line="48" column="20" lean-forward="false" selection-start-line="48" selection-start-column="20" selection-end-line="48" selection-end-column="20" />
|
||||||
|
<folding />
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
<entry file="file://$PROJECT_DIR$/theano_tutorial/tutorial_derivates.py">
|
<entry file="file://$PROJECT_DIR$/theano_tutorial/tutorial_derivates.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="576">
|
<state relative-caret-position="576">
|
||||||
|
@ -974,43 +997,23 @@
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression_loop.py">
|
<entry file="file://$PROJECT_DIR$/theano_tutorial/tutorial_conditions.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="828">
|
<state relative-caret-position="18">
|
||||||
<caret line="48" column="20" lean-forward="false" selection-start-line="48" selection-start-column="20" selection-end-line="48" selection-end-column="20" />
|
<caret line="1" column="31" lean-forward="false" selection-start-line="1" selection-start-column="31" selection-end-line="1" selection-end-column="31" />
|
||||||
|
<folding>
|
||||||
|
<element signature="e#55#85#0" expanded="true" />
|
||||||
|
</folding>
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
|
<entry file="file://$PROJECT_DIR$/theanoTest.py">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="0">
|
||||||
|
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||||
<folding />
|
<folding />
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
|
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
|
||||||
<state relative-caret-position="774">
|
|
||||||
<caret line="45" column="35" lean-forward="false" selection-start-line="45" selection-start-column="33" selection-end-line="45" selection-end-column="35" />
|
|
||||||
<folding>
|
|
||||||
<element signature="e#0#12#0" expanded="true" />
|
|
||||||
</folding>
|
|
||||||
</state>
|
|
||||||
</provider>
|
|
||||||
</entry>
|
|
||||||
<entry file="file://$PROJECT_DIR$/theano_tutorial/test.py">
|
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
|
||||||
<state relative-caret-position="1332">
|
|
||||||
<caret line="76" column="9" lean-forward="false" selection-start-line="76" selection-start-column="9" selection-end-line="76" selection-end-column="9" />
|
|
||||||
<folding>
|
|
||||||
<element signature="e#0#18#0" expanded="true" />
|
|
||||||
</folding>
|
|
||||||
</state>
|
|
||||||
</provider>
|
|
||||||
</entry>
|
|
||||||
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
|
||||||
<state relative-caret-position="334">
|
|
||||||
<caret line="76" column="44" lean-forward="false" selection-start-line="76" selection-start-column="23" selection-end-line="76" selection-end-column="44" />
|
|
||||||
<folding>
|
|
||||||
<element signature="e#24#63#0" expanded="true" />
|
|
||||||
</folding>
|
|
||||||
</state>
|
|
||||||
</provider>
|
|
||||||
</entry>
|
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
105
prepare_data.py
105
prepare_data.py
|
@ -6,6 +6,7 @@ import numpy as np
|
||||||
import h5py
|
import h5py
|
||||||
import gc
|
import gc
|
||||||
import StringIO
|
import StringIO
|
||||||
|
import copy
|
||||||
|
|
||||||
def save_inputs(file_name, X, y):
|
def save_inputs(file_name, X, y):
|
||||||
h5f = h5py.File(file_name, 'w')
|
h5f = h5py.File(file_name, 'w')
|
||||||
|
@ -45,11 +46,11 @@ def load_inputs(file_name):
|
||||||
h5f.close()
|
h5f.close()
|
||||||
return X, y
|
return X, y
|
||||||
|
|
||||||
def load_extended_inputs(file_name):
|
def load_extended_inputs(file_name, obtain_range):
|
||||||
h5f = h5py.File(file_name,'r')
|
h5f = h5py.File(file_name,'r')
|
||||||
X = h5f['X'][:]
|
X = h5f['X'][obtain_range[0]:obtain_range[1]]
|
||||||
y = h5f['y'][:]
|
y = h5f['y'][obtain_range[0]:obtain_range[1]]
|
||||||
X_pure = h5f['X_pure'][:]
|
X_pure = h5f['X_pure'][obtain_range[0]:obtain_range[1]]
|
||||||
|
|
||||||
h5f.close()
|
h5f.close()
|
||||||
return X, y, X_pure
|
return X, y, X_pure
|
||||||
|
@ -410,35 +411,105 @@ def generate_full_vowel_matrix_inputs(name, split_number):
|
||||||
h5f.close()
|
h5f.close()
|
||||||
|
|
||||||
|
|
||||||
|
def generate_arrays_from_file(path, batch_size):
|
||||||
|
h5f = h5py.File(path, 'r')
|
||||||
|
|
||||||
|
X = h5f['X'][:]
|
||||||
|
y = h5f['y'][:]
|
||||||
|
X_pure = h5f['X_pure'][:]
|
||||||
|
yield (X, y, X_pure)
|
||||||
|
# while 1:
|
||||||
|
# f = open(path)
|
||||||
|
# for line in f:
|
||||||
|
# # create Numpy arrays of input data
|
||||||
|
# # and labels, from each line in the file
|
||||||
|
# x, y = process_line(line)
|
||||||
|
# yield (x, y)
|
||||||
|
# # f.close()
|
||||||
|
|
||||||
|
h5f.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def shuffle_full_vowel_inputs(name, orderd_name, parts):
|
def shuffle_full_vowel_inputs(name, orderd_name, parts):
|
||||||
# internal_representations/inputs/X_ordered_part
|
# internal_representations/inputs/X_ordered_part
|
||||||
dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
|
dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
|
||||||
num_all_vowels = count_vowels(content, vowels)
|
num_all_vowels = count_vowels(content, vowels)
|
||||||
# s = np.arange(num_all_vowels)
|
num_all_vowels = 12
|
||||||
# np.random.shuffle(s)
|
|
||||||
|
|
||||||
|
s = np.arange(num_all_vowels)
|
||||||
|
np.random.shuffle(s)
|
||||||
# create_and_save_shuffle_vector(name, s)
|
# create_and_save_shuffle_vector(name, s)
|
||||||
s = load_shuffle_vector('internal_representations/inputs/X_shuffled_part_shuffle_vector.h5')
|
|
||||||
|
# s = load_shuffle_vector('internal_representations/inputs/X_shuffled_part_shuffle_vector.h5')
|
||||||
|
|
||||||
|
# try:
|
||||||
|
# h5f.close()
|
||||||
|
# except Exception, e:
|
||||||
|
# pass
|
||||||
|
|
||||||
|
h5f = h5py.File(name, 'w')
|
||||||
|
data_X = h5f.create_dataset('X', (num_all_vowels, max_word, len(dictionary)),
|
||||||
|
maxshape=(num_all_vowels, max_word, len(dictionary)),
|
||||||
|
dtype=np.uint8)
|
||||||
|
data_y = h5f.create_dataset('y', (num_all_vowels,),
|
||||||
|
maxshape=(num_all_vowels,),
|
||||||
|
dtype=np.uint8)
|
||||||
|
data_X_pure = h5f.create_dataset('X_pure', (num_all_vowels,),
|
||||||
|
maxshape=(num_all_vowels,),
|
||||||
|
dtype=np.uint8)
|
||||||
|
|
||||||
|
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
print('Shuffled vector loaded!')
|
print('Shuffled vector loaded!')
|
||||||
section_range = [0, (num_all_vowels + 1)/parts]
|
section_range = [0, (num_all_vowels + 1)/parts]
|
||||||
for h in range(3, parts+1):
|
for h in range(1, parts+1):
|
||||||
gc.collect()
|
gc.collect()
|
||||||
new_X = np.zeros((section_range[1], max_word, len(dictionary)))
|
new_X = np.zeros((section_range[1] - section_range[0], max_word, len(dictionary)))
|
||||||
new_X_pure = np.zeros(section_range[1])
|
new_X_pure = np.zeros(section_range[1] - section_range[0])
|
||||||
new_y = np.zeros(section_range[1])
|
new_y = np.zeros(section_range[1] - section_range[0])
|
||||||
|
targeted_range = [0, (num_all_vowels + 1)/parts]
|
||||||
for i in range(1, parts+1):
|
for i in range(1, parts+1):
|
||||||
X, y, X_pure = load_extended_inputs(orderd_name + str(parts) + '.h5')
|
X, y, X_pure = load_extended_inputs(orderd_name, targeted_range)
|
||||||
for j in range(X.shape[0]):
|
for j in range(X.shape[0]):
|
||||||
if s[j] >= section_range[0] and s[j] < section_range[1]:
|
# print targeted_range[0]
|
||||||
new_X[s[j]] = X[j]
|
# print targeted_range[1]
|
||||||
new_y[s[j]] = y[j]
|
# print s[j]
|
||||||
new_X_pure[s[j]] = X_pure[j]
|
if s[j + targeted_range[0]] >= section_range[0] and s[j + targeted_range[0]] < section_range[1]:
|
||||||
|
# print 's[j] ' + str(s[j + targeted_range[0]]) + ' section_range[0] ' + str(section_range[0]) + ' section_range[1] ' + str(section_range[1])
|
||||||
|
new_X[s[j + targeted_range[0]] - section_range[0]] = X[j]
|
||||||
|
new_y[s[j + targeted_range[0]] - section_range[0]] = y[j]
|
||||||
|
new_X_pure[s[j + targeted_range[0]] - section_range[0]] = X_pure[j]
|
||||||
|
targeted_range[0] = targeted_range[1]
|
||||||
|
if targeted_range[1] + (num_all_vowels + 1) / parts < num_all_vowels:
|
||||||
|
targeted_range[1] += (num_all_vowels + 1) / parts
|
||||||
|
else:
|
||||||
|
targeted_range[1] = num_all_vowels
|
||||||
|
del X, y, X_pure
|
||||||
print('CREATED ' + str(h) + '. PART OF SHUFFLED MATRIX')
|
print('CREATED ' + str(h) + '. PART OF SHUFFLED MATRIX')
|
||||||
create_and_save_inputs(name, str(h), new_X, new_y, new_X_pure)
|
# create_and_save_inputs(name, str(h), new_X, new_y, new_X_pure)
|
||||||
|
# a =
|
||||||
|
# print (a.shape)
|
||||||
|
# print s
|
||||||
|
# for el in np.array(new_X):
|
||||||
|
# print el
|
||||||
|
# print 'new_X ' + str(new_X) + ' section_range[0] ' + str(section_range[0]) + ' section_range[1] ' + str(section_range[1])
|
||||||
|
# print new_X.shape
|
||||||
|
# print type(new_X)
|
||||||
|
data_X[section_range[0]:section_range[1]] = new_X
|
||||||
|
data_y[section_range[0]:section_range[1]] = new_y
|
||||||
|
data_X_pure[section_range[0]:section_range[1]] = new_X_pure
|
||||||
section_range[0] = section_range[1]
|
section_range[0] = section_range[1]
|
||||||
if section_range[1] + (num_all_vowels + 1)/parts < num_all_vowels:
|
if section_range[1] + (num_all_vowels + 1)/parts < num_all_vowels:
|
||||||
section_range[1] += (num_all_vowels + 1)/parts
|
section_range[1] += (num_all_vowels + 1)/parts
|
||||||
else:
|
else:
|
||||||
section_range[1] = num_all_vowels
|
section_range[1] = num_all_vowels
|
||||||
|
del new_X, new_X_pure, new_y
|
||||||
|
|
||||||
|
h5f.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user