diff --git a/.idea/workspace.xml b/.idea/workspace.xml index a14642a..e97f849 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,7 +2,9 @@ + + @@ -178,7 +181,7 @@ - + @@ -205,49 +208,9 @@ - - - - - - - - - - - - - - - - - - - - - - - + @@ -860,8 +823,8 @@ - - + + @@ -928,12 +891,6 @@ - - - - - - - + \ No newline at end of file diff --git a/prepare_data.py b/prepare_data.py index 2465409..9e149fb 100644 --- a/prepare_data.py +++ b/prepare_data.py @@ -7,6 +7,7 @@ import h5py import gc import math + # functions for saving, loading and shuffling whole arrays to ram def save_inputs(file_name, X, y, other_features=[]): h5f = h5py.File(file_name, 'w') @@ -18,6 +19,7 @@ def save_inputs(file_name, X, y, other_features=[]): h5f.create_dataset(k, data=v) h5f.close() + def load_inputs(file_name, other_features=False): h5f = h5py.File(file_name,'r') X = h5f['X'][:] @@ -42,6 +44,7 @@ def shuffle_inputs(X, y, X_pure=[]): else: return X, y + # functions for saving and loading partial arrays to ram def create_and_save_inputs(file_name, part, X, y, X_pure): # X, y, X_pure = generate_full_vowel_matrix_inputs() @@ -51,6 +54,7 @@ def create_and_save_inputs(file_name, part, X, y, X_pure): h5f.create_dataset(k,data=v) h5f.close() + def load_extended_inputs(file_name, obtain_range): h5f = h5py.File(file_name,'r') X = h5f['X'][obtain_range[0]:obtain_range[1]] @@ -70,6 +74,7 @@ def create_and_save_shuffle_vector(file_name, shuffle_vector): h5f.create_dataset(k,data=v) h5f.close() + def load_shuffle_vector(file_name): h5f = h5py.File(file_name,'r') shuffle_vector = h5f['shuffle_vector'][[179859, 385513, 893430]] @@ -452,6 +457,19 @@ def generate_X_and_y_RAM_efficient(name, split_number): h5f.close() +# generator for inputs for tracking of data fitting +def generate_fake_epoch(orig_X, orig_X_additional, orig_y, batch_size): + size = orig_X.shape[0] + while 1: + loc = 0 + while loc < size: + if loc + batch_size >= size: + yield([orig_X[loc:size], orig_X_additional[loc:size]], orig_y[loc:size]) + else: + yield([orig_X[loc:loc + batch_size], orig_X_additional[loc:loc + batch_size]], orig_y[loc:loc + batch_size]) + loc += batch_size + + # generator for inputs def generate_arrays_from_file(path, batch_size): h5f = h5py.File(path, 'r')