From 719cec6d29a89544b6e667545890ce97cb30ce13 Mon Sep 17 00:00:00 2001 From: lkrsnik Date: Wed, 28 Jun 2017 16:04:04 +0200 Subject: [PATCH] Added some modularity and X_other_features generation --- .idea/workspace.xml | 58 ++++++++++++++++++++++----------------------- prepare_data.py | 27 ++++++++++++++++----- 2 files changed, 50 insertions(+), 35 deletions(-) diff --git a/.idea/workspace.xml b/.idea/workspace.xml index c634d16..8c2f2a9 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -3,7 +3,6 @@ - @@ -35,8 +34,8 @@ - - + + @@ -47,8 +46,8 @@ - - + + @@ -134,6 +133,7 @@ create_and_save_shuffle_vector h5f generate_inputs + split_number @@ -153,10 +153,10 @@ - @@ -173,8 +173,8 @@ - + @@ -482,7 +482,7 @@ - + @@ -951,26 +951,6 @@ - - - - - - - - - - - - - - - - - - - - @@ -1017,5 +997,25 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/prepare_data.py b/prepare_data.py index 9104d56..c21a2f0 100644 --- a/prepare_data.py +++ b/prepare_data.py @@ -152,13 +152,16 @@ def generate_presentable_y(accetuations_list, word_list, max_num_vowels): final_position = accetuations_list[0] + max_num_vowels * accetuations_list[1] return final_position -def shuffle_inputs(X, y, X_pure): +def shuffle_inputs(X, y, X_pure=False): s = np.arange(X.shape[0]) np.random.shuffle(s) X = X[s] y = y[s] - X_pure = X_pure[s] - return X, y, X_pure + if X_pure: + X_pure = X_pure[s] + return X, y, X_pure + else: + return X, y # def generate_inputs(): # dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict() @@ -250,12 +253,21 @@ def shuffle_inputs(X, y, X_pure): def generate_full_matrix_inputs(): dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict() + train_content, validate_content = split_content(content, 0.2) - + # Generate X and y print('GENERATING X AND y...') + X_train, y_train = generate_X_and_y(dictionary, max_word, max_num_vowels, train_content, vowels, accetuated_vowels) + X_validate, y_validate = generate_X_and_y(dictionary, max_word, max_num_vowels, validate_content, vowels, accetuated_vowels) + print('GENERATION SUCCESSFUL!') + return X_train, y_train, X_validate, y_validate + +def generate_X_and_y(dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels): + # X = np.zeros((len(content), max_word*len(dictionary))) y = np.zeros((len(content), max_num_vowels * max_num_vowels )) X = np.zeros((len(content), max_word, len(dictionary))) + X_aditional_data = [] i = 0 for el in content: @@ -289,7 +301,7 @@ def generate_full_matrix_inputs(): y[i][generate_presentable_y(word_accetuations, list(el[3]), max_num_vowels)] = 1 i += 1 # X = np.array(X) - print('GENERATION SUCCESSFUL!') + print('SHUFFELING INPUTS...') X, y = shuffle_inputs(X, y) print('INPUTS SHUFFELED!') @@ -303,8 +315,11 @@ def count_vowels(content, vowels): num_all_vowels += 1 return num_all_vowels +# def generate_full_vowel_matrix_inputs(name, split_number): + + -def generate_full_vowel_matrix_inputs(name, split_number): +def generate_X_and_y_RAM_efficient(name, split_number): h5f = h5py.File(name + '.h5', 'w') dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict() num_all_vowels = count_vowels(content, vowels)