Added some modularity and X_other_features generation

master
lkrsnik 7 years ago
parent 3c9edd5d1c
commit 719cec6d29

@ -3,7 +3,6 @@
<component name="ChangeListManager">
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/character_based_ffnn/character_based_ffnn_keras.ipynb" afterPath="$PROJECT_DIR$/character_based_ffnn/character_based_ffnn_keras.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/character_based_ffnn/word_accetuation/character_based_ffnn_keras.ipynb" afterPath="$PROJECT_DIR$/character_based_ffnn/word_accetuation/character_based_ffnn_keras.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
</list>
@ -35,8 +34,8 @@
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="266">
<caret line="562" column="17" lean-forward="true" selection-start-line="562" selection-start-column="17" selection-end-line="562" selection-end-column="17" />
<state relative-caret-position="606">
<caret line="606" column="50" lean-forward="true" selection-start-line="606" selection-start-column="4" selection-end-line="606" selection-end-column="50" />
<folding>
<element signature="e#24#63#0" expanded="true" />
</folding>
@ -47,8 +46,8 @@
<file leaf-file-name="logistic_regression.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="774">
<caret line="45" column="35" lean-forward="false" selection-start-line="45" selection-start-column="33" selection-end-line="45" selection-end-column="35" />
<state relative-caret-position="162">
<caret line="11" column="0" lean-forward="false" selection-start-line="11" selection-start-column="0" selection-end-line="11" selection-end-column="0" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
@ -134,6 +133,7 @@
<find>create_and_save_shuffle_vector</find>
<find>h5f</find>
<find>generate_inputs</find>
<find>split_number</find>
</findStrings>
</component>
<component name="Git.Settings">
@ -153,10 +153,10 @@
</option>
</component>
<component name="ProjectFrameBounds">
<option name="x" value="65" />
<option name="x" value="1985" />
<option name="y" value="24" />
<option name="width" value="1855" />
<option name="height" value="1056" />
<option name="height" value="1176" />
</component>
<component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1">
@ -173,8 +173,8 @@
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scratches" />
<pane id="Scope" />
<pane id="Scratches" />
<pane id="ProjectPane">
<subPane>
<PATH>
@ -482,7 +482,7 @@
<servers />
</component>
<component name="ToolWindowManager">
<frame x="65" y="24" width="1855" height="1056" extended-state="6" />
<frame x="1985" y="24" width="1855" height="1176" extended-state="6" />
<editor active="true" />
<layout>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.16375546" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
@ -951,26 +951,6 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="266">
<caret line="562" column="17" lean-forward="true" selection-start-line="562" selection-start-column="17" selection-end-line="562" selection-end-column="17" />
<folding>
<element signature="e#24#63#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="774">
<caret line="45" column="35" lean-forward="false" selection-start-line="45" selection-start-column="33" selection-end-line="45" selection-end-column="35" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression_loop.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="828">
@ -1017,5 +997,25 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="162">
<caret line="11" column="0" lean-forward="false" selection-start-line="11" selection-start-column="0" selection-end-line="11" selection-end-column="0" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="606">
<caret line="606" column="50" lean-forward="true" selection-start-line="606" selection-start-column="4" selection-end-line="606" selection-end-column="50" />
<folding>
<element signature="e#24#63#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</component>
</project>

@ -152,13 +152,16 @@ def generate_presentable_y(accetuations_list, word_list, max_num_vowels):
final_position = accetuations_list[0] + max_num_vowels * accetuations_list[1]
return final_position
def shuffle_inputs(X, y, X_pure):
def shuffle_inputs(X, y, X_pure=False):
s = np.arange(X.shape[0])
np.random.shuffle(s)
X = X[s]
y = y[s]
X_pure = X_pure[s]
return X, y, X_pure
if X_pure:
X_pure = X_pure[s]
return X, y, X_pure
else:
return X, y
# def generate_inputs():
# dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
@ -250,12 +253,21 @@ def shuffle_inputs(X, y, X_pure):
def generate_full_matrix_inputs():
dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
train_content, validate_content = split_content(content, 0.2)
# Generate X and y
print('GENERATING X AND y...')
X_train, y_train = generate_X_and_y(dictionary, max_word, max_num_vowels, train_content, vowels, accetuated_vowels)
X_validate, y_validate = generate_X_and_y(dictionary, max_word, max_num_vowels, validate_content, vowels, accetuated_vowels)
print('GENERATION SUCCESSFUL!')
return X_train, y_train, X_validate, y_validate
def generate_X_and_y(dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels):
# X = np.zeros((len(content), max_word*len(dictionary)))
y = np.zeros((len(content), max_num_vowels * max_num_vowels ))
X = np.zeros((len(content), max_word, len(dictionary)))
X_aditional_data = []
i = 0
for el in content:
@ -289,7 +301,7 @@ def generate_full_matrix_inputs():
y[i][generate_presentable_y(word_accetuations, list(el[3]), max_num_vowels)] = 1
i += 1
# X = np.array(X)
print('GENERATION SUCCESSFUL!')
print('SHUFFELING INPUTS...')
X, y = shuffle_inputs(X, y)
print('INPUTS SHUFFELED!')
@ -303,8 +315,11 @@ def count_vowels(content, vowels):
num_all_vowels += 1
return num_all_vowels
# def generate_full_vowel_matrix_inputs(name, split_number):
def generate_full_vowel_matrix_inputs(name, split_number):
def generate_X_and_y_RAM_efficient(name, split_number):
h5f = h5py.File(name + '.h5', 'w')
dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
num_all_vowels = count_vowels(content, vowels)

Loading…
Cancel
Save