Added some modularity and X_other_features generation

This commit is contained in:
lkrsnik 2017-06-28 16:04:04 +02:00
parent 3c9edd5d1c
commit 719cec6d29
2 changed files with 50 additions and 35 deletions

View File

@ -3,7 +3,6 @@
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment=""> <list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/character_based_ffnn/character_based_ffnn_keras.ipynb" afterPath="$PROJECT_DIR$/character_based_ffnn/character_based_ffnn_keras.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/character_based_ffnn/word_accetuation/character_based_ffnn_keras.ipynb" afterPath="$PROJECT_DIR$/character_based_ffnn/word_accetuation/character_based_ffnn_keras.ipynb" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/character_based_ffnn/word_accetuation/character_based_ffnn_keras.ipynb" afterPath="$PROJECT_DIR$/character_based_ffnn/word_accetuation/character_based_ffnn_keras.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
</list> </list>
@ -35,8 +34,8 @@
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true"> <file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/prepare_data.py"> <entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="266"> <state relative-caret-position="606">
<caret line="562" column="17" lean-forward="true" selection-start-line="562" selection-start-column="17" selection-end-line="562" selection-end-column="17" /> <caret line="606" column="50" lean-forward="true" selection-start-line="606" selection-start-column="4" selection-end-line="606" selection-end-column="50" />
<folding> <folding>
<element signature="e#24#63#0" expanded="true" /> <element signature="e#24#63#0" expanded="true" />
</folding> </folding>
@ -47,8 +46,8 @@
<file leaf-file-name="logistic_regression.py" pinned="false" current-in-tab="false"> <file leaf-file-name="logistic_regression.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py"> <entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="774"> <state relative-caret-position="162">
<caret line="45" column="35" lean-forward="false" selection-start-line="45" selection-start-column="33" selection-end-line="45" selection-end-column="35" /> <caret line="11" column="0" lean-forward="false" selection-start-line="11" selection-start-column="0" selection-end-line="11" selection-end-column="0" />
<folding> <folding>
<element signature="e#0#12#0" expanded="true" /> <element signature="e#0#12#0" expanded="true" />
</folding> </folding>
@ -134,6 +133,7 @@
<find>create_and_save_shuffle_vector</find> <find>create_and_save_shuffle_vector</find>
<find>h5f</find> <find>h5f</find>
<find>generate_inputs</find> <find>generate_inputs</find>
<find>split_number</find>
</findStrings> </findStrings>
</component> </component>
<component name="Git.Settings"> <component name="Git.Settings">
@ -153,10 +153,10 @@
</option> </option>
</component> </component>
<component name="ProjectFrameBounds"> <component name="ProjectFrameBounds">
<option name="x" value="65" /> <option name="x" value="1985" />
<option name="y" value="24" /> <option name="y" value="24" />
<option name="width" value="1855" /> <option name="width" value="1855" />
<option name="height" value="1056" /> <option name="height" value="1176" />
</component> </component>
<component name="ProjectView"> <component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1"> <navigator currentView="ProjectPane" proportions="" version="1">
@ -173,8 +173,8 @@
<foldersAlwaysOnTop value="true" /> <foldersAlwaysOnTop value="true" />
</navigator> </navigator>
<panes> <panes>
<pane id="Scratches" />
<pane id="Scope" /> <pane id="Scope" />
<pane id="Scratches" />
<pane id="ProjectPane"> <pane id="ProjectPane">
<subPane> <subPane>
<PATH> <PATH>
@ -482,7 +482,7 @@
<servers /> <servers />
</component> </component>
<component name="ToolWindowManager"> <component name="ToolWindowManager">
<frame x="65" y="24" width="1855" height="1056" extended-state="6" /> <frame x="1985" y="24" width="1855" height="1176" extended-state="6" />
<editor active="true" /> <editor active="true" />
<layout> <layout>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.16375546" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" /> <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.16375546" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
@ -951,26 +951,6 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="266">
<caret line="562" column="17" lean-forward="true" selection-start-line="562" selection-start-column="17" selection-end-line="562" selection-end-column="17" />
<folding>
<element signature="e#24#63#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="774">
<caret line="45" column="35" lean-forward="false" selection-start-line="45" selection-start-column="33" selection-end-line="45" selection-end-column="35" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression_loop.py"> <entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression_loop.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="828"> <state relative-caret-position="828">
@ -1017,5 +997,25 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="162">
<caret line="11" column="0" lean-forward="false" selection-start-line="11" selection-start-column="0" selection-end-line="11" selection-end-column="0" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="606">
<caret line="606" column="50" lean-forward="true" selection-start-line="606" selection-start-column="4" selection-end-line="606" selection-end-column="50" />
<folding>
<element signature="e#24#63#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</component> </component>
</project> </project>

View File

@ -152,13 +152,16 @@ def generate_presentable_y(accetuations_list, word_list, max_num_vowels):
final_position = accetuations_list[0] + max_num_vowels * accetuations_list[1] final_position = accetuations_list[0] + max_num_vowels * accetuations_list[1]
return final_position return final_position
def shuffle_inputs(X, y, X_pure): def shuffle_inputs(X, y, X_pure=False):
s = np.arange(X.shape[0]) s = np.arange(X.shape[0])
np.random.shuffle(s) np.random.shuffle(s)
X = X[s] X = X[s]
y = y[s] y = y[s]
if X_pure:
X_pure = X_pure[s] X_pure = X_pure[s]
return X, y, X_pure return X, y, X_pure
else:
return X, y
# def generate_inputs(): # def generate_inputs():
# dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict() # dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
@ -250,12 +253,21 @@ def shuffle_inputs(X, y, X_pure):
def generate_full_matrix_inputs(): def generate_full_matrix_inputs():
dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict() dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
train_content, validate_content = split_content(content, 0.2)
# Generate X and y
print('GENERATING X AND y...') print('GENERATING X AND y...')
X_train, y_train = generate_X_and_y(dictionary, max_word, max_num_vowels, train_content, vowels, accetuated_vowels)
X_validate, y_validate = generate_X_and_y(dictionary, max_word, max_num_vowels, validate_content, vowels, accetuated_vowels)
print('GENERATION SUCCESSFUL!')
return X_train, y_train, X_validate, y_validate
def generate_X_and_y(dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels):
# X = np.zeros((len(content), max_word*len(dictionary))) # X = np.zeros((len(content), max_word*len(dictionary)))
y = np.zeros((len(content), max_num_vowels * max_num_vowels )) y = np.zeros((len(content), max_num_vowels * max_num_vowels ))
X = np.zeros((len(content), max_word, len(dictionary))) X = np.zeros((len(content), max_word, len(dictionary)))
X_aditional_data = []
i = 0 i = 0
for el in content: for el in content:
@ -289,7 +301,7 @@ def generate_full_matrix_inputs():
y[i][generate_presentable_y(word_accetuations, list(el[3]), max_num_vowels)] = 1 y[i][generate_presentable_y(word_accetuations, list(el[3]), max_num_vowels)] = 1
i += 1 i += 1
# X = np.array(X) # X = np.array(X)
print('GENERATION SUCCESSFUL!')
print('SHUFFELING INPUTS...') print('SHUFFELING INPUTS...')
X, y = shuffle_inputs(X, y) X, y = shuffle_inputs(X, y)
print('INPUTS SHUFFELED!') print('INPUTS SHUFFELED!')
@ -303,8 +315,11 @@ def count_vowels(content, vowels):
num_all_vowels += 1 num_all_vowels += 1
return num_all_vowels return num_all_vowels
# def generate_full_vowel_matrix_inputs(name, split_number):
def generate_full_vowel_matrix_inputs(name, split_number):
def generate_X_and_y_RAM_efficient(name, split_number):
h5f = h5py.File(name + '.h5', 'w') h5f = h5py.File(name + '.h5', 'w')
dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict() dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
num_all_vowels = count_vowels(content, vowels) num_all_vowels = count_vowels(content, vowels)