Added some modularity and X_other_features generation
This commit is contained in:
parent
3c9edd5d1c
commit
719cec6d29
|
@ -3,7 +3,6 @@
|
||||||
<component name="ChangeListManager">
|
<component name="ChangeListManager">
|
||||||
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
|
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/character_based_ffnn/character_based_ffnn_keras.ipynb" afterPath="$PROJECT_DIR$/character_based_ffnn/character_based_ffnn_keras.ipynb" />
|
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/character_based_ffnn/word_accetuation/character_based_ffnn_keras.ipynb" afterPath="$PROJECT_DIR$/character_based_ffnn/word_accetuation/character_based_ffnn_keras.ipynb" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/character_based_ffnn/word_accetuation/character_based_ffnn_keras.ipynb" afterPath="$PROJECT_DIR$/character_based_ffnn/word_accetuation/character_based_ffnn_keras.ipynb" />
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
|
||||||
</list>
|
</list>
|
||||||
|
@ -35,8 +34,8 @@
|
||||||
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
|
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
|
||||||
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="266">
|
<state relative-caret-position="606">
|
||||||
<caret line="562" column="17" lean-forward="true" selection-start-line="562" selection-start-column="17" selection-end-line="562" selection-end-column="17" />
|
<caret line="606" column="50" lean-forward="true" selection-start-line="606" selection-start-column="4" selection-end-line="606" selection-end-column="50" />
|
||||||
<folding>
|
<folding>
|
||||||
<element signature="e#24#63#0" expanded="true" />
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
</folding>
|
</folding>
|
||||||
|
@ -47,8 +46,8 @@
|
||||||
<file leaf-file-name="logistic_regression.py" pinned="false" current-in-tab="false">
|
<file leaf-file-name="logistic_regression.py" pinned="false" current-in-tab="false">
|
||||||
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
|
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="774">
|
<state relative-caret-position="162">
|
||||||
<caret line="45" column="35" lean-forward="false" selection-start-line="45" selection-start-column="33" selection-end-line="45" selection-end-column="35" />
|
<caret line="11" column="0" lean-forward="false" selection-start-line="11" selection-start-column="0" selection-end-line="11" selection-end-column="0" />
|
||||||
<folding>
|
<folding>
|
||||||
<element signature="e#0#12#0" expanded="true" />
|
<element signature="e#0#12#0" expanded="true" />
|
||||||
</folding>
|
</folding>
|
||||||
|
@ -134,6 +133,7 @@
|
||||||
<find>create_and_save_shuffle_vector</find>
|
<find>create_and_save_shuffle_vector</find>
|
||||||
<find>h5f</find>
|
<find>h5f</find>
|
||||||
<find>generate_inputs</find>
|
<find>generate_inputs</find>
|
||||||
|
<find>split_number</find>
|
||||||
</findStrings>
|
</findStrings>
|
||||||
</component>
|
</component>
|
||||||
<component name="Git.Settings">
|
<component name="Git.Settings">
|
||||||
|
@ -153,10 +153,10 @@
|
||||||
</option>
|
</option>
|
||||||
</component>
|
</component>
|
||||||
<component name="ProjectFrameBounds">
|
<component name="ProjectFrameBounds">
|
||||||
<option name="x" value="65" />
|
<option name="x" value="1985" />
|
||||||
<option name="y" value="24" />
|
<option name="y" value="24" />
|
||||||
<option name="width" value="1855" />
|
<option name="width" value="1855" />
|
||||||
<option name="height" value="1056" />
|
<option name="height" value="1176" />
|
||||||
</component>
|
</component>
|
||||||
<component name="ProjectView">
|
<component name="ProjectView">
|
||||||
<navigator currentView="ProjectPane" proportions="" version="1">
|
<navigator currentView="ProjectPane" proportions="" version="1">
|
||||||
|
@ -173,8 +173,8 @@
|
||||||
<foldersAlwaysOnTop value="true" />
|
<foldersAlwaysOnTop value="true" />
|
||||||
</navigator>
|
</navigator>
|
||||||
<panes>
|
<panes>
|
||||||
<pane id="Scratches" />
|
|
||||||
<pane id="Scope" />
|
<pane id="Scope" />
|
||||||
|
<pane id="Scratches" />
|
||||||
<pane id="ProjectPane">
|
<pane id="ProjectPane">
|
||||||
<subPane>
|
<subPane>
|
||||||
<PATH>
|
<PATH>
|
||||||
|
@ -482,7 +482,7 @@
|
||||||
<servers />
|
<servers />
|
||||||
</component>
|
</component>
|
||||||
<component name="ToolWindowManager">
|
<component name="ToolWindowManager">
|
||||||
<frame x="65" y="24" width="1855" height="1056" extended-state="6" />
|
<frame x="1985" y="24" width="1855" height="1176" extended-state="6" />
|
||||||
<editor active="true" />
|
<editor active="true" />
|
||||||
<layout>
|
<layout>
|
||||||
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.16375546" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
|
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.16375546" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
|
||||||
|
@ -951,26 +951,6 @@
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
|
||||||
<state relative-caret-position="266">
|
|
||||||
<caret line="562" column="17" lean-forward="true" selection-start-line="562" selection-start-column="17" selection-end-line="562" selection-end-column="17" />
|
|
||||||
<folding>
|
|
||||||
<element signature="e#24#63#0" expanded="true" />
|
|
||||||
</folding>
|
|
||||||
</state>
|
|
||||||
</provider>
|
|
||||||
</entry>
|
|
||||||
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
|
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
|
||||||
<state relative-caret-position="774">
|
|
||||||
<caret line="45" column="35" lean-forward="false" selection-start-line="45" selection-start-column="33" selection-end-line="45" selection-end-column="35" />
|
|
||||||
<folding>
|
|
||||||
<element signature="e#0#12#0" expanded="true" />
|
|
||||||
</folding>
|
|
||||||
</state>
|
|
||||||
</provider>
|
|
||||||
</entry>
|
|
||||||
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression_loop.py">
|
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression_loop.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="828">
|
<state relative-caret-position="828">
|
||||||
|
@ -1017,5 +997,25 @@
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
|
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="162">
|
||||||
|
<caret line="11" column="0" lean-forward="false" selection-start-line="11" selection-start-column="0" selection-end-line="11" selection-end-column="0" />
|
||||||
|
<folding>
|
||||||
|
<element signature="e#0#12#0" expanded="true" />
|
||||||
|
</folding>
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
|
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="606">
|
||||||
|
<caret line="606" column="50" lean-forward="true" selection-start-line="606" selection-start-column="4" selection-end-line="606" selection-end-column="50" />
|
||||||
|
<folding>
|
||||||
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
|
</folding>
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
|
@ -152,13 +152,16 @@ def generate_presentable_y(accetuations_list, word_list, max_num_vowels):
|
||||||
final_position = accetuations_list[0] + max_num_vowels * accetuations_list[1]
|
final_position = accetuations_list[0] + max_num_vowels * accetuations_list[1]
|
||||||
return final_position
|
return final_position
|
||||||
|
|
||||||
def shuffle_inputs(X, y, X_pure):
|
def shuffle_inputs(X, y, X_pure=False):
|
||||||
s = np.arange(X.shape[0])
|
s = np.arange(X.shape[0])
|
||||||
np.random.shuffle(s)
|
np.random.shuffle(s)
|
||||||
X = X[s]
|
X = X[s]
|
||||||
y = y[s]
|
y = y[s]
|
||||||
|
if X_pure:
|
||||||
X_pure = X_pure[s]
|
X_pure = X_pure[s]
|
||||||
return X, y, X_pure
|
return X, y, X_pure
|
||||||
|
else:
|
||||||
|
return X, y
|
||||||
|
|
||||||
# def generate_inputs():
|
# def generate_inputs():
|
||||||
# dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
|
# dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
|
||||||
|
@ -250,12 +253,21 @@ def shuffle_inputs(X, y, X_pure):
|
||||||
|
|
||||||
def generate_full_matrix_inputs():
|
def generate_full_matrix_inputs():
|
||||||
dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
|
dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
|
||||||
|
train_content, validate_content = split_content(content, 0.2)
|
||||||
|
|
||||||
|
# Generate X and y
|
||||||
print('GENERATING X AND y...')
|
print('GENERATING X AND y...')
|
||||||
|
X_train, y_train = generate_X_and_y(dictionary, max_word, max_num_vowels, train_content, vowels, accetuated_vowels)
|
||||||
|
X_validate, y_validate = generate_X_and_y(dictionary, max_word, max_num_vowels, validate_content, vowels, accetuated_vowels)
|
||||||
|
print('GENERATION SUCCESSFUL!')
|
||||||
|
return X_train, y_train, X_validate, y_validate
|
||||||
|
|
||||||
|
def generate_X_and_y(dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels):
|
||||||
|
|
||||||
# X = np.zeros((len(content), max_word*len(dictionary)))
|
# X = np.zeros((len(content), max_word*len(dictionary)))
|
||||||
y = np.zeros((len(content), max_num_vowels * max_num_vowels ))
|
y = np.zeros((len(content), max_num_vowels * max_num_vowels ))
|
||||||
X = np.zeros((len(content), max_word, len(dictionary)))
|
X = np.zeros((len(content), max_word, len(dictionary)))
|
||||||
|
X_aditional_data = []
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
for el in content:
|
for el in content:
|
||||||
|
@ -289,7 +301,7 @@ def generate_full_matrix_inputs():
|
||||||
y[i][generate_presentable_y(word_accetuations, list(el[3]), max_num_vowels)] = 1
|
y[i][generate_presentable_y(word_accetuations, list(el[3]), max_num_vowels)] = 1
|
||||||
i += 1
|
i += 1
|
||||||
# X = np.array(X)
|
# X = np.array(X)
|
||||||
print('GENERATION SUCCESSFUL!')
|
|
||||||
print('SHUFFELING INPUTS...')
|
print('SHUFFELING INPUTS...')
|
||||||
X, y = shuffle_inputs(X, y)
|
X, y = shuffle_inputs(X, y)
|
||||||
print('INPUTS SHUFFELED!')
|
print('INPUTS SHUFFELED!')
|
||||||
|
@ -303,8 +315,11 @@ def count_vowels(content, vowels):
|
||||||
num_all_vowels += 1
|
num_all_vowels += 1
|
||||||
return num_all_vowels
|
return num_all_vowels
|
||||||
|
|
||||||
|
# def generate_full_vowel_matrix_inputs(name, split_number):
|
||||||
|
|
||||||
def generate_full_vowel_matrix_inputs(name, split_number):
|
|
||||||
|
|
||||||
|
def generate_X_and_y_RAM_efficient(name, split_number):
|
||||||
h5f = h5py.File(name + '.h5', 'w')
|
h5f = h5py.File(name + '.h5', 'w')
|
||||||
dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
|
dictionary, max_word, max_num_vowels, content, vowels, accetuated_vowels = create_dict()
|
||||||
num_all_vowels = count_vowels(content, vowels)
|
num_all_vowels = count_vowels(content, vowels)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user