Test ready bidirectional architectural inputs
This commit is contained in:
parent
9edad0ad07
commit
94ce159d44
|
@ -2,19 +2,10 @@
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="ChangeListManager">
|
<component name="ChangeListManager">
|
||||||
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
|
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
|
||||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/sloleks_accentuation.py" />
|
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/ensemble_test_errors.pkl" afterPath="$PROJECT_DIR$/cnn/word_accetuation/ensemble_test_errors.pkl" />
|
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/ensemble_test_predictions.pkl" afterPath="$PROJECT_DIR$/cnn/word_accetuation/ensemble_test_predictions.pkl" />
|
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/syllables_word_accetuation_test_error.pkl" afterPath="$PROJECT_DIR$/cnn/word_accetuation/syllables_word_accetuation_test_error.pkl" />
|
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/syllables_word_accetuation_test_predictions.pkl" afterPath="$PROJECT_DIR$/cnn/word_accetuation/syllables_word_accetuation_test_predictions.pkl" />
|
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn.ipynb" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn.ipynb" />
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/connected_text_accetuation.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/connected_text_accetuation.ipynb" />
|
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/results_presentation.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/results_presentation.ipynb" />
|
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v3_10/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v3_10/cnn.ipynb" />
|
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis.ipynb" />
|
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/sloleks_accetuation.ipynb" afterPath="$PROJECT_DIR$/sloleks_accetuation.ipynb" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/workbench.py" afterPath="$PROJECT_DIR$/workbench.py" />
|
||||||
</list>
|
</list>
|
||||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||||
<option name="TRACKING_ENABLED" value="true" />
|
<option name="TRACKING_ENABLED" value="true" />
|
||||||
|
@ -41,19 +32,20 @@
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
</file>
|
</file>
|
||||||
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
|
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="false">
|
||||||
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="-1034">
|
<state relative-caret-position="298">
|
||||||
<caret line="71" column="114" lean-forward="true" selection-start-line="68" selection-start-column="12" selection-end-line="71" selection-end-column="114" />
|
<caret line="444" column="40" lean-forward="false" selection-start-line="444" selection-start-column="21" selection-end-line="444" selection-end-column="40" />
|
||||||
<folding>
|
<folding>
|
||||||
<element signature="e#24#63#0" expanded="true" />
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
<element signature="e#6485#7773#0" expanded="false" />
|
<element signature="e#6821#8109#0" expanded="false" />
|
||||||
<element signature="e#9429#9724#0" expanded="false" />
|
<element signature="e#9765#10060#0" expanded="false" />
|
||||||
<element signature="e#15725#16027#0" expanded="false" />
|
<element signature="e#13592#14199#0" expanded="false" />
|
||||||
<element signature="e#17000#17346#0" expanded="false" />
|
<element signature="e#16771#17073#0" expanded="false" />
|
||||||
<element signature="e#21415#22062#0" expanded="false" />
|
<element signature="e#18046#18392#0" expanded="false" />
|
||||||
<element signature="e#32751#32892#0" expanded="false" />
|
<element signature="e#22808#23455#0" expanded="false" />
|
||||||
|
<element signature="e#34768#34909#0" expanded="false" />
|
||||||
</folding>
|
</folding>
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
|
@ -91,11 +83,11 @@
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
</file>
|
</file>
|
||||||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="true">
|
||||||
<entry file="file://$PROJECT_DIR$/workbench.py">
|
<entry file="file://$PROJECT_DIR$/workbench.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="1710">
|
<state relative-caret-position="306">
|
||||||
<caret line="106" column="30" lean-forward="false" selection-start-line="106" selection-start-column="30" selection-end-line="106" selection-end-column="39" />
|
<caret line="17" column="0" lean-forward="true" selection-start-line="17" selection-start-column="0" selection-end-line="17" selection-end-column="0" />
|
||||||
<folding>
|
<folding>
|
||||||
<element signature="e#24#63#0" expanded="true" />
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
</folding>
|
</folding>
|
||||||
|
@ -182,14 +174,6 @@
|
||||||
</component>
|
</component>
|
||||||
<component name="FindInProjectRecents">
|
<component name="FindInProjectRecents">
|
||||||
<findStrings>
|
<findStrings>
|
||||||
<find>round</find>
|
|
||||||
<find>is_vow</find>
|
|
||||||
<find>self._input_type == 'l'</find>
|
|
||||||
<find>print</find>
|
|
||||||
<find>np.eye</find>
|
|
||||||
<find>allow_shuffle_vector_generation</find>
|
|
||||||
<find>accented_vowels</find>
|
|
||||||
<find>generate</find>
|
|
||||||
<find>generate_x_and</find>
|
<find>generate_x_and</find>
|
||||||
<find>accentuate</find>
|
<find>accentuate</find>
|
||||||
<find>_generator</find>
|
<find>_generator</find>
|
||||||
|
@ -206,12 +190,20 @@
|
||||||
<find>convert_multext</find>
|
<find>convert_multext</find>
|
||||||
<find>_syllable_generator</find>
|
<find>_syllable_generator</find>
|
||||||
<find>generator</find>
|
<find>generator</find>
|
||||||
<find>generate_data</find>
|
|
||||||
<find>_x</find>
|
<find>_x</find>
|
||||||
<find>bidirectional_basic_input</find>
|
<find>bidirectional_basic_input</find>
|
||||||
<find>_bidirectional_basic_input</find>
|
<find>_bidirectional_basic_input</find>
|
||||||
<find>shuffeling</find>
|
<find>shuffeling</find>
|
||||||
|
<find>generate_data</find>
|
||||||
|
<find>_generate_inputs</find>
|
||||||
|
<find>content_shuffle_vector_path</find>
|
||||||
|
<find>content_shuffle_vector_location</find>
|
||||||
|
<find>_shuffle_all_inputs</find>
|
||||||
|
<find>_generator_instance</find>
|
||||||
|
<find>_x_letter_input</find>
|
||||||
<find>_generate_x_and_y</find>
|
<find>_generate_x_and_y</find>
|
||||||
|
<find>content</find>
|
||||||
|
<find>number_of_syllables</find>
|
||||||
</findStrings>
|
</findStrings>
|
||||||
</component>
|
</component>
|
||||||
<component name="Git.Settings">
|
<component name="Git.Settings">
|
||||||
|
@ -232,9 +224,9 @@
|
||||||
<option value="$PROJECT_DIR$/tex_hyphenation.py" />
|
<option value="$PROJECT_DIR$/tex_hyphenation.py" />
|
||||||
<option value="$PROJECT_DIR$/notes" />
|
<option value="$PROJECT_DIR$/notes" />
|
||||||
<option value="$PROJECT_DIR$/workbench.xrsl" />
|
<option value="$PROJECT_DIR$/workbench.xrsl" />
|
||||||
<option value="$PROJECT_DIR$/workbench.py" />
|
|
||||||
<option value="$PROJECT_DIR$/sloleks_accentuation.py" />
|
<option value="$PROJECT_DIR$/sloleks_accentuation.py" />
|
||||||
<option value="$PROJECT_DIR$/prepare_data.py" />
|
<option value="$PROJECT_DIR$/prepare_data.py" />
|
||||||
|
<option value="$PROJECT_DIR$/workbench.py" />
|
||||||
</list>
|
</list>
|
||||||
</option>
|
</option>
|
||||||
</component>
|
</component>
|
||||||
|
@ -925,16 +917,6 @@
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
<entry file="file://$PROJECT_DIR$/workbench.py">
|
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
|
||||||
<state relative-caret-position="1710">
|
|
||||||
<caret line="106" column="30" lean-forward="false" selection-start-line="106" selection-start-column="30" selection-end-line="106" selection-end-column="39" />
|
|
||||||
<folding>
|
|
||||||
<element signature="e#24#63#0" expanded="true" />
|
|
||||||
</folding>
|
|
||||||
</state>
|
|
||||||
</provider>
|
|
||||||
</entry>
|
|
||||||
<entry file="file://$PROJECT_DIR$/../adventofcode/2017/2/1.py" />
|
<entry file="file://$PROJECT_DIR$/../adventofcode/2017/2/1.py" />
|
||||||
<entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
|
<entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
@ -948,16 +930,27 @@
|
||||||
</entry>
|
</entry>
|
||||||
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="-1034">
|
<state relative-caret-position="298">
|
||||||
<caret line="71" column="114" lean-forward="true" selection-start-line="68" selection-start-column="12" selection-end-line="71" selection-end-column="114" />
|
<caret line="444" column="40" lean-forward="false" selection-start-line="444" selection-start-column="21" selection-end-line="444" selection-end-column="40" />
|
||||||
|
<folding>
|
||||||
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
|
<element signature="e#6821#8109#0" expanded="false" />
|
||||||
|
<element signature="e#9765#10060#0" expanded="false" />
|
||||||
|
<element signature="e#13592#14199#0" expanded="false" />
|
||||||
|
<element signature="e#16771#17073#0" expanded="false" />
|
||||||
|
<element signature="e#18046#18392#0" expanded="false" />
|
||||||
|
<element signature="e#22808#23455#0" expanded="false" />
|
||||||
|
<element signature="e#34768#34909#0" expanded="false" />
|
||||||
|
</folding>
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
|
<entry file="file://$PROJECT_DIR$/workbench.py">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="306">
|
||||||
|
<caret line="17" column="0" lean-forward="true" selection-start-line="17" selection-start-column="0" selection-end-line="17" selection-end-column="0" />
|
||||||
<folding>
|
<folding>
|
||||||
<element signature="e#24#63#0" expanded="true" />
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
<element signature="e#6485#7773#0" expanded="false" />
|
|
||||||
<element signature="e#9429#9724#0" expanded="false" />
|
|
||||||
<element signature="e#15725#16027#0" expanded="false" />
|
|
||||||
<element signature="e#17000#17346#0" expanded="false" />
|
|
||||||
<element signature="e#21415#22062#0" expanded="false" />
|
|
||||||
<element signature="e#32751#32892#0" expanded="false" />
|
|
||||||
</folding>
|
</folding>
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
|
|
|
@ -22,7 +22,7 @@ from keras.models import load_model
|
||||||
class Data:
|
class Data:
|
||||||
def __init__(self, input_type, allow_shuffle_vector_generation=False, save_generated_data=True, shuffle_all_inputs=True,
|
def __init__(self, input_type, allow_shuffle_vector_generation=False, save_generated_data=True, shuffle_all_inputs=True,
|
||||||
additional_letter_attributes=True, reverse_inputs=True, accent_classification=False, number_of_syllables=False,
|
additional_letter_attributes=True, reverse_inputs=True, accent_classification=False, number_of_syllables=False,
|
||||||
convert_multext=True, bidirectional_basic_input=False):
|
convert_multext=True, bidirectional_basic_input=False, bidirectional_architectural_input=False):
|
||||||
self._input_type = input_type
|
self._input_type = input_type
|
||||||
self._save_generated_data = save_generated_data
|
self._save_generated_data = save_generated_data
|
||||||
self._allow_shuffle_vector_generation = allow_shuffle_vector_generation
|
self._allow_shuffle_vector_generation = allow_shuffle_vector_generation
|
||||||
|
@ -33,14 +33,18 @@ class Data:
|
||||||
self._number_of_syllables = number_of_syllables
|
self._number_of_syllables = number_of_syllables
|
||||||
self._convert_multext = convert_multext
|
self._convert_multext = convert_multext
|
||||||
self._bidirectional_basic_input = bidirectional_basic_input
|
self._bidirectional_basic_input = bidirectional_basic_input
|
||||||
|
self._bidirectional_architectural_input = bidirectional_architectural_input
|
||||||
|
|
||||||
self.x_train = None
|
self.x_train = None
|
||||||
|
# self.x2_train = None
|
||||||
self.x_other_features_train = None
|
self.x_other_features_train = None
|
||||||
self.y_train = None
|
self.y_train = None
|
||||||
self.x_test = None
|
self.x_test = None
|
||||||
|
# self.x2_test = None
|
||||||
self.x_other_features_test = None
|
self.x_other_features_test = None
|
||||||
self.y_test = None
|
self.y_test = None
|
||||||
self.x_validate = None
|
self.x_validate = None
|
||||||
|
# self.x2_validate = None
|
||||||
self.x_other_features_validate = None
|
self.x_other_features_validate = None
|
||||||
self.y_validate = None
|
self.y_validate = None
|
||||||
|
|
||||||
|
@ -63,15 +67,11 @@ class Data:
|
||||||
shuffle_vector_path = '{}{}'.format(inputs_location, shuffle_vector)
|
shuffle_vector_path = '{}{}'.format(inputs_location, shuffle_vector)
|
||||||
|
|
||||||
# actual generation of inputs
|
# actual generation of inputs
|
||||||
self._generate_inputs(content_path, content_shuffle_vector_path, shuffle_vector_path, test_and_validation_size)
|
self._generate_inputs(content_path, content_shuffle_vector_path, shuffle_vector_path, test_and_validation_size, train_path, test_path,
|
||||||
|
validate_path)
|
||||||
|
|
||||||
# save inputs
|
def _generate_inputs(self, content_location, content_shuffle_vector_location, shuffle_vector_location, test_and_validation_size, train_path,
|
||||||
if self._save_generated_data:
|
test_path, validate_path):
|
||||||
self._save_inputs(train_path, self.x_train, self.x_other_features_train, self.y_train)
|
|
||||||
self._save_inputs(test_path, self.x_test, self.x_other_features_test, self.y_test)
|
|
||||||
self._save_inputs(validate_path, self.x_validate, self.x_other_features_validate, self.y_validate)
|
|
||||||
|
|
||||||
def _generate_inputs(self, content_location, content_shuffle_vector_location, shuffle_vector_location, test_and_validation_size):
|
|
||||||
print('READING CONTENT...')
|
print('READING CONTENT...')
|
||||||
content = self._read_content(content_location)
|
content = self._read_content(content_location)
|
||||||
print('CONTENT READ SUCCESSFULLY')
|
print('CONTENT READ SUCCESSFULLY')
|
||||||
|
@ -97,6 +97,13 @@ class Data:
|
||||||
accented_vowels, feature_dictionary,
|
accented_vowels, feature_dictionary,
|
||||||
shuffle_vector_location + '_validate.h5')
|
shuffle_vector_location + '_validate.h5')
|
||||||
print('GENERATION SUCCESSFUL!')
|
print('GENERATION SUCCESSFUL!')
|
||||||
|
|
||||||
|
# save inputs
|
||||||
|
if self._save_generated_data:
|
||||||
|
self._save_inputs(train_path, self.x_train, self.x_other_features_train, self.y_train)
|
||||||
|
self._save_inputs(test_path, self.x_test, self.x_other_features_test, self.y_test)
|
||||||
|
self._save_inputs(validate_path, self.x_validate, self.x_other_features_validate, self.y_validate)
|
||||||
|
|
||||||
# return X_train, X_other_features_train, y_train, X_test, X_other_features_test, y_test, X_validate, X_other_features_validate, y_validate
|
# return X_train, X_other_features_train, y_train, X_test, X_other_features_test, y_test, X_validate, X_other_features_validate, y_validate
|
||||||
|
|
||||||
# functions for creating X and y from content
|
# functions for creating X and y from content
|
||||||
|
@ -179,7 +186,7 @@ class Data:
|
||||||
h5f.close()
|
h5f.close()
|
||||||
return shuffle_vector
|
return shuffle_vector
|
||||||
|
|
||||||
def _x_letter_input(self, content, dictionary, max_word, vowels):
|
def _x_letter_input(self, content, dictionary, max_word, vowels, shuffle_vector_location):
|
||||||
if self._additional_letter_attributes:
|
if self._additional_letter_attributes:
|
||||||
if not self._bidirectional_basic_input:
|
if not self._bidirectional_basic_input:
|
||||||
x = np.zeros((len(content), max_word, len(dictionary) + 6), dtype=int)
|
x = np.zeros((len(content), max_word, len(dictionary) + 6), dtype=int)
|
||||||
|
@ -196,9 +203,18 @@ class Data:
|
||||||
else:
|
else:
|
||||||
x = np.zeros((len(content), 2 * max_word, len(dictionary)), dtype=int)
|
x = np.zeros((len(content), 2 * max_word, len(dictionary)), dtype=int)
|
||||||
|
|
||||||
i = 0
|
if self._shuffle_all_inputs:
|
||||||
for el in content:
|
s = self._load_shuffle_vector(shuffle_vector_location, len(content))
|
||||||
word = el[0]
|
else:
|
||||||
|
s = None
|
||||||
|
|
||||||
|
# i = 0
|
||||||
|
for i in range(len(content)):
|
||||||
|
if self._shuffle_all_inputs:
|
||||||
|
mod_i = s[i]
|
||||||
|
else:
|
||||||
|
mod_i = i
|
||||||
|
word = content[mod_i][0]
|
||||||
if self._reverse_inputs:
|
if self._reverse_inputs:
|
||||||
word = word[::-1]
|
word = word[::-1]
|
||||||
j = 0
|
j = 0
|
||||||
|
@ -242,7 +258,7 @@ class Data:
|
||||||
if self._bidirectional_basic_input:
|
if self._bidirectional_basic_input:
|
||||||
x[i][j2][len(dictionary) + 5] = 1
|
x[i][j2][len(dictionary) + 5] = 1
|
||||||
j += 1
|
j += 1
|
||||||
i += 1
|
#i += 1
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _x_syllable_input(self, content, dictionary, max_num_vowels, vowels):
|
def _x_syllable_input(self, content, dictionary, max_num_vowels, vowels):
|
||||||
|
@ -266,11 +282,19 @@ class Data:
|
||||||
i += 1
|
i += 1
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _y_output(self, content, max_num_vowels, vowels, accentuated_vowels):
|
def _y_output(self, content, max_num_vowels, vowels, accentuated_vowels, shuffle_vector_location):
|
||||||
y = np.zeros((len(content), max_num_vowels))
|
y = np.zeros((len(content), max_num_vowels))
|
||||||
i = 0
|
i = 0
|
||||||
|
if self._shuffle_all_inputs:
|
||||||
for el in content:
|
s = self._load_shuffle_vector(shuffle_vector_location, len(content))
|
||||||
|
else:
|
||||||
|
s = None
|
||||||
|
for i in range(len(content)):
|
||||||
|
if self._shuffle_all_inputs:
|
||||||
|
mod_i = s[i]
|
||||||
|
else:
|
||||||
|
mod_i = i
|
||||||
|
el = content[mod_i]
|
||||||
word = el[3]
|
word = el[3]
|
||||||
if self._reverse_inputs:
|
if self._reverse_inputs:
|
||||||
word = word[::-1]
|
word = word[::-1]
|
||||||
|
@ -292,27 +316,26 @@ class Data:
|
||||||
if self._is_vowel(word, j, vowels):
|
if self._is_vowel(word, j, vowels):
|
||||||
num_vowels += 1
|
num_vowels += 1
|
||||||
j += 1
|
j += 1
|
||||||
i += 1
|
|
||||||
return y
|
return y
|
||||||
|
|
||||||
# Generate each y as an array of 11 numbers (with possible values between 0 and 1)
|
# Generate each y as an array of 11 numbers (with possible values between 0 and 1)
|
||||||
def _generate_x_and_y(self, dictionary, max_word, max_num_vowels, content, vowels, accentuated_vowels, feature_dictionary,
|
def _generate_x_and_y(self, dictionary, max_word, max_num_vowels, content, vowels, accentuated_vowels, feature_dictionary,
|
||||||
shuffle_vector_location):
|
shuffle_vector_location):
|
||||||
if self._input_type == 'l':
|
if self._input_type == 'l':
|
||||||
x = self._x_letter_input(content, dictionary, max_word, vowels)
|
x = self._x_letter_input(content, dictionary, max_word, vowels, shuffle_vector_location)
|
||||||
elif self._input_type == 's' or self._input_type == 'sl':
|
elif self._input_type == 's' or self._input_type == 'sl':
|
||||||
x = self._x_syllable_input(content, dictionary, max_num_vowels, vowels)
|
x = self._x_syllable_input(content, dictionary, max_num_vowels, vowels)
|
||||||
else:
|
else:
|
||||||
raise ValueError('No input_type provided. It could be \'l\', \'s\' or \'sl\'.')
|
raise ValueError('No input_type provided. It could be \'l\', \'s\' or \'sl\'.')
|
||||||
y = self._y_output(content, max_num_vowels, vowels, accentuated_vowels)
|
y = self._y_output(content, max_num_vowels, vowels, accentuated_vowels, shuffle_vector_location)
|
||||||
|
|
||||||
# print('CREATING OTHER FEATURES...')
|
# print('CREATING OTHER FEATURES...')
|
||||||
x_other_features = self._create_x_features(content, feature_dictionary, vowels)
|
x_other_features = self._create_x_features(content, feature_dictionary, vowels, shuffle_vector_location)
|
||||||
# print('OTHER FEATURES CREATED!')
|
# print('OTHER FEATURES CREATED!')
|
||||||
|
|
||||||
if self._shuffle_all_inputs:
|
if self._shuffle_all_inputs:
|
||||||
print('SHUFFELING INPUTS...')
|
print('SHUFFELING INPUTS...')
|
||||||
x, x_other_features, y = self._shuffle_inputs(x, x_other_features, y, shuffle_vector_location)
|
#x, x_other_features, y = self._shuffle_inputs(x, x_other_features, y, shuffle_vector_location)
|
||||||
print('INPUTS SHUFFELED!')
|
print('INPUTS SHUFFELED!')
|
||||||
return x, x_other_features, y
|
return x, x_other_features, y
|
||||||
|
|
||||||
|
@ -390,10 +413,19 @@ class Data:
|
||||||
split = min(split_options, key=lambda x: x[1])
|
split = min(split_options, key=lambda x: x[1])
|
||||||
return consonants[:split[0] + 1], consonants[split[0] + 1:]
|
return consonants[:split[0] + 1], consonants[split[0] + 1:]
|
||||||
|
|
||||||
def _create_x_features(self, content, feature_dictionary, vowels):
|
def _create_x_features(self, content, feature_dictionary, vowels, shuffle_vector_location):
|
||||||
content = content
|
content = content
|
||||||
x_other_features = []
|
x_other_features = []
|
||||||
for el in content:
|
if self._shuffle_all_inputs:
|
||||||
|
s = self._load_shuffle_vector(shuffle_vector_location, len(content))
|
||||||
|
else:
|
||||||
|
s = None
|
||||||
|
for index in range(len(content)):
|
||||||
|
if self._shuffle_all_inputs:
|
||||||
|
mod_i = s[index]
|
||||||
|
else:
|
||||||
|
mod_i = index
|
||||||
|
el = content[mod_i]
|
||||||
x_el_other_features = []
|
x_el_other_features = []
|
||||||
if self._convert_multext:
|
if self._convert_multext:
|
||||||
converted_el = ''.join(self._convert_to_multext_east_v4(list(el[2]), feature_dictionary))
|
converted_el = ''.join(self._convert_to_multext_east_v4(list(el[2]), feature_dictionary))
|
||||||
|
@ -587,9 +619,17 @@ class Data:
|
||||||
else:
|
else:
|
||||||
while loc < size:
|
while loc < size:
|
||||||
if loc + batch_size >= size:
|
if loc + batch_size >= size:
|
||||||
yield ([orig_x[loc:size], orig_x_additional[loc:size]], orig_y[loc:size])
|
if self._bidirectional_architectural_input:
|
||||||
|
split_orig_x = np.hsplit(orig_x[loc:size], 2)
|
||||||
|
yield ([split_orig_x[0], split_orig_x[1], orig_x_additional[loc:size]], orig_y[loc:size])
|
||||||
|
else:
|
||||||
|
yield ([orig_x[loc:size], orig_x_additional[loc:size]], orig_y[loc:size])
|
||||||
else:
|
else:
|
||||||
yield ([orig_x[loc:loc + batch_size], orig_x_additional[loc:loc + batch_size]], orig_y[loc:loc + batch_size])
|
if self._bidirectional_architectural_input:
|
||||||
|
split_orig_x = np.hsplit(orig_x[loc:loc + batch_size], 2)
|
||||||
|
yield ([split_orig_x[0], split_orig_x[1], orig_x_additional[loc:loc + batch_size]], orig_y[loc:loc + batch_size])
|
||||||
|
else:
|
||||||
|
yield ([orig_x[loc:loc + batch_size], orig_x_additional[loc:loc + batch_size]], orig_y[loc:loc + batch_size])
|
||||||
loc += batch_size
|
loc += batch_size
|
||||||
|
|
||||||
# generator for inputs for tracking of data fitting
|
# generator for inputs for tracking of data fitting
|
||||||
|
|
59
workbench.py
59
workbench.py
|
@ -27,29 +27,16 @@ from prepare_data import *
|
||||||
# save_inputs('../../internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', X_validate, y_validate, other_features = X_other_features_validate)
|
# save_inputs('../../internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', X_validate, y_validate, other_features = X_other_features_validate)
|
||||||
# X_train, X_other_features_train, y_train = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_train_inputs_other_features_output_11.h5', other_features=True)
|
# X_train, X_other_features_train, y_train = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_train_inputs_other_features_output_11.h5', other_features=True)
|
||||||
# X_validate, X_other_features_validate, y_validate = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', other_features=True)
|
# X_validate, X_other_features_validate, y_validate = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', other_features=True)
|
||||||
# letters
|
data = Data('l', bidirectional_basic_input=True, bidirectional_architectural_input=True)
|
||||||
# data = Data('l', save_generated_data=False, number_of_syllables=True)
|
data.generate_data('letters_word_accetuation_bidirectional_train',
|
||||||
|
'letters_word_accetuation_bidirectional_test',
|
||||||
# syllabled letters
|
'letters_word_accetuation_bidirectional_validate', content_name='SlovarIJS_BESEDE_utf8.lex',
|
||||||
data = Data('s', save_generated_data=False, accent_classification=True)
|
|
||||||
data.generate_data('letters_word_accetuation_train',
|
|
||||||
'letters_word_accetuation_test',
|
|
||||||
'letters_word_accetuation_validate', content_name='SlovarIJS_BESEDE_utf8.lex',
|
|
||||||
content_shuffle_vector='content_shuffle_vector', shuffle_vector='shuffle_vector',
|
content_shuffle_vector='content_shuffle_vector', shuffle_vector='shuffle_vector',
|
||||||
inputs_location='', content_location='')
|
inputs_location='', content_location='')
|
||||||
|
|
||||||
# concatenate test and train data
|
|
||||||
# data.x_train = np.concatenate((data.x_train, data.x_test), axis=0)
|
|
||||||
# data.x_other_features_train = np.concatenate((data.x_other_features_train, data.x_other_features_test), axis=0)
|
|
||||||
# data.y_train = np.concatenate((data.y_train, data.y_test), axis=0)
|
|
||||||
|
|
||||||
# concatenate all data
|
|
||||||
data.x_train = np.concatenate((data.x_train, data.x_test, data.x_validate), axis=0)
|
|
||||||
data.x_other_features_train = np.concatenate((data.x_other_features_train, data.x_other_features_test, data.x_other_features_validate), axis=0)
|
|
||||||
data.y_train = np.concatenate((data.y_train, data.y_test, data.y_validate), axis=0)
|
|
||||||
|
|
||||||
num_examples = len(data.x_train) # training set size
|
num_examples = len(data.x_train) # training set size
|
||||||
nn_output_dim = 13
|
nn_output_dim = 10
|
||||||
nn_hdim = 516
|
nn_hdim = 516
|
||||||
batch_size = 16
|
batch_size = 16
|
||||||
# actual_epoch = 1
|
# actual_epoch = 1
|
||||||
|
@ -57,32 +44,28 @@ actual_epoch = 20
|
||||||
# num_fake_epoch = 2
|
# num_fake_epoch = 2
|
||||||
num_fake_epoch = 20
|
num_fake_epoch = 20
|
||||||
|
|
||||||
# letters
|
|
||||||
# conv_input_shape=(23, 36)
|
|
||||||
|
|
||||||
# syllabled letters
|
|
||||||
# conv_input_shape=(10, 252)
|
|
||||||
|
|
||||||
# syllables
|
|
||||||
conv_input_shape=(10, 5168)
|
|
||||||
|
|
||||||
|
|
||||||
# othr_input = (140, )
|
|
||||||
othr_input = (150, )
|
conv_input_shape=(23, 36)
|
||||||
|
othr_input = (140, )
|
||||||
|
|
||||||
conv_input = Input(shape=conv_input_shape, name='conv_input')
|
conv_input = Input(shape=conv_input_shape, name='conv_input')
|
||||||
# letters
|
x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input)
|
||||||
# x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input)
|
x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv)
|
||||||
# x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv)
|
|
||||||
|
|
||||||
# syllabled letters
|
|
||||||
x_conv = Conv1D(200, (2), padding='same', activation='relu')(conv_input)
|
|
||||||
x_conv = MaxPooling1D(pool_size=2)(x_conv)
|
x_conv = MaxPooling1D(pool_size=2)(x_conv)
|
||||||
x_conv = Flatten()(x_conv)
|
x_conv = Flatten()(x_conv)
|
||||||
|
|
||||||
|
conv_input2 = Input(shape=conv_input_shape, name='conv_input2')
|
||||||
|
x_conv2 = Conv1D(115, (3), padding='same', activation='relu')(conv_input2)
|
||||||
|
x_conv2 = Conv1D(46, (3), padding='same', activation='relu')(x_conv2)
|
||||||
|
x_conv2 = MaxPooling1D(pool_size=2)(x_conv2)
|
||||||
|
x_conv2 = Flatten()(x_conv2)
|
||||||
|
# x_conv = Dense(516, activation='relu', kernel_constraint=maxnorm(3))(x_conv)
|
||||||
|
|
||||||
othr_input = Input(shape=othr_input, name='othr_input')
|
othr_input = Input(shape=othr_input, name='othr_input')
|
||||||
|
|
||||||
x = concatenate([x_conv, othr_input])
|
x = concatenate([x_conv, x_conv2, othr_input])
|
||||||
# x = Dense(1024, input_dim=(516 + 256), activation='relu')(x)
|
# x = Dense(1024, input_dim=(516 + 256), activation='relu')(x)
|
||||||
x = Dense(256, activation='relu')(x)
|
x = Dense(256, activation='relu')(x)
|
||||||
x = Dropout(0.3)(x)
|
x = Dropout(0.3)(x)
|
||||||
|
@ -95,7 +78,7 @@ x = Dense(nn_output_dim, activation='sigmoid')(x)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
model = Model(inputs=[conv_input, othr_input], outputs=x)
|
model = Model(inputs=[conv_input, conv_input2, othr_input], outputs=x)
|
||||||
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
|
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
|
||||||
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy,])
|
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy,])
|
||||||
# model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
|
# model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
|
||||||
|
@ -104,10 +87,12 @@ model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accurac
|
||||||
history = model.fit_generator(data.generator('train', batch_size, content_name='SlovarIJS_BESEDE_utf8.lex', content_location=''),
|
history = model.fit_generator(data.generator('train', batch_size, content_name='SlovarIJS_BESEDE_utf8.lex', content_location=''),
|
||||||
data.x_train.shape[0]/(batch_size * num_fake_epoch),
|
data.x_train.shape[0]/(batch_size * num_fake_epoch),
|
||||||
epochs=actual_epoch*num_fake_epoch,
|
epochs=actual_epoch*num_fake_epoch,
|
||||||
|
validation_data=data.generator('test', batch_size, content_name='SlovarIJS_BESEDE_utf8.lex', content_location=''),
|
||||||
|
validation_steps=data.x_test.shape[0]/(batch_size * num_fake_epoch),
|
||||||
verbose=2
|
verbose=2
|
||||||
)
|
)
|
||||||
|
|
||||||
name = '40_epoch'
|
name = '20_epoch'
|
||||||
model.save(name + '.h5')
|
model.save(name + '.h5')
|
||||||
output = open(name + '_history.pkl', 'wb')
|
output = open(name + '_history.pkl', 'wb')
|
||||||
pickle.dump(history.history, output)
|
pickle.dump(history.history, output)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user