diff --git a/.idea/workspace.xml b/.idea/workspace.xml index ec9b006..7335d64 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,11 +2,14 @@ + + + + - - - + + @@ -38,18 +41,16 @@ - - + + - - @@ -72,8 +73,8 @@ - - + + @@ -81,6 +82,16 @@ + + + + + + + + + + @@ -150,9 +161,6 @@ - orig_X - vowels - _create_syllable_letters_translator _get_unresonant_silent_consonants el[0] max_num_vowels @@ -170,7 +178,6 @@ predict _reverse_inputs _letter_generator - _accent_classification _create_feature_dictionary generate_data Data @@ -180,6 +187,10 @@ ../../../data/ self.x_other_features_train _create_x_features + force + test_and_validation_size + _accent_classification + self.y_train @@ -199,13 +210,14 @@ @@ -224,7 +236,6 @@ - @@ -239,6 +250,7 @@ + @@ -502,7 +514,7 @@ - + @@ -538,16 +550,6 @@ - - - - - - - - - - @@ -886,20 +888,26 @@ + + + + + + + + - - + + - - @@ -910,8 +918,8 @@ - - + + diff --git a/notes b/notes new file mode 100644 index 0000000..9d39815 --- /dev/null +++ b/notes @@ -0,0 +1 @@ +256(0.3)-512(0.3)-512(0.3):115(3)-46(3) - [LETTERS ACCENT TYPE] One layer less diff --git a/workbench.py b/workbench.py index ba03c2b..b64a6f9 100644 --- a/workbench.py +++ b/workbench.py @@ -27,7 +27,11 @@ from prepare_data import * # save_inputs('../../internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', X_validate, y_validate, other_features = X_other_features_validate) # X_train, X_other_features_train, y_train = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_train_inputs_other_features_output_11.h5', other_features=True) # X_validate, X_other_features_validate, y_validate = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', other_features=True) -data = Data('l', save_generated_data=False, number_of_syllables=True) +# letters +# data = Data('l', save_generated_data=False, number_of_syllables=True) + +# syllabled letters +data = Data('l', save_generated_data=False, accent_classification=True) data.generate_data('letters_word_accetuation_train', 'letters_word_accetuation_test', 'letters_word_accetuation_validate', content_name='SlovarIJS_BESEDE_utf8.lex', @@ -36,7 +40,7 @@ data.generate_data('letters_word_accetuation_train', num_examples = len(data.x_train) # training set size -nn_output_dim = 10 +nn_output_dim = 13 nn_hdim = 516 batch_size = 16 # actual_epoch = 1 @@ -46,13 +50,23 @@ num_fake_epoch = 20 - +# letters conv_input_shape=(23, 36) -othr_input = (141, ) + +# syllabled letters +# conv_input_shape=(10, 5168) + + +# othr_input = (140, ) +othr_input = (150, ) conv_input = Input(shape=conv_input_shape, name='conv_input') +# letters x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input) x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv) + +# syllabled letters +# x_conv = Conv1D(200, (2), padding='same', activation='relu')(conv_input) x_conv = MaxPooling1D(pool_size=2)(x_conv) x_conv = Flatten()(x_conv) @@ -62,10 +76,10 @@ x = concatenate([x_conv, othr_input]) # x = Dense(1024, input_dim=(516 + 256), activation='relu')(x) x = Dense(256, activation='relu')(x) x = Dropout(0.3)(x) -x = Dense(256, activation='relu')(x) +x = Dense(512, activation='relu')(x) +x = Dropout(0.3)(x) +x = Dense(512, activation='relu')(x) x = Dropout(0.3)(x) -x = Dense(256, activation='relu')(x) -x = Dropout(0.2)(x) x = Dense(nn_output_dim, activation='sigmoid')(x) diff --git a/workbench.xrsl b/workbench.xrsl index c06e985..55ed2ee 100644 --- a/workbench.xrsl +++ b/workbench.xrsl @@ -2,13 +2,13 @@ (jobName="accetuation_nn") (executable="workbench.sh") (inputfiles= - ("workbench.py" "")("prepare_data.py" "")("content_shuffle_vector.h5" "cnn/internal_representations/inputs/content_shuffle_vector.h5")("shuffle_vector_test.h5" "cnn/internal_representations/inputs/shuffle_vector_test.h5")("shuffle_vector_train.h5" "cnn/internal_representations/inputs/shuffle_vector_train.h5")("shuffle_vector_validate.h5" "cnn/internal_representations/inputs/shuffle_vector_validate.h5")("SlovarIJS_BESEDE_utf8.lex" "data/SlovarIJS_BESEDE_utf8.lex")("40_epoch.h5" "")("40_epoch_history.pkl" "") + ("workbench.py" "")("prepare_data.py" "")("content_shuffle_vector.h5" "cnn/internal_representations/inputs/content_shuffle_vector.h5")("shuffle_vector_test.h5" "cnn/internal_representations/inputs/shuffle_vector_test.h5")("shuffle_vector_train.h5" "cnn/internal_representations/inputs/shuffle_vector_train.h5")("shuffle_vector_validate.h5" "cnn/internal_representations/inputs/shuffle_vector_validate.h5")("SlovarIJS_BESEDE_utf8.lex" "data/SlovarIJS_BESEDE_utf8.lex")("40_epoch.h5" "")("40_epoch_history.pkl" "")("notes" "") ) -(outputfiles=("40_epoch.h5" "")("workbench.py" "")("workbench.sh" "")("40_epoch_history.pkl" "") +(outputfiles=("40_epoch.h5" "")("workbench.py" "")("workbench.sh" "")("40_epoch_history.pkl" "")("notes" "") ) (stdout="out.txt") (stderr="err.txt") (gmlog="gmlog") (runtimeenvironment="APPS/BASE/THEANO-GPU-0.9") -(gridTime=2000) +(gridTime=750) (memory=12000)