Added number of vowels (syllable count) to x_other_features
This commit is contained in:
		
							parent
							
								
									18348b78fc
								
							
						
					
					
						commit
						a2fce7c1ae
					
				
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -91,3 +91,5 @@ ENV/ | ||||
| # Custom | ||||
| data/ | ||||
| cnn/internal_representations/inputs/ | ||||
| joblist.xml | ||||
| new_sloleks.xml | ||||
|  | ||||
							
								
								
									
										148
									
								
								.idea/workspace.xml
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										148
									
								
								.idea/workspace.xml
									
									
									
										generated
									
									
									
								
							| @ -3,9 +3,11 @@ | ||||
|   <component name="ChangeListManager"> | ||||
|     <list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment=""> | ||||
|       <change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" /> | ||||
|       <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/letters/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/letters/cnn.ipynb" /> | ||||
|       <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn.ipynb" /> | ||||
|       <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/syllables/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/syllables/cnn.ipynb" /> | ||||
|       <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/results_presentation.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/results_presentation.ipynb" /> | ||||
|       <change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" /> | ||||
|       <change type="MODIFICATION" beforePath="$PROJECT_DIR$/workbench.py" afterPath="$PROJECT_DIR$/workbench.py" /> | ||||
|     </list> | ||||
|     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> | ||||
|     <option name="TRACKING_ENABLED" value="true" /> | ||||
| @ -32,28 +34,25 @@ | ||||
|           </provider> | ||||
|         </entry> | ||||
|       </file> | ||||
|       <file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true"> | ||||
|       <file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="false"> | ||||
|         <entry file="file://$PROJECT_DIR$/prepare_data.py"> | ||||
|           <provider selected="true" editor-type-id="text-editor"> | ||||
|             <state relative-caret-position="244"> | ||||
|               <caret line="252" column="38" lean-forward="true" selection-start-line="252" selection-start-column="38" selection-end-line="252" selection-end-column="38" /> | ||||
|             <state relative-caret-position="198"> | ||||
|               <caret line="14" column="121" lean-forward="false" selection-start-line="14" selection-start-column="102" selection-end-line="14" selection-end-column="121" /> | ||||
|               <folding> | ||||
|                 <element signature="e#24#63#0" expanded="true" /> | ||||
|                 <element signature="e#5658#5771#0" expanded="false" /> | ||||
|                 <element signature="e#5818#7106#0" expanded="false" /> | ||||
|                 <element signature="e#7267#8674#0" expanded="false" /> | ||||
|                 <element signature="e#8762#9057#0" expanded="false" /> | ||||
|                 <element signature="e#13496#13798#0" expanded="false" /> | ||||
|                 <element signature="e#13855#14684#0" expanded="false" /> | ||||
|                 <element signature="e#14748#15094#0" expanded="false" /> | ||||
|                 <element signature="e#16969#17882#0" expanded="false" /> | ||||
|                 <element signature="e#18312#18508#0" expanded="false" /> | ||||
|                 <element signature="e#18569#18760#0" expanded="false" /> | ||||
|                 <element signature="e#18827#19474#0" expanded="false" /> | ||||
|                 <element signature="e#19573#21871#0" expanded="false" /> | ||||
|                 <element signature="e#22137#22836#0" expanded="false" /> | ||||
|                 <element signature="e#29631#29772#0" expanded="false" /> | ||||
|                 <element signature="e#29922#32067#0" expanded="false" /> | ||||
|                 <element signature="e#5979#7267#0" expanded="false" /> | ||||
|                 <element signature="e#7428#8835#0" expanded="false" /> | ||||
|                 <element signature="e#8923#9218#0" expanded="false" /> | ||||
|                 <element signature="e#13669#13971#0" expanded="false" /> | ||||
|                 <element signature="e#14028#14857#0" expanded="false" /> | ||||
|                 <element signature="e#14921#15267#0" expanded="false" /> | ||||
|                 <element signature="e#18834#19030#0" expanded="false" /> | ||||
|                 <element signature="e#19091#19282#0" expanded="false" /> | ||||
|                 <element signature="e#19349#19996#0" expanded="false" /> | ||||
|                 <element signature="e#20095#22393#0" expanded="false" /> | ||||
|                 <element signature="e#30153#30294#0" expanded="false" /> | ||||
|                 <element signature="e#30444#32589#0" expanded="false" /> | ||||
|               </folding> | ||||
|             </state> | ||||
|           </provider> | ||||
| @ -69,13 +68,13 @@ | ||||
|           </provider> | ||||
|         </entry> | ||||
|       </file> | ||||
|       <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> | ||||
|       <file leaf-file-name="workbench.py" pinned="false" current-in-tab="true"> | ||||
|         <entry file="file://$PROJECT_DIR$/workbench.py"> | ||||
|           <provider selected="true" editor-type-id="text-editor"> | ||||
|             <state relative-caret-position="396"> | ||||
|               <caret line="37" column="68" lean-forward="false" selection-start-line="37" selection-start-column="68" selection-end-line="37" selection-end-column="68" /> | ||||
|             <state relative-caret-position="410"> | ||||
|               <caret line="42" column="17" lean-forward="true" selection-start-line="42" selection-start-column="17" selection-end-line="42" selection-end-column="17" /> | ||||
|               <folding> | ||||
|                 <element signature="e#53#92#0" expanded="true" /> | ||||
|                 <element signature="e#24#63#0" expanded="true" /> | ||||
|               </folding> | ||||
|             </state> | ||||
|           </provider> | ||||
| @ -150,21 +149,6 @@ | ||||
|   </component> | ||||
|   <component name="FindInProjectRecents"> | ||||
|     <findStrings> | ||||
|       <find>count_vowels</find> | ||||
|       <find>shuffle_full_vowel_inputs</find> | ||||
|       <find>generate_presentable_y</find> | ||||
|       <find>is_accetuated_vowel</find> | ||||
|       <find>is_vowel</find> | ||||
|       <find>load_shuffle_vector</find> | ||||
|       <find>create_and_save_shuffle_vector</find> | ||||
|       <find>load_extended_inputs</find> | ||||
|       <find>create_and_save_inputs</find> | ||||
|       <find>shuffle_inputs</find> | ||||
|       <find>complete_feature_dict</find> | ||||
|       <find>create_syllable_letters_translator</find> | ||||
|       <find>syllable_letters_translator</find> | ||||
|       <find>get_max_syllable</find> | ||||
|       <find>check_feature_letter_usage</find> | ||||
|       <find>orig_X</find> | ||||
|       <find>vowels</find> | ||||
|       <find>_create_syllable_letters_translator</find> | ||||
| @ -180,6 +164,21 @@ | ||||
|       <find>size</find> | ||||
|       <find>decode_x</find> | ||||
|       <find>self._input_type ==</find> | ||||
|       <find>../</find> | ||||
|       <find>math</find> | ||||
|       <find>predict</find> | ||||
|       <find>_reverse_inputs</find> | ||||
|       <find>_letter_generator</find> | ||||
|       <find>_accent_classification</find> | ||||
|       <find>_create_feature_dictionary</find> | ||||
|       <find>generate_data</find> | ||||
|       <find>Data</find> | ||||
|       <find>shuffle_vector</find> | ||||
|       <find>shuffle_vector_path</find> | ||||
|       <find>fit_generator</find> | ||||
|       <find>../../../data/</find> | ||||
|       <find>self.x_other_features_train</find> | ||||
|       <find>_create_x_features</find> | ||||
|     </findStrings> | ||||
|   </component> | ||||
|   <component name="Git.Settings"> | ||||
| @ -196,10 +195,10 @@ | ||||
|         <option value="$PROJECT_DIR$/theano_tutorial/logistic_regression_loop.py" /> | ||||
|         <option value="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn_test_on_other_attributes.ipynb" /> | ||||
|         <option value="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.py" /> | ||||
|         <option value="$PROJECT_DIR$/workbench.py" /> | ||||
|         <option value="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.ipynb" /> | ||||
|         <option value="$PROJECT_DIR$/tex_hyphenation.py" /> | ||||
|         <option value="$PROJECT_DIR$/prepare_data.py" /> | ||||
|         <option value="$PROJECT_DIR$/workbench.py" /> | ||||
|       </list> | ||||
|     </option> | ||||
|   </component> | ||||
| @ -224,6 +223,8 @@ | ||||
|       <foldersAlwaysOnTop value="true" /> | ||||
|     </navigator> | ||||
|     <panes> | ||||
|       <pane id="Scope" /> | ||||
|       <pane id="Scratches" /> | ||||
|       <pane id="ProjectPane"> | ||||
|         <subPane> | ||||
|           <PATH> | ||||
| @ -238,8 +239,6 @@ | ||||
|           </PATH> | ||||
|         </subPane> | ||||
|       </pane> | ||||
|       <pane id="Scope" /> | ||||
|       <pane id="Scratches" /> | ||||
|     </panes> | ||||
|   </component> | ||||
|   <component name="PropertiesComponent"> | ||||
| @ -830,31 +829,6 @@ | ||||
|         </state> | ||||
|       </provider> | ||||
|     </entry> | ||||
|     <entry file="file://$PROJECT_DIR$/prepare_data.py"> | ||||
|       <provider selected="true" editor-type-id="text-editor"> | ||||
|         <state relative-caret-position="244"> | ||||
|           <caret line="252" column="38" lean-forward="true" selection-start-line="252" selection-start-column="38" selection-end-line="252" selection-end-column="38" /> | ||||
|           <folding> | ||||
|             <element signature="e#24#63#0" expanded="true" /> | ||||
|             <element signature="e#5658#5771#0" expanded="false" /> | ||||
|             <element signature="e#5818#7106#0" expanded="false" /> | ||||
|             <element signature="e#7267#8674#0" expanded="false" /> | ||||
|             <element signature="e#8762#9057#0" expanded="false" /> | ||||
|             <element signature="e#13496#13798#0" expanded="false" /> | ||||
|             <element signature="e#13855#14684#0" expanded="false" /> | ||||
|             <element signature="e#14748#15094#0" expanded="false" /> | ||||
|             <element signature="e#16969#17882#0" expanded="false" /> | ||||
|             <element signature="e#18312#18508#0" expanded="false" /> | ||||
|             <element signature="e#18569#18760#0" expanded="false" /> | ||||
|             <element signature="e#18827#19474#0" expanded="false" /> | ||||
|             <element signature="e#19573#21871#0" expanded="false" /> | ||||
|             <element signature="e#22137#22836#0" expanded="false" /> | ||||
|             <element signature="e#29631#29772#0" expanded="false" /> | ||||
|             <element signature="e#29922#32067#0" expanded="false" /> | ||||
|           </folding> | ||||
|         </state> | ||||
|       </provider> | ||||
|     </entry> | ||||
|     <entry file="file://$PROJECT_DIR$/tex_hyphenation.py"> | ||||
|       <provider selected="true" editor-type-id="text-editor"> | ||||
|         <state relative-caret-position="1206"> | ||||
| @ -863,16 +837,6 @@ | ||||
|         </state> | ||||
|       </provider> | ||||
|     </entry> | ||||
|     <entry file="file://$PROJECT_DIR$/workbench.py"> | ||||
|       <provider selected="true" editor-type-id="text-editor"> | ||||
|         <state relative-caret-position="396"> | ||||
|           <caret line="37" column="68" lean-forward="false" selection-start-line="37" selection-start-column="68" selection-end-line="37" selection-end-column="68" /> | ||||
|           <folding> | ||||
|             <element signature="e#53#92#0" expanded="true" /> | ||||
|           </folding> | ||||
|         </state> | ||||
|       </provider> | ||||
|     </entry> | ||||
|     <entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py"> | ||||
|       <provider selected="true" editor-type-id="text-editor"> | ||||
|         <state relative-caret-position="162"> | ||||
| @ -921,5 +885,37 @@ | ||||
|         </state> | ||||
|       </provider> | ||||
|     </entry> | ||||
|     <entry file="file://$PROJECT_DIR$/prepare_data.py"> | ||||
|       <provider selected="true" editor-type-id="text-editor"> | ||||
|         <state relative-caret-position="198"> | ||||
|           <caret line="14" column="121" lean-forward="false" selection-start-line="14" selection-start-column="102" selection-end-line="14" selection-end-column="121" /> | ||||
|           <folding> | ||||
|             <element signature="e#24#63#0" expanded="true" /> | ||||
|             <element signature="e#5979#7267#0" expanded="false" /> | ||||
|             <element signature="e#7428#8835#0" expanded="false" /> | ||||
|             <element signature="e#8923#9218#0" expanded="false" /> | ||||
|             <element signature="e#13669#13971#0" expanded="false" /> | ||||
|             <element signature="e#14028#14857#0" expanded="false" /> | ||||
|             <element signature="e#14921#15267#0" expanded="false" /> | ||||
|             <element signature="e#18834#19030#0" expanded="false" /> | ||||
|             <element signature="e#19091#19282#0" expanded="false" /> | ||||
|             <element signature="e#19349#19996#0" expanded="false" /> | ||||
|             <element signature="e#20095#22393#0" expanded="false" /> | ||||
|             <element signature="e#30153#30294#0" expanded="false" /> | ||||
|             <element signature="e#30444#32589#0" expanded="false" /> | ||||
|           </folding> | ||||
|         </state> | ||||
|       </provider> | ||||
|     </entry> | ||||
|     <entry file="file://$PROJECT_DIR$/workbench.py"> | ||||
|       <provider selected="true" editor-type-id="text-editor"> | ||||
|         <state relative-caret-position="410"> | ||||
|           <caret line="42" column="17" lean-forward="true" selection-start-line="42" selection-start-column="17" selection-end-line="42" selection-end-column="17" /> | ||||
|           <folding> | ||||
|             <element signature="e#24#63#0" expanded="true" /> | ||||
|           </folding> | ||||
|         </state> | ||||
|       </provider> | ||||
|     </entry> | ||||
|   </component> | ||||
| </project> | ||||
| @ -7,11 +7,12 @@ import h5py | ||||
| import math | ||||
| import keras.backend as K | ||||
| import os.path | ||||
| import codecs | ||||
| 
 | ||||
| 
 | ||||
| class Data: | ||||
|     def __init__(self, input_type, allow_shuffle_vector_generation=False, save_generated_data=True, shuffle_all_inputs=True, | ||||
|                  additional_letter_attributes=True, reverse_inputs=True, accent_classification=False): | ||||
|                  additional_letter_attributes=True, reverse_inputs=True, accent_classification=False, number_of_syllables=False): | ||||
|         self._input_type = input_type | ||||
|         self._save_generated_data = save_generated_data | ||||
|         self._allow_shuffle_vector_generation = allow_shuffle_vector_generation | ||||
| @ -19,6 +20,7 @@ class Data: | ||||
|         self._additional_letter_attributes = additional_letter_attributes | ||||
|         self._reverse_inputs = reverse_inputs | ||||
|         self._accent_classification = accent_classification | ||||
|         self._number_of_syllables = number_of_syllables | ||||
| 
 | ||||
|         self.x_train = None | ||||
|         self.x_other_features_train = None | ||||
| @ -88,7 +90,8 @@ class Data: | ||||
|     # functions for creating X and y from content | ||||
|     @staticmethod | ||||
|     def _read_content(content_path): | ||||
|         with open(content_path) as f: | ||||
|         # with open(content_path) as f: | ||||
|         with codecs.open(content_path, encoding='utf8') as f: | ||||
|             content = f.readlines() | ||||
|         return [x.split('\t') for x in content] | ||||
| 
 | ||||
| @ -261,9 +264,9 @@ class Data: | ||||
|             raise ValueError('No input_type provided. It could be \'l\', \'s\' or \'sl\'.') | ||||
|         y = self._y_output(content, max_num_vowels, vowels, accentuated_vowels) | ||||
| 
 | ||||
|         print('CREATING OTHER FEATURES...') | ||||
|         x_other_features = self._create_x_features(content, feature_dictionary) | ||||
|         print('OTHER FEATURES CREATED!') | ||||
|         # print('CREATING OTHER FEATURES...') | ||||
|         x_other_features = self._create_x_features(content, feature_dictionary, vowels) | ||||
|         # print('OTHER FEATURES CREATED!') | ||||
| 
 | ||||
|         if self._shuffle_all_inputs: | ||||
|             print('SHUFFELING INPUTS...') | ||||
| @ -347,7 +350,7 @@ class Data: | ||||
|                 split = min(split_options, key=lambda x: x[1]) | ||||
|                 return consonants[:split[0] + 1], consonants[split[0] + 1:] | ||||
| 
 | ||||
|     def _create_x_features(self, content, feature_dictionary): | ||||
|     def _create_x_features(self, content, feature_dictionary, vowels): | ||||
|         content = content | ||||
|         x_other_features = [] | ||||
|         for el in content: | ||||
| @ -364,6 +367,14 @@ class Data: | ||||
|                                 x_el_other_features.append(0) | ||||
|                 else: | ||||
|                     x_el_other_features.extend([0] * feature[0]) | ||||
|             if self._number_of_syllables: | ||||
|                 list_of_letters = list(el[0]) | ||||
|                 num_of_vowels = 0 | ||||
|                 for i in range(len(list_of_letters)): | ||||
|                     if self._is_vowel(list(el[0]), i, vowels): | ||||
|                         num_of_vowels += 1 | ||||
|                 x_el_other_features.append(num_of_vowels) | ||||
| 
 | ||||
|             x_other_features.append(x_el_other_features) | ||||
|         return np.array(x_other_features) | ||||
| 
 | ||||
| @ -652,6 +663,60 @@ class Data: | ||||
|     def _get_nonresonant_silent_consonants(): | ||||
|         return ['p', 't', 's', 'š', 'č', 'k', 'f', 'h', 'c'] | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _create_slovene_feature_dictionary(): | ||||
|         # old: http://nl.ijs.si/ME/Vault/V3/msd/html/ | ||||
|         # new: http://nl.ijs.si/ME/V4/msd/html/ | ||||
|         # changes: http://nl.ijs.si/jos/msd/html-en/msd.diffs.html | ||||
|         return [[21, | ||||
|                  'P', | ||||
|                  ['p', 's'], | ||||
|                  ['n', 'p', 's'], | ||||
|                  ['m', 'z', 's'], | ||||
|                  ['e', 'd', 'm'], | ||||
|                  ['i', 'r', 'd', 't', 'm', 'o'], | ||||
|                  ['-', 'n', 'd']], | ||||
|                 [3, 'V', ['p', 'd']], | ||||
|                 [1, 'M'], | ||||
|                 [21, | ||||
|                  'K', | ||||
|                  ['b'], | ||||
|                  ['-', 'g', 'v', 'd'], | ||||
|                  ['m', 'z', 's'], | ||||
|                  ['e', 'd', 'm'], | ||||
|                  ['i', 'r', 'd', 't', 'm', 'o'], | ||||
|                  ['-', 'n', 'd']], | ||||
|                 [17, | ||||
|                  'S', | ||||
|                  ['o'], | ||||
|                  ['m', 'z', 's'], | ||||
|                  ['e', 'd', 'm'], | ||||
|                  ['i', 'r', 'd', 't', 'm', 'o'], | ||||
|                  ['-', 'n', 'd']], | ||||
|                 [40, | ||||
|                  'Z', | ||||
|                  ['o', 's', 'k', 'z', 'p', 'c', 'v', 'n', 'l'], | ||||
|                  ['-', 'p', 'd', 't'], | ||||
|                  ['-', 'm', 'z', 's'], | ||||
|                  ['-', 'e', 'd', 'm'], | ||||
|                  ['-', 'i', 'r', 'd', 't', 'm', 'o'], | ||||
|                  ['-', 'e', 'd', 'm'], | ||||
|                  ['-', 'm', 'z', 's'], | ||||
|                  ['-', 'k', 'z']], | ||||
|                 [1, 'L'], | ||||
|                 [5, 'R', ['s'], ['n', 'r', 's']], | ||||
|                 [7, 'D', ['-', 'r', 'd', 't', 'm', 'o']], | ||||
|                 [24, | ||||
|                  'G', | ||||
|                  ['g'], | ||||
|                  ['-'], | ||||
|                  ['n', 'm', 'd', 's', 'p', 'g'], | ||||
|                  ['-', 'p', 'd', 't'], | ||||
|                  ['-', 'e', 'm', 'd'], | ||||
|                  ['-', 'm', 'z', 's'], | ||||
|                  ['-', 'n', 'd']] | ||||
|                 ] | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _create_feature_dictionary(): | ||||
|         # old: http://nl.ijs.si/ME/Vault/V3/msd/html/ | ||||
|  | ||||
							
								
								
									
										1204
									
								
								sloleks_accetuation.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1204
									
								
								sloleks_accetuation.ipynb
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										108
									
								
								workbench.py
									
									
									
									
									
								
							
							
						
						
									
										108
									
								
								workbench.py
									
									
									
									
									
								
							| @ -1,26 +1,17 @@ | ||||
| 
 | ||||
| # coding: utf-8 | ||||
| 
 | ||||
| # In[1]: | ||||
| 
 | ||||
| 
 | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
| # text in Western (Windows 1252) | ||||
| 
 | ||||
| import pickle | ||||
| import numpy as np | ||||
| # import StringIO | ||||
| import math | ||||
| from keras.models import Sequential | ||||
| from keras.layers import Dense, Dropout, Merge | ||||
| from keras import optimizers | ||||
| from keras.models import Model | ||||
| from keras.layers import Dense, Dropout, Input | ||||
| from keras.layers.merge import concatenate | ||||
| from keras import regularizers | ||||
| from keras.layers.convolutional import Conv1D | ||||
| from keras.layers.convolutional import MaxPooling1D | ||||
| from keras.constraints import maxnorm | ||||
| from keras.layers import Flatten | ||||
| from keras.optimizers import SGD | ||||
| from keras.models import load_model | ||||
| # from keras import backend as Input | ||||
| np.random.seed(7) | ||||
| 
 | ||||
| # get_ipython().magic('run ../../../prepare_data.py') | ||||
| @ -34,39 +25,68 @@ from prepare_data import * | ||||
| # X_train, X_other_features_train, y_train, X_validate, X_other_features_validate, y_validate = generate_full_matrix_inputs() | ||||
| # save_inputs('../../internal_representations/inputs/shuffeled_matrix_train_inputs_other_features_output_11.h5', X_train, y_train, other_features = X_other_features_train) | ||||
| # save_inputs('../../internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', X_validate, y_validate,  other_features = X_other_features_validate) | ||||
| X_train, X_other_features_train, y_train = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_train_inputs_other_features_output_11.h5', other_features=True) | ||||
| X_validate, X_other_features_validate, y_validate = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', other_features=True) | ||||
| # X_train, X_other_features_train, y_train = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_train_inputs_other_features_output_11.h5', other_features=True) | ||||
| # X_validate, X_other_features_validate, y_validate = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', other_features=True) | ||||
| data = Data('l', save_generated_data=False, number_of_syllables=True) | ||||
| data.generate_data('letters_word_accetuation_train', | ||||
|                    'letters_word_accetuation_test', | ||||
|                    'letters_word_accetuation_validate', content_name='SlovarIJS_BESEDE_utf8.lex', | ||||
|                       content_shuffle_vector='content_shuffle_vector', shuffle_vector='shuffle_vector', | ||||
|                       inputs_location='', content_location='') | ||||
| 
 | ||||
| num_examples = len(X_train)  # training set size | ||||
| nn_output_dim = 11 | ||||
| 
 | ||||
| num_examples = len(data.x_train)  # training set size | ||||
| nn_output_dim = 10 | ||||
| nn_hdim = 516 | ||||
| 
 | ||||
| word_processor = Sequential() | ||||
| word_processor.add(Conv1D(43, (3), input_shape=(23, 43), padding='same', activation='relu')) | ||||
| word_processor.add(Conv1D(43, (3), padding='same', activation='relu')) | ||||
| word_processor.add(MaxPooling1D(pool_size=2)) | ||||
| word_processor.add(Flatten()) | ||||
| word_processor.add(Dense(516, activation='relu', kernel_constraint=maxnorm(3))) | ||||
| 
 | ||||
| metadata_processor = Sequential() | ||||
| metadata_processor.add(Dense(256, input_dim=167, activation='relu')) | ||||
| 
 | ||||
| model = Sequential() | ||||
| model.add(Merge([word_processor, metadata_processor], mode='concat'))  # Merge is your sensor fusion buddy | ||||
| model.add(Dense(1024, input_dim=(516 + 256), activation='relu')) | ||||
| model.add(Dropout(0.3)) | ||||
| model.add(Dense(1024, input_dim=(516 + 256), activation='relu')) | ||||
| model.add(Dropout(0.2)) | ||||
| model.add(Dense(nn_output_dim, activation='sigmoid')) | ||||
| batch_size = 16 | ||||
| # actual_epoch = 1 | ||||
| actual_epoch = 60 | ||||
| # num_fake_epoch = 2 | ||||
| num_fake_epoch = 20 | ||||
| 
 | ||||
| 
 | ||||
| # In[10]: | ||||
| 
 | ||||
| 
 | ||||
| # epochs = 5 | ||||
| # lrate = 0.1 | ||||
| # decay = lrate/epochs | ||||
| # sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=False) | ||||
| model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy']) | ||||
| model.fit([X_train, X_other_features_train], y_train, validation_data=([X_validate, X_other_features_validate], y_validate), epochs=10, batch_size=10) | ||||
| model.save('v1_1.h5') | ||||
| conv_input_shape=(23, 36) | ||||
| othr_input = (141, ) | ||||
| 
 | ||||
| conv_input = Input(shape=conv_input_shape, name='conv_input') | ||||
| x_conv = Conv1D(133, (3), padding='same', activation='relu')(conv_input) | ||||
| x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv) | ||||
| x_conv = MaxPooling1D(pool_size=2)(x_conv) | ||||
| x_conv = Flatten()(x_conv) | ||||
| 
 | ||||
| othr_input = Input(shape=othr_input, name='othr_input') | ||||
| 
 | ||||
| x = concatenate([x_conv, othr_input]) | ||||
| # x = Dense(1024, input_dim=(516 + 256), activation='relu')(x) | ||||
| x = Dense(256, activation='relu')(x) | ||||
| x = Dropout(0.3)(x) | ||||
| x = Dense(256, activation='relu')(x) | ||||
| x = Dropout(0.3)(x) | ||||
| x = Dense(256, activation='relu')(x) | ||||
| x = Dropout(0.2)(x) | ||||
| x = Dense(nn_output_dim, activation='sigmoid')(x) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| model = Model(inputs=[conv_input, othr_input], outputs=x) | ||||
| opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08) | ||||
| model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy,]) | ||||
| # model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy']) | ||||
| 
 | ||||
| 
 | ||||
| history = model.fit_generator(data.generator('train', batch_size, content_name='SlovarIJS_BESEDE_utf8.lex', content_location=''), | ||||
|                               data.x_train.shape[0]/(batch_size * num_fake_epoch), | ||||
|                               epochs=actual_epoch*num_fake_epoch, | ||||
|                               validation_data=data.generator('test', batch_size, content_name='SlovarIJS_BESEDE_utf8.lex', content_location=''), | ||||
|                               validation_steps=data.x_test.shape[0]/(batch_size * num_fake_epoch), | ||||
|                               verbose=2 | ||||
|                               ) | ||||
| 
 | ||||
| name = '60_epoch' | ||||
| model.save(name + '.h5') | ||||
| output = open(name + '_history.pkl', 'wb') | ||||
| pickle.dump(history.history, output) | ||||
| output.close() | ||||
|  | ||||
							
								
								
									
										3
									
								
								workbench.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								workbench.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,3 @@ | ||||
| #!/bin/sh | ||||
| #export KERAS_BACKEND=theano | ||||
| THEANO_FLAGS='mode=FAST_RUN,device=gpu,floatX=float32,nvcc.flags=-D_FORCE_INLINES' python3 workbench.py | ||||
							
								
								
									
										14
									
								
								workbench.xrsl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								workbench.xrsl
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | ||||
| & | ||||
| (jobName="accetuation_nn") | ||||
| (executable="workbench.sh") | ||||
| (inputfiles= | ||||
|   ("workbench.py" "")("prepare_data.py" "")("content_shuffle_vector.h5" "cnn/internal_representations/inputs/content_shuffle_vector.h5")("shuffle_vector_test.h5" "cnn/internal_representations/inputs/shuffle_vector_test.h5")("shuffle_vector_train.h5" "cnn/internal_representations/inputs/shuffle_vector_train.h5")("shuffle_vector_validate.h5" "cnn/internal_representations/inputs/shuffle_vector_validate.h5")("SlovarIJS_BESEDE_utf8.lex" "data/SlovarIJS_BESEDE_utf8.lex") | ||||
| ) | ||||
| (outputfiles=("60_epoch.h5" "")("workbench.py" "")("workbench.sh" "")("60_epoch_history.pkl" "") | ||||
| ) | ||||
| (stdout="out.txt") | ||||
| (stderr="err.txt") | ||||
| (gmlog="gmlog") | ||||
| (runtimeenvironment="APPS/BASE/THEANO-GPU-0.9") | ||||
| (gridTime=1000) | ||||
| (memory=12000) | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user