Added num of letters to x_other_features
This commit is contained in:
parent
18348b78fc
commit
a2fce7c1ae
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -91,3 +91,5 @@ ENV/
|
||||||
# Custom
|
# Custom
|
||||||
data/
|
data/
|
||||||
cnn/internal_representations/inputs/
|
cnn/internal_representations/inputs/
|
||||||
|
joblist.xml
|
||||||
|
new_sloleks.xml
|
||||||
|
|
|
@ -3,9 +3,11 @@
|
||||||
<component name="ChangeListManager">
|
<component name="ChangeListManager">
|
||||||
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
|
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
||||||
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/letters/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/letters/cnn.ipynb" />
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn.ipynb" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn.ipynb" />
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/syllables/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/syllables/cnn.ipynb" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/results_presentation.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/results_presentation.ipynb" />
|
||||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
|
||||||
|
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/workbench.py" afterPath="$PROJECT_DIR$/workbench.py" />
|
||||||
</list>
|
</list>
|
||||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||||
<option name="TRACKING_ENABLED" value="true" />
|
<option name="TRACKING_ENABLED" value="true" />
|
||||||
|
@ -32,28 +34,25 @@
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
</file>
|
</file>
|
||||||
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
|
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="false">
|
||||||
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="244">
|
<state relative-caret-position="198">
|
||||||
<caret line="252" column="38" lean-forward="true" selection-start-line="252" selection-start-column="38" selection-end-line="252" selection-end-column="38" />
|
<caret line="14" column="121" lean-forward="false" selection-start-line="14" selection-start-column="102" selection-end-line="14" selection-end-column="121" />
|
||||||
<folding>
|
<folding>
|
||||||
<element signature="e#24#63#0" expanded="true" />
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
<element signature="e#5658#5771#0" expanded="false" />
|
<element signature="e#5979#7267#0" expanded="false" />
|
||||||
<element signature="e#5818#7106#0" expanded="false" />
|
<element signature="e#7428#8835#0" expanded="false" />
|
||||||
<element signature="e#7267#8674#0" expanded="false" />
|
<element signature="e#8923#9218#0" expanded="false" />
|
||||||
<element signature="e#8762#9057#0" expanded="false" />
|
<element signature="e#13669#13971#0" expanded="false" />
|
||||||
<element signature="e#13496#13798#0" expanded="false" />
|
<element signature="e#14028#14857#0" expanded="false" />
|
||||||
<element signature="e#13855#14684#0" expanded="false" />
|
<element signature="e#14921#15267#0" expanded="false" />
|
||||||
<element signature="e#14748#15094#0" expanded="false" />
|
<element signature="e#18834#19030#0" expanded="false" />
|
||||||
<element signature="e#16969#17882#0" expanded="false" />
|
<element signature="e#19091#19282#0" expanded="false" />
|
||||||
<element signature="e#18312#18508#0" expanded="false" />
|
<element signature="e#19349#19996#0" expanded="false" />
|
||||||
<element signature="e#18569#18760#0" expanded="false" />
|
<element signature="e#20095#22393#0" expanded="false" />
|
||||||
<element signature="e#18827#19474#0" expanded="false" />
|
<element signature="e#30153#30294#0" expanded="false" />
|
||||||
<element signature="e#19573#21871#0" expanded="false" />
|
<element signature="e#30444#32589#0" expanded="false" />
|
||||||
<element signature="e#22137#22836#0" expanded="false" />
|
|
||||||
<element signature="e#29631#29772#0" expanded="false" />
|
|
||||||
<element signature="e#29922#32067#0" expanded="false" />
|
|
||||||
</folding>
|
</folding>
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
|
@ -69,13 +68,13 @@
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
</file>
|
</file>
|
||||||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="true">
|
||||||
<entry file="file://$PROJECT_DIR$/workbench.py">
|
<entry file="file://$PROJECT_DIR$/workbench.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="396">
|
<state relative-caret-position="410">
|
||||||
<caret line="37" column="68" lean-forward="false" selection-start-line="37" selection-start-column="68" selection-end-line="37" selection-end-column="68" />
|
<caret line="42" column="17" lean-forward="true" selection-start-line="42" selection-start-column="17" selection-end-line="42" selection-end-column="17" />
|
||||||
<folding>
|
<folding>
|
||||||
<element signature="e#53#92#0" expanded="true" />
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
</folding>
|
</folding>
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
|
@ -150,21 +149,6 @@
|
||||||
</component>
|
</component>
|
||||||
<component name="FindInProjectRecents">
|
<component name="FindInProjectRecents">
|
||||||
<findStrings>
|
<findStrings>
|
||||||
<find>count_vowels</find>
|
|
||||||
<find>shuffle_full_vowel_inputs</find>
|
|
||||||
<find>generate_presentable_y</find>
|
|
||||||
<find>is_accetuated_vowel</find>
|
|
||||||
<find>is_vowel</find>
|
|
||||||
<find>load_shuffle_vector</find>
|
|
||||||
<find>create_and_save_shuffle_vector</find>
|
|
||||||
<find>load_extended_inputs</find>
|
|
||||||
<find>create_and_save_inputs</find>
|
|
||||||
<find>shuffle_inputs</find>
|
|
||||||
<find>complete_feature_dict</find>
|
|
||||||
<find>create_syllable_letters_translator</find>
|
|
||||||
<find>syllable_letters_translator</find>
|
|
||||||
<find>get_max_syllable</find>
|
|
||||||
<find>check_feature_letter_usage</find>
|
|
||||||
<find>orig_X</find>
|
<find>orig_X</find>
|
||||||
<find>vowels</find>
|
<find>vowels</find>
|
||||||
<find>_create_syllable_letters_translator</find>
|
<find>_create_syllable_letters_translator</find>
|
||||||
|
@ -180,6 +164,21 @@
|
||||||
<find>size</find>
|
<find>size</find>
|
||||||
<find>decode_x</find>
|
<find>decode_x</find>
|
||||||
<find>self._input_type ==</find>
|
<find>self._input_type ==</find>
|
||||||
|
<find>../</find>
|
||||||
|
<find>math</find>
|
||||||
|
<find>predict</find>
|
||||||
|
<find>_reverse_inputs</find>
|
||||||
|
<find>_letter_generator</find>
|
||||||
|
<find>_accent_classification</find>
|
||||||
|
<find>_create_feature_dictionary</find>
|
||||||
|
<find>generate_data</find>
|
||||||
|
<find>Data</find>
|
||||||
|
<find>shuffle_vector</find>
|
||||||
|
<find>shuffle_vector_path</find>
|
||||||
|
<find>fit_generator</find>
|
||||||
|
<find>../../../data/</find>
|
||||||
|
<find>self.x_other_features_train</find>
|
||||||
|
<find>_create_x_features</find>
|
||||||
</findStrings>
|
</findStrings>
|
||||||
</component>
|
</component>
|
||||||
<component name="Git.Settings">
|
<component name="Git.Settings">
|
||||||
|
@ -196,10 +195,10 @@
|
||||||
<option value="$PROJECT_DIR$/theano_tutorial/logistic_regression_loop.py" />
|
<option value="$PROJECT_DIR$/theano_tutorial/logistic_regression_loop.py" />
|
||||||
<option value="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn_test_on_other_attributes.ipynb" />
|
<option value="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/cnn_test_on_other_attributes.ipynb" />
|
||||||
<option value="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.py" />
|
<option value="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.py" />
|
||||||
<option value="$PROJECT_DIR$/workbench.py" />
|
|
||||||
<option value="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.ipynb" />
|
<option value="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.ipynb" />
|
||||||
<option value="$PROJECT_DIR$/tex_hyphenation.py" />
|
<option value="$PROJECT_DIR$/tex_hyphenation.py" />
|
||||||
<option value="$PROJECT_DIR$/prepare_data.py" />
|
<option value="$PROJECT_DIR$/prepare_data.py" />
|
||||||
|
<option value="$PROJECT_DIR$/workbench.py" />
|
||||||
</list>
|
</list>
|
||||||
</option>
|
</option>
|
||||||
</component>
|
</component>
|
||||||
|
@ -224,6 +223,8 @@
|
||||||
<foldersAlwaysOnTop value="true" />
|
<foldersAlwaysOnTop value="true" />
|
||||||
</navigator>
|
</navigator>
|
||||||
<panes>
|
<panes>
|
||||||
|
<pane id="Scope" />
|
||||||
|
<pane id="Scratches" />
|
||||||
<pane id="ProjectPane">
|
<pane id="ProjectPane">
|
||||||
<subPane>
|
<subPane>
|
||||||
<PATH>
|
<PATH>
|
||||||
|
@ -238,8 +239,6 @@
|
||||||
</PATH>
|
</PATH>
|
||||||
</subPane>
|
</subPane>
|
||||||
</pane>
|
</pane>
|
||||||
<pane id="Scope" />
|
|
||||||
<pane id="Scratches" />
|
|
||||||
</panes>
|
</panes>
|
||||||
</component>
|
</component>
|
||||||
<component name="PropertiesComponent">
|
<component name="PropertiesComponent">
|
||||||
|
@ -830,31 +829,6 @@
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
|
||||||
<state relative-caret-position="244">
|
|
||||||
<caret line="252" column="38" lean-forward="true" selection-start-line="252" selection-start-column="38" selection-end-line="252" selection-end-column="38" />
|
|
||||||
<folding>
|
|
||||||
<element signature="e#24#63#0" expanded="true" />
|
|
||||||
<element signature="e#5658#5771#0" expanded="false" />
|
|
||||||
<element signature="e#5818#7106#0" expanded="false" />
|
|
||||||
<element signature="e#7267#8674#0" expanded="false" />
|
|
||||||
<element signature="e#8762#9057#0" expanded="false" />
|
|
||||||
<element signature="e#13496#13798#0" expanded="false" />
|
|
||||||
<element signature="e#13855#14684#0" expanded="false" />
|
|
||||||
<element signature="e#14748#15094#0" expanded="false" />
|
|
||||||
<element signature="e#16969#17882#0" expanded="false" />
|
|
||||||
<element signature="e#18312#18508#0" expanded="false" />
|
|
||||||
<element signature="e#18569#18760#0" expanded="false" />
|
|
||||||
<element signature="e#18827#19474#0" expanded="false" />
|
|
||||||
<element signature="e#19573#21871#0" expanded="false" />
|
|
||||||
<element signature="e#22137#22836#0" expanded="false" />
|
|
||||||
<element signature="e#29631#29772#0" expanded="false" />
|
|
||||||
<element signature="e#29922#32067#0" expanded="false" />
|
|
||||||
</folding>
|
|
||||||
</state>
|
|
||||||
</provider>
|
|
||||||
</entry>
|
|
||||||
<entry file="file://$PROJECT_DIR$/tex_hyphenation.py">
|
<entry file="file://$PROJECT_DIR$/tex_hyphenation.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="1206">
|
<state relative-caret-position="1206">
|
||||||
|
@ -863,16 +837,6 @@
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
<entry file="file://$PROJECT_DIR$/workbench.py">
|
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
|
||||||
<state relative-caret-position="396">
|
|
||||||
<caret line="37" column="68" lean-forward="false" selection-start-line="37" selection-start-column="68" selection-end-line="37" selection-end-column="68" />
|
|
||||||
<folding>
|
|
||||||
<element signature="e#53#92#0" expanded="true" />
|
|
||||||
</folding>
|
|
||||||
</state>
|
|
||||||
</provider>
|
|
||||||
</entry>
|
|
||||||
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
|
<entry file="file://$PROJECT_DIR$/theano_tutorial/logistic_regression.py">
|
||||||
<provider selected="true" editor-type-id="text-editor">
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
<state relative-caret-position="162">
|
<state relative-caret-position="162">
|
||||||
|
@ -921,5 +885,37 @@
|
||||||
</state>
|
</state>
|
||||||
</provider>
|
</provider>
|
||||||
</entry>
|
</entry>
|
||||||
|
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="198">
|
||||||
|
<caret line="14" column="121" lean-forward="false" selection-start-line="14" selection-start-column="102" selection-end-line="14" selection-end-column="121" />
|
||||||
|
<folding>
|
||||||
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
|
<element signature="e#5979#7267#0" expanded="false" />
|
||||||
|
<element signature="e#7428#8835#0" expanded="false" />
|
||||||
|
<element signature="e#8923#9218#0" expanded="false" />
|
||||||
|
<element signature="e#13669#13971#0" expanded="false" />
|
||||||
|
<element signature="e#14028#14857#0" expanded="false" />
|
||||||
|
<element signature="e#14921#15267#0" expanded="false" />
|
||||||
|
<element signature="e#18834#19030#0" expanded="false" />
|
||||||
|
<element signature="e#19091#19282#0" expanded="false" />
|
||||||
|
<element signature="e#19349#19996#0" expanded="false" />
|
||||||
|
<element signature="e#20095#22393#0" expanded="false" />
|
||||||
|
<element signature="e#30153#30294#0" expanded="false" />
|
||||||
|
<element signature="e#30444#32589#0" expanded="false" />
|
||||||
|
</folding>
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
|
<entry file="file://$PROJECT_DIR$/workbench.py">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="410">
|
||||||
|
<caret line="42" column="17" lean-forward="true" selection-start-line="42" selection-start-column="17" selection-end-line="42" selection-end-column="17" />
|
||||||
|
<folding>
|
||||||
|
<element signature="e#24#63#0" expanded="true" />
|
||||||
|
</folding>
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
|
@ -7,11 +7,12 @@ import h5py
|
||||||
import math
|
import math
|
||||||
import keras.backend as K
|
import keras.backend as K
|
||||||
import os.path
|
import os.path
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
|
||||||
class Data:
|
class Data:
|
||||||
def __init__(self, input_type, allow_shuffle_vector_generation=False, save_generated_data=True, shuffle_all_inputs=True,
|
def __init__(self, input_type, allow_shuffle_vector_generation=False, save_generated_data=True, shuffle_all_inputs=True,
|
||||||
additional_letter_attributes=True, reverse_inputs=True, accent_classification=False):
|
additional_letter_attributes=True, reverse_inputs=True, accent_classification=False, number_of_syllables=False):
|
||||||
self._input_type = input_type
|
self._input_type = input_type
|
||||||
self._save_generated_data = save_generated_data
|
self._save_generated_data = save_generated_data
|
||||||
self._allow_shuffle_vector_generation = allow_shuffle_vector_generation
|
self._allow_shuffle_vector_generation = allow_shuffle_vector_generation
|
||||||
|
@ -19,6 +20,7 @@ class Data:
|
||||||
self._additional_letter_attributes = additional_letter_attributes
|
self._additional_letter_attributes = additional_letter_attributes
|
||||||
self._reverse_inputs = reverse_inputs
|
self._reverse_inputs = reverse_inputs
|
||||||
self._accent_classification = accent_classification
|
self._accent_classification = accent_classification
|
||||||
|
self._number_of_syllables = number_of_syllables
|
||||||
|
|
||||||
self.x_train = None
|
self.x_train = None
|
||||||
self.x_other_features_train = None
|
self.x_other_features_train = None
|
||||||
|
@ -88,7 +90,8 @@ class Data:
|
||||||
# functions for creating X and y from content
|
# functions for creating X and y from content
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _read_content(content_path):
|
def _read_content(content_path):
|
||||||
with open(content_path) as f:
|
# with open(content_path) as f:
|
||||||
|
with codecs.open(content_path, encoding='utf8') as f:
|
||||||
content = f.readlines()
|
content = f.readlines()
|
||||||
return [x.split('\t') for x in content]
|
return [x.split('\t') for x in content]
|
||||||
|
|
||||||
|
@ -261,9 +264,9 @@ class Data:
|
||||||
raise ValueError('No input_type provided. It could be \'l\', \'s\' or \'sl\'.')
|
raise ValueError('No input_type provided. It could be \'l\', \'s\' or \'sl\'.')
|
||||||
y = self._y_output(content, max_num_vowels, vowels, accentuated_vowels)
|
y = self._y_output(content, max_num_vowels, vowels, accentuated_vowels)
|
||||||
|
|
||||||
print('CREATING OTHER FEATURES...')
|
# print('CREATING OTHER FEATURES...')
|
||||||
x_other_features = self._create_x_features(content, feature_dictionary)
|
x_other_features = self._create_x_features(content, feature_dictionary, vowels)
|
||||||
print('OTHER FEATURES CREATED!')
|
# print('OTHER FEATURES CREATED!')
|
||||||
|
|
||||||
if self._shuffle_all_inputs:
|
if self._shuffle_all_inputs:
|
||||||
print('SHUFFELING INPUTS...')
|
print('SHUFFELING INPUTS...')
|
||||||
|
@ -347,7 +350,7 @@ class Data:
|
||||||
split = min(split_options, key=lambda x: x[1])
|
split = min(split_options, key=lambda x: x[1])
|
||||||
return consonants[:split[0] + 1], consonants[split[0] + 1:]
|
return consonants[:split[0] + 1], consonants[split[0] + 1:]
|
||||||
|
|
||||||
def _create_x_features(self, content, feature_dictionary):
|
def _create_x_features(self, content, feature_dictionary, vowels):
|
||||||
content = content
|
content = content
|
||||||
x_other_features = []
|
x_other_features = []
|
||||||
for el in content:
|
for el in content:
|
||||||
|
@ -364,6 +367,14 @@ class Data:
|
||||||
x_el_other_features.append(0)
|
x_el_other_features.append(0)
|
||||||
else:
|
else:
|
||||||
x_el_other_features.extend([0] * feature[0])
|
x_el_other_features.extend([0] * feature[0])
|
||||||
|
if self._number_of_syllables:
|
||||||
|
list_of_letters = list(el[0])
|
||||||
|
num_of_vowels = 0
|
||||||
|
for i in range(len(list_of_letters)):
|
||||||
|
if self._is_vowel(list(el[0]), i, vowels):
|
||||||
|
num_of_vowels += 1
|
||||||
|
x_el_other_features.append(num_of_vowels)
|
||||||
|
|
||||||
x_other_features.append(x_el_other_features)
|
x_other_features.append(x_el_other_features)
|
||||||
return np.array(x_other_features)
|
return np.array(x_other_features)
|
||||||
|
|
||||||
|
@ -652,6 +663,60 @@ class Data:
|
||||||
def _get_nonresonant_silent_consonants():
|
def _get_nonresonant_silent_consonants():
|
||||||
return ['p', 't', 's', 'š', 'č', 'k', 'f', 'h', 'c']
|
return ['p', 't', 's', 'š', 'č', 'k', 'f', 'h', 'c']
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _create_slovene_feature_dictionary():
|
||||||
|
# old: http://nl.ijs.si/ME/Vault/V3/msd/html/
|
||||||
|
# new: http://nl.ijs.si/ME/V4/msd/html/
|
||||||
|
# changes: http://nl.ijs.si/jos/msd/html-en/msd.diffs.html
|
||||||
|
return [[21,
|
||||||
|
'P',
|
||||||
|
['p', 's'],
|
||||||
|
['n', 'p', 's'],
|
||||||
|
['m', 'z', 's'],
|
||||||
|
['e', 'd', 'm'],
|
||||||
|
['i', 'r', 'd', 't', 'm', 'o'],
|
||||||
|
['-', 'n', 'd']],
|
||||||
|
[3, 'V', ['p', 'd']],
|
||||||
|
[1, 'M'],
|
||||||
|
[21,
|
||||||
|
'K',
|
||||||
|
['b'],
|
||||||
|
['-', 'g', 'v', 'd'],
|
||||||
|
['m', 'z', 's'],
|
||||||
|
['e', 'd', 'm'],
|
||||||
|
['i', 'r', 'd', 't', 'm', 'o'],
|
||||||
|
['-', 'n', 'd']],
|
||||||
|
[17,
|
||||||
|
'S',
|
||||||
|
['o'],
|
||||||
|
['m', 'z', 's'],
|
||||||
|
['e', 'd', 'm'],
|
||||||
|
['i', 'r', 'd', 't', 'm', 'o'],
|
||||||
|
['-', 'n', 'd']],
|
||||||
|
[40,
|
||||||
|
'Z',
|
||||||
|
['o', 's', 'k', 'z', 'p', 'c', 'v', 'n', 'l'],
|
||||||
|
['-', 'p', 'd', 't'],
|
||||||
|
['-', 'm', 'z', 's'],
|
||||||
|
['-', 'e', 'd', 'm'],
|
||||||
|
['-', 'i', 'r', 'd', 't', 'm', 'o'],
|
||||||
|
['-', 'e', 'd', 'm'],
|
||||||
|
['-', 'm', 'z', 's'],
|
||||||
|
['-', 'k', 'z']],
|
||||||
|
[1, 'L'],
|
||||||
|
[5, 'R', ['s'], ['n', 'r', 's']],
|
||||||
|
[7, 'D', ['-', 'r', 'd', 't', 'm', 'o']],
|
||||||
|
[24,
|
||||||
|
'G',
|
||||||
|
['g'],
|
||||||
|
['-'],
|
||||||
|
['n', 'm', 'd', 's', 'p', 'g'],
|
||||||
|
['-', 'p', 'd', 't'],
|
||||||
|
['-', 'e', 'm', 'd'],
|
||||||
|
['-', 'm', 'z', 's'],
|
||||||
|
['-', 'n', 'd']]
|
||||||
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _create_feature_dictionary():
|
def _create_feature_dictionary():
|
||||||
# old: http://nl.ijs.si/ME/Vault/V3/msd/html/
|
# old: http://nl.ijs.si/ME/Vault/V3/msd/html/
|
||||||
|
|
1204
sloleks_accetuation.ipynb
Normal file
1204
sloleks_accetuation.ipynb
Normal file
File diff suppressed because one or more lines are too long
108
workbench.py
108
workbench.py
|
@ -1,26 +1,17 @@
|
||||||
|
|
||||||
# coding: utf-8
|
|
||||||
|
|
||||||
# In[1]:
|
|
||||||
|
|
||||||
|
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
# text in Western (Windows 1252)
|
# text in Western (Windows 1252)
|
||||||
|
|
||||||
|
import pickle
|
||||||
import numpy as np
|
import numpy as np
|
||||||
# import StringIO
|
from keras import optimizers
|
||||||
import math
|
from keras.models import Model
|
||||||
from keras.models import Sequential
|
from keras.layers import Dense, Dropout, Input
|
||||||
from keras.layers import Dense, Dropout, Merge
|
|
||||||
from keras.layers.merge import concatenate
|
from keras.layers.merge import concatenate
|
||||||
from keras import regularizers
|
|
||||||
from keras.layers.convolutional import Conv1D
|
from keras.layers.convolutional import Conv1D
|
||||||
from keras.layers.convolutional import MaxPooling1D
|
from keras.layers.convolutional import MaxPooling1D
|
||||||
from keras.constraints import maxnorm
|
|
||||||
from keras.layers import Flatten
|
from keras.layers import Flatten
|
||||||
from keras.optimizers import SGD
|
# from keras import backend as Input
|
||||||
from keras.models import load_model
|
|
||||||
np.random.seed(7)
|
np.random.seed(7)
|
||||||
|
|
||||||
# get_ipython().magic('run ../../../prepare_data.py')
|
# get_ipython().magic('run ../../../prepare_data.py')
|
||||||
|
@ -34,39 +25,68 @@ from prepare_data import *
|
||||||
# X_train, X_other_features_train, y_train, X_validate, X_other_features_validate, y_validate = generate_full_matrix_inputs()
|
# X_train, X_other_features_train, y_train, X_validate, X_other_features_validate, y_validate = generate_full_matrix_inputs()
|
||||||
# save_inputs('../../internal_representations/inputs/shuffeled_matrix_train_inputs_other_features_output_11.h5', X_train, y_train, other_features = X_other_features_train)
|
# save_inputs('../../internal_representations/inputs/shuffeled_matrix_train_inputs_other_features_output_11.h5', X_train, y_train, other_features = X_other_features_train)
|
||||||
# save_inputs('../../internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', X_validate, y_validate, other_features = X_other_features_validate)
|
# save_inputs('../../internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', X_validate, y_validate, other_features = X_other_features_validate)
|
||||||
X_train, X_other_features_train, y_train = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_train_inputs_other_features_output_11.h5', other_features=True)
|
# X_train, X_other_features_train, y_train = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_train_inputs_other_features_output_11.h5', other_features=True)
|
||||||
X_validate, X_other_features_validate, y_validate = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', other_features=True)
|
# X_validate, X_other_features_validate, y_validate = load_inputs('cnn/internal_representations/inputs/shuffeled_matrix_validate_inputs_other_features_output_11.h5', other_features=True)
|
||||||
|
data = Data('l', save_generated_data=False, number_of_syllables=True)
|
||||||
|
data.generate_data('letters_word_accetuation_train',
|
||||||
|
'letters_word_accetuation_test',
|
||||||
|
'letters_word_accetuation_validate', content_name='SlovarIJS_BESEDE_utf8.lex',
|
||||||
|
content_shuffle_vector='content_shuffle_vector', shuffle_vector='shuffle_vector',
|
||||||
|
inputs_location='', content_location='')
|
||||||
|
|
||||||
num_examples = len(X_train) # training set size
|
|
||||||
nn_output_dim = 11
|
num_examples = len(data.x_train) # training set size
|
||||||
|
nn_output_dim = 10
|
||||||
nn_hdim = 516
|
nn_hdim = 516
|
||||||
|
batch_size = 16
|
||||||
word_processor = Sequential()
|
# actual_epoch = 1
|
||||||
word_processor.add(Conv1D(43, (3), input_shape=(23, 43), padding='same', activation='relu'))
|
actual_epoch = 60
|
||||||
word_processor.add(Conv1D(43, (3), padding='same', activation='relu'))
|
# num_fake_epoch = 2
|
||||||
word_processor.add(MaxPooling1D(pool_size=2))
|
num_fake_epoch = 20
|
||||||
word_processor.add(Flatten())
|
|
||||||
word_processor.add(Dense(516, activation='relu', kernel_constraint=maxnorm(3)))
|
|
||||||
|
|
||||||
metadata_processor = Sequential()
|
|
||||||
metadata_processor.add(Dense(256, input_dim=167, activation='relu'))
|
|
||||||
|
|
||||||
model = Sequential()
|
|
||||||
model.add(Merge([word_processor, metadata_processor], mode='concat')) # Merge is your sensor fusion buddy
|
|
||||||
model.add(Dense(1024, input_dim=(516 + 256), activation='relu'))
|
|
||||||
model.add(Dropout(0.3))
|
|
||||||
model.add(Dense(1024, input_dim=(516 + 256), activation='relu'))
|
|
||||||
model.add(Dropout(0.2))
|
|
||||||
model.add(Dense(nn_output_dim, activation='sigmoid'))
|
|
||||||
|
|
||||||
|
|
||||||
# In[10]:
|
|
||||||
|
|
||||||
|
|
||||||
# epochs = 5
|
conv_input_shape=(23, 36)
|
||||||
# lrate = 0.1
|
othr_input = (141, )
|
||||||
# decay = lrate/epochs
|
|
||||||
# sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=False)
|
conv_input = Input(shape=conv_input_shape, name='conv_input')
|
||||||
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
|
x_conv = Conv1D(133, (3), padding='same', activation='relu')(conv_input)
|
||||||
model.fit([X_train, X_other_features_train], y_train, validation_data=([X_validate, X_other_features_validate], y_validate), epochs=10, batch_size=10)
|
x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv)
|
||||||
model.save('v1_1.h5')
|
x_conv = MaxPooling1D(pool_size=2)(x_conv)
|
||||||
|
x_conv = Flatten()(x_conv)
|
||||||
|
|
||||||
|
othr_input = Input(shape=othr_input, name='othr_input')
|
||||||
|
|
||||||
|
x = concatenate([x_conv, othr_input])
|
||||||
|
# x = Dense(1024, input_dim=(516 + 256), activation='relu')(x)
|
||||||
|
x = Dense(256, activation='relu')(x)
|
||||||
|
x = Dropout(0.3)(x)
|
||||||
|
x = Dense(256, activation='relu')(x)
|
||||||
|
x = Dropout(0.3)(x)
|
||||||
|
x = Dense(256, activation='relu')(x)
|
||||||
|
x = Dropout(0.2)(x)
|
||||||
|
x = Dense(nn_output_dim, activation='sigmoid')(x)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
model = Model(inputs=[conv_input, othr_input], outputs=x)
|
||||||
|
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
|
||||||
|
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy,])
|
||||||
|
# model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
|
||||||
|
|
||||||
|
|
||||||
|
history = model.fit_generator(data.generator('train', batch_size, content_name='SlovarIJS_BESEDE_utf8.lex', content_location=''),
|
||||||
|
data.x_train.shape[0]/(batch_size * num_fake_epoch),
|
||||||
|
epochs=actual_epoch*num_fake_epoch,
|
||||||
|
validation_data=data.generator('test', batch_size, content_name='SlovarIJS_BESEDE_utf8.lex', content_location=''),
|
||||||
|
validation_steps=data.x_test.shape[0]/(batch_size * num_fake_epoch),
|
||||||
|
verbose=2
|
||||||
|
)
|
||||||
|
|
||||||
|
name = '60_epoch'
|
||||||
|
model.save(name + '.h5')
|
||||||
|
output = open(name + '_history.pkl', 'wb')
|
||||||
|
pickle.dump(history.history, output)
|
||||||
|
output.close()
|
||||||
|
|
3
workbench.sh
Normal file
3
workbench.sh
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
#!/bin/sh
|
||||||
|
#export KERAS_BACKEND=theano
|
||||||
|
THEANO_FLAGS='mode=FAST_RUN,device=gpu,floatX=float32,nvcc.flags=-D_FORCE_INLINES' python3 workbench.py
|
14
workbench.xrsl
Normal file
14
workbench.xrsl
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
&
|
||||||
|
(jobName="accetuation_nn")
|
||||||
|
(executable="workbench.sh")
|
||||||
|
(inputfiles=
|
||||||
|
("workbench.py" "")("prepare_data.py" "")("content_shuffle_vector.h5" "cnn/internal_representations/inputs/content_shuffle_vector.h5")("shuffle_vector_test.h5" "cnn/internal_representations/inputs/shuffle_vector_test.h5")("shuffle_vector_train.h5" "cnn/internal_representations/inputs/shuffle_vector_train.h5")("shuffle_vector_validate.h5" "cnn/internal_representations/inputs/shuffle_vector_validate.h5")("SlovarIJS_BESEDE_utf8.lex" "data/SlovarIJS_BESEDE_utf8.lex")
|
||||||
|
)
|
||||||
|
(outputfiles=("60_epoch.h5" "")("workbench.py" "")("workbench.sh" "")("60_epoch_history.pkl" "")
|
||||||
|
)
|
||||||
|
(stdout="out.txt")
|
||||||
|
(stderr="err.txt")
|
||||||
|
(gmlog="gmlog")
|
||||||
|
(runtimeenvironment="APPS/BASE/THEANO-GPU-0.9")
|
||||||
|
(gridTime=1000)
|
||||||
|
(memory=12000)
|
Loading…
Reference in New Issue
Block a user