Accentuation on sloleks
This commit is contained in:
parent
d4e6e1b222
commit
9f6e5b2752
|
@ -2,13 +2,15 @@
|
|||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/sloleks_accentuation2.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_errors.pkl" afterPath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_errors.pkl" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_predictions.pkl" afterPath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_predictions.pkl" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/error_analysis.ipynb" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py" afterPath="$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py" afterPath="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py" afterPath="$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/bidirectional_error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/bidirectional_error_analysis.ipynb" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/bidirectional_error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/bidirectional_error_analysis.ipynb" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/run_multiple_files.py" afterPath="$PROJECT_DIR$/run_multiple_files.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/pattern_repetition.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/pattern_repetition.ipynb" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/sloleks_accetuation.ipynb" afterPath="$PROJECT_DIR$/sloleks_accetuation.ipynb" />
|
||||
</list>
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="TRACKING_ENABLED" value="true" />
|
||||
|
@ -35,19 +37,19 @@
|
|||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
|
||||
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="311">
|
||||
<caret line="1055" column="31" lean-forward="true" selection-start-line="1055" selection-start-column="31" selection-end-line="1055" selection-end-column="31" />
|
||||
<state relative-caret-position="-221">
|
||||
<caret line="1570" column="23" lean-forward="false" selection-start-line="1570" selection-start-column="23" selection-end-line="1570" selection-end-column="23" />
|
||||
<folding>
|
||||
<element signature="e#24#63#0" expanded="true" />
|
||||
<element signature="e#7927#9215#0" expanded="false" />
|
||||
<element signature="e#10871#11166#0" expanded="false" />
|
||||
<element signature="e#18472#18774#0" expanded="false" />
|
||||
<element signature="e#19747#20093#0" expanded="false" />
|
||||
<element signature="e#24509#25156#0" expanded="false" />
|
||||
<element signature="e#38245#38386#0" expanded="false" />
|
||||
<element signature="e#7950#9238#0" expanded="false" />
|
||||
<element signature="e#10894#11189#0" expanded="false" />
|
||||
<element signature="e#11294#14633#0" expanded="false" />
|
||||
<element signature="e#18495#18797#0" expanded="false" />
|
||||
<element signature="e#19770#20116#0" expanded="false" />
|
||||
<element signature="e#24532#25179#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
|
@ -56,7 +58,7 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_2/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="328">
|
||||
<state relative-caret-position="358">
|
||||
<caret line="85" column="39" lean-forward="false" selection-start-line="85" selection-start-column="39" selection-end-line="85" selection-end-column="39" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -66,8 +68,8 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="400">
|
||||
<caret line="100" column="61" lean-forward="false" selection-start-line="100" selection-start-column="61" selection-end-line="100" selection-end-column="61" />
|
||||
<state relative-caret-position="426">
|
||||
<caret line="87" column="84" lean-forward="true" selection-start-line="55" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
|
@ -76,7 +78,7 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="414">
|
||||
<state relative-caret-position="1602">
|
||||
<caret line="100" column="47" lean-forward="false" selection-start-line="100" selection-start-column="47" selection-end-line="100" selection-end-column="53" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -86,7 +88,7 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="346">
|
||||
<state relative-caret-position="468">
|
||||
<caret line="37" column="121" lean-forward="false" selection-start-line="37" selection-start-column="121" selection-end-line="37" selection-end-column="121" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -96,7 +98,7 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="306">
|
||||
<state relative-caret-position="414">
|
||||
<caret line="34" column="52" lean-forward="false" selection-start-line="34" selection-start-column="52" selection-end-line="34" selection-end-column="52" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -106,8 +108,8 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_1/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="396">
|
||||
<caret line="33" column="41" lean-forward="false" selection-start-line="33" selection-start-column="41" selection-end-line="33" selection-end-column="41" />
|
||||
<state relative-caret-position="612">
|
||||
<caret line="45" column="17" lean-forward="false" selection-start-line="45" selection-start-column="17" selection-end-line="45" selection-end-column="17" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
|
@ -116,7 +118,7 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_1/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1494">
|
||||
<state relative-caret-position="1602">
|
||||
<caret line="100" column="56" lean-forward="false" selection-start-line="100" selection-start-column="56" selection-end-line="100" selection-end-column="56" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -126,7 +128,7 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_1/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1548">
|
||||
<state relative-caret-position="1602">
|
||||
<caret line="100" column="48" lean-forward="false" selection-start-line="100" selection-start-column="48" selection-end-line="100" selection-end-column="48" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -146,8 +148,8 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_3/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="378">
|
||||
<caret line="32" column="45" lean-forward="false" selection-start-line="32" selection-start-column="45" selection-end-line="32" selection-end-column="45" />
|
||||
<state relative-caret-position="1170">
|
||||
<caret line="76" column="84" lean-forward="false" selection-start-line="38" selection-start-column="0" selection-end-line="76" selection-end-column="84" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
|
@ -156,7 +158,7 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="342">
|
||||
<state relative-caret-position="450">
|
||||
<caret line="36" column="47" lean-forward="false" selection-start-line="36" selection-start-column="47" selection-end-line="36" selection-end-column="47" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -166,8 +168,8 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="306">
|
||||
<caret line="37" column="40" lean-forward="false" selection-start-line="37" selection-start-column="40" selection-end-line="37" selection-end-column="40" />
|
||||
<state relative-caret-position="1368">
|
||||
<caret line="87" column="84" lean-forward="false" selection-start-line="41" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
|
@ -176,8 +178,8 @@
|
|||
<file leaf-file-name="run_multiple_files.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/run_multiple_files.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="180">
|
||||
<caret line="10" column="26" lean-forward="false" selection-start-line="10" selection-start-column="26" selection-end-line="10" selection-end-column="26" />
|
||||
<state relative-caret-position="198">
|
||||
<caret line="11" column="26" lean-forward="false" selection-start-line="11" selection-start-column="26" selection-end-line="11" selection-end-column="26" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
|
@ -211,7 +213,19 @@
|
|||
<entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="180">
|
||||
<caret line="16" column="53" lean-forward="false" selection-start-line="16" selection-start-column="53" selection-end-line="16" selection-end-column="53" />
|
||||
<caret line="16" column="28" lean-forward="true" selection-start-line="16" selection-start-column="28" selection-end-line="16" selection-end-column="28" />
|
||||
<folding>
|
||||
<element signature="e#24#63#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="sloleks_accentuation2.py" pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/sloleks_accentuation2.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="454">
|
||||
<caret line="48" column="61" lean-forward="true" selection-start-line="48" selection-start-column="61" selection-end-line="48" selection-end-column="61" />
|
||||
<folding>
|
||||
<element signature="e#24#63#0" expanded="true" />
|
||||
</folding>
|
||||
|
@ -244,7 +258,7 @@
|
|||
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="418">
|
||||
<state relative-caret-position="1512">
|
||||
<caret line="95" column="55" lean-forward="false" selection-start-line="95" selection-start-column="55" selection-end-line="95" selection-end-column="55" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -262,36 +276,36 @@
|
|||
</component>
|
||||
<component name="FindInProjectRecents">
|
||||
<findStrings>
|
||||
<find>bidirectional_basic_input</find>
|
||||
<find>_bidirectional_basic_input</find>
|
||||
<find>shuffeling</find>
|
||||
<find>generate_data</find>
|
||||
<find>_generate_inputs</find>
|
||||
<find>content_shuffle_vector_path</find>
|
||||
<find>content_shuffle_vector_location</find>
|
||||
<find>_shuffle_all_inputs</find>
|
||||
<find>_generator_instance</find>
|
||||
<find>_x_letter_input</find>
|
||||
<find>_generate_x_and_y</find>
|
||||
<find>content</find>
|
||||
<find>number_of_syllables</find>
|
||||
<find>_create_syllables</find>
|
||||
<find>index</find>
|
||||
<find>x_sy</find>
|
||||
<find>_letter_generator</find>
|
||||
<find>translator</find>
|
||||
<find>_bidirectional_architectural_input</find>
|
||||
<find>print</find>
|
||||
<find>_syllable_generator</find>
|
||||
<find>np.concatenate</find>
|
||||
<find>prepare_data</find>
|
||||
<find>assign_stress_locations</find>
|
||||
<find>test_accuracy</find>
|
||||
<find>test_acc</find>
|
||||
<find>test_set</find>
|
||||
<find>reverse_inputs</find>
|
||||
<find>accent_classification</find>
|
||||
<find>test_type</find>
|
||||
<find>get_word_length</find>
|
||||
<find>input_words</find>
|
||||
<find>content</find>
|
||||
<find>codecs</find>
|
||||
<find>data</find>
|
||||
<find>load_model</find>
|
||||
<find>accentuate_word</find>
|
||||
<find>get_word_le</find>
|
||||
<find>load_location_models</find>
|
||||
<find>get_ensemble_location_predictions</find>
|
||||
<find>reverse_inputs</find>
|
||||
<find>_x_letter_input</find>
|
||||
<find>reverse</find>
|
||||
<find>print(</find>
|
||||
<find>count_vowels</find>
|
||||
<find>count</find>
|
||||
<find>sylla</find>
|
||||
<find># word</find>
|
||||
<find>accented_word</find>
|
||||
<find>get_ensemble_type_predictions</find>
|
||||
<find>rever</find>
|
||||
<find>accentuate_wo</find>
|
||||
</findStrings>
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
|
@ -337,6 +351,7 @@
|
|||
<option value="$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py" />
|
||||
<option value="$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py" />
|
||||
<option value="$PROJECT_DIR$/prepare_data.py" />
|
||||
<option value="$PROJECT_DIR$/sloleks_accentuation2.py" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
|
@ -361,7 +376,7 @@
|
|||
<foldersAlwaysOnTop value="true" />
|
||||
</navigator>
|
||||
<panes>
|
||||
<pane id="Scope" />
|
||||
<pane id="Scratches" />
|
||||
<pane id="ProjectPane">
|
||||
<subPane>
|
||||
<PATH>
|
||||
|
@ -374,63 +389,9 @@
|
|||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="accetuation" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="accetuation" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="cnn" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="accetuation" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="accetuation" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="cnn" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="accent_classification" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="accetuation" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="accetuation" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="cnn" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="accent_classification" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="syllabled_letters" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="Scratches" />
|
||||
<pane id="Scope" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
|
@ -439,11 +400,11 @@
|
|||
</component>
|
||||
<component name="RecentsManager">
|
||||
<key name="CopyFile.RECENT_KEYS">
|
||||
<recent name="$PROJECT_DIR$" />
|
||||
<recent name="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0" />
|
||||
<recent name="$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0" />
|
||||
<recent name="$PROJECT_DIR$/cnn/accent_classification/letters/v3_0" />
|
||||
<recent name="$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2" />
|
||||
<recent name="$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2" />
|
||||
</key>
|
||||
</component>
|
||||
<component name="RunManager" selected="Python.TEST">
|
||||
|
@ -762,7 +723,6 @@
|
|||
<watches-manager />
|
||||
</component>
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file://$PROJECT_DIR$/theanoTest.py" />
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.py" />
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.ipynb" />
|
||||
<entry file="file://$PROJECT_DIR$/hyphenation">
|
||||
|
@ -878,21 +838,10 @@
|
|||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="180">
|
||||
<caret line="16" column="53" lean-forward="false" selection-start-line="16" selection-start-column="53" selection-end-line="16" selection-end-column="53" />
|
||||
<folding>
|
||||
<element signature="e#24#63#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_1/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="414">
|
||||
<caret line="34" column="66" lean-forward="false" selection-start-line="34" selection-start-column="20" selection-end-line="34" selection-end-column="66" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
|
@ -900,7 +849,6 @@
|
|||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1476">
|
||||
<caret line="93" column="84" lean-forward="false" selection-start-line="41" selection-start-column="0" selection-end-line="93" selection-end-column="84" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
|
@ -928,7 +876,6 @@
|
|||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="828">
|
||||
<caret line="57" column="0" lean-forward="false" selection-start-line="57" selection-start-column="0" selection-end-line="57" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
|
@ -960,7 +907,6 @@
|
|||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="612">
|
||||
<caret line="45" column="0" lean-forward="false" selection-start-line="45" selection-start-column="0" selection-end-line="45" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
|
@ -968,7 +914,6 @@
|
|||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="324">
|
||||
<caret line="29" column="42" lean-forward="false" selection-start-line="29" selection-start-column="17" selection-end-line="29" selection-end-column="42" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
|
@ -983,7 +928,6 @@
|
|||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="774">
|
||||
<caret line="54" column="26" lean-forward="false" selection-start-line="54" selection-start-column="0" selection-end-line="54" selection-end-column="26" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
|
@ -991,37 +935,20 @@
|
|||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="972">
|
||||
<caret line="65" column="70" lean-forward="false" selection-start-line="65" selection-start-column="70" selection-end-line="65" selection-end-column="70" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="400">
|
||||
<caret line="100" column="61" lean-forward="false" selection-start-line="100" selection-start-column="61" selection-end-line="100" selection-end-column="61" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="414">
|
||||
<state relative-caret-position="1602">
|
||||
<caret line="100" column="47" lean-forward="false" selection-start-line="100" selection-start-column="47" selection-end-line="100" selection-end-column="53" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_2/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="328">
|
||||
<caret line="85" column="39" lean-forward="false" selection-start-line="85" selection-start-column="39" selection-end-line="85" selection-end-column="39" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="418">
|
||||
<state relative-caret-position="1512">
|
||||
<caret line="95" column="55" lean-forward="false" selection-start-line="95" selection-start-column="55" selection-end-line="95" selection-end-column="55" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -1029,7 +956,7 @@
|
|||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="346">
|
||||
<state relative-caret-position="468">
|
||||
<caret line="37" column="121" lean-forward="false" selection-start-line="37" selection-start-column="121" selection-end-line="37" selection-end-column="121" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -1037,7 +964,7 @@
|
|||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="306">
|
||||
<state relative-caret-position="414">
|
||||
<caret line="34" column="52" lean-forward="false" selection-start-line="34" selection-start-column="52" selection-end-line="34" selection-end-column="52" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -1051,17 +978,9 @@
|
|||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/run_multiple_files.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="180">
|
||||
<caret line="10" column="26" lean-forward="false" selection-start-line="10" selection-start-column="26" selection-end-line="10" selection-end-column="26" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_1/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1548">
|
||||
<state relative-caret-position="1602">
|
||||
<caret line="100" column="48" lean-forward="false" selection-start-line="100" selection-start-column="48" selection-end-line="100" selection-end-column="48" />
|
||||
<folding />
|
||||
</state>
|
||||
|
@ -1069,56 +988,100 @@
|
|||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_1/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1494">
|
||||
<state relative-caret-position="1602">
|
||||
<caret line="100" column="56" lean-forward="false" selection-start-line="100" selection-start-column="56" selection-end-line="100" selection-end-column="56" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_1/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="396">
|
||||
<caret line="33" column="41" lean-forward="false" selection-start-line="33" selection-start-column="41" selection-end-line="33" selection-end-column="41" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_3/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="378">
|
||||
<caret line="32" column="45" lean-forward="false" selection-start-line="32" selection-start-column="45" selection-end-line="32" selection-end-column="45" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="342">
|
||||
<state relative-caret-position="450">
|
||||
<caret line="36" column="47" lean-forward="false" selection-start-line="36" selection-start-column="47" selection-end-line="36" selection-end-column="47" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_3/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1170">
|
||||
<caret line="76" column="84" lean-forward="false" selection-start-line="38" selection-start-column="0" selection-end-line="76" selection-end-column="84" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="306">
|
||||
<caret line="37" column="40" lean-forward="false" selection-start-line="37" selection-start-column="40" selection-end-line="37" selection-end-column="40" />
|
||||
<state relative-caret-position="1368">
|
||||
<caret line="87" column="84" lean-forward="false" selection-start-line="41" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_1/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="612">
|
||||
<caret line="45" column="17" lean-forward="false" selection-start-line="45" selection-start-column="17" selection-end-line="45" selection-end-column="17" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_2/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="358">
|
||||
<caret line="85" column="39" lean-forward="false" selection-start-line="85" selection-start-column="39" selection-end-line="85" selection-end-column="39" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2/workbench.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="426">
|
||||
<caret line="87" column="84" lean-forward="true" selection-start-line="55" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/run_multiple_files.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="198">
|
||||
<caret line="11" column="26" lean-forward="false" selection-start-line="11" selection-start-column="26" selection-end-line="11" selection-end-column="26" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/prepare_data.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="311">
|
||||
<caret line="1055" column="31" lean-forward="true" selection-start-line="1055" selection-start-column="31" selection-end-line="1055" selection-end-column="31" />
|
||||
<state relative-caret-position="-221">
|
||||
<caret line="1570" column="23" lean-forward="false" selection-start-line="1570" selection-start-column="23" selection-end-line="1570" selection-end-column="23" />
|
||||
<folding>
|
||||
<element signature="e#24#63#0" expanded="true" />
|
||||
<element signature="e#7950#9238#0" expanded="false" />
|
||||
<element signature="e#10894#11189#0" expanded="false" />
|
||||
<element signature="e#11294#14633#0" expanded="false" />
|
||||
<element signature="e#18495#18797#0" expanded="false" />
|
||||
<element signature="e#19770#20116#0" expanded="false" />
|
||||
<element signature="e#24532#25179#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="180">
|
||||
<caret line="16" column="28" lean-forward="true" selection-start-line="16" selection-start-column="28" selection-end-line="16" selection-end-column="28" />
|
||||
<folding>
|
||||
<element signature="e#24#63#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/sloleks_accentuation2.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="454">
|
||||
<caret line="48" column="61" lean-forward="true" selection-start-line="48" selection-start-column="61" selection-end-line="48" selection-end-column="61" />
|
||||
<folding>
|
||||
<element signature="e#24#63#0" expanded="true" />
|
||||
<element signature="e#7927#9215#0" expanded="false" />
|
||||
<element signature="e#10871#11166#0" expanded="false" />
|
||||
<element signature="e#18472#18774#0" expanded="false" />
|
||||
<element signature="e#19747#20093#0" expanded="false" />
|
||||
<element signature="e#24509#25156#0" expanded="false" />
|
||||
<element signature="e#38245#38386#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
|
|
194
prepare_data.py
194
prepare_data.py
|
@ -9,6 +9,8 @@ import keras.backend as K
|
|||
import os.path
|
||||
import codecs
|
||||
|
||||
from copy import copy
|
||||
|
||||
from keras import optimizers
|
||||
from keras.models import Model
|
||||
from keras.layers import Dense, Dropout, Input
|
||||
|
@ -968,16 +970,49 @@ class Data:
|
|||
return res
|
||||
|
||||
def test_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, syllable_dictionary=None,
|
||||
threshold=0.4999955):
|
||||
threshold=0.4999955, patterns=None):
|
||||
errors = []
|
||||
num_of_pred = len(predictions)
|
||||
num_of_correct_pred = 0
|
||||
|
||||
# wrong_patterns = 0
|
||||
# wrong_pattern_prediction = 0
|
||||
for i in range(predictions.shape[0]):
|
||||
correct_prediction = True
|
||||
|
||||
round_predictions = np.zeros(predictions[i].shape)
|
||||
for j in range(len(y[i])):
|
||||
if predictions[i][j] < threshold:
|
||||
round_predictions[j] = 0.0
|
||||
else:
|
||||
round_predictions[j] = 1.0
|
||||
if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0):
|
||||
correct_prediction = False
|
||||
break
|
||||
|
||||
# in_pattern = False
|
||||
# if patterns is not None:
|
||||
# test_predictions = copy(predictions[i])
|
||||
# l = self.get_word_length(x[i])
|
||||
# round_predictions = np.zeros(test_predictions.shape)
|
||||
# for j in range(len(y[i])):
|
||||
# if test_predictions[j] < threshold:
|
||||
# round_predictions[j] = 0.0
|
||||
# else:
|
||||
# round_predictions[j] = 1.0
|
||||
#
|
||||
# in_pattern = False
|
||||
# for pattern in patterns[l]:
|
||||
# if (pattern == round_predictions).all():
|
||||
# in_pattern = True
|
||||
# if not in_pattern:
|
||||
# wrong_patterns += 1
|
||||
#
|
||||
# for j in range(len(y[i])):
|
||||
# if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0):
|
||||
# correct_prediction = False
|
||||
#
|
||||
# if not in_pattern and not correct_prediction:
|
||||
# wrong_pattern_prediction += 1
|
||||
# if (np.around(predictions[i]) == y[i]).all():
|
||||
if correct_prediction:
|
||||
num_of_correct_pred += 1
|
||||
|
@ -991,12 +1026,22 @@ class Data:
|
|||
errors.append([i,
|
||||
decoded_x,
|
||||
self.decode_x_other_features(feature_dictionary, [x_other_features[i]]),
|
||||
self.assign_stress_locations(decoded_x, np.around(predictions[i]), vowels, syllables=self._input_type != 'l'),
|
||||
self.assign_stress_locations(decoded_x, round_predictions, vowels, syllables=self._input_type != 'l'),
|
||||
self.assign_stress_locations(decoded_x, y[i], vowels, syllables=self._input_type != 'l')
|
||||
])
|
||||
|
||||
# print(wrong_patterns)
|
||||
# print(wrong_pattern_prediction)
|
||||
return (num_of_correct_pred / float(num_of_pred)) * 100, errors
|
||||
|
||||
# def get_word_length(self, x_el):
|
||||
# i = 0
|
||||
# for el in x_el:
|
||||
# if el == 0:
|
||||
# return i
|
||||
# i += 1
|
||||
# return 10
|
||||
|
||||
@staticmethod
|
||||
def decode_syllable_x(word_encoded, syllable_dictionary):
|
||||
word = []
|
||||
|
@ -1214,8 +1259,37 @@ class Data:
|
|||
@staticmethod
|
||||
def load_location_models(letters_path, syllables_path, syllabled_letters_path):
|
||||
############################ LOCATION ########################
|
||||
letter_location_model = load_model(letters_path, custom_objects={'actual_accuracy': actual_accuracy})
|
||||
nn_output_dim = 10
|
||||
|
||||
conv_input_shape = (23, 36)
|
||||
othr_input = (140,)
|
||||
|
||||
conv_input = Input(shape=conv_input_shape, name='conv_input')
|
||||
x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input)
|
||||
x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv)
|
||||
x_conv = MaxPooling1D(pool_size=2)(x_conv)
|
||||
x_conv = Flatten()(x_conv)
|
||||
|
||||
othr_input = Input(shape=othr_input, name='othr_input')
|
||||
|
||||
x = concatenate([x_conv, othr_input])
|
||||
# x = Dense(1024, input_dim=(516 + 256), activation='relu')(x)
|
||||
x = Dense(256, activation='relu')(x)
|
||||
x = Dropout(0.3)(x)
|
||||
x = Dense(256, activation='relu')(x)
|
||||
x = Dropout(0.3)(x)
|
||||
x = Dense(256, activation='relu')(x)
|
||||
x = Dropout(0.3)(x)
|
||||
x = Dense(nn_output_dim, activation='sigmoid')(x)
|
||||
|
||||
letter_location_model = Model(inputs=[conv_input, othr_input], outputs=x)
|
||||
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
|
||||
letter_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
|
||||
|
||||
|
||||
letter_location_model.load_weights(letters_path)
|
||||
|
||||
##############################################################
|
||||
# num_examples = len(data.x_train) # training set size
|
||||
nn_output_dim = 10
|
||||
|
||||
|
@ -1244,7 +1318,10 @@ class Data:
|
|||
syllable_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
|
||||
syllable_location_model.load_weights(syllables_path)
|
||||
|
||||
|
||||
#####################################################
|
||||
conv_input_shape = (10, 252)
|
||||
|
||||
othr_input = (140,)
|
||||
|
||||
conv_input = Input(shape=conv_input_shape, name='conv_input')
|
||||
|
@ -1354,6 +1431,7 @@ class Data:
|
|||
|
||||
@staticmethod
|
||||
def get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
|
||||
letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
|
||||
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
|
||||
batch_size = 16
|
||||
# print(tagged_input_words[pos])
|
||||
|
@ -1379,10 +1457,58 @@ class Data:
|
|||
generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels)
|
||||
syllabled_letters_location_predictions = syllabled_letters_location_model.predict_generator(generator, len(x) / (batch_size))
|
||||
|
||||
return np.mean(np.array([letter_location_predictions, syllable_location_predictions, syllabled_letters_location_predictions]), axis=0)
|
||||
############## CORRECT ORDER INPUT ##############
|
||||
data = Data('l', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
|
||||
x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels,
|
||||
feature_dictionary, 'who cares')
|
||||
generator = data._letter_generator(x, x_other_features, fake_y, batch_size, accented_vowels)
|
||||
letter_location_co_predictions = letter_location_co_model.predict_generator(generator, len(x) / (batch_size))
|
||||
|
||||
letter_location_co_predictions = data.reverse_predictions(letter_location_co_predictions, input_words, vowels)
|
||||
|
||||
data = Data('s', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
|
||||
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
|
||||
accented_vowels, feature_dictionary, 'who cares')
|
||||
eye = np.eye(len(syllable_dictionary), dtype=int)
|
||||
generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, eye, accented_vowels)
|
||||
syllable_location_co_predictions = syllable_location_co_model.predict_generator(generator, len(x) / (batch_size))
|
||||
|
||||
syllable_location_co_predictions = data.reverse_predictions(syllable_location_co_predictions, input_words, vowels)
|
||||
|
||||
data = Data('sl', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
|
||||
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
|
||||
accented_vowels, feature_dictionary, 'who cares')
|
||||
max_syllable = data._get_max_syllable(syllable_dictionary)
|
||||
syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
|
||||
generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels)
|
||||
syllabled_letters_location_co_predictions = syllabled_letters_location_co_model.predict_generator(generator, len(x) / (batch_size))
|
||||
|
||||
syllabled_letters_location_co_predictions = data.reverse_predictions(syllabled_letters_location_co_predictions, input_words, vowels)
|
||||
|
||||
return np.mean(np.array([letter_location_predictions, syllable_location_predictions, syllabled_letters_location_predictions,
|
||||
letter_location_co_predictions, syllable_location_co_predictions, syllabled_letters_location_co_predictions]), axis=0)
|
||||
|
||||
def count_syllables(self, word, vowels):
    """Return how many positions of ``word`` are vowels.

    Each index of ``word`` is checked with ``self._is_vowel(word, index, vowels)``
    and the number of truthy answers is returned.  Callers use this number as
    the syllable count of the word.

    :param word: sequence of characters to inspect
    :param vowels: vowel collection forwarded unchanged to ``self._is_vowel``
    :return: number of positions reported as vowels (0 for an empty word)
    """
    return sum(1 for position in range(len(word)) if self._is_vowel(word, position, vowels))
|
||||
|
||||
def reverse_predictions(self, predictions, words, vowels):
    """Return a copy of ``predictions`` with each row's leading entries mirrored.

    For row ``i`` the number of syllables ``n`` is taken from
    ``self.count_syllables(words[i][0], vowels)``.  The first ``n`` entries of
    that row are written in reverse order into the result; every remaining
    entry of the result stays 0.  ``predictions`` itself is not modified.

    :param predictions: array exposing ``.shape``; one row per word
    :param words: indexable of word records; element 0 of each record is the
        word text passed to ``count_syllables``
    :param vowels: vowel collection forwarded to ``count_syllables``
    :return: new ``float32`` array with the same shape as ``predictions``
    """
    mirrored = np.zeros(predictions.shape, dtype='float32')
    for row, source in enumerate(predictions):
        syllable_count = self.count_syllables(words[row][0], vowels)

        # Walk the destination slots left to right while reading the source
        # from the last syllable backwards.
        for target in range(syllable_count):
            mirrored[row][target] += source[syllable_count - 1 - target]

    return mirrored
|
||||
|
||||
@staticmethod
|
||||
def get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model, syllabled_letter_type_model,
|
||||
letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
|
||||
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
|
||||
batch_size = 16
|
||||
y_array = np.asarray(location_y)
|
||||
|
@ -1409,7 +1535,57 @@ class Data:
|
|||
generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels)
|
||||
syllabled_letter_type_predictions = syllabled_letter_type_model.predict_generator(generator, accentuation_length / batch_size)
|
||||
|
||||
return np.mean(np.array([letter_type_predictions, syllable_type_predictions, syllabled_letter_type_predictions]), axis=0)
|
||||
############## CORRECT ORDER INPUT ##############
|
||||
location_y = data.reverse_predictions(location_y, input_words, vowels)
|
||||
|
||||
data = Data('l', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
|
||||
x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels,
|
||||
feature_dictionary, 'who cares')
|
||||
generator = data._letter_generator(x, x_other_features, location_y, batch_size, accented_vowels)
|
||||
letter_type_co_predictions = letter_type_co_model.predict_generator(generator, accentuation_length / (batch_size))
|
||||
|
||||
data.reorder_correct_direction_inputs(letter_type_co_predictions, location_y)
|
||||
|
||||
data = Data('s', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
|
||||
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
|
||||
accented_vowels, feature_dictionary, 'who cares')
|
||||
eye = np.eye(len(syllable_dictionary), dtype=int)
|
||||
generator = data._syllable_generator(x, x_other_features, location_y, batch_size, eye, accented_vowels)
|
||||
syllable_type_co_predictions = syllable_type_co_model.predict_generator(generator, accentuation_length / (batch_size))
|
||||
|
||||
data.reorder_correct_direction_inputs(syllable_type_co_predictions, location_y)
|
||||
|
||||
data = Data('sl', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
|
||||
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
|
||||
accented_vowels, feature_dictionary, 'who cares')
|
||||
max_syllable = data._get_max_syllable(syllable_dictionary)
|
||||
syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
|
||||
generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels)
|
||||
syllabled_letter_type_co_predictions = syllabled_letter_type_co_model.predict_generator(generator, accentuation_length / batch_size)
|
||||
|
||||
data.reorder_correct_direction_inputs(syllabled_letter_type_co_predictions, location_y)
|
||||
|
||||
return np.mean(np.array([letter_type_predictions, syllable_type_predictions, syllabled_letter_type_predictions,
|
||||
letter_type_co_predictions, syllable_type_co_predictions, syllabled_letter_type_co_predictions]), axis=0)
|
||||
|
||||
def reorder_correct_direction_inputs(self, predictions, y):
    """Reverse, in place, each word's consecutive run of rows in ``predictions``.

    ``predictions`` is consumed as a flat sequence of rows: every row of ``y``
    (one per word) claims as many consecutive ``predictions`` rows as it has
    entries greater than 0.  When a word claims more than one row, those rows
    are swapped end for end so their order is mirrored.

    NOTE(review): nesting reconstructed from syntax - the running offset is
    advanced once per word regardless of its accent count; confirm against the
    original indentation.

    :param predictions: mutable, indexable sequence of prediction rows
        (modified in place)
    :param y: iterable of per-word location vectors; entries > 0 are counted
    :return: None
    """
    offset = 0
    for locations in y:
        accent_count = sum(1 for value in locations if value > 0)

        if accent_count > 1:
            low = offset
            high = offset + accent_count - 1
            while low < high:
                # Copy both rows first: with array-backed storage a row is a
                # view, so a direct swap would overwrite one side.
                low_row = copy(predictions[low])
                high_row = copy(predictions[high])
                predictions[low] = high_row
                predictions[high] = low_row
                low += 1
                high -= 1

        offset += accent_count
|
||||
|
||||
def assign_location_stress(self, word, locations, vowels):
|
||||
# word = list(word)
|
||||
|
@ -1449,12 +1625,17 @@ class Data:
|
|||
return ''.join(word_list)
|
||||
|
||||
def accentuate_word(self, input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
|
||||
letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
|
||||
letter_type_model, syllable_type_model, syllabled_letter_type_model,
|
||||
letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
|
||||
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
|
||||
predictions = self.get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model,
|
||||
syllabled_letters_location_model,
|
||||
letter_location_co_model, syllable_location_co_model,
|
||||
syllabled_letters_location_co_model,
|
||||
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary,
|
||||
syllable_dictionary)
|
||||
#print(predictions)
|
||||
if 'A' not in vowels:
|
||||
vowels.extend(['A', 'E', 'I', 'O', 'U'])
|
||||
location_accented_words = [self.assign_location_stress(input_words[i][0][::-1], self.decode_y(predictions[i]), vowels)[::-1] for i in
|
||||
|
@ -1463,6 +1644,7 @@ class Data:
|
|||
location_y = np.around(predictions)
|
||||
type_predictions = self.get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model,
|
||||
syllabled_letter_type_model,
|
||||
letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
|
||||
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary,
|
||||
syllable_dictionary)
|
||||
|
||||
|
|
70
sloleks_accentuation2.py
Normal file
70
sloleks_accentuation2.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import numpy as np
|
||||
from keras.models import load_model
|
||||
import sys
|
||||
import pickle
|
||||
import time
|
||||
|
||||
from prepare_data import *
|
||||
|
||||
np.random.seed(7)

# Build the letter, feature and syllable dictionaries from the lexicon file.
data = Data('l', shuffle_all_inputs=False)
content = data._read_content('data/SlovarIJS_BESEDE_utf8.lex')
dictionary, max_word, max_num_vowels, vowels, accented_vowels = data._create_dict(content)
feature_dictionary = data._create_slovene_feature_dictionary()
syllable_dictionary = data._create_syllables_dictionary(content, vowels)
# Replace the accented-vowel list returned by _create_dict with a fixed one.
accented_vowels = ['ŕ', 'á', 'ä', 'é', 'ë', 'ě', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü']

# Load the two ensembles of accent-location models.  The "_co_" set is the one
# the Data.get_ensemble_*_predictions methods feed with non-reversed input.
data = Data('l', shuffle_all_inputs=False)
letter_location_model, syllable_location_model, syllabled_letters_location_model = data.load_location_models(
    'cnn/word_accetuation/cnn_dictionary/v5_3/20_final_epoch.h5',
    'cnn/word_accetuation/syllables/v3_3/20_final_epoch.h5',
    'cnn/word_accetuation/syllabled_letters/v3_3/20_final_epoch.h5')

letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model = data.load_location_models(
    'cnn/word_accetuation/cnn_dictionary/v5_2/20_final_epoch.h5',
    'cnn/word_accetuation/syllables/v3_2/20_final_epoch.h5',
    'cnn/word_accetuation/syllabled_letters/v3_2/20_final_epoch.h5')

# Load the two ensembles of accent-type models.
letter_type_model, syllable_type_model, syllabled_letter_type_model = data.load_type_models(
    'cnn/accent_classification/letters/v3_1/20_final_epoch.h5',
    'cnn/accent_classification/syllables/v2_1/20_final_epoch.h5',
    'cnn/accent_classification/syllabled_letters/v2_1/20_final_epoch.h5')

letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model = data.load_type_models(
    'cnn/accent_classification/letters/v3_0/20_final_epoch.h5',
    'cnn/accent_classification/syllables/v2_0/20_final_epoch.h5',
    'cnn/accent_classification/syllabled_letters/v2_0/20_final_epoch.h5')

# Read the table whose rows are going to be accentuated.
data = Data('s', shuffle_all_inputs=False)
new_content = data._read_content('data/sloleks-sl_v1.2.tbl')

print('Commencing accentuator!')

# Number of rows accentuated and written per pass.
rate = 100000
start_timer = time.time()
total_rows = len(new_content)
with open("data/new_sloleks/new_sloleks.tab", "a") as myfile:
    for index in range(0, total_rows, rate):
        # Take only the rows of this pass.  The original rebuilt the word list
        # for the whole table on every pass and then sliced it.
        stop = min(index + rate, total_rows)
        chunk = [new_content[i] for i in range(index, stop)]
        words = [[el[0], '', el[2], el[0]] for el in chunk]

        data = Data('l', shuffle_all_inputs=False)
        location_accented_words, accented_words = data.accentuate_word(
            words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
            letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
            letter_type_model, syllable_type_model, syllabled_letter_type_model,
            letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
            dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)

        # One output row per input row: the four original columns (the last
        # one without its final character) plus the two accentuated forms.
        res = ''.join(
            '\t'.join((el[0], el[1], el[2], el[3][:-1],
                       location_accented_words[offset], accented_words[offset])) + '\n'
            for offset, el in enumerate(chunk))

        print('Writing data from ' + str(index) + ' onward.')
        end_timer = time.time()
        print("Elapsed time: " + "{0:.2f}".format((end_timer - start_timer)/60.0) + " minutes")
        myfile.write(res)
|
|
@ -87,18 +87,6 @@
|
|||
" i += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"feature__en_dictionary = data._create_feature_dictionary()\n",
|
||||
"feature__slo_dictionary = data._create_slovene_feature_dictionary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
|
@ -120,7 +108,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
|
@ -131,14 +119,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%run prepare_data.py\n",
|
||||
"data = Data('l', shuffle_all_inputs=False)\n",
|
||||
"data = Data('s', shuffle_all_inputs=False)\n",
|
||||
"location_accented_words, accented_words = data.accentuate_word(test_input, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n",
|
||||
" letter_type_model, syllable_type_model, syllabled_letter_type_model,\n",
|
||||
" dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)"
|
||||
|
@ -146,15 +134,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['uradní', 'podatkí', 'policíje', 'kažéjo', 'ná', 'precéj', 'napeté', 'razmeré', 'v', 'piranském', 'zalivú', 'jé', 'danés', 'poročála', 'oddajá', 'dó', 'danés', 'sé', 'jé', 'zgodílo']\n",
|
||||
"['uradnî', 'podatkî', 'policíje', 'kažëjo', 'ná', 'precëj', 'napetë', 'razmerë', 'v', 'piranskëm', 'zalivú', 'jë', 'danës', 'poročála', 'oddajá', 'dó', 'danës', 'së', 'jë', 'zgodílo']\n"
|
||||
"['uradni', 'podatkí', 'policíje', 'kažéjo', 'ná', 'precéj', 'napeté', 'razmeré', 'v', 'piranském', 'zalivú', 'jé', 'danés', 'poročála', 'oddajá', 'dó', 'danés', 'sé', 'jé', 'zgodílo']\n",
|
||||
"['uradni', 'pödatki', 'polícije', 'kažëjo', 'ná', 'prëcej', 'nápete', 'räzmere', 'v', 'pîranskem', 'zálivu', 'jë', 'dánes', 'poróčala', 'öddaja', 'dó', 'dánes', 'së', 'jë', 'zgodílo']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -225,7 +213,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
|
@ -508,6 +496,134 @@
|
|||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "NameError",
|
||||
"evalue": "name 'xml_words_generator' is not defined",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-8-44b0367c6cbf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mgen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxml_words_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/Sloleks_v1.2_p2.xml'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mword_glob_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mword_limit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mNameError\u001b[0m: name 'xml_words_generator' is not defined"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#Words proccesed: 650250\n",
|
||||
"#Word indeks: 50023\n",
|
||||
"#Word number: 50023\n",
|
||||
"\n",
|
||||
"from lxml import etree\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"gen = xml_words_generator('data/Sloleks_v1.2_p2.xml')\n",
|
||||
"word_glob_num = 0\n",
|
||||
"word_limit = 0\n",
|
||||
"iter_num = 50000\n",
|
||||
"word_index = 0\n",
|
||||
"start_timer = time.time()\n",
|
||||
"iter_index = 0\n",
|
||||
"words = []\n",
|
||||
"\n",
|
||||
"lexical_entries_load_number = 0\n",
|
||||
"lexical_entries_save_number = 0\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# INSIDE\n",
|
||||
"#word_glob_num = 1500686\n",
|
||||
"word_glob_num = 1550705\n",
|
||||
"\n",
|
||||
"#word_limit = 1500686\n",
|
||||
"word_limit = 1550705\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"iter_index = 31\n",
|
||||
"\n",
|
||||
"#done_lexical_entries = 33522\n",
|
||||
"\n",
|
||||
"with open(\"data/new_sloleks/new_sloleks.xml\", \"ab\") as myfile:\n",
|
||||
" myfile2 = open('data/new_sloleks/pa' + str(iter_index) + '.xml', 'ab')\n",
|
||||
" for event, element in etree.iterparse('data/Sloleks_v1.2_p2.xml', tag=\"LexicalEntry\", encoding=\"UTF-8\", remove_blank_text=True):\n",
|
||||
" # LOAD NEW WORDS AND ACCENTUATE THEM\n",
|
||||
" #print(\"HERE\")\n",
|
||||
" \n",
|
||||
"# if lexical_entries_save_number < done_lexical_entries:\n",
|
||||
"# next(gen)\n",
|
||||
"# #print(lexical_entries_save_number)\n",
|
||||
"# lexical_entries_save_number += 1\n",
|
||||
"# lexical_entries_load_number += 1\n",
|
||||
"# continue\n",
|
||||
" \n",
|
||||
" if word_glob_num >= word_limit:\n",
|
||||
" myfile2.close()\n",
|
||||
" myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')\n",
|
||||
" iter_index += 1\n",
|
||||
" print(\"Words proccesed: \" + str(word_glob_num))\n",
|
||||
"\n",
|
||||
" print(\"Word indeks: \" + str(word_index))\n",
|
||||
" print(\"Word number: \" + str(len(words)))\n",
|
||||
" \n",
|
||||
" #print(\"lexical_entries_load_number: \" + str(lexical_entries_load_number))\n",
|
||||
" #print(\"lexical_entries_save_number: \" + str(lexical_entries_save_number))\n",
|
||||
"\n",
|
||||
" end_timer = time.time()\n",
|
||||
" print(\"Elapsed time: \" + \"{0:.2f}\".format((end_timer - start_timer)/60.0) + \" minutes\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" word_index = 0\n",
|
||||
" words = []\n",
|
||||
"\n",
|
||||
" while len(words) < iter_num:\n",
|
||||
" try:\n",
|
||||
" words.extend(next(gen))\n",
|
||||
" lexical_entries_load_number += 1\n",
|
||||
" except:\n",
|
||||
" break\n",
|
||||
" #if word_glob_num > 1:\n",
|
||||
" # break\n",
|
||||
"\n",
|
||||
" #problem_words = words\n",
|
||||
" #break\n",
|
||||
" data = Data('l', shuffle_all_inputs=False)\n",
|
||||
" location_accented_words, accented_words = data.accentuate_word(words, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n",
|
||||
" letter_type_model, syllable_type_model, syllabled_letter_type_model,\n",
|
||||
" dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)\n",
|
||||
"\n",
|
||||
" word_limit += len(words)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # READ DATA\n",
|
||||
" for child in element:\n",
|
||||
" if child.tag == 'WordForm':\n",
|
||||
" msd = None\n",
|
||||
" word = None\n",
|
||||
" for wf in child:\n",
|
||||
" if wf.tag == 'FormRepresentation':\n",
|
||||
" new_element = etree.Element('feat')\n",
|
||||
" new_element.attrib['att']='naglasna_mesta_oblike'\n",
|
||||
" new_element.attrib['val']=location_accented_words[word_index]\n",
|
||||
" wf.append(new_element)\n",
|
||||
"\n",
|
||||
" new_element = etree.Element('feat')\n",
|
||||
" new_element.attrib['att']='naglašena_oblika'\n",
|
||||
" new_element.attrib['val']=accented_words[word_index]\n",
|
||||
" wf.append(new_element)\n",
|
||||
" word_glob_num += 1\n",
|
||||
" word_index += 1\n",
|
||||
"\n",
|
||||
" # print(etree.tostring(element, encoding=\"UTF-8\"))\n",
|
||||
" myfile2.write(etree.tostring(element, encoding=\"UTF-8\", pretty_print=True))\n",
|
||||
" myfile.write(etree.tostring(element, encoding=\"UTF-8\", pretty_print=True))\n",
|
||||
" element.clear()\n",
|
||||
" lexical_entries_save_number += 1\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
|
|
263
sloleks_accetuation2.ipynb
Normal file
263
sloleks_accetuation2.ipynb
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user