Added functional connected text accentuation for assigning accent location

This commit is contained in:
lkrsnik 2017-09-11 16:46:03 +02:00
parent eb61cb9e74
commit a316574314
2 changed files with 76 additions and 52 deletions

View File

@ -4,10 +4,9 @@
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment=""> <list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/error_analysis.ipynb" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/error_analysis.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/letters/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/letters/cnn.ipynb" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/connected_text_accetuation.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/connected_text_accetuation.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/cnn.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/cnn.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/results_presentation.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/results_presentation.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis.ipynb" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
</list> </list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" /> <option name="TRACKING_ENABLED" value="true" />
@ -34,23 +33,22 @@
</provider> </provider>
</entry> </entry>
</file> </file>
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="false"> <file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/prepare_data.py"> <entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="18"> <state relative-caret-position="298">
<caret line="477" column="34" lean-forward="false" selection-start-line="477" selection-start-column="25" selection-end-line="477" selection-end-column="34" /> <caret line="514" column="33" lean-forward="false" selection-start-line="514" selection-start-column="20" selection-end-line="514" selection-end-column="33" />
<folding> <folding>
<element signature="e#24#63#0" expanded="true" /> <element signature="e#24#63#0" expanded="true" />
<element signature="e#5979#7267#0" expanded="false" /> <element signature="e#5979#7267#0" expanded="false" />
<element signature="e#8923#9218#0" expanded="false" /> <element signature="e#8923#9218#0" expanded="false" />
<element signature="e#13669#13971#0" expanded="false" /> <element signature="e#13768#14070#0" expanded="false" />
<element signature="e#14028#14857#0" expanded="false" /> <element signature="e#14127#14956#0" expanded="false" />
<element signature="e#14921#15267#0" expanded="false" /> <element signature="e#15020#15366#0" expanded="false" />
<element signature="e#18834#19030#0" expanded="false" /> <element signature="e#18933#19129#0" expanded="false" />
<element signature="e#19349#19996#0" expanded="false" /> <element signature="e#19448#20095#0" expanded="false" />
<element signature="e#20095#22393#0" expanded="false" /> <element signature="e#20194#22492#0" expanded="false" />
<element signature="e#30153#30294#0" expanded="false" /> <element signature="e#30252#30393#0" expanded="false" />
<element signature="e#30444#32589#0" expanded="false" />
</folding> </folding>
</state> </state>
</provider> </provider>
@ -76,11 +74,11 @@
</provider> </provider>
</entry> </entry>
</file> </file>
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="true"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/workbench.py"> <entry file="file://$PROJECT_DIR$/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="828"> <state relative-caret-position="1044">
<caret line="69" column="22" lean-forward="true" selection-start-line="69" selection-start-column="22" selection-end-line="69" selection-end-column="22" /> <caret line="69" column="22" lean-forward="false" selection-start-line="69" selection-start-column="22" selection-end-line="69" selection-end-column="22" />
<folding> <folding>
<element signature="e#24#63#0" expanded="true" /> <element signature="e#24#63#0" expanded="true" />
</folding> </folding>
@ -167,11 +165,6 @@
</component> </component>
<component name="FindInProjectRecents"> <component name="FindInProjectRecents">
<findStrings> <findStrings>
<find>size</find>
<find>decode_x</find>
<find>self._input_type ==</find>
<find>../</find>
<find>math</find>
<find>predict</find> <find>predict</find>
<find>_reverse_inputs</find> <find>_reverse_inputs</find>
<find>_letter_generator</find> <find>_letter_generator</find>
@ -186,7 +179,6 @@
<find>_create_x_features</find> <find>_create_x_features</find>
<find>force</find> <find>force</find>
<find>test_and_validation_size</find> <find>test_and_validation_size</find>
<find>_accent_classification</find>
<find>self.y_train</find> <find>self.y_train</find>
<find>_additional_letter_attributes</find> <find>_additional_letter_attributes</find>
<find>np.random.seed</find> <find>np.random.seed</find>
@ -197,6 +189,12 @@
<find>np.eye</find> <find>np.eye</find>
<find>allow_shuffle_vector_generation</find> <find>allow_shuffle_vector_generation</find>
<find>accented_vowels</find> <find>accented_vowels</find>
<find>generate</find>
<find>generate_x_and</find>
<find>accentuate</find>
<find>_generator</find>
<find>_create_syllable_letters_translator</find>
<find>_accent_classification</find>
</findStrings> </findStrings>
</component> </component>
<component name="Git.Settings"> <component name="Git.Settings">
@ -217,16 +215,16 @@
<option value="$PROJECT_DIR$/tex_hyphenation.py" /> <option value="$PROJECT_DIR$/tex_hyphenation.py" />
<option value="$PROJECT_DIR$/notes" /> <option value="$PROJECT_DIR$/notes" />
<option value="$PROJECT_DIR$/workbench.xrsl" /> <option value="$PROJECT_DIR$/workbench.xrsl" />
<option value="$PROJECT_DIR$/prepare_data.py" />
<option value="$PROJECT_DIR$/workbench.py" /> <option value="$PROJECT_DIR$/workbench.py" />
<option value="$PROJECT_DIR$/prepare_data.py" />
</list> </list>
</option> </option>
</component> </component>
<component name="ProjectFrameBounds"> <component name="ProjectFrameBounds">
<option name="x" value="65" /> <option name="x" value="65" />
<option name="y" value="-1176" /> <option name="y" value="144" />
<option name="width" value="1855" /> <option name="width" value="1855" />
<option name="height" value="1176" /> <option name="height" value="1056" />
</component> </component>
<component name="ProjectView"> <component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1"> <navigator currentView="ProjectPane" proportions="" version="1">
@ -243,6 +241,8 @@
<foldersAlwaysOnTop value="true" /> <foldersAlwaysOnTop value="true" />
</navigator> </navigator>
<panes> <panes>
<pane id="Scratches" />
<pane id="Scope" />
<pane id="ProjectPane"> <pane id="ProjectPane">
<subPane> <subPane>
<PATH> <PATH>
@ -257,8 +257,6 @@
</PATH> </PATH>
</subPane> </subPane>
</pane> </pane>
<pane id="Scope" />
<pane id="Scratches" />
</panes> </panes>
</component> </component>
<component name="PropertiesComponent"> <component name="PropertiesComponent">
@ -521,7 +519,7 @@
<servers /> <servers />
</component> </component>
<component name="ToolWindowManager"> <component name="ToolWindowManager">
<frame x="65" y="-1176" width="1855" height="1176" extended-state="6" /> <frame x="65" y="144" width="1855" height="1056" extended-state="6" />
<editor active="true" /> <editor active="true" />
<layout> <layout>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.12227074" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" /> <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.12227074" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
@ -542,6 +540,25 @@
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.39979124" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" /> <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.39979124" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
</layout> </layout>
<layout-to-restore>
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="10" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32985386" sideWeight="0.5" order="8" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32985386" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32985386" sideWeight="0.5" order="9" side_tool="false" content_ui="tabs" />
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.12227074" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.39979124" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
</layout-to-restore>
</component> </component>
<component name="VcsContentAnnotationSettings"> <component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" /> <option name="myLimit" value="2678400000" />
@ -829,26 +846,6 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="18">
<caret line="477" column="34" lean-forward="false" selection-start-line="477" selection-start-column="25" selection-end-line="477" selection-end-column="34" />
<folding>
<element signature="e#24#63#0" expanded="true" />
<element signature="e#5979#7267#0" expanded="false" />
<element signature="e#8923#9218#0" expanded="false" />
<element signature="e#13669#13971#0" expanded="false" />
<element signature="e#14028#14857#0" expanded="false" />
<element signature="e#14921#15267#0" expanded="false" />
<element signature="e#18834#19030#0" expanded="false" />
<element signature="e#19349#19996#0" expanded="false" />
<element signature="e#20095#22393#0" expanded="false" />
<element signature="e#30153#30294#0" expanded="false" />
<element signature="e#30444#32589#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/workbench.xrsl"> <entry file="file://$PROJECT_DIR$/workbench.xrsl">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="216"> <state relative-caret-position="216">
@ -923,13 +920,32 @@
</entry> </entry>
<entry file="file://$PROJECT_DIR$/workbench.py"> <entry file="file://$PROJECT_DIR$/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="828"> <state relative-caret-position="1044">
<caret line="69" column="22" lean-forward="true" selection-start-line="69" selection-start-column="22" selection-end-line="69" selection-end-column="22" /> <caret line="69" column="22" lean-forward="false" selection-start-line="69" selection-start-column="22" selection-end-line="69" selection-end-column="22" />
<folding> <folding>
<element signature="e#24#63#0" expanded="true" /> <element signature="e#24#63#0" expanded="true" />
</folding> </folding>
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="298">
<caret line="514" column="33" lean-forward="false" selection-start-line="514" selection-start-column="20" selection-end-line="514" selection-end-column="33" />
<folding>
<element signature="e#24#63#0" expanded="true" />
<element signature="e#5979#7267#0" expanded="false" />
<element signature="e#8923#9218#0" expanded="false" />
<element signature="e#13768#14070#0" expanded="false" />
<element signature="e#14127#14956#0" expanded="false" />
<element signature="e#15020#15366#0" expanded="false" />
<element signature="e#18933#19129#0" expanded="false" />
<element signature="e#19448#20095#0" expanded="false" />
<element signature="e#20194#22492#0" expanded="false" />
<element signature="e#30252#30393#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
</component> </component>
</project> </project>

View File

@ -218,7 +218,10 @@ class Data:
if self._reverse_inputs: if self._reverse_inputs:
syllables = syllables[::-1] syllables = syllables[::-1]
for syllable in syllables: for syllable in syllables:
if syllable in dictionary:
index = dictionary.index(syllable) index = dictionary.index(syllable)
else:
index = 0
x[i][j] = index x[i][j] = index
j += 1 j += 1
i += 1 i += 1
@ -964,6 +967,11 @@ class Data:
wrong_word, wrong_word,
correct_word correct_word
]) ])
print(num_of_pred)
print(len(y))
print(num_of_correct_pred_words)
print(len(errors))
print(num_of_correct_pred_words + len(errors))
return (num_of_correct_pred / float(num_of_pred)) * 100, (num_of_correct_pred_words / float(len(y))) * 100, errors return (num_of_correct_pred / float(num_of_pred)) * 100, (num_of_correct_pred_words / float(len(y))) * 100, errors
def get_accentuated_letter(self, word, location, vowels, syllables=False, debug=False): def get_accentuated_letter(self, word, location, vowels, syllables=False, debug=False):