Accentuation on sloleks

This commit is contained in:
Luka 2018-04-14 10:25:40 +02:00
parent d4e6e1b222
commit 9f6e5b2752
5 changed files with 807 additions and 213 deletions

View File

@ -2,13 +2,15 @@
<project version="4"> <project version="4">
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment=""> <list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/sloleks_accentuation2.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_errors.pkl" afterPath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_errors.pkl" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_predictions.pkl" afterPath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_predictions.pkl" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/error_analysis.ipynb" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/bidirectional_error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/bidirectional_error_analysis.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py" afterPath="$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py" afterPath="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py" afterPath="$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/bidirectional_error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/bidirectional_error_analysis.ipynb" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/bidirectional_error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/bidirectional_error_analysis.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/run_multiple_files.py" afterPath="$PROJECT_DIR$/run_multiple_files.py" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/pattern_repetition.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/pattern_repetition.ipynb" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/sloleks_accetuation.ipynb" afterPath="$PROJECT_DIR$/sloleks_accetuation.ipynb" />
</list> </list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" /> <option name="TRACKING_ENABLED" value="true" />
@ -35,19 +37,19 @@
</provider> </provider>
</entry> </entry>
</file> </file>
<file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true"> <file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/prepare_data.py"> <entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="311"> <state relative-caret-position="-221">
<caret line="1055" column="31" lean-forward="true" selection-start-line="1055" selection-start-column="31" selection-end-line="1055" selection-end-column="31" /> <caret line="1570" column="23" lean-forward="false" selection-start-line="1570" selection-start-column="23" selection-end-line="1570" selection-end-column="23" />
<folding> <folding>
<element signature="e#24#63#0" expanded="true" /> <element signature="e#24#63#0" expanded="true" />
<element signature="e#7927#9215#0" expanded="false" /> <element signature="e#7950#9238#0" expanded="false" />
<element signature="e#10871#11166#0" expanded="false" /> <element signature="e#10894#11189#0" expanded="false" />
<element signature="e#18472#18774#0" expanded="false" /> <element signature="e#11294#14633#0" expanded="false" />
<element signature="e#19747#20093#0" expanded="false" /> <element signature="e#18495#18797#0" expanded="false" />
<element signature="e#24509#25156#0" expanded="false" /> <element signature="e#19770#20116#0" expanded="false" />
<element signature="e#38245#38386#0" expanded="false" /> <element signature="e#24532#25179#0" expanded="false" />
</folding> </folding>
</state> </state>
</provider> </provider>
@ -56,7 +58,7 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_2/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_2/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="328"> <state relative-caret-position="358">
<caret line="85" column="39" lean-forward="false" selection-start-line="85" selection-start-column="39" selection-end-line="85" selection-end-column="39" /> <caret line="85" column="39" lean-forward="false" selection-start-line="85" selection-start-column="39" selection-end-line="85" selection-end-column="39" />
<folding /> <folding />
</state> </state>
@ -66,8 +68,8 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="400"> <state relative-caret-position="426">
<caret line="100" column="61" lean-forward="false" selection-start-line="100" selection-start-column="61" selection-end-line="100" selection-end-column="61" /> <caret line="87" column="84" lean-forward="true" selection-start-line="55" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
<folding /> <folding />
</state> </state>
</provider> </provider>
@ -76,7 +78,7 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="414"> <state relative-caret-position="1602">
<caret line="100" column="47" lean-forward="false" selection-start-line="100" selection-start-column="47" selection-end-line="100" selection-end-column="53" /> <caret line="100" column="47" lean-forward="false" selection-start-line="100" selection-start-column="47" selection-end-line="100" selection-end-column="53" />
<folding /> <folding />
</state> </state>
@ -86,7 +88,7 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="346"> <state relative-caret-position="468">
<caret line="37" column="121" lean-forward="false" selection-start-line="37" selection-start-column="121" selection-end-line="37" selection-end-column="121" /> <caret line="37" column="121" lean-forward="false" selection-start-line="37" selection-start-column="121" selection-end-line="37" selection-end-column="121" />
<folding /> <folding />
</state> </state>
@ -96,7 +98,7 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="306"> <state relative-caret-position="414">
<caret line="34" column="52" lean-forward="false" selection-start-line="34" selection-start-column="52" selection-end-line="34" selection-end-column="52" /> <caret line="34" column="52" lean-forward="false" selection-start-line="34" selection-start-column="52" selection-end-line="34" selection-end-column="52" />
<folding /> <folding />
</state> </state>
@ -106,8 +108,8 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_1/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_1/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="396"> <state relative-caret-position="612">
<caret line="33" column="41" lean-forward="false" selection-start-line="33" selection-start-column="41" selection-end-line="33" selection-end-column="41" /> <caret line="45" column="17" lean-forward="false" selection-start-line="45" selection-start-column="17" selection-end-line="45" selection-end-column="17" />
<folding /> <folding />
</state> </state>
</provider> </provider>
@ -116,7 +118,7 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_1/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_1/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1494"> <state relative-caret-position="1602">
<caret line="100" column="56" lean-forward="false" selection-start-line="100" selection-start-column="56" selection-end-line="100" selection-end-column="56" /> <caret line="100" column="56" lean-forward="false" selection-start-line="100" selection-start-column="56" selection-end-line="100" selection-end-column="56" />
<folding /> <folding />
</state> </state>
@ -126,7 +128,7 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_1/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_1/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1548"> <state relative-caret-position="1602">
<caret line="100" column="48" lean-forward="false" selection-start-line="100" selection-start-column="48" selection-end-line="100" selection-end-column="48" /> <caret line="100" column="48" lean-forward="false" selection-start-line="100" selection-start-column="48" selection-end-line="100" selection-end-column="48" />
<folding /> <folding />
</state> </state>
@ -146,8 +148,8 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_3/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_3/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="378"> <state relative-caret-position="1170">
<caret line="32" column="45" lean-forward="false" selection-start-line="32" selection-start-column="45" selection-end-line="32" selection-end-column="45" /> <caret line="76" column="84" lean-forward="false" selection-start-line="38" selection-start-column="0" selection-end-line="76" selection-end-column="84" />
<folding /> <folding />
</state> </state>
</provider> </provider>
@ -156,7 +158,7 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="342"> <state relative-caret-position="450">
<caret line="36" column="47" lean-forward="false" selection-start-line="36" selection-start-column="47" selection-end-line="36" selection-end-column="47" /> <caret line="36" column="47" lean-forward="false" selection-start-line="36" selection-start-column="47" selection-end-line="36" selection-end-column="47" />
<folding /> <folding />
</state> </state>
@ -166,8 +168,8 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="306"> <state relative-caret-position="1368">
<caret line="37" column="40" lean-forward="false" selection-start-line="37" selection-start-column="40" selection-end-line="37" selection-end-column="40" /> <caret line="87" column="84" lean-forward="false" selection-start-line="41" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
<folding /> <folding />
</state> </state>
</provider> </provider>
@ -176,8 +178,8 @@
<file leaf-file-name="run_multiple_files.py" pinned="false" current-in-tab="false"> <file leaf-file-name="run_multiple_files.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/run_multiple_files.py"> <entry file="file://$PROJECT_DIR$/run_multiple_files.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180"> <state relative-caret-position="198">
<caret line="10" column="26" lean-forward="false" selection-start-line="10" selection-start-column="26" selection-end-line="10" selection-end-column="26" /> <caret line="11" column="26" lean-forward="false" selection-start-line="11" selection-start-column="26" selection-end-line="11" selection-end-column="26" />
<folding /> <folding />
</state> </state>
</provider> </provider>
@ -211,7 +213,19 @@
<entry file="file://$PROJECT_DIR$/sloleks_accentuation.py"> <entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180"> <state relative-caret-position="180">
<caret line="16" column="53" lean-forward="false" selection-start-line="16" selection-start-column="53" selection-end-line="16" selection-end-column="53" /> <caret line="16" column="28" lean-forward="true" selection-start-line="16" selection-start-column="28" selection-end-line="16" selection-end-column="28" />
<folding>
<element signature="e#24#63#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="sloleks_accentuation2.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/sloleks_accentuation2.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="454">
<caret line="48" column="61" lean-forward="true" selection-start-line="48" selection-start-column="61" selection-end-line="48" selection-end-column="61" />
<folding> <folding>
<element signature="e#24#63#0" expanded="true" /> <element signature="e#24#63#0" expanded="true" />
</folding> </folding>
@ -244,7 +258,7 @@
<file leaf-file-name="workbench.py" pinned="false" current-in-tab="false"> <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="418"> <state relative-caret-position="1512">
<caret line="95" column="55" lean-forward="false" selection-start-line="95" selection-start-column="55" selection-end-line="95" selection-end-column="55" /> <caret line="95" column="55" lean-forward="false" selection-start-line="95" selection-start-column="55" selection-end-line="95" selection-end-column="55" />
<folding /> <folding />
</state> </state>
@ -262,36 +276,36 @@
</component> </component>
<component name="FindInProjectRecents"> <component name="FindInProjectRecents">
<findStrings> <findStrings>
<find>bidirectional_basic_input</find>
<find>_bidirectional_basic_input</find>
<find>shuffeling</find>
<find>generate_data</find>
<find>_generate_inputs</find>
<find>content_shuffle_vector_path</find>
<find>content_shuffle_vector_location</find>
<find>_shuffle_all_inputs</find>
<find>_generator_instance</find>
<find>_x_letter_input</find>
<find>_generate_x_and_y</find>
<find>content</find>
<find>number_of_syllables</find>
<find>_create_syllables</find>
<find>index</find>
<find>x_sy</find>
<find>_letter_generator</find>
<find>translator</find>
<find>_bidirectional_architectural_input</find>
<find>print</find>
<find>_syllable_generator</find>
<find>np.concatenate</find> <find>np.concatenate</find>
<find>prepare_data</find> <find>prepare_data</find>
<find>assign_stress_locations</find> <find>assign_stress_locations</find>
<find>test_accuracy</find> <find>test_accuracy</find>
<find>test_acc</find> <find>test_acc</find>
<find>test_set</find> <find>test_set</find>
<find>reverse_inputs</find>
<find>accent_classification</find> <find>accent_classification</find>
<find>test_type</find> <find>test_type</find>
<find>get_word_length</find>
<find>input_words</find>
<find>content</find>
<find>codecs</find>
<find>data</find>
<find>load_model</find>
<find>accentuate_word</find>
<find>get_word_le</find>
<find>load_location_models</find>
<find>get_ensemble_location_predictions</find>
<find>reverse_inputs</find>
<find>_x_letter_input</find>
<find>reverse</find>
<find>print(</find>
<find>count_vowels</find>
<find>count</find>
<find>sylla</find>
<find># word</find>
<find>accented_word</find>
<find>get_ensemble_type_predictions</find>
<find>rever</find>
<find>accentuate_wo</find>
</findStrings> </findStrings>
</component> </component>
<component name="Git.Settings"> <component name="Git.Settings">
@ -337,6 +351,7 @@
<option value="$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py" /> <option value="$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py" />
<option value="$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py" /> <option value="$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py" />
<option value="$PROJECT_DIR$/prepare_data.py" /> <option value="$PROJECT_DIR$/prepare_data.py" />
<option value="$PROJECT_DIR$/sloleks_accentuation2.py" />
</list> </list>
</option> </option>
</component> </component>
@ -361,7 +376,7 @@
<foldersAlwaysOnTop value="true" /> <foldersAlwaysOnTop value="true" />
</navigator> </navigator>
<panes> <panes>
<pane id="Scope" /> <pane id="Scratches" />
<pane id="ProjectPane"> <pane id="ProjectPane">
<subPane> <subPane>
<PATH> <PATH>
@ -374,63 +389,9 @@
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT> </PATH_ELEMENT>
</PATH> </PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="accetuation" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="accetuation" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="cnn" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="accetuation" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="accetuation" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="cnn" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="accent_classification" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="accetuation" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="accetuation" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="cnn" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="accent_classification" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="syllabled_letters" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
</subPane> </subPane>
</pane> </pane>
<pane id="Scratches" /> <pane id="Scope" />
</panes> </panes>
</component> </component>
<component name="PropertiesComponent"> <component name="PropertiesComponent">
@ -439,11 +400,11 @@
</component> </component>
<component name="RecentsManager"> <component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS"> <key name="CopyFile.RECENT_KEYS">
<recent name="$PROJECT_DIR$" />
<recent name="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0" /> <recent name="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0" />
<recent name="$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0" /> <recent name="$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0" />
<recent name="$PROJECT_DIR$/cnn/accent_classification/letters/v3_0" /> <recent name="$PROJECT_DIR$/cnn/accent_classification/letters/v3_0" />
<recent name="$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2" /> <recent name="$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2" />
<recent name="$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2" />
</key> </key>
</component> </component>
<component name="RunManager" selected="Python.TEST"> <component name="RunManager" selected="Python.TEST">
@ -762,7 +723,6 @@
<watches-manager /> <watches-manager />
</component> </component>
<component name="editorHistoryManager"> <component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/theanoTest.py" />
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.py" /> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.py" />
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.ipynb" /> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.ipynb" />
<entry file="file://$PROJECT_DIR$/hyphenation"> <entry file="file://$PROJECT_DIR$/hyphenation">
@ -878,21 +838,10 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180">
<caret line="16" column="53" lean-forward="false" selection-start-line="16" selection-start-column="53" selection-end-line="16" selection-end-column="53" />
<folding>
<element signature="e#24#63#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_1/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_1/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="414"> <state relative-caret-position="414">
<caret line="34" column="66" lean-forward="false" selection-start-line="34" selection-start-column="20" selection-end-line="34" selection-end-column="66" /> <caret line="34" column="66" lean-forward="false" selection-start-line="34" selection-start-column="20" selection-end-line="34" selection-end-column="66" />
<folding />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -900,7 +849,6 @@
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1476"> <state relative-caret-position="1476">
<caret line="93" column="84" lean-forward="false" selection-start-line="41" selection-start-column="0" selection-end-line="93" selection-end-column="84" /> <caret line="93" column="84" lean-forward="false" selection-start-line="41" selection-start-column="0" selection-end-line="93" selection-end-column="84" />
<folding />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -928,7 +876,6 @@
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="828"> <state relative-caret-position="828">
<caret line="57" column="0" lean-forward="false" selection-start-line="57" selection-start-column="0" selection-end-line="57" selection-end-column="0" /> <caret line="57" column="0" lean-forward="false" selection-start-line="57" selection-start-column="0" selection-end-line="57" selection-end-column="0" />
<folding />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -960,7 +907,6 @@
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="612"> <state relative-caret-position="612">
<caret line="45" column="0" lean-forward="false" selection-start-line="45" selection-start-column="0" selection-end-line="45" selection-end-column="0" /> <caret line="45" column="0" lean-forward="false" selection-start-line="45" selection-start-column="0" selection-end-line="45" selection-end-column="0" />
<folding />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -968,7 +914,6 @@
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="324"> <state relative-caret-position="324">
<caret line="29" column="42" lean-forward="false" selection-start-line="29" selection-start-column="17" selection-end-line="29" selection-end-column="42" /> <caret line="29" column="42" lean-forward="false" selection-start-line="29" selection-start-column="17" selection-end-line="29" selection-end-column="42" />
<folding />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -983,7 +928,6 @@
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="774"> <state relative-caret-position="774">
<caret line="54" column="26" lean-forward="false" selection-start-line="54" selection-start-column="0" selection-end-line="54" selection-end-column="26" /> <caret line="54" column="26" lean-forward="false" selection-start-line="54" selection-start-column="0" selection-end-line="54" selection-end-column="26" />
<folding />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -991,37 +935,20 @@
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="972"> <state relative-caret-position="972">
<caret line="65" column="70" lean-forward="false" selection-start-line="65" selection-start-column="70" selection-end-line="65" selection-end-column="70" /> <caret line="65" column="70" lean-forward="false" selection-start-line="65" selection-start-column="70" selection-end-line="65" selection-end-column="70" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2/workbench.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="400">
<caret line="100" column="61" lean-forward="false" selection-start-line="100" selection-start-column="61" selection-end-line="100" selection-end-column="61" />
<folding />
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="414"> <state relative-caret-position="1602">
<caret line="100" column="47" lean-forward="false" selection-start-line="100" selection-start-column="47" selection-end-line="100" selection-end-column="53" /> <caret line="100" column="47" lean-forward="false" selection-start-line="100" selection-start-column="47" selection-end-line="100" selection-end-column="53" />
<folding /> <folding />
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_2/workbench.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="328">
<caret line="85" column="39" lean-forward="false" selection-start-line="85" selection-start-column="39" selection-end-line="85" selection-end-column="39" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="418"> <state relative-caret-position="1512">
<caret line="95" column="55" lean-forward="false" selection-start-line="95" selection-start-column="55" selection-end-line="95" selection-end-column="55" /> <caret line="95" column="55" lean-forward="false" selection-start-line="95" selection-start-column="55" selection-end-line="95" selection-end-column="55" />
<folding /> <folding />
</state> </state>
@ -1029,7 +956,7 @@
</entry> </entry>
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="346"> <state relative-caret-position="468">
<caret line="37" column="121" lean-forward="false" selection-start-line="37" selection-start-column="121" selection-end-line="37" selection-end-column="121" /> <caret line="37" column="121" lean-forward="false" selection-start-line="37" selection-start-column="121" selection-end-line="37" selection-end-column="121" />
<folding /> <folding />
</state> </state>
@ -1037,7 +964,7 @@
</entry> </entry>
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="306"> <state relative-caret-position="414">
<caret line="34" column="52" lean-forward="false" selection-start-line="34" selection-start-column="52" selection-end-line="34" selection-end-column="52" /> <caret line="34" column="52" lean-forward="false" selection-start-line="34" selection-start-column="52" selection-end-line="34" selection-end-column="52" />
<folding /> <folding />
</state> </state>
@ -1051,17 +978,9 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/run_multiple_files.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180">
<caret line="10" column="26" lean-forward="false" selection-start-line="10" selection-start-column="26" selection-end-line="10" selection-end-column="26" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_1/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_1/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1548"> <state relative-caret-position="1602">
<caret line="100" column="48" lean-forward="false" selection-start-line="100" selection-start-column="48" selection-end-line="100" selection-end-column="48" /> <caret line="100" column="48" lean-forward="false" selection-start-line="100" selection-start-column="48" selection-end-line="100" selection-end-column="48" />
<folding /> <folding />
</state> </state>
@ -1069,56 +988,100 @@
</entry> </entry>
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_1/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_1/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1494"> <state relative-caret-position="1602">
<caret line="100" column="56" lean-forward="false" selection-start-line="100" selection-start-column="56" selection-end-line="100" selection-end-column="56" /> <caret line="100" column="56" lean-forward="false" selection-start-line="100" selection-start-column="56" selection-end-line="100" selection-end-column="56" />
<folding /> <folding />
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_1/workbench.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="396">
<caret line="33" column="41" lean-forward="false" selection-start-line="33" selection-start-column="41" selection-end-line="33" selection-end-column="41" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_3/workbench.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="378">
<caret line="32" column="45" lean-forward="false" selection-start-line="32" selection-start-column="45" selection-end-line="32" selection-end-column="45" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="342"> <state relative-caret-position="450">
<caret line="36" column="47" lean-forward="false" selection-start-line="36" selection-start-column="47" selection-end-line="36" selection-end-column="47" /> <caret line="36" column="47" lean-forward="false" selection-start-line="36" selection-start-column="47" selection-end-line="36" selection-end-column="47" />
<folding /> <folding />
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_3/workbench.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1170">
<caret line="76" column="84" lean-forward="false" selection-start-line="38" selection-start-column="0" selection-end-line="76" selection-end-column="84" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py"> <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="306"> <state relative-caret-position="1368">
<caret line="37" column="40" lean-forward="false" selection-start-line="37" selection-start-column="40" selection-end-line="37" selection-end-column="40" /> <caret line="87" column="84" lean-forward="false" selection-start-line="41" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_1/workbench.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="612">
<caret line="45" column="17" lean-forward="false" selection-start-line="45" selection-start-column="17" selection-end-line="45" selection-end-column="17" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_2/workbench.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="358">
<caret line="85" column="39" lean-forward="false" selection-start-line="85" selection-start-column="39" selection-end-line="85" selection-end-column="39" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2/workbench.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="426">
<caret line="87" column="84" lean-forward="true" selection-start-line="55" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/run_multiple_files.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="198">
<caret line="11" column="26" lean-forward="false" selection-start-line="11" selection-start-column="26" selection-end-line="11" selection-end-column="26" />
<folding /> <folding />
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/prepare_data.py"> <entry file="file://$PROJECT_DIR$/prepare_data.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="311"> <state relative-caret-position="-221">
<caret line="1055" column="31" lean-forward="true" selection-start-line="1055" selection-start-column="31" selection-end-line="1055" selection-end-column="31" /> <caret line="1570" column="23" lean-forward="false" selection-start-line="1570" selection-start-column="23" selection-end-line="1570" selection-end-column="23" />
<folding>
<element signature="e#24#63#0" expanded="true" />
<element signature="e#7950#9238#0" expanded="false" />
<element signature="e#10894#11189#0" expanded="false" />
<element signature="e#11294#14633#0" expanded="false" />
<element signature="e#18495#18797#0" expanded="false" />
<element signature="e#19770#20116#0" expanded="false" />
<element signature="e#24532#25179#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180">
<caret line="16" column="28" lean-forward="true" selection-start-line="16" selection-start-column="28" selection-end-line="16" selection-end-column="28" />
<folding>
<element signature="e#24#63#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/sloleks_accentuation2.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="454">
<caret line="48" column="61" lean-forward="true" selection-start-line="48" selection-start-column="61" selection-end-line="48" selection-end-column="61" />
<folding> <folding>
<element signature="e#24#63#0" expanded="true" /> <element signature="e#24#63#0" expanded="true" />
<element signature="e#7927#9215#0" expanded="false" />
<element signature="e#10871#11166#0" expanded="false" />
<element signature="e#18472#18774#0" expanded="false" />
<element signature="e#19747#20093#0" expanded="false" />
<element signature="e#24509#25156#0" expanded="false" />
<element signature="e#38245#38386#0" expanded="false" />
</folding> </folding>
</state> </state>
</provider> </provider>

View File

@ -9,6 +9,8 @@ import keras.backend as K
import os.path import os.path
import codecs import codecs
from copy import copy
from keras import optimizers from keras import optimizers
from keras.models import Model from keras.models import Model
from keras.layers import Dense, Dropout, Input from keras.layers import Dense, Dropout, Input
@ -968,16 +970,49 @@ class Data:
return res return res
def test_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, syllable_dictionary=None, def test_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, syllable_dictionary=None,
threshold=0.4999955): threshold=0.4999955, patterns=None):
errors = [] errors = []
num_of_pred = len(predictions) num_of_pred = len(predictions)
num_of_correct_pred = 0 num_of_correct_pred = 0
# wrong_patterns = 0
# wrong_pattern_prediction = 0
for i in range(predictions.shape[0]): for i in range(predictions.shape[0]):
correct_prediction = True correct_prediction = True
round_predictions = np.zeros(predictions[i].shape)
for j in range(len(y[i])): for j in range(len(y[i])):
if predictions[i][j] < threshold:
round_predictions[j] = 0.0
else:
round_predictions[j] = 1.0
if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0): if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0):
correct_prediction = False correct_prediction = False
break
# in_pattern = False
# if patterns is not None:
# test_predictions = copy(predictions[i])
# l = self.get_word_length(x[i])
# round_predictions = np.zeros(test_predictions.shape)
# for j in range(len(y[i])):
# if test_predictions[j] < threshold:
# round_predictions[j] = 0.0
# else:
# round_predictions[j] = 1.0
#
# in_pattern = False
# for pattern in patterns[l]:
# if (pattern == round_predictions).all():
# in_pattern = True
# if not in_pattern:
# wrong_patterns += 1
#
# for j in range(len(y[i])):
# if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0):
# correct_prediction = False
#
# if not in_pattern and not correct_prediction:
# wrong_pattern_prediction += 1
# if (np.around(predictions[i]) == y[i]).all(): # if (np.around(predictions[i]) == y[i]).all():
if correct_prediction: if correct_prediction:
num_of_correct_pred += 1 num_of_correct_pred += 1
@ -991,12 +1026,22 @@ class Data:
errors.append([i, errors.append([i,
decoded_x, decoded_x,
self.decode_x_other_features(feature_dictionary, [x_other_features[i]]), self.decode_x_other_features(feature_dictionary, [x_other_features[i]]),
self.assign_stress_locations(decoded_x, np.around(predictions[i]), vowels, syllables=self._input_type != 'l'), self.assign_stress_locations(decoded_x, round_predictions, vowels, syllables=self._input_type != 'l'),
self.assign_stress_locations(decoded_x, y[i], vowels, syllables=self._input_type != 'l') self.assign_stress_locations(decoded_x, y[i], vowels, syllables=self._input_type != 'l')
]) ])
# print(wrong_patterns)
# print(wrong_pattern_prediction)
return (num_of_correct_pred / float(num_of_pred)) * 100, errors return (num_of_correct_pred / float(num_of_pred)) * 100, errors
# def get_word_length(self, x_el):
# i = 0
# for el in x_el:
# if el == 0:
# return i
# i += 1
# return 10
@staticmethod @staticmethod
def decode_syllable_x(word_encoded, syllable_dictionary): def decode_syllable_x(word_encoded, syllable_dictionary):
word = [] word = []
@ -1214,8 +1259,37 @@ class Data:
@staticmethod @staticmethod
def load_location_models(letters_path, syllables_path, syllabled_letters_path): def load_location_models(letters_path, syllables_path, syllabled_letters_path):
############################ LOCATION ######################## ############################ LOCATION ########################
letter_location_model = load_model(letters_path, custom_objects={'actual_accuracy': actual_accuracy}) nn_output_dim = 10
conv_input_shape = (23, 36)
othr_input = (140,)
conv_input = Input(shape=conv_input_shape, name='conv_input')
x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input)
x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv)
x_conv = MaxPooling1D(pool_size=2)(x_conv)
x_conv = Flatten()(x_conv)
othr_input = Input(shape=othr_input, name='othr_input')
x = concatenate([x_conv, othr_input])
# x = Dense(1024, input_dim=(516 + 256), activation='relu')(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(nn_output_dim, activation='sigmoid')(x)
letter_location_model = Model(inputs=[conv_input, othr_input], outputs=x)
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
letter_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
letter_location_model.load_weights(letters_path)
##############################################################
# num_examples = len(data.x_train) # training set size # num_examples = len(data.x_train) # training set size
nn_output_dim = 10 nn_output_dim = 10
@ -1244,7 +1318,10 @@ class Data:
syllable_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ]) syllable_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
syllable_location_model.load_weights(syllables_path) syllable_location_model.load_weights(syllables_path)
#####################################################
conv_input_shape = (10, 252) conv_input_shape = (10, 252)
othr_input = (140,) othr_input = (140,)
conv_input = Input(shape=conv_input_shape, name='conv_input') conv_input = Input(shape=conv_input_shape, name='conv_input')
@ -1354,6 +1431,7 @@ class Data:
@staticmethod @staticmethod
def get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model, def get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary): dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
batch_size = 16 batch_size = 16
# print(tagged_input_words[pos]) # print(tagged_input_words[pos])
@ -1379,11 +1457,59 @@ class Data:
generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels) generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels)
syllabled_letters_location_predictions = syllabled_letters_location_model.predict_generator(generator, len(x) / (batch_size)) syllabled_letters_location_predictions = syllabled_letters_location_model.predict_generator(generator, len(x) / (batch_size))
return np.mean(np.array([letter_location_predictions, syllable_location_predictions, syllabled_letters_location_predictions]), axis=0) ############## CORRECT ORDER INPUT ##############
data = Data('l', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels,
feature_dictionary, 'who cares')
generator = data._letter_generator(x, x_other_features, fake_y, batch_size, accented_vowels)
letter_location_co_predictions = letter_location_co_model.predict_generator(generator, len(x) / (batch_size))
letter_location_co_predictions = data.reverse_predictions(letter_location_co_predictions, input_words, vowels)
data = Data('s', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
eye = np.eye(len(syllable_dictionary), dtype=int)
generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, eye, accented_vowels)
syllable_location_co_predictions = syllable_location_co_model.predict_generator(generator, len(x) / (batch_size))
syllable_location_co_predictions = data.reverse_predictions(syllable_location_co_predictions, input_words, vowels)
data = Data('sl', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
max_syllable = data._get_max_syllable(syllable_dictionary)
syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels)
syllabled_letters_location_co_predictions = syllabled_letters_location_co_model.predict_generator(generator, len(x) / (batch_size))
syllabled_letters_location_co_predictions = data.reverse_predictions(syllabled_letters_location_co_predictions, input_words, vowels)
return np.mean(np.array([letter_location_predictions, syllable_location_predictions, syllabled_letters_location_predictions,
letter_location_co_predictions, syllable_location_co_predictions, syllabled_letters_location_co_predictions]), axis=0)
def count_syllables(self, word, vowels):
    """Count the positions of ``word`` that ``self._is_vowel`` accepts.

    Args:
        word: Indexable sequence whose positions are examined one by one.
        vowels: Vowel collection forwarded unchanged to ``self._is_vowel``.

    Returns:
        int: Number of indices ``i`` in ``range(len(word))`` for which
        ``self._is_vowel(word, i, vowels)`` is truthy; ``0`` for an empty
        ``word``.
    """
    # Each accepted position contributes exactly one to the total.
    return sum(1 for position in range(len(word))
               if self._is_vowel(word, position, vowels))
def reverse_predictions(self, predictions, words, vowels):
    """Mirror the leading entries of every prediction row.

    For row ``i`` the first ``n`` entries are written in reverse order, where
    ``n = self.count_syllables(words[i][0], vowels)``. Entries from index
    ``n`` onward are left at zero in the result.

    Args:
        predictions: Array-like exposing ``.shape``; one row per word.
        words: Sequence indexed in parallel with ``predictions``; element
            ``[0]`` of each entry is passed to ``self.count_syllables``.
        vowels: Vowel collection forwarded to ``self.count_syllables``.

    Returns:
        A new ``float32`` NumPy array with the same shape as ``predictions``.

    Raises:
        IndexError: If the syllable count of a word exceeds the row width,
            or ``words`` is shorter than ``predictions``.
    """
    flipped = np.zeros(predictions.shape, dtype='float32')
    for row, source in enumerate(predictions):
        syllables = self.count_syllables(words[row][0], vowels)
        # Walk the source from its last relevant entry back to the first
        # while filling the target from the front.
        for target, origin in enumerate(range(syllables - 1, -1, -1)):
            flipped[row][target] += source[origin]
    return flipped
@staticmethod @staticmethod
def get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model, syllabled_letter_type_model, def get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model, syllabled_letter_type_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary): letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
batch_size = 16 batch_size = 16
y_array = np.asarray(location_y) y_array = np.asarray(location_y)
accentuation_length = (y_array > 0).sum() accentuation_length = (y_array > 0).sum()
@ -1409,7 +1535,57 @@ class Data:
generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels) generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels)
syllabled_letter_type_predictions = syllabled_letter_type_model.predict_generator(generator, accentuation_length / batch_size) syllabled_letter_type_predictions = syllabled_letter_type_model.predict_generator(generator, accentuation_length / batch_size)
return np.mean(np.array([letter_type_predictions, syllable_type_predictions, syllabled_letter_type_predictions]), axis=0) ############## CORRECT ORDER INPUT ##############
location_y = data.reverse_predictions(location_y, input_words, vowels)
data = Data('l', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels,
feature_dictionary, 'who cares')
generator = data._letter_generator(x, x_other_features, location_y, batch_size, accented_vowels)
letter_type_co_predictions = letter_type_co_model.predict_generator(generator, accentuation_length / (batch_size))
data.reorder_correct_direction_inputs(letter_type_co_predictions, location_y)
data = Data('s', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
eye = np.eye(len(syllable_dictionary), dtype=int)
generator = data._syllable_generator(x, x_other_features, location_y, batch_size, eye, accented_vowels)
syllable_type_co_predictions = syllable_type_co_model.predict_generator(generator, accentuation_length / (batch_size))
data.reorder_correct_direction_inputs(syllable_type_co_predictions, location_y)
data = Data('sl', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
max_syllable = data._get_max_syllable(syllable_dictionary)
syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels)
syllabled_letter_type_co_predictions = syllabled_letter_type_co_model.predict_generator(generator, accentuation_length / batch_size)
data.reorder_correct_direction_inputs(syllabled_letter_type_co_predictions, location_y)
return np.mean(np.array([letter_type_predictions, syllable_type_predictions, syllabled_letter_type_predictions,
letter_type_co_predictions, syllable_type_co_predictions, syllabled_letter_type_co_predictions]), axis=0)
def reorder_correct_direction_inputs(self, predictions, y):
    """Reverse, in place, each row's run of entries inside ``predictions``.

    ``predictions`` is treated as a flat sequence in which row ``i`` of ``y``
    owns as many consecutive entries as it has elements greater than zero.
    The entries of each such run are put in the opposite order; runs of
    length zero or one are left untouched.

    Args:
        predictions: Mutable, integer-indexable sequence (modified in place).
        y: Iterable of rows; the count of positive elements in each row gives
            the length of that row's run in ``predictions``.

    Returns:
        None. The result is the mutation of ``predictions``.
    """
    run_start = 0
    for row in y:
        run_length = sum(1 for value in row if value > 0)
        # Swap the outermost pair, then move inwards; a middle entry of an
        # odd-length run stays where it is.
        for offset in range(run_length // 2):
            low = run_start + offset
            high = run_start + run_length - 1 - offset
            # Copy both entries before overwriting: indexing a NumPy array
            # yields views, so a naive swap would lose one of the two rows.
            low_entry = copy(predictions[low])
            high_entry = copy(predictions[high])
            predictions[low] = high_entry
            predictions[high] = low_entry
        run_start += run_length
def assign_location_stress(self, word, locations, vowels): def assign_location_stress(self, word, locations, vowels):
# word = list(word) # word = list(word)
@ -1449,12 +1625,17 @@ class Data:
return ''.join(word_list) return ''.join(word_list)
def accentuate_word(self, input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model, def accentuate_word(self, input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
letter_type_model, syllable_type_model, syllabled_letter_type_model, letter_type_model, syllable_type_model, syllabled_letter_type_model,
letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary): dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
predictions = self.get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model, predictions = self.get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model,
syllabled_letters_location_model, syllabled_letters_location_model,
letter_location_co_model, syllable_location_co_model,
syllabled_letters_location_co_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary,
syllable_dictionary) syllable_dictionary)
#print(predictions)
if 'A' not in vowels: if 'A' not in vowels:
vowels.extend(['A', 'E', 'I', 'O', 'U']) vowels.extend(['A', 'E', 'I', 'O', 'U'])
location_accented_words = [self.assign_location_stress(input_words[i][0][::-1], self.decode_y(predictions[i]), vowels)[::-1] for i in location_accented_words = [self.assign_location_stress(input_words[i][0][::-1], self.decode_y(predictions[i]), vowels)[::-1] for i in
@ -1463,6 +1644,7 @@ class Data:
location_y = np.around(predictions) location_y = np.around(predictions)
type_predictions = self.get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model, type_predictions = self.get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model,
syllabled_letter_type_model, syllabled_letter_type_model,
letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary,
syllable_dictionary) syllable_dictionary)

70
sloleks_accentuation2.py Normal file
View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import numpy as np
from keras.models import load_model
import sys
import pickle
import time
from prepare_data import *
# Accentuate every entry of the Sloleks table with the ensemble of location
# and type models and append the result to data/new_sloleks/new_sloleks.tab.
np.random.seed(7)

# Dictionaries are built from the training lexicon.
data = Data('l', shuffle_all_inputs=False)
content = data._read_content('data/SlovarIJS_BESEDE_utf8.lex')
dictionary, max_word, max_num_vowels, vowels, accented_vowels = data._create_dict(content)
feature_dictionary = data._create_slovene_feature_dictionary()
syllable_dictionary = data._create_syllables_dictionary(content, vowels)
# Deliberately replaces the accented_vowels returned by _create_dict above.
accented_vowels = ['ŕ', 'á', 'ä', 'é', 'ë', 'ě', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü']

data = Data('l', shuffle_all_inputs=False)
# Two sets of location models and two sets of type models are loaded; the
# "_co" sets are the ones accentuate_word receives as its *_co_model arguments.
letter_location_model, syllable_location_model, syllabled_letters_location_model = data.load_location_models(
    'cnn/word_accetuation/cnn_dictionary/v5_3/20_final_epoch.h5',
    'cnn/word_accetuation/syllables/v3_3/20_final_epoch.h5',
    'cnn/word_accetuation/syllabled_letters/v3_3/20_final_epoch.h5')

letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model = data.load_location_models(
    'cnn/word_accetuation/cnn_dictionary/v5_2/20_final_epoch.h5',
    'cnn/word_accetuation/syllables/v3_2/20_final_epoch.h5',
    'cnn/word_accetuation/syllabled_letters/v3_2/20_final_epoch.h5')

letter_type_model, syllable_type_model, syllabled_letter_type_model = data.load_type_models(
    'cnn/accent_classification/letters/v3_1/20_final_epoch.h5',
    'cnn/accent_classification/syllables/v2_1/20_final_epoch.h5',
    'cnn/accent_classification/syllabled_letters/v2_1/20_final_epoch.h5')

letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model = data.load_type_models(
    'cnn/accent_classification/letters/v3_0/20_final_epoch.h5',
    'cnn/accent_classification/syllables/v2_0/20_final_epoch.h5',
    'cnn/accent_classification/syllabled_letters/v2_0/20_final_epoch.h5')

data = Data('s', shuffle_all_inputs=False)
new_content = data._read_content('data/sloleks-sl_v1.2.tbl')

print('Commencing accentuator!')

# Number of table rows accentuated and written per batch.
rate = 100000
start_timer = time.time()
with open("data/new_sloleks/new_sloleks.tab", "a") as myfile:
    for index in range(0, len(new_content), rate):
        # Slice first, then build the model input only for this batch. Slicing
        # clamps at the end of the sequence, so the last (short) batch needs
        # no special case.
        batch = new_content[index:index + rate]
        words = [[el[0], '', el[2], el[0]] for el in batch]
        data = Data('l', shuffle_all_inputs=False)
        location_accented_words, accented_words = data.accentuate_word(
            words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
            letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
            letter_type_model, syllable_type_model, syllabled_letter_type_model,
            letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
            dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)

        # Collect the output rows and join once instead of growing a string.
        lines = []
        for offset, row in enumerate(batch):
            # row[3][:-1] drops the last character of the fourth column
            # (presumably the trailing newline -- confirm against _read_content).
            lines.append(row[0] + '\t' + row[1] + '\t' + row[2] + '\t'
                         + row[3][:-1] + '\t' + location_accented_words[offset] + '\t'
                         + accented_words[offset] + '\n')

        print('Writing data from ' + str(index) + ' onward.')
        end_timer = time.time()
        print("Elapsed time: " + "{0:.2f}".format((end_timer - start_timer)/60.0) + " minutes")
        myfile.write(''.join(lines))

View File

@ -87,18 +87,6 @@
" i += 1" " i += 1"
] ]
}, },
{
"cell_type": "code",
"execution_count": 98,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"feature__en_dictionary = data._create_feature_dictionary()\n",
"feature__slo_dictionary = data._create_slovene_feature_dictionary()"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 3,
@ -120,7 +108,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 4,
"metadata": { "metadata": {
"collapsed": true "collapsed": true
}, },
@ -131,14 +119,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 5,
"metadata": { "metadata": {
"collapsed": true "collapsed": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"%run prepare_data.py\n", "%run prepare_data.py\n",
"data = Data('l', shuffle_all_inputs=False)\n", "data = Data('s', shuffle_all_inputs=False)\n",
"location_accented_words, accented_words = data.accentuate_word(test_input, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n", "location_accented_words, accented_words = data.accentuate_word(test_input, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n",
" letter_type_model, syllable_type_model, syllabled_letter_type_model,\n", " letter_type_model, syllable_type_model, syllabled_letter_type_model,\n",
" dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)" " dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)"
@ -146,15 +134,15 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"['uradní', 'podatkí', 'policíje', 'kažéjo', 'ná', 'precéj', 'napeté', 'razmeré', 'v', 'piranském', 'zalivú', 'jé', 'danés', 'poročála', 'oddajá', 'dó', 'danés', 'sé', 'jé', 'zgodílo']\n", "['uradni', 'podatkí', 'policíje', 'kažéjo', 'ná', 'precéj', 'napeté', 'razmeré', 'v', 'piranském', 'zalivú', 'jé', 'danés', 'poročála', 'oddajá', 'dó', 'danés', 'sé', 'jé', 'zgodílo']\n",
"['uradnî', 'podatkî', 'policíje', 'kažëjo', 'ná', 'precëj', 'napetë', 'razmerë', 'v', 'piranskëm', 'zalivú', 'jë', 'danës', 'poročála', 'oddajá', 'dó', 'danës', 'së', 'jë', 'zgodílo']\n" "['uradni', 'pödatki', 'polícije', 'kažëjo', 'ná', 'prëcej', 'nápete', 'räzmere', 'v', 'pîranskem', 'zálivu', 'jë', 'dánes', 'poróčala', 'öddaja', 'dó', 'dánes', 'së', 'jë', 'zgodílo']\n"
] ]
} }
], ],
@ -225,7 +213,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 9,
"metadata": { "metadata": {
"collapsed": true "collapsed": true
}, },
@ -508,6 +496,134 @@
" " " "
] ]
}, },
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'xml_words_generator' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-44b0367c6cbf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mgen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxml_words_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/Sloleks_v1.2_p2.xml'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mword_glob_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mword_limit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'xml_words_generator' is not defined"
]
}
],
"source": [
"#Words proccesed: 650250\n",
"#Word indeks: 50023\n",
"#Word number: 50023\n",
"\n",
"from lxml import etree\n",
"import time\n",
"\n",
"gen = xml_words_generator('data/Sloleks_v1.2_p2.xml')\n",
"word_glob_num = 0\n",
"word_limit = 0\n",
"iter_num = 50000\n",
"word_index = 0\n",
"start_timer = time.time()\n",
"iter_index = 0\n",
"words = []\n",
"\n",
"lexical_entries_load_number = 0\n",
"lexical_entries_save_number = 0\n",
"\n",
"\n",
"# INSIDE\n",
"#word_glob_num = 1500686\n",
"word_glob_num = 1550705\n",
"\n",
"#word_limit = 1500686\n",
"word_limit = 1550705\n",
"\n",
"\n",
"iter_index = 31\n",
"\n",
"#done_lexical_entries = 33522\n",
"\n",
"with open(\"data/new_sloleks/new_sloleks.xml\", \"ab\") as myfile:\n",
" myfile2 = open('data/new_sloleks/pa' + str(iter_index) + '.xml', 'ab')\n",
" for event, element in etree.iterparse('data/Sloleks_v1.2_p2.xml', tag=\"LexicalEntry\", encoding=\"UTF-8\", remove_blank_text=True):\n",
" # LOAD NEW WORDS AND ACCENTUATE THEM\n",
" #print(\"HERE\")\n",
" \n",
"# if lexical_entries_save_number < done_lexical_entries:\n",
"# next(gen)\n",
"# #print(lexical_entries_save_number)\n",
"# lexical_entries_save_number += 1\n",
"# lexical_entries_load_number += 1\n",
"# continue\n",
" \n",
" if word_glob_num >= word_limit:\n",
" myfile2.close()\n",
" myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')\n",
" iter_index += 1\n",
" print(\"Words proccesed: \" + str(word_glob_num))\n",
"\n",
" print(\"Word indeks: \" + str(word_index))\n",
" print(\"Word number: \" + str(len(words)))\n",
" \n",
" #print(\"lexical_entries_load_number: \" + str(lexical_entries_load_number))\n",
" #print(\"lexical_entries_save_number: \" + str(lexical_entries_save_number))\n",
"\n",
" end_timer = time.time()\n",
" print(\"Elapsed time: \" + \"{0:.2f}\".format((end_timer - start_timer)/60.0) + \" minutes\")\n",
"\n",
"\n",
" word_index = 0\n",
" words = []\n",
"\n",
" while len(words) < iter_num:\n",
" try:\n",
" words.extend(next(gen))\n",
" lexical_entries_load_number += 1\n",
" except:\n",
" break\n",
" #if word_glob_num > 1:\n",
" # break\n",
"\n",
" #problem_words = words\n",
" #break\n",
" data = Data('l', shuffle_all_inputs=False)\n",
" location_accented_words, accented_words = data.accentuate_word(words, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n",
" letter_type_model, syllable_type_model, syllabled_letter_type_model,\n",
" dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)\n",
"\n",
" word_limit += len(words)\n",
" \n",
" \n",
" # READ DATA\n",
" for child in element:\n",
" if child.tag == 'WordForm':\n",
" msd = None\n",
" word = None\n",
" for wf in child:\n",
" if wf.tag == 'FormRepresentation':\n",
" new_element = etree.Element('feat')\n",
" new_element.attrib['att']='naglasna_mesta_oblike'\n",
" new_element.attrib['val']=location_accented_words[word_index]\n",
" wf.append(new_element)\n",
"\n",
" new_element = etree.Element('feat')\n",
" new_element.attrib['att']='naglašena_oblika'\n",
" new_element.attrib['val']=accented_words[word_index]\n",
" wf.append(new_element)\n",
" word_glob_num += 1\n",
" word_index += 1\n",
"\n",
" # print(etree.tostring(element, encoding=\"UTF-8\"))\n",
" myfile2.write(etree.tostring(element, encoding=\"UTF-8\", pretty_print=True))\n",
" myfile.write(etree.tostring(element, encoding=\"UTF-8\", pretty_print=True))\n",
" element.clear()\n",
" lexical_entries_save_number += 1\n",
" "
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 6,

263
sloleks_accetuation2.ipynb Normal file

File diff suppressed because one or more lines are too long