Accentuation on sloleks

2018-04-14 10:25:40 +02:00
parent d4e6e1b222
commit 9f6e5b2752
5 changed files with 807 additions and 213 deletions
@@ -2,13 +2,15 @@
 <project version="4">
  <component name="ChangeListManager">
    <list default="true" id="8a8ba9af-e1a4-433a-9968-475192610776" name="Default" comment="">
+      <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/sloleks_accentuation2.py" />
+      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_errors.pkl" afterPath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_errors.pkl" />
+      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_predictions.pkl" afterPath="$PROJECT_DIR$/cnn/word_accetuation/error_analysis_results/onedirectional_input/ensemble_predictions.pkl" />
      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
-      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/error_analysis.ipynb" />
-      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py" afterPath="$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py" />
-      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py" afterPath="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py" />
-      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py" afterPath="$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py" />
+      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/accent_classification/bidirectional_error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/accent_classification/bidirectional_error_analysis.ipynb" />
      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/bidirectional_error_analysis.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/bidirectional_error_analysis.ipynb" />
-      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/run_multiple_files.py" afterPath="$PROJECT_DIR$/run_multiple_files.py" />
+      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/cnn/word_accetuation/pattern_repetition.ipynb" afterPath="$PROJECT_DIR$/cnn/word_accetuation/pattern_repetition.ipynb" />
+      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/prepare_data.py" afterPath="$PROJECT_DIR$/prepare_data.py" />
+      <change type="MODIFICATION" beforePath="$PROJECT_DIR$/sloleks_accetuation.ipynb" afterPath="$PROJECT_DIR$/sloleks_accetuation.ipynb" />
    </list>
    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
    <option name="TRACKING_ENABLED" value="true" />
@@ -35,19 +37,19 @@
          </provider>
        </entry>
      </file>
-      <file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="true">
+      <file leaf-file-name="prepare_data.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/prepare_data.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="311">
-              <caret line="1055" column="31" lean-forward="true" selection-start-line="1055" selection-start-column="31" selection-end-line="1055" selection-end-column="31" />
+            <state relative-caret-position="-221">
+              <caret line="1570" column="23" lean-forward="false" selection-start-line="1570" selection-start-column="23" selection-end-line="1570" selection-end-column="23" />
              <folding>
                <element signature="e#24#63#0" expanded="true" />
-                <element signature="e#7927#9215#0" expanded="false" />
-                <element signature="e#10871#11166#0" expanded="false" />
-                <element signature="e#18472#18774#0" expanded="false" />
-                <element signature="e#19747#20093#0" expanded="false" />
-                <element signature="e#24509#25156#0" expanded="false" />
-                <element signature="e#38245#38386#0" expanded="false" />
+                <element signature="e#7950#9238#0" expanded="false" />
+                <element signature="e#10894#11189#0" expanded="false" />
+                <element signature="e#11294#14633#0" expanded="false" />
+                <element signature="e#18495#18797#0" expanded="false" />
+                <element signature="e#19770#20116#0" expanded="false" />
+                <element signature="e#24532#25179#0" expanded="false" />
              </folding>
            </state>
          </provider>
@@ -56,7 +58,7 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_2/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="328">
+            <state relative-caret-position="358">
              <caret line="85" column="39" lean-forward="false" selection-start-line="85" selection-start-column="39" selection-end-line="85" selection-end-column="39" />
              <folding />
            </state>
@@ -66,8 +68,8 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="400">
-              <caret line="100" column="61" lean-forward="false" selection-start-line="100" selection-start-column="61" selection-end-line="100" selection-end-column="61" />
+            <state relative-caret-position="426">
+              <caret line="87" column="84" lean-forward="true" selection-start-line="55" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
              <folding />
            </state>
          </provider>
@@ -76,7 +78,7 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="414">
+            <state relative-caret-position="1602">
              <caret line="100" column="47" lean-forward="false" selection-start-line="100" selection-start-column="47" selection-end-line="100" selection-end-column="53" />
              <folding />
            </state>
@@ -86,7 +88,7 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="346">
+            <state relative-caret-position="468">
              <caret line="37" column="121" lean-forward="false" selection-start-line="37" selection-start-column="121" selection-end-line="37" selection-end-column="121" />
              <folding />
            </state>
@@ -96,7 +98,7 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="306">
+            <state relative-caret-position="414">
              <caret line="34" column="52" lean-forward="false" selection-start-line="34" selection-start-column="52" selection-end-line="34" selection-end-column="52" />
              <folding />
            </state>
@@ -106,8 +108,8 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_1/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="396">
-              <caret line="33" column="41" lean-forward="false" selection-start-line="33" selection-start-column="41" selection-end-line="33" selection-end-column="41" />
+            <state relative-caret-position="612">
+              <caret line="45" column="17" lean-forward="false" selection-start-line="45" selection-start-column="17" selection-end-line="45" selection-end-column="17" />
              <folding />
            </state>
          </provider>
@@ -116,7 +118,7 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_1/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="1494">
+            <state relative-caret-position="1602">
              <caret line="100" column="56" lean-forward="false" selection-start-line="100" selection-start-column="56" selection-end-line="100" selection-end-column="56" />
              <folding />
            </state>
@@ -126,7 +128,7 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_1/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="1548">
+            <state relative-caret-position="1602">
              <caret line="100" column="48" lean-forward="false" selection-start-line="100" selection-start-column="48" selection-end-line="100" selection-end-column="48" />
              <folding />
            </state>
@@ -146,8 +148,8 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_3/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="378">
-              <caret line="32" column="45" lean-forward="false" selection-start-line="32" selection-start-column="45" selection-end-line="32" selection-end-column="45" />
+            <state relative-caret-position="1170">
+              <caret line="76" column="84" lean-forward="false" selection-start-line="38" selection-start-column="0" selection-end-line="76" selection-end-column="84" />
              <folding />
            </state>
          </provider>
@@ -156,7 +158,7 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="342">
+            <state relative-caret-position="450">
              <caret line="36" column="47" lean-forward="false" selection-start-line="36" selection-start-column="47" selection-end-line="36" selection-end-column="47" />
              <folding />
            </state>
@@ -166,8 +168,8 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="306">
-              <caret line="37" column="40" lean-forward="false" selection-start-line="37" selection-start-column="40" selection-end-line="37" selection-end-column="40" />
+            <state relative-caret-position="1368">
+              <caret line="87" column="84" lean-forward="false" selection-start-line="41" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
              <folding />
            </state>
          </provider>
@@ -176,8 +178,8 @@
      <file leaf-file-name="run_multiple_files.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/run_multiple_files.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="180">
-              <caret line="10" column="26" lean-forward="false" selection-start-line="10" selection-start-column="26" selection-end-line="10" selection-end-column="26" />
+            <state relative-caret-position="198">
+              <caret line="11" column="26" lean-forward="false" selection-start-line="11" selection-start-column="26" selection-end-line="11" selection-end-column="26" />
              <folding />
            </state>
          </provider>
@@ -211,7 +213,19 @@
        <entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="180">
-              <caret line="16" column="53" lean-forward="false" selection-start-line="16" selection-start-column="53" selection-end-line="16" selection-end-column="53" />
+              <caret line="16" column="28" lean-forward="true" selection-start-line="16" selection-start-column="28" selection-end-line="16" selection-end-column="28" />
+              <folding>
+                <element signature="e#24#63#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file leaf-file-name="sloleks_accentuation2.py" pinned="false" current-in-tab="true">
+        <entry file="file://$PROJECT_DIR$/sloleks_accentuation2.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="454">
+              <caret line="48" column="61" lean-forward="true" selection-start-line="48" selection-start-column="61" selection-end-line="48" selection-end-column="61" />
              <folding>
                <element signature="e#24#63#0" expanded="true" />
              </folding>
@@ -244,7 +258,7 @@
      <file leaf-file-name="workbench.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py">
          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="418">
+            <state relative-caret-position="1512">
              <caret line="95" column="55" lean-forward="false" selection-start-line="95" selection-start-column="55" selection-end-line="95" selection-end-column="55" />
              <folding />
            </state>
@@ -262,36 +276,36 @@
  </component>
  <component name="FindInProjectRecents">
    <findStrings>
-      <find>bidirectional_basic_input</find>
-      <find>_bidirectional_basic_input</find>
-      <find>shuffeling</find>
-      <find>generate_data</find>
-      <find>_generate_inputs</find>
-      <find>content_shuffle_vector_path</find>
-      <find>content_shuffle_vector_location</find>
-      <find>_shuffle_all_inputs</find>
-      <find>_generator_instance</find>
-      <find>_x_letter_input</find>
-      <find>_generate_x_and_y</find>
-      <find>content</find>
-      <find>number_of_syllables</find>
-      <find>_create_syllables</find>
-      <find>index</find>
-      <find>x_sy</find>
-      <find>_letter_generator</find>
-      <find>translator</find>
-      <find>_bidirectional_architectural_input</find>
-      <find>print</find>
-      <find>_syllable_generator</find>
      <find>np.concatenate</find>
      <find>prepare_data</find>
      <find>assign_stress_locations</find>
      <find>test_accuracy</find>
      <find>test_acc</find>
      <find>test_set</find>
-      <find>reverse_inputs</find>
      <find>accent_classification</find>
      <find>test_type</find>
+      <find>get_word_length</find>
+      <find>input_words</find>
+      <find>content</find>
+      <find>codecs</find>
+      <find>data</find>
+      <find>load_model</find>
+      <find>accentuate_word</find>
+      <find>get_word_le</find>
+      <find>load_location_models</find>
+      <find>get_ensemble_location_predictions</find>
+      <find>reverse_inputs</find>
+      <find>_x_letter_input</find>
+      <find>reverse</find>
+      <find>print(</find>
+      <find>count_vowels</find>
+      <find>count</find>
+      <find>sylla</find>
+      <find># word</find>
+      <find>accented_word</find>
+      <find>get_ensemble_type_predictions</find>
+      <find>rever</find>
+      <find>accentuate_wo</find>
    </findStrings>
  </component>
  <component name="Git.Settings">
@@ -337,6 +351,7 @@
        <option value="$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py" />
        <option value="$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py" />
        <option value="$PROJECT_DIR$/prepare_data.py" />
+        <option value="$PROJECT_DIR$/sloleks_accentuation2.py" />
      </list>
    </option>
  </component>
@@ -361,7 +376,7 @@
      <foldersAlwaysOnTop value="true" />
    </navigator>
    <panes>
-      <pane id="Scope" />
+      <pane id="Scratches" />
      <pane id="ProjectPane">
        <subPane>
          <PATH>
@@ -374,63 +389,9 @@
              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
            </PATH_ELEMENT>
          </PATH>
-          <PATH>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="accetuation" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
-            </PATH_ELEMENT>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="accetuation" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
-            </PATH_ELEMENT>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="cnn" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
-            </PATH_ELEMENT>
-          </PATH>
-          <PATH>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="accetuation" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
-            </PATH_ELEMENT>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="accetuation" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
-            </PATH_ELEMENT>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="cnn" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
-            </PATH_ELEMENT>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="accent_classification" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
-            </PATH_ELEMENT>
-          </PATH>
-          <PATH>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="accetuation" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
-            </PATH_ELEMENT>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="accetuation" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
-            </PATH_ELEMENT>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="cnn" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
-            </PATH_ELEMENT>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="accent_classification" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
-            </PATH_ELEMENT>
-            <PATH_ELEMENT>
-              <option name="myItemId" value="syllabled_letters" />
-              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
-            </PATH_ELEMENT>
-          </PATH>
        </subPane>
      </pane>
-      <pane id="Scratches" />
+      <pane id="Scope" />
    </panes>
  </component>
  <component name="PropertiesComponent">
@@ -439,11 +400,11 @@
  </component>
  <component name="RecentsManager">
    <key name="CopyFile.RECENT_KEYS">
+      <recent name="$PROJECT_DIR$" />
      <recent name="$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0" />
      <recent name="$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0" />
      <recent name="$PROJECT_DIR$/cnn/accent_classification/letters/v3_0" />
      <recent name="$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2" />
-      <recent name="$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2" />
    </key>
  </component>
  <component name="RunManager" selected="Python.TEST">
@@ -762,7 +723,6 @@
    <watches-manager />
  </component>
  <component name="editorHistoryManager">
-    <entry file="file://$PROJECT_DIR$/theanoTest.py" />
    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.py" />
    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/character_based_ffnn_keras.ipynb" />
    <entry file="file://$PROJECT_DIR$/hyphenation">
@@ -878,21 +838,10 @@
        </state>
      </provider>
    </entry>
-    <entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="180">
-          <caret line="16" column="53" lean-forward="false" selection-start-line="16" selection-start-column="53" selection-end-line="16" selection-end-column="53" />
-          <folding>
-            <element signature="e#24#63#0" expanded="true" />
-          </folding>
-        </state>
-      </provider>
-    </entry>
    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_1/workbench.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="414">
          <caret line="34" column="66" lean-forward="false" selection-start-line="34" selection-start-column="20" selection-end-line="34" selection-end-column="66" />
-          <folding />
        </state>
      </provider>
    </entry>
@@ -900,7 +849,6 @@
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="1476">
          <caret line="93" column="84" lean-forward="false" selection-start-line="41" selection-start-column="0" selection-end-line="93" selection-end-column="84" />
-          <folding />
        </state>
      </provider>
    </entry>
@@ -928,7 +876,6 @@
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="828">
          <caret line="57" column="0" lean-forward="false" selection-start-line="57" selection-start-column="0" selection-end-line="57" selection-end-column="0" />
-          <folding />
        </state>
      </provider>
    </entry>
@@ -960,7 +907,6 @@
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="612">
          <caret line="45" column="0" lean-forward="false" selection-start-line="45" selection-start-column="0" selection-end-line="45" selection-end-column="0" />
-          <folding />
        </state>
      </provider>
    </entry>
@@ -968,7 +914,6 @@
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="324">
          <caret line="29" column="42" lean-forward="false" selection-start-line="29" selection-start-column="17" selection-end-line="29" selection-end-column="42" />
-          <folding />
        </state>
      </provider>
    </entry>
@@ -983,7 +928,6 @@
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="774">
          <caret line="54" column="26" lean-forward="false" selection-start-line="54" selection-start-column="0" selection-end-line="54" selection-end-column="26" />
-          <folding />
        </state>
      </provider>
    </entry>
@@ -991,37 +935,20 @@
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="972">
          <caret line="65" column="70" lean-forward="false" selection-start-line="65" selection-start-column="70" selection-end-line="65" selection-end-column="70" />
-          <folding />
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2/workbench.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="400">
-          <caret line="100" column="61" lean-forward="false" selection-start-line="100" selection-start-column="61" selection-end-line="100" selection-end-column="61" />
-          <folding />
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_2/workbench.py">
      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="414">
+        <state relative-caret-position="1602">
          <caret line="100" column="47" lean-forward="false" selection-start-line="100" selection-start-column="47" selection-end-line="100" selection-end-column="53" />
          <folding />
        </state>
      </provider>
    </entry>
-    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_2/workbench.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="328">
-          <caret line="85" column="39" lean-forward="false" selection-start-line="85" selection-start-column="39" selection-end-line="85" selection-end-column="39" />
-          <folding />
-        </state>
-      </provider>
-    </entry>
    <entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_0/workbench.py">
      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="418">
+        <state relative-caret-position="1512">
          <caret line="95" column="55" lean-forward="false" selection-start-line="95" selection-start-column="55" selection-end-line="95" selection-end-column="55" />
          <folding />
        </state>
@@ -1029,7 +956,7 @@
    </entry>
    <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_0/workbench.py">
      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="346">
+        <state relative-caret-position="468">
          <caret line="37" column="121" lean-forward="false" selection-start-line="37" selection-start-column="121" selection-end-line="37" selection-end-column="121" />
          <folding />
        </state>
@@ -1037,7 +964,7 @@
    </entry>
    <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_0/workbench.py">
      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="306">
+        <state relative-caret-position="414">
          <caret line="34" column="52" lean-forward="false" selection-start-line="34" selection-start-column="52" selection-end-line="34" selection-end-column="52" />
          <folding />
        </state>
@@ -1051,17 +978,9 @@
        </state>
      </provider>
    </entry>
-    <entry file="file://$PROJECT_DIR$/run_multiple_files.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="180">
-          <caret line="10" column="26" lean-forward="false" selection-start-line="10" selection-start-column="26" selection-end-line="10" selection-end-column="26" />
-          <folding />
-        </state>
-      </provider>
-    </entry>
    <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllables/v2_1/workbench.py">
      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="1548">
+        <state relative-caret-position="1602">
          <caret line="100" column="48" lean-forward="false" selection-start-line="100" selection-start-column="48" selection-end-line="100" selection-end-column="48" />
          <folding />
        </state>
@@ -1069,56 +988,100 @@
    </entry>
    <entry file="file://$PROJECT_DIR$/cnn/accent_classification/syllabled_letters/v2_1/workbench.py">
      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="1494">
+        <state relative-caret-position="1602">
          <caret line="100" column="56" lean-forward="false" selection-start-line="100" selection-start-column="56" selection-end-line="100" selection-end-column="56" />
          <folding />
        </state>
      </provider>
    </entry>
-    <entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_1/workbench.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="396">
-          <caret line="33" column="41" lean-forward="false" selection-start-line="33" selection-start-column="41" selection-end-line="33" selection-end-column="41" />
-          <folding />
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_3/workbench.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="378">
-          <caret line="32" column="45" lean-forward="false" selection-start-line="32" selection-start-column="45" selection-end-line="32" selection-end-column="45" />
-          <folding />
-        </state>
-      </provider>
-    </entry>
    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_3/workbench.py">
      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="342">
+        <state relative-caret-position="450">
          <caret line="36" column="47" lean-forward="false" selection-start-line="36" selection-start-column="47" selection-end-line="36" selection-end-column="47" />
          <folding />
        </state>
      </provider>
    </entry>
+    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_3/workbench.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="1170">
+          <caret line="76" column="84" lean-forward="false" selection-start-line="38" selection-start-column="0" selection-end-line="76" selection-end-column="84" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllables/v3_3/workbench.py">
      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="306">
-          <caret line="37" column="40" lean-forward="false" selection-start-line="37" selection-start-column="40" selection-end-line="37" selection-end-column="40" />
+        <state relative-caret-position="1368">
+          <caret line="87" column="84" lean-forward="false" selection-start-line="41" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/cnn/accent_classification/letters/v3_1/workbench.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="612">
+          <caret line="45" column="17" lean-forward="false" selection-start-line="45" selection-start-column="17" selection-end-line="45" selection-end-column="17" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/cnn_dictionary/v5_2/workbench.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="358">
+          <caret line="85" column="39" lean-forward="false" selection-start-line="85" selection-start-column="39" selection-end-line="85" selection-end-column="39" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/cnn/word_accetuation/syllabled_letters/v3_2/workbench.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="426">
+          <caret line="87" column="84" lean-forward="true" selection-start-line="55" selection-start-column="0" selection-end-line="87" selection-end-column="84" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/run_multiple_files.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="198">
+          <caret line="11" column="26" lean-forward="false" selection-start-line="11" selection-start-column="26" selection-end-line="11" selection-end-column="26" />
          <folding />
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/prepare_data.py">
      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="311">
-          <caret line="1055" column="31" lean-forward="true" selection-start-line="1055" selection-start-column="31" selection-end-line="1055" selection-end-column="31" />
+        <state relative-caret-position="-221">
+          <caret line="1570" column="23" lean-forward="false" selection-start-line="1570" selection-start-column="23" selection-end-line="1570" selection-end-column="23" />
+          <folding>
+            <element signature="e#24#63#0" expanded="true" />
+            <element signature="e#7950#9238#0" expanded="false" />
+            <element signature="e#10894#11189#0" expanded="false" />
+            <element signature="e#11294#14633#0" expanded="false" />
+            <element signature="e#18495#18797#0" expanded="false" />
+            <element signature="e#19770#20116#0" expanded="false" />
+            <element signature="e#24532#25179#0" expanded="false" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/sloleks_accentuation.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="180">
+          <caret line="16" column="28" lean-forward="true" selection-start-line="16" selection-start-column="28" selection-end-line="16" selection-end-column="28" />
+          <folding>
+            <element signature="e#24#63#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/sloleks_accentuation2.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="454">
+          <caret line="48" column="61" lean-forward="true" selection-start-line="48" selection-start-column="61" selection-end-line="48" selection-end-column="61" />
          <folding>
            <element signature="e#24#63#0" expanded="true" />
-            <element signature="e#7927#9215#0" expanded="false" />
-            <element signature="e#10871#11166#0" expanded="false" />
-            <element signature="e#18472#18774#0" expanded="false" />
-            <element signature="e#19747#20093#0" expanded="false" />
-            <element signature="e#24509#25156#0" expanded="false" />
-            <element signature="e#38245#38386#0" expanded="false" />
          </folding>
        </state>
      </provider>
@@ -9,6 +9,8 @@ import keras.backend as K
 import os.path
 import codecs

+from copy import copy
+
 from keras import optimizers
 from keras.models import Model
 from keras.layers import Dense, Dropout, Input
@@ -968,16 +970,49 @@ class Data:
        return res

    def test_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, syllable_dictionary=None,
-                      threshold=0.4999955):
+                      threshold=0.4999955, patterns=None):
        errors = []
        num_of_pred = len(predictions)
        num_of_correct_pred = 0
+
+        # wrong_patterns = 0
+        # wrong_pattern_prediction = 0
        for i in range(predictions.shape[0]):
            correct_prediction = True
+
+            round_predictions = np.zeros(predictions[i].shape)
            for j in range(len(y[i])):
+                if predictions[i][j] < threshold:
+                    round_predictions[j] = 0.0
+                else:
+                    round_predictions[j] = 1.0
                if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0):
                    correct_prediction = False
-                    break
+
+            # in_pattern = False
+            # if patterns is not None:
+            #     test_predictions = copy(predictions[i])
+            #     l = self.get_word_length(x[i])
+            #     round_predictions = np.zeros(test_predictions.shape)
+            #     for j in range(len(y[i])):
+            #         if test_predictions[j] < threshold:
+            #             round_predictions[j] = 0.0
+            #         else:
+            #             round_predictions[j] = 1.0
+            #
+            #     in_pattern = False
+            #     for pattern in patterns[l]:
+            #         if (pattern == round_predictions).all():
+            #             in_pattern = True
+            #     if not in_pattern:
+            #         wrong_patterns += 1
+            #
+            # for j in range(len(y[i])):
+            #     if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0):
+            #         correct_prediction = False
+            #
+            # if not in_pattern and not correct_prediction:
+            #     wrong_pattern_prediction += 1
            # if (np.around(predictions[i]) == y[i]).all():
            if correct_prediction:
                num_of_correct_pred += 1
@@ -991,12 +1026,22 @@ class Data:
                errors.append([i,
                               decoded_x,
                               self.decode_x_other_features(feature_dictionary, [x_other_features[i]]),
-                               self.assign_stress_locations(decoded_x, np.around(predictions[i]), vowels, syllables=self._input_type != 'l'),
+                               self.assign_stress_locations(decoded_x, round_predictions, vowels, syllables=self._input_type != 'l'),
                               self.assign_stress_locations(decoded_x, y[i], vowels, syllables=self._input_type != 'l')
                               ])

+        # print(wrong_patterns)
+        # print(wrong_pattern_prediction)
        return (num_of_correct_pred / float(num_of_pred)) * 100, errors

+    # def get_word_length(self, x_el):
+    #     i = 0
+    #     for el in x_el:
+    #         if el == 0:
+    #             return i
+    #         i += 1
+    #     return 10
+
    @staticmethod
    def decode_syllable_x(word_encoded, syllable_dictionary):
        word = []
@@ -1214,8 +1259,37 @@ class Data:
    @staticmethod
    def load_location_models(letters_path, syllables_path, syllabled_letters_path):
        ############################ LOCATION ########################
-        letter_location_model = load_model(letters_path, custom_objects={'actual_accuracy': actual_accuracy})
+        nn_output_dim = 10

+        conv_input_shape = (23, 36)
+        othr_input = (140,)
+
+        conv_input = Input(shape=conv_input_shape, name='conv_input')
+        x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input)
+        x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv)
+        x_conv = MaxPooling1D(pool_size=2)(x_conv)
+        x_conv = Flatten()(x_conv)
+
+        othr_input = Input(shape=othr_input, name='othr_input')
+
+        x = concatenate([x_conv, othr_input])
+        # x = Dense(1024, input_dim=(516 + 256), activation='relu')(x)
+        x = Dense(256, activation='relu')(x)
+        x = Dropout(0.3)(x)
+        x = Dense(256, activation='relu')(x)
+        x = Dropout(0.3)(x)
+        x = Dense(256, activation='relu')(x)
+        x = Dropout(0.3)(x)
+        x = Dense(nn_output_dim, activation='sigmoid')(x)
+
+        letter_location_model = Model(inputs=[conv_input, othr_input], outputs=x)
+        opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
+        letter_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
+
+
+        letter_location_model.load_weights(letters_path)
+
+        ##############################################################
        # num_examples = len(data.x_train)  # training set size
        nn_output_dim = 10

@@ -1244,7 +1318,10 @@ class Data:
        syllable_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
        syllable_location_model.load_weights(syllables_path)

+
+        #####################################################
        conv_input_shape = (10, 252)
+
        othr_input = (140,)

        conv_input = Input(shape=conv_input_shape, name='conv_input')
@@ -1354,6 +1431,7 @@ class Data:

    @staticmethod
    def get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
+                                          letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
                                          dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
        batch_size = 16
        # print(tagged_input_words[pos])
@@ -1379,10 +1457,58 @@ class Data:
        generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels)
        syllabled_letters_location_predictions = syllabled_letters_location_model.predict_generator(generator, len(x) / (batch_size))

-        return np.mean(np.array([letter_location_predictions, syllable_location_predictions, syllabled_letters_location_predictions]), axis=0)
+        ############## CORRECT ORDER INPUT ##############
+        data = Data('l', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
+        x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels,
+                                                             feature_dictionary, 'who cares')
+        generator = data._letter_generator(x, x_other_features, fake_y, batch_size, accented_vowels)
+        letter_location_co_predictions = letter_location_co_model.predict_generator(generator, len(x) / (batch_size))
+
+        letter_location_co_predictions = data.reverse_predictions(letter_location_co_predictions, input_words, vowels)
+
+        data = Data('s', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
+        x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
+                                                                 accented_vowels, feature_dictionary, 'who cares')
+        eye = np.eye(len(syllable_dictionary), dtype=int)
+        generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, eye, accented_vowels)
+        syllable_location_co_predictions = syllable_location_co_model.predict_generator(generator, len(x) / (batch_size))
+
+        syllable_location_co_predictions = data.reverse_predictions(syllable_location_co_predictions, input_words, vowels)
+
+        data = Data('sl', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
+        x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
+                                                             accented_vowels, feature_dictionary, 'who cares')
+        max_syllable = data._get_max_syllable(syllable_dictionary)
+        syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
+        generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels)
+        syllabled_letters_location_co_predictions = syllabled_letters_location_co_model.predict_generator(generator, len(x) / (batch_size))
+
+        syllabled_letters_location_co_predictions = data.reverse_predictions(syllabled_letters_location_co_predictions, input_words, vowels)
+
+        return np.mean(np.array([letter_location_predictions, syllable_location_predictions, syllabled_letters_location_predictions,
+                                 letter_location_co_predictions, syllable_location_co_predictions, syllabled_letters_location_co_predictions]), axis=0)
+
+    def count_syllables(self, word, vowels):
+        """Count the positions of ``word`` that ``self._is_vowel`` reports as vowels.
+
+        ``reverse_predictions`` uses this count as the number of syllables of a word.
+
+        :param word: indexable sequence of characters to scan
+        :param vowels: vowel collection, forwarded unchanged to ``self._is_vowel``
+        :return: number of indices ``j`` for which ``self._is_vowel(word, j, vowels)`` is truthy
+        """
+        return sum(1 for j in range(len(word)) if self._is_vowel(word, j, vowels))
+
+    def reverse_predictions(self, predictions, words, vowels):
+        """Return a copy of ``predictions`` with the leading entries of every row mirrored.
+
+        For row ``i`` the first ``n`` entries are written in reverse order, where ``n`` is
+        ``self.count_syllables(words[i][0], vowels)``. Entries from index ``n`` onward stay zero.
+
+        :param predictions: 2-D numpy array, one row per word
+        :param words: sequence whose ``i``-th element holds the word text at index 0
+        :param vowels: vowel collection forwarded to ``count_syllables``
+        :return: new ``float32`` array with the same shape as ``predictions``
+        """
+        flipped = np.zeros(predictions.shape, dtype='float32')
+        for row, scores in enumerate(predictions):
+            syllables = self.count_syllables(words[row][0], vowels)
+            # Walk the destination left to right while reading the source right to left.
+            for dst in range(syllables):
+                src = syllables - 1 - dst
+                flipped[row][dst] += scores[src]
+        return flipped

    @staticmethod
    def get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model, syllabled_letter_type_model,
+                                      letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
                                      dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
        batch_size = 16
        y_array = np.asarray(location_y)
@@ -1409,7 +1535,57 @@ class Data:
        generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels)
        syllabled_letter_type_predictions = syllabled_letter_type_model.predict_generator(generator, accentuation_length / batch_size)

-        return np.mean(np.array([letter_type_predictions, syllable_type_predictions, syllabled_letter_type_predictions]), axis=0)
+        ############## CORRECT ORDER INPUT ##############
+        location_y = data.reverse_predictions(location_y, input_words, vowels)
+
+        data = Data('l', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
+        x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels,
+                                                             feature_dictionary, 'who cares')
+        generator = data._letter_generator(x, x_other_features, location_y, batch_size, accented_vowels)
+        letter_type_co_predictions = letter_type_co_model.predict_generator(generator, accentuation_length / (batch_size))
+
+        data.reorder_correct_direction_inputs(letter_type_co_predictions, location_y)
+
+        data = Data('s', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
+        x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
+                                                             accented_vowels, feature_dictionary, 'who cares')
+        eye = np.eye(len(syllable_dictionary), dtype=int)
+        generator = data._syllable_generator(x, x_other_features, location_y, batch_size, eye, accented_vowels)
+        syllable_type_co_predictions = syllable_type_co_model.predict_generator(generator, accentuation_length / (batch_size))
+
+        data.reorder_correct_direction_inputs(syllable_type_co_predictions, location_y)
+
+        data = Data('sl', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
+        x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
+                                                             accented_vowels, feature_dictionary, 'who cares')
+        max_syllable = data._get_max_syllable(syllable_dictionary)
+        syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
+        generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels)
+        syllabled_letter_type_co_predictions = syllabled_letter_type_co_model.predict_generator(generator, accentuation_length / batch_size)
+
+        data.reorder_correct_direction_inputs(syllabled_letter_type_co_predictions, location_y)
+
+        return np.mean(np.array([letter_type_predictions, syllable_type_predictions, syllabled_letter_type_predictions,
+                                 letter_type_co_predictions, syllable_type_co_predictions, syllabled_letter_type_co_predictions]), axis=0)
+
+    def reorder_correct_direction_inputs(self, predictions, y):
+        """Reverse, in place, each word's group of consecutive rows in ``predictions``.
+
+        ``predictions`` is consumed as consecutive groups, one group per row of ``y``. The
+        size of a group is the number of entries in that ``y`` row that are greater than
+        zero. Groups with more than one row are reversed; nothing is returned.
+
+        :param predictions: mutable sequence (list or array) of prediction rows
+        :param y: sequence of rows; positive entries determine each group's size
+        """
+        group_start = 0
+        for y_row in y:
+            group_size = sum(1 for el in y_row if el > 0)
+            if group_size > 1:
+                lo = group_start
+                hi = group_start + group_size - 1
+                # Swap from both ends towards the middle; copies keep the swap safe when
+                # indexing returns views (e.g. numpy rows).
+                while hi > lo:
+                    predictions[lo], predictions[hi] = copy(predictions[hi]), copy(predictions[lo])
+                    lo += 1
+                    hi -= 1
+            group_start += group_size

    def assign_location_stress(self, word, locations, vowels):
            #     word = list(word)
@@ -1449,12 +1625,17 @@ class Data:
        return ''.join(word_list)

    def accentuate_word(self, input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
+                        letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
                        letter_type_model, syllable_type_model, syllabled_letter_type_model,
+                        letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
                        dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
        predictions = self.get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model,
                                                             syllabled_letters_location_model,
+                                                             letter_location_co_model, syllable_location_co_model,
+                                                             syllabled_letters_location_co_model,
                                                             dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary,
                                                             syllable_dictionary)
+        #print(predictions)
        if 'A' not in vowels:
            vowels.extend(['A', 'E', 'I', 'O', 'U'])
        location_accented_words = [self.assign_location_stress(input_words[i][0][::-1], self.decode_y(predictions[i]), vowels)[::-1] for i in
@@ -1463,6 +1644,7 @@ class Data:
        location_y = np.around(predictions)
        type_predictions = self.get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model,
                                                              syllabled_letter_type_model,
+                                                              letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
                                                              dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary,
                                                              syllable_dictionary)

@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import numpy as np
+from keras.models import load_model
+import sys
+import pickle
+import time
+
+from prepare_data import *
+
+# Fix numpy's random seed.
+np.random.seed(7)
+
+# Build the letter dictionary, feature dictionary and syllable dictionary from the lexicon file.
+data = Data('l', shuffle_all_inputs=False)
+content = data._read_content('data/SlovarIJS_BESEDE_utf8.lex')
+dictionary, max_word, max_num_vowels, vowels, accented_vowels = data._create_dict(content)
+feature_dictionary = data._create_slovene_feature_dictionary()
+syllable_dictionary = data._create_syllables_dictionary(content, vowels)
+# NOTE: this replaces the accented_vowels returned by _create_dict above with a hard-coded list.
+accented_vowels = ['ŕ', 'á', 'ä', 'é', 'ë', 'ě', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü']
+
+# Load two sets of accent-location models (letters / syllables / syllabled letters) from
+# different version directories. NOTE(review): the "_co_" set presumably means
+# "correct order" (non-reversed) input - confirm against prepare_data.py.
+data = Data('l', shuffle_all_inputs=False)
+letter_location_model, syllable_location_model, syllabled_letters_location_model = data.load_location_models(
+    'cnn/word_accetuation/cnn_dictionary/v5_3/20_final_epoch.h5',
+    'cnn/word_accetuation/syllables/v3_3/20_final_epoch.h5',
+    'cnn/word_accetuation/syllabled_letters/v3_3/20_final_epoch.h5')
+
+letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model = data.load_location_models(
+    'cnn/word_accetuation/cnn_dictionary/v5_2/20_final_epoch.h5',
+    'cnn/word_accetuation/syllables/v3_2/20_final_epoch.h5',
+    'cnn/word_accetuation/syllabled_letters/v3_2/20_final_epoch.h5')
+
+# Load the matching two sets of accent-type models.
+letter_type_model, syllable_type_model, syllabled_letter_type_model = data.load_type_models(
+    'cnn/accent_classification/letters/v3_1/20_final_epoch.h5',
+    'cnn/accent_classification/syllables/v2_1/20_final_epoch.h5',
+    'cnn/accent_classification/syllabled_letters/v2_1/20_final_epoch.h5')
+
+letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model = data.load_type_models(
+    'cnn/accent_classification/letters/v3_0/20_final_epoch.h5',
+    'cnn/accent_classification/syllables/v2_0/20_final_epoch.h5',
+    'cnn/accent_classification/syllabled_letters/v2_0/20_final_epoch.h5')
+
+# Read the Sloleks table whose entries are accentuated below.
+data = Data('s', shuffle_all_inputs=False)
+new_content = data._read_content('data/sloleks-sl_v1.2.tbl')
+
+print('Commencing accentuator!')
+
+# Number of lexicon entries accentuated (and written) per chunk.
+rate = 100000
+start_timer = time.time()
+# Append mode: every chunk is added to the end of the output file, as are repeated runs.
+with open("data/new_sloleks/new_sloleks.tab", "a") as myfile:
+    for index in range(0, len(new_content), rate):
+        # Slice the chunk first and only then build the model input rows. Slicing clamps at
+        # the end of the list, so the final (shorter) chunk needs no special case, and the
+        # whole word list is no longer rebuilt on every iteration.
+        chunk = new_content[index:index + rate]
+        words = [[el[0], '', el[2], el[0]] for el in chunk]
+        data = Data('l', shuffle_all_inputs=False)
+        location_accented_words, accented_words = data.accentuate_word(words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
+                                letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
+                                letter_type_model, syllable_type_model, syllabled_letter_type_model,
+                                letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
+                                dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)
+
+        # One output row per entry: the four original columns (the fourth without its last
+        # character - presumably a trailing newline, confirm against _read_content), followed
+        # by the location-accented and the fully accented word.
+        res = ''.join(
+            el[0] + '\t' + el[1] + '\t' + el[2] + '\t' + el[3][:-1] + '\t'
+            + location_accented_words[offset] + '\t' + accented_words[offset] + '\n'
+            for offset, el in enumerate(chunk))
+
+        print('Writing data from ' + str(index) + ' onward.')
+        end_timer = time.time()
+        print("Elapsed time: " + "{0:.2f}".format((end_timer - start_timer)/60.0) + " minutes")
+        myfile.write(res)
@@ -87,18 +87,6 @@
    "    i += 1"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": 98,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "feature__en_dictionary = data._create_feature_dictionary()\n",
-    "feature__slo_dictionary = data._create_slovene_feature_dictionary()"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": 3,
@@ -120,7 +108,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
@@ -131,14 +119,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%run prepare_data.py\n",
-    "data = Data('l', shuffle_all_inputs=False)\n",
+    "data = Data('s', shuffle_all_inputs=False)\n",
    "location_accented_words, accented_words = data.accentuate_word(test_input, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n",
    "                        letter_type_model, syllable_type_model, syllabled_letter_type_model,\n",
    "                        dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)"
@@ -146,15 +134,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "['uradní', 'podatkí', 'policíje', 'kažéjo', 'ná', 'precéj', 'napeté', 'razmeré', 'v', 'piranském', 'zalivú', 'jé', 'danés', 'poročála', 'oddajá', 'dó', 'danés', 'sé', 'jé', 'zgodílo']\n",
-      "['uradnî', 'podatkî', 'policíje', 'kažëjo', 'ná', 'precëj', 'napetë', 'razmerë', 'v', 'piranskëm', 'zalivú', 'jë', 'danës', 'poročála', 'oddajá', 'dó', 'danës', 'së', 'jë', 'zgodílo']\n"
+      "['uradni', 'podatkí', 'policíje', 'kažéjo', 'ná', 'precéj', 'napeté', 'razmeré', 'v', 'piranském', 'zalivú', 'jé', 'danés', 'poročála', 'oddajá', 'dó', 'danés', 'sé', 'jé', 'zgodílo']\n",
+      "['uradni', 'pödatki', 'polícije', 'kažëjo', 'ná', 'prëcej', 'nápete', 'räzmere', 'v', 'pîranskem', 'zálivu', 'jë', 'dánes', 'poróčala', 'öddaja', 'dó', 'dánes', 'së', 'jë', 'zgodílo']\n"
     ]
    }
   ],
@@ -225,7 +213,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
@@ -508,6 +496,134 @@
    "    "
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'xml_words_generator' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-8-44b0367c6cbf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mgen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxml_words_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/Sloleks_v1.2_p2.xml'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      9\u001b[0m \u001b[0mword_glob_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     10\u001b[0m \u001b[0mword_limit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'xml_words_generator' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "# Adds accent annotations to every FormRepresentation of the Sloleks XML, one batch of words at a time,\n",
+    "# appending each LexicalEntry to the combined output file and to the current part file.\n",
+    "# NOTE(review): the next three lines look like progress output copied from an earlier run - confirm.\n",
+    "#Words proccesed: 650250\n",
+    "#Word indeks: 50023\n",
+    "#Word number: 50023\n",
+    "\n",
+    "from lxml import etree\n",
+    "import time\n",
+    "\n",
+    "# NOTE(review): xml_words_generator is not defined in this cell (the stored output is a NameError for it);\n",
+    "# it must be defined by a cell that runs before this one.\n",
+    "gen = xml_words_generator('data/Sloleks_v1.2_p2.xml')\n",
+    "word_glob_num = 0\n",
+    "word_limit = 0\n",
+    "iter_num = 50000\n",
+    "word_index = 0\n",
+    "start_timer = time.time()\n",
+    "iter_index = 0\n",
+    "words = []\n",
+    "\n",
+    "lexical_entries_load_number = 0\n",
+    "lexical_entries_save_number = 0\n",
+    "\n",
+    "\n",
+    "# INSIDE\n",
+    "# Hard-coded counters; they override the zero defaults assigned above.\n",
+    "#word_glob_num = 1500686\n",
+    "word_glob_num = 1550705\n",
+    "\n",
+    "#word_limit = 1500686\n",
+    "word_limit = 1550705\n",
+    "\n",
+    "\n",
+    "iter_index = 31\n",
+    "\n",
+    "#done_lexical_entries = 33522\n",
+    "\n",
+    "with open(\"data/new_sloleks/new_sloleks.xml\", \"ab\") as myfile:\n",
+    "    # NOTE(review): this first part file uses the 'pa' prefix while every later one uses 'p'.\n",
+    "    # Because word_glob_num starts equal to word_limit it is closed and replaced on the first\n",
+    "    # iteration - confirm the name is intended.\n",
+    "    myfile2 = open('data/new_sloleks/pa' + str(iter_index) + '.xml', 'ab')\n",
+    "    try:\n",
+    "        for event, element in etree.iterparse('data/Sloleks_v1.2_p2.xml', tag=\"LexicalEntry\", encoding=\"UTF-8\", remove_blank_text=True):\n",
+    "            # LOAD NEW WORDS AND ACCENTUATE THEM\n",
+    "            #print(\"HERE\")\n",
+    "\n",
+    "#        if lexical_entries_save_number < done_lexical_entries:\n",
+    "#            next(gen)\n",
+    "#            #print(lexical_entries_save_number)\n",
+    "#            lexical_entries_save_number += 1\n",
+    "#            lexical_entries_load_number += 1\n",
+    "#            continue\n",
+    "\n",
+    "            if word_glob_num >= word_limit:\n",
+    "                # The current batch is used up: switch to the next part file and accentuate a new batch.\n",
+    "                myfile2.close()\n",
+    "                myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')\n",
+    "                iter_index += 1\n",
+    "                print(\"Words proccesed: \" + str(word_glob_num))\n",
+    "\n",
+    "                print(\"Word indeks: \" + str(word_index))\n",
+    "                print(\"Word number: \" + str(len(words)))\n",
+    "\n",
+    "                #print(\"lexical_entries_load_number: \" + str(lexical_entries_load_number))\n",
+    "                #print(\"lexical_entries_save_number: \" + str(lexical_entries_save_number))\n",
+    "\n",
+    "                end_timer = time.time()\n",
+    "                print(\"Elapsed time: \" + \"{0:.2f}\".format((end_timer - start_timer)/60.0) + \" minutes\")\n",
+    "\n",
+    "\n",
+    "                word_index = 0\n",
+    "                words = []\n",
+    "\n",
+    "                # Pull items from the generator until at least iter_num words are collected.\n",
+    "                while len(words) < iter_num:\n",
+    "                    try:\n",
+    "                        words.extend(next(gen))\n",
+    "                        lexical_entries_load_number += 1\n",
+    "                    except StopIteration:\n",
+    "                        # Generator exhausted: continue with what was collected. Any other\n",
+    "                        # error now propagates instead of silently shortening the batch.\n",
+    "                        break\n",
+    "                #if word_glob_num > 1:\n",
+    "                #    break\n",
+    "\n",
+    "                #problem_words = words\n",
+    "                #break\n",
+    "                data = Data('l', shuffle_all_inputs=False)\n",
+    "                location_accented_words, accented_words = data.accentuate_word(words, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n",
+    "                                        letter_type_model, syllable_type_model, syllabled_letter_type_model,\n",
+    "                                        dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)\n",
+    "\n",
+    "                word_limit += len(words)\n",
+    "\n",
+    "\n",
+    "            # READ DATA\n",
+    "            # Append two 'feat' elements to every FormRepresentation; word_index walks the\n",
+    "            # results of the current batch in document order.\n",
+    "            for child in element:\n",
+    "                if child.tag == 'WordForm':\n",
+    "                    for wf in child:\n",
+    "                        if wf.tag == 'FormRepresentation':\n",
+    "                            new_element = etree.Element('feat')\n",
+    "                            new_element.attrib['att']='naglasna_mesta_oblike'\n",
+    "                            new_element.attrib['val']=location_accented_words[word_index]\n",
+    "                            wf.append(new_element)\n",
+    "\n",
+    "                            new_element = etree.Element('feat')\n",
+    "                            new_element.attrib['att']='naglašena_oblika'\n",
+    "                            new_element.attrib['val']=accented_words[word_index]\n",
+    "                            wf.append(new_element)\n",
+    "                            word_glob_num += 1\n",
+    "                            word_index += 1\n",
+    "\n",
+    "            # print(etree.tostring(element, encoding=\"UTF-8\"))\n",
+    "            myfile2.write(etree.tostring(element, encoding=\"UTF-8\", pretty_print=True))\n",
+    "            myfile.write(etree.tostring(element, encoding=\"UTF-8\", pretty_print=True))\n",
+    "            # Clear the written entry so its content is not kept while parsing continues.\n",
+    "            element.clear()\n",
+    "            lexical_entries_save_number += 1\n",
+    "    finally:\n",
+    "        # Close the current part file even when the loop raises, so buffered entries are flushed.\n",
+    "        myfile2.close()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 6,