stress_asignment/prepare_data.py

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
# text in Western (Windows 1252)
import numpy as np
import h5py
import math
import keras.backend as K
import os.path
from os import remove
import codecs
from copy import copy
from keras import optimizers
from keras.models import Model
from keras.layers import Dense, Dropout, Input
from keras.layers.merge import concatenate
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import Flatten
from keras.models import load_model
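# Data prepares lexicon entries for stress-assignment models: it reads the
# lexicon, builds letter/syllable dictionaries, encodes words and their
# MULTEXT-East morphosyntactic features as model inputs, and provides batch
# generators plus evaluation and accentuation helpers for the Keras models.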
class Data:
def __init__(self, input_type, allow_shuffle_vector_generation=False, save_generated_data=True, shuffle_all_inputs=True,
additional_letter_attributes=True, reverse_inputs=True, accent_classification=False, number_of_syllables=False,
convert_multext=True, bidirectional_basic_input=False, bidirectional_architectural_input=False):
self._input_type = input_type
self._save_generated_data = save_generated_data
self._allow_shuffle_vector_generation = allow_shuffle_vector_generation
self._shuffle_all_inputs = shuffle_all_inputs
self._additional_letter_attributes = additional_letter_attributes
self._reverse_inputs = reverse_inputs
self._accent_classification = accent_classification
self._number_of_syllables = number_of_syllables
self._convert_multext = convert_multext
self._bidirectional_basic_input = bidirectional_basic_input
self._bidirectional_architectural_input = bidirectional_architectural_input
self.x_train = None
# self.x2_train = None
self.x_other_features_train = None
self.y_train = None
self.x_test = None
# self.x2_test = None
self.x_other_features_test = None
self.y_test = None
self.x_validate = None
# self.x2_validate = None
self.x_other_features_validate = None
self.y_validate = None
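# generate_data either loads previously generated train/test/validate inputs
# from HDF5 files or generates them from the lexicon; test_set merges the test
# split into training (validation becomes the test set), while complete_set
# merges all three splits into the training set.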
def generate_data(self, train_inputs_name, test_inputs_name, validate_inputs_name, test_and_validation_size=0.1,
force_override=False, content_name='SlovarIJS_BESEDE_utf8.lex',
content_shuffle_vector='content_shuffle_vector', shuffle_vector='shuffle_vector',
inputs_location='../../internal_representations/inputs/', content_location='../../../data/',
test_set=False, complete_set=False):
content_path = '{}{}'.format(content_location, content_name)
train_path = '{}{}.h5'.format(inputs_location, train_inputs_name)
test_path = '{}{}.h5'.format(inputs_location, test_inputs_name)
validate_path = '{}{}.h5'.format(inputs_location, validate_inputs_name)
if not force_override and os.path.exists(train_path) and os.path.exists(test_path) and os.path.exists(validate_path):
print('LOADING DATA...')
self.x_train, self.x_other_features_train, self.y_train = self._load_inputs(train_path)
self.x_test, self.x_other_features_test, self.y_test = self._load_inputs(test_path)
self.x_validate, self.x_other_features_validate, self.y_validate = self._load_inputs(validate_path)
print('LOAD SUCCESSFUL!')
else:
content_shuffle_vector_path = '{}{}.h5'.format(inputs_location, content_shuffle_vector)
shuffle_vector_path = '{}{}'.format(inputs_location, shuffle_vector)
# actual generation of inputs
self._generate_inputs(content_path, content_shuffle_vector_path, shuffle_vector_path, test_and_validation_size, train_path, test_path,
validate_path)
if test_set:
self.x_train = np.concatenate((self.x_train, self.x_test), axis=0)
self.x_other_features_train = np.concatenate((self.x_other_features_train, self.x_other_features_test), axis=0)
self.y_train = np.concatenate((self.y_train, self.y_test), axis=0)
self.x_test = self.x_validate
self.x_other_features_test = self.x_other_features_validate
self.y_test = self.y_validate
if complete_set:
self.x_train = np.concatenate((self.x_train, self.x_test, self.x_validate), axis=0)
self.x_other_features_train = np.concatenate((self.x_other_features_train, self.x_other_features_test, self.x_other_features_validate),
axis=0)
self.y_train = np.concatenate((self.y_train, self.y_test, self.y_validate), axis=0)
self.x_test = self.x_validate
self.x_other_features_test = self.x_other_features_validate
self.y_test = self.y_validate
def _generate_inputs(self, content_location, content_shuffle_vector_location, shuffle_vector_location, test_and_validation_size, train_path,
test_path, validate_path):
print('READING CONTENT...')
content = self._read_content(content_location)
print('CONTENT READ SUCCESSFULLY')
print('CREATING DICTIONARY...')
dictionary, max_word, max_num_vowels, vowels, accented_vowels = self._create_dict(content)
if self._input_type == 's' or self._input_type == 'sl':
dictionary = self._create_syllables_dictionary(content, vowels)
print('DICTIONARY CREATION SUCCESSFUL!')
# test_and_validation_size = 0.1
train_content, test_content, validate_content = self._split_content(content, test_and_validation_size, content_shuffle_vector_location)
feature_dictionary = self._create_feature_dictionary()
# Generate X and y
print('GENERATING X AND y...')
self.x_train, self.x_other_features_train, self.y_train = self._generate_x_and_y(dictionary, max_word, max_num_vowels, train_content, vowels,
accented_vowels,
feature_dictionary, shuffle_vector_location + '_train.h5')
self.x_test, self.x_other_features_test, self.y_test = self._generate_x_and_y(dictionary, max_word, max_num_vowels, test_content, vowels,
accented_vowels,
feature_dictionary, shuffle_vector_location + '_test.h5')
self.x_validate, self.x_other_features_validate, self.y_validate = self._generate_x_and_y(dictionary, max_word, max_num_vowels,
validate_content, vowels,
accented_vowels, feature_dictionary,
shuffle_vector_location + '_validate.h5')
print('GENERATION SUCCESSFUL!')
# save inputs
if self._save_generated_data:
self._save_inputs(train_path, self.x_train, self.x_other_features_train, self.y_train)
self._save_inputs(test_path, self.x_test, self.x_other_features_test, self.y_test)
self._save_inputs(validate_path, self.x_validate, self.x_other_features_validate, self.y_validate)
# return X_train, X_other_features_train, y_train, X_test, X_other_features_test, y_test, X_validate, X_other_features_validate, y_validate
# functions for creating X and y from content
@staticmethod
def _read_content(content_path):
# with open(content_path) as f:
with codecs.open(content_path, encoding='utf8') as f:
content = f.readlines()
return [x.split('\t') for x in content]
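# _create_dict scans the lexicon to build the letter dictionary and to find
# the longest word and the maximum number of vowels (syllables) in any word.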
def _create_dict(self, content):
# CREATE dictionary AND max_word
accented_vowels = self._get_accented_vowels()
unaccented_vowels = self._get_unaccented_vowels()
vowels = []
vowels.extend(accented_vowels)
vowels.extend(unaccented_vowels)
dictionary_input = ['']
line = 0
max_word = 0
# ADD 'EMPTY' VOWEL
max_num_vowels = 0
for el in content:
num_vowels = 0
try:
if len(el[3]) > max_word:
max_word = len(el[3])
if len(el[0]) > max_word:
max_word = len(el[0])
for i in range(len(el[3])):
if self._is_vowel(list(el[3]), i, vowels):
num_vowels += 1
for c in list(el[0]):
if c not in dictionary_input:
dictionary_input.append(c)
if num_vowels > max_num_vowels:
max_num_vowels = num_vowels
except Exception:
print(line - 1)
print(el)
break
line += 1
dictionary_input = sorted(dictionary_input)
# max_num_vowels += 1
return dictionary_input, max_word, max_num_vowels, vowels, accented_vowels
# split content so that there is no overfitting
def _split_content(self, content, test_and_validation_ratio, content_shuffle_vector_location):
expanded_content = [el[1] if el[1] != '=' else el[0] for el in content]
# print(len(content))
unique_content = sorted(set(expanded_content))
s = self._load_shuffle_vector(content_shuffle_vector_location, len(unique_content))
test_num = math.floor(len(unique_content) * (test_and_validation_ratio * 2))
validation_num = math.floor(test_num * 0.5)
shuffled_unique_train_content = [unique_content[i] for i in range(len(s)) if s[i] >= test_num]
shuffled_unique_train_content_set = set(shuffled_unique_train_content)
shuffled_unique_test_content = [unique_content[i] for i in range(len(s)) if test_num > s[i] >= validation_num]
shuffled_unique_test_content_set = set(shuffled_unique_test_content)
shuffled_unique_validate_content = [unique_content[i] for i in range(len(s)) if s[i] < validation_num]
shuffled_unique_validate_content_set = set(shuffled_unique_validate_content)
train_content = [content[i] for i in range(len(content)) if expanded_content[i] in shuffled_unique_train_content_set]
test_content = [content[i] for i in range(len(content)) if expanded_content[i] in shuffled_unique_test_content_set]
validate_content = [content[i] for i in range(len(content)) if expanded_content[i] in shuffled_unique_validate_content_set]
return train_content, test_content, validate_content
@staticmethod
def _create_and_save_shuffle_vector(file_name, length):
shuffle_vector = np.arange(length)
np.random.shuffle(shuffle_vector)
h5f = h5py.File(file_name, 'w')
adict = dict(shuffle_vector=shuffle_vector)
for k, v in adict.items():
h5f.create_dataset(k, data=v)
h5f.close()
return shuffle_vector
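# _x_letter_input one-hot encodes each word letter by letter; when
# additional_letter_attributes is set, six extra flags mark vowels and
# consonant classes, and bidirectional_basic_input stores the word aligned
# from the opposite end in the second half of the matrix.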
def _x_letter_input(self, content, dictionary, max_word, vowels, shuffle_vector_location):
if self._additional_letter_attributes:
if not self._bidirectional_basic_input:
x = np.zeros((len(content), max_word, len(dictionary) + 6), dtype=int)
else:
x = np.zeros((len(content), 2 * max_word, len(dictionary) + 6), dtype=int)
voiced_consonants = self._get_voiced_consonants()
resonant_silent_consonants = self._get_resonant_silent_consonants()
nonresonant_silent_consonants = self._get_nonresonant_silent_consonants()
# print('HERE!!!')
else:
# print('HERE!!!')
if not self._bidirectional_basic_input:
x = np.zeros((len(content), max_word, len(dictionary)), dtype=int)
else:
x = np.zeros((len(content), 2 * max_word, len(dictionary)), dtype=int)
if self._shuffle_all_inputs:
s = self._load_shuffle_vector(shuffle_vector_location, len(content))
else:
s = None
# i = 0
for i in range(len(content)):
if self._shuffle_all_inputs:
mod_i = s[i]
else:
mod_i = i
word = content[mod_i][0]
if self._reverse_inputs:
word = word[::-1]
j = 0
for c in list(word):
if j >= max_word:
continue
index = 0
if self._bidirectional_basic_input:
j2 = max_word + (len(word) - j - 1)
for d in dictionary:
if c == d:
x[i][j][index] = 1
if self._bidirectional_basic_input:
x[i][j2][index] = 1
break
index += 1
if self._additional_letter_attributes:
if self._is_vowel(word, j, vowels):
x[i][j][len(dictionary)] = 1
if self._bidirectional_basic_input:
x[i][j2][len(dictionary)] = 1
else:
x[i][j][len(dictionary) + 1] = 1
if self._bidirectional_basic_input:
x[i][j2][len(dictionary) + 1] = 1
if c in voiced_consonants:
x[i][j][len(dictionary) + 2] = 1
if self._bidirectional_basic_input:
x[i][j2][len(dictionary) + 2] = 1
else:
x[i][j][len(dictionary) + 3] = 1
if self._bidirectional_basic_input:
x[i][j2][len(dictionary) + 3] = 1
if c in resonant_silent_consonants:
x[i][j][len(dictionary) + 4] = 1
if self._bidirectional_basic_input:
x[i][j2][len(dictionary) + 4] = 1
elif c in nonresonant_silent_consonants:
x[i][j][len(dictionary) + 5] = 1
if self._bidirectional_basic_input:
x[i][j2][len(dictionary) + 5] = 1
j += 1
#i += 1
return x
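# _x_syllable_input encodes each word as a sequence of syllable-dictionary
# indices (unknown syllables map to index 0); with bidirectional_basic_input
# the word aligned from the opposite end is stored in the second half of the row.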
def _x_syllable_input(self, content, dictionary, max_num_vowels, vowels, shuffle_vector_location):
if not self._bidirectional_basic_input:
x = np.zeros((len(content), max_num_vowels), dtype=int)
else:
x = np.zeros((len(content), 2 * max_num_vowels), dtype=int)
if self._shuffle_all_inputs:
s = self._load_shuffle_vector(shuffle_vector_location, len(content))
else:
s = None
for i in range(len(content)):
if self._shuffle_all_inputs:
mod_i = s[i]
else:
mod_i = i
j = 0
syllables = self._create_syllables(content[mod_i][0], vowels)
if self._reverse_inputs:
syllables = syllables[::-1]
for syllable in syllables:
if j >= max_num_vowels:
continue
if syllable in dictionary:
x[i][j] = dictionary.index(syllable)
if self._bidirectional_basic_input:
x[i][max_num_vowels + (len(syllables) - j - 1)] = dictionary.index(syllable)
else:
x[i][j] = 0
j += 1
#i += 1
return x
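# _y_output marks stressed vowels: each output position corresponds to one
# vowel of the word, holding 1 (or the accent-type index when
# accent_classification is on) if the accented form in column 3 carries
# stress on that vowel.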
def _y_output(self, content, max_num_vowels, vowels, accentuated_vowels, shuffle_vector_location):
y = np.zeros((len(content), max_num_vowels))
i = 0
if self._shuffle_all_inputs:
s = self._load_shuffle_vector(shuffle_vector_location, len(content))
else:
s = None
for i in range(len(content)):
if self._shuffle_all_inputs:
mod_i = s[i]
else:
mod_i = i
el = content[mod_i]
word = el[3]
if self._reverse_inputs:
word = word[::-1]
j = 0
# word_accentuations = []
num_vowels = 0
for c in list(word):
index = 0
for d in accentuated_vowels:
if c == d:
if not self._accent_classification:
y[i][num_vowels] = 1
else:
y[i][num_vowels] = index
# word_accentuations.append(num_vowels)
break
index += 1
if self._is_vowel(word, j, vowels):
num_vowels += 1
j += 1
return y
# Generate each y as an array of 11 numbers (with possible values between 0 and 1)
def _generate_x_and_y(self, dictionary, max_word, max_num_vowels, content, vowels, accentuated_vowels, feature_dictionary,
shuffle_vector_location):
if self._input_type == 'l':
x = self._x_letter_input(content, dictionary, max_word, vowels, shuffle_vector_location)
elif self._input_type == 's' or self._input_type == 'sl':
x = self._x_syllable_input(content, dictionary, max_num_vowels, vowels, shuffle_vector_location)
else:
raise ValueError('Unknown input_type. It must be \'l\', \'s\' or \'sl\'.')
y = self._y_output(content, max_num_vowels, vowels, accentuated_vowels, shuffle_vector_location)
# print('CREATING OTHER FEATURES...')
x_other_features = self._create_x_features(content, feature_dictionary, vowels, shuffle_vector_location)
# print('OTHER FEATURES CREATED!')
if self._shuffle_all_inputs:
print('SHUFFLING INPUTS...')
#x, x_other_features, y = self._shuffle_inputs(x, x_other_features, y, shuffle_vector_location)
print('INPUTS SHUFFLED!')
return x, x_other_features, y
def _create_syllables_dictionary(self, content, vowels):
dictionary = []
for el in content:
syllables = self._create_syllables(el[0], vowels)
for syllable in syllables:
if syllable not in dictionary:
dictionary.append(syllable)
dictionary.append('')
return sorted(dictionary)
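# _create_syllables splits a word into syllables; consonant clusters between
# vowels are divided by _split_consonants, and a word without any vowel is
# returned unchanged (as a string rather than a list).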
def _create_syllables(self, word, vowels):
word_list = list(word)
consonants = []
syllables = []
for i in range(len(word_list)):
if self._is_vowel(word_list, i, vowels):
if syllables == []:
consonants.append(word_list[i])
syllables.append(''.join(consonants))
else:
left_consonants, right_consonants = self._split_consonants(list(''.join(consonants).lower()))
syllables[-1] += ''.join(left_consonants)
right_consonants.append(word_list[i])
syllables.append(''.join(right_consonants))
consonants = []
else:
consonants.append(word_list[i])
if len(syllables) < 1:
return word
syllables[-1] += ''.join(consonants)
return syllables
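# _is_vowel also treats 'r'/'R' as syllabic (vowel-like) when it has no
# neighbouring vowel, as in Slovene words such as 'prst'.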
def _is_vowel(self, word_list, position, vowels):
if word_list[position] in vowels:
return True
if (word_list[position] == u'r' or word_list[position] == u'R') and (position - 1 < 0 or word_list[position - 1] not in vowels) and (
position + 1 >= len(word_list) or word_list[position + 1] not in vowels):
return True
return False
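# _split_consonants decides where a consonant cluster between two vowels is
# divided: candidate split points are ranked by what looks like a
# sonority-style preference (hyphens, doubled letters, voiced vs.
# resonant/non-resonant silent consonants) and the lowest-ranked option wins;
# with no candidates the whole cluster goes to the following syllable.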
def _split_consonants(self, consonants):
voiced_consonants = self._get_voiced_consonants()
resonant_silent_consonants = self._get_resonant_silent_consonants()
unresonant_silent_consonants = self._get_nonresonant_silent_consonants()
if len(consonants) == 0:
return [''], ['']
elif len(consonants) == 1:
return [''], consonants
else:
split_options = []
for i in range(len(consonants) - 1):
if consonants[i] == '-' or consonants[i] == '_':
split_options.append([i, -1])
elif consonants[i] == consonants[i + 1]:
split_options.append([i, 0])
elif consonants[i] in voiced_consonants:
if consonants[i + 1] in resonant_silent_consonants or consonants[i + 1] in unresonant_silent_consonants:
split_options.append([i, 2])
elif consonants[i] in resonant_silent_consonants:
if consonants[i + 1] in resonant_silent_consonants:
split_options.append([i, 1])
elif consonants[i + 1] in unresonant_silent_consonants:
split_options.append([i, 3])
elif consonants[i] in unresonant_silent_consonants:
if consonants[i + 1] in resonant_silent_consonants:
split_options.append([i, 4])
if split_options == []:
return [''], consonants
else:
split = min(split_options, key=lambda x: x[1])
return consonants[:split[0] + 1], consonants[split[0] + 1:]
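# _create_x_features one-hot encodes the morphosyntactic tag of each entry
# against feature_dictionary (optionally converting older MULTEXT-East tags
# to the v4 scheme first) and can append the vowel count as an extra feature.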
def _create_x_features(self, content, feature_dictionary, vowels, shuffle_vector_location):
x_other_features = []
if self._shuffle_all_inputs:
s = self._load_shuffle_vector(shuffle_vector_location, len(content))
else:
s = None
for index in range(len(content)):
if self._shuffle_all_inputs:
mod_i = s[index]
else:
mod_i = index
el = content[mod_i]
x_el_other_features = []
if self._convert_multext:
converted_el = ''.join(self._convert_to_multext_east_v4(list(el[2]), feature_dictionary))
else:
converted_el = el[2]
for feature in feature_dictionary:
if converted_el[0] == feature[1]:
x_el_other_features.append(1)
for i in range(2, len(feature)):
for j in range(len(feature[i])):
if i - 1 < len(converted_el) and feature[i][j] == converted_el[i - 1]:
x_el_other_features.append(1)
else:
x_el_other_features.append(0)
else:
x_el_other_features.extend([0] * feature[0])
if self._number_of_syllables:
list_of_letters = list(el[0])
num_of_vowels = 0
for i in range(len(list_of_letters)):
if self._is_vowel(list(el[0]), i, vowels):
num_of_vowels += 1
x_el_other_features.append(num_of_vowels)
x_other_features.append(x_el_other_features)
return np.array(x_other_features)
def _shuffle_inputs(self, x, x_other_features, y, shuffle_vector_location):
s = self._load_shuffle_vector(shuffle_vector_location, x.shape[0])
x = x[s]
y = y[s]
x_other_features = x_other_features[s]
return x, x_other_features, y
# functions for saving, loading and shuffling whole arrays to ram
@staticmethod
def _save_inputs(file_name, x, x_other_features, y):
h5f = h5py.File(file_name, 'w')
a_dict = dict(X=x, X_other_features=x_other_features, y=y)
for k, v in a_dict.items():
h5f.create_dataset(k, data=v)
h5f.close()
@staticmethod
def _load_inputs(file_name):
h5f = h5py.File(file_name, 'r')
x = h5f['X'][:]
y = h5f['y'][:]
x_other_features = h5f['X_other_features'][:]
h5f.close()
return x, x_other_features, y
def _load_shuffle_vector(self, file_path, length=0):
if os.path.exists(file_path):
h5f = h5py.File(file_path, 'r')
shuffle_vector = h5f['shuffle_vector'][:]
h5f.close()
else:
if self._allow_shuffle_vector_generation:
shuffle_vector = self._create_and_save_shuffle_vector(file_path, length)
else:
raise ValueError('Shuffle vector on path \'{}\' does not exist! Either generate a new vector (by initializing a new Data object '
'with parameter allow_shuffle_vector_generation=True) or paste one that is already generated!'.format(file_path))
return shuffle_vector
@staticmethod
def _convert_to_multext_east_v4(old_features, feature_dictionary):
new_features = ['-'] * 9
new_features[:len(old_features)] = old_features
if old_features[0] == 'A':
if old_features[1] == 'f' or old_features[1] == 'o':
new_features[1] = 'g'
return new_features[:len(feature_dictionary[0]) - 1]
if old_features[0] == 'C':
return new_features[:len(feature_dictionary[1]) - 1]
if old_features[0] == 'I':
return new_features[:len(feature_dictionary[2]) - 1]
if old_features[0] == 'M':
new_features[2:6] = old_features[1:5]
new_features[1] = old_features[5]
if new_features[2] == 'm':
new_features[2] = '-'
return new_features[:len(feature_dictionary[3]) - 1]
if old_features[0] == 'N':
if len(old_features) >= 7:
new_features[5] = old_features[7]
return new_features[:len(feature_dictionary[4]) - 1]
if old_features[0] == 'P':
if new_features[8] == 'n':
new_features[8] = 'b'
return new_features[:len(feature_dictionary[5]) - 1]
if old_features[0] == 'Q':
return new_features[:len(feature_dictionary[6]) - 1]
if old_features[0] == 'R':
return new_features[:len(feature_dictionary[7]) - 1]
if old_features[0] == 'S':
if len(old_features) == 4:
new_features[1] = old_features[3]
else:
new_features[1] = '-'
return new_features[:len(feature_dictionary[8]) - 1]
if old_features[0] == 'V':
if old_features[1] == 'o' or old_features[1] == 'c':
new_features[1] = 'm'
new_features[3] = old_features[2]
new_features[2] = '-'
if old_features[2] == 'i':
new_features[3] = 'r'
if len(old_features) > 3 and old_features[3] == 'p':
new_features[3] = 'r'
elif len(old_features) > 3 and old_features[3] == 'f':
new_features[3] = 'f'
if len(old_features) >= 9:
new_features[7] = old_features[8]
else:
new_features[7] = '-'
return new_features[:len(feature_dictionary[9]) - 1]
return ''
# generator for inputs for tracking of data fitting
def generator(self, data_type, batch_size, x=None, x_other_features_validate=None, y_validate=None, content_name='SlovarIJS_BESEDE_utf8.lex',
content_location='../../../data/', oversampling=np.ones(13)):
content_path = '{}{}'.format(content_location, content_name)
if data_type == 'train':
return self._generator_instance(self.x_train, self.x_other_features_train, self.y_train, batch_size, content_path, oversampling)
elif data_type == 'test':
return self._generator_instance(self.x_test, self.x_other_features_test, self.y_test, batch_size, content_path, oversampling)
elif data_type == 'validate':
return self._generator_instance(self.x_validate, self.x_other_features_validate, self.y_validate, batch_size, content_path, oversampling)
else:
return self._generator_instance(x, x_other_features_validate, y_validate, batch_size)
# if self._input_type
def _generator_instance(self, orig_x, orig_x_additional, orig_y, batch_size, content_path, oversampling):
if self._input_type == 'l':
content = self._read_content(content_path)
dictionary, max_word, max_num_vowels, vowels, accented_vowels = self._create_dict(content)
return self._letter_generator(orig_x, orig_x_additional, orig_y, batch_size, accented_vowels)
elif self._input_type == 's':
content = self._read_content(content_path)
dictionary, max_word, max_num_vowels, vowels, accented_vowels = self._create_dict(content)
syllable_dictionary = self._create_syllables_dictionary(content, vowels)
eye = np.eye(len(syllable_dictionary), dtype=int)
return self._syllable_generator(orig_x, orig_x_additional, orig_y, batch_size, eye, accented_vowels, oversampling)
elif self._input_type == 'sl':
content = self._read_content(content_path)
dictionary, max_word, max_num_vowels, vowels, accented_vowels = self._create_dict(content)
syllable_dictionary = self._create_syllables_dictionary(content, vowels)
max_syllable = self._get_max_syllable(syllable_dictionary)
syllable_letters_translator = self._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
return self._syllable_generator(orig_x, orig_x_additional, orig_y, batch_size, syllable_letters_translator, accented_vowels, oversampling)
# generator for inputs for tracking of data fitting
def _letter_generator(self, orig_x, orig_x_additional, orig_y, batch_size, accented_vowels):
size = orig_x.shape[0]
while 1:
loc = 0
if self._accent_classification:
eye = np.eye(len(accented_vowels), dtype=int)
eye_input_accent = np.eye(len(orig_y[0]), dtype=int)
input_x_stack = []
input_x_other_features_stack = []
input_y_stack = []
while loc < size:
while len(input_x_stack) < batch_size and loc < size:
accent_loc = 0
for accent in orig_y[loc]:
if accent > 0:
new_orig_x_additional = orig_x_additional[loc]
new_orig_x_additional = np.concatenate((new_orig_x_additional, eye_input_accent[accent_loc]))
input_x_stack.append(orig_x[loc])
input_x_other_features_stack.append(new_orig_x_additional)
input_y_stack.append(eye[int(accent)])
accent_loc += 1
loc += 1
if len(input_x_stack) > batch_size:
yield ([np.array(input_x_stack[:batch_size]),
np.array(input_x_other_features_stack[:batch_size])], np.array(input_y_stack)[:batch_size])
input_x_stack = input_x_stack[batch_size:]
input_x_other_features_stack = input_x_other_features_stack[batch_size:]
input_y_stack = input_y_stack[batch_size:]
else:
# print('BBB')
# print(np.array(input_stack))
# yield (np.array(input_stack))
yield ([np.array(input_x_stack), np.array(input_x_other_features_stack)], np.array(input_y_stack))
input_x_stack = []
input_x_other_features_stack = []
input_y_stack = []
else:
while loc < size:
if loc + batch_size >= size:
if self._bidirectional_architectural_input:
split_orig_x = np.hsplit(orig_x[loc:size], 2)
yield ([split_orig_x[0], split_orig_x[1], orig_x_additional[loc:size]], orig_y[loc:size])
else:
yield ([orig_x[loc:size], orig_x_additional[loc:size]], orig_y[loc:size])
else:
if self._bidirectional_architectural_input:
split_orig_x = np.hsplit(orig_x[loc:loc + batch_size], 2)
yield ([split_orig_x[0], split_orig_x[1], orig_x_additional[loc:loc + batch_size]], orig_y[loc:loc + batch_size])
else:
yield ([orig_x[loc:loc + batch_size], orig_x_additional[loc:loc + batch_size]], orig_y[loc:loc + batch_size])
loc += batch_size
# generator for inputs for tracking of data fitting
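# translator maps syllable indices either to one-hot rows (an identity matrix)
# or to letter-level encodings of each syllable; oversampling[accent] controls
# how many times an example of a given accent class is repeated when
# accent_classification is on.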
def _syllable_generator(self, orig_x, orig_x_additional, orig_y, batch_size, translator, accented_vowels, oversampling=np.ones(13)):
size = orig_x.shape[0]
while 1:
loc = 0
if self._accent_classification:
eye = np.eye(len(accented_vowels), dtype=int)
eye_input_accent = np.eye(len(orig_y[0]), dtype=int)
input_x_stack = []
input_x_other_features_stack = []
input_y_stack = []
while loc < size:
while len(input_x_stack) < batch_size and loc < size:
accent_loc = 0
for accent in orig_y[loc]:
if accent > 0:
new_orig_x_additional = orig_x_additional[loc]
new_orig_x_additional = np.concatenate((new_orig_x_additional, eye_input_accent[accent_loc]))
for i in range(int(oversampling[int(accent)])):
input_x_stack.append(orig_x[loc])
input_x_other_features_stack.append(new_orig_x_additional)
input_y_stack.append(eye[int(accent)])
accent_loc += 1
loc += 1
if len(input_x_stack) > batch_size:
gen_orig_x = translator[np.array(input_x_stack[:batch_size])]
if self._bidirectional_architectural_input:
split_orig_x = np.hsplit(gen_orig_x, 2)
yield ([split_orig_x[0], split_orig_x[1], np.array(input_x_other_features_stack[:batch_size])],
np.array(input_y_stack)[:batch_size])
else:
yield ([gen_orig_x, np.array(input_x_other_features_stack[:batch_size])], np.array(input_y_stack)[:batch_size])
# yield ([gen_orig_x, np.array(input_x_other_features_stack[:batch_size])], np.array(input_y_stack)[:batch_size])
input_x_stack = input_x_stack[batch_size:]
input_x_other_features_stack = input_x_other_features_stack[batch_size:]
input_y_stack = input_y_stack[batch_size:]
else:
#print('-------------------------------------------------------------------------------------------')
#if dictionary is not None:
# print(self.decode_x(word_encoded, dictionary))
#print(input_x_stack)
#print(input_x_other_features_stack)
#print(input_y_stack)
#print(loc)
if len(input_x_stack) == 0:
continue
gen_orig_x = translator[np.array(input_x_stack)]
if self._bidirectional_architectural_input:
split_orig_x = np.hsplit(gen_orig_x, 2)
yield ([split_orig_x[0], split_orig_x[1], np.array(input_x_other_features_stack)],
np.array(input_y_stack))
else:
yield ([gen_orig_x, np.array(input_x_other_features_stack)], np.array(input_y_stack))
# yield ([gen_orig_x, np.array(input_x_other_features_stack)], np.array(input_y_stack))
input_x_stack = []
input_x_other_features_stack = []
input_y_stack = []
else:
while loc < size:
if loc + batch_size >= size:
gen_orig_x = translator[orig_x[loc:size]]
if self._bidirectional_architectural_input:
split_orig_x = np.hsplit(gen_orig_x, 2)
yield ([split_orig_x[0], split_orig_x[1], orig_x_additional[loc:size]], orig_y[loc:size])
else:
yield ([gen_orig_x, orig_x_additional[loc:size]], orig_y[loc:size])
#yield ([gen_orig_x, orig_x_additional[loc:size]], orig_y[loc:size])
else:
gen_orig_x = translator[orig_x[loc:loc + batch_size]]
if self._bidirectional_architectural_input:
split_orig_x = np.hsplit(gen_orig_x, 2)
yield ([split_orig_x[0], split_orig_x[1], orig_x_additional[loc:loc + batch_size]], orig_y[loc:loc + batch_size])
else:
yield ([gen_orig_x, orig_x_additional[loc:loc + batch_size]], orig_y[loc:loc + batch_size])
#yield ([gen_orig_x, orig_x_additional[loc:loc + batch_size]], orig_y[loc:loc + batch_size])
loc += batch_size
def _get_max_syllable(self, syllable_dictionary):
max_len = 0
for el in syllable_dictionary:
if len(el) > max_len:
max_len = len(el)
return max_len
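# _create_syllable_letters_translator builds, for every syllable in the
# dictionary, a flat letter-level one-hot encoding padded to max_syllable,
# with the same six attribute flags used by _x_letter_input.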
def _create_syllable_letters_translator(self, max_syllable, syllable_dictionary, dictionary, vowels, aditional_letter_attributes=True):
if aditional_letter_attributes:
voiced_consonants = self._get_voiced_consonants()
resonant_silent_consonants = self._get_resonant_silent_consonants()
nonresonant_silent_consonants = self._get_nonresonant_silent_consonants()
syllable_letters_translator = []
for syllable in syllable_dictionary:
di_syllable = []
for let in range(max_syllable):
# di_let = []
for a in dictionary:
if let < len(syllable) and a == list(syllable)[let]:
di_syllable.append(1)
else:
di_syllable.append(0)
if aditional_letter_attributes:
if let >= len(syllable):
di_syllable.extend([0, 0, 0, 0, 0, 0])
elif self._is_vowel(list(syllable), let, vowels):
di_syllable.extend([1, 0, 0, 0, 0, 0])
else:
# X[i][j][len(dictionary) + 1] = 1
if list(syllable)[let] in voiced_consonants:
# X[i][j][len(dictionary) + 2] = 1
di_syllable.extend([0, 1, 1, 0, 0, 0])
else:
# X[i][j][len(dictionary) + 3] = 1
if list(syllable)[let] in resonant_silent_consonants:
# X[i][j][len(dictionary) + 4] = 1
di_syllable.extend([0, 1, 0, 1, 1, 0])
elif list(syllable)[let] in nonresonant_silent_consonants:
# X[i][j][len(dictionary) + 5] = 1
di_syllable.extend([0, 1, 0, 1, 0, 1])
else:
di_syllable.extend([0, 0, 0, 0, 0, 0])
# di_syllable.append(di_let)
syllable_letters_translator.append(di_syllable)
syllable_letters_translator = np.array(syllable_letters_translator, dtype=int)
return syllable_letters_translator
@staticmethod
def _get_accented_vowels():
return [u'à', u'á', u'ä', u'é', u'ë', u'ì', u'í', u'î', u'ó', u'ô', u'ö', u'ú', u'ü']
@staticmethod
def _get_unaccented_vowels():
return [u'a', u'e', u'i', u'o', u'u']
@staticmethod
def _get_voiced_consonants():
return ['m', 'n', 'v', 'l', 'r', 'j', 'y', 'w']
@staticmethod
def _get_resonant_silent_consonants():
return ['b', 'd', 'z', 'ž', 'g']
@staticmethod
def _get_nonresonant_silent_consonants():
return ['p', 't', 's', 'š', 'č', 'k', 'f', 'h', 'c']
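# The feature dictionaries below appear to describe the supported
# MULTEXT-East categories as [one-hot width, POS code, allowed values per
# attribute position]; the first uses Slovene codes, the second the English
# v4 codes.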
@staticmethod
def _create_slovene_feature_dictionary():
# old: http://nl.ijs.si/ME/Vault/V3/msd/html/
# new: http://nl.ijs.si/ME/V4/msd/html/
# changes: http://nl.ijs.si/jos/msd/html-en/msd.diffs.html
return [[21,
'P',
['p', 's'],
['n', 'p', 's'],
['m', 'z', 's'],
['e', 'd', 'm'],
['i', 'r', 'd', 't', 'm', 'o'],
['-', 'n', 'd']],
[3, 'V', ['p', 'd']],
[1, 'M'],
[21,
'K',
['b'],
['-', 'g', 'v', 'd'],
['m', 'z', 's'],
['e', 'd', 'm'],
['i', 'r', 'd', 't', 'm', 'o'],
['-', 'n', 'd']],
[17,
'S',
['o'],
['m', 'z', 's'],
['e', 'd', 'm'],
['i', 'r', 'd', 't', 'm', 'o'],
['-', 'n', 'd']],
[40,
'Z',
['o', 's', 'k', 'z', 'p', 'c', 'v', 'n', 'l'],
['-', 'p', 'd', 't'],
['-', 'm', 'z', 's'],
['-', 'e', 'd', 'm'],
['-', 'i', 'r', 'd', 't', 'm', 'o'],
['-', 'e', 'd', 'm'],
['-', 'm', 'z', 's'],
['-', 'k', 'z']],
[1, 'L'],
[5, 'R', ['s'], ['n', 'r', 's']],
[7, 'D', ['-', 'r', 'd', 't', 'm', 'o']],
[24,
'G',
['g'],
['-'],
['n', 'm', 'd', 's', 'p', 'g'],
['-', 'p', 'd', 't'],
['-', 'e', 'm', 'd'],
['-', 'm', 'z', 's'],
['-', 'n', 'd']]
]
@staticmethod
def _create_feature_dictionary():
# old: http://nl.ijs.si/ME/Vault/V3/msd/html/
# new: http://nl.ijs.si/ME/V4/msd/html/
# changes: http://nl.ijs.si/jos/msd/html-en/msd.diffs.html
return [[21,
'A',
['g', 's'],
['p', 'c', 's'],
['m', 'f', 'n'],
['s', 'd', 'p'],
['n', 'g', 'd', 'a', 'l', 'i'],
['-', 'n', 'y']],
[3, 'C', ['c', 's']],
[1, 'I'],
[21,
'M',
['l'],
['-', 'c', 'o', 's'],
['m', 'f', 'n'],
['s', 'd', 'p'],
['n', 'g', 'd', 'a', 'l', 'i'],
['-', 'n', 'y']],
[17,
'N',
['c'],
['m', 'f', 'n'],
['s', 'd', 'p'],
['n', 'g', 'd', 'a', 'l', 'i'],
['-', 'n', 'y']],
[40,
'P',
['p', 's', 'd', 'r', 'x', 'g', 'q', 'i', 'z'],
['-', '1', '2', '3'],
['-', 'm', 'f', 'n'],
['-', 's', 'd', 'p'],
['-', 'n', 'g', 'd', 'a', 'l', 'i'],
['-', 's', 'd', 'p'],
['-', 'm', 'f', 'n'],
['-', 'y', 'b']],
[1, 'Q'],
[5, 'R', ['g'], ['p', 'c', 's']],
[7, 'S', ['-', 'g', 'd', 'a', 'l', 'i']],
[24,
'V',
['m'],
['-'],
['n', 'u', 'p', 'r', 'f', 'c'],
['-', '1', '2', '3'],
['-', 's', 'p', 'd'],
['-', 'm', 'f', 'n'],
['-', 'n', 'y']]
]
# Decoders for inputs and outputs
@staticmethod
def decode_x(word_encoded, dictionary):
word = ''
for el in word_encoded:
i = 0
for num in el:
if num == 1:
word += dictionary[i]
break
i += 1
return word
@staticmethod
def decode_x_other_features(feature_dictionary, x_other_features):
final_word = []
for word in x_other_features:
final_word = []
i = 0
for z in range(len(feature_dictionary)):
for j in range(1, len(feature_dictionary[z])):
if j == 1:
if word[i] == 1:
final_word.append(feature_dictionary[z][1])
i += 1
else:
for k in range(len(feature_dictionary[z][j])):
if word[i] == 1:
final_word.append(feature_dictionary[z][j][k])
i += 1
# print(u''.join(final_word))
return u''.join(final_word)
@staticmethod
def decode_y(y):
i = 0
res = []
for el in y:
if el >= 0.5:
res.append(i)
i += 1
return res
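# test_accuracy thresholds the location predictions, compares them with the
# gold vectors and returns whole-word accuracy (in %) together with a list of
# decoded error cases.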
def test_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, syllable_dictionary=None,
threshold=0.4999955, patterns=None):
errors = []
num_of_pred = len(predictions)
num_of_correct_pred = 0
# wrong_patterns = 0
# wrong_pattern_prediction = 0
for i in range(predictions.shape[0]):
correct_prediction = True
round_predictions = np.zeros(predictions[i].shape)
for j in range(len(y[i])):
if predictions[i][j] < threshold:
round_predictions[j] = 0.0
else:
round_predictions[j] = 1.0
if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0):
correct_prediction = False
# in_pattern = False
# if patterns is not None:
# test_predictions = copy(predictions[i])
# l = self.get_word_length(x[i])
# round_predictions = np.zeros(test_predictions.shape)
# for j in range(len(y[i])):
# if test_predictions[j] < threshold:
# round_predictions[j] = 0.0
# else:
# round_predictions[j] = 1.0
#
# in_pattern = False
# for pattern in patterns[l]:
# if (pattern == round_predictions).all():
# in_pattern = True
# if not in_pattern:
# wrong_patterns += 1
#
# for j in range(len(y[i])):
# if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0):
# correct_prediction = False
#
# if not in_pattern and not correct_prediction:
# wrong_pattern_prediction += 1
# if (np.around(predictions[i]) == y[i]).all():
if correct_prediction:
num_of_correct_pred += 1
else:
if self._input_type == 'l':
decoded_x = self.decode_x(x[i], dictionary)
else:
decoded_x = self.decode_syllable_x(x[i], syllable_dictionary)
if self._bidirectional_basic_input:
decoded_x = decoded_x[:int(len(decoded_x)/2)]
errors.append([i,
decoded_x,
self.decode_x_other_features(feature_dictionary, [x_other_features[i]]),
self.assign_stress_locations(decoded_x, round_predictions, vowels, syllables=self._input_type != 'l'),
self.assign_stress_locations(decoded_x, y[i], vowels, syllables=self._input_type != 'l')
])
# print(wrong_patterns)
# print(wrong_pattern_prediction)
return (num_of_correct_pred / float(num_of_pred)) * 100, errors
# def get_word_length(self, x_el):
# i = 0
# for el in x_el:
# if el == 0:
# return i
# i += 1
# return 10
@staticmethod
def decode_syllable_x(word_encoded, syllable_dictionary):
word = []
for i in range(len(word_encoded)):
word.append(syllable_dictionary[word_encoded[i]])
return ''.join(word[::-1])
def assign_stress_locations(self, word, y, vowels, syllables=False):
if not syllables:
word_list = list(word)
else:
if self._reverse_inputs:
word_list = list(word)[::-1]
else:
word_list = list(word)
vowel_num = 0
for i in range(len(word_list)):
if self._is_vowel(word_list, i, vowels):
if word_list[i] == 'a' and y[vowel_num] == 1:
word_list[i] = 'á'
elif word_list[i] == 'e' and y[vowel_num] == 1:
word_list[i] = 'é'
elif word_list[i] == 'i' and y[vowel_num] == 1:
word_list[i] = 'í'
elif word_list[i] == 'o' and y[vowel_num] == 1:
word_list[i] = 'ó'
elif word_list[i] == 'u' and y[vowel_num] == 1:
word_list[i] = 'ú'
elif word_list[i] == 'r' and y[vowel_num] == 1:
word_list[i] = 'ŕ'
elif word_list[i] == 'A' and y[vowel_num] == 1:
word_list[i] = 'Á'
elif word_list[i] == 'E' and y[vowel_num] == 1:
word_list[i] = 'É'
elif word_list[i] == 'I' and y[vowel_num] == 1:
word_list[i] = 'Í'
elif word_list[i] == 'O' and y[vowel_num] == 1:
word_list[i] = 'Ó'
elif word_list[i] == 'U' and y[vowel_num] == 1:
word_list[i] = 'Ú'
elif word_list[i] == 'R' and y[vowel_num] == 1:
word_list[i] = 'Ŕ'
vowel_num += 1
if not syllables:
return ''.join(word_list)
else:
return ''.join(word_list[::-1])
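# test_type_accuracy evaluates accent-type classification: for each stressed
# syllable the prediction is restricted to the accent types possible for the
# stressed letter before taking the argmax; it returns per-stressed-syllable
# and per-word accuracy plus decoded errors.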
def test_type_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, accented_vowels,
syllable_dictionary=None):
errors = []
num_of_pred = len(predictions)
num_of_correct_pred = 0
num_of_correct_pred_words = 0
accentuation_index = 0
eye = np.eye(len(accented_vowels), dtype=int)
for i in range(len(y)):
correct_prediction = True
if self._input_type == 'l':
decoded_x = self.decode_x(x[i], dictionary)
else:
decoded_x = self.decode_syllable_x(x[i], syllable_dictionary)
wrong_word = decoded_x
correct_word = decoded_x
for j in range(len(y[i])):
if y[i][j] > 0:
# ERROR AS IT IS CALCULATED
# arounded_predictions = np.around(predictions[accentuation_index]).astype(int)
# MAX ELEMENT ONLY
# arounded_predictions = np.zeros(len(predictions[accentuation_index]))
# arounded_predictions[np.argmax(predictions[accentuation_index]).astype(int)] = 1
# MAX ELEMENT AMONG POSSIBLE ONES
# if i == 313:
# print(decoded_x)
stressed_letter = self.get_accentuated_letter(decoded_x, j, vowels, syllables=self._input_type != 'l')
possible_places = np.zeros(len(predictions[accentuation_index]))
if stressed_letter == 'r':
possible_places[0] = 1
elif stressed_letter == 'a':
possible_places[1] = 1
possible_places[2] = 1
elif stressed_letter == 'e':
possible_places[3] = 1
possible_places[4] = 1
possible_places[5] = 1
elif stressed_letter == 'i':
possible_places[6] = 1
possible_places[7] = 1
elif stressed_letter == 'o':
possible_places[8] = 1
possible_places[9] = 1
possible_places[10] = 1
elif stressed_letter == 'u':
possible_places[11] = 1
possible_places[12] = 1
possible_predictions = predictions[accentuation_index] * possible_places
arounded_predictions = np.zeros(len(predictions[accentuation_index]), dtype=int)
arounded_predictions[np.argmax(possible_predictions).astype(int)] = 1
wrong_word = self.assign_word_accentuation_type(wrong_word, j, arounded_predictions, vowels, accented_vowels,
syllables=self._input_type != 'l', debug=i == 313)
correct_word = self.assign_word_accentuation_type(correct_word, j, eye[int(y[i][j])], vowels, accented_vowels,
syllables=self._input_type != 'l', debug=i == 313)
if (eye[int(y[i][j])] == arounded_predictions).all():
num_of_correct_pred += 1
else:
correct_prediction = False
accentuation_index += 1
if correct_prediction:
num_of_correct_pred_words += 1
else:
if self._input_type == 'l':
errors.append([i,
decoded_x[::-1],
self.decode_x_other_features(feature_dictionary, [x_other_features[i]]),
wrong_word[::-1],
correct_word[::-1]
])
else:
errors.append([i,
decoded_x,
self.decode_x_other_features(feature_dictionary, [x_other_features[i]]),
wrong_word,
correct_word
])
print(num_of_pred)
print(len(y))
print(num_of_correct_pred_words)
print(len(errors))
print(num_of_correct_pred_words + len(errors))
return (num_of_correct_pred / float(num_of_pred)) * 100, (num_of_correct_pred_words / float(len(y))) * 100, errors
def get_accentuated_letter(self, word, location, vowels, syllables=False, debug=False):
# print(location)
vowel_index = 0
word_list = list(word)
if not syllables:
word_list = list(word)
else:
word_list = list(word[::-1])
for i in range(len(word_list)):
if self._is_vowel(word_list, i, vowels):
if location == vowel_index:
return word_list[i]
vowel_index += 1
def assign_word_accentuation_type(self, word, location, y, vowels, accented_vowels, syllables=False, debug=False):
vowel_index = 0
if not syllables:
word_list = list(word)
else:
word_list = list(word[::-1])
for i in range(len(word_list)):
if self._is_vowel(word_list, i, vowels + accented_vowels):
if location == vowel_index:
if len(np.where(y == 1)[0]) == 1:
word_list[i] = accented_vowels[np.where(y == 1)[0][0]]
vowel_index += 1
if not syllables:
return ''.join(word_list)
else:
return ''.join(word_list[::-1])
def assign_stress_types(self, predictions, word, y, vowels, accented_vowels):
words = []
accentuation_index = 0
for i in range(len(y)):
wrong_word = word[i][::-1]
for j in range(len(y[i])):
if y[i][j] > 0:
stressed_letter = self.get_accentuated_letter(word[i][::-1], j, vowels, syllables=self._input_type != 'l')
possible_places = np.zeros(len(predictions[accentuation_index]))
if stressed_letter == 'r':
possible_places[0] = 1
elif stressed_letter == 'a':
possible_places[1] = 1
possible_places[2] = 1
elif stressed_letter == 'e':
possible_places[3] = 1
possible_places[4] = 1
possible_places[5] = 1
elif stressed_letter == 'i':
possible_places[6] = 1
possible_places[7] = 1
elif stressed_letter == 'o':
possible_places[8] = 1
possible_places[9] = 1
possible_places[10] = 1
elif stressed_letter == 'u':
possible_places[11] = 1
possible_places[12] = 1
possible_predictions = predictions[accentuation_index] * possible_places
arounded_predictions = np.zeros(len(predictions[accentuation_index]), dtype=int)
arounded_predictions[np.argmax(possible_predictions).astype(int)] = 1
if np.max(possible_predictions) != 0:
wrong_word = self.assign_word_accentuation_type(wrong_word, j, arounded_predictions, vowels, accented_vowels,
syllables=self._input_type != 'l', debug=i == 313)
accentuation_index += 1
words.append(wrong_word[::-1])
return words
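# load_location_models and load_type_models rebuild the three CNN
# architectures (letter, syllable and syllabled-letter inputs) and load
# trained weights from the given paths; they reference an actual_accuracy
# metric that is expected to be defined elsewhere.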
@staticmethod
def load_location_models(letters_path, syllables_path, syllabled_letters_path):
############################ LOCATION ########################
nn_output_dim = 10
conv_input_shape = (23, 36)
othr_input = (140,)
conv_input = Input(shape=conv_input_shape, name='conv_input')
x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input)
x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv)
x_conv = MaxPooling1D(pool_size=2)(x_conv)
x_conv = Flatten()(x_conv)
othr_input = Input(shape=othr_input, name='othr_input')
x = concatenate([x_conv, othr_input])
# x = Dense(1024, input_dim=(516 + 256), activation='relu')(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(nn_output_dim, activation='sigmoid')(x)
letter_location_model = Model(inputs=[conv_input, othr_input], outputs=x)
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
letter_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
letter_location_model.load_weights(letters_path)
##############################################################
# num_examples = len(data.x_train) # training set size
nn_output_dim = 10
conv_input_shape = (10, 5168)
othr_input = (140,)
conv_input = Input(shape=conv_input_shape, name='conv_input')
# syllabled letters
x_conv = Conv1D(200, (2), padding='same', activation='relu')(conv_input)
x_conv = MaxPooling1D(pool_size=2)(x_conv)
x_conv = Flatten()(x_conv)
othr_input = Input(shape=othr_input, name='othr_input')
x = concatenate([x_conv, othr_input])
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(nn_output_dim, activation='sigmoid')(x)
syllable_location_model = Model(inputs=[conv_input, othr_input], outputs=x)
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
syllable_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
syllable_location_model.load_weights(syllables_path)
#####################################################
conv_input_shape = (10, 252)
othr_input = (140,)
conv_input = Input(shape=conv_input_shape, name='conv_input')
# syllabled letters
x_conv = Conv1D(200, (2), padding='same', activation='relu')(conv_input)
x_conv = MaxPooling1D(pool_size=2)(x_conv)
x_conv = Flatten()(x_conv)
othr_input = Input(shape=othr_input, name='othr_input')
x = concatenate([x_conv, othr_input])
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(nn_output_dim, activation='sigmoid')(x)
syllabled_letters_location_model = Model(inputs=[conv_input, othr_input], outputs=x)
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
syllabled_letters_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
syllabled_letters_location_model.load_weights(syllabled_letters_path)
return letter_location_model, syllable_location_model, syllabled_letters_location_model
@staticmethod
def load_type_models(letters_path, syllables_path, syllabled_letters_path):
nn_output_dim = 13
# letters
conv_input_shape = (23, 36)
othr_input = (150,)
conv_input = Input(shape=conv_input_shape, name='conv_input')
# letters
x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input)
x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv)
# syllabled letters
x_conv = MaxPooling1D(pool_size=2)(x_conv)
x_conv = Flatten()(x_conv)
othr_input = Input(shape=othr_input, name='othr_input')
x = concatenate([x_conv, othr_input])
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(nn_output_dim, activation='sigmoid')(x)
letter_type_model = Model(inputs=[conv_input, othr_input], outputs=x)
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
letter_type_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
letter_type_model.load_weights(letters_path)
conv_input_shape = (10, 5168)
othr_input = (150,)
conv_input = Input(shape=conv_input_shape, name='conv_input')
x_conv = Conv1D(200, (2), padding='same', activation='relu')(conv_input)
x_conv = MaxPooling1D(pool_size=2)(x_conv)
x_conv = Flatten()(x_conv)
othr_input = Input(shape=othr_input, name='othr_input')
x = concatenate([x_conv, othr_input])
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(nn_output_dim, activation='sigmoid')(x)
syllable_type_model = Model(inputs=[conv_input, othr_input], outputs=x)
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
syllable_type_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
syllable_type_model.load_weights(syllables_path)
# syllabled letters
conv_input_shape = (10, 252)
othr_input = (150,)
conv_input = Input(shape=conv_input_shape, name='conv_input')
x_conv = Conv1D(200, (2), padding='same', activation='relu')(conv_input)
x_conv = MaxPooling1D(pool_size=2)(x_conv)
x_conv = Flatten()(x_conv)
othr_input = Input(shape=othr_input, name='othr_input')
x = concatenate([x_conv, othr_input])
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(nn_output_dim, activation='sigmoid')(x)
syllabled_letter_type_model = Model(inputs=[conv_input, othr_input], outputs=x)
opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
syllabled_letter_type_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ])
syllabled_letter_type_model.load_weights(syllabled_letters_path)
return letter_type_model, syllable_type_model, syllabled_letter_type_model
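# get_ensemble_location_predictions runs all six location models (three input
# representations, each in reversed and in correct letter order) on the same
# words and averages their outputs; correct-order predictions are re-aligned
# with reverse_predictions before averaging.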
@staticmethod
def get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
batch_size = 16
# print(tagged_input_words[pos])
data = Data('l', shuffle_all_inputs=False, convert_multext=False)
x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels,
feature_dictionary, 'who cares')
generator = data._letter_generator(x, x_other_features, fake_y, batch_size, accented_vowels)
letter_location_predictions = letter_location_model.predict_generator(generator, len(x) / (batch_size))
data = Data('s', shuffle_all_inputs=False, convert_multext=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
eye = np.eye(len(syllable_dictionary), dtype=int)
generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, eye, accented_vowels)
syllable_location_predictions = syllable_location_model.predict_generator(generator, len(x) / (batch_size))
data = Data('sl', shuffle_all_inputs=False, convert_multext=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
max_syllable = data._get_max_syllable(syllable_dictionary)
syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels)
syllabled_letters_location_predictions = syllabled_letters_location_model.predict_generator(generator, len(x) / (batch_size))
############## CORRECT ORDER INPUT ##############
data = Data('l', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels,
feature_dictionary, 'who cares')
generator = data._letter_generator(x, x_other_features, fake_y, batch_size, accented_vowels)
letter_location_co_predictions = letter_location_co_model.predict_generator(generator, len(x) / (batch_size))
letter_location_co_predictions = data.reverse_predictions(letter_location_co_predictions, input_words, vowels)
data = Data('s', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
eye = np.eye(len(syllable_dictionary), dtype=int)
generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, eye, accented_vowels)
syllable_location_co_predictions = syllable_location_co_model.predict_generator(generator, len(x) / (batch_size))
syllable_location_co_predictions = data.reverse_predictions(syllable_location_co_predictions, input_words, vowels)
data = Data('sl', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
max_syllable = data._get_max_syllable(syllable_dictionary)
syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels)
syllabled_letters_location_co_predictions = syllabled_letters_location_co_model.predict_generator(generator, len(x) / (batch_size))
syllabled_letters_location_co_predictions = data.reverse_predictions(syllabled_letters_location_co_predictions, input_words, vowels)
return np.mean(np.array([letter_location_predictions, syllable_location_predictions, syllabled_letters_location_predictions,
letter_location_co_predictions, syllable_location_co_predictions, syllabled_letters_location_co_predictions]), axis=0)
def count_syllables(self, word, vowels):
j = 0
num_vowels = 0
for j in range(len(word)):
if self._is_vowel(word, j, vowels):
num_vowels += 1
return num_vowels
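# reverse_predictions mirrors each prediction vector within the word's
# syllable count (capped at 10 output positions) so that correct-order and
# reversed-order model outputs line up.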
def reverse_predictions(self, predictions, words, vowels):
new_predictions = np.zeros(predictions.shape, dtype='float32')
for i in range(len(predictions)):
word_len = self.count_syllables(words[i][0], vowels)
if word_len > 10:
word_len = 10
for k in range(word_len):
new_predictions[i][k] += predictions[i][word_len - 1 - k]
return new_predictions
@staticmethod
def get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model, syllabled_letter_type_model,
letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
batch_size = 16
y_array = np.asarray(location_y)
accentuation_length = (y_array > 0).sum()
data = Data('l', shuffle_all_inputs=False, accent_classification=True, convert_multext=False)
x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels,
feature_dictionary, 'who cares')
generator = data._letter_generator(x, x_other_features, location_y, batch_size, accented_vowels)
letter_type_predictions = letter_type_model.predict_generator(generator, accentuation_length / (batch_size))
data = Data('s', shuffle_all_inputs=False, accent_classification=True, convert_multext=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
eye = np.eye(len(syllable_dictionary), dtype=int)
generator = data._syllable_generator(x, x_other_features, location_y, batch_size, eye, accented_vowels)
syllable_type_predictions = syllable_type_model.predict_generator(generator, accentuation_length / (batch_size))
data = Data('sl', shuffle_all_inputs=False, accent_classification=True, convert_multext=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
max_syllable = data._get_max_syllable(syllable_dictionary)
syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels)
syllabled_letter_type_predictions = syllabled_letter_type_model.predict_generator(generator, accentuation_length / batch_size)
############## CORRECT ORDER INPUT ##############
location_y = data.reverse_predictions(location_y, input_words, vowels)
data = Data('l', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels,
feature_dictionary, 'who cares')
generator = data._letter_generator(x, x_other_features, location_y, batch_size, accented_vowels)
letter_type_co_predictions = letter_type_co_model.predict_generator(generator, accentuation_length / (batch_size))
data.reorder_correct_direction_inputs(letter_type_co_predictions, location_y)
data = Data('s', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
eye = np.eye(len(syllable_dictionary), dtype=int)
generator = data._syllable_generator(x, x_other_features, location_y, batch_size, eye, accented_vowels)
syllable_type_co_predictions = syllable_type_co_model.predict_generator(generator, accentuation_length / (batch_size))
data.reorder_correct_direction_inputs(syllable_type_co_predictions, location_y)
data = Data('sl', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False)
x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels,
accented_vowels, feature_dictionary, 'who cares')
max_syllable = data._get_max_syllable(syllable_dictionary)
syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels)
generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels)
syllabled_letter_type_co_predictions = syllabled_letter_type_co_model.predict_generator(generator, accentuation_length / batch_size)
data.reorder_correct_direction_inputs(syllabled_letter_type_co_predictions, location_y)
return np.mean(np.array([letter_type_predictions, syllable_type_predictions, syllabled_letter_type_predictions,
letter_type_co_predictions, syllable_type_co_predictions, syllabled_letter_type_co_predictions]), axis=0)
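    # A minimal usage sketch (not part of the original pipeline): the six type models
    # and the dictionaries are assumed to be loaded elsewhere, e.g. with
    # keras.models.load_model, and `location_y` to be the rounded location predictions.
    #
    #   type_predictions = data.get_ensemble_type_predictions(
    #       input_words, location_y,
    #       letter_type_model, syllable_type_model, syllabled_letter_type_model,
    #       letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
    #       dictionary, max_word, max_num_vowels, vowels, accented_vowels,
    #       feature_dictionary, syllable_dictionary)
    #
    # The result is the element-wise mean of the six models' outputs, one row per
    # accented syllable.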
def reorder_correct_direction_inputs(self, predictions, y):
pred_i = 0
for i in range(len(y)):
num_accented_syllables = 0
for el in y[i]:
if el > 0:
num_accented_syllables += 1
if num_accented_syllables > 1:
min_i = pred_i
max_i = pred_i + num_accented_syllables - 1
while (max_i > min_i):
min_pred = copy(predictions[min_i])
max_pred = copy(predictions[max_i])
predictions[min_i] = max_pred
predictions[max_i] = min_pred
min_i += 1
max_i -= 1
pred_i += num_accented_syllables
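    # Illustration of the reordering above (hypothetical values): prediction rows for
    # words with more than one accented syllable are reversed in place, so outputs
    # produced from reversed-word inputs line up with correct-order stress locations.
    #
    #   preds = np.array([[0.1], [0.9], [0.5]])  # word A contributes two rows, word B one
    #   y = [[1, 2, 0], [1, 0, 0]]               # word A has two accented syllables, word B one
    #   data.reorder_correct_direction_inputs(preds, y)
    #   # preds is now [[0.9], [0.1], [0.5]]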
    def assign_location_stress(self, word, locations, vowels):
        # put an acute mark on the vowel whose 0-based index among the word's vowels
        # equals one of the predicted stress locations
        acute = {'a': 'á', 'e': 'é', 'i': 'í', 'o': 'ó', 'u': 'ú', 'r': 'ŕ',
                 'A': 'Á', 'E': 'É', 'I': 'Í', 'O': 'Ó', 'U': 'Ú', 'R': 'Ŕ'}
        word_list = list(word)
        for loc in locations:
            vowel_num = 0
            for i in range(len(word_list)):
                if self._is_vowel(word_list, i, vowels):
                    if vowel_num == loc and word_list[i] in acute:
                        word_list[i] = acute[word_list[i]]
                    vowel_num += 1
        return ''.join(word_list)
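    # Hedged usage sketch: `locations` holds 0-based vowel indices, so with a vowel
    # list containing the plain vowels the second vowel of the word gets the acute mark:
    #
    #   data.assign_location_stress('malina', [1], vowels)
    #   # -> 'malína'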
def accentuate_word(self, input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model,
letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
letter_type_model, syllable_type_model, syllabled_letter_type_model,
letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary):
predictions = self.get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model,
syllabled_letters_location_model,
letter_location_co_model, syllable_location_co_model,
syllabled_letters_location_co_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary,
syllable_dictionary)
#print(predictions)
if 'A' not in vowels:
vowels.extend(['A', 'E', 'I', 'O', 'U'])
location_accented_words = [self.assign_location_stress(input_words[i][0][::-1], self.decode_y(predictions[i]), vowels)[::-1] for i in
range(len(input_words))]
location_y = np.around(predictions)
type_predictions = self.get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model,
syllabled_letter_type_model,
letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary,
syllable_dictionary)
only_words = [el[0] for el in input_words]
accented_words = self.assign_stress_types(type_predictions, only_words, location_y, vowels, accented_vowels)
return location_accented_words, accented_words
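    # Hedged sketch of the top-level call; all twelve models and the dictionaries are
    # assumed to be loaded beforehand (e.g. via keras.models.load_model):
    #
    #   location_words, accented_words = data.accentuate_word(
    #       input_words,
    #       letter_location_model, syllable_location_model, syllabled_letters_location_model,
    #       letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,
    #       letter_type_model, syllable_type_model, syllabled_letter_type_model,
    #       letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,
    #       dictionary, max_word, max_num_vowels, vowels, accented_vowels,
    #       feature_dictionary, syllable_dictionary)
    #
    # location_words carry only the stress position (acute marks); accented_words also
    # carry the predicted stress type.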
def tag_words(self, reldi_location, original_location):
        # generate a version of the text with one word per line
with open(original_location) as f:
original_text = f.readlines()
original_text = ''.join(original_text)
# print(original_text)
        text_with_whitespaces = original_text.replace(',', ' ,').replace('.', ' .').replace('\n', ' ') \
            .replace('"', ' " ').replace(':', ' :').replace('ć', 'č').replace('–', '-')  # normalize dashes
# print('-------------------------------------------------')
text_with_whitespaces = '\n'.join(text_with_whitespaces.split())
text_with_whitespaces += '\n\n'
# print(text_with_whitespaces)
with open('.words_with_whitespaces', "w") as text_file:
text_file.write(text_with_whitespaces)
# generates text with PoS tags
import subprocess
myinput = open('.words_with_whitespaces', 'r')
myoutput = open('.word_tags', 'w')
# print(myinput.readlines())
        python3_command = reldi_location + "/tagger.py sl"  # run the reldi-tagger for Slovene as a subprocess
process = subprocess.run(python3_command.split(), stdin=myinput, stdout=myoutput)
        # keep only the tokens relevant for accentuation (skip punctuation and tokens containing digits)
pointless_words = ['.', ',', '\"', ':', '-']
with open('.word_tags', "r") as text_file:
tagged_input_words = []
            for x in text_file.readlines()[:-1]:
                split_line = x[:-1].split('\t')
                if split_line[0] not in pointless_words and not any(char.isdigit() for char in split_line[0]):
                    tagged_input_words.append([split_line[0].lower(), '', split_line[1], split_line[0].lower()])
remove(".words_with_whitespaces")
remove(".word_tags")
return tagged_input_words, original_text
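    # Hedged illustration: each returned entry has the shape
    # [token_lowercase, '', MSD_tag, token_lowercase], built from the tab-separated
    # "token<TAB>tag" lines written by the reldi tagger. The path and output below
    # are hypothetical:
    #
    #   tagged_words, original_text = data.tag_words('/path/to/reldi', 'input.txt')
    #   # tagged_words[0] could look like ['hiša', '', 'Ncfsn', 'hiša']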
def create_connected_text_locations(self, tagged_input_words, original_text, predictions, vowels):
if 'A' not in vowels:
vowels.extend(['A', 'E', 'I', 'O', 'U'])
accented_words = [self.assign_location_stress(tagged_input_words[i][0][::-1], self.decode_y(predictions[i]), vowels)[::-1] for i in
range(len(tagged_input_words))]
# print(accented_words[:20])
# print(tagged_input_words[:20])
        words_and_accentuation_loc = [[tagged_input_words[i][0], self.decode_y(predictions[i])] for i in range(len(tagged_input_words))]
original_text_list = list(original_text)
original_text_lowercase = original_text.lower()
end_pos = 0
        for word in words_and_accentuation_loc:
posit = original_text_lowercase.find(word[0], end_pos)
if posit != -1:
start_pos = posit
end_pos = start_pos + len(word[0])
original_text_list[start_pos:end_pos] = list(
self.assign_location_stress(''.join(original_text_list[start_pos:end_pos][::-1]), word[1], vowels)[::-1])
return ''.join(original_text_list)
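    # Hedged usage sketch, assuming `tagged_words` and `original_text` come from
    # tag_words() and `predictions` from get_ensemble_location_predictions():
    #
    #   stressed_text = data.create_connected_text_locations(tagged_words, original_text,
    #                                                        predictions, vowels)
    #   # each recognised word in the running text receives its acute stress mark,
    #   # while punctuation, spacing and casing of the original text are preserved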
def create_connected_text_accented(self, tagged_input_words, original_text, type_predictions, location_y, vowels, accented_vowels):
input_words = [el[0] for el in tagged_input_words]
words = self.assign_stress_types(type_predictions, input_words, location_y, vowels, accented_vowels)
# print(original_text)
original_text_list = list(original_text)
original_text_lowercase = original_text.lower()
end_pos = 0
for i in range(len(words)):
posit = original_text_lowercase.find(input_words[i], end_pos)
if posit != -1:
start_pos = posit
end_pos = start_pos + len(words[i])
orig_word = original_text_list[start_pos:end_pos]
new_word = list(words[i])
for j in range(len(orig_word)):
if orig_word[j].isupper():
new_word[j] = new_word[j].upper()
original_text_list[start_pos:end_pos] = new_word
return ''.join(original_text_list)
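    # Hedged sketch of the companion call that writes full stress types back into the
    # text; `location_y` is the rounded location ensemble output and `type_predictions`
    # comes from get_ensemble_type_predictions():
    #
    #   accented_text = data.create_connected_text_accented(tagged_words, original_text,
    #                                                       type_predictions, location_y,
    #                                                       vowels, accented_vowels)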
# def count_vowels(content, vowels):
# num_all_vowels = 0
# for el in content:
# for m in range(len(el[0])):
# if is_vowel(list(el[0]), m, vowels):
# num_all_vowels += 1
# return num_all_vowels
# metric: fraction of words whose whole stress vector is predicted correctly
# test with:
# print(mean_pred(y_validate[pos], predictions[pos]).eval())
# print(mean_pred(np.array([[ 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
# [ 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.]]),
# np.array([[ 0., 0.51, 0., 0.51, 0., 0., 0., 0., 0., 0., 0.],
# [ 0., 0.92, 0., 0.51, 0., 0., 0., 0., 0., 0., 0.]])).eval())
def actual_accuracy(y_true, y_pred):
return K.mean(K.equal(K.mean(K.equal(K.round(y_true), K.round(y_pred)), axis=-1), 1.0))
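# A rough numpy analogue of the metric above (illustrative only): a word counts as
# correct only if every rounded position matches the target, e.g.
#
#   y_true = np.array([[0., 1., 0.], [1., 0., 0.]])
#   y_pred = np.array([[0.1, 0.9, 0.2], [0.4, 0.6, 0.1]])
#   np.mean(np.all(np.round(y_true) == np.round(y_pred), axis=-1))  # -> 0.5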
def convert_to_correct_stress(w):
w = w.replace('ì', 'ê')
w = w.replace('à', 'ŕ')
w = w.replace('ä', 'à')
w = w.replace('ë', 'è')
w = w.replace('ě', 'ê')
w = w.replace('î', 'ì')
w = w.replace('ö', 'ò')
w = w.replace('ü', 'ù')
return w
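# Hedged note on the mapping above: the inputs appear to be substitute characters from
# the lexicon encoding, replaced here by standard Slovene stress diacritics, e.g.
# (illustrative only):
#
#   convert_to_correct_stress('ä')  # -> 'à'
#   convert_to_correct_stress('ě')  # -> 'ê'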