stress_asignment/text2SAMPA.py

from copy import copy
import sys


# Characters treated as vowels, both when splitting a word into syllables and
# when applying the context rules in convert_to_SAMPA. 'O' and 'E' are listed
# because plain 'o' and 'e' have already been rewritten to those symbols by
# the time the context rules consult this list.
# NOTE(review): this list contains 'à' and 'ì', whereas syllable_stressed()
# and the letter mapping in convert_to_SAMPA use 'ŕ' and 'ě' instead --
# possibly an encoding mix-up; confirm which characters the input really uses.
vowels = ['à', 'á', 'ä', 'é', 'ë', 'ì', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü', 'a', 'e', 'i', 'o', 'u', 'O', 'E']

def syllable_stressed(syllable):
    """Tell whether a syllable contains a stress-marked letter.

    Args:
        syllable: an iterable of single characters (normally a string).

    Returns:
        True if at least one character is one of the accented letters listed
        below, False otherwise (including for an empty syllable).
    """
    accented = (u'ŕ', u'á', u'ä', u'é', u'ë', u'ě', u'í', u'î', u'ó', u'ô', u'ö', u'ú', u'ü')
    return any(char in accented for char in syllable)

def is_vowel(word_list, position, vowels):
    """Decide whether the letter at ``position`` acts as a syllable nucleus.

    Args:
        word_list: the word as a sequence of single letters.
        position: index of the letter to classify.
        vowels: collection of letters that count as vowels.

    Returns:
        True if the letter is in ``vowels``, or if it is an 'r'/'R' that has
        no vowel directly before it and no vowel directly after it (word
        edges count as "no vowel"). False in every other case.
    """
    current = word_list[position]
    if current in vowels:
        return True

    # Only an 'r' can stand in for a vowel.
    if current not in (u'r', u'R'):
        return False

    before = position - 1
    if before >= 0 and word_list[before] in vowels:
        return False

    after = position + 1
    if after < len(word_list) and word_list[after] in vowels:
        return False

    return True

def get_voiced_consonants():
    """Return a new list of the symbols this module groups as voiced consonants."""
    symbols = ('m', 'n', 'v', 'l', 'r', 'j', 'y', 'w', 'F', 'N')
    return list(symbols)

def get_resonant_silent_consonants():
    """Return a new list of the symbols this module groups as resonant silent consonants."""
    symbols = ('b', 'd', 'z', 'ž', 'g')
    return list(symbols)

def get_nonresonant_silent_consonants():
    """Return a new list of the symbols this module groups as non-resonant silent consonants."""
    symbols = ('p', 't', 's', 'š', 'č', 'k', 'f', 'h', 'c', 'x')
    return list(symbols)

def split_consonants(consonants):
    """Split a consonant cluster between two neighbouring syllables.

    Every adjacent pair in the cluster is inspected; a pair may offer a split
    point right after its first element, ranked by a priority number (lower
    wins, the earliest pair wins ties):

        -1  first element is '-' or '_'
         0  both elements are equal
         1  resonant silent  followed by resonant silent
         2  voiced           followed by any silent consonant
         3  resonant silent  followed by non-resonant silent
         4  non-resonant silent followed by resonant silent

    Args:
        consonants: list of single-character consonants.

    Returns:
        A ``(left, right)`` pair of lists: ``left`` belongs to the preceding
        syllable and ``right`` to the following one. With no usable split
        point (or a cluster of one) ``left`` is ``['']`` and ``right`` is the
        input list itself; for an empty cluster both are ``['']``.
    """
    voiced = get_voiced_consonants()
    resonant_silent = get_resonant_silent_consonants()
    nonresonant_silent = get_nonresonant_silent_consonants()

    if len(consonants) == 0:
        return [''], ['']
    if len(consonants) == 1:
        return [''], consonants

    best_index = None
    best_rank = None
    for index in range(len(consonants) - 1):
        left = consonants[index]
        right = consonants[index + 1]

        rank = None
        if left in ('-', '_'):
            rank = -1
        elif left == right:
            rank = 0
        elif left in voiced:
            if right in resonant_silent or right in nonresonant_silent:
                rank = 2
        elif left in resonant_silent:
            if right in resonant_silent:
                rank = 1
            elif right in nonresonant_silent:
                rank = 3
        elif left in nonresonant_silent:
            if right in resonant_silent:
                rank = 4

        # Strict comparison keeps the earliest pair among equal ranks.
        if rank is not None and (best_rank is None or rank < best_rank):
            best_index = index
            best_rank = rank

    if best_index is None:
        return [''], consonants

    cut = best_index + 1
    return consonants[:cut], consonants[cut:]

def create_syllables(word, vowels):
    """Break a word into syllables around its vowel-like letters.

    Consonants before the first nucleus join the first syllable. A consonant
    cluster between two nuclei is lower-cased and divided by
    split_consonants(): its left part ends the previous syllable, its right
    part starts the next one. Consonants after the last nucleus are appended
    to the last syllable.

    Args:
        word: the word to syllabify.
        vowels: collection of letters that count as vowels (see is_vowel()).

    Returns:
        A list of syllable strings, or ``word`` itself, unchanged, when it
        contains no nucleus at all.
    """
    letters = list(word)
    syllables = []
    pending = []  # consonants seen since the previous nucleus

    for index, letter in enumerate(letters):
        if not is_vowel(letters, index, vowels):
            pending.append(letter)
            continue

        if not syllables:
            # First nucleus: it takes every consonant collected so far.
            syllables.append(''.join(pending + [letter]))
        else:
            cluster = list(''.join(pending).lower())
            coda, onset = split_consonants(cluster)
            syllables[-1] += ''.join(coda)
            syllables.append(''.join(onset) + letter)
        pending = []

    if not syllables:
        return word

    syllables[-1] += ''.join(pending)
    return syllables


# Single-letter rewrites applied to the lower-cased input; letters that are
# not listed here are kept unchanged.
_LETTER_TO_SAMPA = {
    'e': 'E',
    'o': 'O',
    'š': 'S',
    'ž': 'Z',
    'h': 'x',
    'c': 'ts',
    'č': 'tS',
    'á': 'a:',
    'ä': 'a',
    'é': 'e:',
    'ë': 'E',
    'ě': 'E:',
    'í': 'i:',
    'î': 'i',
    'ó': 'o:',
    'ô': 'O:',
    'ö': 'O',
    'ú': 'u:',
    'ü': 'u',
    'ŕ': '@r',
}

# (current symbol, following symbol) -> replacement for the current symbol.
_PAIR_TO_SAMPA = {
    ('m', 'f'): 'F',
    ('m', 'v'): 'F',
    ('n', 'k'): 'N',
    ('n', 'g'): 'N',
    ('n', 'x'): 'N',
    ('n', 'f'): 'F',
    ('n', 'v'): 'F',
    ('p', 'm'): 'p_n',
    ('p', 'f'): 'p_f',
    ('p', 'v'): 'p_f',
    ('b', 'm'): 'b_n',
    ('b', 'f'): 'b_f',
    ('b', 'v'): 'b_f',
    ('t', 'l'): 't_l',
    ('t', 'n'): 't_n',
    ('d', 'l'): 'd_l',
    ('d', 'n'): 'd_n',
}


def convert_to_SAMPA(word):
    """Convert a stress-annotated word to a SAMPA-style string.

    Steps:
      1. Lower-case the word and split it into syllables; every letter of a
         syllable that contains a stress-marked letter counts as stressed.
      2. Rewrite each letter with ``_LETTER_TO_SAMPA``.
      3. Put a '"' in front of the first symbol of every run of stressed
         letters.
      4. Walk over neighbouring symbols (skipping '"' and ':' marks) and
         replace the current symbol depending on its neighbours.

    Args:
        word: the word to convert, with accented letters marking stress.

    Returns:
        The converted string. An empty input gives an empty string (it used
        to raise IndexError).
    """
    word = word.lower()
    if not word:
        # Nothing to convert; the stress bookkeeping below needs >= 1 letter.
        return ''

    syllables = create_syllables(word, vowels)

    # One flag per input letter: does it sit inside a stressed syllable?
    stressed = [False] * len(word)
    offset = 0
    for syllable in syllables:
        if syllable_stressed(syllable):
            for i in range(len(syllable)):
                stressed[offset + i] = True
        offset += len(syllable)

    symbols = [_LETTER_TO_SAMPA.get(letter, letter) for letter in word]

    # Mark the start of each stressed run. ``symbols`` is still indexed per
    # input letter here, so the flags line up with it.
    for i, flag in enumerate(stressed):
        if flag and (i == 0 or not stressed[i - 1]):
            symbols[i] = '"' + symbols[i]

    # From here on work on individual characters, marks included.
    word = list(''.join(symbols))

    # With at most one real symbol there is no neighbour to look at.
    if len(''.join(word).replace('"', '').replace(':', '')) <= 1:
        return ''.join(word)

    previous_letter_i = -1
    letter_i = 0
    next_letter_i = 1
    if word[0] == '"':
        letter_i = 1
        # Step over a ':' that directly follows the first symbol.
        next_letter_i = 3 if len(word) > 2 and word[2] == ':' else 2
        if len(word) <= next_letter_i:
            return ''.join(word)
    elif len(word) > 1 and word[1] == '"':
        next_letter_i = 2

    voiced = get_voiced_consonants()
    nonresonant_silent = get_nonresonant_silent_consonants()

    # Decisions are taken on the unmodified ``word``; results go to the copy.
    new_word = list(word)
    while True:
        current = word[letter_i]
        following = word[next_letter_i]

        replacement = _PAIR_TO_SAMPA.get((current, following))
        if replacement is None:
            if current in ('n', 'l'):
                # Second-to-last character followed by a non-vowel.
                if following not in vowels and letter_i == len(word) - 2:
                    replacement = current + '\''
            elif (current == 'v' and previous_letter_i >= 0
                  and word[previous_letter_i] not in vowels):
                if following in voiced:
                    replacement = 'w'
                elif following in nonresonant_silent:
                    replacement = 'W'
        if replacement is not None:
            new_word[letter_i] = replacement

        # Advance the window, skipping one ':' or '"' after the next symbol.
        after_next = next_letter_i + 1
        if after_next >= len(word):
            return ''.join(new_word)
        step = 2 if word[after_next] in (':', '"') else 1
        if next_letter_i + step >= len(word):
            return ''.join(new_word)
        previous_letter_i = letter_i
        letter_i = next_letter_i
        next_letter_i += step

#result = convert_to_SAMPA(sys.argv[1])
#final_result = result.replace('\"', '\'')
#print(final_result)
#return final_result
Added text to speech script and text to SAMPA 2018-05-31 09:22:42 +00:00			`from copy import copy`
			`import sys`


			`vowels = ['à', 'á', 'ä', 'é', 'ë', 'ì', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü', 'a', 'e', 'i', 'o', 'u', 'O', 'E']`

			`def syllable_stressed(syllable):`
			`stressed_letters = [u'ŕ', u'á', u'ä', u'é', u'ë', u'ě', u'í', u'î', u'ó', u'ô', u'ö', u'ú', u'ü']`
			`for letter in syllable:`
			`if letter in stressed_letters:`
			`return True`
			`return False`

			`def is_vowel(word_list, position, vowels):`
			`if word_list[position] in vowels:`
			`return True`
			`if (word_list[position] == u'r' or word_list[position] == u'R') and (position - 1 < 0 or word_list[position - 1] not in vowels) and (`
			`position + 1 >= len(word_list) or word_list[position + 1] not in vowels):`
			`return True`
			`return False`

			`def get_voiced_consonants():`
			`return ['m', 'n', 'v', 'l', 'r', 'j', 'y', 'w', 'F', 'N']`

			`def get_resonant_silent_consonants():`
			`return ['b', 'd', 'z', 'ž', 'g']`

			`def get_nonresonant_silent_consonants():`
			`return ['p', 't', 's', 'š', 'č', 'k', 'f', 'h', 'c', 'x']`

			`def split_consonants(consonants):`
			`voiced_consonants = get_voiced_consonants()`
			`resonant_silent_consonants = get_resonant_silent_consonants()`
			`unresonant_silent_consonants = get_nonresonant_silent_consonants()`
			`if len(consonants) == 0:`
			`return [''], ['']`
			`elif len(consonants) == 1:`
			`return [''], consonants`
			`else:`
			`split_options = []`
			`for i in range(len(consonants) - 1):`
			`if consonants[i] == '-' or consonants[i] == '_':`
			`split_options.append([i, -1])`
			`elif consonants[i] == consonants[i + 1]:`
			`split_options.append([i, 0])`
			`elif consonants[i] in voiced_consonants:`
			`if consonants[i + 1] in resonant_silent_consonants or consonants[i + 1] in unresonant_silent_consonants:`
			`split_options.append([i, 2])`
			`elif consonants[i] in resonant_silent_consonants:`
			`if consonants[i + 1] in resonant_silent_consonants:`
			`split_options.append([i, 1])`
			`elif consonants[i + 1] in unresonant_silent_consonants:`
			`split_options.append([i, 3])`
			`elif consonants[i] in unresonant_silent_consonants:`
			`if consonants[i + 1] in resonant_silent_consonants:`
			`split_options.append([i, 4])`

			`if split_options == []:`
			`return [''], consonants`
			`else:`
			`split = min(split_options, key=lambda x: x[1])`
			`return consonants[:split[0] + 1], consonants[split[0] + 1:]`

			`def create_syllables(word, vowels):`
			`word_list = list(word)`
			`consonants = []`
			`syllables = []`
			`for i in range(len(word_list)):`
			`if is_vowel(word_list, i, vowels):`
			`if syllables == []:`
			`consonants.append(word_list[i])`
			`syllables.append(''.join(consonants))`
			`else:`
			`left_consonants, right_consonants = split_consonants(list(''.join(consonants).lower()))`
			`syllables[-1] += ''.join(left_consonants)`
			`right_consonants.append(word_list[i])`
			`syllables.append(''.join(right_consonants))`
			`consonants = []`
			`else:`
			`consonants.append(word_list[i])`
			`if len(syllables) < 1:`
			`return word`
			`syllables[-1] += ''.join(consonants)`

			`return syllables`


			`def convert_to_SAMPA(word):`
Added managing of ''' and ':' 2018-08-17 09:53:25 +00:00			`word = word.lower()`
Added text to speech script and text to SAMPA 2018-05-31 09:22:42 +00:00			`syllables = create_syllables(word, vowels)`
			`letters_in_stressed_syllable = [False] * len(word)`
			`# print(syllables)`
			`l = 0`
			`for syllable in syllables:`
			`if syllable_stressed(syllable):`
			`for i in range(len(syllable)):`
			`letters_in_stressed_syllable[l + i] = True`
			`# print(l)`
			`l += len(syllable)`
			`previous_letter = ''`
			`word = list(word)`
			`for i in range(len(word)):`
			`if word[i] == 'e':`
			`word[i] = 'E'`
			`elif word[i] == 'o':`
			`word[i] = 'O'`
			`elif word[i] == 'š':`
			`word[i] = 'S'`
			`elif word[i] == 'ž':`
			`word[i] = 'Z'`
			`elif word[i] == 'h':`
			`word[i] = 'x'`
			`elif word[i] == 'c':`
			`word[i] = 'ts'`
			`elif word[i] == 'č':`
			`word[i] = 'tS'`
			`elif word[i] == 'á':`
			`word[i] = 'a:'`
			`elif word[i] == 'ä':`
			`word[i] = 'a'`
			`elif word[i] == 'é':`
			`word[i] = 'e:'`
			`elif word[i] == 'ë':`
			`word[i] = 'E'`
			`elif word[i] == 'ě':`
			`word[i] = 'E:'`
			`elif word[i] == 'í':`
			`word[i] = 'i:'`
			`elif word[i] == 'î':`
			`word[i] = 'i'`
			`elif word[i] == 'ó':`
			`word[i] = 'o:'`
			`elif word[i] == 'ô':`
			`word[i] = 'O:'`
			`elif word[i] == 'ö':`
			`word[i] = 'O'`
			`elif word[i] == 'ú':`
			`word[i] = 'u:'`
			`elif word[i] == 'ü':`
			`word[i] = 'u'`
			`elif word[i] == 'ŕ':`
			`word[i] = '@r'`

			`if letters_in_stressed_syllable[0]:`
			`word[0] = '\"' + word[0]`
			`for i in range(1, len(letters_in_stressed_syllable)):`
			`if not letters_in_stressed_syllable[i - 1] and letters_in_stressed_syllable[i]:`
			`word[i] = '\"' + word[i]`
			`# if letters_in_stressed_syllable[i - 1] and not letters_in_stressed_syllable[i]:`
			`# word[i - 1] = word[i - 1] + ':'`
			`# if letters_in_stressed_syllable[-1]:`
			`# word[-1] = word[-1] + ':'`

			`word = list(''.join(word))`

Added managing of ''' and ':' 2018-08-17 09:53:25 +00:00			`test_word = ''.join(word)`
			`test_word = test_word.replace('"', '').replace(':', '')`
			`if len(test_word) <= 1:`
			`return ''.join(word)`

Added text to speech script and text to SAMPA 2018-05-31 09:22:42 +00:00			`previous_letter_i = -1`
			`letter_i = 0`
			`next_letter_i = 1`
			`if word[0] == '\"':`
			`letter_i = 1`
Added " error fix 2018-05-31 13:53:35 +00:00			`if len(word) > 2 and word[2] == ':':`
Added text to speech script and text to SAMPA 2018-05-31 09:22:42 +00:00			`if len(word) > 3:`
			`next_letter_i = 3`
			`else:`
			`#if word[next_letter_i] == 'l':`
			`# word[next_letter_i] = 'l\''`
			`#elif word[next_letter_i] == 'n':`
			`# word[next_letter_i] = 'n\''`
			`return ''.join(word)`
			`else:`
Added " error fix 2018-05-31 13:53:35 +00:00			`if len(word) > 2:`
			`next_letter_i = 2`
			`else:`
			`return ''.join(word)`
Added text to speech script and text to SAMPA 2018-05-31 09:22:42 +00:00			`elif len(word) > 1 and word[1] == '\"':`
			`next_letter_i = 2`
			`# {('m', 'f'): 'F'}`

			`new_word = copy(word)`
			`while True:`
			`if word[letter_i] == 'm' and (word[next_letter_i] == 'f' or word[next_letter_i] == 'v'):`
			`new_word[letter_i] = 'F'`
			`elif word[letter_i] == 'n' and (word[next_letter_i] == 'k' or word[next_letter_i] == 'g' or word[next_letter_i] == 'x'):`
			`new_word[letter_i] = 'N'`
			`elif word[letter_i] == 'n' and (word[next_letter_i] == 'f' or word[next_letter_i] == 'v'):`
			`new_word[letter_i] = 'F'`
			`elif word[letter_i] == 'n' and not word[next_letter_i] in vowels and letter_i == len(word) - 2:`
			`new_word[letter_i] = 'n\''`
			`elif word[letter_i] == 'l' and not word[next_letter_i] in vowels and letter_i == len(word) - 2:`
			`new_word[letter_i] = 'l\''`
			`elif previous_letter_i >= 0 and word[letter_i] == 'v' and not word[previous_letter_i] in vowels and word[`
			`next_letter_i] in get_voiced_consonants():`
			`new_word[letter_i] = 'w'`
			`elif previous_letter_i >= 0 and word[letter_i] == 'v' and not word[previous_letter_i] in vowels and word[`
			`next_letter_i] in get_nonresonant_silent_consonants():`
			`new_word[letter_i] = 'W'`
			`elif word[letter_i] == 'p' and word[next_letter_i] == 'm':`
			`new_word[letter_i] = 'p_n'`
			`elif word[letter_i] == 'p' and (word[next_letter_i] == 'f' or word[next_letter_i] == 'v'):`
			`new_word[letter_i] = 'p_f'`
			`elif word[letter_i] == 'b' and word[next_letter_i] == 'm':`
			`new_word[letter_i] = 'b_n'`
			`elif word[letter_i] == 'b' and (word[next_letter_i] == 'f' or word[next_letter_i] == 'v'):`
			`new_word[letter_i] = 'b_f'`
			`elif word[letter_i] == 't' and word[next_letter_i] == 'l':`
			`new_word[letter_i] = 't_l'`
			`elif word[letter_i] == 't' and word[next_letter_i] == 'n':`
			`new_word[letter_i] = 't_n'`
			`elif word[letter_i] == 'd' and word[next_letter_i] == 'l':`
			`new_word[letter_i] = 'd_l'`
			`elif word[letter_i] == 'd' and word[next_letter_i] == 'n':`
			`new_word[letter_i] = 'd_n'`

			`if len(word) > next_letter_i + 1:`
			`if word[next_letter_i + 1] == ':' or word[next_letter_i + 1] == '\"':`
			`if len(word) > next_letter_i + 2:`
			`previous_letter_i = letter_i`
			`letter_i = next_letter_i`
			`next_letter_i = next_letter_i + 2`
			`else:`
			`#if word[next_letter_i] == 'l':`
			`# new_word[next_letter_i] = 'l\''`
			`#elif word[next_letter_i] == 'n':`
			`# new_word[next_letter_i] = 'n\''`
			`return ''.join(new_word)`
			`else:`
			`previous_letter_i = letter_i`
			`letter_i = next_letter_i`
			`next_letter_i = next_letter_i + 1`
			`else:`
			`#if word[next_letter_i] == 'l':`
			`# new_word[next_letter_i] = 'l\''`
			`#elif word[next_letter_i] == 'n':`
			`# new_word[next_letter_i] = 'n\''`
			`return ''.join(new_word)`
			`# print(word)`

results 2018-05-31 13:54:24 +00:00			`#result = convert_to_SAMPA(sys.argv[1])`
			`#final_result = result.replace('\"', '\'')`
			`#print(final_result)`
Added text to speech script and text to SAMPA 2018-05-31 09:22:42 +00:00			`#return final_result`