From d09b5a8293d08647649cc950617ffdf9c92e3f55 Mon Sep 17 00:00:00 2001
From: Luka <krsnik.luka92@gmail.com>
Date: Wed, 22 Aug 2018 08:46:51 +0200
Subject: [PATCH] Added fix for missing stressed data in tab form

---
 sloleks_accentuation2.py         |  2 +-
 sloleks_accentuation2_tab2xml.py |  2 ++
 text2SAMPA.py                    | 13 +++++++------
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/sloleks_accentuation2.py b/sloleks_accentuation2.py
index cd00c6d..ebf3f9a 100755
--- a/sloleks_accentuation2.py
+++ b/sloleks_accentuation2.py
@@ -47,7 +47,7 @@ print('Commencing accentuator!')
 rate = 100000
 start_timer = time.time()
 with open("data/new_sloleks/new_sloleks2.tab", "a") as myfile:
-    for index in range(300000, len(new_content), rate):
+    for index in range(0, len(new_content), rate):
         if index+rate >= len(new_content):
             words = [[el[0], '', el[2], el[0]] for el in new_content][index:len(new_content)]
         else:
diff --git a/sloleks_accentuation2_tab2xml.py b/sloleks_accentuation2_tab2xml.py
index 7ae8ab8..36a8612 100755
--- a/sloleks_accentuation2_tab2xml.py
+++ b/sloleks_accentuation2_tab2xml.py
@@ -136,6 +136,8 @@ with open("data/new_sloleks/final_sloleks2.xml", "ab") as myfile:
                         new_element = etree.Element('feat')
                         new_element.attrib['att'] = 'SAMPA'
                         print(accentuated_word)
+                        if lemma == 'Barrymore':
+                            print("HERE!")
                         new_element.attrib['val'] = convert_to_SAMPA(accentuated_word)
                         wf.append(new_element)
 
diff --git a/text2SAMPA.py b/text2SAMPA.py
index 1a3000a..1d18030 100755
--- a/text2SAMPA.py
+++ b/text2SAMPA.py
@@ -5,7 +5,8 @@ import sys
 vowels = ['à', 'á', 'ä', 'é', 'ë', 'ì', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü', 'a', 'e', 'i', 'o', 'u', 'O', 'E']
 
 def syllable_stressed(syllable):
-    stressed_letters = [u'ŕ', u'á', u'ä', u'é', u'ë', u'ě', u'í', u'î', u'ó', u'ô', u'ö', u'ú', u'ü']
+    # stressed_letters = [u'ŕ', u'á', u'ä', u'é', u'ë', u'ě', u'í', u'î', u'ó', u'ô', u'ö', u'ú', u'ü']
+    stressed_letters = [u'ŕ', u'á', u'à', u'é', u'è', u'ê', u'í', u'ì', u'ó', u'ô', u'ò', u'ú', u'ù']
     for letter in syllable:
         if letter in stressed_letters:
             return True
@@ -116,13 +117,13 @@ def convert_to_SAMPA(word):
             word[i] = 'tS'
         elif word[i] == 'á':
             word[i] = 'a:'
-        elif word[i] == 'ä':
+        elif word[i] == 'à':
             word[i] = 'a'
         elif word[i] == 'é':
             word[i] = 'e:'
-        elif word[i] == 'ë':
+        elif word[i] == 'è':
             word[i] = 'E'
-        elif word[i] == 'ě':
+        elif word[i] == 'ê':
             word[i] = 'E:'
         elif word[i] == 'í':
             word[i] = 'i:'
@@ -132,11 +133,11 @@ def convert_to_SAMPA(word):
             word[i] = 'o:'
         elif word[i] == 'ô':
             word[i] = 'O:'
-        elif word[i] == 'ö':
+        elif word[i] == 'ò':
             word[i] = 'O'
         elif word[i] == 'ú':
             word[i] = 'u:'
-        elif word[i] == 'ü':
+        elif word[i] == 'ù':
             word[i] = 'u'
         elif word[i] == 'ŕ':
             word[i] = '@r'