A couple of fixes

parent d4ea584fc4
commit ad7d10563e
.gitignore (vendored), 9 changes
@@ -98,5 +98,14 @@ grid_results/
 .idea/
 cnn/word_accetuation/svm/data/
 postprocessing/data_merge.ipynb
+data_merge.ipynb
 postprocessing/data_merge.py
+data_merge.py
 postprocessing/sp_data_merge.py
+sp_data_merge.py
+postprocessing/data_merge_tab2xml.py
+data_merge_tab2xml.py
+postprocessing/data_merge_analysis.py
+data_merge_analysis.py
+postprocessing/sp_sloleks_data_merge.py
+sp_sloleks_data_merge.py
@@ -140,9 +140,34 @@ with open("data/contextual_changes/accented_lemmas_final_sloleks2_small.xml", "a
         or re.match(r'K.*', msd) or re.match(r'D.', msd) or re.match(r'L', msd) or re.match(r'M', msd) \
         or re.match(r'O', msd) or re.match(r'Z.*', msd) or re.match(r'V.', msd) or re.match(r'Rsr.', msd)\
         or msd == "":
+
+        # when lemma does not equal unstressed version of what is supposed to be lemma, try to find parts of the
+        # word that are equal and transfer stress to lemma (if possible)
         if lemma != stressed2unstressed(stressed_lemma):
+            identical_length = 0
+            # if lemma == 'Latkov':
+            #     print('HERE')
+            for i in range(min(len(lemma), len(stressed2unstressed(stressed_lemma)))):
+                # a = list(lemma)
+                # b = list(stressed2unstressed(stressed_lemma))
+                identical_length += 1
+                if list(lemma)[i] != list(stressed2unstressed(stressed_lemma))[i]:
+                    break
+
+
+            for l in list(stressed_lemma[identical_length:]):
+                if l in accented_vowels:
+                    # print(lemma)
+                    # print(stressed2unstressed(stressed_lemma))
+                    # print(stressed_lemma[identical_length:])
                     print(lemma + " : " + stressed_lemma + " - " + msd)
-            pass
+            stressed_lemma = stressed_lemma[:identical_length] + lemma[identical_length:]
+
+
+
+            # pass
+            # if lemma != stressed2unstressed(stressed_lemma):
+            #     print(lemma + " : " + stressed_lemma + " - " + msd)
         else:
             # print("Error2 - " + msd + " " + lemma + " - " + stressed_lemma)
             # print(lemma + " - " + msd)
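Note: the new lines above find the longest shared prefix between the lemma and the unstressed form of the candidate stressed lemma, then graft the lemma's tail onto the stressed prefix. The standalone sketch below reproduces that loop for readers following the change; stressed2unstressed and accented_vowels are simplified stand-ins for the helpers defined elsewhere in this script, so the character tables are illustrative only, not the repository's actual definitions.

# Illustrative sketch of the stress-transfer step added in the hunk above.
# The stress-mark tables here are assumptions; the real helpers cover more characters.
accented_vowels = set('àáâäèéêëìíîòóôöùúü')

def stressed2unstressed(word):
    # simplified stand-in: strip a handful of common stress marks
    return word.translate(str.maketrans('àáâäèéêëìíîòóôöùúü', 'aaaaeeeeiiioooouuu'))

def transfer_stress(lemma, stressed_lemma):
    # Count matching leading characters (as in the diff, the first mismatching
    # character is still counted before the loop breaks).
    unstressed = stressed2unstressed(stressed_lemma)
    identical_length = 0
    for i in range(min(len(lemma), len(unstressed))):
        identical_length += 1
        if lemma[i] != unstressed[i]:
            break
    # Flag cases where the discarded tail still carried a stress mark.
    for ch in stressed_lemma[identical_length:]:
        if ch in accented_vowels:
            print(lemma + " : " + stressed_lemma)
    # Keep the stressed prefix, append the lemma's remaining tail.
    return stressed_lemma[:identical_length] + lemma[identical_length:]

print(transfer_stress('Latkov', 'Látkova'))  # -> Látkov (hypothetical example)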
@@ -1771,7 +1771,7 @@ def convert_to_correct_stress(w):
     w = w.replace('à', 'ŕ')
     w = w.replace('ä', 'à')
     w = w.replace('ë', 'è')
-    # cor_content[i][3] = cor_content[i][3].replace('ě', 'ê')
+    w = w.replace('ě', 'ê')
     w = w.replace('î', 'ì')
     w = w.replace('ö', 'ò')
     w = w.replace('ü', 'ù')
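Note: the replace chain in convert_to_correct_stress maps each mis-encoded stress mark to the intended one, and the order matters ('à' must be turned into 'ŕ' before 'ä' is turned into 'à'). As a reading aid only, the seven mappings visible in this hunk can be collapsed into a single translation table; this is a sketch, not the function as it exists in the repository.

# Table-driven reading of the replace chain above (sketch, not the repo code).
STRESS_FIXUPS = str.maketrans({
    'à': 'ŕ',  # runs first in the chain, so the 'à' produced from 'ä' below is not re-mapped
    'ä': 'à',
    'ë': 'è',
    'ě': 'ê',  # the mapping this commit re-enables
    'î': 'ì',
    'ö': 'ò',
    'ü': 'ù',
})

def convert_to_correct_stress_sketch(w):
    return w.translate(STRESS_FIXUPS)

print(convert_to_correct_stress_sketch('pärä'))  # -> pàrà (hypothetical input)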
@@ -136,8 +136,7 @@ with open("data/new_sloleks/final_sloleks2.xml", "ab") as myfile:
             new_element = etree.Element('feat')
             new_element.attrib['att'] = 'SAMPA'
             print(accentuated_word)
-            if lemma == 'Barrymore':
-                print("HERE!")
+
             new_element.attrib['val'] = convert_to_SAMPA(accentuated_word)
             wf.append(new_element)
 
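Note: the removed lines were a debugging probe for a single lemma; the surviving code attaches the SAMPA transcription as a feat attribute on the current word-form element. A minimal sketch of that element construction with lxml follows; convert_to_SAMPA is replaced by a placeholder, and the 'WordForm'/'wf' names are assumptions about the surrounding Sloleks XML loop rather than the actual script.

# Minimal sketch, assuming lxml and a placeholder converter.
from lxml import etree

def convert_to_SAMPA(word):
    # placeholder: the real grapheme-to-SAMPA mapping lives elsewhere in this repo
    return word

wf = etree.Element('WordForm')   # stands in for the word-form node read from final_sloleks2.xml
accentuated_word = 'primer'      # hypothetical accented word

new_element = etree.Element('feat')
new_element.attrib['att'] = 'SAMPA'
new_element.attrib['val'] = convert_to_SAMPA(accentuated_word)
wf.append(new_element)

print(etree.tostring(wf, pretty_print=True).decode())
# <WordForm>
#   <feat att="SAMPA" val="primer"/>
# </WordForm>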