From 36c8880bfebe3902231272bb3b0035ef608f808c Mon Sep 17 00:00:00 2001 From: lkrsnik Date: Wed, 22 Aug 2018 08:01:06 +0200 Subject: [PATCH] Added some fixes in converting sloleks to the one with stressed words --- sloleks_accentuation2.py | 2 +- sloleks_accentuation2_tab2xml.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sloleks_accentuation2.py b/sloleks_accentuation2.py index 09f22f1..cd00c6d 100755 --- a/sloleks_accentuation2.py +++ b/sloleks_accentuation2.py @@ -46,7 +46,7 @@ print('Commencing accentuator!') rate = 100000 start_timer = time.time() -with open("data/new_sloleks/new_sloleks.tab", "a") as myfile: +with open("data/new_sloleks/new_sloleks2.tab", "a") as myfile: for index in range(300000, len(new_content), rate): if index+rate >= len(new_content): words = [[el[0], '', el[2], el[0]] for el in new_content][index:len(new_content)] diff --git a/sloleks_accentuation2_tab2xml.py b/sloleks_accentuation2_tab2xml.py index c139982..7ae8ab8 100755 --- a/sloleks_accentuation2_tab2xml.py +++ b/sloleks_accentuation2_tab2xml.py @@ -52,14 +52,14 @@ word_index = 0 # done_lexical_entries = 33522 data = Data('s', shuffle_all_inputs=False) -accentuated_content = data._read_content('data/new_sloleks/new_sloleks.tab') +accentuated_content = data._read_content('data/new_sloleks/final_sloleks2.tab') start_timer = time.time() print('Copy initialization complete') -with open("data/new_sloleks/final_sloleks.xml", "ab") as myfile: +with open("data/new_sloleks/final_sloleks2.xml", "ab") as myfile: # myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab') - for event, element in etree.iterparse('data/new_sloleks/final_sloleks_read.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True): + for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True): # for event, element in etree.iterparse('data/Sloleks_v1.2.xml', tag="LexicalEntry", encoding="UTF-8", remove_blank_text=True): # if word_glob_num >= word_limit: # myfile2.close() @@ -135,6 +135,7 @@ with open("data/new_sloleks/final_sloleks.xml", "ab") as myfile: new_element = etree.Element('feat') new_element.attrib['att'] = 'SAMPA' + print(accentuated_word) new_element.attrib['val'] = convert_to_SAMPA(accentuated_word) wf.append(new_element)