From 9f6e5b2752365a564a9dfc7570bd120789eee02b Mon Sep 17 00:00:00 2001 From: Luka Date: Sat, 14 Apr 2018 10:25:40 +0200 Subject: [PATCH] Accentuation on sloleks --- .idea/workspace.xml | 319 ++++++++++++++++--------------------- prepare_data.py | 196 ++++++++++++++++++++++- sloleks_accentuation2.py | 70 ++++++++ sloleks_accetuation.ipynb | 154 +++++++++++++++--- sloleks_accetuation2.ipynb | 263 ++++++++++++++++++++++++++++++ 5 files changed, 798 insertions(+), 204 deletions(-) create mode 100644 sloleks_accentuation2.py create mode 100644 sloleks_accetuation2.ipynb diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 6dae302..dc1ebcc 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,13 +2,15 @@ + + + - - - - + - + + + - bidirectional_basic_input - _bidirectional_basic_input - shuffeling - generate_data - _generate_inputs - content_shuffle_vector_path - content_shuffle_vector_location - _shuffle_all_inputs - _generator_instance - _x_letter_input - _generate_x_and_y - content - number_of_syllables - _create_syllables - index - x_sy - _letter_generator - translator - _bidirectional_architectural_input - print - _syllable_generator np.concatenate prepare_data assign_stress_locations test_accuracy test_acc test_set - reverse_inputs accent_classification test_type + get_word_length + input_words + content + codecs + data + load_model + accentuate_word + get_word_le + load_location_models + get_ensemble_location_predictions + reverse_inputs + _x_letter_input + reverse + print( + count_vowels + count + sylla + # word + accented_word + get_ensemble_type_predictions + rever + accentuate_wo @@ -337,6 +351,7 @@ @@ -361,7 +376,7 @@ - + @@ -374,63 +389,9 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + @@ -439,11 +400,11 @@ + - @@ -762,7 +723,6 @@ - @@ -878,21 +838,10 @@ - - - - - - - - - - - @@ -900,7 +849,6 @@ - @@ -928,7 +876,6 @@ - @@ -960,7 +907,6 @@ - @@ -968,7 +914,6 @@ - @@ -983,7 +928,6 @@ - @@ -991,37 +935,20 @@ - - - - - - - - - - + - - - - - - - - - + @@ -1029,7 +956,7 @@ - + @@ -1037,7 +964,7 @@ - + @@ -1051,74 +978,110 @@ - + - - + + - + - - + + - + - - + + + + + + + + + + + + + + + + + + - - + + - + - - + + - + - - + + - + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - diff --git a/prepare_data.py b/prepare_data.py index fae903d..7dd5216 100644 --- a/prepare_data.py +++ b/prepare_data.py @@ -9,6 +9,8 @@ import keras.backend as K import os.path import codecs +from copy import copy + from keras import optimizers from keras.models import Model from keras.layers import Dense, Dropout, Input @@ -968,16 +970,49 @@ class Data: return res def test_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, syllable_dictionary=None, - threshold=0.4999955): + threshold=0.4999955, patterns=None): errors = [] num_of_pred = len(predictions) num_of_correct_pred = 0 + + # wrong_patterns = 0 + # wrong_pattern_prediction = 0 for i in range(predictions.shape[0]): correct_prediction = True + + round_predictions = np.zeros(predictions[i].shape) for j in range(len(y[i])): + if predictions[i][j] < threshold: + round_predictions[j] = 0.0 + else: + round_predictions[j] = 1.0 if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0): correct_prediction = False - break + + # in_pattern = False + # if patterns is not None: + # test_predictions = copy(predictions[i]) + # l = self.get_word_length(x[i]) + # round_predictions = np.zeros(test_predictions.shape) + # for j in range(len(y[i])): + # if test_predictions[j] < threshold: + # round_predictions[j] = 0.0 + # else: + # round_predictions[j] = 1.0 + # + # in_pattern = False + # for pattern in patterns[l]: + # if (pattern == round_predictions).all(): + # in_pattern = True + # if not in_pattern: + # wrong_patterns += 1 + # + # for j in range(len(y[i])): + # if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0): + # correct_prediction = False + # + # if not in_pattern and not correct_prediction: + # wrong_pattern_prediction += 1 # if (np.around(predictions[i]) == y[i]).all(): if correct_prediction: num_of_correct_pred += 1 @@ -991,12 +1026,22 @@ class Data: errors.append([i, decoded_x, self.decode_x_other_features(feature_dictionary, [x_other_features[i]]), - self.assign_stress_locations(decoded_x, np.around(predictions[i]), vowels, syllables=self._input_type != 'l'), + self.assign_stress_locations(decoded_x, round_predictions, vowels, syllables=self._input_type != 'l'), self.assign_stress_locations(decoded_x, y[i], vowels, syllables=self._input_type != 'l') ]) + # print(wrong_patterns) + # print(wrong_pattern_prediction) return (num_of_correct_pred / float(num_of_pred)) * 100, errors + # def get_word_length(self, x_el): + # i = 0 + # for el in x_el: + # if el == 0: + # return i + # i += 1 + # return 10 + @staticmethod def decode_syllable_x(word_encoded, syllable_dictionary): word = [] @@ -1214,8 +1259,37 @@ class Data: @staticmethod def load_location_models(letters_path, syllables_path, syllabled_letters_path): ############################ LOCATION ######################## - letter_location_model = load_model(letters_path, custom_objects={'actual_accuracy': actual_accuracy}) + nn_output_dim = 10 + + conv_input_shape = (23, 36) + othr_input = (140,) + + conv_input = Input(shape=conv_input_shape, name='conv_input') + x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input) + x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv) + x_conv = MaxPooling1D(pool_size=2)(x_conv) + x_conv = Flatten()(x_conv) + othr_input = Input(shape=othr_input, name='othr_input') + + x = concatenate([x_conv, othr_input]) + # x = Dense(1024, input_dim=(516 + 256), activation='relu')(x) + x = Dense(256, activation='relu')(x) + x = Dropout(0.3)(x) + x = Dense(256, activation='relu')(x) + x = Dropout(0.3)(x) + x = Dense(256, activation='relu')(x) + x = Dropout(0.3)(x) + x = Dense(nn_output_dim, activation='sigmoid')(x) + + letter_location_model = Model(inputs=[conv_input, othr_input], outputs=x) + opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08) + letter_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ]) + + + letter_location_model.load_weights(letters_path) + + ############################################################## # num_examples = len(data.x_train) # training set size nn_output_dim = 10 @@ -1244,7 +1318,10 @@ class Data: syllable_location_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accuracy, ]) syllable_location_model.load_weights(syllables_path) + + ##################################################### conv_input_shape = (10, 252) + othr_input = (140,) conv_input = Input(shape=conv_input_shape, name='conv_input') @@ -1354,6 +1431,7 @@ class Data: @staticmethod def get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model, + letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model, dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary): batch_size = 16 # print(tagged_input_words[pos]) @@ -1379,11 +1457,59 @@ class Data: generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels) syllabled_letters_location_predictions = syllabled_letters_location_model.predict_generator(generator, len(x) / (batch_size)) - return np.mean(np.array([letter_location_predictions, syllable_location_predictions, syllabled_letters_location_predictions]), axis=0) + ############## CORRECT ORDER INPUT ############## + data = Data('l', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False) + x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels, + feature_dictionary, 'who cares') + generator = data._letter_generator(x, x_other_features, fake_y, batch_size, accented_vowels) + letter_location_co_predictions = letter_location_co_model.predict_generator(generator, len(x) / (batch_size)) + + letter_location_co_predictions = data.reverse_predictions(letter_location_co_predictions, input_words, vowels) + + data = Data('s', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False) + x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels, + accented_vowels, feature_dictionary, 'who cares') + eye = np.eye(len(syllable_dictionary), dtype=int) + generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, eye, accented_vowels) + syllable_location_co_predictions = syllable_location_co_model.predict_generator(generator, len(x) / (batch_size)) + + syllable_location_co_predictions = data.reverse_predictions(syllable_location_co_predictions, input_words, vowels) + + data = Data('sl', shuffle_all_inputs=False, convert_multext=False, reverse_inputs=False) + x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels, + accented_vowels, feature_dictionary, 'who cares') + max_syllable = data._get_max_syllable(syllable_dictionary) + syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels) + generator = data._syllable_generator(x, x_other_features, fake_y, batch_size, syllable_letters_translator, accented_vowels) + syllabled_letters_location_co_predictions = syllabled_letters_location_co_model.predict_generator(generator, len(x) / (batch_size)) + + syllabled_letters_location_co_predictions = data.reverse_predictions(syllabled_letters_location_co_predictions, input_words, vowels) + + return np.mean(np.array([letter_location_predictions, syllable_location_predictions, syllabled_letters_location_predictions, + letter_location_co_predictions, syllable_location_co_predictions, syllabled_letters_location_co_predictions]), axis=0) + + def count_syllables(self, word, vowels): + j = 0 + num_vowels = 0 + for j in range(len(word)): + if self._is_vowel(word, j, vowels): + num_vowels += 1 + return num_vowels + + def reverse_predictions(self, predictions, words, vowels): + new_predictions = np.zeros(predictions.shape, dtype='float32') + for i in range(len(predictions)): + word_len = self.count_syllables(words[i][0], vowels) + + for k in range(word_len): + new_predictions[i][k] += predictions[i][word_len - 1 - k] + + return new_predictions @staticmethod def get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model, syllabled_letter_type_model, - dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary): + letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model, + dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary): batch_size = 16 y_array = np.asarray(location_y) accentuation_length = (y_array > 0).sum() @@ -1409,7 +1535,57 @@ class Data: generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels) syllabled_letter_type_predictions = syllabled_letter_type_model.predict_generator(generator, accentuation_length / batch_size) - return np.mean(np.array([letter_type_predictions, syllable_type_predictions, syllabled_letter_type_predictions]), axis=0) + ############## CORRECT ORDER INPUT ############## + location_y = data.reverse_predictions(location_y, input_words, vowels) + + data = Data('l', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False) + x, x_other_features, fake_y = data._generate_x_and_y(dictionary, max_word, max_num_vowels, input_words, vowels, accented_vowels, + feature_dictionary, 'who cares') + generator = data._letter_generator(x, x_other_features, location_y, batch_size, accented_vowels) + letter_type_co_predictions = letter_type_co_model.predict_generator(generator, accentuation_length / (batch_size)) + + data.reorder_correct_direction_inputs(letter_type_co_predictions, location_y) + + data = Data('s', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False) + x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels, + accented_vowels, feature_dictionary, 'who cares') + eye = np.eye(len(syllable_dictionary), dtype=int) + generator = data._syllable_generator(x, x_other_features, location_y, batch_size, eye, accented_vowels) + syllable_type_co_predictions = syllable_type_co_model.predict_generator(generator, accentuation_length / (batch_size)) + + data.reorder_correct_direction_inputs(syllable_type_co_predictions, location_y) + + data = Data('sl', shuffle_all_inputs=False, accent_classification=True, convert_multext=False, reverse_inputs=False) + x, x_other_features, fake_y = data._generate_x_and_y(syllable_dictionary, max_word, max_num_vowels, input_words, vowels, + accented_vowels, feature_dictionary, 'who cares') + max_syllable = data._get_max_syllable(syllable_dictionary) + syllable_letters_translator = data._create_syllable_letters_translator(max_syllable, syllable_dictionary, dictionary, vowels) + generator = data._syllable_generator(x, x_other_features, location_y, batch_size, syllable_letters_translator, accented_vowels) + syllabled_letter_type_co_predictions = syllabled_letter_type_co_model.predict_generator(generator, accentuation_length / batch_size) + + data.reorder_correct_direction_inputs(syllabled_letter_type_co_predictions, location_y) + + return np.mean(np.array([letter_type_predictions, syllable_type_predictions, syllabled_letter_type_predictions, + letter_type_co_predictions, syllable_type_co_predictions, syllabled_letter_type_co_predictions]), axis=0) + + def reorder_correct_direction_inputs(self, predictions, y): + pred_i = 0 + for i in range(len(y)): + num_accented_syllables = 0 + for el in y[i]: + if el > 0: + num_accented_syllables += 1 + if num_accented_syllables > 1: + min_i = pred_i + max_i = pred_i + num_accented_syllables - 1 + while (max_i > min_i): + min_pred = copy(predictions[min_i]) + max_pred = copy(predictions[max_i]) + predictions[min_i] = max_pred + predictions[max_i] = min_pred + min_i += 1 + max_i -= 1 + pred_i += num_accented_syllables def assign_location_stress(self, word, locations, vowels): # word = list(word) @@ -1449,12 +1625,17 @@ class Data: return ''.join(word_list) def accentuate_word(self, input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model, + letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model, letter_type_model, syllable_type_model, syllabled_letter_type_model, + letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model, dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary): predictions = self.get_ensemble_location_predictions(input_words, letter_location_model, syllable_location_model, syllabled_letters_location_model, + letter_location_co_model, syllable_location_co_model, + syllabled_letters_location_co_model, dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary) + #print(predictions) if 'A' not in vowels: vowels.extend(['A', 'E', 'I', 'O', 'U']) location_accented_words = [self.assign_location_stress(input_words[i][0][::-1], self.decode_y(predictions[i]), vowels)[::-1] for i in @@ -1463,6 +1644,7 @@ class Data: location_y = np.around(predictions) type_predictions = self.get_ensemble_type_predictions(input_words, location_y, letter_type_model, syllable_type_model, syllabled_letter_type_model, + letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model, dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary) diff --git a/sloleks_accentuation2.py b/sloleks_accentuation2.py new file mode 100644 index 0000000..d038c8b --- /dev/null +++ b/sloleks_accentuation2.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import numpy as np +from keras.models import load_model +import sys +import pickle +import time + +from prepare_data import * + +np.random.seed(7) + +data = Data('l', shuffle_all_inputs=False) +content = data._read_content('data/SlovarIJS_BESEDE_utf8.lex') +dictionary, max_word, max_num_vowels, vowels, accented_vowels = data._create_dict(content) +feature_dictionary = data._create_slovene_feature_dictionary() +syllable_dictionary = data._create_syllables_dictionary(content, vowels) +accented_vowels = ['ŕ', 'á', 'ä', 'é', 'ë', 'ě', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü'] + +data = Data('l', shuffle_all_inputs=False) +letter_location_model, syllable_location_model, syllabled_letters_location_model = data.load_location_models( + 'cnn/word_accetuation/cnn_dictionary/v5_3/20_final_epoch.h5', + 'cnn/word_accetuation/syllables/v3_3/20_final_epoch.h5', + 'cnn/word_accetuation/syllabled_letters/v3_3/20_final_epoch.h5') + +letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model = data.load_location_models( + 'cnn/word_accetuation/cnn_dictionary/v5_2/20_final_epoch.h5', + 'cnn/word_accetuation/syllables/v3_2/20_final_epoch.h5', + 'cnn/word_accetuation/syllabled_letters/v3_2/20_final_epoch.h5') + +letter_type_model, syllable_type_model, syllabled_letter_type_model = data.load_type_models( + 'cnn/accent_classification/letters/v3_1/20_final_epoch.h5', + 'cnn/accent_classification/syllables/v2_1/20_final_epoch.h5', + 'cnn/accent_classification/syllabled_letters/v2_1/20_final_epoch.h5') + +letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model = data.load_type_models( + 'cnn/accent_classification/letters/v3_0/20_final_epoch.h5', + 'cnn/accent_classification/syllables/v2_0/20_final_epoch.h5', + 'cnn/accent_classification/syllabled_letters/v2_0/20_final_epoch.h5') + +data = Data('s', shuffle_all_inputs=False) +new_content = data._read_content('data/sloleks-sl_v1.2.tbl') + +print('Commencing accentuator!') + +rate = 100000 +start_timer = time.time() +with open("data/new_sloleks/new_sloleks.tab", "a") as myfile: + for index in range(0, len(new_content), rate): + if index+rate >= len(new_content): + words = [[el[0], '', el[2], el[0]] for el in new_content][index:len(new_content)] + else: + words = [[el[0], '', el[2], el[0]] for el in new_content][index:index+rate] + data = Data('l', shuffle_all_inputs=False) + location_accented_words, accented_words = data.accentuate_word(words, letter_location_model, syllable_location_model, syllabled_letters_location_model, + letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model, + letter_type_model, syllable_type_model, syllabled_letter_type_model, + letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model, + dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary) + + res = '' + for i in range(index, index + len(words)): + res += new_content[i][0] + '\t' + new_content[i][1] + '\t' + new_content[i][2] + '\t' \ + + new_content[i][3][:-1] + '\t' + location_accented_words[i-index] + '\t' + accented_words[i-index] + '\n' + + print('Writing data from ' + str(index) + ' onward.') + end_timer = time.time() + print("Elapsed time: " + "{0:.2f}".format((end_timer - start_timer)/60.0) + " minutes") + myfile.write(res) diff --git a/sloleks_accetuation.ipynb b/sloleks_accetuation.ipynb index 752a333..7cefdc9 100644 --- a/sloleks_accetuation.ipynb +++ b/sloleks_accetuation.ipynb @@ -87,18 +87,6 @@ " i += 1" ] }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "feature__en_dictionary = data._create_feature_dictionary()\n", - "feature__slo_dictionary = data._create_slovene_feature_dictionary()" - ] - }, { "cell_type": "code", "execution_count": 3, @@ -120,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": { "collapsed": true }, @@ -131,14 +119,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%run prepare_data.py\n", - "data = Data('l', shuffle_all_inputs=False)\n", + "data = Data('s', shuffle_all_inputs=False)\n", "location_accented_words, accented_words = data.accentuate_word(test_input, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n", " letter_type_model, syllable_type_model, syllabled_letter_type_model,\n", " dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)" @@ -146,15 +134,15 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['uradní', 'podatkí', 'policíje', 'kažéjo', 'ná', 'precéj', 'napeté', 'razmeré', 'v', 'piranském', 'zalivú', 'jé', 'danés', 'poročála', 'oddajá', 'dó', 'danés', 'sé', 'jé', 'zgodílo']\n", - "['uradnî', 'podatkî', 'policíje', 'kažëjo', 'ná', 'precëj', 'napetë', 'razmerë', 'v', 'piranskëm', 'zalivú', 'jë', 'danës', 'poročála', 'oddajá', 'dó', 'danës', 'së', 'jë', 'zgodílo']\n" + "['uradni', 'podatkí', 'policíje', 'kažéjo', 'ná', 'precéj', 'napeté', 'razmeré', 'v', 'piranském', 'zalivú', 'jé', 'danés', 'poročála', 'oddajá', 'dó', 'danés', 'sé', 'jé', 'zgodílo']\n", + "['uradni', 'pödatki', 'polícije', 'kažëjo', 'ná', 'prëcej', 'nápete', 'räzmere', 'v', 'pîranskem', 'zálivu', 'jë', 'dánes', 'poróčala', 'öddaja', 'dó', 'dánes', 'së', 'jë', 'zgodílo']\n" ] } ], @@ -225,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "metadata": { "collapsed": true }, @@ -508,6 +496,134 @@ " " ] }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'xml_words_generator' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mgen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxml_words_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/Sloleks_v1.2_p2.xml'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mword_glob_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mword_limit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'xml_words_generator' is not defined" + ] + } + ], + "source": [ + "#Words proccesed: 650250\n", + "#Word indeks: 50023\n", + "#Word number: 50023\n", + "\n", + "from lxml import etree\n", + "import time\n", + "\n", + "gen = xml_words_generator('data/Sloleks_v1.2_p2.xml')\n", + "word_glob_num = 0\n", + "word_limit = 0\n", + "iter_num = 50000\n", + "word_index = 0\n", + "start_timer = time.time()\n", + "iter_index = 0\n", + "words = []\n", + "\n", + "lexical_entries_load_number = 0\n", + "lexical_entries_save_number = 0\n", + "\n", + "\n", + "# INSIDE\n", + "#word_glob_num = 1500686\n", + "word_glob_num = 1550705\n", + "\n", + "#word_limit = 1500686\n", + "word_limit = 1550705\n", + "\n", + "\n", + "iter_index = 31\n", + "\n", + "#done_lexical_entries = 33522\n", + "\n", + "with open(\"data/new_sloleks/new_sloleks.xml\", \"ab\") as myfile:\n", + " myfile2 = open('data/new_sloleks/pa' + str(iter_index) + '.xml', 'ab')\n", + " for event, element in etree.iterparse('data/Sloleks_v1.2_p2.xml', tag=\"LexicalEntry\", encoding=\"UTF-8\", remove_blank_text=True):\n", + " # LOAD NEW WORDS AND ACCENTUATE THEM\n", + " #print(\"HERE\")\n", + " \n", + "# if lexical_entries_save_number < done_lexical_entries:\n", + "# next(gen)\n", + "# #print(lexical_entries_save_number)\n", + "# lexical_entries_save_number += 1\n", + "# lexical_entries_load_number += 1\n", + "# continue\n", + " \n", + " if word_glob_num >= word_limit:\n", + " myfile2.close()\n", + " myfile2 = open('data/new_sloleks/p' + str(iter_index) + '.xml', 'ab')\n", + " iter_index += 1\n", + " print(\"Words proccesed: \" + str(word_glob_num))\n", + "\n", + " print(\"Word indeks: \" + str(word_index))\n", + " print(\"Word number: \" + str(len(words)))\n", + " \n", + " #print(\"lexical_entries_load_number: \" + str(lexical_entries_load_number))\n", + " #print(\"lexical_entries_save_number: \" + str(lexical_entries_save_number))\n", + "\n", + " end_timer = time.time()\n", + " print(\"Elapsed time: \" + \"{0:.2f}\".format((end_timer - start_timer)/60.0) + \" minutes\")\n", + "\n", + "\n", + " word_index = 0\n", + " words = []\n", + "\n", + " while len(words) < iter_num:\n", + " try:\n", + " words.extend(next(gen))\n", + " lexical_entries_load_number += 1\n", + " except:\n", + " break\n", + " #if word_glob_num > 1:\n", + " # break\n", + "\n", + " #problem_words = words\n", + " #break\n", + " data = Data('l', shuffle_all_inputs=False)\n", + " location_accented_words, accented_words = data.accentuate_word(words, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n", + " letter_type_model, syllable_type_model, syllabled_letter_type_model,\n", + " dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)\n", + "\n", + " word_limit += len(words)\n", + " \n", + " \n", + " # READ DATA\n", + " for child in element:\n", + " if child.tag == 'WordForm':\n", + " msd = None\n", + " word = None\n", + " for wf in child:\n", + " if wf.tag == 'FormRepresentation':\n", + " new_element = etree.Element('feat')\n", + " new_element.attrib['att']='naglasna_mesta_oblike'\n", + " new_element.attrib['val']=location_accented_words[word_index]\n", + " wf.append(new_element)\n", + "\n", + " new_element = etree.Element('feat')\n", + " new_element.attrib['att']='naglašena_oblika'\n", + " new_element.attrib['val']=accented_words[word_index]\n", + " wf.append(new_element)\n", + " word_glob_num += 1\n", + " word_index += 1\n", + "\n", + " # print(etree.tostring(element, encoding=\"UTF-8\"))\n", + " myfile2.write(etree.tostring(element, encoding=\"UTF-8\", pretty_print=True))\n", + " myfile.write(etree.tostring(element, encoding=\"UTF-8\", pretty_print=True))\n", + " element.clear()\n", + " lexical_entries_save_number += 1\n", + " " + ] + }, { "cell_type": "code", "execution_count": 6, diff --git a/sloleks_accetuation2.ipynb b/sloleks_accetuation2.ipynb new file mode 100644 index 0000000..7ee70b1 --- /dev/null +++ b/sloleks_accetuation2.ipynb @@ -0,0 +1,263 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "# -*- coding: utf-8 -*-\n", + "from __future__ import unicode_literals\n", + "\n", + "import numpy as np\n", + "from keras.models import load_model\n", + "import sys\n", + "import pickle\n", + "import time\n", + "\n", + "from prepare_data import *\n", + "\n", + "np.random.seed(7)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "data = Data('l', shuffle_all_inputs=False)\n", + "content = data._read_content('data/SlovarIJS_BESEDE_utf8.lex')\n", + "dictionary, max_word, max_num_vowels, vowels, accented_vowels = data._create_dict(content)\n", + "feature_dictionary = data._create_slovene_feature_dictionary()\n", + "syllable_dictionary = data._create_syllables_dictionary(content, vowels)\n", + "accented_vowels = ['ŕ', 'á', 'ä', 'é', 'ë', 'ě', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü']\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "environment = {}\n", + "environment['dictionary'] = dictionary\n", + "environment['max_word'] = max_word\n", + "environment['max_num_vowels'] = max_num_vowels\n", + "environment['vowels'] = vowels\n", + "environment['accented_vowels'] = accented_vowels\n", + "environment['feature_dictionary'] = feature_dictionary\n", + "environment['eng_feature_dictionary'] = feature_dictionary\n", + "environment['syllable_dictionary'] = syllable_dictionary\n", + "output = open('environment.pkl', 'wb')\n", + "pickle.dump(environment, output)\n", + "output.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "407\n" + ] + } + ], + "source": [ + "i = 0\n", + "for el in syllable_dictionary:\n", + " if el == \"da\":\n", + " print(i)\n", + " i += 1" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "%run prepare_data.py\n", + "\n", + "data = Data('l', shuffle_all_inputs=False)\n", + "letter_location_model, syllable_location_model, syllabled_letters_location_model = data.load_location_models(\n", + " 'cnn/word_accetuation/cnn_dictionary/v5_3/20_final_epoch.h5',\n", + " 'cnn/word_accetuation/syllables/v3_3/20_final_epoch.h5',\n", + " 'cnn/word_accetuation/syllabled_letters/v3_3/20_final_epoch.h5')\n", + "\n", + "letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model = data.load_location_models(\n", + " 'cnn/word_accetuation/cnn_dictionary/v5_2/20_final_epoch.h5',\n", + " 'cnn/word_accetuation/syllables/v3_2/20_final_epoch.h5',\n", + " 'cnn/word_accetuation/syllabled_letters/v3_2/20_final_epoch.h5')\n", + "\n", + "letter_type_model, syllable_type_model, syllabled_letter_type_model = data.load_type_models(\n", + " 'cnn/accent_classification/letters/v3_1/20_final_epoch.h5',\n", + " 'cnn/accent_classification/syllables/v2_1/20_final_epoch.h5',\n", + " 'cnn/accent_classification/syllabled_letters/v2_1/20_final_epoch.h5')\n", + "\n", + "letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model = data.load_type_models(\n", + " 'cnn/accent_classification/letters/v3_0/20_final_epoch.h5',\n", + " 'cnn/accent_classification/syllables/v2_0/20_final_epoch.h5',\n", + " 'cnn/accent_classification/syllabled_letters/v2_0/20_final_epoch.h5')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "test_input = [['uradni', '', 'Agpmpn', 'uradni'], ['podatki', '', 'Ncmpn', 'podatki'], ['policije', '', 'Ncfsg', 'policije'], ['kažejo', '', 'Vmpr3p', 'kažejo'], ['na', '', 'Sa', 'na'], ['precej', '', 'Rgp', 'precej'], ['napete', '', 'Appfpa', 'napete'], ['razmere', '', 'Ncfpa', 'razmere'], ['v', '', 'Sl', 'v'], ['piranskem', '', 'Agpmsl', 'piranskem'], ['zalivu', '', 'Ncmsl', 'zalivu'], ['je', '', 'Va-r3s-n', 'je'], ['danes', '', 'Rgp', 'danes'], ['poročala', '', 'Vmpp-sf', 'poročala'], ['oddaja', '', 'Ncfsn', 'oddaja'], ['do', '', 'Sg', 'do'], ['danes', '', 'Rgp', 'danes'], ['se', '', 'Px------y', 'se'], ['je', '', 'Va-r3s-n', 'je'], ['zgodilo', '', 'Vmep-sn', 'zgodilo']]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "accented_vowels = ['ŕ', 'á', 'ä', 'é', 'ë', 'ě', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü']\n", + "words = [[\"Gorejevemu\", \"\", \"Psnsed\", \"Gorejevemu\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Gorejévemu']\n", + "['Gorejěvemu']\n", + "[['Gorejevemu', '', 'Psnsed', 'Gorejevemu']]\n" + ] + } + ], + "source": [ + "pos = 4282\n", + "print(location_accented_words)\n", + "print(accented_words)\n", + "print(words)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "data = Data('s', shuffle_all_inputs=False)\n", + "new_content = data._read_content('data/sloleks-sl_v1.2.tbl')" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "words = [[el[0], '', el[2], el[0]] for el in new_content][1146450:1146550]\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['adventistovo', '', 'Psnzeo', 'adventistovo'], ['adventistovo', '', 'Psnzet', 'adventistovo'], ['adventistu', '', 'Somed', 'adventistu'], ['adventistu', '', 'Somem', 'adventistu'], ['adventiven', '', 'Ppnmein', 'adventiven'], ['adventiven', '', 'Ppnmetn', 'adventiven'], ['adventivna', '', 'Ppnmdi', 'adventivna'], ['adventivna', '', 'Ppnmdt', 'adventivna'], ['adventivna', '', 'Ppnsmi', 'adventivna'], ['adventivna', '', 'Ppnsmt', 'adventivna'], ['adventivna', '', 'Ppnzei', 'adventivna'], ['adventivne', '', 'Ppnmmt', 'adventivne'], ['adventivne', '', 'Ppnzer', 'adventivne'], ['adventivne', '', 'Ppnzmi', 'adventivne'], ['adventivne', '', 'Ppnzmt', 'adventivne'], ['adventivnega', '', 'Ppnmer', 'adventivnega'], ['adventivnega', '', 'Ppnmet', 'adventivnega'], ['adventivnega', '', 'Ppnser', 'adventivnega'], ['adventivnem', '', 'Ppnmem', 'adventivnem'], ['adventivnem', '', 'Ppnsem', 'adventivnem'], ['adventivnemu', '', 'Ppnmed', 'adventivnemu'], ['adventivnemu', '', 'Ppnsed', 'adventivnemu'], ['adventivni', '', 'Ppnmeid', 'adventivni'], ['adventivni', '', 'Ppnmetd', 'adventivni'], ['adventivni', '', 'Ppnmmi', 'adventivni'], ['adventivni', '', 'Ppnsdi', 'adventivni'], ['adventivni', '', 'Ppnsdt', 'adventivni'], ['adventivni', '', 'Ppnzdi', 'adventivni'], ['adventivni', '', 'Ppnzdt', 'adventivni'], ['adventivni', '', 'Ppnzed', 'adventivni'], ['adventivni', '', 'Ppnzem', 'adventivni'], ['adventivnih', '', 'Ppnmdm', 'adventivnih'], ['adventivnih', '', 'Ppnmdr', 'adventivnih'], ['adventivnih', '', 'Ppnmmm', 'adventivnih'], ['adventivnih', '', 'Ppnmmr', 'adventivnih'], ['adventivnih', '', 'Ppnsdm', 'adventivnih'], ['adventivnih', '', 'Ppnsdr', 'adventivnih'], ['adventivnih', '', 'Ppnsmm', 'adventivnih'], ['adventivnih', '', 'Ppnsmr', 'adventivnih'], ['adventivnih', '', 'Ppnzdm', 'adventivnih'], ['adventivnih', '', 'Ppnzdr', 'adventivnih'], ['adventivnih', '', 'Ppnzmm', 'adventivnih'], ['adventivnih', '', 'Ppnzmr', 'adventivnih'], ['adventivnima', '', 'Ppnmdd', 'adventivnima'], ['adventivnima', '', 'Ppnmdo', 'adventivnima'], ['adventivnima', '', 'Ppnsdd', 'adventivnima'], ['adventivnima', '', 'Ppnsdo', 'adventivnima'], ['adventivnima', '', 'Ppnzdd', 'adventivnima'], ['adventivnima', '', 'Ppnzdo', 'adventivnima'], ['adventivnim', '', 'Ppnmeo', 'adventivnim'], ['adventivnim', '', 'Ppnmmd', 'adventivnim'], ['adventivnim', '', 'Ppnseo', 'adventivnim'], ['adventivnim', '', 'Ppnsmd', 'adventivnim'], ['adventivnim', '', 'Ppnzmd', 'adventivnim'], ['adventivnimi', '', 'Ppnmmo', 'adventivnimi'], ['adventivnimi', '', 'Ppnsmo', 'adventivnimi'], ['adventivnimi', '', 'Ppnzmo', 'adventivnimi'], ['adventivno', '', 'Ppnsei', 'adventivno'], ['adventivno', '', 'Ppnset', 'adventivno'], ['adventivno', '', 'Ppnzeo', 'adventivno'], ['adventivno', '', 'Ppnzet', 'adventivno'], ['adventna', '', 'Ppnmdi', 'adventna'], ['adventna', '', 'Ppnmdt', 'adventna'], ['adventna', '', 'Ppnsmi', 'adventna'], ['adventna', '', 'Ppnsmt', 'adventna'], ['adventna', '', 'Ppnzei', 'adventna'], ['adventne', '', 'Ppnmmt', 'adventne'], ['adventne', '', 'Ppnzer', 'adventne'], ['adventne', '', 'Ppnzmi', 'adventne'], ['adventne', '', 'Ppnzmt', 'adventne'], ['adventnega', '', 'Ppnmer', 'adventnega'], ['adventnega', '', 'Ppnmet', 'adventnega'], ['adventnega', '', 'Ppnser', 'adventnega'], ['adventnem', '', 'Ppnmem', 'adventnem'], ['adventnem', '', 'Ppnsem', 'adventnem'], ['adventnemu', '', 'Ppnmed', 'adventnemu'], ['adventnemu', '', 'Ppnsed', 'adventnemu'], ['adventni', '', 'Ppnmeid', 'adventni'], ['adventni', '', 'Ppnmetd', 'adventni'], ['adventni', '', 'Ppnmmi', 'adventni'], ['adventni', '', 'Ppnsdi', 'adventni'], ['adventni', '', 'Ppnsdt', 'adventni'], ['adventni', '', 'Ppnzdi', 'adventni'], ['adventni', '', 'Ppnzdt', 'adventni'], ['adventni', '', 'Ppnzed', 'adventni'], ['adventni', '', 'Ppnzem', 'adventni'], ['adventnih', '', 'Ppnmdm', 'adventnih'], ['adventnih', '', 'Ppnmdr', 'adventnih'], ['adventnih', '', 'Ppnmmm', 'adventnih'], ['adventnih', '', 'Ppnmmr', 'adventnih'], ['adventnih', '', 'Ppnsdm', 'adventnih'], ['adventnih', '', 'Ppnsdr', 'adventnih'], ['adventnih', '', 'Ppnsmm', 'adventnih'], ['adventnih', '', 'Ppnsmr', 'adventnih'], ['adventnih', '', 'Ppnzdm', 'adventnih'], ['adventnih', '', 'Ppnzdr', 'adventnih'], ['adventnih', '', 'Ppnzmm', 'adventnih'], ['adventnih', '', 'Ppnzmr', 'adventnih'], ['adventnima', '', 'Ppnmdd', 'adventnima'], ['adventnima', '', 'Ppnmdo', 'adventnima'], ['adventnima', '', 'Ppnsdd', 'adventnima'], ['adventnima', '', 'Ppnsdo', 'adventnima'], ['adventnima', '', 'Ppnzdd', 'adventnima'], ['adventnima', '', 'Ppnzdo', 'adventnima'], ['adventnim', '', 'Ppnmeo', 'adventnim'], ['adventnim', '', 'Ppnmmd', 'adventnim'], ['adventnim', '', 'Ppnseo', 'adventnim'], ['adventnim', '', 'Ppnsmd', 'adventnim'], ['adventnim', '', 'Ppnzmd', 'adventnim'], ['adventnimi', '', 'Ppnmmo', 'adventnimi'], ['adventnimi', '', 'Ppnsmo', 'adventnimi'], ['adventnimi', '', 'Ppnzmo', 'adventnimi'], ['adventno', '', 'Ppnsei', 'adventno'], ['adventno', '', 'Ppnset', 'adventno'], ['adventno', '', 'Ppnzeo', 'adventno'], ['adventno', '', 'Ppnzet', 'adventno'], ['adventoma', '', 'Somdd', 'adventoma'], ['adventoma', '', 'Somdo', 'adventoma'], ['adventom', '', 'Someo', 'adventom'], ['adventom', '', 'Sommd', 'adventom'], ['adventov', '', 'Somdr', 'adventov'], ['adventov', '', 'Sommr', 'adventov'], ['adventu', '', 'Somed', 'adventu'], ['adventu', '', 'Somem', 'adventu'], ['adverba', '', 'Somdi', 'adverba'], ['adverba', '', 'Somdt', 'adverba'], ['adverba', '', 'Somer', 'adverba'], ['adverb', '', 'Somei', 'adverb'], ['adverb', '', 'Sometn', 'adverb'], ['adverbe', '', 'Sommt', 'adverbe'], ['adverbi', '', 'Sommi', 'adverbi'], ['adverbi', '', 'Sommo', 'adverbi'], ['adverbih', '', 'Somdm', 'adverbih'], ['adverbih', '', 'Sommm', 'adverbih'], ['adverboma', '', 'Somdd', 'adverboma'], ['adverboma', '', 'Somdo', 'adverboma'], ['adverbom', '', 'Someo', 'adverbom'], ['adverbom', '', 'Sommd', 'adverbom'], ['adverbov', '', 'Somdr', 'adverbov'], ['adverbov', '', 'Sommr', 'adverbov'], ['adverbu', '', 'Somed', 'adverbu'], ['adverbu', '', 'Somem', 'adverbu'], ['advokata', '', 'Somdi', 'advokata'], ['advokata', '', 'Somdt', 'advokata'], ['advokata', '', 'Somer', 'advokata'], ['advokata', '', 'Sometd', 'advokata'], ['advokat', '', 'Somei', 'advokat'], ['advokate', '', 'Sommt', 'advokate'], ['advokati', '', 'Sommi', 'advokati'], ['advokati', '', 'Sommo', 'advokati'], ['advokatih', '', 'Somdm', 'advokatih'], ['advokatih', '', 'Sommm', 'advokatih'], ['advokatka', '', 'Sozei', 'advokatka'], ['advokatk', '', 'Sozdr', 'advokatk'], ['advokatk', '', 'Sozmr', 'advokatk'], ['advokatkah', '', 'Sozdm', 'advokatkah'], ['advokatkah', '', 'Sozmm', 'advokatkah'], ['advokatkama', '', 'Sozdd', 'advokatkama'], ['advokatkama', '', 'Sozdo', 'advokatkama'], ['advokatkam', '', 'Sozmd', 'advokatkam'], ['advokatkami', '', 'Sozmo', 'advokatkami'], ['advokatke', '', 'Sozer', 'advokatke'], ['advokatke', '', 'Sozmi', 'advokatke'], ['advokatke', '', 'Sozmt', 'advokatke'], ['advokatki', '', 'Sozdi', 'advokatki'], ['advokatki', '', 'Sozdt', 'advokatki'], ['advokatki', '', 'Sozed', 'advokatki'], ['advokatki', '', 'Sozem', 'advokatki'], ['advokatko', '', 'Sozeo', 'advokatko'], ['advokatko', '', 'Sozet', 'advokatko'], ['advokatoma', '', 'Somdd', 'advokatoma'], ['advokatoma', '', 'Somdo', 'advokatoma'], ['advokatom', '', 'Someo', 'advokatom'], ['advokatom', '', 'Sommd', 'advokatom'], ['advokatova', '', 'Psnmdi', 'advokatova'], ['advokatova', '', 'Psnmdt', 'advokatova'], ['advokatova', '', 'Psnsmi', 'advokatova'], ['advokatova', '', 'Psnsmt', 'advokatova'], ['advokatova', '', 'Psnzei', 'advokatova'], ['advokatov', '', 'Somdr', 'advokatov'], ['advokatov', '', 'Sommr', 'advokatov'], ['advokatov', '', 'Psnmein', 'advokatov'], ['advokatov', '', 'Psnmetn', 'advokatov'], ['advokatove', '', 'Psnmmt', 'advokatove'], ['advokatove', '', 'Psnzer', 'advokatove'], ['advokatove', '', 'Psnzmi', 'advokatove'], ['advokatove', '', 'Psnzmt', 'advokatove'], ['advokatovega', '', 'Psnmer', 'advokatovega'], ['advokatovega', '', 'Psnmet', 'advokatovega'], ['advokatovega', '', 'Psnser', 'advokatovega'], ['advokatovem', '', 'Psnmem', 'advokatovem'], ['advokatovem', '', 'Psnsem', 'advokatovem'], ['advokatovemu', '', 'Psnmed', 'advokatovemu'], ['advokatovemu', '', 'Psnsed', 'advokatovemu'], ['advokatovi', '', 'Psnmmi', 'advokatovi'], ['advokatovi', '', 'Psnsdi', 'advokatovi'], ['advokatovi', '', 'Psnsdt', 'advokatovi'], ['advokatovi', '', 'Psnzdi', 'advokatovi'], ['advokatovi', '', 'Psnzdt', 'advokatovi'], ['advokatovi', '', 'Psnzed', 'advokatovi'], ['advokatovi', '', 'Psnzem', 'advokatovi'], ['advokatovih', '', 'Psnmdm', 'advokatovih'], ['advokatovih', '', 'Psnmdr', 'advokatovih'], ['advokatovih', '', 'Psnmmm', 'advokatovih'], ['advokatovih', '', 'Psnmmr', 'advokatovih'], ['advokatovih', '', 'Psnsdm', 'advokatovih'], ['advokatovih', '', 'Psnsdr', 'advokatovih'], ['advokatovih', '', 'Psnsmm', 'advokatovih'], ['advokatovih', '', 'Psnsmr', 'advokatovih'], ['advokatovih', '', 'Psnzdm', 'advokatovih'], ['advokatovih', '', 'Psnzdr', 'advokatovih'], ['advokatovih', '', 'Psnzmm', 'advokatovih'], ['advokatovih', '', 'Psnzmr', 'advokatovih'], ['advokatovima', '', 'Psnmdd', 'advokatovima'], ['advokatovima', '', 'Psnmdo', 'advokatovima'], ['advokatovima', '', 'Psnsdd', 'advokatovima'], ['advokatovima', '', 'Psnsdo', 'advokatovima'], ['advokatovima', '', 'Psnzdd', 'advokatovima'], ['advokatovima', '', 'Psnzdo', 'advokatovima'], ['advokatovim', '', 'Psnmeo', 'advokatovim'], ['advokatovim', '', 'Psnmmd', 'advokatovim'], ['advokatovim', '', 'Psnseo', 'advokatovim'], ['advokatovim', '', 'Psnsmd', 'advokatovim'], ['advokatovim', '', 'Psnzmd', 'advokatovim'], ['advokatovimi', '', 'Psnmmo', 'advokatovimi'], ['advokatovimi', '', 'Psnsmo', 'advokatovimi'], ['advokatovimi', '', 'Psnzmo', 'advokatovimi'], ['advokatovo', '', 'Psnsei', 'advokatovo'], ['advokatovo', '', 'Psnset', 'advokatovo'], ['advokatovo', '', 'Psnzeo', 'advokatovo'], ['advokatovo', '', 'Psnzet', 'advokatovo'], ['advokatska', '', 'Ppnmdi', 'advokatska'], ['advokatska', '', 'Ppnmdt', 'advokatska'], ['advokatska', '', 'Ppnsmi', 'advokatska'], ['advokatska', '', 'Ppnsmt', 'advokatska'], ['advokatska', '', 'Ppnzei', 'advokatska'], ['advokatske', '', 'Ppnmmt', 'advokatske'], ['advokatske', '', 'Ppnzer', 'advokatske'], ['advokatske', '', 'Ppnzmi', 'advokatske'], ['advokatske', '', 'Ppnzmt', 'advokatske'], ['advokatskega', '', 'Ppnmer', 'advokatskega'], ['advokatskega', '', 'Ppnmet', 'advokatskega'], ['advokatskega', '', 'Ppnser', 'advokatskega'], ['advokatskem', '', 'Ppnmem', 'advokatskem'], ['advokatskem', '', 'Ppnsem', 'advokatskem'], ['advokatskemu', '', 'Ppnmed', 'advokatskemu'], ['advokatskemu', '', 'Ppnsed', 'advokatskemu'], ['advokatski', '', 'Ppnmeid', 'advokatski'], ['advokatski', '', 'Ppnmetd', 'advokatski'], ['advokatski', '', 'Ppnmmi', 'advokatski'], ['advokatski', '', 'Ppnsdi', 'advokatski'], ['advokatski', '', 'Ppnsdt', 'advokatski'], ['advokatski', '', 'Ppnzdi', 'advokatski'], ['advokatski', '', 'Ppnzdt', 'advokatski'], ['advokatski', '', 'Ppnzed', 'advokatski'], ['advokatski', '', 'Ppnzem', 'advokatski'], ['advokatskih', '', 'Ppnmdm', 'advokatskih'], ['advokatskih', '', 'Ppnmdr', 'advokatskih'], ['advokatskih', '', 'Ppnmmm', 'advokatskih'], ['advokatskih', '', 'Ppnmmr', 'advokatskih'], ['advokatskih', '', 'Ppnsdm', 'advokatskih'], ['advokatskih', '', 'Ppnsdr', 'advokatskih'], ['advokatskih', '', 'Ppnsmm', 'advokatskih'], ['advokatskih', '', 'Ppnsmr', 'advokatskih'], ['advokatskih', '', 'Ppnzdm', 'advokatskih'], ['advokatskih', '', 'Ppnzdr', 'advokatskih'], ['advokatskih', '', 'Ppnzmm', 'advokatskih'], ['advokatskih', '', 'Ppnzmr', 'advokatskih'], ['advokatskima', '', 'Ppnmdd', 'advokatskima'], ['advokatskima', '', 'Ppnmdo', 'advokatskima'], ['advokatskima', '', 'Ppnsdd', 'advokatskima'], ['advokatskima', '', 'Ppnsdo', 'advokatskima'], ['advokatskima', '', 'Ppnzdd', 'advokatskima'], ['advokatskima', '', 'Ppnzdo', 'advokatskima'], ['advokatskim', '', 'Ppnmeo', 'advokatskim'], ['advokatskim', '', 'Ppnmmd', 'advokatskim'], ['advokatskim', '', 'Ppnseo', 'advokatskim'], ['advokatskim', '', 'Ppnsmd', 'advokatskim'], ['advokatskim', '', 'Ppnzmd', 'advokatskim'], ['advokatskimi', '', 'Ppnmmo', 'advokatskimi'], ['advokatskimi', '', 'Ppnsmo', 'advokatskimi'], ['advokatskimi', '', 'Ppnzmo', 'advokatskimi'], ['advokatsko', '', 'Ppnsei', 'advokatsko'], ['advokatsko', '', 'Ppnset', 'advokatsko'], ['advokatsko', '', 'Ppnzeo', 'advokatsko'], ['advokatsko', '', 'Ppnzet', 'advokatsko'], ['advokatsko', '', 'Rsn', 'advokatsko'], ['advokatu', '', 'Somed', 'advokatu'], ['advokatu', '', 'Somem', 'advokatu'], ['advokatura', '', 'Sozei', 'advokatura'], ['advokatur', '', 'Sozdr', 'advokatur'], ['advokatur', '', 'Sozmr', 'advokatur'], ['advokaturah', '', 'Sozdm', 'advokaturah'], ['advokaturah', '', 'Sozmm', 'advokaturah'], ['advokaturama', '', 'Sozdd', 'advokaturama'], ['advokaturama', '', 'Sozdo', 'advokaturama'], ['advokaturam', '', 'Sozmd', 'advokaturam'], ['advokaturami', '', 'Sozmo', 'advokaturami'], ['advokature', '', 'Sozer', 'advokature'], ['advokature', '', 'Sozmi', 'advokature'], ['advokature', '', 'Sozmt', 'advokature'], ['advokaturi', '', 'Sozdi', 'advokaturi'], ['advokaturi', '', 'Sozdt', 'advokaturi'], ['advokaturi', '', 'Sozed', 'advokaturi'], ['advokaturi', '', 'Sozem', 'advokaturi'], ['advokaturo', '', 'Sozeo', 'advokaturo'], ['advokaturo', '', 'Sozet', 'advokaturo'], ['Adžićeva', '', 'Psnmdi', 'Adžićeva'], ['Adžićeva', '', 'Psnmdt', 'Adžićeva'], ['Adžićeva', '', 'Psnsmi', 'Adžićeva'], ['Adžićeva', '', 'Psnsmt', 'Adžićeva'], ['Adžićeva', '', 'Psnzei', 'Adžićeva'], ['Adžićev', '', 'Psnmein', 'Adžićev'], ['Adžićev', '', 'Psnmetn', 'Adžićev'], ['Adžićeve', '', 'Psnmmt', 'Adžićeve'], ['Adžićeve', '', 'Psnzer', 'Adžićeve'], ['Adžićeve', '', 'Psnzmi', 'Adžićeve'], ['Adžićeve', '', 'Psnzmt', 'Adžićeve'], ['Adžićevega', '', 'Psnmer', 'Adžićevega'], ['Adžićevega', '', 'Psnmet', 'Adžićevega'], ['Adžićevega', '', 'Psnser', 'Adžićevega'], ['Adžićevem', '', 'Psnmem', 'Adžićevem'], ['Adžićevem', '', 'Psnsem', 'Adžićevem'], ['Adžićevemu', '', 'Psnmed', 'Adžićevemu'], ['Adžićevemu', '', 'Psnsed', 'Adžićevemu'], ['Adžićevi', '', 'Psnmmi', 'Adžićevi'], ['Adžićevi', '', 'Psnsdi', 'Adžićevi'], ['Adžićevi', '', 'Psnsdt', 'Adžićevi'], ['Adžićevi', '', 'Psnzdi', 'Adžićevi'], ['Adžićevi', '', 'Psnzdt', 'Adžićevi'], ['Adžićevi', '', 'Psnzed', 'Adžićevi'], ['Adžićevi', '', 'Psnzem', 'Adžićevi'], ['Adžićevih', '', 'Psnmdm', 'Adžićevih'], ['Adžićevih', '', 'Psnmdr', 'Adžićevih'], ['Adžićevih', '', 'Psnmmm', 'Adžićevih'], ['Adžićevih', '', 'Psnmmr', 'Adžićevih'], ['Adžićevih', '', 'Psnsdm', 'Adžićevih'], ['Adžićevih', '', 'Psnsdr', 'Adžićevih'], ['Adžićevih', '', 'Psnsmm', 'Adžićevih'], ['Adžićevih', '', 'Psnsmr', 'Adžićevih'], ['Adžićevih', '', 'Psnzdm', 'Adžićevih'], ['Adžićevih', '', 'Psnzdr', 'Adžićevih'], ['Adžićevih', '', 'Psnzmm', 'Adžićevih'], ['Adžićevih', '', 'Psnzmr', 'Adžićevih'], ['Adžićevima', '', 'Psnmdd', 'Adžićevima'], ['Adžićevima', '', 'Psnmdo', 'Adžićevima'], ['Adžićevima', '', 'Psnsdd', 'Adžićevima'], ['Adžićevima', '', 'Psnsdo', 'Adžićevima'], ['Adžićevima', '', 'Psnzdd', 'Adžićevima'], ['Adžićevima', '', 'Psnzdo', 'Adžićevima'], ['Adžićevim', '', 'Psnmeo', 'Adžićevim'], ['Adžićevim', '', 'Psnmmd', 'Adžićevim'], ['Adžićevim', '', 'Psnseo', 'Adžićevim'], ['Adžićevim', '', 'Psnsmd', 'Adžićevim'], ['Adžićevim', '', 'Psnzmd', 'Adžićevim'], ['Adžićevimi', '', 'Psnmmo', 'Adžićevimi'], ['Adžićevimi', '', 'Psnsmo', 'Adžićevimi'], ['Adžićevimi', '', 'Psnzmo', 'Adžićevimi'], ['Adžićevo', '', 'Psnsei', 'Adžićevo'], ['Adžićevo', '', 'Psnset', 'Adžićevo'], ['Adžićevo', '', 'Psnzeo', 'Adžićevo'], ['Adžićevo', '', 'Psnzet', 'Adžićevo'], ['Aera', '', 'Slmer', 'Aera'], ['aerira', '', 'Ggvste', 'aerira'], ['aeriraj', '', 'Ggvvde', 'aeriraj'], ['aerirajmo', '', 'Ggvvpm', 'aerirajmo'], ['aerirajo', '', 'Ggvstm', 'aerirajo'], ['aerirajta', '', 'Ggvvdd', 'aerirajta'], ['aerirajte', '', 'Ggvvdm', 'aerirajte'], ['aerirajva', '', 'Ggvvpd', 'aerirajva'], ['aerirala', '', 'Ggvd-dm', 'aerirala'], ['aerirala', '', 'Ggvd-ez', 'aerirala'], ['aerirala', '', 'Ggvd-ms', 'aerirala'], ['aeriral', '', 'Ggvd-em', 'aeriral'], ['aerirale', '', 'Ggvd-mz', 'aerirale'], ['aerirali', '', 'Ggvd-ds', 'aerirali'], ['aerirali', '', 'Ggvd-dz', 'aerirali'], ['aerirali', '', 'Ggvd-mm', 'aerirali'], ['aeriralo', '', 'Ggvd-es', 'aeriralo'], ['aeriram', '', 'Ggvspe', 'aeriram'], ['aeriramo', '', 'Ggvspm', 'aeriramo'], ['aeriranja', '', 'Soser', 'aeriranja'], ['aeriranja', '', 'Sosmi', 'aeriranja'], ['aeriranja', '', 'Sosmt', 'aeriranja'], ['aeriranj', '', 'Sosdr', 'aeriranj'], ['aeriranj', '', 'Sosmr', 'aeriranj'], ['aeriranje', '', 'Sosei', 'aeriranje'], ['aeriranje', '', 'Soset', 'aeriranje'], ['aeriranjema', '', 'Sosdd', 'aeriranjema'], ['aeriranjema', '', 'Sosdo', 'aeriranjema'], ['aeriranjem', '', 'Soseo', 'aeriranjem'], ['aeriranjem', '', 'Sosmd', 'aeriranjem'], ['aeriranji', '', 'Sosdi', 'aeriranji'], ['aeriranji', '', 'Sosdt', 'aeriranji'], ['aeriranji', '', 'Sosmo', 'aeriranji'], ['aeriranjih', '', 'Sosdm', 'aeriranjih'], ['aeriranjih', '', 'Sosmm', 'aeriranjih'], ['aeriranju', '', 'Sosed', 'aeriranju'], ['aeriranju', '', 'Sosem', 'aeriranju'], ['aeriraš', '', 'Ggvsde', 'aeriraš'], ['aerirata', '', 'Ggvsdd', 'aerirata'], ['aerirata', '', 'Ggvstd', 'aerirata'], ['aerirat', '', 'Ggvm', 'aerirat'], ['aerirate', '', 'Ggvsdm', 'aerirate'], ['aerirati', '', 'Ggvn', 'aerirati'], ['aerirava', '', 'Ggvspd', 'aerirava'], ['Aero', '', 'Slmei', 'Aero'], ['Aero', '', 'Slmetn', 'Aero'], ['aeroben', '', 'Ppnmein', 'aeroben'], ['aeroben', '', 'Ppnmetn', 'aeroben'], ['aerobika', '', 'Sozei', 'aerobika'], ['aerobike', '', 'Sozer', 'aerobike'], ['aerobiki', '', 'Sozed', 'aerobiki'], ['aerobiki', '', 'Sozem', 'aerobiki'], ['aerobiko', '', 'Sozeo', 'aerobiko'], ['aerobiko', '', 'Sozet', 'aerobiko'], ['aerobna', '', 'Ppnmdi', 'aerobna'], ['aerobna', '', 'Ppnmdt', 'aerobna'], ['aerobna', '', 'Ppnsmi', 'aerobna'], ['aerobna', '', 'Ppnsmt', 'aerobna'], ['aerobna', '', 'Ppnzei', 'aerobna'], ['aerobne', '', 'Ppnmmt', 'aerobne'], ['aerobne', '', 'Ppnzer', 'aerobne'], ['aerobne', '', 'Ppnzmi', 'aerobne'], ['aerobne', '', 'Ppnzmt', 'aerobne'], ['aerobnega', '', 'Ppnmer', 'aerobnega'], ['aerobnega', '', 'Ppnmet', 'aerobnega'], ['aerobnega', '', 'Ppnser', 'aerobnega'], ['aerobnem', '', 'Ppnmem', 'aerobnem'], ['aerobnem', '', 'Ppnsem', 'aerobnem'], ['aerobnemu', '', 'Ppnmed', 'aerobnemu'], ['aerobnemu', '', 'Ppnsed', 'aerobnemu'], ['aerobni', '', 'Ppnmeid', 'aerobni'], ['aerobni', '', 'Ppnmetd', 'aerobni'], ['aerobni', '', 'Ppnmmi', 'aerobni'], ['aerobni', '', 'Ppnsdi', 'aerobni'], ['aerobni', '', 'Ppnsdt', 'aerobni'], ['aerobni', '', 'Ppnzdi', 'aerobni'], ['aerobni', '', 'Ppnzdt', 'aerobni'], ['aerobni', '', 'Ppnzed', 'aerobni'], ['aerobni', '', 'Ppnzem', 'aerobni'], ['aerobnih', '', 'Ppnmdm', 'aerobnih'], ['aerobnih', '', 'Ppnmdr', 'aerobnih'], ['aerobnih', '', 'Ppnmmm', 'aerobnih'], ['aerobnih', '', 'Ppnmmr', 'aerobnih'], ['aerobnih', '', 'Ppnsdm', 'aerobnih'], ['aerobnih', '', 'Ppnsdr', 'aerobnih'], ['aerobnih', '', 'Ppnsmm', 'aerobnih'], ['aerobnih', '', 'Ppnsmr', 'aerobnih'], ['aerobnih', '', 'Ppnzdm', 'aerobnih'], ['aerobnih', '', 'Ppnzdr', 'aerobnih'], ['aerobnih', '', 'Ppnzmm', 'aerobnih'], ['aerobnih', '', 'Ppnzmr', 'aerobnih'], ['aerobnima', '', 'Ppnmdd', 'aerobnima'], ['aerobnima', '', 'Ppnmdo', 'aerobnima'], ['aerobnima', '', 'Ppnsdd', 'aerobnima'], ['aerobnima', '', 'Ppnsdo', 'aerobnima'], ['aerobnima', '', 'Ppnzdd', 'aerobnima'], ['aerobnima', '', 'Ppnzdo', 'aerobnima'], ['aerobnim', '', 'Ppnmeo', 'aerobnim'], ['aerobnim', '', 'Ppnmmd', 'aerobnim'], ['aerobnim', '', 'Ppnseo', 'aerobnim'], ['aerobnim', '', 'Ppnsmd', 'aerobnim'], ['aerobnim', '', 'Ppnzmd', 'aerobnim'], ['aerobnimi', '', 'Ppnmmo', 'aerobnimi'], ['aerobnimi', '', 'Ppnsmo', 'aerobnimi'], ['aerobnimi', '', 'Ppnzmo', 'aerobnimi'], ['aerobno', '', 'Ppnsei', 'aerobno'], ['aerobno', '', 'Ppnset', 'aerobno'], ['aerobno', '', 'Ppnzeo', 'aerobno'], ['aerobno', '', 'Ppnzet', 'aerobno'], ['aerodinamičen', '', 'Ppnmein', 'aerodinamičen'], ['aerodinamičen', '', 'Ppnmetn', 'aerodinamičen'], ['aerodinamična', '', 'Ppnmdi', 'aerodinamična'], ['aerodinamična', '', 'Ppnmdt', 'aerodinamična'], ['aerodinamična', '', 'Ppnsmi', 'aerodinamična'], ['aerodinamična', '', 'Ppnsmt', 'aerodinamična'], ['aerodinamična', '', 'Ppnzei', 'aerodinamična'], ['aerodinamične', '', 'Ppnmmt', 'aerodinamične'], ['aerodinamične', '', 'Ppnzer', 'aerodinamične'], ['aerodinamične', '', 'Ppnzmi', 'aerodinamične'], ['aerodinamične', '', 'Ppnzmt', 'aerodinamične'], ['aerodinamičnega', '', 'Ppnmer', 'aerodinamičnega'], ['aerodinamičnega', '', 'Ppnmet', 'aerodinamičnega'], ['aerodinamičnega', '', 'Ppnser', 'aerodinamičnega'], ['aerodinamičnem', '', 'Ppnmem', 'aerodinamičnem'], ['aerodinamičnem', '', 'Ppnsem', 'aerodinamičnem'], ['aerodinamičnemu', '', 'Ppnmed', 'aerodinamičnemu'], ['aerodinamičnemu', '', 'Ppnsed', 'aerodinamičnemu'], ['aerodinamični', '', 'Ppnmeid', 'aerodinamični'], ['aerodinamični', '', 'Ppnmetd', 'aerodinamični'], ['aerodinamični', '', 'Ppnmmi', 'aerodinamični'], ['aerodinamični', '', 'Ppnsdi', 'aerodinamični'], ['aerodinamični', '', 'Ppnsdt', 'aerodinamični'], ['aerodinamični', '', 'Ppnzdi', 'aerodinamični'], ['aerodinamični', '', 'Ppnzdt', 'aerodinamični'], ['aerodinamični', '', 'Ppnzed', 'aerodinamični'], ['aerodinamični', '', 'Ppnzem', 'aerodinamični'], ['aerodinamičnih', '', 'Ppnmdm', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnmdr', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnmmm', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnmmr', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnsdm', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnsdr', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnsmm', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnsmr', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnzdm', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnzdr', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnzmm', 'aerodinamičnih'], ['aerodinamičnih', '', 'Ppnzmr', 'aerodinamičnih'], ['aerodinamičnima', '', 'Ppnmdd', 'aerodinamičnima'], ['aerodinamičnima', '', 'Ppnmdo', 'aerodinamičnima'], ['aerodinamičnima', '', 'Ppnsdd', 'aerodinamičnima'], ['aerodinamičnima', '', 'Ppnsdo', 'aerodinamičnima'], ['aerodinamičnima', '', 'Ppnzdd', 'aerodinamičnima'], ['aerodinamičnima', '', 'Ppnzdo', 'aerodinamičnima'], ['aerodinamičnim', '', 'Ppnmeo', 'aerodinamičnim'], ['aerodinamičnim', '', 'Ppnmmd', 'aerodinamičnim'], ['aerodinamičnim', '', 'Ppnseo', 'aerodinamičnim'], ['aerodinamičnim', '', 'Ppnsmd', 'aerodinamičnim'], ['aerodinamičnim', '', 'Ppnzmd', 'aerodinamičnim'], ['aerodinamičnimi', '', 'Ppnmmo', 'aerodinamičnimi'], ['aerodinamičnimi', '', 'Ppnsmo', 'aerodinamičnimi'], ['aerodinamičnimi', '', 'Ppnzmo', 'aerodinamičnimi'], ['aerodinamično', '', 'Ppnsei', 'aerodinamično'], ['aerodinamično', '', 'Ppnset', 'aerodinamično'], ['aerodinamično', '', 'Ppnzeo', 'aerodinamično'], ['aerodinamično', '', 'Ppnzet', 'aerodinamično'], ['aerodinamično', '', 'Rsn', 'aerodinamično'], ['aerodinamika', '', 'Sozei', 'aerodinamika'], ['aerodinamike', '', 'Sozer', 'aerodinamike'], ['aerodinamiki', '', 'Sozed', 'aerodinamiki'], ['aerodinamiki', '', 'Sozem', 'aerodinamiki'], ['aerodinamiko', '', 'Sozeo', 'aerodinamiko'], ['aerodinamiko', '', 'Sozet', 'aerodinamiko'], ['aerodroma', '', 'Somdi', 'aerodroma'], ['aerodroma', '', 'Somdt', 'aerodroma'], ['aerodroma', '', 'Somer', 'aerodroma'], ['aerodrom', '', 'Somei', 'aerodrom'], ['aerodrom', '', 'Sometn', 'aerodrom'], ['aerodrome', '', 'Sommt', 'aerodrome'], ['aerodromi', '', 'Sommi', 'aerodromi'], ['aerodromi', '', 'Sommo', 'aerodromi'], ['aerodromih', '', 'Somdm', 'aerodromih'], ['aerodromih', '', 'Sommm', 'aerodromih'], ['aerodromoma', '', 'Somdd', 'aerodromoma'], ['aerodromoma', '', 'Somdo', 'aerodromoma'], ['aerodromom', '', 'Someo', 'aerodromom'], ['aerodromom', '', 'Sommd', 'aerodromom'], ['aerodromov', '', 'Somdr', 'aerodromov'], ['aerodromov', '', 'Sommr', 'aerodromov'], ['aerodromu', '', 'Somed', 'aerodromu'], ['aerodromu', '', 'Somem', 'aerodromu'], ['aerofotografija', '', 'Sozei', 'aerofotografija'], ['aerofotografij', '', 'Sozdr', 'aerofotografij'], ['aerofotografij', '', 'Sozmr', 'aerofotografij'], ['aerofotografijah', '', 'Sozdm', 'aerofotografijah'], ['aerofotografijah', '', 'Sozmm', 'aerofotografijah'], ['aerofotografijama', '', 'Sozdd', 'aerofotografijama'], ['aerofotografijama', '', 'Sozdo', 'aerofotografijama'], ['aerofotografijam', '', 'Sozmd', 'aerofotografijam'], ['aerofotografijami', '', 'Sozmo', 'aerofotografijami'], ['aerofotografije', '', 'Sozer', 'aerofotografije'], ['aerofotografije', '', 'Sozmi', 'aerofotografije'], ['aerofotografije', '', 'Sozmt', 'aerofotografije'], ['aerofotografiji', '', 'Sozdi', 'aerofotografiji'], ['aerofotografiji', '', 'Sozdt', 'aerofotografiji'], ['aerofotografiji', '', 'Sozed', 'aerofotografiji'], ['aerofotografiji', '', 'Sozem', 'aerofotografiji'], ['aerofotografijo', '', 'Sozeo', 'aerofotografijo'], ['aerofotografijo', '', 'Sozet', 'aerofotografijo'], ['aerofotogrametrija', '', 'Sozei', 'aerofotogrametrija'], ['aerofotogrametrij', '', 'Sozdr', 'aerofotogrametrij'], ['aerofotogrametrij', '', 'Sozmr', 'aerofotogrametrij'], ['aerofotogrametrijah', '', 'Sozdm', 'aerofotogrametrijah'], ['aerofotogrametrijah', '', 'Sozmm', 'aerofotogrametrijah'], ['aerofotogrametrijama', '', 'Sozdd', 'aerofotogrametrijama'], ['aerofotogrametrijama', '', 'Sozdo', 'aerofotogrametrijama'], ['aerofotogrametrijam', '', 'Sozmd', 'aerofotogrametrijam'], ['aerofotogrametrijami', '', 'Sozmo', 'aerofotogrametrijami'], ['aerofotogrametrije', '', 'Sozer', 'aerofotogrametrije'], ['aerofotogrametrije', '', 'Sozmi', 'aerofotogrametrije'], ['aerofotogrametrije', '', 'Sozmt', 'aerofotogrametrije'], ['aerofotogrametriji', '', 'Sozdi', 'aerofotogrametriji'], ['aerofotogrametriji', '', 'Sozdt', 'aerofotogrametriji'], ['aerofotogrametriji', '', 'Sozed', 'aerofotogrametriji'], ['aerofotogrametriji', '', 'Sozem', 'aerofotogrametriji'], ['aerofotogrametrijo', '', 'Sozeo', 'aerofotogrametrijo'], ['aerofotogrametrijo', '', 'Sozet', 'aerofotogrametrijo'], ['aerokluba', '', 'Somdi', 'aerokluba'], ['aerokluba', '', 'Somdt', 'aerokluba'], ['aerokluba', '', 'Somer', 'aerokluba'], ['aeroklub', '', 'Somei', 'aeroklub'], ['aeroklub', '', 'Sometn', 'aeroklub'], ['aeroklube', '', 'Sommt', 'aeroklube'], ['aeroklubi', '', 'Sommi', 'aeroklubi'], ['aeroklubi', '', 'Sommo', 'aeroklubi'], ['aeroklubih', '', 'Somdm', 'aeroklubih'], ['aeroklubih', '', 'Sommm', 'aeroklubih'], ['aerokluboma', '', 'Somdd', 'aerokluboma'], ['aerokluboma', '', 'Somdo', 'aerokluboma'], ['aeroklubom', '', 'Someo', 'aeroklubom'], ['aeroklubom', '', 'Sommd', 'aeroklubom'], ['aeroklubov', '', 'Somdr', 'aeroklubov'], ['aeroklubov', '', 'Sommr', 'aeroklubov'], ['aeroklubu', '', 'Somed', 'aeroklubu'], ['aeroklubu', '', 'Somem', 'aeroklubu'], ['aerolita', '', 'Somdi', 'aerolita'], ['aerolita', '', 'Somdt', 'aerolita'], ['aerolita', '', 'Somer', 'aerolita'], ['aerolit', '', 'Somei', 'aerolit'], ['aerolit', '', 'Sometn', 'aerolit'], ['aerolite', '', 'Sommt', 'aerolite'], ['aeroliti', '', 'Sommi', 'aeroliti'], ['aeroliti', '', 'Sommo', 'aeroliti'], ['aerolitih', '', 'Somdm', 'aerolitih'], ['aerolitih', '', 'Sommm', 'aerolitih'], ['aerolitoma', '', 'Somdd', 'aerolitoma'], ['aerolitoma', '', 'Somdo', 'aerolitoma'], ['aerolitom', '', 'Someo', 'aerolitom'], ['aerolitom', '', 'Sommd', 'aerolitom'], ['aerolitov', '', 'Somdr', 'aerolitov'], ['aerolitov', '', 'Sommr', 'aerolitov'], ['aerolitu', '', 'Somed', 'aerolitu'], ['aerolitu', '', 'Somem', 'aerolitu'], ['Aerom', '', 'Slmeo', 'Aerom'], ['aerometer', '', 'Somei', 'aerometer'], ['aerometer', '', 'Sometn', 'aerometer'], ['aerometra', '', 'Somdi', 'aerometra'], ['aerometra', '', 'Somdt', 'aerometra'], ['aerometra', '', 'Somer', 'aerometra'], ['aerometre', '', 'Sommt', 'aerometre'], ['aerometri', '', 'Sommi', 'aerometri'], ['aerometri', '', 'Sommo', 'aerometri'], ['aerometrih', '', 'Somdm', 'aerometrih'], ['aerometrih', '', 'Sommm', 'aerometrih'], ['aerometroma', '', 'Somdd', 'aerometroma'], ['aerometroma', '', 'Somdo', 'aerometroma'], ['aerometrom', '', 'Someo', 'aerometrom'], ['aerometrom', '', 'Sommd', 'aerometrom'], ['aerometrov', '', 'Somdr', 'aerometrov'], ['aerometrov', '', 'Sommr', 'aerometrov'], ['aerometru', '', 'Somed', 'aerometru'], ['aerometru', '', 'Somem', 'aerometru'], ['aeromitinga', '', 'Somdi', 'aeromitinga'], ['aeromitinga', '', 'Somdt', 'aeromitinga'], ['aeromitinga', '', 'Somer', 'aeromitinga'], ['aeromiting', '', 'Somei', 'aeromiting'], ['aeromiting', '', 'Sometn', 'aeromiting'], ['aeromitinge', '', 'Sommt', 'aeromitinge'], ['aeromitingi', '', 'Sommi', 'aeromitingi'], ['aeromitingi', '', 'Sommo', 'aeromitingi'], ['aeromitingih', '', 'Somdm', 'aeromitingih'], ['aeromitingih', '', 'Sommm', 'aeromitingih'], ['aeromitingoma', '', 'Somdd', 'aeromitingoma'], ['aeromitingoma', '', 'Somdo', 'aeromitingoma'], ['aeromitingom', '', 'Someo', 'aeromitingom'], ['aeromitingom', '', 'Sommd', 'aeromitingom'], ['aeromitingov', '', 'Somdr', 'aeromitingov'], ['aeromitingov', '', 'Sommr', 'aeromitingov'], ['aeromitingu', '', 'Somed', 'aeromitingu'], ['aeromitingu', '', 'Somem', 'aeromitingu'], ['aeronautika', '', 'Sozei', 'aeronautika'], ['aeronautik', '', 'Sozdr', 'aeronautik'], ['aeronautik', '', 'Sozmr', 'aeronautik'], ['aeronautikah', '', 'Sozdm', 'aeronautikah'], ['aeronautikah', '', 'Sozmm', 'aeronautikah'], ['aeronautikama', '', 'Sozdd', 'aeronautikama'], ['aeronautikama', '', 'Sozdo', 'aeronautikama'], ['aeronautikam', '', 'Sozmd', 'aeronautikam'], ['aeronautikami', '', 'Sozmo', 'aeronautikami'], ['aeronautike', '', 'Sozer', 'aeronautike'], ['aeronautike', '', 'Sozmi', 'aeronautike'], ['aeronautike', '', 'Sozmt', 'aeronautike'], ['aeronautiki', '', 'Sozdi', 'aeronautiki'], ['aeronautiki', '', 'Sozdt', 'aeronautiki'], ['aeronautiki', '', 'Sozed', 'aeronautiki'], ['aeronautiki', '', 'Sozem', 'aeronautiki'], ['aeronautiko', '', 'Sozeo', 'aeronautiko'], ['aeronautiko', '', 'Sozet', 'aeronautiko'], ['aeronavta', '', 'Somdi', 'aeronavta'], ['aeronavta', '', 'Somdt', 'aeronavta'], ['aeronavta', '', 'Somer', 'aeronavta'], ['aeronavta', '', 'Sometd', 'aeronavta'], ['aeronavt', '', 'Somei', 'aeronavt'], ['aeronavte', '', 'Sommt', 'aeronavte'], ['aeronavti', '', 'Sommi', 'aeronavti'], ['aeronavti', '', 'Sommo', 'aeronavti'], ['aeronavtičen', '', 'Ppnmein', 'aeronavtičen'], ['aeronavtičen', '', 'Ppnmetn', 'aeronavtičen'], ['aeronavtična', '', 'Ppnmdi', 'aeronavtična'], ['aeronavtična', '', 'Ppnmdt', 'aeronavtična'], ['aeronavtična', '', 'Ppnsmi', 'aeronavtična'], ['aeronavtična', '', 'Ppnsmt', 'aeronavtična'], ['aeronavtična', '', 'Ppnzei', 'aeronavtična'], ['aeronavtične', '', 'Ppnmmt', 'aeronavtične'], ['aeronavtične', '', 'Ppnzer', 'aeronavtične'], ['aeronavtične', '', 'Ppnzmi', 'aeronavtične'], ['aeronavtične', '', 'Ppnzmt', 'aeronavtične'], ['aeronavtičnega', '', 'Ppnmer', 'aeronavtičnega'], ['aeronavtičnega', '', 'Ppnmet', 'aeronavtičnega'], ['aeronavtičnega', '', 'Ppnser', 'aeronavtičnega'], ['aeronavtičnem', '', 'Ppnmem', 'aeronavtičnem'], ['aeronavtičnem', '', 'Ppnsem', 'aeronavtičnem'], ['aeronavtičnemu', '', 'Ppnmed', 'aeronavtičnemu'], ['aeronavtičnemu', '', 'Ppnsed', 'aeronavtičnemu'], ['aeronavtični', '', 'Ppnmeid', 'aeronavtični'], ['aeronavtični', '', 'Ppnmetd', 'aeronavtični'], ['aeronavtični', '', 'Ppnmmi', 'aeronavtični'], ['aeronavtični', '', 'Ppnsdi', 'aeronavtični'], ['aeronavtični', '', 'Ppnsdt', 'aeronavtični'], ['aeronavtični', '', 'Ppnzdi', 'aeronavtični'], ['aeronavtični', '', 'Ppnzdt', 'aeronavtični'], ['aeronavtični', '', 'Ppnzed', 'aeronavtični'], ['aeronavtični', '', 'Ppnzem', 'aeronavtični'], ['aeronavtičnih', '', 'Ppnmdm', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnmdr', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnmmm', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnmmr', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnsdm', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnsdr', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnsmm', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnsmr', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnzdm', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnzdr', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnzmm', 'aeronavtičnih'], ['aeronavtičnih', '', 'Ppnzmr', 'aeronavtičnih'], ['aeronavtičnima', '', 'Ppnmdd', 'aeronavtičnima'], ['aeronavtičnima', '', 'Ppnmdo', 'aeronavtičnima'], ['aeronavtičnima', '', 'Ppnsdd', 'aeronavtičnima'], ['aeronavtičnima', '', 'Ppnsdo', 'aeronavtičnima'], ['aeronavtičnima', '', 'Ppnzdd', 'aeronavtičnima'], ['aeronavtičnima', '', 'Ppnzdo', 'aeronavtičnima'], ['aeronavtičnim', '', 'Ppnmeo', 'aeronavtičnim'], ['aeronavtičnim', '', 'Ppnmmd', 'aeronavtičnim'], ['aeronavtičnim', '', 'Ppnseo', 'aeronavtičnim'], ['aeronavtičnim', '', 'Ppnsmd', 'aeronavtičnim'], ['aeronavtičnim', '', 'Ppnzmd', 'aeronavtičnim'], ['aeronavtičnimi', '', 'Ppnmmo', 'aeronavtičnimi'], ['aeronavtičnimi', '', 'Ppnsmo', 'aeronavtičnimi'], ['aeronavtičnimi', '', 'Ppnzmo', 'aeronavtičnimi'], ['aeronavtično', '', 'Ppnsei', 'aeronavtično'], ['aeronavtično', '', 'Ppnset', 'aeronavtično'], ['aeronavtično', '', 'Ppnzeo', 'aeronavtično'], ['aeronavtično', '', 'Ppnzet', 'aeronavtično'], ['aeronavtih', '', 'Somdm', 'aeronavtih'], ['aeronavtih', '', 'Sommm', 'aeronavtih'], ['aeronavtika', '', 'Sozei', 'aeronavtika'], ['aeronavtik', '', 'Sozdr', 'aeronavtik'], ['aeronavtik', '', 'Sozmr', 'aeronavtik'], ['aeronavtikah', '', 'Sozdm', 'aeronavtikah'], ['aeronavtikah', '', 'Sozmm', 'aeronavtikah'], ['aeronavtikama', '', 'Sozdd', 'aeronavtikama'], ['aeronavtikama', '', 'Sozdo', 'aeronavtikama'], ['aeronavtikam', '', 'Sozmd', 'aeronavtikam'], ['aeronavtikami', '', 'Sozmo', 'aeronavtikami'], ['aeronavtike', '', 'Sozer', 'aeronavtike'], ['aeronavtike', '', 'Sozmi', 'aeronavtike'], ['aeronavtike', '', 'Sozmt', 'aeronavtike'], ['aeronavtiki', '', 'Sozdi', 'aeronavtiki'], ['aeronavtiki', '', 'Sozdt', 'aeronavtiki'], ['aeronavtiki', '', 'Sozed', 'aeronavtiki'], ['aeronavtiki', '', 'Sozem', 'aeronavtiki'], ['aeronavtiko', '', 'Sozeo', 'aeronavtiko'], ['aeronavtiko', '', 'Sozet', 'aeronavtiko'], ['aeronavtka', '', 'Sozei', 'aeronavtka'], ['aeronavtk', '', 'Sozdr', 'aeronavtk'], ['aeronavtk', '', 'Sozmr', 'aeronavtk'], ['aeronavtkah', '', 'Sozdm', 'aeronavtkah'], ['aeronavtkah', '', 'Sozmm', 'aeronavtkah'], ['aeronavtkama', '', 'Sozdd', 'aeronavtkama'], ['aeronavtkama', '', 'Sozdo', 'aeronavtkama'], ['aeronavtkam', '', 'Sozmd', 'aeronavtkam'], ['aeronavtkami', '', 'Sozmo', 'aeronavtkami'], ['aeronavtke', '', 'Sozer', 'aeronavtke'], ['aeronavtke', '', 'Sozmi', 'aeronavtke'], ['aeronavtke', '', 'Sozmt', 'aeronavtke'], ['aeronavtki', '', 'Sozdi', 'aeronavtki'], ['aeronavtki', '', 'Sozdt', 'aeronavtki'], ['aeronavtki', '', 'Sozed', 'aeronavtki'], ['aeronavtki', '', 'Sozem', 'aeronavtki'], ['aeronavtko', '', 'Sozeo', 'aeronavtko'], ['aeronavtko', '', 'Sozet', 'aeronavtko'], ['aeronavtoma', '', 'Somdd', 'aeronavtoma'], ['aeronavtoma', '', 'Somdo', 'aeronavtoma'], ['aeronavtom', '', 'Someo', 'aeronavtom'], ['aeronavtom', '', 'Sommd', 'aeronavtom'], ['aeronavtov', '', 'Somdr', 'aeronavtov'], ['aeronavtov', '', 'Sommr', 'aeronavtov'], ['aeronavtu', '', 'Somed', 'aeronavtu'], ['aeronavtu', '', 'Somem', 'aeronavtu'], ['aeroplana', '', 'Somdi', 'aeroplana'], ['aeroplana', '', 'Somdt', 'aeroplana'], ['aeroplana', '', 'Somer', 'aeroplana'], ['aeroplan', '', 'Somei', 'aeroplan'], ['aeroplan', '', 'Sometn', 'aeroplan'], ['aeroplane', '', 'Sommt', 'aeroplane'], ['aeroplani', '', 'Sommi', 'aeroplani'], ['aeroplani', '', 'Sommo', 'aeroplani'], ['aeroplanih', '', 'Somdm', 'aeroplanih'], ['aeroplanih', '', 'Sommm', 'aeroplanih'], ['aeroplanoma', '', 'Somdd', 'aeroplanoma'], ['aeroplanoma', '', 'Somdo', 'aeroplanoma'], ['aeroplanom', '', 'Someo', 'aeroplanom'], ['aeroplanom', '', 'Sommd', 'aeroplanom'], ['aeroplanov', '', 'Somdr', 'aeroplanov'], ['aeroplanov', '', 'Sommr', 'aeroplanov'], ['aeroplanu', '', 'Somed', 'aeroplanu'], ['aeroplanu', '', 'Somem', 'aeroplanu'], ['aeroporta', '', 'Somdi', 'aeroporta'], ['aeroporta', '', 'Somdt', 'aeroporta'], ['aeroporta', '', 'Somer', 'aeroporta'], ['aeroport', '', 'Somei', 'aeroport'], ['aeroport', '', 'Sometn', 'aeroport'], ['aeroporte', '', 'Sommt', 'aeroporte'], ['aeroporti', '', 'Sommi', 'aeroporti'], ['aeroporti', '', 'Sommo', 'aeroporti'], ['aeroportih', '', 'Somdm', 'aeroportih'], ['aeroportih', '', 'Sommm', 'aeroportih'], ['aeroportoma', '', 'Somdd', 'aeroportoma'], ['aeroportoma', '', 'Somdo', 'aeroportoma'], ['aeroportom', '', 'Someo', 'aeroportom'], ['aeroportom', '', 'Sommd', 'aeroportom'], ['aeroportov', '', 'Somdr', 'aeroportov'], ['aeroportov', '', 'Sommr', 'aeroportov'], ['aeroportu', '', 'Somed', 'aeroportu'], ['aeroportu', '', 'Somem', 'aeroportu'], ['aeroposnetek', '', 'Somei', 'aeroposnetek'], ['aeroposnetek', '', 'Sometn', 'aeroposnetek'], ['aeroposnetka', '', 'Somdi', 'aeroposnetka'], ['aeroposnetka', '', 'Somdt', 'aeroposnetka'], ['aeroposnetka', '', 'Somer', 'aeroposnetka'], ['aeroposnetke', '', 'Sommt', 'aeroposnetke'], ['aeroposnetki', '', 'Sommi', 'aeroposnetki'], ['aeroposnetki', '', 'Sommo', 'aeroposnetki'], ['aeroposnetkih', '', 'Somdm', 'aeroposnetkih'], ['aeroposnetkih', '', 'Sommm', 'aeroposnetkih'], ['aeroposnetkoma', '', 'Somdd', 'aeroposnetkoma'], ['aeroposnetkoma', '', 'Somdo', 'aeroposnetkoma'], ['aeroposnetkom', '', 'Someo', 'aeroposnetkom'], ['aeroposnetkom', '', 'Sommd', 'aeroposnetkom'], ['aeroposnetkov', '', 'Somdr', 'aeroposnetkov'], ['aeroposnetkov', '', 'Sommr', 'aeroposnetkov'], ['aeroposnetku', '', 'Somed', 'aeroposnetku'], ['aeroposnetku', '', 'Somem', 'aeroposnetku'], ['aerosola', '', 'Somdi', 'aerosola'], ['aerosola', '', 'Somdt', 'aerosola'], ['aerosola', '', 'Somer', 'aerosola'], ['aerosol', '', 'Somei', 'aerosol'], ['aerosol', '', 'Sometn', 'aerosol'], ['aerosole', '', 'Sommt', 'aerosole'], ['aerosoli', '', 'Sommi', 'aerosoli'], ['aerosoli', '', 'Sommo', 'aerosoli'], ['aerosolih', '', 'Somdm', 'aerosolih'], ['aerosolih', '', 'Sommm', 'aerosolih'], ['aerosoloma', '', 'Somdd', 'aerosoloma'], ['aerosoloma', '', 'Somdo', 'aerosoloma'], ['aerosolom', '', 'Someo', 'aerosolom'], ['aerosolom', '', 'Sommd', 'aerosolom'], ['aerosolov', '', 'Somdr', 'aerosolov'], ['aerosolov', '', 'Sommr', 'aerosolov'], ['aerosolu', '', 'Somed', 'aerosolu'], ['aerosolu', '', 'Somem', 'aerosolu'], ['aerostata', '', 'Somdi', 'aerostata'], ['aerostata', '', 'Somdt', 'aerostata'], ['aerostata', '', 'Somer', 'aerostata'], ['aerostat', '', 'Somei', 'aerostat'], ['aerostat', '', 'Sometn', 'aerostat'], ['aerostate', '', 'Sommt', 'aerostate'], ['aerostati', '', 'Sommi', 'aerostati'], ['aerostati', '', 'Sommo', 'aerostati'], ['aerostatih', '', 'Somdm', 'aerostatih'], ['aerostatih', '', 'Sommm', 'aerostatih'], ['aerostatika', '', 'Sozei', 'aerostatika'], ['aerostatike', '', 'Sozer', 'aerostatike'], ['aerostatiki', '', 'Sozed', 'aerostatiki'], ['aerostatiki', '', 'Sozem', 'aerostatiki'], ['aerostatiko', '', 'Sozeo', 'aerostatiko'], ['aerostatiko', '', 'Sozet', 'aerostatiko'], ['aerostatoma', '', 'Somdd', 'aerostatoma'], ['aerostatoma', '', 'Somdo', 'aerostatoma'], ['aerostatom', '', 'Someo', 'aerostatom'], ['aerostatom', '', 'Sommd', 'aerostatom'], ['aerostatov', '', 'Somdr', 'aerostatov'], ['aerostatov', '', 'Sommr', 'aerostatov'], ['aerostatu', '', 'Somed', 'aerostatu'], ['aerostatu', '', 'Somem', 'aerostatu'], ['Aerova', '', 'Psnmdi', 'Aerova'], ['Aerova', '', 'Psnmdt', 'Aerova'], ['Aerova', '', 'Psnsmi', 'Aerova'], ['Aerova', '', 'Psnsmt', 'Aerova'], ['Aerova', '', 'Psnzei', 'Aerova'], ['Aerov', '', 'Psnmein', 'Aerov'], ['Aerov', '', 'Psnmetn', 'Aerov'], ['Aerove', '', 'Psnmmt', 'Aerove'], ['Aerove', '', 'Psnzer', 'Aerove'], ['Aerove', '', 'Psnzmi', 'Aerove'], ['Aerove', '', 'Psnzmt', 'Aerove'], ['Aerovega', '', 'Psnmer', 'Aerovega'], ['Aerovega', '', 'Psnmet', 'Aerovega']]\n" + ] + } + ], + "source": [ + "print(words.append['nadnaravno', '', 'Ppnsei'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Words proccesed: 650250\n", + "#Word indeks: 50023\n", + "#Word number: 50023\n", + "\n", + "#done_lexical_entries = 33522\n", + "\n", + "#new_content = data._read_content('sloleks-sl_v1.2.tbl')\n", + "rate = 100000\n", + "start_timer = time.time()\n", + "with open(\"data/new_sloleks/new_sloleks.tab\", \"a\") as myfile:\n", + " for index in range(0, len(new_content), rate):\n", + " if index+rate >= len(new_content):\n", + " words = [[el[0], '', el[2], el[0]] for el in new_content][index:len(new_content)]\n", + " else:\n", + " words = [[el[0], '', el[2], el[0]] for el in new_content][index:index+rate]\n", + " data = Data('l', shuffle_all_inputs=False)\n", + " location_accented_words, accented_words = data.accentuate_word(words, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n", + " letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,\n", + " letter_type_model, syllable_type_model, syllabled_letter_type_model,\n", + " letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,\n", + " dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)\n", + "\n", + " res = ''\n", + " for i in range(index, index + len(words)):\n", + " res += new_content[i][0] + '\\t' + new_content[i][1] + '\\t' + new_content[i][2] + '\\t' \\\n", + " + new_content[i][3][:-1] + '\\t' + location_accented_words[i-index] + '\\t' + accented_words[i-index] + '\\n'\n", + "\n", + " print('Writing data from ' + str(index) + ' onward.')\n", + " end_timer = time.time()\n", + " print(\"Elapsed time: \" + \"{0:.2f}\".format((end_timer - start_timer)/60.0) + \" minutes\")\n", + " myfile.write(res)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}