stress_asignment/sloleks_accetuation2.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "from __future__ import unicode_literals\n",
    "\n",
    "import numpy as np\n",
    "from keras.models import load_model\n",
    "import sys\n",
    "import pickle\n",
    "import time\n",
    "\n",
    "from prepare_data import *\n",
    "\n",
    "np.random.seed(7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read the lexicon file and derive the lookup structures used by the later cells.\n",
    "data = Data('l', shuffle_all_inputs=False)\n",
    "content = data._read_content('data/SlovarIJS_BESEDE_utf8.lex')\n",
    "(dictionary,\n",
    " max_word,\n",
    " max_num_vowels,\n",
    " vowels,\n",
    " accented_vowels) = data._create_dict(content)\n",
    "feature_dictionary = data._create_slovene_feature_dictionary()\n",
    "syllable_dictionary = data._create_syllables_dictionary(content, vowels)\n",
    "\n",
    "# The list returned by `_create_dict` is replaced by this explicit one.\n",
    "accented_vowels = ['ŕ', 'á', 'ä', 'é', 'ë', 'ě', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Bundle the prepared structures into one dict and pickle it to 'environment.pkl'.\n",
    "environment = {\n",
    "    'dictionary': dictionary,\n",
    "    'max_word': max_word,\n",
    "    'max_num_vowels': max_num_vowels,\n",
    "    'vowels': vowels,\n",
    "    'accented_vowels': accented_vowels,\n",
    "    'feature_dictionary': feature_dictionary,\n",
    "    # NOTE(review): the Slovene feature dictionary is stored under both keys -- confirm this is intended.\n",
    "    'eng_feature_dictionary': feature_dictionary,\n",
    "    'syllable_dictionary': syllable_dictionary,\n",
    "}\n",
    "\n",
    "# `with` closes the file even if pickling raises.\n",
    "with open('environment.pkl', 'wb') as output:\n",
    "    pickle.dump(environment, output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "407\n"
     ]
    }
   ],
   "source": [
    "# Print the position of every entry equal to \"da\" in the iteration order of syllable_dictionary.\n",
    "for i, el in enumerate(syllable_dictionary):\n",
    "    if el == \"da\":\n",
    "        print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Execute prepare_data.py inside the notebook namespace before using Data.\n",
    "%run prepare_data.py\n",
    "\n",
    "data = Data('l', shuffle_all_inputs=False)\n",
    "\n",
    "# Accent-location models: letters, syllables, syllabled letters.\n",
    "(letter_location_model,\n",
    " syllable_location_model,\n",
    " syllabled_letters_location_model) = data.load_location_models(\n",
    "    'cnn/word_accetuation/cnn_dictionary/v5_3/20_final_epoch.h5',\n",
    "    'cnn/word_accetuation/syllables/v3_3/20_final_epoch.h5',\n",
    "    'cnn/word_accetuation/syllabled_letters/v3_3/20_final_epoch.h5')\n",
    "\n",
    "# Second set of accent-location models (the `_co_` variants).\n",
    "(letter_location_co_model,\n",
    " syllable_location_co_model,\n",
    " syllabled_letters_location_co_model) = data.load_location_models(\n",
    "    'cnn/word_accetuation/cnn_dictionary/v5_2/20_final_epoch.h5',\n",
    "    'cnn/word_accetuation/syllables/v3_2/20_final_epoch.h5',\n",
    "    'cnn/word_accetuation/syllabled_letters/v3_2/20_final_epoch.h5')\n",
    "\n",
    "# Accent-type models: letters, syllables, syllabled letters.\n",
    "(letter_type_model,\n",
    " syllable_type_model,\n",
    " syllabled_letter_type_model) = data.load_type_models(\n",
    "    'cnn/accent_classification/letters/v3_1/20_final_epoch.h5',\n",
    "    'cnn/accent_classification/syllables/v2_1/20_final_epoch.h5',\n",
    "    'cnn/accent_classification/syllabled_letters/v2_1/20_final_epoch.h5')\n",
    "\n",
    "# Second set of accent-type models (the `_co_` variants).\n",
    "(letter_type_co_model,\n",
    " syllable_type_co_model,\n",
    " syllabled_letter_type_co_model) = data.load_type_models(\n",
    "    'cnn/accent_classification/letters/v3_0/20_final_epoch.h5',\n",
    "    'cnn/accent_classification/syllables/v2_0/20_final_epoch.h5',\n",
    "    'cnn/accent_classification/syllabled_letters/v2_0/20_final_epoch.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Sample input rows in the same 4-field layout as `words`: [word, '', tag, word].\n",
    "test_input = [\n",
    "    ['uradni', '', 'Agpmpn', 'uradni'],\n",
    "    ['podatki', '', 'Ncmpn', 'podatki'],\n",
    "    ['policije', '', 'Ncfsg', 'policije'],\n",
    "    ['kažejo', '', 'Vmpr3p', 'kažejo'],\n",
    "    ['na', '', 'Sa', 'na'],\n",
    "    ['precej', '', 'Rgp', 'precej'],\n",
    "    ['napete', '', 'Appfpa', 'napete'],\n",
    "    ['razmere', '', 'Ncfpa', 'razmere'],\n",
    "    ['v', '', 'Sl', 'v'],\n",
    "    ['piranskem', '', 'Agpmsl', 'piranskem'],\n",
    "    ['zalivu', '', 'Ncmsl', 'zalivu'],\n",
    "    ['je', '', 'Va-r3s-n', 'je'],\n",
    "    ['danes', '', 'Rgp', 'danes'],\n",
    "    ['poročala', '', 'Vmpp-sf', 'poročala'],\n",
    "    ['oddaja', '', 'Ncfsn', 'oddaja'],\n",
    "    ['do', '', 'Sg', 'do'],\n",
    "    ['danes', '', 'Rgp', 'danes'],\n",
    "    ['se', '', 'Px------y', 'se'],\n",
    "    ['je', '', 'Va-r3s-n', 'je'],\n",
    "    ['zgodilo', '', 'Vmep-sn', 'zgodilo'],\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Accented characters passed to the accentuation call.\n",
    "accented_vowels = [\n",
    "    'ŕ', 'á', 'ä', 'é', 'ë', 'ě', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü',\n",
    "]\n",
    "\n",
    "# One-row input in the [word, '', tag, word] layout.\n",
    "words = [['Gorejevemu', '', 'Psnsed', 'Gorejevemu']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Gorejévemu']\n",
      "['Gorejěvemu']\n",
      "[['Gorejevemu', '', 'Psnsed', 'Gorejevemu']]\n"
     ]
    }
   ],
   "source": [
    "pos = 4282  # NOTE(review): not referenced by any other cell in this notebook\n",
    "\n",
    "# Compute the results here so this cell works on a fresh top-to-bottom run instead of\n",
    "# relying on variables that are only assigned in a later cell.\n",
    "location_accented_words, accented_words = data.accentuate_word(\n",
    "    words,\n",
    "    letter_location_model, syllable_location_model, syllabled_letters_location_model,\n",
    "    letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,\n",
    "    letter_type_model, syllable_type_model, syllabled_letter_type_model,\n",
    "    letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,\n",
    "    dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)\n",
    "\n",
    "print(location_accented_words)\n",
    "print(accented_words)\n",
    "print(words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read the Sloleks table whose rows are accentuated by the cells below.\n",
    "sloleks_path = 'data/sloleks-sl_v1.2.tbl'\n",
    "data = Data('s', shuffle_all_inputs=False)\n",
    "new_content = data._read_content(sloleks_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Slice first so only the 100 selected rows are converted, not the whole table.\n",
    "words = [[el[0], '', el[2], el[0]] for el in new_content[1146450:1146550]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[['adventistovo', '', 'Psnzeo', 'adventistovo'], ['adventistovo', '', 'Psnzet', 'adventistovo'], ['adventistu', '', 'Somed', 'adventistu'], ['adventistu', '', 'Somem', 'adventistu'], ['adventiven', '', 'Ppnmein', 'adventiven'], ['adventiven', '', 'Ppnmetn', 'adventiven'], ['adventivna', '', 'Ppnmdi', 'adventivna'], ['adventivna', '', 'Ppnmdt', 'adventivna'], ['adventivna', '', 'Ppnsmi', 'adventivna'], ['adventivna', '', 'Ppnsmt', 'adventivna'], ['adventivna', '', 'Ppnzei', 'adventivna'], ['adventivne', '', 'Ppnmmt', 'adventivne'], ['adventivne', '', 'Ppnzer', 'adventivne'], ['adventivne', '', 'Ppnzmi', 'adventivne'], ['adventivne', '', 'Ppnzmt', 'adventivne'], ['adventivnega', '', 'Ppnmer', 'adventivnega'], ['adventivnega', '', 'Ppnmet', 'adventivnega'], ['adventivnega', '', 'Ppnser', 'adventivnega'], ['adventivnem', '', 'Ppnmem', 'adventivnem'], ['adventivnem', '', 'Ppnsem', 'adventivnem'], ['adventivnemu', '', 'Ppnmed', 'adventivnemu'], ['adventivnemu', '', 'Ppnsed', 'adventivnemu'], ['adventivni', '', 'Ppnmeid', 'adventivni'], ['adventivni', '', 'Ppnmetd', 'adventivni'], ['adventivni', '', 'Ppnmmi', 'adventivni'], ['adventivni', '', 'Ppnsdi', 'adventivni'], ['adventivni', '', 'Ppnsdt', 'adventivni'], ['adventivni', '', 'Ppnzdi', 'adventivni'], ['adventivni', '', 'Ppnzdt', 'adventivni'], ['adventivni', '', 'Ppnzed', 'adventivni'], ['adventivni', '', 'Ppnzem', 'adventivni'], ['adventivnih', '', 'Ppnmdm', 'adventivnih'], ['adventivnih', '', 'Ppnmdr', 'adventivnih'], ['adventivnih', '', 'Ppnmmm', 'adventivnih'], ['adventivnih', '', 'Ppnmmr', 'adventivnih'], ['adventivnih', '', 'Ppnsdm', 'adventivnih'], ['adventivnih', '', 'Ppnsdr', 'adventivnih'], ['adventivnih', '', 'Ppnsmm', 'adventivnih'], ['adventivnih', '', 'Ppnsmr', 'adventivnih'], ['adventivnih', '', 'Ppnzdm', 'adventivnih'], ['adventivnih', '', 'Ppnzdr', 'adventivnih'], ['adventivnih', '', 'Ppnzmm', 'adventivnih'], ['adventivnih', '', 'Ppnzmr', 'adventivnih'], ['adventivnima', '', 'Ppnmdd', 
'adventivnima'], ['adventivnima', '', 'Ppnmdo', 'adventivnima'], ['adventivnima', '', 'Ppnsdd', 'adventivnima'], ['adventivnima', '', 'Ppnsdo', 'adventivnima'], ['adventivnima', '', 'Ppnzdd', 'adventivnima'], ['adventivnima', '', 'Ppnzdo', 'adventivnima'], ['adventivnim', '', 'Ppnmeo', 'adventivnim'], ['adventivnim', '', 'Ppnmmd', 'adventivnim'], ['adventivnim', '', 'Ppnseo', 'adventivnim'], ['adventivnim', '', 'Ppnsmd', 'adventivnim'], ['adventivnim', '', 'Ppnzmd', 'adventivnim'], ['adventivnimi', '', 'Ppnmmo', 'adventivnimi'], ['adventivnimi', '', 'Ppnsmo', 'adventivnimi'], ['adventivnimi', '', 'Ppnzmo', 'adventivnimi'], ['adventivno', '', 'Ppnsei', 'adventivno'], ['adventivno', '', 'Ppnset', 'adventivno'], ['adventivno', '', 'Ppnzeo', 'adventivno'], ['adventivno', '', 'Ppnzet', 'adventivno'], ['adventna', '', 'Ppnmdi', 'adventna'], ['adventna', '', 'Ppnmdt', 'adventna'], ['adventna', '', 'Ppnsmi', 'adventna'], ['adventna', '', 'Ppnsmt', 'adventna'], ['adventna', '', 'Ppnzei', 'adventna'], ['adventne', '', 'Ppnmmt', 'adventne'], ['adventne', '', 'Ppnzer', 'adventne'], ['adventne', '', 'Ppnzmi', 'adventne'], ['adventne', '', 'Ppnzmt', 'adventne'], ['adventnega', '', 'Ppnmer', 'adventnega'], ['adventnega', '', 'Ppnmet', 'adventnega'], ['adventnega', '', 'Ppnser', 'adventnega'], ['adventnem', '', 'Ppnmem', 'adventnem'], ['adventnem', '', 'Ppnsem', 'adventnem'], ['adventnemu', '', 'Ppnmed', 'adventnemu'], ['adventnemu', '', 'Ppnsed', 'adventnemu'], ['adventni', '', 'Ppnmeid', 'adventni'], ['adventni', '', 'Ppnmetd', 'adventni'], ['adventni', '', 'Ppnmmi', 'adventni'], ['adventni', '', 'Ppnsdi', 'adventni'], ['adventni', '', 'Ppnsdt', 'adventni'], ['adventni', '', 'Ppnzdi', 'adventni'], ['adventni', '', 'Ppnzdt', 'adventni'], ['adventni', '', 'Ppnzed', 'adventni'], ['adventni', '', 'Ppnzem', 'adventni'], ['adventnih', '', 'Ppnmdm', 'adventnih'], ['adventnih', '', 'Ppnmdr', 'adventnih'], ['adventnih', '', 'Ppnmmm', 'adventnih'], ['adventnih', '', 'Ppnmmr', 
'adventnih'], ['adventnih', '', 'Ppnsdm', 'adventnih'], ['adventnih', '', 'Ppnsdr', 'adventnih'], ['adventnih', '', 'Pp
     ]
    }
   ],
   "source": [
    "# `list.append` is a method: call it with (...) rather than subscripting it with [...].\n",
    "# It returns None, so append first and print the list afterwards.\n",
    "# The row uses the same 4-field layout as the other `words` rows: [word, '', tag, word].\n",
    "words.append(['nadnaravno', '', 'Ppnsei', 'nadnaravno'])\n",
    "print(words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Words processed: 650250\n",
    "# Word index: 50023\n",
    "# Word number: 50023\n",
    "\n",
    "#done_lexical_entries = 33522\n",
    "\n",
    "#new_content = data._read_content('sloleks-sl_v1.2.tbl')\n",
    "rate = 100000  # number of rows accentuated and written per batch\n",
    "start_timer = time.time()\n",
    "# Append mode: rows are added after whatever the output file already contains.\n",
    "# The encoding is pinned so the accented characters are written the same way on every platform.\n",
    "with open(\"data/new_sloleks/new_sloleks.tab\", \"a\", encoding=\"utf-8\") as myfile:\n",
    "    for index in range(0, len(new_content), rate):\n",
    "        # Slicing clamps at the end of the sequence, so the final short batch needs no special case,\n",
    "        # and only this batch (not the whole table) is converted on each pass.\n",
    "        batch = new_content[index:index + rate]\n",
    "        words = [[el[0], '', el[2], el[0]] for el in batch]\n",
    "        data = Data('l', shuffle_all_inputs=False)\n",
    "        location_accented_words, accented_words = data.accentuate_word(\n",
    "            words,\n",
    "            letter_location_model, syllable_location_model, syllabled_letters_location_model,\n",
    "            letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,\n",
    "            letter_type_model, syllable_type_model, syllabled_letter_type_model,\n",
    "            letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,\n",
    "            dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)\n",
    "\n",
    "        # One tab-separated output line per input row: the four original columns plus both accented forms.\n",
    "        lines = []\n",
    "        for offset, row in enumerate(batch):\n",
    "            # row[3][:-1] drops the last character of the fourth column\n",
    "            # (presumably the trailing line break -- TODO confirm).\n",
    "            fields = [row[0], row[1], row[2], row[3][:-1],\n",
    "                      location_accented_words[offset], accented_words[offset]]\n",
    "            lines.append('\\t'.join(fields) + '\\n')\n",
    "\n",
    "        print('Writing data from ' + str(index) + ' onward.')\n",
    "        end_timer = time.time()\n",
    "        print(\"Elapsed time: \" + \"{0:.2f}\".format((end_timer - start_timer)/60.0) + \" minutes\")\n",
    "        myfile.write(''.join(lines))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
Accentuation on sloleks 2018-04-14 08:25:40 +00:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
			`"execution_count": 75,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# -- coding: utf-8 --\n",`
			`"from __future__ import unicode_literals\n",`
			`"\n",`
			`"import numpy as np\n",`
			`"from keras.models import load_model\n",`
			`"import sys\n",`
			`"import pickle\n",`
			`"import time\n",`
			`"\n",`
			`"from prepare_data import *\n",`
			`"\n",`
			`"np.random.seed(7)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 2,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"data = Data('l', shuffle_all_inputs=False)\n",`
			`"content = data._read_content('data/SlovarIJS_BESEDE_utf8.lex')\n",`
			`"dictionary, max_word, max_num_vowels, vowels, accented_vowels = data._create_dict(content)\n",`
			`"feature_dictionary = data._create_slovene_feature_dictionary()\n",`
			`"syllable_dictionary = data._create_syllables_dictionary(content, vowels)\n",`
			`"accented_vowels = ['ŕ', 'á', 'ä', 'é', 'ë', 'ě', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü']\n",`
			`"\n"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 15,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"environment = {}\n",`
			`"environment['dictionary'] = dictionary\n",`
			`"environment['max_word'] = max_word\n",`
			`"environment['max_num_vowels'] = max_num_vowels\n",`
			`"environment['vowels'] = vowels\n",`
			`"environment['accented_vowels'] = accented_vowels\n",`
			`"environment['feature_dictionary'] = feature_dictionary\n",`
			`"environment['eng_feature_dictionary'] = feature_dictionary\n",`
			`"environment['syllable_dictionary'] = syllable_dictionary\n",`
			`"output = open('environment.pkl', 'wb')\n",`
			`"pickle.dump(environment, output)\n",`
			`"output.close()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 12,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"407\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"i = 0\n",`
			`"for el in syllable_dictionary:\n",`
			`" if el == \"da\":\n",`
			`" print(i)\n",`
			`" i += 1"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 78,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"%run prepare_data.py\n",`
			`"\n",`
			`"data = Data('l', shuffle_all_inputs=False)\n",`
			`"letter_location_model, syllable_location_model, syllabled_letters_location_model = data.load_location_models(\n",`
			`" 'cnn/word_accetuation/cnn_dictionary/v5_3/20_final_epoch.h5',\n",`
			`" 'cnn/word_accetuation/syllables/v3_3/20_final_epoch.h5',\n",`
			`" 'cnn/word_accetuation/syllabled_letters/v3_3/20_final_epoch.h5')\n",`
			`"\n",`
			`"letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model = data.load_location_models(\n",`
			`" 'cnn/word_accetuation/cnn_dictionary/v5_2/20_final_epoch.h5',\n",`
			`" 'cnn/word_accetuation/syllables/v3_2/20_final_epoch.h5',\n",`
			`" 'cnn/word_accetuation/syllabled_letters/v3_2/20_final_epoch.h5')\n",`
			`"\n",`
			`"letter_type_model, syllable_type_model, syllabled_letter_type_model = data.load_type_models(\n",`
			`" 'cnn/accent_classification/letters/v3_1/20_final_epoch.h5',\n",`
			`" 'cnn/accent_classification/syllables/v2_1/20_final_epoch.h5',\n",`
			`" 'cnn/accent_classification/syllabled_letters/v2_1/20_final_epoch.h5')\n",`
			`"\n",`
			`"letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model = data.load_type_models(\n",`
			`" 'cnn/accent_classification/letters/v3_0/20_final_epoch.h5',\n",`
			`" 'cnn/accent_classification/syllables/v2_0/20_final_epoch.h5',\n",`
			`" 'cnn/accent_classification/syllabled_letters/v2_0/20_final_epoch.h5')"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 4,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			"test_input = [['uradni', '', 'Agpmpn', 'uradni'], ['podatki', '', 'Ncmpn', 'podatki'], ['policije', '', 'Ncfsg', 'policije'], ['kažejo', '', 'Vmpr3p', 'kažejo'], ['na', '', 'Sa', 'na'], ['precej', '', 'Rgp', 'precej'], ['napete', '', 'Appfpa', 'napete'], ['razmere', '', 'Ncfpa', 'razmere'], ['v', '', 'Sl', 'v'], ['piranskem', '', 'Agpmsl', 'piranskem'], ['zalivu', '', 'Ncmsl', 'zalivu'], ['je', '', 'Va-r3s-n', 'je'], ['danes', '', 'Rgp', 'danes'], ['poročala', '', 'Vmpp-sf', 'poročala'], ['oddaja', '', 'Ncfsn', 'oddaja'], ['do', '', 'Sg', 'do'], ['danes', '', 'Rgp', 'danes'], ['se', '', 'Px------y', 'se'], ['je', '', 'Va-r3s-n', 'je'], ['zgodilo', '', 'Vmep-sn', 'zgodilo']]"
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 5,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"accented_vowels = ['ŕ', 'á', 'ä', 'é', 'ë', 'ě', 'í', 'î', 'ó', 'ô', 'ö', 'ú', 'ü']\n",`
			`"words = [[\"Gorejevemu\", \"\", \"Psnsed\", \"Gorejevemu\"]]"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 159,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"['Gorejévemu']\n",`
			`"['Gorejěvemu']\n",`
			`"[['Gorejevemu', '', 'Psnsed', 'Gorejevemu']]\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"pos = 4282\n",`
			`"print(location_accented_words)\n",`
			`"print(accented_words)\n",`
			`"print(words)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 5,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"data = Data('s', shuffle_all_inputs=False)\n",`
			`"new_content = data._read_content('data/sloleks-sl_v1.2.tbl')"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 49,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"words = [[el[0], '', el[2], el[0]] for el in new_content][1146450:1146550]\n",`
			`"\n",`
			`" "`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 45,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			"[['adventistovo', '', 'Psnzeo', 'adventistovo'], ['adventistovo', '', 'Psnzet', 'adventistovo'], ['adventistu', '', 'Somed', 'adventistu'], ['adventistu', '', 'Somem', 'adventistu'], ['adventiven', '', 'Ppnmein', 'adventiven'], ['adventiven', '', 'Ppnmetn', 'adventiven'], ['adventivna', '', 'Ppnmdi', 'adventivna'], ['adventivna', '', 'Ppnmdt', 'adventivna'], ['adventivna', '', 'Ppnsmi', 'adventivna'], ['adventivna', '', 'Ppnsmt', 'adventivna'], ['adventivna', '', 'Ppnzei', 'adventivna'], ['adventivne', '', 'Ppnmmt', 'adventivne'], ['adventivne', '', 'Ppnzer', 'adventivne'], ['adventivne', '', 'Ppnzmi', 'adventivne'], ['adventivne', '', 'Ppnzmt', 'adventivne'], ['adventivnega', '', 'Ppnmer', 'adventivnega'], ['adventivnega', '', 'Ppnmet', 'adventivnega'], ['adventivnega', '', 'Ppnser', 'adventivnega'], ['adventivnem', '', 'Ppnmem', 'adventivnem'], ['adventivnem', '', 'Ppnsem', 'adventivnem'], ['adventivnemu', '', 'Ppnmed', 'adventivnemu'], ['adventivnemu', '', 'Ppnsed', 'adventivnemu'], ['adventivni', '', 'Ppnmeid', 'adventivni'], ['adventivni', '', 'Ppnmetd', 'adventivni'], ['adventivni', '', 'Ppnmmi', 'adventivni'], ['adventivni', '', 'Ppnsdi', 'adventivni'], ['adventivni', '', 'Ppnsdt', 'adventivni'], ['adventivni', '', 'Ppnzdi', 'adventivni'], ['adventivni', '', 'Ppnzdt', 'adventivni'], ['adventivni', '', 'Ppnzed', 'adventivni'], ['adventivni', '', 'Ppnzem', 'adventivni'], ['adventivnih', '', 'Ppnmdm', 'adventivnih'], ['adventivnih', '', 'Ppnmdr', 'adventivnih'], ['adventivnih', '', 'Ppnmmm', 'adventivnih'], ['adventivnih', '', 'Ppnmmr', 'adventivnih'], ['adventivnih', '', 'Ppnsdm', 'adventivnih'], ['adventivnih', '', 'Ppnsdr', 'adventivnih'], ['adventivnih', '', 'Ppnsmm', 'adventivnih'], ['adventivnih', '', 'Ppnsmr', 'adventivnih'], ['adventivnih', '', 'Ppnzdm', 'adventivnih'], ['adventivnih', '', 'Ppnzdr', 'adventivnih'], ['adventivnih', '', 'Ppnzmm', 'adventivnih'], ['adventivnih', '', 'Ppnzmr', 'adventivnih'], ['adventivnima', '', 'Ppnmdd', 
'adventivnima'], ['adventivnima', '', 'Ppnmdo', 'adventivnima'], ['adventivnima', '', 'Ppnsdd', 'adventivnima'], ['adventivnima', '', 'Ppnsdo', 'adventivnima'], ['adventivnima', '', 'Ppnzdd', 'adventivnima'], ['adventivnima', '', 'Ppnzdo', 'adventivnima'], ['adventivnim', '', 'Ppnmeo', 'adventivnim'], ['adventivnim', '', 'Ppnmmd', 'adventivnim'], ['adventivnim', '', 'Ppnseo', 'adventivnim'], ['adventivnim', '', 'Ppnsmd', 'adventivnim'], ['adventivnim', '', 'Ppnzmd', 'adventivnim'], ['adventivnimi', '', 'Ppnmmo', 'adventivnimi'], ['adventivnimi', '', 'Ppnsmo', 'adventivnimi'], ['adventivnimi', '', 'Ppnzmo', 'adventivnimi'], ['adventivno', '', 'Ppnsei', 'adventivno'], ['adventivno', '', 'Ppnset', 'adventivno'], ['adventivno', '', 'Ppnzeo', 'adventivno'], ['adventivno', '', 'Ppnzet', 'adventivno'], ['adventna', '', 'Ppnmdi', 'adventna'], ['adventna', '', 'Ppnmdt', 'adventna'], ['adventna', '', 'Ppnsmi', 'adventna'], ['adventna', '', 'Ppnsmt', 'adventna'], ['adventna', '', 'Ppnzei', 'adventna'], ['adventne', '', 'Ppnmmt', 'adventne'], ['adventne', '', 'Ppnzer', 'adventne'], ['adventne', '', 'Ppnzmi', 'adventne'], ['adventne', '', 'Ppnzmt', 'adventne'], ['adventnega', '', 'Ppnmer', 'adventnega'], ['adventnega', '', 'Ppnmet', 'adventnega'], ['adventnega', '', 'Ppnser', 'adventnega'], ['adventnem', '', 'Ppnmem', 'adventnem'], ['adventnem', '', 'Ppnsem', 'adventnem'], ['adventnemu', '', 'Ppnmed', 'adventnemu'], ['adventnemu', '', 'Ppnsed', 'adventnemu'], ['adventni', '', 'Ppnmeid', 'adventni'], ['adventni', '', 'Ppnmetd', 'adventni'], ['adventni', '', 'Ppnmmi', 'adventni'], ['adventni', '', 'Ppnsdi', 'adventni'], ['adventni', '', 'Ppnsdt', 'adventni'], ['adventni', '', 'Ppnzdi', 'adventni'], ['adventni', '', 'Ppnzdt', 'adventni'], ['adventni', '', 'Ppnzed', 'adventni'], ['adventni', '', 'Ppnzem', 'adventni'], ['adventnih', '', 'Ppnmdm', 'adventnih'], ['adventnih', '', 'Ppnmdr', 'adventnih'], ['adventnih', '', 'Ppnmmm', 'adventnih'], ['adventnih', '', 'Ppnmmr', 
'adventnih'], ['adventnih', '', 'Ppnsdm', 'adventnih'], ['adventnih', '', 'Ppnsdr', 'adventnih'], ['adventnih', '', 'Pp
			`]`
			`}`
			`],`
			`"source": [`
			`"print(words.append['nadnaravno', '', 'Ppnsei'])"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"#Words proccesed: 650250\n",`
			`"#Word indeks: 50023\n",`
			`"#Word number: 50023\n",`
			`"\n",`
			`"#done_lexical_entries = 33522\n",`
			`"\n",`
			`"#new_content = data._read_content('sloleks-sl_v1.2.tbl')\n",`
			`"rate = 100000\n",`
			`"start_timer = time.time()\n",`
			`"with open(\"data/new_sloleks/new_sloleks.tab\", \"a\") as myfile:\n",`
			`" for index in range(0, len(new_content), rate):\n",`
			`" if index+rate >= len(new_content):\n",`
			`" words = [[el[0], '', el[2], el[0]] for el in new_content][index:len(new_content)]\n",`
			`" else:\n",`
			`" words = [[el[0], '', el[2], el[0]] for el in new_content][index:index+rate]\n",`
			`" data = Data('l', shuffle_all_inputs=False)\n",`
			`" location_accented_words, accented_words = data.accentuate_word(words, letter_location_model, syllable_location_model, syllabled_letters_location_model,\n",`
			`" letter_location_co_model, syllable_location_co_model, syllabled_letters_location_co_model,\n",`
			`" letter_type_model, syllable_type_model, syllabled_letter_type_model,\n",`
			`" letter_type_co_model, syllable_type_co_model, syllabled_letter_type_co_model,\n",`
			`" dictionary, max_word, max_num_vowels, vowels, accented_vowels, feature_dictionary, syllable_dictionary)\n",`
			`"\n",`
			`" res = ''\n",`
			`" for i in range(index, index + len(words)):\n",`
			`" res += new_content[i][0] + '\\t' + new_content[i][1] + '\\t' + new_content[i][2] + '\\t' \\\n",`
			`" + new_content[i][3][:-1] + '\\t' + location_accented_words[i-index] + '\\t' + accented_words[i-index] + '\\n'\n",`
			`"\n",`
			`" print('Writing data from ' + str(index) + ' onward.')\n",`
			`" end_timer = time.time()\n",`
			`" print(\"Elapsed time: \" + \"{0:.2f}\".format((end_timer - start_timer)/60.0) + \" minutes\")\n",`
			`" myfile.write(res)"`
			`]`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.5.2"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 2`
			`}`