From dbd0b90f92a787cac0b53c53af9f061ba23d9231 Mon Sep 17 00:00:00 2001 From: lkrsnik Date: Thu, 7 Sep 2017 19:32:36 +0200 Subject: [PATCH] Added multiple results and error analysis --- .idea/workspace.xml | 118 ++++---- .../err.txt | 6 + .../gmlog/description | 1 + .../gmlog/diag | 8 + .../gmlog/errors | 259 ++++++++++++++++++ .../gmlog/grami | 39 +++ .../gmlog/input | 0 .../gmlog/local | 26 ++ .../gmlog/output | 8 + .../gmlog/statistics | 0 .../gmlog/xml | 33 +++ .../out.txt | 0 prepare_data.py | 183 ++++++++++++- workbench.py | 34 ++- 14 files changed, 653 insertions(+), 62 deletions(-) create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/err.txt create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/description create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/diag create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/errors create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/grami create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/input create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/local create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/output create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/statistics create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/xml create mode 100644 e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/out.txt diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 7335d64..5040063 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,16 +2,27 @@ - - + + + + + + - - + + + + + + + + + + - - + @@ -38,11 +49,11 @@ - + - - + + @@ -60,6 +71,16 @@ + + + + + + + + + + @@ -70,13 +91,14 @@ - + - - + + + @@ -161,14 +183,6 @@ - _get_unresonant_silent_consonants - el[0] - max_num_vowels - index - accentuated - create_syll - shuffle_all_inputs - accented _accented size decode_x @@ -191,6 +205,14 @@ test_and_validation_size _accent_classification self.y_train + _additional_letter_attributes + np.random.seed + round + is_vow + self._input_type == 'l' + print + np.eye + allow_shuffle_vector_generation @@ -209,9 +231,10 @@ @@ -236,6 +259,7 @@ + @@ -251,7 +275,6 @@ - @@ -517,7 +540,7 @@ - + @@ -550,16 +573,6 @@ - - - - - - - - - - @@ -840,6 +853,14 @@ + + + + + + + + @@ -888,18 +909,29 @@ - + - - + + + + + + + + + + + + + - - + + @@ -916,15 +948,5 @@ - - - - - - - - - - \ No newline at end of file diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/err.txt b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/err.txt new file mode 100644 index 0000000..9f33edf --- /dev/null +++ b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/err.txt @@ -0,0 +1,6 @@ +Using Theano backend. +WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10). Please switch to the gpuarray backend. You can get more information about how to switch at this URL: + https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29 + +WARNING (theano.sandbox.cuda): CUDA is installed, but device gpu is not available (error: Unable to get the number of gpus available: no CUDA-capable device is detected) +WARNING (theano.tensor.blas): We did not found a dynamic library into the library_dir of the library we use for blas. If you use ATLAS, make sure to compile it with dynamics library. diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/description b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/description new file mode 100644 index 0000000..49ad113 --- /dev/null +++ b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/description @@ -0,0 +1 @@ +&( executable = "workbench.sh" )( stdout = "out.txt" )( stderr = "err.txt" )( cputime = "45000" )( walltime = "45000" )( memory = "12000" )( inputfiles = ( "workbench.py" ".412054917326" ) ( "prepare_data.py" ".422203461399818" ) ( "content_shuffle_vector.h5" "124536" ) ( "shuffle_vector_test.h5" "418608" ) ( "shuffle_vector_train.h5" "3443352" ) ( "shuffle_vector_validate.h5" "435920" ) ( "SlovarIJS_BESEDE_utf8.lex" "29360549" ) ( "40_epoch.h5" ".04294967295" ) ( "40_epoch_history.pkl" ".04294967295" ) ( "notes" ".791623890922" ) ( "workbench.sh" ".1433001277835" ) )( executables = "workbench.sh" )( outputfiles = ( "40_epoch.h5" "" ) ( "workbench.py" "" ) ( "workbench.sh" "" ) ( "40_epoch_history.pkl" "" ) ( "notes" "" ) ( "out.txt" "" ) ( "err.txt" "" ) ( "gmlog" "" ) )( queue = "gridlong_nsc" )( runtimeenvironment = "APPS/BASE/THEANO-GPU-0.9" )( gmlog = "gmlog" )( jobname = "accetuation_nn" )( action = "request" )( clientsoftware = "libarccompute-5.0.5" )( clientxrsl = "&( jobname = ""accetuation_nn"" )( executable = ""workbench.sh"" )( inputfiles = ( ""workbench.py"" """" ) ( ""prepare_data.py"" """" ) ( ""content_shuffle_vector.h5"" ""cnn/internal_representations/inputs/content_shuffle_vector.h5"" ) ( ""shuffle_vector_test.h5"" ""cnn/internal_representations/inputs/shuffle_vector_test.h5"" ) ( ""shuffle_vector_train.h5"" ""cnn/internal_representations/inputs/shuffle_vector_train.h5"" ) ( ""shuffle_vector_validate.h5"" ""cnn/internal_representations/inputs/shuffle_vector_validate.h5"" ) ( ""SlovarIJS_BESEDE_utf8.lex"" ""data/SlovarIJS_BESEDE_utf8.lex"" ) ( ""40_epoch.h5"" """" ) ( ""40_epoch_history.pkl"" """" ) ( ""notes"" """" ) )( outputfiles = ( ""40_epoch.h5"" """" ) ( ""workbench.py"" """" ) ( ""workbench.sh"" """" ) ( ""40_epoch_history.pkl"" """" ) ( ""notes"" """" ) )( stdout = ""out.txt"" )( stderr = ""err.txt"" )( gmlog = ""gmlog"" )( runtimeenvironment = ""APPS/BASE/THEANO-GPU-0.9"" )( gridtime = ""750"" )( memory = ""12000"" )" )( hostname = "luka-laptop" )( savestate = "yes" ) \ No newline at end of file diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/diag b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/diag new file mode 100644 index 0000000..141e4b3 --- /dev/null +++ b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/diag @@ -0,0 +1,8 @@ +runtimeenvironments=APPS/BASE/THEANO-GPU-0.9; + +nodename=nsc-fp006.ijs.si +WallTime=45010s +Processors=1 +UserTime=45010s +LRMSStartTime=20170906082225Z +LRMSEndTime=20170906205235Z diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/errors b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/errors new file mode 100644 index 0000000..1e834ac --- /dev/null +++ b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/errors @@ -0,0 +1,259 @@ +2017-09-06T08:21:51Z Job state change UNDEFINED -> ACCEPTED Reason: (Re)Accepting new job +2017-09-06T08:21:51Z Job state change ACCEPTED -> PREPARING Reason: Starting job processing +2017-09-06T08:22:24Z Job state change PREPARING -> SUBMIT Reason: Pre-staging finished, passing job to LRMS +----- starting submit_slurm_job ----- +SLURM jobname: accetuation_nn +SLURM job script built +SLURM script follows: +------------------------------------------------------------------- +#!/bin/bash -l +# SLURM batch job script built by grid-manager +#SBATCH --no-requeue +#SBATCH -e /net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm.comment +#SBATCH -o /net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm.comment + +#SBATCH -p gridlong +#SBATCH --nice=50 +#SBATCH -J 'accetuation_nn' +#SBATCH --get-user-env=10L +#SBATCH -n 1 +#SBATCH --constraint=gpu --gres=gpu:1 +#SBATCH -t 750:0 +#SBATCH -t 750:0 +#SBATCH --mem-per-cpu=12000 +# run singularity image if RTE with singularity is required +if [ -z $SINGULARITY_CONTAINER ]; then +exec /bin/singularity exec -B /var/spool/slurm,/cvmfs,/net/hold/data1,/data1,/data1/slurm,/home,/usr/lib64/nvidia /net/hold/data1/singularity-images/theano-gpu-2.img $0 +fi + +# Overide umask of execution node (sometime values are really strange) +umask 077 + +# source with arguments for DASH shells +sourcewithargs() { +script=$1 +shift +. $script +} +# Setting environment variables as specified by user +export 'GRID_GLOBAL_JOBID=gsiftp://nsc.ijs.si:2811/jobs/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm' + +RUNTIME_JOB_DIR=/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm +RUNTIME_JOB_STDIN=/dev/null +RUNTIME_JOB_STDOUT=/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/out.txt +RUNTIME_JOB_STDERR=/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/err.txt +RUNTIME_JOB_DIAG=/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm.diag +if [ ! -z "$RUNTIME_GRIDAREA_DIR" ] ; then + RUNTIME_JOB_DIR=$RUNTIME_GRIDAREA_DIR/`basename $RUNTIME_JOB_DIR` + RUNTIME_JOB_STDIN=`echo "$RUNTIME_JOB_STDIN" | sed "s#^$RUNTIME_JOB_DIR#$RUNTIME_GRIDAREA_DIR#"` + RUNTIME_JOB_STDOUT=`echo "$RUNTIME_JOB_STDOUT" | sed "s#^$RUNTIME_JOB_DIR#$RUNTIME_GRIDAREA_DIR#"` + RUNTIME_JOB_STDERR=`echo "$RUNTIME_JOB_STDERR" | sed "s#^$RUNTIME_JOB_DIR#$RUNTIME_GRIDAREA_DIR#"` + RUNTIME_JOB_DIAG=`echo "$RUNTIME_JOB_DIAG" | sed "s#^$RUNTIME_JOB_DIR#$RUNTIME_GRIDAREA_DIR#"` + RUNTIME_CONTROL_DIR=`echo "$RUNTIME_CONTROL_DIR" | sed "s#^$RUNTIME_JOB_DIR#$RUNTIME_GRIDAREA_DIR#"` +fi +RUNTIME_LOCAL_SCRATCH_DIR=${RUNTIME_LOCAL_SCRATCH_DIR:-$WORKDIR} +RUNTIME_FRONTEND_SEES_NODE=${RUNTIME_FRONTEND_SEES_NODE:-} +RUNTIME_NODE_SEES_FRONTEND=${RUNTIME_NODE_SEES_FRONTEND:-yes} + if [ ! -z "$RUNTIME_LOCAL_SCRATCH_DIR" ] && [ ! -z "$RUNTIME_NODE_SEES_FRONTEND" ]; then + RUNTIME_NODE_JOB_DIR="$RUNTIME_LOCAL_SCRATCH_DIR"/`basename "$RUNTIME_JOB_DIR"` + rm -rf "$RUNTIME_NODE_JOB_DIR" + mkdir -p "$RUNTIME_NODE_JOB_DIR" + # move directory contents + for f in "$RUNTIME_JOB_DIR"/.* "$RUNTIME_JOB_DIR"/*; do + [ "$f" = "$RUNTIME_JOB_DIR/*" ] && continue # glob failed, no files + [ "$f" = "$RUNTIME_JOB_DIR/." ] && continue + [ "$f" = "$RUNTIME_JOB_DIR/.." ] && continue + [ "$f" = "$RUNTIME_JOB_DIR/.diag" ] && continue + [ "$f" = "$RUNTIME_JOB_DIR/.comment" ] && continue + if ! mv "$f" "$RUNTIME_NODE_JOB_DIR"; then + echo "Failed to move '$f' to '$RUNTIME_NODE_JOB_DIR'" 1>&2 + exit 1 + fi + done + if [ ! -z "$RUNTIME_FRONTEND_SEES_NODE" ] ; then + # creating link for whole directory + ln -s "$RUNTIME_FRONTEND_SEES_NODE"/`basename "$RUNTIME_JOB_DIR"` "$RUNTIME_JOB_DIR" + else + # keep stdout, stderr and control directory on frontend + # recreate job directory + mkdir -p "$RUNTIME_JOB_DIR" + # make those files + mkdir -p `dirname "$RUNTIME_JOB_STDOUT"` + mkdir -p `dirname "$RUNTIME_JOB_STDERR"` + touch "$RUNTIME_JOB_STDOUT" + touch "$RUNTIME_JOB_STDERR" + RUNTIME_JOB_STDOUT__=`echo "$RUNTIME_JOB_STDOUT" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` + RUNTIME_JOB_STDERR__=`echo "$RUNTIME_JOB_STDERR" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` + rm "$RUNTIME_JOB_STDOUT__" 2>/dev/null + rm "$RUNTIME_JOB_STDERR__" 2>/dev/null + if [ ! -z "$RUNTIME_JOB_STDOUT__" ] && [ "$RUNTIME_JOB_STDOUT" != "$RUNTIME_JOB_STDOUT__" ]; then + ln -s "$RUNTIME_JOB_STDOUT" "$RUNTIME_JOB_STDOUT__" + fi + if [ "$RUNTIME_JOB_STDOUT__" != "$RUNTIME_JOB_STDERR__" ] ; then + if [ ! -z "$RUNTIME_JOB_STDERR__" ] && [ "$RUNTIME_JOB_STDERR" != "$RUNTIME_JOB_STDERR__" ]; then + ln -s "$RUNTIME_JOB_STDERR" "$RUNTIME_JOB_STDERR__" + fi + fi + if [ ! -z "$RUNTIME_CONTROL_DIR" ] ; then + # move control directory back to frontend + RUNTIME_CONTROL_DIR__=`echo "$RUNTIME_CONTROL_DIR" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` + mv "$RUNTIME_CONTROL_DIR__" "$RUNTIME_CONTROL_DIR" + fi + fi + # adjust stdin,stdout & stderr pointers + RUNTIME_JOB_STDIN=`echo "$RUNTIME_JOB_STDIN" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` + RUNTIME_JOB_STDOUT=`echo "$RUNTIME_JOB_STDOUT" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` + RUNTIME_JOB_STDERR=`echo "$RUNTIME_JOB_STDERR" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` + RUNTIME_FRONTEND_JOB_DIR="$RUNTIME_JOB_DIR" + RUNTIME_JOB_DIR="$RUNTIME_NODE_JOB_DIR" + fi + if [ -z "$RUNTIME_NODE_SEES_FRONTEND" ] ; then + mkdir -p "$RUNTIME_JOB_DIR" + fi + +RESULT=0 + +if [ "$RESULT" = '0' ] ; then +# Running runtime scripts +export RUNTIME_CONFIG_DIR=${RUNTIME_CONFIG_DIR:-/net/hold/data1/arc/runtime/} +runtimeenvironments= +if [ ! -z "$RUNTIME_CONFIG_DIR" ] ; then + if [ -r "${RUNTIME_CONFIG_DIR}/APPS/BASE/THEANO-GPU-0.9" ] ; then + runtimeenvironments="${runtimeenvironments}APPS/BASE/THEANO-GPU-0.9;" + cmdl=${RUNTIME_CONFIG_DIR}/APPS/BASE/THEANO-GPU-0.9 + sourcewithargs $cmdl 1 + if [ $? -ne '0' ] ; then + echo "Runtime APPS/BASE/THEANO-GPU-0.9 script failed " 1>&2 + echo "Runtime APPS/BASE/THEANO-GPU-0.9 script failed " 1>"$RUNTIME_JOB_DIAG" + exit 1 + fi + fi +fi + +echo "runtimeenvironments=$runtimeenvironments" >> "$RUNTIME_JOB_DIAG" +if [ ! "X$SLURM_NODEFILE" = 'X' ] ; then + if [ -r "$SLURM_NODEFILE" ] ; then + cat "$SLURM_NODEFILE" | sed 's/\(.*\)/nodename=\1/' >> "$RUNTIME_JOB_DIAG" + NODENAME_WRITTEN="1" + else + SLURM_NODEFILE= + fi +fi +if [ "$RESULT" = '0' ] ; then + # Changing to session directory + HOME=$RUNTIME_JOB_DIR + export HOME + if ! cd "$RUNTIME_JOB_DIR"; then + echo "Failed to switch to '$RUNTIME_JOB_DIR'" 1>&2 + RESULT=1 + fi + if [ ! -z "$RESULT" ] && [ "$RESULT" != 0 ]; then + exit $RESULT + fi +nodename=`/bin/hostname -f` +echo "nodename=$nodename" >> "$RUNTIME_JOB_DIAG" +echo "Processors=1" >> "$RUNTIME_JOB_DIAG" +executable='./workbench.sh' +# Check if executable exists +if [ ! -f "$executable" ]; +then + echo "Path \"$executable\" does not seem to exist" 1>$RUNTIME_JOB_STDOUT 2>$RUNTIME_JOB_STDERR 1>&2 + exit 1 +fi +# See if executable is a script, and extract the name of the interpreter +line1=`dd if="$executable" count=1 2>/dev/null | head -n 1` +command=`echo $line1 | sed -n 's/^#! *//p'` +interpreter=`echo $command | awk '{print $1}'` +if [ "$interpreter" = /usr/bin/env ]; then interpreter=`echo $command | awk '{print $2}'`; fi +# If it's a script and the interpreter is not found ... +[ "x$interpreter" = x ] || type "$interpreter" > /dev/null 2>&1 || { + + echo "Cannot run $executable: $interpreter: not found" 1>$RUNTIME_JOB_STDOUT 2>$RUNTIME_JOB_STDERR 1>&2 + exit 1; } +GNU_TIME='/usr/bin/time' +if [ ! -z "$GNU_TIME" ] && ! "$GNU_TIME" --version >/dev/null 2>&1; then + echo "WARNING: GNU time not found at: $GNU_TIME" 2>&1; + GNU_TIME= +fi + +if [ -z "$GNU_TIME" ] ; then + "./workbench.sh" <$RUNTIME_JOB_STDIN 1>$RUNTIME_JOB_STDOUT 2>$RUNTIME_JOB_STDERR +else + $GNU_TIME -o "$RUNTIME_JOB_DIAG" -a -f 'WallTime=%es\nKernelTime=%Ss\nUserTime=%Us\nCPUUsage=%P\nMaxResidentMemory=%MkB\nAverageResidentMemory=%tkB\nAverageTotalMemory=%KkB\nAverageUnsharedMemory=%DkB\nAverageUnsharedStack=%pkB\nAverageSharedMemory=%XkB\nPageSize=%ZB\nMajorPageFaults=%F\nMinorPageFaults=%R\nSwaps=%W\nForcedSwitches=%c\nWaitSwitches=%w\nInputs=%I\nOutputs=%O\nSocketReceived=%r\nSocketSent=%s\nSignals=%k\n' "./workbench.sh" <$RUNTIME_JOB_STDIN 1>$RUNTIME_JOB_STDOUT 2>$RUNTIME_JOB_STDERR + +fi +RESULT=$? + +fi +fi +if [ ! -z "$RUNTIME_CONFIG_DIR" ] ; then + if [ -r "${RUNTIME_CONFIG_DIR}/APPS/BASE/THEANO-GPU-0.9" ] ; then + cmdl=${RUNTIME_CONFIG_DIR}/APPS/BASE/THEANO-GPU-0.9 + sourcewithargs $cmdl 2 + fi +fi + +if [ ! -z "$RUNTIME_LOCAL_SCRATCH_DIR" ] ; then + find ./ -type l -exec rm -f "{}" ";" + find ./ -type f -exec chmod u+w "{}" ";" + chmod -R u-w "$RUNTIME_JOB_DIR"/'40_epoch.h5' 2>/dev/null + chmod -R u-w "$RUNTIME_JOB_DIR"/'workbench.py' 2>/dev/null + chmod -R u-w "$RUNTIME_JOB_DIR"/'workbench.sh' 2>/dev/null + chmod -R u-w "$RUNTIME_JOB_DIR"/'40_epoch_history.pkl' 2>/dev/null + chmod -R u-w "$RUNTIME_JOB_DIR"/'notes' 2>/dev/null + chmod -R u-w "$RUNTIME_JOB_DIR"/'out.txt' 2>/dev/null + chmod -R u-w "$RUNTIME_JOB_DIR"/'err.txt' 2>/dev/null + chmod -R u-w "$RUNTIME_JOB_DIR"/'gmlog' 2>/dev/null + find ./ -type f -perm /200 -exec rm -f "{}" ";" + find ./ -type f -exec chmod u+w "{}" ";" +fi + + if [ ! -z "$RUNTIME_LOCAL_SCRATCH_DIR" ] && [ ! -z "$RUNTIME_NODE_SEES_FRONTEND" ]; then + if [ ! -z "$RUNTIME_FRONTEND_SEES_NODE" ] ; then + # just move it + rm -rf "$RUNTIME_FRONTEND_JOB_DIR" + destdir=`dirname "$RUNTIME_FRONTEND_JOB_DIR"` + if ! mv "$RUNTIME_NODE_JOB_DIR" "$destdir"; then + echo "Failed to move '$RUNTIME_NODE_JOB_DIR' to '$destdir'" 1>&2 + RESULT=1 + fi + else + # remove links + rm -f "$RUNTIME_JOB_STDOUT" 2>/dev/null + rm -f "$RUNTIME_JOB_STDERR" 2>/dev/null + # move directory contents + for f in "$RUNTIME_NODE_JOB_DIR"/.* "$RUNTIME_NODE_JOB_DIR"/*; do + [ "$f" = "$RUNTIME_NODE_JOB_DIR/*" ] && continue # glob failed, no files + [ "$f" = "$RUNTIME_NODE_JOB_DIR/." ] && continue + [ "$f" = "$RUNTIME_NODE_JOB_DIR/.." ] && continue + [ "$f" = "$RUNTIME_NODE_JOB_DIR/.diag" ] && continue + [ "$f" = "$RUNTIME_NODE_JOB_DIR/.comment" ] && continue + if ! mv "$f" "$RUNTIME_FRONTEND_JOB_DIR"; then + echo "Failed to move '$f' to '$RUNTIME_FRONTEND_JOB_DIR'" 1>&2 + RESULT=1 + fi + done + rm -rf "$RUNTIME_NODE_JOB_DIR" + fi + fi + echo "exitcode=$RESULT" >> "$RUNTIME_JOB_DIAG" + exit $RESULT +------------------------------------------------------------------- + +job submitted successfully! +local job id: 734035 +----- exiting submit_slurm_job ----- + +2017-09-06T08:22:24Z Job state change SUBMIT -> INLRMS Reason: Job is passed to LRMS +------- Contents of output stream forwarded by the LRMS --------- +WARNING: GNU time not found at: /usr/bin/time +slurmstepd: error: *** JOB 734035 ON nsc-fp006 CANCELLED AT 2017-09-06T22:52:34 DUE TO TIME LIMIT *** +------------------------- End of output ------------------------- +2017-09-06T20:55:12Z Job state change INLRMS -> FINISHING Reason: Job finished executing in LRMS +2017-09-06T20:56:12Z Job state change FINISHING -> FINISHED Reason: Stage-out finished. +2017-09-06T21:49:02Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job +2017-09-06T23:49:20Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job +2017-09-07T01:49:53Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job +2017-09-07T03:50:36Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job +2017-09-07T05:50:39Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job +2017-09-07T07:50:54Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/grami b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/grami new file mode 100644 index 0000000..caf614a --- /dev/null +++ b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/grami @@ -0,0 +1,39 @@ +joboption_directory='/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm' +joboption_controldir='/var/spool/arc/jobstatus/' +joboption_arg_0='./workbench.sh' +joboption_stdin='/dev/null' +joboption_stdout='/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/out.txt' +joboption_stderr='/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/err.txt' +joboption_env_0=GRID_GLOBAL_JOBID='gsiftp://nsc.ijs.si:2811/jobs/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm' +joboption_cputime=45000 +joboption_walltime=45000 +joboption_memory=12000 +joboption_virtualmemory= +joboption_disk= +joboption_count=1 +joboption_runtime_0='APPS/BASE/THEANO-GPU-0.9' +joboption_jobname='accetuation_nn' +joboption_queue='gridlong' +joboption_starttime= +joboption_gridid='e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm' +joboption_priority=50 +joboption_inputfile_0='/workbench.py' +joboption_inputfile_1='/prepare_data.py' +joboption_inputfile_2='/content_shuffle_vector.h5' +joboption_inputfile_3='/shuffle_vector_test.h5' +joboption_inputfile_4='/shuffle_vector_train.h5' +joboption_inputfile_5='/shuffle_vector_validate.h5' +joboption_inputfile_6='/SlovarIJS_BESEDE_utf8.lex' +joboption_inputfile_7='/40_epoch.h5' +joboption_inputfile_8='/40_epoch_history.pkl' +joboption_inputfile_9='/notes' +joboption_inputfile_10='/workbench.sh' +joboption_outputfile_0='/40_epoch.h5' +joboption_outputfile_1='/workbench.py' +joboption_outputfile_2='/workbench.sh' +joboption_outputfile_3='/40_epoch_history.pkl' +joboption_outputfile_4='/notes' +joboption_outputfile_5='/out.txt' +joboption_outputfile_6='/err.txt' +joboption_outputfile_7='/gmlog' +joboption_jobid=734035 diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/input b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/input new file mode 100644 index 0000000..e69de29 diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/local b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/local new file mode 100644 index 0000000..a6e40fb --- /dev/null +++ b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/local @@ -0,0 +1,26 @@ +globalid=gsiftp://nsc.ijs.si:2811/jobs/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm +headnode=gsiftp://nsc.ijs.si:2811/jobs +interface=org.nordugrid.gridftpjob +lrms=SLURM +queue=gridlong +localid=734035 +args=workbench.sh +argscode=0 +subject=/C=SI/O=SiGNET/O=FRI Uni-Lj/CN=Luka Krsnik +starttime=20170906082151Z +lifetime=604800 +rerun=0 +downloads=0 +uploads=0 +jobname=accetuation_nn +gmlog=gmlog +cleanuptime=20170913205612Z +delegexpiretime=20170906200810Z +clientname=88.200.99.117:59172 +delegationid=b01f4f77dbc8 +sessiondir=/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm +diskspace=0 +freestagein=no +voms=/gen.vo.sling.si +transfershare=_default +priority=50 diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/output b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/output new file mode 100644 index 0000000..3a1d74f --- /dev/null +++ b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/output @@ -0,0 +1,8 @@ +/40_epoch.h5 +/workbench.py +/workbench.sh +/40_epoch_history.pkl +/notes +/out.txt +/err.txt +/gmlog diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/statistics b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/statistics new file mode 100644 index 0000000..e69de29 diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/xml b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/xml new file mode 100644 index 0000000..3b116bd --- /dev/null +++ b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/gmlog/xml @@ -0,0 +1,33 @@ + + urn:caid:nsc.ijs.si:org.nordugrid.gridftpjob:e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm + accetuation_nn + SubmittedVia=org.nordugrid.gridftpjob + single + urn:idfe:e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm + nordugrid:xrsl + nordugrid:FINISHED + bes:Finished + emies:terminal + emiesattr:client-stageout-possible + /C=SI/O=SiGNET/O=FRI Uni-Lj/CN=Luka Krsnik + gen0006 + 45000 + 45000 + 1 + APPS/BASE/THEANO-GPU-0.9 + /dev/null + out.txt + err.txt + gmlog + nsc-fp006.ijs.si + gridlong + 45010 + 2017-09-06T08:21:51Z + 2017-09-07T07:50:54Z + 2017-09-13T20:56:12Z + 2017-09-06T20:08:10Z + 88.200.99.117 + + urn:ogf:ComputingShare:nsc.ijs.si:gridlong + + diff --git a/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/out.txt b/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/out.txt new file mode 100644 index 0000000..e69de29 diff --git a/prepare_data.py b/prepare_data.py index b3a7a6e..487ad85 100644 --- a/prepare_data.py +++ b/prepare_data.py @@ -801,7 +801,7 @@ class Data: if word[i] == 1: final_word.append(feature_dictionary[z][j][k]) i += 1 - print(u''.join(final_word)) + # print(u''.join(final_word)) return u''.join(final_word) @staticmethod @@ -814,7 +814,188 @@ class Data: i += 1 return res + def test_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, syllable_dictionary=None): + errors = [] + num_of_pred = len(predictions) + num_of_correct_pred = 0 + for i in range(predictions.shape[0]): + if (np.around(predictions[i]) == y[i]).all(): + num_of_correct_pred += 1 + else: + if self._input_type == 'l': + decoded_x = self.decode_x(x[i], dictionary) + else: + decoded_x = self.decode_syllable_x(x[i], syllable_dictionary) + errors.append([i, + decoded_x, + self.decode_x_other_features(feature_dictionary, [x_other_features[i]]), + self.assign_stress_locations(decoded_x, np.around(predictions[i]), vowels, syllables=self._input_type != 'l'), + self.assign_stress_locations(decoded_x, y[i], vowels, syllables=self._input_type != 'l') + ]) + + return (num_of_correct_pred / float(num_of_pred)) * 100, errors + + @staticmethod + def decode_syllable_x(word_encoded, syllable_dictionary): + word = [] + for i in range(len(word_encoded)): + word.append(syllable_dictionary[word_encoded[i]]) + return ''.join(word[::-1]) + + def assign_stress_locations(self, word, y, vowels, syllables=False): + if not syllables: + word_list = list(word) + else: + word_list = list(word)[::-1] + vowel_num = 0 + for i in range(len(word_list)): + if self._is_vowel(word_list, i, vowels): + if word_list[i] == 'a' and y[vowel_num] == 1: + word_list[i] = 'á' + elif word_list[i] == 'e' and y[vowel_num] == 1: + word_list[i] = 'é' + elif word_list[i] == 'i' and y[vowel_num] == 1: + word_list[i] = 'í' + elif word_list[i] == 'o' and y[vowel_num] == 1: + word_list[i] = 'ó' + elif word_list[i] == 'u' and y[vowel_num] == 1: + word_list[i] = 'ú' + elif word_list[i] == 'r' and y[vowel_num] == 1: + word_list[i] = 'ŕ' + elif word_list[i] == 'A' and y[vowel_num] == 1: + word_list[i] = 'Á' + elif word_list[i] == 'E' and y[vowel_num] == 1: + word_list[i] = 'É' + elif word_list[i] == 'I' and y[vowel_num] == 1: + word_list[i] = 'Í' + elif word_list[i] == 'O' and y[vowel_num] == 1: + word_list[i] = 'Ó' + elif word_list[i] == 'U' and y[vowel_num] == 1: + word_list[i] = 'Ú' + elif word_list[i] == 'R' and y[vowel_num] == 1: + word_list[i] = 'Ŕ' + vowel_num += 1 + if not syllables: + return ''.join(word_list) + else: + return ''.join(word_list[::-1]) + + def test_type_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, accented_vowels, + syllable_dictionary=None): + errors = [] + num_of_pred = len(predictions) + num_of_correct_pred = 0 + num_of_correct_pred_words = 0 + accentuation_index = 0 + eye = np.eye(len(accented_vowels), dtype=int) + for i in range(len(y)): + correct_prediction = True + if self._input_type == 'l': + decoded_x = self.decode_x(x[i], dictionary) + else: + decoded_x = self.decode_syllable_x(x[i], syllable_dictionary) + wrong_word = decoded_x + correct_word = decoded_x + + for j in range(len(y[i])): + if y[i][j] > 0: + # ERROR AS IT IS CALCULATED + # arounded_predictions = np.around(predictions[accentuation_index]).astype(int) + + # MAX ELEMENT ONLY + # arounded_predictions = np.zeros(len(predictions[accentuation_index])) + # arounded_predictions[np.argmax(predictions[accentuation_index]).astype(int)] = 1 + + # MAX ELEMENT AMONGT POSSIBLE ONES + # if i == 313: + # print(decoded_x) + stressed_letter = self.get_accentuated_letter(decoded_x, j, vowels, syllables=self._input_type != 'l') + possible_places = np.zeros(len(predictions[accentuation_index])) + if stressed_letter == 'r': + possible_places[0] = 1 + elif stressed_letter == 'a': + possible_places[1] = 1 + possible_places[2] = 1 + elif stressed_letter == 'e': + possible_places[3] = 1 + possible_places[4] = 1 + possible_places[5] = 1 + elif stressed_letter == 'i': + possible_places[6] = 1 + possible_places[7] = 1 + elif stressed_letter == 'o': + possible_places[8] = 1 + possible_places[9] = 1 + possible_places[10] = 1 + elif stressed_letter == 'u': + possible_places[11] = 1 + possible_places[12] = 1 + possible_predictions = predictions[accentuation_index] * possible_places + + arounded_predictions = np.zeros(len(predictions[accentuation_index]), dtype=int) + arounded_predictions[np.argmax(possible_predictions).astype(int)] = 1 + + wrong_word = self.assign_word_accentuation_type(wrong_word, j, arounded_predictions, vowels, accented_vowels, + syllables=self._input_type != 'l', debug=i == 313) + correct_word = self.assign_word_accentuation_type(correct_word, j, eye[int(y[i][j])], vowels, accented_vowels, + syllables=self._input_type != 'l', debug=i == 313) + + if (eye[int(y[i][j])] == arounded_predictions).all(): + num_of_correct_pred += 1 + else: + correct_prediction = False + accentuation_index += 1 + + if correct_prediction: + num_of_correct_pred_words += 1 + else: + if self._input_type == 'l': + errors.append([i, + decoded_x[::-1], + self.decode_x_other_features(feature_dictionary, [x_other_features[i]]), + wrong_word[::-1], + correct_word[::-1] + ]) + else: + errors.append([i, + decoded_x, + self.decode_x_other_features(feature_dictionary, [x_other_features[i]]), + wrong_word, + correct_word + ]) + return (num_of_correct_pred / float(num_of_pred)) * 100, (num_of_correct_pred_words / float(len(y))) * 100, errors + + def get_accentuated_letter(self, word, location, vowels, syllables=False, debug=False): + # print(location) + vowel_index = 0 + word_list = list(word) + if not syllables: + word_list = list(word) + else: + word_list = list(word[::-1]) + for i in range(len(word_list)): + if self._is_vowel(word_list, i, vowels): + if location == vowel_index: + return word_list[i] + vowel_index += 1 + + def assign_word_accentuation_type(self, word, location, y, vowels, accented_vowels, syllables=False, debug=False): + vowel_index = 0 + if not syllables: + word_list = list(word) + else: + word_list = list(word[::-1]) + for i in range(len(word_list)): + if self._is_vowel(word_list, i, vowels): + if location == vowel_index: + if len(np.where(y == 1)[0]) == 1: + word_list[i] = accented_vowels[np.where(y == 1)[0][0]] + vowel_index += 1 + if not syllables: + return ''.join(word_list) + else: + return ''.join(word_list[::-1]) # def count_vowels(content, vowels): # num_all_vowels = 0 diff --git a/workbench.py b/workbench.py index b64a6f9..0eb7081 100644 --- a/workbench.py +++ b/workbench.py @@ -31,30 +31,40 @@ from prepare_data import * # data = Data('l', save_generated_data=False, number_of_syllables=True) # syllabled letters -data = Data('l', save_generated_data=False, accent_classification=True) +data = Data('s', save_generated_data=False, accent_classification=True) data.generate_data('letters_word_accetuation_train', 'letters_word_accetuation_test', 'letters_word_accetuation_validate', content_name='SlovarIJS_BESEDE_utf8.lex', content_shuffle_vector='content_shuffle_vector', shuffle_vector='shuffle_vector', inputs_location='', content_location='') +# concatenate test and train data +# data.x_train = np.concatenate((data.x_train, data.x_test), axis=0) +# data.x_other_features_train = np.concatenate((data.x_other_features_train, data.x_other_features_test), axis=0) +# data.y_train = np.concatenate((data.y_train, data.y_test), axis=0) + +# concatenate all data +data.x_train = np.concatenate((data.x_train, data.x_test, data.x_validate), axis=0) +data.x_other_features_train = np.concatenate((data.x_other_features_train, data.x_other_features_test, data.x_other_features_validate), axis=0) +data.y_train = np.concatenate((data.y_train, data.y_test, data.y_validate), axis=0) num_examples = len(data.x_train) # training set size nn_output_dim = 13 nn_hdim = 516 batch_size = 16 # actual_epoch = 1 -actual_epoch = 40 +actual_epoch = 20 # num_fake_epoch = 2 num_fake_epoch = 20 - - # letters -conv_input_shape=(23, 36) +# conv_input_shape=(23, 36) # syllabled letters -# conv_input_shape=(10, 5168) +# conv_input_shape=(10, 252) + +# syllables +conv_input_shape=(10, 5168) # othr_input = (140, ) @@ -62,11 +72,11 @@ othr_input = (150, ) conv_input = Input(shape=conv_input_shape, name='conv_input') # letters -x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input) -x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv) +# x_conv = Conv1D(115, (3), padding='same', activation='relu')(conv_input) +# x_conv = Conv1D(46, (3), padding='same', activation='relu')(x_conv) # syllabled letters -# x_conv = Conv1D(200, (2), padding='same', activation='relu')(conv_input) +x_conv = Conv1D(200, (2), padding='same', activation='relu')(conv_input) x_conv = MaxPooling1D(pool_size=2)(x_conv) x_conv = Flatten()(x_conv) @@ -76,9 +86,9 @@ x = concatenate([x_conv, othr_input]) # x = Dense(1024, input_dim=(516 + 256), activation='relu')(x) x = Dense(256, activation='relu')(x) x = Dropout(0.3)(x) -x = Dense(512, activation='relu')(x) +x = Dense(256, activation='relu')(x) x = Dropout(0.3)(x) -x = Dense(512, activation='relu')(x) +x = Dense(256, activation='relu')(x) x = Dropout(0.3)(x) x = Dense(nn_output_dim, activation='sigmoid')(x) @@ -94,8 +104,6 @@ model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[actual_accurac history = model.fit_generator(data.generator('train', batch_size, content_name='SlovarIJS_BESEDE_utf8.lex', content_location=''), data.x_train.shape[0]/(batch_size * num_fake_epoch), epochs=actual_epoch*num_fake_epoch, - validation_data=data.generator('test', batch_size, content_name='SlovarIJS_BESEDE_utf8.lex', content_location=''), - validation_steps=data.x_test.shape[0]/(batch_size * num_fake_epoch), verbose=2 )