2017-09-06T08:21:51Z Job state change UNDEFINED -> ACCEPTED Reason: (Re)Accepting new job 2017-09-06T08:21:51Z Job state change ACCEPTED -> PREPARING Reason: Starting job processing 2017-09-06T08:22:24Z Job state change PREPARING -> SUBMIT Reason: Pre-staging finished, passing job to LRMS ----- starting submit_slurm_job ----- SLURM jobname: accetuation_nn SLURM job script built SLURM script follows: ------------------------------------------------------------------- #!/bin/bash -l # SLURM batch job script built by grid-manager #SBATCH --no-requeue #SBATCH -e /net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm.comment #SBATCH -o /net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm.comment #SBATCH -p gridlong #SBATCH --nice=50 #SBATCH -J 'accetuation_nn' #SBATCH --get-user-env=10L #SBATCH -n 1 #SBATCH --constraint=gpu --gres=gpu:1 #SBATCH -t 750:0 #SBATCH -t 750:0 #SBATCH --mem-per-cpu=12000 # run singularity image if RTE with singularity is required if [ -z $SINGULARITY_CONTAINER ]; then exec /bin/singularity exec -B /var/spool/slurm,/cvmfs,/net/hold/data1,/data1,/data1/slurm,/home,/usr/lib64/nvidia /net/hold/data1/singularity-images/theano-gpu-2.img $0 fi # Overide umask of execution node (sometime values are really strange) umask 077 # source with arguments for DASH shells sourcewithargs() { script=$1 shift . $script } # Setting environment variables as specified by user export 'GRID_GLOBAL_JOBID=gsiftp://nsc.ijs.si:2811/jobs/e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm' RUNTIME_JOB_DIR=/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm RUNTIME_JOB_STDIN=/dev/null RUNTIME_JOB_STDOUT=/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/out.txt RUNTIME_JOB_STDERR=/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm/err.txt RUNTIME_JOB_DIAG=/net/hold/data1/arc/session//e1PMDmzlk9qnOeFSGmVnjcgoABFKDmABFKDmZmMKDmABFKDmnXN6sm.diag if [ ! -z "$RUNTIME_GRIDAREA_DIR" ] ; then RUNTIME_JOB_DIR=$RUNTIME_GRIDAREA_DIR/`basename $RUNTIME_JOB_DIR` RUNTIME_JOB_STDIN=`echo "$RUNTIME_JOB_STDIN" | sed "s#^$RUNTIME_JOB_DIR#$RUNTIME_GRIDAREA_DIR#"` RUNTIME_JOB_STDOUT=`echo "$RUNTIME_JOB_STDOUT" | sed "s#^$RUNTIME_JOB_DIR#$RUNTIME_GRIDAREA_DIR#"` RUNTIME_JOB_STDERR=`echo "$RUNTIME_JOB_STDERR" | sed "s#^$RUNTIME_JOB_DIR#$RUNTIME_GRIDAREA_DIR#"` RUNTIME_JOB_DIAG=`echo "$RUNTIME_JOB_DIAG" | sed "s#^$RUNTIME_JOB_DIR#$RUNTIME_GRIDAREA_DIR#"` RUNTIME_CONTROL_DIR=`echo "$RUNTIME_CONTROL_DIR" | sed "s#^$RUNTIME_JOB_DIR#$RUNTIME_GRIDAREA_DIR#"` fi RUNTIME_LOCAL_SCRATCH_DIR=${RUNTIME_LOCAL_SCRATCH_DIR:-$WORKDIR} RUNTIME_FRONTEND_SEES_NODE=${RUNTIME_FRONTEND_SEES_NODE:-} RUNTIME_NODE_SEES_FRONTEND=${RUNTIME_NODE_SEES_FRONTEND:-yes} if [ ! -z "$RUNTIME_LOCAL_SCRATCH_DIR" ] && [ ! -z "$RUNTIME_NODE_SEES_FRONTEND" ]; then RUNTIME_NODE_JOB_DIR="$RUNTIME_LOCAL_SCRATCH_DIR"/`basename "$RUNTIME_JOB_DIR"` rm -rf "$RUNTIME_NODE_JOB_DIR" mkdir -p "$RUNTIME_NODE_JOB_DIR" # move directory contents for f in "$RUNTIME_JOB_DIR"/.* "$RUNTIME_JOB_DIR"/*; do [ "$f" = "$RUNTIME_JOB_DIR/*" ] && continue # glob failed, no files [ "$f" = "$RUNTIME_JOB_DIR/." ] && continue [ "$f" = "$RUNTIME_JOB_DIR/.." ] && continue [ "$f" = "$RUNTIME_JOB_DIR/.diag" ] && continue [ "$f" = "$RUNTIME_JOB_DIR/.comment" ] && continue if ! mv "$f" "$RUNTIME_NODE_JOB_DIR"; then echo "Failed to move '$f' to '$RUNTIME_NODE_JOB_DIR'" 1>&2 exit 1 fi done if [ ! -z "$RUNTIME_FRONTEND_SEES_NODE" ] ; then # creating link for whole directory ln -s "$RUNTIME_FRONTEND_SEES_NODE"/`basename "$RUNTIME_JOB_DIR"` "$RUNTIME_JOB_DIR" else # keep stdout, stderr and control directory on frontend # recreate job directory mkdir -p "$RUNTIME_JOB_DIR" # make those files mkdir -p `dirname "$RUNTIME_JOB_STDOUT"` mkdir -p `dirname "$RUNTIME_JOB_STDERR"` touch "$RUNTIME_JOB_STDOUT" touch "$RUNTIME_JOB_STDERR" RUNTIME_JOB_STDOUT__=`echo "$RUNTIME_JOB_STDOUT" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` RUNTIME_JOB_STDERR__=`echo "$RUNTIME_JOB_STDERR" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` rm "$RUNTIME_JOB_STDOUT__" 2>/dev/null rm "$RUNTIME_JOB_STDERR__" 2>/dev/null if [ ! -z "$RUNTIME_JOB_STDOUT__" ] && [ "$RUNTIME_JOB_STDOUT" != "$RUNTIME_JOB_STDOUT__" ]; then ln -s "$RUNTIME_JOB_STDOUT" "$RUNTIME_JOB_STDOUT__" fi if [ "$RUNTIME_JOB_STDOUT__" != "$RUNTIME_JOB_STDERR__" ] ; then if [ ! -z "$RUNTIME_JOB_STDERR__" ] && [ "$RUNTIME_JOB_STDERR" != "$RUNTIME_JOB_STDERR__" ]; then ln -s "$RUNTIME_JOB_STDERR" "$RUNTIME_JOB_STDERR__" fi fi if [ ! -z "$RUNTIME_CONTROL_DIR" ] ; then # move control directory back to frontend RUNTIME_CONTROL_DIR__=`echo "$RUNTIME_CONTROL_DIR" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` mv "$RUNTIME_CONTROL_DIR__" "$RUNTIME_CONTROL_DIR" fi fi # adjust stdin,stdout & stderr pointers RUNTIME_JOB_STDIN=`echo "$RUNTIME_JOB_STDIN" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` RUNTIME_JOB_STDOUT=`echo "$RUNTIME_JOB_STDOUT" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` RUNTIME_JOB_STDERR=`echo "$RUNTIME_JOB_STDERR" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"` RUNTIME_FRONTEND_JOB_DIR="$RUNTIME_JOB_DIR" RUNTIME_JOB_DIR="$RUNTIME_NODE_JOB_DIR" fi if [ -z "$RUNTIME_NODE_SEES_FRONTEND" ] ; then mkdir -p "$RUNTIME_JOB_DIR" fi RESULT=0 if [ "$RESULT" = '0' ] ; then # Running runtime scripts export RUNTIME_CONFIG_DIR=${RUNTIME_CONFIG_DIR:-/net/hold/data1/arc/runtime/} runtimeenvironments= if [ ! -z "$RUNTIME_CONFIG_DIR" ] ; then if [ -r "${RUNTIME_CONFIG_DIR}/APPS/BASE/THEANO-GPU-0.9" ] ; then runtimeenvironments="${runtimeenvironments}APPS/BASE/THEANO-GPU-0.9;" cmdl=${RUNTIME_CONFIG_DIR}/APPS/BASE/THEANO-GPU-0.9 sourcewithargs $cmdl 1 if [ $? -ne '0' ] ; then echo "Runtime APPS/BASE/THEANO-GPU-0.9 script failed " 1>&2 echo "Runtime APPS/BASE/THEANO-GPU-0.9 script failed " 1>"$RUNTIME_JOB_DIAG" exit 1 fi fi fi echo "runtimeenvironments=$runtimeenvironments" >> "$RUNTIME_JOB_DIAG" if [ ! "X$SLURM_NODEFILE" = 'X' ] ; then if [ -r "$SLURM_NODEFILE" ] ; then cat "$SLURM_NODEFILE" | sed 's/\(.*\)/nodename=\1/' >> "$RUNTIME_JOB_DIAG" NODENAME_WRITTEN="1" else SLURM_NODEFILE= fi fi if [ "$RESULT" = '0' ] ; then # Changing to session directory HOME=$RUNTIME_JOB_DIR export HOME if ! cd "$RUNTIME_JOB_DIR"; then echo "Failed to switch to '$RUNTIME_JOB_DIR'" 1>&2 RESULT=1 fi if [ ! -z "$RESULT" ] && [ "$RESULT" != 0 ]; then exit $RESULT fi nodename=`/bin/hostname -f` echo "nodename=$nodename" >> "$RUNTIME_JOB_DIAG" echo "Processors=1" >> "$RUNTIME_JOB_DIAG" executable='./workbench.sh' # Check if executable exists if [ ! -f "$executable" ]; then echo "Path \"$executable\" does not seem to exist" 1>$RUNTIME_JOB_STDOUT 2>$RUNTIME_JOB_STDERR 1>&2 exit 1 fi # See if executable is a script, and extract the name of the interpreter line1=`dd if="$executable" count=1 2>/dev/null | head -n 1` command=`echo $line1 | sed -n 's/^#! *//p'` interpreter=`echo $command | awk '{print $1}'` if [ "$interpreter" = /usr/bin/env ]; then interpreter=`echo $command | awk '{print $2}'`; fi # If it's a script and the interpreter is not found ... [ "x$interpreter" = x ] || type "$interpreter" > /dev/null 2>&1 || { echo "Cannot run $executable: $interpreter: not found" 1>$RUNTIME_JOB_STDOUT 2>$RUNTIME_JOB_STDERR 1>&2 exit 1; } GNU_TIME='/usr/bin/time' if [ ! -z "$GNU_TIME" ] && ! "$GNU_TIME" --version >/dev/null 2>&1; then echo "WARNING: GNU time not found at: $GNU_TIME" 2>&1; GNU_TIME= fi if [ -z "$GNU_TIME" ] ; then "./workbench.sh" <$RUNTIME_JOB_STDIN 1>$RUNTIME_JOB_STDOUT 2>$RUNTIME_JOB_STDERR else $GNU_TIME -o "$RUNTIME_JOB_DIAG" -a -f 'WallTime=%es\nKernelTime=%Ss\nUserTime=%Us\nCPUUsage=%P\nMaxResidentMemory=%MkB\nAverageResidentMemory=%tkB\nAverageTotalMemory=%KkB\nAverageUnsharedMemory=%DkB\nAverageUnsharedStack=%pkB\nAverageSharedMemory=%XkB\nPageSize=%ZB\nMajorPageFaults=%F\nMinorPageFaults=%R\nSwaps=%W\nForcedSwitches=%c\nWaitSwitches=%w\nInputs=%I\nOutputs=%O\nSocketReceived=%r\nSocketSent=%s\nSignals=%k\n' "./workbench.sh" <$RUNTIME_JOB_STDIN 1>$RUNTIME_JOB_STDOUT 2>$RUNTIME_JOB_STDERR fi RESULT=$? fi fi if [ ! -z "$RUNTIME_CONFIG_DIR" ] ; then if [ -r "${RUNTIME_CONFIG_DIR}/APPS/BASE/THEANO-GPU-0.9" ] ; then cmdl=${RUNTIME_CONFIG_DIR}/APPS/BASE/THEANO-GPU-0.9 sourcewithargs $cmdl 2 fi fi if [ ! -z "$RUNTIME_LOCAL_SCRATCH_DIR" ] ; then find ./ -type l -exec rm -f "{}" ";" find ./ -type f -exec chmod u+w "{}" ";" chmod -R u-w "$RUNTIME_JOB_DIR"/'40_epoch.h5' 2>/dev/null chmod -R u-w "$RUNTIME_JOB_DIR"/'workbench.py' 2>/dev/null chmod -R u-w "$RUNTIME_JOB_DIR"/'workbench.sh' 2>/dev/null chmod -R u-w "$RUNTIME_JOB_DIR"/'40_epoch_history.pkl' 2>/dev/null chmod -R u-w "$RUNTIME_JOB_DIR"/'notes' 2>/dev/null chmod -R u-w "$RUNTIME_JOB_DIR"/'out.txt' 2>/dev/null chmod -R u-w "$RUNTIME_JOB_DIR"/'err.txt' 2>/dev/null chmod -R u-w "$RUNTIME_JOB_DIR"/'gmlog' 2>/dev/null find ./ -type f -perm /200 -exec rm -f "{}" ";" find ./ -type f -exec chmod u+w "{}" ";" fi if [ ! -z "$RUNTIME_LOCAL_SCRATCH_DIR" ] && [ ! -z "$RUNTIME_NODE_SEES_FRONTEND" ]; then if [ ! -z "$RUNTIME_FRONTEND_SEES_NODE" ] ; then # just move it rm -rf "$RUNTIME_FRONTEND_JOB_DIR" destdir=`dirname "$RUNTIME_FRONTEND_JOB_DIR"` if ! mv "$RUNTIME_NODE_JOB_DIR" "$destdir"; then echo "Failed to move '$RUNTIME_NODE_JOB_DIR' to '$destdir'" 1>&2 RESULT=1 fi else # remove links rm -f "$RUNTIME_JOB_STDOUT" 2>/dev/null rm -f "$RUNTIME_JOB_STDERR" 2>/dev/null # move directory contents for f in "$RUNTIME_NODE_JOB_DIR"/.* "$RUNTIME_NODE_JOB_DIR"/*; do [ "$f" = "$RUNTIME_NODE_JOB_DIR/*" ] && continue # glob failed, no files [ "$f" = "$RUNTIME_NODE_JOB_DIR/." ] && continue [ "$f" = "$RUNTIME_NODE_JOB_DIR/.." ] && continue [ "$f" = "$RUNTIME_NODE_JOB_DIR/.diag" ] && continue [ "$f" = "$RUNTIME_NODE_JOB_DIR/.comment" ] && continue if ! mv "$f" "$RUNTIME_FRONTEND_JOB_DIR"; then echo "Failed to move '$f' to '$RUNTIME_FRONTEND_JOB_DIR'" 1>&2 RESULT=1 fi done rm -rf "$RUNTIME_NODE_JOB_DIR" fi fi echo "exitcode=$RESULT" >> "$RUNTIME_JOB_DIAG" exit $RESULT ------------------------------------------------------------------- job submitted successfully! local job id: 734035 ----- exiting submit_slurm_job ----- 2017-09-06T08:22:24Z Job state change SUBMIT -> INLRMS Reason: Job is passed to LRMS ------- Contents of output stream forwarded by the LRMS --------- WARNING: GNU time not found at: /usr/bin/time slurmstepd: error: *** JOB 734035 ON nsc-fp006 CANCELLED AT 2017-09-06T22:52:34 DUE TO TIME LIMIT *** ------------------------- End of output ------------------------- 2017-09-06T20:55:12Z Job state change INLRMS -> FINISHING Reason: Job finished executing in LRMS 2017-09-06T20:56:12Z Job state change FINISHING -> FINISHED Reason: Stage-out finished. 2017-09-06T21:49:02Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job 2017-09-06T23:49:20Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job 2017-09-07T01:49:53Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job 2017-09-07T03:50:36Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job 2017-09-07T05:50:39Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job 2017-09-07T07:50:54Z Job state change UNDEFINED -> FINISHED Reason: (Re)Accepting new job