Spaces:
Runtime error
Runtime error
# Slurm batch script: loads the Stages/2022 module stack, activates the
# envs/hdfml virtual environment and launches the given Python program
# through srun.
#
# Usage: sbatch <this-script> <python-script> [args...]
#   Everything after the script name is forwarded verbatim to `python -u`.
#
# NOTE(review): no shebang line is visible in this view; sbatch expects one
# as the very first line of the file -- confirm it is present.

# Resource request: 8 nodes x 4 tasks per node (32 tasks), 8 CPUs per task,
# 6 hour wall-clock limit, on the "batch" partition.
#SBATCH --account=cstdl
#SBATCH --nodes=8
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --time=06:00:00
# NOTE(review): no GPU count is given here, while CUDA_VISIBLE_DEVICES below
# lists four devices -- confirm whether this should be --gres=gpu:4.
#SBATCH --gres=gpu
#SBATCH --partition=batch

# Start from a clean module environment, then load a pinned toolchain so the
# compiler, MPI, CUDA and framework versions are consistent with each other.
ml purge
# Quoted so a path containing whitespace stays a single argument.
ml use "$OTHERSTAGES"
ml Stages/2022
ml GCC/11.2.0
ml OpenMPI/4.1.2
ml CUDA/11.5
ml cuDNN/8.3.1.22-CUDA-11.5
ml NCCL/2.12.7-1-CUDA-11.5
ml PyTorch/1.11-CUDA-11.5
ml Horovod/0.24
ml torchvision/0.12.0

# Activate the virtual environment after the modules are loaded. The path is
# relative, so it is resolved against the directory the job runs in.
source envs/hdfml/bin/activate

# Expose device indices 0-3 to every task launched below.
export CUDA_VISIBLE_DEVICES=0,1,2,3

# Record the Slurm job id in the job output.
echo "Job id: $SLURM_JOB_ID"

# presumably read by the Hugging Face tokenizers library -- TODO confirm
export TOKENIZERS_PARALLELISM=false
# presumably read by PyTorch's NCCL backend -- TODO confirm
export NCCL_ASYNC_ERROR_HANDLING=1

# Launch the Python program through srun; -u keeps stdout/stderr unbuffered.
# "$@" (rather than unquoted $*) forwards each argument exactly as it was
# given: arguments containing spaces stay intact and glob characters are not
# expanded a second time.
srun python -u "$@"