Commit a8858ff · Mehdi Cherti
Parent(s): 8ab4de9

remove scripts
- eval_all.sh +0 -21
- scripts/fid.sh +0 -0
- scripts/init.sh +0 -14
- scripts/init_2020.sh +0 -69
- scripts/init_2022.sh +0 -34
- scripts/run_hdfml.sh +0 -25
- scripts/run_jurecadc_conda.sh +0 -23
- scripts/run_jurecadc_ddp.sh +0 -20
- scripts/run_jusuf_ddp.sh +0 -14
- scripts/run_juwelsbooster_conda.sh +0 -19
- scripts/run_juwelsbooster_ddp.sh +0 -17

eval_all.sh DELETED
@@ -1,21 +0,0 @@
-#!/bin/bash
-#for model in ddgan_sd_v10 ddgan_laion2b_v2 ddgan_ddb_v1 ddgan_ddb_v2 ddgan_ddb_v3 ddgan_ddb_v4;do
-#for model in ddgan_ddb_v2 ddgan_ddb_v3 ddgan_ddb_v4 ddgan_ddb_v5;do
-#for model in ddgan_ddb_v4 ddgan_ddb_v6 ddgan_ddb_v7 ddgan_laion_aesthetic_v15;do
-#for model in ddgan_ddb_v6;do
-for model in ddgan_laion_aesthetic_v15;do
-    if [ "$model" == "ddgan_ddb_v3" ]; then
-        bs=32
-    elif [ "$model" == "ddgan_laion_aesthetic_v15" ]; then
-        bs=32
-    elif [ "$model" == "ddgan_ddb_v6" ]; then
-        bs=32
-    elif [ "$model" == "ddgan_ddb_v4" ]; then
-        bs=16
-    else
-        bs=64
-    fi
-    sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh run.py test $model --cond-text=parti_prompts.txt --batch-size=$bs --epoch=-1 --compute-clip-score --eval-name=parti;
-    sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh run.py test $model --fid --real-img-dir inception_statistics_coco_val2014_256x256.npz --cond-text coco_val2014_captions.txt --batch-size=$bs --epoch=-1 --nb-images-for-fid=30000 --eval-name=coco --compute-clip-score;
-    sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh run.py test $model --cond-text=drawbench.txt --batch-size=$bs --epoch=-1 --compute-clip-score --eval-name=drawbench;
-done
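
Note: the deleted eval_all.sh picks a per-model batch size with an if/elif chain before submitting three evaluation jobs (Parti prompts, COCO FID + CLIP score, DrawBench). For anyone reconstructing this flow, the same table can be written as a bash case statement; this is an illustrative sketch, not code from the repository:

pick_batch_size () {
    # Sketch only: same mapping as the deleted eval_all.sh (32 for the
    # three listed models, 16 for ddgan_ddb_v4, 64 otherwise).
    case "$1" in
        ddgan_ddb_v3|ddgan_ddb_v6|ddgan_laion_aesthetic_v15) echo 32 ;;
        ddgan_ddb_v4) echo 16 ;;
        *) echo 64 ;;
    esac
}
bs=$(pick_batch_size ddgan_laion_aesthetic_v15)   # yields 32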

scripts/fid.sh DELETED
File without changes

scripts/init.sh DELETED
@@ -1,14 +0,0 @@
-ml purge
-ml use $OTHERSTAGES
-ml Stages/2022
-ml GCC/11.2.0
-ml OpenMPI/4.1.2
-ml CUDA/11.5
-ml cuDNN/8.3.1.22-CUDA-11.5
-ml NCCL/2.12.7-1-CUDA-11.5
-ml PyTorch/1.11-CUDA-11.5
-ml Horovod/0.24
-ml torchvision/0.12.0
-source /p/home/jusers/cherti1/jureca/ccstdl/code/feed_forward_vqgan_clip/envs/jureca_2022/bin/activate
-export HOROVOD_CACHE_CAPACITY=4096
-export CUDA_VISIBLE_DEVICES=0,1,2,3

scripts/init_2020.sh DELETED
@@ -1,69 +0,0 @@
-machine=$(cat /etc/FZJ/systemname)
-if [[ "$machine" == jurecadc ]]; then
-    export CUDA_VISIBLE_DEVICES=0,1,2,3
-    #ml use $OTHERSTAGES
-    #ml Stages/2020
-    #ml GCC/9.3.0
-    #ml OpenMPI/4.1.0rc1
-    #ml CUDA/11.0
-    #ml cuDNN/8.0.2.39-CUDA-11.0
-    #ml NCCL/2.8.3-1-CUDA-11.0
-    #ml PyTorch
-    #ml Horovod/0.20.3-Python-3.8.5
-    #ml scikit
-    #source /p/project/covidnetx/environments/jureca/bin/activate
-    ml purge
-    ml use $OTHERSTAGES
-    ml Stages/2020
-    ml GCC/10.3.0
-    ml OpenMPI/4.1.1
-    ml Horovod/0.23.0-Python-3.8.5
-    ml scikit
-    source /p/project/covidnetx/environments/jureca/bin/activate
-fi
-if [[ "$machine" == juwelsbooster ]]; then
-    export CUDA_VISIBLE_DEVICES=0,1,2,3
-    #ml use $OTHERSTAGES
-    #ml Stages/2020
-    #ml GCC/9.3.0
-    #ml OpenMPI/4.1.0rc1
-    #ml CUDA/11.0
-    #ml cuDNN/8.0.2.39-CUDA-11.0
-    #ml NCCL/2.8.3-1-CUDA-11.0
-    #ml PyTorch
-    #ml Horovod/0.20.3-Python-3.8.5
-    #ml scikit
-
-    #ml Stages/2021
-    #ml GCC
-    #ml OpenMPI
-    #ml CUDA
-    #ml cuDNN
-    #ml NCCL
-    #ml PyTorch
-    #ml Horovod
-    #ml scikit
-
-    ml purge
-    ml use $OTHERSTAGES
-    ml Stages/2020
-    ml GCC/10.3.0
-    ml OpenMPI/4.1.1
-    ml Horovod/0.23.0-Python-3.8.5
-    ml scikit
-    source /p/project/covidnetx/environments/juwels_booster/bin/activate
-fi
-if [[ "$machine" == jusuf ]]; then
-    ml purge
-    ml use $OTHERSTAGES
-    ml Stages/2020
-    ml GCC/9.3.0
-    ml OpenMPI/4.1.0rc1
-    ml CUDA/11.0
-    ml cuDNN/8.0.2.39-CUDA-11.0
-    ml NCCL/2.8.3-1-CUDA-11.0
-    ml PyTorch
-    ml Horovod/0.20.3-Python-3.8.5
-    #ml scikit
-    source /p/project/covidnetx/environments/jusuf/bin/activate
-fi

scripts/init_2022.sh DELETED
@@ -1,34 +0,0 @@
-machine=$(cat /etc/FZJ/systemname)
-if [[ "$machine" == jurecadc ]]; then
-    export CUDA_VISIBLE_DEVICES=0,1,2,3
-    ml purge
-    ml use $OTHERSTAGES
-    ml Stages/2022
-    ml GCC/11.2.0
-    ml OpenMPI/4.1.2
-    ml CUDA/11.5
-    ml cuDNN/8.3.1.22-CUDA-11.5
-    ml NCCL/2.12.7-1-CUDA-11.5
-    ml PyTorch/1.11-CUDA-11.5
-    ml Horovod/0.24
-    ml torchvision/0.12.0
-    source /p/project/covidnetx/environments/jureca_2022/bin/activate
-fi
-if [[ "$machine" == juwelsbooster ]]; then
-    export CUDA_VISIBLE_DEVICES=0,1,2,3
-    ml purge
-    ml use $OTHERSTAGES
-    ml Stages/2022
-    ml GCC/11.2.0
-    ml OpenMPI/4.1.2
-    ml CUDA/11.5
-    ml cuDNN/8.3.1.22-CUDA-11.5
-    ml NCCL/2.12.7-1-CUDA-11.5
-    ml PyTorch/1.11-CUDA-11.5
-    ml Horovod/0.24
-    ml torchvision/0.12.0
-    source /p/project/covidnetx/environments/juwels_booster_2022/bin/activate
-fi
-if [[ "$machine" == jusuf ]]; then
-    echo not supported
-fi

scripts/run_hdfml.sh DELETED
@@ -1,25 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=cstdl
-#SBATCH --nodes=8
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=8
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu
-#SBATCH --partition=batch
-ml purge
-ml use $OTHERSTAGES
-ml Stages/2022
-ml GCC/11.2.0
-ml OpenMPI/4.1.2
-ml CUDA/11.5
-ml cuDNN/8.3.1.22-CUDA-11.5
-ml NCCL/2.12.7-1-CUDA-11.5
-ml PyTorch/1.11-CUDA-11.5
-ml Horovod/0.24
-ml torchvision/0.12.0
-source envs/hdfml/bin/activate
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-export NCCL_ASYNC_ERROR_HANDLING=1
-srun python -u $*

scripts/run_jurecadc_conda.sh DELETED
@@ -1,23 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=zam
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=24
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu:4
-#SBATCH --partition=dc-gpu
-ml CUDA
-source /p/project/laionize/miniconda/bin/activate
-conda activate ddgan
-#source scripts/init_2022.sh
-#source scripts/init_2020.sh
-#source scripts/init.sh
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-#export NCCL_ASYNC_ERROR_HANDLING=1
-export NCCL_IB_TIMEOUT=50
-export UCX_RC_TIMEOUT=4s
-export NCCL_IB_RETRY_CNT=10
-export TORCH_DISTRIBUTED_DEBUG=INFO
-srun python -u $*

scripts/run_jurecadc_ddp.sh DELETED
@@ -1,20 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=zam
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=24
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu:4
-#SBATCH --partition=dc-gpu
-source set_torch_distributed_vars.sh
-#source scripts/init_2022.sh
-#source scripts/init_2020.sh
-source scripts/init.sh
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-#export NCCL_ASYNC_ERROR_HANDLING=1
-export NCCL_IB_TIMEOUT=50
-export UCX_RC_TIMEOUT=4s
-export NCCL_IB_RETRY_CNT=10
-srun python -u $*
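
Note: the DDP launchers source set_torch_distributed_vars.sh, a file that is not part of this diff, so its actual contents are unknown here. As an assumed sketch only, a SLURM batch script commonly exports the shared torch.distributed rendezvous variables like this:

# Assumed sketch, NOT the repository's actual set_torch_distributed_vars.sh.
# Rendezvous settings shared by every rank can be exported once here:
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
export MASTER_PORT=29500
export WORLD_SIZE=$SLURM_NTASKS
# Per-task ranks (SLURM_PROCID, SLURM_LOCALID) are only defined inside each
# task launched by srun, so they are usually read there rather than here.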

scripts/run_jusuf_ddp.sh DELETED
@@ -1,14 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=zam
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=24
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu:1
-#SBATCH --partition=gpus
-source set_torch_distributed_vars.sh
-source scripts/init.sh
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-srun python -u $*

scripts/run_juwelsbooster_conda.sh DELETED
@@ -1,19 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=laionize
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=24
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu:4
-#SBATCH --partition=booster
-ml CUDA
-source /p/project/laionize/miniconda/bin/activate
-conda activate ddgan
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-#export NCCL_ASYNC_ERROR_HANDLING=1
-export NCCL_IB_TIMEOUT=50
-export UCX_RC_TIMEOUT=4s
-export NCCL_IB_RETRY_CNT=10
-srun python -u $*

scripts/run_juwelsbooster_ddp.sh DELETED
@@ -1,17 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=covidnetx
-#SBATCH --nodes=4
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=24
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu:4
-#SBATCH --partition=booster
-source set_torch_distributed_vars.sh
-#source scripts/init_2022.sh
-#source scripts/init_2020.sh
-source scripts/init.sh
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-export NCCL_ASYNC_ERROR_HANDLING=1
-srun python -u $*
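
Note: every deleted launcher script ends in "srun python -u $*", so the entry point and its flags pass straight through from the sbatch command line. A usage example, with the flags taken verbatim from the deleted eval_all.sh:

# Submit one DrawBench evaluation job via the jureca DDP launcher.
sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh \
    run.py test ddgan_laion_aesthetic_v15 --cond-text=drawbench.txt \
    --batch-size=32 --epoch=-1 --compute-clip-score --eval-name=drawbench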