VRIS_vip / LAVT-RIS /donghwa /scripts /submit_train_refcocog_mosaic.sh

Upload folder using huggingface_hub

8d82201 verified 28 days ago

3.5 kB

	#!/bin/bash

	# Slurm batch script: the #SBATCH lines below request resources for the job,
	# then the CUDA module and the "lavt" conda environment are loaded before the
	# training command further down runs.
	#SBATCH --job-name=lavt_ccggr # Slurm job name
	# NOTE(review): the next directive has no option flag; it looks like a
	# redacted "--mail-user=<address>" -- confirm and restore before submitting.
	#SBATCH [email protected]
	#SBATCH --mail-type=BEGIN,END,FAIL
	#SBATCH --partition=a3000 # a6000 or a100
	#SBATCH --gres=gpu:2
	#SBATCH --time=7-00:00:00 # d-hh:mm:ss, max time limit
	#SBATCH --mem=84000 # cpu memory size (Slurm's default unit is MB)
	#SBATCH --cpus-per-task=8 # cpu num
	#SBATCH --output=log_refcocog_google_random_460_0up_resume.txt # std output filename

	ml cuda/11.0 # load the required CUDA version
	eval "$(conda shell.bash hook)" # Initialize Conda Environment
	conda activate lavt # Activate your conda environment



	# train
	# mkdir ./models

	# mkdir ./models/gref_umd/
	# srun python -m torch.distributed.launch --nproc_per_node 2 --master_port 12345 train_mosaic.py --model lavt --dataset refcocog --splitBy umd --model_id gref_umd --batch-size 14 --lr 0.00005 --wd 1e-2 --swin_type base --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth --epochs 40 --img_size 480 2>&1 | tee ./models/gref_umd/output

	# mkdir ./models/mosaic_gref_umd_lavt_one/
	# srun python -m torch.distributed.launch --nproc_per_node 2 --master_port 12345 train_mosaic.py --model lavt_one --dataset refcocog --splitBy umd --model_id mosaic_gref_umd_lavt_one --batch-size 14 --lr 0.00005 --wd 1e-2 --swin_type base --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth --epochs 40 --img_size 480 2>&1 | tee ./models/mosaic_gref_umd_lavt_one/output

	# mkdir ./models/gref_google
	# srun python -m torch.distributed.launch --nproc_per_node 2 --master_port 12345 train_mosaic.py --model lavt_one --dataset refcocog --splitBy google --model_id gref_google_lavt_one --batch-size 8 --lr 0.00005 --wd 1e-2 --swin_type base --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth --epochs 40 --img_size 480 2>&1 | tee ./models/mosaic_gref_google_lavt_one/output

	# mkdir ./models/mosaic_gref_google_lavt_one
	# srun python -m torch.distributed.launch --nproc_per_node 2 --master_port 13347 train_mosaic.py --model lavt_one --dataset refcocog --splitBy google --model_id mosaic_gref_google_lavt_one --batch-size 14 --lr 0.00005 --wd 1e-2 --swin_type base --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth --epochs 50 --img_size 480 2>&1 | tee ./models/mosaic_gref_google_lavt_one/output

	# tensorboard X
	# srun python -m torch.distributed.launch --nproc_per_node 1 --master_port 14567 train_mosaic.py --model lavt_one --dataset refcocog --splitBy umd --model_id lmdb_test --batch-size 5 --lr 0.00005 --wd 1e-2 --swin_type base --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth --epochs 50 --img_size 480 2>&1 | tee ./models/lmdb_test/output

	# tensorboard O
	# Resume training of the lavt_one model on refcocog (splitBy google) using
	# config/random_460.yaml, starting from the model_best_* checkpoint stored in
	# the experiment directory. The launcher's stdout and stderr are merged and
	# piped through tee, so they reach both the Slurm output file and
	# log_resume.txt inside the experiment directory.
	exp_dir=experiments/refcocog_google/gref_google_random_460_0up

	# tee cannot create its log file unless the directory exists; -p makes this
	# a no-op when the experiment directory is already there.
	mkdir -p "./${exp_dir}"

	# Without pipefail the pipeline's exit status is tee's, so a crashed training
	# run would be reported to Slurm (and to --mail-type=FAIL) as a success.
	set -o pipefail

	srun python -m torch.distributed.launch --nproc_per_node 2 --master_port 35327 train_mosaic.py \
		--model lavt_one --dataset refcocog --splitBy google --model_id gref_google_random_460_0up \
		--batch-size 16 --lr 0.00005 --wd 1e-2 --swin_type base \
		--pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth \
		--epochs 40 --img_size 480 --config config/random_460.yaml \
		--resume "${exp_dir}/model_best_gref_google_random_460_0up.pth" 2>&1 \
		| tee "./${exp_dir}/log_resume.txt"