File size: 2,355 Bytes
8d82201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/bin/bash

#SBATCH --job-name=lavt_ccg      # job name
#SBATCH --mail-user=YOUR_EMAIL_ADDRESS   # TODO: address was redacted in this copy; fill in before submitting
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --partition=a100         # a6000 or a100
#SBATCH --gres=gpu:2
#SBATCH --time=7-00:00:00        # d-hh:mm:ss, max time limit
#SBATCH --mem=84000              # cpu memory size
#SBATCH --cpus-per-task=8        # cpu num
#SBATCH --output=log_refcocog_google_lavt_one.txt         # std output filename

# Prepare the runtime environment for the job: CUDA module first, then the
# conda environment named "lavt".
# NOTE(review): `ml` is presumably the cluster's module-system shorthand for
# `module load` -- confirm it is defined in non-interactive batch shells.
ml cuda/11.0                # load the required CUDA version
# `conda shell.bash hook` prints shell code; eval-ing it here is what makes the
# `conda activate` call on the next line available in this shell.
eval "$(conda shell.bash hook)"  # Initialize Conda Environment
conda activate lavt             # Activate your conda environment


# train
# mkdir ./models

# mkdir ./models/gref_umd/
# CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node 2 --master_port 12345 train.py --model lavt --dataset refcocog --splitBy umd --model_id gref_umd --batch-size 14 --lr 0.00005 --wd 1e-2 --swin_type base --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth --epochs 40 --img_size 480 2>&1 | tee ./models/gref_umd/output

# mkdir ./models/gref_umd_lavt_one/
# srun python -m torch.distributed.launch --nproc_per_node 2 --master_port 12345 train.py --model lavt_one --dataset refcocog --splitBy umd --model_id gref_umd_lavt_one --batch-size 14 --lr 0.00005 --wd 1e-2 --swin_type base --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth --epochs 40 --img_size 480 2>&1 | tee ./models/gref_umd_lavt_one/output

# mkdir ./models/gref_google
# CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node 4 --master_port 12345 train.py --model lavt --dataset refcocog --splitBy google --model_id gref_google --batch-size 8 --lr 0.00005 --wd 1e-2 --swin_type base --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth --epochs 40 --img_size 480 2>&1 | tee ./models/gref_google/output

# Train the lavt_one model on RefCOCOg (google split) with two processes on one
# node, matching the two GPUs requested in the header. Everything the launcher
# prints (stdout and stderr) is mirrored to ${out_dir}/output.

# Without pipefail the pipeline's exit status is tee's, so a crashed training
# run would still exit 0 and SLURM would record the job as successful.
set -o pipefail

model_id=gref_google_lavt_one
out_dir=./models/${model_id}

# -p creates ./models when it is missing and is a no-op when the job is re-run.
mkdir -p "$out_dir" || {
  printf 'error: cannot create output directory %s\n' "$out_dir" >&2
  exit 1
}

srun python -m torch.distributed.launch --nproc_per_node 2 --master_port 12347 \
  train.py \
  --model lavt_one \
  --dataset refcocog \
  --splitBy google \
  --model_id "$model_id" \
  --batch-size 14 \
  --lr 0.00005 \
  --wd 1e-2 \
  --swin_type base \
  --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth \
  --epochs 50 \
  --img_size 480 \
  2>&1 | tee "$out_dir/output"