#!/bin/bash
#
# Slurm batch job: launches train_rev.py through torchrun on a single GPU.
#
# Usage:
#   sbatch <this_script> OUTPUT_DIR EXP_NAME MARGIN TEMP MODE
#
#   OUTPUT_DIR  forwarded to train_rev.py as --output_dir
#   EXP_NAME    forwarded as --model_id
#   MARGIN      forwarded as --margin_value
#   TEMP        forwarded as --temperature
#   MODE        forwarded as --metric_mode
#
# The scheduler directives below must stay ahead of the first executable
# command; sbatch stops scanning for directives once it reaches one.
#
#SBATCH --job-name=dbs6-ace3
#SBATCH --partition=a6000
#SBATCH --gres=gpu:1
# Job time limit, format d-hh:mm:ss.
#SBATCH --time=12-00:00:00
# CPU memory size.
#SBATCH --mem=28000
#SBATCH --cpus-per-task=4
# NOTE(review): Slurm does not create missing directories for --output;
# confirm ./trainlog exists in the submission directory before submitting.
#SBATCH --output=./trainlog/dmmi_ACE_gref_m10_tmp005_bs6.log

# Fail fast on a bad invocation instead of handing torchrun a malformed
# argument list.
if (( $# < 5 )); then
  printf 'Usage: sbatch <this_script> OUTPUT_DIR EXP_NAME MARGIN TEMP MODE\n' >&2
  exit 2
fi

# Toolchain and Python environment.
ml purge
ml load cuda/11.8
eval "$(conda shell.bash hook)"
conda activate risall

# train_rev.py is referenced by a relative path, so the working directory
# matters; stop here rather than run from the wrong place.
cd /data2/projects/chaeyun/RIS-DMMI || {
  printf 'error: cannot cd to /data2/projects/chaeyun/RIS-DMMI\n' >&2
  exit 1
}

export NCCL_P2P_DISABLE=1
export NVIDIA_TF32_OVERRIDE=0

GPUS=1
OUTPUT_DIR=$1
EXP_NAME=$2
MARGIN=$3
TEMP=$4
MODE=$5
MASTER_PORT=5728

# TRAIN
# hardpos_only, hardpos_only_rev
#
# Arguments are kept in an array so that each value reaches train_rev.py as
# exactly one word, regardless of spaces or glob characters in the inputs.
python_args=(
  --model dmmi_swin_hardpos_only
  --dataset refcocog
  --splitBy umd
  --output_dir "${OUTPUT_DIR}"
  --model_id "${EXP_NAME}"
  --batch-size 6
  --lr 0.00005
  --wd 1e-2
  --window12
  --swin_type base
  --pretrained_backbone /data2/projects/chaeyun/LAVT-RIS/pretrained_weights/swin_base_patch4_window12_384_22k.pth
  --epochs 40
  --img_size 480
  --metric_learning
  --margin_value "${MARGIN}"
  --temperature "${TEMP}"
  --metric_mode "${MODE}"
  --exclude_multiobj
)

# Last command in the script: its exit status becomes the job's exit status.
CUDA_VISIBLE_DEVICES=0 torchrun \
  --nproc_per_node="${GPUS}" \
  --master_port="${MASTER_PORT}" \
  train_rev.py "${python_args[@]}"

# Alternative launchers, kept for reference (they predate the array form of
# python_args above, hence the plain $python_args):
# python -m torch.distributed.launch --nproc_per_node=$GPUS train_rev.py $python_args
# CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=$GPUS train_rev.py $python_args

# Example submissions:
# sbatch train_ace_bs4.sh ./experiments/dmmi_grefu_ace/gref_m10_tmp007_bs6 gref_m10_tmp007_bs6 10 0.07 hardpos_only
# sbatch train_ace_bs4.sh ./experiments/dmmi_grefu_ace/gref_m12_tmp007_bs6 gref_m12_tmp007_bs6 12 0.07 hardpos_only
# sbatch train_ace_bs4.sh ./experiments/dmmi_grefu_ace/gref_m10_tmp005_bs6 gref_m10_tmp005_bs6 10 0.05 hardpos_only