VerbCentric-RIS / scripts /train_tmp_seunghoon.sh
dianecy's picture
Upload folder using huggingface_hub
599450c verified
#!/bin/bash
# Environment setup: load the user's shell configuration, then activate the
# "cris" conda environment.
source "${HOME}/.bashrc"
eval "$(conda shell.bash hook)"
# Abort if the environment cannot be activated; otherwise the training would
# run with whichever python happens to be first on PATH.
conda activate cris || {
  echo "Failed to activate conda environment 'cris'" >&2
  exit 1
}

# CUDA and distributed-training (NCCL) environment settings.
export NCCL_P2P_DISABLE=1
export NCCL_DEBUG=INFO
export NCCL_IB_DISABLE=1
export NCCL_SOCKET_IFNAME=^docker0,lo
export CUDA_VISIBLE_DEVICES=0,1

# The config file and training script referenced further down are given as
# relative paths, so abort if the project directory cannot be entered.
cd /home/s1/chaeyunkim/VerbCentric_CY || {
  echo "Cannot cd to /home/s1/chaeyunkim/VerbCentric_CY" >&2
  exit 1
}
# Argument check: exactly three positional arguments are required.
if [[ "$#" -ne 3 ]]; then
  # Usage is a diagnostic, so it goes to stderr.
  echo "Usage: $0 <output_dir> <batch_size> <exp_name>" >&2
  exit 1
fi

# Bind the positional arguments to named variables.
OUTPUT_DIR=$1
BATCH_SIZE=$2
EXP_NAME=$3

# Print the three settings, one per line. printf with quoted expansions keeps
# values containing spaces or glob characters intact and does not treat a
# value such as "-n" as an option, which plain `echo $VAR` would.
printf '%s\n' "$OUTPUT_DIR" "$BATCH_SIZE" "$EXP_NAME"
# Create the output directory (including missing parents) when it is absent.
[[ -d "$OUTPUT_DIR" ]] || {
  echo "Directory $OUTPUT_DIR does not exist. Creating it..."
  mkdir -p "$OUTPUT_DIR"
}
# Checkpoint detection: when last_model.pth already exists in the output
# directory, set resume_arg to "--resume" and model_weights to that file.
# With no checkpoint, resume_arg stays empty and model_weights is not set.
FINAL_MODEL="${OUTPUT_DIR}/last_model.pth"
resume_arg=""
if [[ -f "$FINAL_MODEL" ]]; then
  resume_arg="--resume"
  model_weights="$FINAL_MODEL"
fi
# Build the training command line as an array so that every value is passed
# to Python as exactly one argument, even if it contains spaces or glob
# characters (a flat string expanded unquoted would be re-split by the shell).
python_args=(
  --config config/cris_verbonly_b64_nopos.yaml
  --opts
  TRAIN.metric_mode hardpos_only
  TRAIN.metric_loss_weight 0.1
  TRAIN.hn_prob 0.0
  TRAIN.batch_size "${BATCH_SIZE}"
  TRAIN.margin_value 15
  TRAIN.temperature 0.05
  TRAIN.exp_name "${EXP_NAME}"
  TRAIN.output_folder "${OUTPUT_DIR}"
  Distributed.dist_url tcp://localhost:7023
)

# When a checkpoint was found, put "--resume <weights>" in front of the
# other arguments.
if [[ -n "${resume_arg:-}" ]]; then
  python_args=("$resume_arg" "${model_weights}" "${python_args[@]}")
fi

# Run the training and mirror stdout+stderr into training.log.
# NOTE(review): tee is run without -a, so training.log is truncated on every
# launch, including resumed runs — confirm that this is intended.
echo "Starting training..."
# pipefail makes the pipeline (and, if this is the last command, the script)
# report a training failure instead of tee's exit status.
set -o pipefail
python -u train_angular_verb.py "${python_args[@]}" 2>&1 | tee "${OUTPUT_DIR}/training.log"