#!/usr/bin/env bash
#
# Launch (or resume) training with train_angular_verb.py inside the "cris"
# conda environment, mirroring all output to <output_dir>/training.log.
#
# Usage:
#   <this script> <output_dir> <batch_size> <exp_name>
#
# Arguments:
#   output_dir  Directory for checkpoints and logs (created if missing).
#               If it already contains last_model.pth, training is started
#               with "--resume <output_dir>/last_model.pth".
#   batch_size  Value passed as TRAIN.batch_size.
#   exp_name    Value passed as TRAIN.exp_name.
#
# Exit status: 1 on usage/setup errors; otherwise the status of the
# training pipeline (non-zero if python fails, thanks to pipefail).

# Make "python ... | tee ..." report python's failure instead of tee's
# success. (-e/-u are deliberately NOT enabled: ~/.bashrc and conda's
# activation hooks are not guaranteed to be safe under them.)
set -o pipefail

# Argument check: done first so a bad invocation fails before any
# environment setup is attempted.
if [[ "$#" -ne 3 ]]; then
  echo "Usage: $0 <output_dir> <batch_size> <exp_name>" >&2
  exit 1
fi

# Variable setup
OUTPUT_DIR=$1
BATCH_SIZE=$2
EXP_NAME=$3

# Environment setup
source "${HOME}/.bashrc"
eval "$(conda shell.bash hook)"
if ! conda activate cris; then
  echo "Failed to activate conda environment 'cris'" >&2
  exit 1
fi

# CUDA and distributed-training environment setup
export NCCL_P2P_DISABLE=1             # disable NCCL peer-to-peer transport
export NCCL_DEBUG=INFO                # verbose NCCL logging
export NCCL_IB_DISABLE=1              # disable the InfiniBand transport
export NCCL_SOCKET_IFNAME=^docker0,lo # exclude docker0 and loopback interfaces
export CUDA_VISIBLE_DEVICES=0,1       # expose only GPUs 0 and 1

# The config path and training script below are relative to this directory,
# so refuse to continue if it cannot be entered.
cd /home/s1/chaeyunkim/VerbCentric_CY || {
  echo "Cannot cd to /home/s1/chaeyunkim/VerbCentric_CY" >&2
  exit 1
}

printf '%s\n' "$OUTPUT_DIR"
printf '%s\n' "$BATCH_SIZE"
printf '%s\n' "$EXP_NAME"

# Create the output directory if needed
if [[ ! -d "$OUTPUT_DIR" ]]; then
  echo "Directory $OUTPUT_DIR does not exist. Creating it..."
  mkdir -p "$OUTPUT_DIR" || {
    echo "Cannot create directory $OUTPUT_DIR" >&2
    exit 1
  }
fi

# Build the Python argument list as an array so that values containing
# spaces or glob characters are passed through as single, literal words.
FINAL_MODEL="${OUTPUT_DIR}/last_model.pth"
python_args=(
  --config config/cris_verbonly_b64_nopos.yaml
  --opts
  TRAIN.metric_mode hardpos_only
  TRAIN.metric_loss_weight 0.1
  TRAIN.hn_prob 0.0
  TRAIN.batch_size "${BATCH_SIZE}"
  TRAIN.margin_value 15
  TRAIN.temperature 0.05
  TRAIN.exp_name "${EXP_NAME}"
  TRAIN.output_folder "${OUTPUT_DIR}"
  Distributed.dist_url tcp://localhost:7023
)

# Resume from the last checkpoint when one exists. The resume flag is
# prepended so it stays ahead of "--opts", exactly as in the original
# command line.
if [[ -f "$FINAL_MODEL" ]]; then
  python_args=(--resume "$FINAL_MODEL" "${python_args[@]}")
fi

# Run training and save the log.
# NOTE: tee (without -a) truncates training.log on every run, including
# resumed runs.
echo "Starting training..."
python -u train_angular_verb.py "${python_args[@]}" 2>&1 | tee "${OUTPUT_DIR}/training.log"