File size: 2,669 Bytes
24c2665 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
#!/bin/bash
# Compare the performance of the AZR models against their base models.
# Usage: bash compare_azr_models.sh
#
# Runs eval_math_nodes.sh once per model with identical sampling settings.
# A failed evaluation does not stop the remaining ones; the script exits
# non-zero at the end if any evaluation failed.
#
# Optional environment:
#   AZR_MATH_EVAL_DIR  directory that contains eval_math_nodes.sh
#     (default: /home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/math_eval)

# -u turns use of an unset variable into an error. -e is deliberately NOT
# enabled: one failing model must not prevent the other models from running.
set -u

# GPU selection - use GPU 6 only.
export CUDA_VISIBLE_DEVICES=6

# Settings shared by every run. They are assigned before the banner below so
# that the banner prints the real values rather than empty strings.
readonly EVAL_DIR="${AZR_MATH_EVAL_DIR:-/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/math_eval}"
readonly BENCHMARKS="aime24,aime25,amc23,math500,olympiadbench,minerva_math"
readonly SEED=42
readonly TEMPERATURE=0
readonly MAX_TOKENS=16000

# Space-separated run names whose evaluation returned a non-zero status.
failed_runs=""

#######################################
# Print a section title, evaluate one model, and record a failure if any.
# Globals:   TEMPERATURE, MAX_TOKENS, BENCHMARKS, SEED (read)
#            failed_runs (appended to on failure)
# Arguments: $1 - section title printed before the run
#            $2 - value for --run_name
#            $3 - value for --init_model
#            $4 - value for --template
# Outputs:   title and a trailing blank line on stdout; warning on stderr
# Returns:   0 always, so that the caller keeps going after a failure
#######################################
run_eval() {
  local title=$1
  local run_name=$2
  local init_model=$3
  local template=$4
  local status=0

  echo "$title"
  bash eval_math_nodes.sh \
    --run_name "$run_name" \
    --init_model "$init_model" \
    --template "$template" \
    --tp_size 1 \
    --temperature "$TEMPERATURE" \
    --top_p 0.95 \
    --max_tokens "$MAX_TOKENS" \
    --benchmarks "$BENCHMARKS" \
    --n_sampling 1 \
    --just_wandb false \
    --seed "$SEED" || status=$?

  if [[ "$status" -ne 0 ]]; then
    printf 'warning: evaluation "%s" exited with status %s\n' \
      "$run_name" "$status" >&2
    failed_runs="${failed_runs:+$failed_runs }$run_name"
  fi
  echo ""
  return 0
}

echo "🎯 GPU 설정: GPU 6번만 사용 (CUDA_VISIBLE_DEVICES=6)"
echo "=== AZR Models vs Base Model Comparison Script ==="
echo "이 스크립트는 다음 4개 모델을 비교합니다:"
echo "1. Qwen/Qwen2.5-7B (Base Model)"
echo "2. andrewzh/Absolute_Zero_Reasoner-Coder-7b (AZR Coder)"
echo "3. andrewzh2/Absolute_Zero_Reasoner-Base-7b (AZR Base)"
echo "4. Qwen/Qwen2.5-7B-Coder (Coder Base Model)"
echo ""
echo "📊 벤치마크: $BENCHMARKS"
echo "🎯 설정: temperature=$TEMPERATURE, max_tokens=$MAX_TOKENS, seed=$SEED"
echo ""

# eval_math_nodes.sh is invoked by relative name, so a failed cd would make
# every run fail for a misleading reason; stop early with a clear message.
if ! cd -- "$EVAL_DIR"; then
  printf 'error: cannot cd to %s\n' "$EVAL_DIR" >&2
  exit 1
fi

run_eval "=== 1. Base Model (Qwen2.5-7B) 평가 ===" \
  qwen25_7b_base "Qwen/Qwen2.5-7B" qwen25

run_eval "=== 2. AZR Coder 7B 평가 ===" \
  azr_coder_7b_hf "andrewzh/Absolute_Zero_Reasoner-Coder-7b" azr

run_eval "=== 3. AZR Base 7B 평가 ===" \
  azr_base_7b_hf "andrewzh2/Absolute_Zero_Reasoner-Base-7b" azr

# NOTE(review): the published Hugging Face repo id appears to be
# "Qwen/Qwen2.5-Coder-7B"; confirm that "Qwen/Qwen2.5-7B-Coder" resolves.
run_eval "=== 4. Qwen2.5-7B-Coder 평가 ===" \
  qwen25_7b_coder "Qwen/Qwen2.5-7B-Coder" qwen25

echo "=== 평가 완료 ==="
echo "결과 확인 방법:"
echo "1. wandb 대시보드에서 각 실행 결과 확인"
echo "2. 로컬 결과 파일: evaluation/math_eval/eval/eval_results/"
echo "3. 비교 분석을 위해 compare_results.py 실행"

# Surface failures through the exit status so wrappers/CI can detect them.
if [[ -n "$failed_runs" ]]; then
  printf 'error: failed evaluations: %s\n' "$failed_runs" >&2
  exit 1
fi