#!/bin/bash
#
# Compare Absolute Zero Reasoner (AZR) checkpoints against Qwen2.5 models on a
# fixed set of math benchmarks.
#
# Each model is evaluated by running eval_math_nodes.sh from EVAL_DIR with the
# same decoding settings (temperature, top_p, max_tokens, seed); only the run
# name, model id and prompt template differ between runs.
#
# Usage:
#   bash <this script>
#
# Exit status:
#   0  every evaluation command returned success
#   1  EVAL_DIR could not be entered, or at least one evaluation failed
#      (remaining models are still evaluated after a failure)

# Fail loudly on a misspelled variable instead of passing an empty flag value.
set -u

# Pin every evaluation to GPU 6 only.
export CUDA_VISIBLE_DEVICES=6

# Settings are defined before the banner so the banner prints the real values.
readonly EVAL_DIR="/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/math_eval"
readonly BENCHMARKS="aime24,aime25,amc23,math500,olympiadbench,minerva_math"
readonly SEED=42
readonly TEMPERATURE=0
readonly MAX_TOKENS=16000

# Number of evaluation runs that returned a non-zero status.
failures=0

#######################################
# Run one model through eval_math_nodes.sh with the shared settings.
# Globals:
#   TEMPERATURE, MAX_TOKENS, BENCHMARKS, SEED (read)
#   failures (incremented when the evaluation command fails)
# Arguments:
#   $1 - label printed in the section header
#   $2 - value for --run_name
#   $3 - value for --init_model
#   $4 - value for --template
# Outputs:
#   Section header on stdout; a failure notice on stderr.
# Returns:
#   0 always, so one failed model does not stop the remaining ones.
#######################################
run_eval() {
  local title=$1
  local run_name=$2
  local model=$3
  local template=$4

  printf '%s\n' "=== ${title} 평가 ==="
  if ! bash eval_math_nodes.sh \
    --run_name "$run_name" \
    --init_model "$model" \
    --template "$template" \
    --tp_size 1 \
    --temperature "$TEMPERATURE" \
    --top_p 0.95 \
    --max_tokens "$MAX_TOKENS" \
    --benchmarks "$BENCHMARKS" \
    --n_sampling 1 \
    --just_wandb false \
    --seed "$SEED"; then
    printf 'ERROR: evaluation failed for run "%s" (model %s)\n' \
      "$run_name" "$model" >&2
    failures=$((failures + 1))
  fi
  return 0
}

# NOTE(review): the user-facing messages below were restored from mis-encoded
# (mojibake) text; the emoji on the benchmark line lost its distinguishing
# bytes and is a best guess.
printf '%s\n' "🎯 GPU 설정: GPU 6번만 사용 (CUDA_VISIBLE_DEVICES=6)"

printf '%s\n' "=== AZR Models vs Base Model Comparison Script ==="
printf '%s\n' "이 스크립트는 다음 4개 모델을 비교합니다:"
printf '%s\n' "1. Qwen/Qwen2.5-7B (Base Model)"
printf '%s\n' "2. andrewzh/Absolute_Zero_Reasoner-Coder-7b (AZR Coder)"
printf '%s\n' "3. andrewzh2/Absolute_Zero_Reasoner-Base-7b (AZR Base)"
printf '%s\n' "4. Qwen/Qwen2.5-7B-Coder (Coder Base Model)"
printf '\n'
printf '%s\n' "📊 벤치마크: $BENCHMARKS"
printf '%s\n' "🎯 설정: temperature=$TEMPERATURE, max_tokens=$MAX_TOKENS, seed=$SEED"
printf '\n'

# eval_math_nodes.sh is invoked by relative path, so refuse to continue from
# the wrong directory.
cd "$EVAL_DIR" || {
  printf 'ERROR: cannot cd to %s\n' "$EVAL_DIR" >&2
  exit 1
}

run_eval "1. Base Model (Qwen2.5-7B)" \
  qwen25_7b_base "Qwen/Qwen2.5-7B" qwen25

printf '\n'
run_eval "2. AZR Coder 7B" \
  azr_coder_7b_hf "andrewzh/Absolute_Zero_Reasoner-Coder-7b" azr

printf '\n'
run_eval "3. AZR Base 7B" \
  azr_base_7b_hf "andrewzh2/Absolute_Zero_Reasoner-Base-7b" azr

printf '\n'
# NOTE(review): the published Hugging Face id for this model appears to be
# "Qwen/Qwen2.5-Coder-7B"; confirm that "Qwen/Qwen2.5-7B-Coder" resolves.
run_eval "4. Qwen2.5-7B-Coder" \
  qwen25_7b_coder "Qwen/Qwen2.5-7B-Coder" qwen25

printf '\n'
printf '%s\n' "=== 평가 완료 ==="
printf '%s\n' "결과 확인 방법:"
printf '%s\n' "1. wandb 대시보드에서 각 실행 결과 확인"
printf '%s\n' "2. 로컬 결과 파일: evaluation/math_eval/eval/eval_results/"
printf '%s\n' "3. 비교 분석을 위해 compare_results.py 실행"

# Surface failures to callers (CI, wrapper scripts) via the exit status.
if (( failures > 0 )); then
  printf 'ERROR: %d evaluation run(s) failed\n' "$failures" >&2
  exit 1
fi