File size: 2,669 Bytes
24c2665
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/bin/bash

# Performance comparison script: AZR models vs. their base models.
# Usage: bash compare_azr_models.sh

# GPU setup: expose only GPU 6 to every process started by this script.
export CUDA_VISIBLE_DEVICES=6
echo "🎯 GPU 설정: GPU 6번만 사용 (CUDA_VISIBLE_DEVICES=6)"

# Shared settings used by all four evaluation runs.
# They are assigned BEFORE the banner so the summary lines below print the
# actual values instead of empty strings.
BENCHMARKS="aime24,aime25,amc23,math500,olympiadbench,minerva_math"
SEED=42
TEMPERATURE=0
MAX_TOKENS=16000

# Banner: list the models being compared and the effective settings.
echo "=== AZR Models vs Base Model Comparison Script ==="
echo "이 스크립트는 다음 4개 모델을 비교합니다:"
echo "1. Qwen/Qwen2.5-7B (Base Model)"
echo "2. andrewzh/Absolute_Zero_Reasoner-Coder-7b (AZR Coder)"
echo "3. andrewzh2/Absolute_Zero_Reasoner-Base-7b (AZR Base)"
echo "4. Qwen/Qwen2.5-Coder-7B (Coder Base Model)"
echo ""
echo "📊 벤치마크: $BENCHMARKS"
echo "🎯 설정: temperature=$TEMPERATURE, max_tokens=$MAX_TOKENS, seed=$SEED"
echo ""

# eval_math_nodes.sh is invoked by relative path below, so stop right away if
# the evaluation directory is missing rather than failing once per model.
cd /home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/math_eval || {
    echo "ERROR: cannot cd to /home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/math_eval" >&2
    exit 1
}

# Run 1/4: evaluate the Qwen2.5-7B base model with the qwen25 template.
# The shared settings are quoted so each option receives exactly one argument.
echo "=== 1. Base Model (Qwen2.5-7B) 평가 ==="
bash eval_math_nodes.sh \
    --run_name qwen25_7b_base \
    --init_model "Qwen/Qwen2.5-7B" \
    --template qwen25 \
    --tp_size 1 \
    --temperature "$TEMPERATURE" \
    --top_p 0.95 \
    --max_tokens "$MAX_TOKENS" \
    --benchmarks "$BENCHMARKS" \
    --n_sampling 1 \
    --just_wandb false \
    --seed "$SEED"

# Run 2/4: evaluate the AZR Coder 7B checkpoint with the azr template.
# The shared settings are quoted so each option receives exactly one argument.
echo ""
echo "=== 2. AZR Coder 7B 평가 ==="
bash eval_math_nodes.sh \
    --run_name azr_coder_7b_hf \
    --init_model "andrewzh/Absolute_Zero_Reasoner-Coder-7b" \
    --template azr \
    --tp_size 1 \
    --temperature "$TEMPERATURE" \
    --top_p 0.95 \
    --max_tokens "$MAX_TOKENS" \
    --benchmarks "$BENCHMARKS" \
    --n_sampling 1 \
    --just_wandb false \
    --seed "$SEED"

# Run 3/4: evaluate the AZR Base 7B checkpoint with the azr template.
# The shared settings are quoted so each option receives exactly one argument.
echo ""
echo "=== 3. AZR Base 7B 평가 ==="
bash eval_math_nodes.sh \
    --run_name azr_base_7b_hf \
    --init_model "andrewzh2/Absolute_Zero_Reasoner-Base-7b" \
    --template azr \
    --tp_size 1 \
    --temperature "$TEMPERATURE" \
    --top_p 0.95 \
    --max_tokens "$MAX_TOKENS" \
    --benchmarks "$BENCHMARKS" \
    --n_sampling 1 \
    --just_wandb false \
    --seed "$SEED"

# Run 4/4: evaluate the Qwen2.5 Coder 7B base model with the qwen25 template.
# The model id follows the published Qwen2.5-Coder naming
# ("Qwen/Qwen2.5-Coder-7B"); the previous value "Qwen/Qwen2.5-7B-Coder" does
# not match that scheme. NOTE(review): confirm against the Hugging Face hub.
# The run name is kept unchanged so existing result folders and wandb runs
# still line up.
echo ""
echo "=== 4. Qwen2.5-7B-Coder 평가 ==="
bash eval_math_nodes.sh \
    --run_name qwen25_7b_coder \
    --init_model "Qwen/Qwen2.5-Coder-7B" \
    --template qwen25 \
    --tp_size 1 \
    --temperature "$TEMPERATURE" \
    --top_p 0.95 \
    --max_tokens "$MAX_TOKENS" \
    --benchmarks "$BENCHMARKS" \
    --n_sampling 1 \
    --just_wandb false \
    --seed "$SEED"

# Closing summary: tell the user where to look for the results of the runs.
# The messages were mis-encoded (UTF-8 shown through a legacy code page) and
# are restored to readable Korean here.
echo ""
echo "=== 평가 완료 ==="
echo "결과 확인 방법:"
echo "1. wandb 대시보드에서 각 실행 결과 확인"
echo "2. 로컬 결과 파일: evaluation/math_eval/eval/eval_results/"
echo "3. 비교 분석을 위해 compare_results.py 실행"