#!/bin/bash
#
# Runs the math evaluation script (eval_math_nodes.sh) for three 7B models,
# one after another:
#   1. Qwen/Qwen2.5-7B                            (template: qwen25)
#   2. andrewzh/Absolute_Zero_Reasoner-Coder-7b   (template: azr)
#   3. andrewzh2/Absolute_Zero_Reasoner-Base-7b   (template: azr)
#
# Every run uses the same sampling settings, benchmark list and seed; only the
# run name, model id and template differ.
#
# Exit status: 0 when every evaluation command returned success, 1 otherwise
# (including when the evaluation directory cannot be entered).

# -u: fail on unset variables; pipefail: a pipeline fails if any stage fails.
# -e is deliberately NOT set: a failing evaluation must not prevent the
# remaining models from being evaluated.
set -uo pipefail

# Directory that contains eval_math_nodes.sh; the script is invoked by its
# relative name, so we must be inside this directory.
readonly EVAL_DIR="/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/math_eval"

# Arguments shared by all three evaluation runs.
readonly -a COMMON_ARGS=(
  --tp_size 1
  --temperature 0
  --top_p 0.95
  --max_tokens 16000
  --benchmarks aime24,aime25,amc23,math500,olympiadbench,minerva_math
  --n_sampling 1
  --just_wandb false
  --seed 42
)

#######################################
# Print a banner and run one evaluation.
# Globals:   COMMON_ARGS (read)
# Arguments: $1 - human-readable label used in the banner
#            $2 - value for --run_name
#            $3 - value for --init_model
#            $4 - value for --template
# Outputs:   banner on stdout, plus whatever eval_math_nodes.sh prints
# Returns:   exit status of eval_math_nodes.sh
#######################################
run_eval() {
  local label=$1
  local run_name=$2
  local init_model=$3
  local template=$4

  echo "=== Evaluating ${label} ==="
  bash eval_math_nodes.sh \
    --run_name "${run_name}" \
    --init_model "${init_model}" \
    --template "${template}" \
    "${COMMON_ARGS[@]}"
}

#######################################
# Enter the evaluation directory, run all three evaluations and report.
# Globals:   EVAL_DIR (read)
# Outputs:   progress on stdout, errors on stderr
# Returns:   exits 1 if the directory is missing or any evaluation failed
#######################################
main() {
  # Abort early: running the evaluations from the wrong directory would only
  # produce three confusing "No such file" errors.
  cd "${EVAL_DIR}" || {
    printf 'error: cannot cd to %s\n' "${EVAL_DIR}" >&2
    exit 1
  }

  local failed_count=0
  local failed_runs=""

  run_eval "Base Model: Qwen2.5-7B" \
    qwen25_7b_base "Qwen/Qwen2.5-7B" qwen25 \
    || { failed_count=$((failed_count + 1)); failed_runs+=" qwen25_7b_base"; }

  run_eval "AZR Coder 7B" \
    azr_coder_7b_hf "andrewzh/Absolute_Zero_Reasoner-Coder-7b" azr \
    || { failed_count=$((failed_count + 1)); failed_runs+=" azr_coder_7b_hf"; }

  # NOTE(review): this model id uses the owner "andrewzh2" while the Coder
  # model above uses "andrewzh" -- kept as in the original; confirm intended.
  run_eval "AZR Base 7B" \
    azr_base_7b_hf "andrewzh2/Absolute_Zero_Reasoner-Base-7b" azr \
    || { failed_count=$((failed_count + 1)); failed_runs+=" azr_base_7b_hf"; }

  if (( failed_count > 0 )); then
    printf 'error: %d evaluation(s) failed:%s\n' \
      "${failed_count}" "${failed_runs}" >&2
    exit 1
  fi

  echo "=== All evaluations completed! ==="
}

main "$@"