[ { "model_name": "google/gemini-1.5-flash", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.749, 0.748, 0.763, 0.633, 0.883, 0.862, 0.676 ], "GPT-4o": [ 0.705, 0.701, 0.716, 0.579, 0.84, 0.794, 0.651 ], "Gemini-1.5-Pro": [ 0.749, 0.739, 0.772, 0.677, 0.915, 0.948, 0.551 ], "Llama-3.1-70B-Instruct": [ 0.83, 0.806, 0.811, 0.67, 0.92, 0.823, 0.833 ], "Qwen2.5-72B-Instruct": [ 0.712, 0.712, 0.731, 0.606, 0.853, 0.847, 0.619 ] }, { "model_name": "google/gemini-1.5-pro", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.807, 0.807, 0.809, 0.775, 0.845, 0.852, 0.764 ], "GPT-4o": [ 0.782, 0.782, 0.783, 0.764, 0.802, 0.805, 0.761 ], "Gemini-1.5-Pro": [ 0.815, 0.795, 0.802, 0.81, 0.829, 0.916, 0.654 ], "Llama-3.1-70B-Instruct": [ 0.852, 0.836, 0.837, 0.753, 0.908, 0.82, 0.868 ], "Qwen2.5-72B-Instruct": [ 0.779, 0.777, 0.78, 0.755, 0.81, 0.842, 0.712 ] }, { "model_name": "gpt-4o-mini-2024-07-18", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.725, 0.723, 0.743, 0.59, 0.881, 0.851, 0.651 ], "GPT-4o": [ 0.708, 0.704, 0.722, 0.564, 0.863, 0.814, 0.649 ], "Gemini-1.5-Pro": [ 0.705, 0.696, 0.733, 0.63, 0.878, 0.922, 0.507 ], "Llama-3.1-70B-Instruct": [ 0.793, 0.762, 0.768, 0.598, 0.902, 0.773, 0.801 ], "Qwen2.5-72B-Instruct": [ 0.694, 0.693, 0.721, 0.561, 0.871, 0.853, 0.598 ] }, { "model_name": "gpt-4o-2024-08-06", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.774, 0.774, 0.781, 0.701, 0.859, 0.851, 0.713 ], "GPT-4o": [ 0.775, 0.775, 0.778, 0.721, 0.832, 0.821, 0.736 ], "Gemini-1.5-Pro": [ 0.742, 0.726, 0.746, 0.704, 0.829, 0.905, 0.548 ], "Llama-3.1-70B-Instruct": [ 0.838, 0.818, 0.82, 0.711, 0.908, 0.812, 0.849 ], "Qwen2.5-72B-Instruct": [ 0.742, 0.742, 0.752, 0.671, 0.836, 0.846, 0.655 ] }, { "model_name": "meta-llama/Llama-3.1-8B-Instruct", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.52, 0.52, 0.523, 0.487, 0.559, 0.56, 0.485 ], "GPT-4o": [ 0.513, 0.512, 0.515, 0.464, 0.565, 0.533, 0.497 ], "Gemini-1.5-Pro": [ 0.572, 0.555, 0.579, 0.55, 0.622, 0.77, 0.375 ], "Llama-3.1-70B-Instruct": [ 0.509, 0.492, 0.497, 0.454, 0.54, 0.355, 0.639 ], "Qwen2.5-72B-Instruct": [ 0.487, 0.487, 0.493, 0.452, 0.534, 0.565, 0.422 ] }, { "model_name": "meta-llama/Llama-3.1-70B-Instruct", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.612, 0.61, 0.61, 0.625, 0.596, 0.641, 0.579 ], "GPT-4o": [ 0.694, 0.694, 0.694, 0.671, 0.718, 0.718, 0.671 ], "Gemini-1.5-Pro": [ 0.613, 0.588, 0.603, 0.614, 0.61, 0.784, 0.407 ], "Llama-3.1-70B-Instruct": [ 0.576, 0.57, 0.586, 0.639, 0.54, 0.437, 0.729 ], "Qwen2.5-72B-Instruct": [ 0.565, 0.56, 0.56, 0.587, 0.534, 0.628, 0.492 ] }, { "model_name": "Qwen/Qwen2.5-7B-Instruct", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.699, 0.693, 0.697, 0.787, 0.598, 0.693, 0.708 ], "GPT-4o": [ 0.69, 0.683, 0.693, 0.814, 0.557, 0.663, 0.737 ], "Gemini-1.5-Pro": [ 0.734, 0.691, 0.692, 0.794, 0.598, 0.82, 0.557 ], "Llama-3.1-70B-Instruct": [ 0.731, 0.723, 0.733, 0.784, 0.701, 0.594, 0.853 ], "Qwen2.5-72B-Instruct": [ 0.642, 0.624, 0.628, 0.755, 0.491, 0.665, 0.6 ] }, { "model_name": "Qwen/Qwen2.5-72B-Instruct", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.757, 0.756, 0.756, 0.771, 0.742, 0.775, 0.737 ], "GPT-4o": [ 0.738, 0.737, 0.738, 0.764, 0.71, 0.738, 0.738 ], "Gemini-1.5-Pro": [ 0.771, 0.742, 0.745, 0.794, 0.72, 0.867, 0.602 ], "Llama-3.1-70B-Instruct": [ 0.808, 0.793, 0.793, 0.753, 0.839, 0.723, 0.859 ], "Qwen2.5-72B-Instruct": [ 0.712, 0.705, 0.705, 0.761, 0.647, 0.742, 0.67 ] }, { "model_name": "Qwen/Qwen2.5-Math-7B-Instruct", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.633, 0.619, 0.628, 0.766, 0.479, 0.629, 0.639 ], "GPT-4o": [ 0.587, 0.572, 0.586, 0.75, 0.412, 0.577, 0.607 ], "Gemini-1.5-Pro": [ 0.694, 0.638, 0.638, 0.778, 0.5, 0.782, 0.494 ], "Llama-3.1-70B-Instruct": [ 0.638, 0.638, 0.684, 0.856, 0.517, 0.497, 0.865 ], "Qwen2.5-72B-Instruct": [ 0.613, 0.597, 0.598, 0.71, 0.483, 0.647, 0.554 ] }, { "model_name": "Qwen/Qwen2.5-Math-72B-Instruct", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.744, 0.74, 0.742, 0.809, 0.668, 0.738, 0.752 ], "GPT-4o": [ 0.686, 0.682, 0.687, 0.779, 0.588, 0.669, 0.713 ], "Gemini-1.5-Pro": [ 0.797, 0.768, 0.77, 0.825, 0.732, 0.876, 0.645 ], "Llama-3.1-70B-Instruct": [ 0.782, 0.773, 0.779, 0.814, 0.764, 0.658, 0.881 ], "Qwen2.5-72B-Instruct": [ 0.708, 0.693, 0.698, 0.813, 0.569, 0.716, 0.695 ] }, { "model_name": "claude-sonnet-3-5", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.75, 0.748, 0.766, 0.625, 0.895, 0.873, 0.674 ], "GPT-4o": [ 0.727, 0.722, 0.742, 0.579, 0.885, 0.844, 0.663 ], "Gemini-1.5-Pro": [ 0.753, 0.738, 0.759, 0.709, 0.854, 0.918, 0.56 ], "Llama-3.1-70B-Instruct": [ 0.812, 0.779, 0.79, 0.598, 0.931, 0.829, 0.806 ], "Qwen2.5-72B-Instruct": [ 0.708, 0.708, 0.734, 0.581, 0.879, 0.865, 0.611 ] }, { "model_name": "mistralai/Ministral-8B-Instruct-2410", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.605, 0.605, 0.609, 0.559, 0.658, 0.654, 0.564 ], "GPT-4o": [ 0.631, 0.629, 0.637, 0.536, 0.733, 0.682, 0.596 ], "Gemini-1.5-Pro": [ 0.613, 0.583, 0.594, 0.63, 0.573, 0.773, 0.402 ], "Llama-3.1-70B-Instruct": [ 0.649, 0.631, 0.634, 0.598, 0.678, 0.509, 0.752 ], "Qwen2.5-72B-Instruct": [ 0.528, 0.528, 0.537, 0.471, 0.603, 0.613, 0.461 ] }, { "model_name": "mistralai/Mistral-Large-Instruct-2411", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.767, 0.766, 0.767, 0.757, 0.777, 0.797, 0.735 ], "GPT-4o": [ 0.76, 0.76, 0.76, 0.757, 0.763, 0.774, 0.746 ], "Gemini-1.5-Pro": [ 0.779, 0.75, 0.754, 0.799, 0.732, 0.873, 0.612 ], "Llama-3.1-70B-Instruct": [ 0.801, 0.786, 0.786, 0.753, 0.828, 0.709, 0.857 ], "Qwen2.5-72B-Instruct": [ 0.727, 0.725, 0.728, 0.71, 0.75, 0.791, 0.659 ] }, { "model_name": "gemini-2.0-flash-thinking-exp-01-21", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.812, 0.81, 0.816, 0.891, 0.732, 0.769, 0.871 ], "GPT-4o": [ 0.754, 0.743, 0.764, 0.917, 0.576, 0.702, 0.864 ], "Gemini-1.5-Pro": [ 0.87, 0.858, 0.86, 0.93, 0.769, 0.87, 0.87 ], "Llama-3.1-70B-Instruct": [ 0.855, 0.833, 0.834, 0.81, 0.875, 0.739, 0.913 ], "Qwen2.5-72B-Instruct": [ 0.768, 0.76, 0.766, 0.868, 0.645, 0.75, 0.8 ] }, { "model_name": "o1", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.895, 0.895, 0.895, 0.906, 0.884, 0.887, 0.904 ], "GPT-4o": [ 0.884, 0.884, 0.884, 0.889, 0.879, 0.889, 0.879 ], "Gemini-1.5-Pro": [ 0.913, 0.906, 0.907, 0.953, 0.846, 0.911, 0.917 ], "Llama-3.1-70B-Instruct": [ 0.942, 0.932, 0.932, 0.905, 0.958, 0.905, 0.958 ], "Qwen2.5-72B-Instruct": [ 0.841, 0.838, 0.839, 0.868, 0.806, 0.846, 0.833 ] }, { "model_name": "Qwen/QwQ-32B-Preview", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.833, 0.832, 0.838, 0.913, 0.754, 0.787, 0.897 ], "GPT-4o": [ 0.783, 0.78, 0.784, 0.861, 0.697, 0.756, 0.821 ], "Gemini-1.5-Pro": [ 0.826, 0.8, 0.814, 0.953, 0.615, 0.804, 0.889 ], "Llama-3.1-70B-Instruct": [ 0.855, 0.84, 0.848, 0.905, 0.833, 0.704, 0.952 ], "Qwen2.5-72B-Instruct": [ 0.87, 0.867, 0.868, 0.921, 0.806, 0.854, 0.893 ] }, { "model_name": "deepseek-ai/DeepSeek-R1", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.822, 0.822, 0.824, 0.768, 0.877, 0.862, 0.791 ], "GPT-4o": [ 0.797, 0.797, 0.803, 0.722, 0.879, 0.867, 0.744 ], "Gemini-1.5-Pro": [ 0.826, 0.82, 0.823, 0.814, 0.846, 0.897, 0.733 ], "Llama-3.1-70B-Instruct": [ 0.899, 0.882, 0.882, 0.857, 0.917, 0.818, 0.936 ], "Qwen2.5-72B-Instruct": [ 0.768, 0.768, 0.774, 0.711, 0.839, 0.844, 0.703 ] }, { "model_name": "o1-mini", "extract_model_name": "Qwen/Qwen2.5-72B-Instruct", "mu_math": [ 0.848, 0.848, 0.848, 0.833, 0.862, 0.858, 0.838 ], "GPT-4o": [ 0.812, 0.812, 0.813, 0.778, 0.848, 0.848, 0.778 ], "Gemini-1.5-Pro": [ 0.87, 0.862, 0.862, 0.884, 0.846, 0.905, 0.815 ], "Llama-3.1-70B-Instruct": [ 0.913, 0.897, 0.897, 0.857, 0.938, 0.857, 0.938 ], "Qwen2.5-72B-Instruct": [ 0.797, 0.795, 0.795, 0.816, 0.774, 0.816, 0.774 ] } ]