{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.12, "eval_steps": 20, "global_step": 1140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 2.9520000000000002e-05, "loss": 0.675, "step": 20 }, { "epoch": 0.32, "learning_rate": 2.904e-05, "loss": 0.6234, "step": 40 }, { "epoch": 0.48, "learning_rate": 2.856e-05, "loss": 0.4842, "step": 60 }, { "epoch": 0.64, "learning_rate": 2.8080000000000002e-05, "loss": 0.3626, "step": 80 }, { "epoch": 0.8, "learning_rate": 2.7600000000000003e-05, "loss": 0.3259, "step": 100 }, { "epoch": 0.96, "learning_rate": 2.712e-05, "loss": 0.2786, "step": 120 }, { "epoch": 1.0, "eval_accuracy": 0.6135040521621704, "eval_loss": 1.1749801635742188, "eval_runtime": 4843.6207, "eval_samples_per_second": 0.887, "eval_steps_per_second": 0.028, "step": 125 }, { "epoch": 1.12, "learning_rate": 2.6640000000000002e-05, "loss": 0.231, "step": 140 }, { "epoch": 1.28, "learning_rate": 2.616e-05, "loss": 0.2978, "step": 160 }, { "epoch": 1.44, "learning_rate": 2.568e-05, "loss": 0.2383, "step": 180 }, { "epoch": 1.6, "learning_rate": 2.52e-05, "loss": 0.2364, "step": 200 }, { "epoch": 1.76, "learning_rate": 2.472e-05, "loss": 0.2186, "step": 220 }, { "epoch": 1.92, "learning_rate": 2.4240000000000002e-05, "loss": 0.1869, "step": 240 }, { "epoch": 2.0, "eval_accuracy": 0.6817229390144348, "eval_loss": 0.9132505655288696, "eval_runtime": 4886.8402, "eval_samples_per_second": 0.879, "eval_steps_per_second": 0.028, "step": 250 }, { "epoch": 2.08, "learning_rate": 2.3760000000000003e-05, "loss": 0.1656, "step": 260 }, { "epoch": 2.24, "learning_rate": 2.328e-05, "loss": 0.1846, "step": 280 }, { "epoch": 2.4, "learning_rate": 2.2800000000000002e-05, "loss": 0.1609, "step": 300 }, { "epoch": 2.56, "learning_rate": 2.232e-05, "loss": 0.1497, "step": 320 }, { "epoch": 2.72, "learning_rate": 2.184e-05, "loss": 0.1564, "step": 340 }, { "epoch": 2.88, "learning_rate": 2.136e-05, "loss": 0.1409, "step": 360 }, { "epoch": 3.0, "eval_accuracy": 0.6696158051490784, "eval_loss": 0.9002014398574829, "eval_runtime": 4890.7807, "eval_samples_per_second": 0.878, "eval_steps_per_second": 0.028, "step": 375 }, { "epoch": 3.04, "learning_rate": 2.088e-05, "loss": 0.1441, "step": 380 }, { "epoch": 3.2, "learning_rate": 2.04e-05, "loss": 0.1179, "step": 400 }, { "epoch": 3.36, "learning_rate": 1.9920000000000002e-05, "loss": 0.1467, "step": 420 }, { "epoch": 3.52, "learning_rate": 1.944e-05, "loss": 0.1311, "step": 440 }, { "epoch": 3.68, "learning_rate": 1.896e-05, "loss": 0.1261, "step": 460 }, { "epoch": 3.84, "learning_rate": 1.848e-05, "loss": 0.1533, "step": 480 }, { "epoch": 4.0, "learning_rate": 1.8e-05, "loss": 0.1296, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.7143189907073975, "eval_loss": 0.8478733897209167, "eval_runtime": 4805.5106, "eval_samples_per_second": 0.894, "eval_steps_per_second": 0.028, "step": 500 }, { "epoch": 4.16, "learning_rate": 1.7519999999999998e-05, "loss": 0.1276, "step": 520 }, { "epoch": 4.32, "learning_rate": 1.704e-05, "loss": 0.1259, "step": 540 }, { "epoch": 4.48, "learning_rate": 1.656e-05, "loss": 0.1175, "step": 560 }, { "epoch": 4.64, "learning_rate": 1.6080000000000002e-05, "loss": 0.1392, "step": 580 }, { "epoch": 4.8, "learning_rate": 1.56e-05, "loss": 0.113, "step": 600 }, { "epoch": 4.96, "learning_rate": 1.5120000000000001e-05, "loss": 0.1139, "step": 620 }, { "epoch": 5.0, "eval_accuracy": 0.7427241206169128, "eval_loss": 0.8348045349121094, "eval_runtime": 4819.2179, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.028, "step": 625 }, { "epoch": 5.12, "learning_rate": 1.464e-05, "loss": 0.1246, "step": 640 }, { "epoch": 5.28, "learning_rate": 1.416e-05, "loss": 0.1313, "step": 660 }, { "epoch": 5.44, "learning_rate": 1.3680000000000001e-05, "loss": 0.1175, "step": 680 }, { "epoch": 5.6, "learning_rate": 1.32e-05, "loss": 0.1205, "step": 700 }, { "epoch": 5.76, "learning_rate": 1.272e-05, "loss": 0.1015, "step": 720 }, { "epoch": 5.92, "learning_rate": 1.224e-05, "loss": 0.0941, "step": 740 }, { "epoch": 6.0, "eval_accuracy": 0.7427241206169128, "eval_loss": 0.830384373664856, "eval_runtime": 4804.5403, "eval_samples_per_second": 0.894, "eval_steps_per_second": 0.028, "step": 750 }, { "epoch": 6.08, "learning_rate": 1.1760000000000001e-05, "loss": 0.1224, "step": 760 }, { "epoch": 6.24, "learning_rate": 1.128e-05, "loss": 0.105, "step": 780 }, { "epoch": 6.4, "learning_rate": 1.08e-05, "loss": 0.122, "step": 800 }, { "epoch": 6.56, "learning_rate": 1.032e-05, "loss": 0.1044, "step": 820 }, { "epoch": 6.72, "learning_rate": 9.84e-06, "loss": 0.1077, "step": 840 }, { "epoch": 6.88, "learning_rate": 9.36e-06, "loss": 0.1075, "step": 860 }, { "epoch": 7.0, "eval_accuracy": 0.6938300132751465, "eval_loss": 0.8423438668251038, "eval_runtime": 4824.348, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.028, "step": 875 }, { "epoch": 7.04, "learning_rate": 8.88e-06, "loss": 0.0951, "step": 880 }, { "epoch": 7.2, "learning_rate": 8.400000000000001e-06, "loss": 0.1229, "step": 900 }, { "epoch": 7.36, "learning_rate": 7.92e-06, "loss": 0.1171, "step": 920 }, { "epoch": 7.52, "learning_rate": 7.44e-06, "loss": 0.0955, "step": 940 }, { "epoch": 7.68, "learning_rate": 6.96e-06, "loss": 0.1139, "step": 960 }, { "epoch": 7.84, "learning_rate": 6.48e-06, "loss": 0.1051, "step": 980 }, { "epoch": 8.0, "learning_rate": 6e-06, "loss": 0.1011, "step": 1000 }, { "epoch": 8.0, "eval_accuracy": 0.7236321568489075, "eval_loss": 0.8057857155799866, "eval_runtime": 4915.4234, "eval_samples_per_second": 0.874, "eval_steps_per_second": 0.027, "step": 1000 }, { "epoch": 8.16, "learning_rate": 5.52e-06, "loss": 0.0976, "step": 1020 }, { "epoch": 8.32, "learning_rate": 5.04e-06, "loss": 0.1008, "step": 1040 }, { "epoch": 8.48, "learning_rate": 4.56e-06, "loss": 0.1066, "step": 1060 }, { "epoch": 8.64, "learning_rate": 4.080000000000001e-06, "loss": 0.1038, "step": 1080 }, { "epoch": 8.8, "learning_rate": 3.6e-06, "loss": 0.1044, "step": 1100 }, { "epoch": 8.96, "learning_rate": 3.1199999999999998e-06, "loss": 0.1015, "step": 1120 }, { "epoch": 9.0, "eval_accuracy": 0.7201396822929382, "eval_loss": 0.7934114336967468, "eval_runtime": 4842.9757, "eval_samples_per_second": 0.887, "eval_steps_per_second": 0.028, "step": 1125 }, { "epoch": 9.12, "learning_rate": 2.6399999999999997e-06, "loss": 0.1091, "step": 1140 } ], "logging_steps": 20, "max_steps": 1250, "num_train_epochs": 10, "save_steps": 20, "total_flos": 4.43240458915968e+19, "trial_name": null, "trial_params": null }