{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7584, "global_step": 1896, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.9624e-05, "loss": 0.9388, "step": 50 }, { "epoch": 0.04, "learning_rate": 1.9232e-05, "loss": 0.9522, "step": 100 }, { "epoch": 0.06, "learning_rate": 1.8832000000000002e-05, "loss": 0.8005, "step": 150 }, { "epoch": 0.08, "learning_rate": 1.8432000000000002e-05, "loss": 0.6908, "step": 200 }, { "epoch": 0.1, "learning_rate": 1.8032e-05, "loss": 0.7927, "step": 250 }, { "epoch": 0.1, "eval_accuracy": 0.6659167604049494, "eval_loss": 0.7351371645927429, "eval_runtime": 434.6411, "eval_samples_per_second": 2.045, "eval_steps_per_second": 2.045, "step": 250 }, { "epoch": 0.12, "learning_rate": 1.764e-05, "loss": 0.5962, "step": 300 }, { "epoch": 0.14, "learning_rate": 1.724e-05, "loss": 0.6989, "step": 350 }, { "epoch": 0.16, "learning_rate": 1.684e-05, "loss": 0.7051, "step": 400 }, { "epoch": 0.18, "learning_rate": 1.6440000000000002e-05, "loss": 0.6664, "step": 450 }, { "epoch": 0.2, "learning_rate": 1.6040000000000002e-05, "loss": 0.6547, "step": 500 }, { "epoch": 0.2, "eval_accuracy": 0.7041619797525309, "eval_loss": 0.6934666037559509, "eval_runtime": 436.2176, "eval_samples_per_second": 2.038, "eval_steps_per_second": 2.038, "step": 500 }, { "epoch": 0.22, "learning_rate": 1.5640000000000003e-05, "loss": 0.665, "step": 550 }, { "epoch": 0.24, "learning_rate": 1.5240000000000001e-05, "loss": 0.654, "step": 600 }, { "epoch": 0.26, "learning_rate": 1.4840000000000002e-05, "loss": 0.6714, "step": 650 }, { "epoch": 0.28, "learning_rate": 1.444e-05, "loss": 0.7395, "step": 700 }, { "epoch": 0.3, "learning_rate": 1.4040000000000001e-05, "loss": 0.5393, "step": 750 }, { "epoch": 0.3, "eval_accuracy": 0.7142857142857143, "eval_loss": 0.621578574180603, "eval_runtime": 436.3187, "eval_samples_per_second": 2.038, "eval_steps_per_second": 2.038, "step": 750 }, { "epoch": 0.32, "learning_rate": 1.3640000000000002e-05, "loss": 0.5185, "step": 800 }, { "epoch": 0.34, "learning_rate": 1.3240000000000002e-05, "loss": 0.6009, "step": 850 }, { "epoch": 0.36, "learning_rate": 1.284e-05, "loss": 0.6588, "step": 900 }, { "epoch": 0.38, "learning_rate": 1.2440000000000001e-05, "loss": 0.6022, "step": 950 }, { "epoch": 0.4, "learning_rate": 1.204e-05, "loss": 0.7316, "step": 1000 }, { "epoch": 0.4, "eval_accuracy": 0.734533183352081, "eval_loss": 0.5916205644607544, "eval_runtime": 436.6514, "eval_samples_per_second": 2.036, "eval_steps_per_second": 2.036, "step": 1000 }, { "epoch": 0.42, "learning_rate": 1.164e-05, "loss": 0.6086, "step": 1050 }, { "epoch": 0.44, "learning_rate": 1.1240000000000002e-05, "loss": 0.5806, "step": 1100 }, { "epoch": 0.46, "learning_rate": 1.0840000000000001e-05, "loss": 0.5992, "step": 1150 }, { "epoch": 0.48, "learning_rate": 1.0440000000000002e-05, "loss": 0.5807, "step": 1200 }, { "epoch": 0.5, "learning_rate": 1.004e-05, "loss": 0.5667, "step": 1250 }, { "epoch": 0.5, "eval_accuracy": 0.734533183352081, "eval_loss": 0.5785398483276367, "eval_runtime": 436.2096, "eval_samples_per_second": 2.038, "eval_steps_per_second": 2.038, "step": 1250 }, { "epoch": 0.52, "learning_rate": 9.640000000000001e-06, "loss": 0.4989, "step": 1300 }, { "epoch": 0.54, "learning_rate": 9.240000000000001e-06, "loss": 0.6015, "step": 1350 }, { "epoch": 0.56, "learning_rate": 8.848e-06, "loss": 0.5728, "step": 1400 }, { "epoch": 0.58, "learning_rate": 8.448000000000001e-06, "loss": 0.6285, "step": 1450 }, { "epoch": 0.6, "learning_rate": 8.048e-06, "loss": 0.498, "step": 1500 }, { "epoch": 0.6, "eval_accuracy": 0.7435320584926884, "eval_loss": 0.5632913708686829, "eval_runtime": 436.3374, "eval_samples_per_second": 2.037, "eval_steps_per_second": 2.037, "step": 1500 }, { "epoch": 0.62, "learning_rate": 7.648e-06, "loss": 0.5134, "step": 1550 }, { "epoch": 0.64, "learning_rate": 7.248000000000001e-06, "loss": 0.4582, "step": 1600 }, { "epoch": 0.66, "learning_rate": 6.848e-06, "loss": 0.534, "step": 1650 }, { "epoch": 0.68, "learning_rate": 6.448000000000001e-06, "loss": 0.6765, "step": 1700 }, { "epoch": 0.7, "learning_rate": 6.048e-06, "loss": 0.6598, "step": 1750 }, { "epoch": 0.7, "eval_accuracy": 0.7457817772778402, "eval_loss": 0.565944254398346, "eval_runtime": 435.7582, "eval_samples_per_second": 2.04, "eval_steps_per_second": 2.04, "step": 1750 }, { "epoch": 0.72, "learning_rate": 5.648e-06, "loss": 0.5006, "step": 1800 }, { "epoch": 0.74, "learning_rate": 5.248000000000001e-06, "loss": 0.579, "step": 1850 } ], "max_steps": 2500, "num_train_epochs": 1, "total_flos": 0.0, "trial_name": null, "trial_params": null }