{ "best_metric": 0.6354775828460039, "best_model_checkpoint": "brand-safety-model\\checkpoint-325", "epoch": 9.0, "eval_steps": 500, "global_step": 585, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7692307692307693, "grad_norm": 2.6331872940063477, "learning_rate": 2.7692307692307694e-05, "loss": 3.319, "step": 50 }, { "epoch": 1.0, "eval_accuracy": 0.42300194931773877, "eval_loss": 2.7462074756622314, "eval_runtime": 1.5522, "eval_samples_per_second": 330.495, "eval_steps_per_second": 10.952, "step": 65 }, { "epoch": 1.5384615384615383, "grad_norm": 3.3225834369659424, "learning_rate": 2.5384615384615386e-05, "loss": 2.6272, "step": 100 }, { "epoch": 2.0, "eval_accuracy": 0.5009746588693957, "eval_loss": 2.07950758934021, "eval_runtime": 1.523, "eval_samples_per_second": 336.824, "eval_steps_per_second": 11.162, "step": 130 }, { "epoch": 2.3076923076923075, "grad_norm": 3.2619457244873047, "learning_rate": 2.307692307692308e-05, "loss": 2.137, "step": 150 }, { "epoch": 3.0, "eval_accuracy": 0.5769980506822612, "eval_loss": 1.6682791709899902, "eval_runtime": 1.5108, "eval_samples_per_second": 339.548, "eval_steps_per_second": 11.252, "step": 195 }, { "epoch": 3.076923076923077, "grad_norm": 3.448869228363037, "learning_rate": 2.076923076923077e-05, "loss": 1.7515, "step": 200 }, { "epoch": 3.8461538461538463, "grad_norm": 3.639822483062744, "learning_rate": 1.8461538461538465e-05, "loss": 1.469, "step": 250 }, { "epoch": 4.0, "eval_accuracy": 0.6101364522417154, "eval_loss": 1.472076654434204, "eval_runtime": 1.5018, "eval_samples_per_second": 341.601, "eval_steps_per_second": 11.32, "step": 260 }, { "epoch": 4.615384615384615, "grad_norm": 4.488701820373535, "learning_rate": 1.6153846153846154e-05, "loss": 1.2405, "step": 300 }, { "epoch": 5.0, "eval_accuracy": 0.6354775828460039, "eval_loss": 1.34968101978302, "eval_runtime": 1.5205, "eval_samples_per_second": 337.4, "eval_steps_per_second": 11.181, "step": 325 }, { "epoch": 5.384615384615385, "grad_norm": 3.624941825866699, "learning_rate": 1.3846153846153847e-05, "loss": 1.1023, "step": 350 }, { "epoch": 6.0, "eval_accuracy": 0.6335282651072125, "eval_loss": 1.2936445474624634, "eval_runtime": 1.5136, "eval_samples_per_second": 338.922, "eval_steps_per_second": 11.231, "step": 390 }, { "epoch": 6.153846153846154, "grad_norm": 4.275500774383545, "learning_rate": 1.153846153846154e-05, "loss": 1.0241, "step": 400 }, { "epoch": 6.923076923076923, "grad_norm": 3.8007335662841797, "learning_rate": 9.230769230769232e-06, "loss": 0.9206, "step": 450 }, { "epoch": 7.0, "eval_accuracy": 0.631578947368421, "eval_loss": 1.2855250835418701, "eval_runtime": 1.4953, "eval_samples_per_second": 343.071, "eval_steps_per_second": 11.369, "step": 455 }, { "epoch": 7.6923076923076925, "grad_norm": 4.490166187286377, "learning_rate": 6.923076923076923e-06, "loss": 0.8374, "step": 500 }, { "epoch": 8.0, "eval_accuracy": 0.6354775828460039, "eval_loss": 1.2579463720321655, "eval_runtime": 1.5008, "eval_samples_per_second": 341.827, "eval_steps_per_second": 11.328, "step": 520 }, { "epoch": 8.461538461538462, "grad_norm": 5.005167484283447, "learning_rate": 4.615384615384616e-06, "loss": 0.794, "step": 550 }, { "epoch": 9.0, "eval_accuracy": 0.6335282651072125, "eval_loss": 1.2524960041046143, "eval_runtime": 1.4948, "eval_samples_per_second": 343.181, "eval_steps_per_second": 11.372, "step": 585 } ], "logging_steps": 50, "max_steps": 650, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 728684192713248.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }