{ "best_metric": 0.6101364522417154, "best_model_checkpoint": "brand-safety-model\\checkpoint-260", "epoch": 4.0, "eval_steps": 500, "global_step": 260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7692307692307693, "grad_norm": 2.6331872940063477, "learning_rate": 2.7692307692307694e-05, "loss": 3.319, "step": 50 }, { "epoch": 1.0, "eval_accuracy": 0.42300194931773877, "eval_loss": 2.7462074756622314, "eval_runtime": 1.5522, "eval_samples_per_second": 330.495, "eval_steps_per_second": 10.952, "step": 65 }, { "epoch": 1.5384615384615383, "grad_norm": 3.3225834369659424, "learning_rate": 2.5384615384615386e-05, "loss": 2.6272, "step": 100 }, { "epoch": 2.0, "eval_accuracy": 0.5009746588693957, "eval_loss": 2.07950758934021, "eval_runtime": 1.523, "eval_samples_per_second": 336.824, "eval_steps_per_second": 11.162, "step": 130 }, { "epoch": 2.3076923076923075, "grad_norm": 3.2619457244873047, "learning_rate": 2.307692307692308e-05, "loss": 2.137, "step": 150 }, { "epoch": 3.0, "eval_accuracy": 0.5769980506822612, "eval_loss": 1.6682791709899902, "eval_runtime": 1.5108, "eval_samples_per_second": 339.548, "eval_steps_per_second": 11.252, "step": 195 }, { "epoch": 3.076923076923077, "grad_norm": 3.448869228363037, "learning_rate": 2.076923076923077e-05, "loss": 1.7515, "step": 200 }, { "epoch": 3.8461538461538463, "grad_norm": 3.639822483062744, "learning_rate": 1.8461538461538465e-05, "loss": 1.469, "step": 250 }, { "epoch": 4.0, "eval_accuracy": 0.6101364522417154, "eval_loss": 1.472076654434204, "eval_runtime": 1.5018, "eval_samples_per_second": 341.601, "eval_steps_per_second": 11.32, "step": 260 } ], "logging_steps": 50, "max_steps": 650, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 325268996251104.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }