{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 155, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.35, "grad_norm": 3.5674822330474854, "learning_rate": 0.0001375, "loss": 3.066, "step": 11 }, { "epoch": 0.71, "grad_norm": 2.487783670425415, "learning_rate": 0.00019136690647482017, "loss": 2.2172, "step": 22 }, { "epoch": 1.06, "grad_norm": 3.1076672077178955, "learning_rate": 0.00017553956834532374, "loss": 1.4606, "step": 33 }, { "epoch": 1.42, "grad_norm": 2.9141297340393066, "learning_rate": 0.00015971223021582736, "loss": 1.1038, "step": 44 }, { "epoch": 1.77, "grad_norm": 2.831645965576172, "learning_rate": 0.00014388489208633093, "loss": 1.0646, "step": 55 }, { "epoch": 2.13, "grad_norm": 2.672914743423462, "learning_rate": 0.00012805755395683453, "loss": 1.0186, "step": 66 }, { "epoch": 2.48, "grad_norm": 2.9274446964263916, "learning_rate": 0.00011223021582733813, "loss": 0.8428, "step": 77 }, { "epoch": 2.84, "grad_norm": 3.164165496826172, "learning_rate": 9.640287769784174e-05, "loss": 0.7633, "step": 88 }, { "epoch": 3.19, "grad_norm": 4.559382438659668, "learning_rate": 8.057553956834533e-05, "loss": 0.6851, "step": 99 }, { "epoch": 3.55, "grad_norm": 2.335391044616699, "learning_rate": 6.474820143884892e-05, "loss": 0.6521, "step": 110 }, { "epoch": 3.9, "grad_norm": 3.2239022254943848, "learning_rate": 4.892086330935252e-05, "loss": 0.706, "step": 121 }, { "epoch": 4.26, "grad_norm": 2.8791332244873047, "learning_rate": 3.3093525179856116e-05, "loss": 0.6047, "step": 132 }, { "epoch": 4.61, "grad_norm": 3.5159878730773926, "learning_rate": 1.7266187050359716e-05, "loss": 0.4889, "step": 143 }, { "epoch": 4.97, "grad_norm": 3.642343044281006, "learning_rate": 1.4388489208633094e-06, "loss": 0.585, "step": 154 } ], "logging_steps": 11, "max_steps": 155, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 6751637581824000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }