{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.75609756097561, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3902439024390244, "grad_norm": 0.9049399495124817, "learning_rate": 4.347826086956522e-05, "loss": 2.2562, "step": 20 }, { "epoch": 0.7804878048780488, "grad_norm": 0.8101117610931396, "learning_rate": 4.88544474393531e-05, "loss": 2.1366, "step": 40 }, { "epoch": 1.170731707317073, "grad_norm": 0.6929331421852112, "learning_rate": 4.750673854447439e-05, "loss": 1.9538, "step": 60 }, { "epoch": 1.5609756097560976, "grad_norm": 0.754576563835144, "learning_rate": 4.615902964959569e-05, "loss": 1.9039, "step": 80 }, { "epoch": 1.951219512195122, "grad_norm": 0.8291248679161072, "learning_rate": 4.4811320754716985e-05, "loss": 1.8062, "step": 100 }, { "epoch": 2.341463414634146, "grad_norm": 0.9486225247383118, "learning_rate": 4.3463611859838275e-05, "loss": 1.7765, "step": 120 }, { "epoch": 2.7317073170731705, "grad_norm": 1.1279809474945068, "learning_rate": 4.211590296495957e-05, "loss": 1.7288, "step": 140 }, { "epoch": 3.1219512195121952, "grad_norm": 1.1358485221862793, "learning_rate": 4.076819407008086e-05, "loss": 1.7136, "step": 160 }, { "epoch": 3.5121951219512195, "grad_norm": 1.2939114570617676, "learning_rate": 3.942048517520216e-05, "loss": 1.698, "step": 180 }, { "epoch": 3.902439024390244, "grad_norm": 1.3795692920684814, "learning_rate": 3.807277628032345e-05, "loss": 1.668, "step": 200 }, { "epoch": 4.2926829268292686, "grad_norm": 1.2294673919677734, "learning_rate": 3.672506738544474e-05, "loss": 1.6258, "step": 220 }, { "epoch": 4.682926829268292, "grad_norm": 1.4048880338668823, "learning_rate": 3.537735849056604e-05, "loss": 1.6133, "step": 240 }, { "epoch": 5.073170731707317, "grad_norm": 1.1704691648483276, "learning_rate": 3.4029649595687336e-05, "loss": 1.6349, "step": 260 }, { "epoch": 5.463414634146342, "grad_norm": 1.4525257349014282, "learning_rate": 3.2681940700808625e-05, "loss": 1.5788, "step": 280 }, { "epoch": 5.853658536585366, "grad_norm": 1.5394439697265625, "learning_rate": 3.133423180592992e-05, "loss": 1.5827, "step": 300 }, { "epoch": 6.2439024390243905, "grad_norm": 1.5792720317840576, "learning_rate": 2.998652291105121e-05, "loss": 1.5451, "step": 320 }, { "epoch": 6.634146341463414, "grad_norm": 1.6444499492645264, "learning_rate": 2.863881401617251e-05, "loss": 1.5757, "step": 340 }, { "epoch": 7.024390243902439, "grad_norm": 1.5749609470367432, "learning_rate": 2.7291105121293804e-05, "loss": 1.5414, "step": 360 }, { "epoch": 7.414634146341464, "grad_norm": 1.6040682792663574, "learning_rate": 2.5943396226415094e-05, "loss": 1.5311, "step": 380 }, { "epoch": 7.804878048780488, "grad_norm": 1.7397934198379517, "learning_rate": 2.459568733153639e-05, "loss": 1.534, "step": 400 }, { "epoch": 8.195121951219512, "grad_norm": 1.9339927434921265, "learning_rate": 2.3247978436657683e-05, "loss": 1.5277, "step": 420 }, { "epoch": 8.585365853658537, "grad_norm": 1.8686648607254028, "learning_rate": 2.1900269541778976e-05, "loss": 1.496, "step": 440 }, { "epoch": 8.975609756097562, "grad_norm": 1.8943285942077637, "learning_rate": 2.055256064690027e-05, "loss": 1.4955, "step": 460 }, { "epoch": 9.365853658536585, "grad_norm": 2.715195894241333, "learning_rate": 1.9204851752021562e-05, "loss": 1.4837, "step": 480 }, { "epoch": 9.75609756097561, "grad_norm": 2.464538097381592, "learning_rate": 1.785714285714286e-05, "loss": 1.5082, "step": 500 } ], "logging_steps": 20, "max_steps": 765, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.137043399013171e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }