{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 39, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02564102564102564, "grad_norm": 0.0, "learning_rate": 0, "loss": 8.3281, "step": 1 }, { "epoch": 0.05128205128205128, "grad_norm": 87.25187683105469, "learning_rate": 0.0, "loss": 10.0781, "step": 2 }, { "epoch": 0.07692307692307693, "grad_norm": 87.25187683105469, "learning_rate": 0.0, "loss": 3.9609, "step": 3 }, { "epoch": 0.10256410256410256, "grad_norm": 53.32879638671875, "learning_rate": 1.5051499783199057e-05, "loss": 5.457, "step": 4 }, { "epoch": 0.1282051282051282, "grad_norm": 106.99324798583984, "learning_rate": 2.385606273598312e-05, "loss": 2.8203, "step": 5 }, { "epoch": 0.15384615384615385, "grad_norm": 33.04688262939453, "learning_rate": 3.0102999566398115e-05, "loss": 1.75, "step": 6 }, { "epoch": 0.1794871794871795, "grad_norm": 63.9564208984375, "learning_rate": 3.4948500216800935e-05, "loss": 2.418, "step": 7 }, { "epoch": 0.20512820512820512, "grad_norm": 60.14704513549805, "learning_rate": 3.890756251918218e-05, "loss": 5.6406, "step": 8 }, { "epoch": 0.23076923076923078, "grad_norm": 77.91315460205078, "learning_rate": 4.2254902000712836e-05, "loss": 5.7148, "step": 9 }, { "epoch": 0.2564102564102564, "grad_norm": 82.38955688476562, "learning_rate": 4.515449934959717e-05, "loss": 7.3984, "step": 10 }, { "epoch": 0.28205128205128205, "grad_norm": 68.61566925048828, "learning_rate": 4.771212547196624e-05, "loss": 4.8672, "step": 11 }, { "epoch": 0.3076923076923077, "grad_norm": 93.55372619628906, "learning_rate": 4.9999999999999996e-05, "loss": 8.6016, "step": 12 }, { "epoch": 0.3333333333333333, "grad_norm": 38.4390754699707, "learning_rate": 5.2069634257911246e-05, "loss": 1.7266, "step": 13 }, { "epoch": 0.358974358974359, "grad_norm": 17.9733829498291, "learning_rate": 5.3959062302381234e-05, "loss": 2.2324, "step": 14 }, { "epoch": 0.38461538461538464, "grad_norm": 39.2911262512207, "learning_rate": 5.5697167615341825e-05, "loss": 2.7305, "step": 15 }, { "epoch": 0.41025641025641024, "grad_norm": 34.880802154541016, "learning_rate": 5.730640178391189e-05, "loss": 4.9688, "step": 16 }, { "epoch": 0.4358974358974359, "grad_norm": 14.430876731872559, "learning_rate": 5.880456295278406e-05, "loss": 2.082, "step": 17 }, { "epoch": 0.46153846153846156, "grad_norm": 18.182239532470703, "learning_rate": 6.020599913279623e-05, "loss": 1.2168, "step": 18 }, { "epoch": 0.48717948717948717, "grad_norm": 19.858905792236328, "learning_rate": 6.15224460689137e-05, "loss": 2.2441, "step": 19 }, { "epoch": 0.5128205128205128, "grad_norm": 30.2137451171875, "learning_rate": 6.276362525516529e-05, "loss": 3.3027, "step": 20 }, { "epoch": 0.5384615384615384, "grad_norm": 25.4908504486084, "learning_rate": 6.393768004764143e-05, "loss": 2.8242, "step": 21 }, { "epoch": 0.5641025641025641, "grad_norm": 16.74290657043457, "learning_rate": 6.505149978319905e-05, "loss": 2.4785, "step": 22 }, { "epoch": 0.5897435897435898, "grad_norm": 75.4117202758789, "learning_rate": 6.611096473669595e-05, "loss": 3.0977, "step": 23 }, { "epoch": 0.6153846153846154, "grad_norm": 29.400798797607422, "learning_rate": 6.712113404111031e-05, "loss": 2.5781, "step": 24 }, { "epoch": 0.6410256410256411, "grad_norm": 44.68231964111328, "learning_rate": 6.808639180087963e-05, "loss": 2.293, "step": 25 }, { "epoch": 0.6666666666666666, "grad_norm": 9.051566123962402, "learning_rate": 
6.90105620855803e-05, "loss": 0.894, "step": 26 }, { "epoch": 0.6923076923076923, "grad_norm": 13.536922454833984, "learning_rate": 6.989700043360187e-05, "loss": 1.8252, "step": 27 }, { "epoch": 0.717948717948718, "grad_norm": 21.84697914123535, "learning_rate": 7.074866739854089e-05, "loss": 2.3457, "step": 28 }, { "epoch": 0.7435897435897436, "grad_norm": 13.119584083557129, "learning_rate": 7.156818820794936e-05, "loss": 2.248, "step": 29 }, { "epoch": 0.7692307692307693, "grad_norm": 8.631421089172363, "learning_rate": 7.235790156711095e-05, "loss": 0.7803, "step": 30 }, { "epoch": 0.7948717948717948, "grad_norm": 23.562847137451172, "learning_rate": 7.31198998949478e-05, "loss": 2.8242, "step": 31 }, { "epoch": 0.8205128205128205, "grad_norm": 17.66178321838379, "learning_rate": 7.385606273598311e-05, "loss": 1.0996, "step": 32 }, { "epoch": 0.8461538461538461, "grad_norm": 12.418828010559082, "learning_rate": 7.456808469171363e-05, "loss": 2.084, "step": 33 }, { "epoch": 0.8717948717948718, "grad_norm": 6.5270867347717285, "learning_rate": 7.52574989159953e-05, "loss": 0.7832, "step": 34 }, { "epoch": 0.8974358974358975, "grad_norm": 7.1268720626831055, "learning_rate": 7.592569699389437e-05, "loss": 0.5742, "step": 35 }, { "epoch": 0.9230769230769231, "grad_norm": 17.19321632385254, "learning_rate": 7.657394585211275e-05, "loss": 1.3506, "step": 36 }, { "epoch": 0.9487179487179487, "grad_norm": 8.55725383758545, "learning_rate": 7.720340221751377e-05, "loss": 1.1533, "step": 37 }, { "epoch": 0.9743589743589743, "grad_norm": 17.66382598876953, "learning_rate": 7.781512503836436e-05, "loss": 1.2773, "step": 38 }, { "epoch": 1.0, "grad_norm": 7.638050556182861, "learning_rate": 7.841008620334975e-05, "loss": 0.3796, "step": 39 } ], "logging_steps": 1.0, "max_steps": 39, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }