|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 39, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02564102564102564, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0,
|
"loss": 8.3281, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05128205128205128, |
|
"grad_norm": 87.25187683105469, |
|
"learning_rate": 0.0, |
|
"loss": 10.0781, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.07692307692307693, |
|
"grad_norm": 87.25187683105469, |
|
"learning_rate": 0.0, |
|
"loss": 3.9609, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.10256410256410256, |
|
"grad_norm": 53.32879638671875, |
|
"learning_rate": 1.5051499783199057e-05, |
|
"loss": 5.457, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.1282051282051282, |
|
"grad_norm": 106.99324798583984, |
|
"learning_rate": 2.385606273598312e-05, |
|
"loss": 2.8203, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 33.04688262939453, |
|
"learning_rate": 3.0102999566398115e-05, |
|
"loss": 1.75, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.1794871794871795, |
|
"grad_norm": 63.9564208984375, |
|
"learning_rate": 3.4948500216800935e-05, |
|
"loss": 2.418, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.20512820512820512, |
|
"grad_norm": 60.14704513549805, |
|
"learning_rate": 3.890756251918218e-05, |
|
"loss": 5.6406, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.23076923076923078, |
|
"grad_norm": 77.91315460205078, |
|
"learning_rate": 4.2254902000712836e-05, |
|
"loss": 5.7148, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 82.38955688476562, |
|
"learning_rate": 4.515449934959717e-05, |
|
"loss": 7.3984, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.28205128205128205, |
|
"grad_norm": 68.61566925048828, |
|
"learning_rate": 4.771212547196624e-05, |
|
"loss": 4.8672, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 93.55372619628906, |
|
"learning_rate": 4.9999999999999996e-05, |
|
"loss": 8.6016, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 38.4390754699707, |
|
"learning_rate": 5.2069634257911246e-05, |
|
"loss": 1.7266, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.358974358974359, |
|
"grad_norm": 17.9733829498291, |
|
"learning_rate": 5.3959062302381234e-05, |
|
"loss": 2.2324, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 39.2911262512207, |
|
"learning_rate": 5.5697167615341825e-05, |
|
"loss": 2.7305, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.41025641025641024, |
|
"grad_norm": 34.880802154541016, |
|
"learning_rate": 5.730640178391189e-05, |
|
"loss": 4.9688, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.4358974358974359, |
|
"grad_norm": 14.430876731872559, |
|
"learning_rate": 5.880456295278406e-05, |
|
"loss": 2.082, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 18.182239532470703, |
|
"learning_rate": 6.020599913279623e-05, |
|
"loss": 1.2168, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.48717948717948717, |
|
"grad_norm": 19.858905792236328, |
|
"learning_rate": 6.15224460689137e-05, |
|
"loss": 2.2441, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 30.2137451171875, |
|
"learning_rate": 6.276362525516529e-05, |
|
"loss": 3.3027, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5384615384615384, |
|
"grad_norm": 25.4908504486084, |
|
"learning_rate": 6.393768004764143e-05, |
|
"loss": 2.8242, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.5641025641025641, |
|
"grad_norm": 16.74290657043457, |
|
"learning_rate": 6.505149978319905e-05, |
|
"loss": 2.4785, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.5897435897435898, |
|
"grad_norm": 75.4117202758789, |
|
"learning_rate": 6.611096473669595e-05, |
|
"loss": 3.0977, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 29.400798797607422, |
|
"learning_rate": 6.712113404111031e-05, |
|
"loss": 2.5781, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.6410256410256411, |
|
"grad_norm": 44.68231964111328, |
|
"learning_rate": 6.808639180087963e-05, |
|
"loss": 2.293, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 9.051566123962402, |
|
"learning_rate": 6.90105620855803e-05, |
|
"loss": 0.894, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.6923076923076923, |
|
"grad_norm": 13.536922454833984, |
|
"learning_rate": 6.989700043360187e-05, |
|
"loss": 1.8252, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.717948717948718, |
|
"grad_norm": 21.84697914123535, |
|
"learning_rate": 7.074866739854089e-05, |
|
"loss": 2.3457, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.7435897435897436, |
|
"grad_norm": 13.119584083557129, |
|
"learning_rate": 7.156818820794936e-05, |
|
"loss": 2.248, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 8.631421089172363, |
|
"learning_rate": 7.235790156711095e-05, |
|
"loss": 0.7803, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7948717948717948, |
|
"grad_norm": 23.562847137451172, |
|
"learning_rate": 7.31198998949478e-05, |
|
"loss": 2.8242, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.8205128205128205, |
|
"grad_norm": 17.66178321838379, |
|
"learning_rate": 7.385606273598311e-05, |
|
"loss": 1.0996, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.8461538461538461, |
|
"grad_norm": 12.418828010559082, |
|
"learning_rate": 7.456808469171363e-05, |
|
"loss": 2.084, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.8717948717948718, |
|
"grad_norm": 6.5270867347717285, |
|
"learning_rate": 7.52574989159953e-05, |
|
"loss": 0.7832, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.8974358974358975, |
|
"grad_norm": 7.1268720626831055, |
|
"learning_rate": 7.592569699389437e-05, |
|
"loss": 0.5742, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 17.19321632385254, |
|
"learning_rate": 7.657394585211275e-05, |
|
"loss": 1.3506, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.9487179487179487, |
|
"grad_norm": 8.55725383758545, |
|
"learning_rate": 7.720340221751377e-05, |
|
"loss": 1.1533, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.9743589743589743, |
|
"grad_norm": 17.66382598876953, |
|
"learning_rate": 7.781512503836436e-05, |
|
"loss": 1.2773, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.638050556182861, |
|
"learning_rate": 7.841008620334975e-05, |
|
"loss": 0.3796, |
|
"step": 39 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 39, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 250, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|