{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 155,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.35,
      "grad_norm": 3.5674822330474854,
      "learning_rate": 0.0001375,
      "loss": 3.066,
      "step": 11
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.487783670425415,
      "learning_rate": 0.00019136690647482017,
      "loss": 2.2172,
      "step": 22
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.1076672077178955,
      "learning_rate": 0.00017553956834532374,
      "loss": 1.4606,
      "step": 33
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9141297340393066,
      "learning_rate": 0.00015971223021582736,
      "loss": 1.1038,
      "step": 44
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.831645965576172,
      "learning_rate": 0.00014388489208633093,
      "loss": 1.0646,
      "step": 55
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.672914743423462,
      "learning_rate": 0.00012805755395683453,
      "loss": 1.0186,
      "step": 66
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9274446964263916,
      "learning_rate": 0.00011223021582733813,
      "loss": 0.8428,
      "step": 77
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.164165496826172,
      "learning_rate": 9.640287769784174e-05,
      "loss": 0.7633,
      "step": 88
    },
    {
      "epoch": 3.19,
      "grad_norm": 4.559382438659668,
      "learning_rate": 8.057553956834533e-05,
      "loss": 0.6851,
      "step": 99
    },
    {
      "epoch": 3.55,
      "grad_norm": 2.335391044616699,
      "learning_rate": 6.474820143884892e-05,
      "loss": 0.6521,
      "step": 110
    },
    {
      "epoch": 3.9,
      "grad_norm": 3.2239022254943848,
      "learning_rate": 4.892086330935252e-05,
      "loss": 0.706,
      "step": 121
    },
    {
      "epoch": 4.26,
      "grad_norm": 2.8791332244873047,
      "learning_rate": 3.3093525179856116e-05,
      "loss": 0.6047,
      "step": 132
    },
    {
      "epoch": 4.61,
      "grad_norm": 3.5159878730773926,
      "learning_rate": 1.7266187050359716e-05,
      "loss": 0.4889,
      "step": 143
    },
    {
      "epoch": 4.97,
      "grad_norm": 3.642343044281006,
      "learning_rate": 1.4388489208633094e-06,
      "loss": 0.585,
      "step": 154
    }
  ],
  "logging_steps": 11,
  "max_steps": 155,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 6751637581824000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}