TracyTank's picture
Training in progress, step 90, checkpoint
a18b64c verified
raw
history blame
7.69 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.576,
"eval_steps": 9,
"global_step": 90,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0064,
"eval_loss": 2.962462902069092,
"eval_runtime": 14.3886,
"eval_samples_per_second": 73.113,
"eval_steps_per_second": 2.293,
"step": 1
},
{
"epoch": 0.0192,
"grad_norm": 2.497889995574951,
"learning_rate": 3e-05,
"loss": 2.9509,
"step": 3
},
{
"epoch": 0.0384,
"grad_norm": 1.9927830696105957,
"learning_rate": 6e-05,
"loss": 2.9487,
"step": 6
},
{
"epoch": 0.0576,
"grad_norm": 1.110428810119629,
"learning_rate": 9e-05,
"loss": 2.6523,
"step": 9
},
{
"epoch": 0.0576,
"eval_loss": 2.410982847213745,
"eval_runtime": 4.6211,
"eval_samples_per_second": 227.65,
"eval_steps_per_second": 7.141,
"step": 9
},
{
"epoch": 0.0768,
"grad_norm": 1.4526073932647705,
"learning_rate": 9.987820251299122e-05,
"loss": 2.378,
"step": 12
},
{
"epoch": 0.096,
"grad_norm": 1.4305192232131958,
"learning_rate": 9.924038765061042e-05,
"loss": 2.3302,
"step": 15
},
{
"epoch": 0.1152,
"grad_norm": 1.1190311908721924,
"learning_rate": 9.806308479691595e-05,
"loss": 2.2509,
"step": 18
},
{
"epoch": 0.1152,
"eval_loss": 2.1677029132843018,
"eval_runtime": 4.5951,
"eval_samples_per_second": 228.939,
"eval_steps_per_second": 7.182,
"step": 18
},
{
"epoch": 0.1344,
"grad_norm": 1.0180203914642334,
"learning_rate": 9.635919272833938e-05,
"loss": 2.1507,
"step": 21
},
{
"epoch": 0.1536,
"grad_norm": 0.8608107566833496,
"learning_rate": 9.414737964294636e-05,
"loss": 2.0623,
"step": 24
},
{
"epoch": 0.1728,
"grad_norm": 0.8825973272323608,
"learning_rate": 9.145187862775209e-05,
"loss": 2.1129,
"step": 27
},
{
"epoch": 0.1728,
"eval_loss": 2.0721230506896973,
"eval_runtime": 4.6294,
"eval_samples_per_second": 227.245,
"eval_steps_per_second": 7.128,
"step": 27
},
{
"epoch": 0.192,
"grad_norm": 0.8532243371009827,
"learning_rate": 8.83022221559489e-05,
"loss": 2.0382,
"step": 30
},
{
"epoch": 0.2112,
"grad_norm": 0.7470282912254333,
"learning_rate": 8.473291852294987e-05,
"loss": 2.0626,
"step": 33
},
{
"epoch": 0.2304,
"grad_norm": 0.885766327381134,
"learning_rate": 8.07830737662829e-05,
"loss": 2.0687,
"step": 36
},
{
"epoch": 0.2304,
"eval_loss": 2.036273717880249,
"eval_runtime": 4.5994,
"eval_samples_per_second": 228.727,
"eval_steps_per_second": 7.175,
"step": 36
},
{
"epoch": 0.2496,
"grad_norm": 0.8446648716926575,
"learning_rate": 7.649596321166024e-05,
"loss": 2.0859,
"step": 39
},
{
"epoch": 0.2688,
"grad_norm": 0.8682901859283447,
"learning_rate": 7.191855733945387e-05,
"loss": 2.0376,
"step": 42
},
{
"epoch": 0.288,
"grad_norm": 0.7690749168395996,
"learning_rate": 6.710100716628344e-05,
"loss": 2.0631,
"step": 45
},
{
"epoch": 0.288,
"eval_loss": 2.0197086334228516,
"eval_runtime": 4.6016,
"eval_samples_per_second": 228.616,
"eval_steps_per_second": 7.171,
"step": 45
},
{
"epoch": 0.3072,
"grad_norm": 0.7606220841407776,
"learning_rate": 6.209609477998338e-05,
"loss": 2.0648,
"step": 48
},
{
"epoch": 0.3264,
"grad_norm": 0.9449107646942139,
"learning_rate": 5.695865504800327e-05,
"loss": 1.9471,
"step": 51
},
{
"epoch": 0.3456,
"grad_norm": 0.7188290953636169,
"learning_rate": 5.174497483512506e-05,
"loss": 2.011,
"step": 54
},
{
"epoch": 0.3456,
"eval_loss": 2.008131504058838,
"eval_runtime": 4.5949,
"eval_samples_per_second": 228.948,
"eval_steps_per_second": 7.182,
"step": 54
},
{
"epoch": 0.3648,
"grad_norm": 0.898175060749054,
"learning_rate": 4.6512176312793736e-05,
"loss": 2.0437,
"step": 57
},
{
"epoch": 0.384,
"grad_norm": 0.8860452175140381,
"learning_rate": 4.131759111665349e-05,
"loss": 2.0314,
"step": 60
},
{
"epoch": 0.4032,
"grad_norm": 0.7955260872840881,
"learning_rate": 3.6218132209150045e-05,
"loss": 2.0663,
"step": 63
},
{
"epoch": 0.4032,
"eval_loss": 2.0011115074157715,
"eval_runtime": 4.6261,
"eval_samples_per_second": 227.405,
"eval_steps_per_second": 7.133,
"step": 63
},
{
"epoch": 0.4224,
"grad_norm": 0.7795498371124268,
"learning_rate": 3.12696703292044e-05,
"loss": 1.9806,
"step": 66
},
{
"epoch": 0.4416,
"grad_norm": 0.8645098209381104,
"learning_rate": 2.6526421860705473e-05,
"loss": 2.0406,
"step": 69
},
{
"epoch": 0.4608,
"grad_norm": 0.9535412192344666,
"learning_rate": 2.2040354826462668e-05,
"loss": 2.026,
"step": 72
},
{
"epoch": 0.4608,
"eval_loss": 1.9973247051239014,
"eval_runtime": 4.6137,
"eval_samples_per_second": 228.018,
"eval_steps_per_second": 7.153,
"step": 72
},
{
"epoch": 0.48,
"grad_norm": 0.6900295615196228,
"learning_rate": 1.7860619515673033e-05,
"loss": 2.0248,
"step": 75
},
{
"epoch": 0.4992,
"grad_norm": 0.9134931564331055,
"learning_rate": 1.4033009983067452e-05,
"loss": 2.0247,
"step": 78
},
{
"epoch": 0.5184,
"grad_norm": 0.7832142114639282,
"learning_rate": 1.0599462319663905e-05,
"loss": 2.0234,
"step": 81
},
{
"epoch": 0.5184,
"eval_loss": 1.9945977926254272,
"eval_runtime": 4.596,
"eval_samples_per_second": 228.897,
"eval_steps_per_second": 7.18,
"step": 81
},
{
"epoch": 0.5376,
"grad_norm": 0.740906834602356,
"learning_rate": 7.597595192178702e-06,
"loss": 2.0243,
"step": 84
},
{
"epoch": 0.5568,
"grad_norm": 0.8714410066604614,
"learning_rate": 5.060297685041659e-06,
"loss": 1.9928,
"step": 87
},
{
"epoch": 0.576,
"grad_norm": 0.8628079891204834,
"learning_rate": 3.0153689607045845e-06,
"loss": 1.998,
"step": 90
},
{
"epoch": 0.576,
"eval_loss": 1.993507981300354,
"eval_runtime": 4.6158,
"eval_samples_per_second": 227.913,
"eval_steps_per_second": 7.149,
"step": 90
}
],
"logging_steps": 3,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 9,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.4526559989792768e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}