|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.400200100050025, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02001000500250125, |
|
"grad_norm": 1.2456663846969604, |
|
"learning_rate": 4.999447296060165e-05, |
|
"loss": 1.3318, |
|
"num_input_tokens_seen": 15648, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0400200100050025, |
|
"grad_norm": 0.8590693473815918, |
|
"learning_rate": 4.997789428625975e-05, |
|
"loss": 1.2773, |
|
"num_input_tokens_seen": 28720, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.060030015007503754, |
|
"grad_norm": 0.8516742587089539, |
|
"learning_rate": 4.995027130745321e-05, |
|
"loss": 1.1401, |
|
"num_input_tokens_seen": 43808, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.080040020010005, |
|
"grad_norm": 1.0357950925827026, |
|
"learning_rate": 4.99116162380454e-05, |
|
"loss": 1.089, |
|
"num_input_tokens_seen": 57472, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10005002501250625, |
|
"grad_norm": 0.744416356086731, |
|
"learning_rate": 4.986194616988364e-05, |
|
"loss": 1.1176, |
|
"num_input_tokens_seen": 71968, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.12006003001500751, |
|
"grad_norm": 0.7087241411209106, |
|
"learning_rate": 4.980128306524183e-05, |
|
"loss": 0.9949, |
|
"num_input_tokens_seen": 85552, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14007003501750875, |
|
"grad_norm": 0.708070695400238, |
|
"learning_rate": 4.972965374710952e-05, |
|
"loss": 1.0264, |
|
"num_input_tokens_seen": 100512, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.16008004002001, |
|
"grad_norm": 0.7232606410980225, |
|
"learning_rate": 4.964708988733178e-05, |
|
"loss": 1.0004, |
|
"num_input_tokens_seen": 113376, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18009004502251125, |
|
"grad_norm": 1.207189679145813, |
|
"learning_rate": 4.9553627992605066e-05, |
|
"loss": 1.1137, |
|
"num_input_tokens_seen": 127680, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2001000500250125, |
|
"grad_norm": 0.6889365315437317, |
|
"learning_rate": 4.944930938833535e-05, |
|
"loss": 0.9646, |
|
"num_input_tokens_seen": 143136, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22011005502751377, |
|
"grad_norm": 1.3263787031173706, |
|
"learning_rate": 4.9334180200365486e-05, |
|
"loss": 0.9757, |
|
"num_input_tokens_seen": 155488, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.24012006003001501, |
|
"grad_norm": 1.0919948816299438, |
|
"learning_rate": 4.9208291334580104e-05, |
|
"loss": 0.9126, |
|
"num_input_tokens_seen": 171008, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26013006503251623, |
|
"grad_norm": 0.853539764881134, |
|
"learning_rate": 4.907169845439688e-05, |
|
"loss": 1.0119, |
|
"num_input_tokens_seen": 185536, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.2801400700350175, |
|
"grad_norm": 0.9023884534835815, |
|
"learning_rate": 4.892446195615423e-05, |
|
"loss": 1.1192, |
|
"num_input_tokens_seen": 201728, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3001500750375188, |
|
"grad_norm": 1.3300387859344482, |
|
"learning_rate": 4.87666469424063e-05, |
|
"loss": 1.0167, |
|
"num_input_tokens_seen": 217584, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.32016008004002, |
|
"grad_norm": 1.1315807104110718, |
|
"learning_rate": 4.859832319313697e-05, |
|
"loss": 0.8291, |
|
"num_input_tokens_seen": 230864, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3401700850425213, |
|
"grad_norm": 1.2459896802902222, |
|
"learning_rate": 4.841956513490577e-05, |
|
"loss": 0.9591, |
|
"num_input_tokens_seen": 245584, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3601800900450225, |
|
"grad_norm": 1.114758849143982, |
|
"learning_rate": 4.8230451807939135e-05, |
|
"loss": 0.9869, |
|
"num_input_tokens_seen": 259760, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.38019009504752377, |
|
"grad_norm": 0.8792176842689514, |
|
"learning_rate": 4.803106683118177e-05, |
|
"loss": 1.0423, |
|
"num_input_tokens_seen": 274432, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.400200100050025, |
|
"grad_norm": 1.6365342140197754, |
|
"learning_rate": 4.782149836532345e-05, |
|
"loss": 1.0122, |
|
"num_input_tokens_seen": 288256, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 747, |
|
"num_input_tokens_seen": 288256, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3016308396326912e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|