|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9473684210526314, |
|
"eval_steps": 500, |
|
"global_step": 21, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14035087719298245, |
|
"grad_norm": 0.15580356121063232, |
|
"learning_rate": 4e-08, |
|
"loss": 1.1187, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.2807017543859649, |
|
"grad_norm": 0.22189772129058838, |
|
"learning_rate": 8e-08, |
|
"loss": 1.1567, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"grad_norm": 0.1661478728055954, |
|
"learning_rate": 1.2000000000000002e-07, |
|
"loss": 1.0823, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.5614035087719298, |
|
"grad_norm": 0.20104122161865234, |
|
"learning_rate": 1.6e-07, |
|
"loss": 1.1356, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 0.16658252477645874, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 1.1005, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 0.5534899830818176, |
|
"learning_rate": 2.4000000000000003e-07, |
|
"loss": 1.1723, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.9824561403508771, |
|
"grad_norm": 0.16570821404457092, |
|
"learning_rate": 2.8e-07, |
|
"loss": 1.0728, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.1228070175438596, |
|
"grad_norm": 0.3442396819591522, |
|
"learning_rate": 3.2e-07, |
|
"loss": 1.1199, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.263157894736842, |
|
"grad_norm": 0.16980823874473572, |
|
"learning_rate": 3.6e-07, |
|
"loss": 1.1108, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 0.5345178246498108, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 1.06, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.543859649122807, |
|
"grad_norm": 0.5070182681083679, |
|
"learning_rate": 4.4e-07, |
|
"loss": 1.095, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.6842105263157894, |
|
"grad_norm": 0.13856372237205505, |
|
"learning_rate": 4.800000000000001e-07, |
|
"loss": 1.1721, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.8245614035087718, |
|
"grad_norm": 0.1828210949897766, |
|
"learning_rate": 5.2e-07, |
|
"loss": 1.1585, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.9649122807017543, |
|
"grad_norm": 0.19238212704658508, |
|
"learning_rate": 5.6e-07, |
|
"loss": 1.1213, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.17399907112121582, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 1.1722, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.245614035087719, |
|
"grad_norm": 0.22373908758163452, |
|
"learning_rate": 6.4e-07, |
|
"loss": 1.1152, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 2.3859649122807016, |
|
"grad_norm": 0.24265660345554352, |
|
"learning_rate": 6.800000000000001e-07, |
|
"loss": 1.1472, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 2.526315789473684, |
|
"grad_norm": 0.1733134537935257, |
|
"learning_rate": 7.2e-07, |
|
"loss": 1.1316, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.14888471364974976, |
|
"learning_rate": 7.6e-07, |
|
"loss": 0.9929, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"grad_norm": 0.1817580908536911, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 1.2424, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.9473684210526314, |
|
"grad_norm": 0.13880617916584015, |
|
"learning_rate": 8.400000000000001e-07, |
|
"loss": 1.1539, |
|
"step": 21 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 21, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.9136378493952e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|