|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0007074437228518472, |
|
"eval_steps": 10, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.4148874457036944e-05, |
|
"eval_loss": 5.3834967613220215, |
|
"eval_runtime": 1889.4258, |
|
"eval_samples_per_second": 15.75, |
|
"eval_steps_per_second": 7.875, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 7.074437228518471e-05, |
|
"grad_norm": 26.077726364135742, |
|
"learning_rate": 5e-05, |
|
"loss": 20.0704, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00014148874457036941, |
|
"grad_norm": 26.809843063354492, |
|
"learning_rate": 0.0001, |
|
"loss": 19.5946, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00014148874457036941, |
|
"eval_loss": 3.9861538410186768, |
|
"eval_runtime": 1895.9004, |
|
"eval_samples_per_second": 15.697, |
|
"eval_steps_per_second": 7.849, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00021223311685555414, |
|
"grad_norm": 24.79098129272461, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 14.4881, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00028297748914073883, |
|
"grad_norm": 25.828630447387695, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 14.0964, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00028297748914073883, |
|
"eval_loss": 3.5989348888397217, |
|
"eval_runtime": 1891.5108, |
|
"eval_samples_per_second": 15.733, |
|
"eval_steps_per_second": 7.867, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0003537218614259236, |
|
"grad_norm": 23.07585334777832, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 12.9708, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.00042446623371110827, |
|
"grad_norm": 25.14630699157715, |
|
"learning_rate": 5e-05, |
|
"loss": 14.9684, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00042446623371110827, |
|
"eval_loss": 3.5218193531036377, |
|
"eval_runtime": 1883.6604, |
|
"eval_samples_per_second": 15.798, |
|
"eval_steps_per_second": 7.9, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.000495210605996293, |
|
"grad_norm": 24.184917449951172, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 13.4074, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0005659549782814777, |
|
"grad_norm": 21.100221633911133, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 14.4036, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0005659549782814777, |
|
"eval_loss": 3.4930121898651123, |
|
"eval_runtime": 1886.4935, |
|
"eval_samples_per_second": 15.775, |
|
"eval_steps_per_second": 7.888, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0006366993505666624, |
|
"grad_norm": 22.75106430053711, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 13.5211, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0007074437228518472, |
|
"grad_norm": 27.424501419067383, |
|
"learning_rate": 0.0, |
|
"loss": 14.3889, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0007074437228518472, |
|
"eval_loss": 3.4873178005218506, |
|
"eval_runtime": 1885.3182, |
|
"eval_samples_per_second": 15.785, |
|
"eval_steps_per_second": 7.893, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8763348929740800.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|