|
{ |
|
"best_metric": 1.0497090816497803, |
|
"best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-63", |
|
"epoch": 8.952380952380953, |
|
"eval_steps": 500, |
|
"global_step": 94, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 2.605168342590332, |
|
"learning_rate": 0.0001, |
|
"loss": 1.515, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"eval_loss": 1.310670256614685, |
|
"eval_runtime": 3.9865, |
|
"eval_samples_per_second": 4.264, |
|
"eval_steps_per_second": 2.258, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.2956364154815674, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 1.0908, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.1376752853393555, |
|
"eval_runtime": 3.9866, |
|
"eval_samples_per_second": 4.264, |
|
"eval_steps_per_second": 2.258, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 2.9523809523809526, |
|
"grad_norm": 1.952481746673584, |
|
"learning_rate": 8.715724127386972e-05, |
|
"loss": 1.0257, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 2.9523809523809526, |
|
"eval_loss": 1.0673834085464478, |
|
"eval_runtime": 4.0023, |
|
"eval_samples_per_second": 4.248, |
|
"eval_steps_per_second": 2.249, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.819559097290039, |
|
"learning_rate": 7.191855733945387e-05, |
|
"loss": 0.886, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.050466537475586, |
|
"eval_runtime": 4.0072, |
|
"eval_samples_per_second": 4.242, |
|
"eval_steps_per_second": 2.246, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 4.9523809523809526, |
|
"grad_norm": 2.7341201305389404, |
|
"learning_rate": 5.522642316338268e-05, |
|
"loss": 0.9453, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 4.9523809523809526, |
|
"eval_loss": 1.0541181564331055, |
|
"eval_runtime": 4.0101, |
|
"eval_samples_per_second": 4.239, |
|
"eval_steps_per_second": 2.244, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.5818099975585938, |
|
"learning_rate": 3.6218132209150045e-05, |
|
"loss": 0.8409, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.0497090816497803, |
|
"eval_runtime": 3.9956, |
|
"eval_samples_per_second": 4.255, |
|
"eval_steps_per_second": 2.252, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 6.9523809523809526, |
|
"grad_norm": 2.11718487739563, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.9078, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 6.9523809523809526, |
|
"eval_loss": 1.0589897632598877, |
|
"eval_runtime": 4.0003, |
|
"eval_samples_per_second": 4.25, |
|
"eval_steps_per_second": 2.25, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.463954210281372, |
|
"learning_rate": 7.597595192178702e-06, |
|
"loss": 0.8154, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.0593781471252441, |
|
"eval_runtime": 3.9931, |
|
"eval_samples_per_second": 4.257, |
|
"eval_steps_per_second": 2.254, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 8.952380952380953, |
|
"grad_norm": 2.035900831222534, |
|
"learning_rate": 1.0926199633097157e-06, |
|
"loss": 0.8909, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 8.952380952380953, |
|
"eval_loss": 1.057528018951416, |
|
"eval_runtime": 3.9065, |
|
"eval_samples_per_second": 4.352, |
|
"eval_steps_per_second": 2.304, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 8.952380952380953, |
|
"step": 94, |
|
"total_flos": 8571389664559104.0, |
|
"train_loss": 0.9873519552514908, |
|
"train_runtime": 598.1578, |
|
"train_samples_per_second": 1.388, |
|
"train_steps_per_second": 0.167 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8571389664559104.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|