|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 18.761904761904763, |
|
"eval_steps": 500, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.6349206349206349, |
|
"grad_norm": 1.5354194641113281, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.3494, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.253968253968254, |
|
"grad_norm": 0.43494924902915955, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.7194, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.8888888888888888, |
|
"grad_norm": 0.5042936205863953, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6417, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.507936507936508, |
|
"grad_norm": 0.37082698941230774, |
|
"learning_rate": 1.9932383577419432e-05, |
|
"loss": 0.5639, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.126984126984127, |
|
"grad_norm": 0.3500129282474518, |
|
"learning_rate": 1.973044870579824e-05, |
|
"loss": 0.5159, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.761904761904762, |
|
"grad_norm": 0.47998425364494324, |
|
"learning_rate": 1.9396926207859085e-05, |
|
"loss": 0.4783, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.380952380952381, |
|
"grad_norm": 0.7525663375854492, |
|
"learning_rate": 1.8936326403234125e-05, |
|
"loss": 0.4346, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.5472385287284851, |
|
"learning_rate": 1.8354878114129368e-05, |
|
"loss": 0.4163, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.634920634920634, |
|
"grad_norm": 0.8724488615989685, |
|
"learning_rate": 1.766044443118978e-05, |
|
"loss": 0.3745, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.253968253968254, |
|
"grad_norm": 0.5862165093421936, |
|
"learning_rate": 1.686241637868734e-05, |
|
"loss": 0.3326, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.888888888888889, |
|
"grad_norm": 0.9971191883087158, |
|
"learning_rate": 1.5971585917027864e-05, |
|
"loss": 0.2989, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 7.507936507936508, |
|
"grad_norm": 0.8492709398269653, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.2443, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.126984126984127, |
|
"grad_norm": 0.9441637992858887, |
|
"learning_rate": 1.396079766039157e-05, |
|
"loss": 0.2203, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 8.761904761904763, |
|
"grad_norm": 0.9310017228126526, |
|
"learning_rate": 1.2868032327110904e-05, |
|
"loss": 0.1696, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 9.380952380952381, |
|
"grad_norm": 0.9161068201065063, |
|
"learning_rate": 1.1736481776669307e-05, |
|
"loss": 0.1336, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.643371045589447, |
|
"learning_rate": 1.0581448289104759e-05, |
|
"loss": 0.1025, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 10.634920634920634, |
|
"grad_norm": 1.1491143703460693, |
|
"learning_rate": 9.418551710895243e-06, |
|
"loss": 0.0676, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 11.253968253968253, |
|
"grad_norm": 0.5557671189308167, |
|
"learning_rate": 8.263518223330698e-06, |
|
"loss": 0.0616, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 11.88888888888889, |
|
"grad_norm": 0.5206998586654663, |
|
"learning_rate": 7.131967672889101e-06, |
|
"loss": 0.045, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 12.507936507936508, |
|
"grad_norm": 0.356916218996048, |
|
"learning_rate": 6.039202339608432e-06, |
|
"loss": 0.0269, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 13.126984126984127, |
|
"grad_norm": 0.2968302369117737, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 0.0192, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 13.761904761904763, |
|
"grad_norm": 0.15373599529266357, |
|
"learning_rate": 4.028414082972141e-06, |
|
"loss": 0.0146, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 14.380952380952381, |
|
"grad_norm": 0.1345711648464203, |
|
"learning_rate": 3.1375836213126653e-06, |
|
"loss": 0.0115, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.11057666689157486, |
|
"learning_rate": 2.339555568810221e-06, |
|
"loss": 0.0092, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 15.634920634920634, |
|
"grad_norm": 0.08674044162034988, |
|
"learning_rate": 1.6451218858706374e-06, |
|
"loss": 0.0081, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 16.253968253968253, |
|
"grad_norm": 0.08988489955663681, |
|
"learning_rate": 1.0636735967658785e-06, |
|
"loss": 0.0076, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 16.88888888888889, |
|
"grad_norm": 0.07731083780527115, |
|
"learning_rate": 6.030737921409169e-07, |
|
"loss": 0.0077, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 17.507936507936506, |
|
"grad_norm": 0.08936144411563873, |
|
"learning_rate": 2.6955129420176193e-07, |
|
"loss": 0.0065, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 18.126984126984127, |
|
"grad_norm": 0.11082036793231964, |
|
"learning_rate": 6.761642258056977e-08, |
|
"loss": 0.0073, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 18.761904761904763, |
|
"grad_norm": 0.13867439329624176, |
|
"learning_rate": 0.0, |
|
"loss": 0.0069, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.416508510620877e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|