|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.012313521193464362, |
|
"eval_steps": 13, |
|
"global_step": 39, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00031573131265293233, |
|
"grad_norm": 0.6405626535415649, |
|
"learning_rate": 2e-05, |
|
"loss": 6.5486, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00031573131265293233, |
|
"eval_loss": 1.742234706878662, |
|
"eval_runtime": 74.1238, |
|
"eval_samples_per_second": 17.997, |
|
"eval_steps_per_second": 8.998, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006314626253058647, |
|
"grad_norm": 0.500285267829895, |
|
"learning_rate": 4e-05, |
|
"loss": 6.1622, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.000947193937958797, |
|
"grad_norm": 0.6612991690635681, |
|
"learning_rate": 6e-05, |
|
"loss": 7.4267, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0012629252506117293, |
|
"grad_norm": 0.6658630967140198, |
|
"learning_rate": 8e-05, |
|
"loss": 6.6306, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0015786565632646618, |
|
"grad_norm": 0.7246699333190918, |
|
"learning_rate": 0.0001, |
|
"loss": 6.792, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.001894387875917594, |
|
"grad_norm": 0.6887425780296326, |
|
"learning_rate": 0.00012, |
|
"loss": 6.5042, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0022101191885705264, |
|
"grad_norm": 1.1644823551177979, |
|
"learning_rate": 0.00014, |
|
"loss": 6.5081, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0025258505012234586, |
|
"grad_norm": 1.0138639211654663, |
|
"learning_rate": 0.00016, |
|
"loss": 6.8784, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0028415818138763913, |
|
"grad_norm": 1.9288033246994019, |
|
"learning_rate": 0.00018, |
|
"loss": 7.035, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0031573131265293236, |
|
"grad_norm": 2.2389585971832275, |
|
"learning_rate": 0.0002, |
|
"loss": 6.8514, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.003473044439182256, |
|
"grad_norm": 1.7787041664123535, |
|
"learning_rate": 0.0001996917333733128, |
|
"loss": 5.8545, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.003788775751835188, |
|
"grad_norm": 1.6030915975570679, |
|
"learning_rate": 0.00019876883405951377, |
|
"loss": 5.7655, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.004104507064488121, |
|
"grad_norm": 1.6597908735275269, |
|
"learning_rate": 0.00019723699203976766, |
|
"loss": 5.3542, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.004104507064488121, |
|
"eval_loss": 1.340001106262207, |
|
"eval_runtime": 73.6832, |
|
"eval_samples_per_second": 18.105, |
|
"eval_steps_per_second": 9.052, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.004420238377141053, |
|
"grad_norm": 2.3956403732299805, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 5.4276, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.004735969689793985, |
|
"grad_norm": 2.0606181621551514, |
|
"learning_rate": 0.0001923879532511287, |
|
"loss": 5.2302, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.005051701002446917, |
|
"grad_norm": 2.8639981746673584, |
|
"learning_rate": 0.0001891006524188368, |
|
"loss": 5.1071, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00536743231509985, |
|
"grad_norm": 2.9613735675811768, |
|
"learning_rate": 0.00018526401643540922, |
|
"loss": 4.8221, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.005683163627752783, |
|
"grad_norm": 2.5257747173309326, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 5.1752, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0059988949404057145, |
|
"grad_norm": 2.4979665279388428, |
|
"learning_rate": 0.0001760405965600031, |
|
"loss": 5.448, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.006314626253058647, |
|
"grad_norm": 2.4641616344451904, |
|
"learning_rate": 0.00017071067811865476, |
|
"loss": 4.5116, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.006630357565711579, |
|
"grad_norm": 2.583629608154297, |
|
"learning_rate": 0.00016494480483301836, |
|
"loss": 4.3406, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.006946088878364512, |
|
"grad_norm": 2.122413158416748, |
|
"learning_rate": 0.00015877852522924732, |
|
"loss": 4.1938, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0072618201910174445, |
|
"grad_norm": 1.9655640125274658, |
|
"learning_rate": 0.0001522498564715949, |
|
"loss": 4.2416, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.007577551503670376, |
|
"grad_norm": 2.496213912963867, |
|
"learning_rate": 0.00014539904997395468, |
|
"loss": 4.492, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.007893282816323309, |
|
"grad_norm": 2.098853349685669, |
|
"learning_rate": 0.000138268343236509, |
|
"loss": 4.0397, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.008209014128976242, |
|
"grad_norm": 2.3906338214874268, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 3.8275, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.008209014128976242, |
|
"eval_loss": 1.069798469543457, |
|
"eval_runtime": 73.654, |
|
"eval_samples_per_second": 18.112, |
|
"eval_steps_per_second": 9.056, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.008524745441629173, |
|
"grad_norm": 2.8270161151885986, |
|
"learning_rate": 0.00012334453638559057, |
|
"loss": 4.3127, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.008840476754282105, |
|
"grad_norm": 2.343597173690796, |
|
"learning_rate": 0.0001156434465040231, |
|
"loss": 4.7903, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.009156208066935038, |
|
"grad_norm": 2.1331675052642822, |
|
"learning_rate": 0.0001078459095727845, |
|
"loss": 3.7894, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.00947193937958797, |
|
"grad_norm": 2.6220457553863525, |
|
"learning_rate": 0.0001, |
|
"loss": 4.2415, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009787670692240904, |
|
"grad_norm": 2.525392532348633, |
|
"learning_rate": 9.215409042721552e-05, |
|
"loss": 4.3988, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.010103402004893835, |
|
"grad_norm": 2.459665298461914, |
|
"learning_rate": 8.435655349597689e-05, |
|
"loss": 4.202, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.010419133317546767, |
|
"grad_norm": 2.508035182952881, |
|
"learning_rate": 7.66554636144095e-05, |
|
"loss": 4.4926, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0107348646301997, |
|
"grad_norm": 2.238208293914795, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 4.3429, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.011050595942852633, |
|
"grad_norm": 1.8518725633621216, |
|
"learning_rate": 6.173165676349103e-05, |
|
"loss": 3.7004, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.011366327255505565, |
|
"grad_norm": 2.1632683277130127, |
|
"learning_rate": 5.4600950026045326e-05, |
|
"loss": 3.6473, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.011682058568158496, |
|
"grad_norm": 2.4588687419891357, |
|
"learning_rate": 4.7750143528405126e-05, |
|
"loss": 4.118, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.011997789880811429, |
|
"grad_norm": 1.9152288436889648, |
|
"learning_rate": 4.12214747707527e-05, |
|
"loss": 3.8266, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.012313521193464362, |
|
"grad_norm": 2.8986527919769287, |
|
"learning_rate": 3.5055195166981645e-05, |
|
"loss": 4.5158, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.012313521193464362, |
|
"eval_loss": 0.9916089773178101, |
|
"eval_runtime": 73.6951, |
|
"eval_samples_per_second": 18.102, |
|
"eval_steps_per_second": 9.051, |
|
"step": 39 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.242743039852544e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|