{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.03409187761015938,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0003409187761015938,
      "eval_loss": 11.089664459228516,
      "eval_runtime": 10.5672,
      "eval_samples_per_second": 467.578,
      "eval_steps_per_second": 58.483,
      "step": 1
    },
    {
      "epoch": 0.0010227563283047815,
      "grad_norm": 0.45604798197746277,
      "learning_rate": 1.5e-05,
      "loss": 44.3568,
      "step": 3
    },
    {
      "epoch": 0.002045512656609563,
      "grad_norm": 0.46130335330963135,
      "learning_rate": 3e-05,
      "loss": 44.3545,
      "step": 6
    },
    {
      "epoch": 0.003068268984914344,
      "grad_norm": 0.44202330708503723,
      "learning_rate": 4.5e-05,
      "loss": 44.3516,
      "step": 9
    },
    {
      "epoch": 0.003068268984914344,
      "eval_loss": 11.088760375976562,
      "eval_runtime": 10.1289,
      "eval_samples_per_second": 487.81,
      "eval_steps_per_second": 61.013,
      "step": 9
    },
    {
      "epoch": 0.004091025313219126,
      "grad_norm": 0.4335339367389679,
      "learning_rate": 4.993910125649561e-05,
      "loss": 44.3549,
      "step": 12
    },
    {
      "epoch": 0.005113781641523907,
      "grad_norm": 0.4942709803581238,
      "learning_rate": 4.962019382530521e-05,
      "loss": 44.3544,
      "step": 15
    },
    {
      "epoch": 0.006136537969828688,
      "grad_norm": 0.4659290015697479,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 44.3528,
      "step": 18
    },
    {
      "epoch": 0.006136537969828688,
      "eval_loss": 11.08657455444336,
      "eval_runtime": 10.2663,
      "eval_samples_per_second": 481.283,
      "eval_steps_per_second": 60.197,
      "step": 18
    },
    {
      "epoch": 0.007159294298133469,
      "grad_norm": 0.4347454905509949,
      "learning_rate": 4.817959636416969e-05,
      "loss": 44.338,
      "step": 21
    },
    {
      "epoch": 0.008182050626438252,
      "grad_norm": 0.4440023601055145,
      "learning_rate": 4.707368982147318e-05,
      "loss": 44.3387,
      "step": 24
    },
    {
      "epoch": 0.009204806954743032,
      "grad_norm": 0.4840239882469177,
      "learning_rate": 4.572593931387604e-05,
      "loss": 44.341,
      "step": 27
    },
    {
      "epoch": 0.009204806954743032,
      "eval_loss": 11.084238052368164,
      "eval_runtime": 10.1848,
      "eval_samples_per_second": 485.137,
      "eval_steps_per_second": 60.679,
      "step": 27
    },
    {
      "epoch": 0.010227563283047815,
      "grad_norm": 0.49456870555877686,
      "learning_rate": 4.415111107797445e-05,
      "loss": 44.3389,
      "step": 30
    },
    {
      "epoch": 0.011250319611352595,
      "grad_norm": 0.4676015377044678,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 44.3346,
      "step": 33
    },
    {
      "epoch": 0.012273075939657376,
      "grad_norm": 0.4602993428707123,
      "learning_rate": 4.039153688314145e-05,
      "loss": 44.3378,
      "step": 36
    },
    {
      "epoch": 0.012273075939657376,
      "eval_loss": 11.082551956176758,
      "eval_runtime": 10.2119,
      "eval_samples_per_second": 483.848,
      "eval_steps_per_second": 60.518,
      "step": 36
    },
    {
      "epoch": 0.013295832267962158,
      "grad_norm": 0.49384036660194397,
      "learning_rate": 3.824798160583012e-05,
      "loss": 44.3236,
      "step": 39
    },
    {
      "epoch": 0.014318588596266939,
      "grad_norm": 0.47107622027397156,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 44.3268,
      "step": 42
    },
    {
      "epoch": 0.015341344924571721,
      "grad_norm": 0.49026861786842346,
      "learning_rate": 3.355050358314172e-05,
      "loss": 44.3271,
      "step": 45
    },
    {
      "epoch": 0.015341344924571721,
      "eval_loss": 11.080031394958496,
      "eval_runtime": 10.1875,
      "eval_samples_per_second": 485.006,
      "eval_steps_per_second": 60.663,
      "step": 45
    },
    {
      "epoch": 0.016364101252876503,
      "grad_norm": 0.4897162914276123,
      "learning_rate": 3.104804738999169e-05,
      "loss": 44.3183,
      "step": 48
    },
    {
      "epoch": 0.017386857581181282,
      "grad_norm": 0.45754456520080566,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 44.3153,
      "step": 51
    },
    {
      "epoch": 0.018409613909486065,
      "grad_norm": 0.5274067521095276,
      "learning_rate": 2.587248741756253e-05,
      "loss": 44.3143,
      "step": 54
    },
    {
      "epoch": 0.018409613909486065,
      "eval_loss": 11.078282356262207,
      "eval_runtime": 10.1669,
      "eval_samples_per_second": 485.988,
      "eval_steps_per_second": 60.785,
      "step": 54
    },
    {
      "epoch": 0.019432370237790847,
      "grad_norm": 0.4819900393486023,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 44.3073,
      "step": 57
    },
    {
      "epoch": 0.02045512656609563,
      "grad_norm": 0.5234776735305786,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 44.3053,
      "step": 60
    },
    {
      "epoch": 0.021477882894400408,
      "grad_norm": 0.5196526646614075,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 44.3021,
      "step": 63
    },
    {
      "epoch": 0.021477882894400408,
      "eval_loss": 11.077031135559082,
      "eval_runtime": 10.1496,
      "eval_samples_per_second": 486.817,
      "eval_steps_per_second": 60.889,
      "step": 63
    },
    {
      "epoch": 0.02250063922270519,
      "grad_norm": 0.5121042728424072,
      "learning_rate": 1.56348351646022e-05,
      "loss": 44.3014,
      "step": 66
    },
    {
      "epoch": 0.023523395551009973,
      "grad_norm": 0.47857794165611267,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 44.3116,
      "step": 69
    },
    {
      "epoch": 0.02454615187931475,
      "grad_norm": 0.5120978951454163,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 44.3001,
      "step": 72
    },
    {
      "epoch": 0.02454615187931475,
      "eval_loss": 11.07617473602295,
      "eval_runtime": 10.1688,
      "eval_samples_per_second": 485.897,
      "eval_steps_per_second": 60.774,
      "step": 72
    },
    {
      "epoch": 0.025568908207619534,
      "grad_norm": 0.48292016983032227,
      "learning_rate": 8.930309757836517e-06,
      "loss": 44.2999,
      "step": 75
    },
    {
      "epoch": 0.026591664535924316,
      "grad_norm": 0.46209338307380676,
      "learning_rate": 7.016504991533726e-06,
      "loss": 44.301,
      "step": 78
    },
    {
      "epoch": 0.0276144208642291,
      "grad_norm": 0.4896546006202698,
      "learning_rate": 5.299731159831953e-06,
      "loss": 44.3038,
      "step": 81
    },
    {
      "epoch": 0.0276144208642291,
      "eval_loss": 11.075565338134766,
      "eval_runtime": 10.139,
      "eval_samples_per_second": 487.324,
      "eval_steps_per_second": 60.952,
      "step": 81
    },
    {
      "epoch": 0.028637177192533878,
      "grad_norm": 0.4694303274154663,
      "learning_rate": 3.798797596089351e-06,
      "loss": 44.3048,
      "step": 84
    },
    {
      "epoch": 0.02965993352083866,
      "grad_norm": 0.48952430486679077,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 44.3033,
      "step": 87
    },
    {
      "epoch": 0.030682689849143442,
      "grad_norm": 0.46993014216423035,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 44.3012,
      "step": 90
    },
    {
      "epoch": 0.030682689849143442,
      "eval_loss": 11.075267791748047,
      "eval_runtime": 10.2328,
      "eval_samples_per_second": 482.858,
      "eval_steps_per_second": 60.394,
      "step": 90
    },
    {
      "epoch": 0.03170544617744822,
      "grad_norm": 0.49486178159713745,
      "learning_rate": 7.426068431000882e-07,
      "loss": 44.3013,
      "step": 93
    },
    {
      "epoch": 0.03272820250575301,
      "grad_norm": 0.47963476181030273,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 44.3049,
      "step": 96
    },
    {
      "epoch": 0.033750958834057786,
      "grad_norm": 0.4520646333694458,
      "learning_rate": 1.522932452260595e-08,
      "loss": 44.3065,
      "step": 99
    },
    {
      "epoch": 0.033750958834057786,
      "eval_loss": 11.075203895568848,
      "eval_runtime": 10.2212,
      "eval_samples_per_second": 483.409,
      "eval_steps_per_second": 60.463,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 748683264000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}