{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.02454615187931475, "eval_steps": 9, "global_step": 72, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003409187761015938, "eval_loss": 11.089664459228516, "eval_runtime": 10.5672, "eval_samples_per_second": 467.578, "eval_steps_per_second": 58.483, "step": 1 }, { "epoch": 0.0010227563283047815, "grad_norm": 0.45604798197746277, "learning_rate": 1.5e-05, "loss": 44.3568, "step": 3 }, { "epoch": 0.002045512656609563, "grad_norm": 0.46130335330963135, "learning_rate": 3e-05, "loss": 44.3545, "step": 6 }, { "epoch": 0.003068268984914344, "grad_norm": 0.44202330708503723, "learning_rate": 4.5e-05, "loss": 44.3516, "step": 9 }, { "epoch": 0.003068268984914344, "eval_loss": 11.088760375976562, "eval_runtime": 10.1289, "eval_samples_per_second": 487.81, "eval_steps_per_second": 61.013, "step": 9 }, { "epoch": 0.004091025313219126, "grad_norm": 0.4335339367389679, "learning_rate": 4.993910125649561e-05, "loss": 44.3549, "step": 12 }, { "epoch": 0.005113781641523907, "grad_norm": 0.4942709803581238, "learning_rate": 4.962019382530521e-05, "loss": 44.3544, "step": 15 }, { "epoch": 0.006136537969828688, "grad_norm": 0.4659290015697479, "learning_rate": 4.9031542398457974e-05, "loss": 44.3528, "step": 18 }, { "epoch": 0.006136537969828688, "eval_loss": 11.08657455444336, "eval_runtime": 10.2663, "eval_samples_per_second": 481.283, "eval_steps_per_second": 60.197, "step": 18 }, { "epoch": 0.007159294298133469, "grad_norm": 0.4347454905509949, "learning_rate": 4.817959636416969e-05, "loss": 44.338, "step": 21 }, { "epoch": 0.008182050626438252, "grad_norm": 0.4440023601055145, "learning_rate": 4.707368982147318e-05, "loss": 44.3387, "step": 24 }, { "epoch": 0.009204806954743032, "grad_norm": 0.4840239882469177, "learning_rate": 4.572593931387604e-05, "loss": 44.341, "step": 27 }, { "epoch": 0.009204806954743032, "eval_loss": 11.084238052368164, "eval_runtime": 10.1848, "eval_samples_per_second": 485.137, "eval_steps_per_second": 60.679, "step": 27 }, { "epoch": 0.010227563283047815, "grad_norm": 0.49456870555877686, "learning_rate": 4.415111107797445e-05, "loss": 44.3389, "step": 30 }, { "epoch": 0.011250319611352595, "grad_norm": 0.4676015377044678, "learning_rate": 4.2366459261474933e-05, "loss": 44.3346, "step": 33 }, { "epoch": 0.012273075939657376, "grad_norm": 0.4602993428707123, "learning_rate": 4.039153688314145e-05, "loss": 44.3378, "step": 36 }, { "epoch": 0.012273075939657376, "eval_loss": 11.082551956176758, "eval_runtime": 10.2119, "eval_samples_per_second": 483.848, "eval_steps_per_second": 60.518, "step": 36 }, { "epoch": 0.013295832267962158, "grad_norm": 0.49384036660194397, "learning_rate": 3.824798160583012e-05, "loss": 44.3236, "step": 39 }, { "epoch": 0.014318588596266939, "grad_norm": 0.47107622027397156, "learning_rate": 3.5959278669726935e-05, "loss": 44.3268, "step": 42 }, { "epoch": 0.015341344924571721, "grad_norm": 0.49026861786842346, "learning_rate": 3.355050358314172e-05, "loss": 44.3271, "step": 45 }, { "epoch": 0.015341344924571721, "eval_loss": 11.080031394958496, "eval_runtime": 10.1875, "eval_samples_per_second": 485.006, "eval_steps_per_second": 60.663, "step": 45 }, { "epoch": 0.016364101252876503, "grad_norm": 0.4897162914276123, "learning_rate": 3.104804738999169e-05, "loss": 44.3183, "step": 48 }, { "epoch": 0.017386857581181282, "grad_norm": 0.45754456520080566, "learning_rate": 2.8479327524001636e-05, "loss": 44.3153, "step": 51 }, { "epoch": 0.018409613909486065, "grad_norm": 0.5274067521095276, "learning_rate": 2.587248741756253e-05, "loss": 44.3143, "step": 54 }, { "epoch": 0.018409613909486065, "eval_loss": 11.078282356262207, "eval_runtime": 10.1669, "eval_samples_per_second": 485.988, "eval_steps_per_second": 60.785, "step": 54 }, { "epoch": 0.019432370237790847, "grad_norm": 0.4819900393486023, "learning_rate": 2.3256088156396868e-05, "loss": 44.3073, "step": 57 }, { "epoch": 0.02045512656609563, "grad_norm": 0.5234776735305786, "learning_rate": 2.0658795558326743e-05, "loss": 44.3053, "step": 60 }, { "epoch": 0.021477882894400408, "grad_norm": 0.5196526646614075, "learning_rate": 1.8109066104575023e-05, "loss": 44.3021, "step": 63 }, { "epoch": 0.021477882894400408, "eval_loss": 11.077031135559082, "eval_runtime": 10.1496, "eval_samples_per_second": 486.817, "eval_steps_per_second": 60.889, "step": 63 }, { "epoch": 0.02250063922270519, "grad_norm": 0.5121042728424072, "learning_rate": 1.56348351646022e-05, "loss": 44.3014, "step": 66 }, { "epoch": 0.023523395551009973, "grad_norm": 0.47857794165611267, "learning_rate": 1.3263210930352737e-05, "loss": 44.3116, "step": 69 }, { "epoch": 0.02454615187931475, "grad_norm": 0.5120978951454163, "learning_rate": 1.1020177413231334e-05, "loss": 44.3001, "step": 72 }, { "epoch": 0.02454615187931475, "eval_loss": 11.07617473602295, "eval_runtime": 10.1688, "eval_samples_per_second": 485.897, "eval_steps_per_second": 60.774, "step": 72 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 539051950080.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }