{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.011779950524207798, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023559901048415596, "eval_loss": 10.376893043518066, "eval_runtime": 10.972, "eval_samples_per_second": 162.96, "eval_steps_per_second": 81.48, "step": 1 }, { "epoch": 0.0011779950524207798, "grad_norm": 0.3874177634716034, "learning_rate": 5e-05, "loss": 41.5082, "step": 5 }, { "epoch": 0.0023559901048415596, "grad_norm": 0.3574215769767761, "learning_rate": 0.0001, "loss": 41.4991, "step": 10 }, { "epoch": 0.0023559901048415596, "eval_loss": 10.376096725463867, "eval_runtime": 11.0366, "eval_samples_per_second": 162.007, "eval_steps_per_second": 81.003, "step": 10 }, { "epoch": 0.0035339851572623393, "grad_norm": 0.427160382270813, "learning_rate": 9.619397662556435e-05, "loss": 41.4984, "step": 15 }, { "epoch": 0.004711980209683119, "grad_norm": 0.43340450525283813, "learning_rate": 8.535533905932738e-05, "loss": 41.5071, "step": 20 }, { "epoch": 0.004711980209683119, "eval_loss": 10.37428092956543, "eval_runtime": 11.1192, "eval_samples_per_second": 160.803, "eval_steps_per_second": 80.401, "step": 20 }, { "epoch": 0.005889975262103899, "grad_norm": 0.39236587285995483, "learning_rate": 6.91341716182545e-05, "loss": 41.493, "step": 25 }, { "epoch": 0.007067970314524679, "grad_norm": 0.47753089666366577, "learning_rate": 5e-05, "loss": 41.493, "step": 30 }, { "epoch": 0.007067970314524679, "eval_loss": 10.372749328613281, "eval_runtime": 11.1057, "eval_samples_per_second": 160.999, "eval_steps_per_second": 80.499, "step": 30 }, { "epoch": 0.00824596536694546, "grad_norm": 0.4785812497138977, "learning_rate": 3.086582838174551e-05, "loss": 41.4795, "step": 35 }, { "epoch": 0.009423960419366238, "grad_norm": 0.4244927763938904, "learning_rate": 1.4644660940672627e-05, "loss": 41.5, "step": 40 }, { "epoch": 0.009423960419366238, "eval_loss": 10.371926307678223, "eval_runtime": 11.2431, "eval_samples_per_second": 159.031, "eval_steps_per_second": 79.515, "step": 40 }, { "epoch": 0.010601955471787019, "grad_norm": 0.5031403303146362, "learning_rate": 3.8060233744356633e-06, "loss": 41.4935, "step": 45 }, { "epoch": 0.011779950524207798, "grad_norm": 0.4761594533920288, "learning_rate": 0.0, "loss": 41.4824, "step": 50 }, { "epoch": 0.011779950524207798, "eval_loss": 10.371774673461914, "eval_runtime": 11.0171, "eval_samples_per_second": 162.293, "eval_steps_per_second": 81.147, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2566481510400.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }