{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2618296529968454, "eval_steps": 9, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012618296529968454, "eval_loss": 1.88910710811615, "eval_runtime": 2.2136, "eval_samples_per_second": 60.535, "eval_steps_per_second": 7.68, "step": 1 }, { "epoch": 0.03785488958990536, "grad_norm": 0.7511712908744812, "learning_rate": 3e-05, "loss": 1.8139, "step": 3 }, { "epoch": 0.07570977917981073, "grad_norm": 0.8463343381881714, "learning_rate": 6e-05, "loss": 1.9489, "step": 6 }, { "epoch": 0.11356466876971609, "grad_norm": 0.9100675582885742, "learning_rate": 9e-05, "loss": 1.7745, "step": 9 }, { "epoch": 0.11356466876971609, "eval_loss": 1.8236573934555054, "eval_runtime": 2.232, "eval_samples_per_second": 60.036, "eval_steps_per_second": 7.617, "step": 9 }, { "epoch": 0.15141955835962145, "grad_norm": 0.6830158233642578, "learning_rate": 9.987820251299122e-05, "loss": 1.7182, "step": 12 }, { "epoch": 0.1892744479495268, "grad_norm": 0.7045711278915405, "learning_rate": 9.924038765061042e-05, "loss": 1.695, "step": 15 }, { "epoch": 0.22712933753943218, "grad_norm": 0.6682664752006531, "learning_rate": 9.806308479691595e-05, "loss": 1.5894, "step": 18 }, { "epoch": 0.22712933753943218, "eval_loss": 1.7477922439575195, "eval_runtime": 2.2514, "eval_samples_per_second": 59.52, "eval_steps_per_second": 7.551, "step": 18 }, { "epoch": 0.26498422712933756, "grad_norm": 0.6020422577857971, "learning_rate": 9.635919272833938e-05, "loss": 1.6764, "step": 21 }, { "epoch": 0.3028391167192429, "grad_norm": 0.8476335406303406, "learning_rate": 9.414737964294636e-05, "loss": 1.6508, "step": 24 }, { "epoch": 0.34069400630914826, "grad_norm": 0.7293965220451355, "learning_rate": 9.145187862775209e-05, "loss": 1.4719, "step": 27 }, { "epoch": 0.34069400630914826, "eval_loss": 1.7059693336486816, "eval_runtime": 2.2316, "eval_samples_per_second": 60.048, "eval_steps_per_second": 7.618, "step": 27 }, { "epoch": 0.3785488958990536, "grad_norm": 0.6286596059799194, "learning_rate": 8.83022221559489e-05, "loss": 1.7276, "step": 30 }, { "epoch": 0.416403785488959, "grad_norm": 0.8441144227981567, "learning_rate": 8.473291852294987e-05, "loss": 1.8129, "step": 33 }, { "epoch": 0.45425867507886436, "grad_norm": 0.6982610821723938, "learning_rate": 8.07830737662829e-05, "loss": 1.8098, "step": 36 }, { "epoch": 0.45425867507886436, "eval_loss": 1.6810848712921143, "eval_runtime": 2.2408, "eval_samples_per_second": 59.8, "eval_steps_per_second": 7.587, "step": 36 }, { "epoch": 0.4921135646687697, "grad_norm": 0.704291045665741, "learning_rate": 7.649596321166024e-05, "loss": 1.5021, "step": 39 }, { "epoch": 0.5299684542586751, "grad_norm": 0.725155234336853, "learning_rate": 7.191855733945387e-05, "loss": 1.712, "step": 42 }, { "epoch": 0.5678233438485805, "grad_norm": 0.6382411122322083, "learning_rate": 6.710100716628344e-05, "loss": 1.587, "step": 45 }, { "epoch": 0.5678233438485805, "eval_loss": 1.6629996299743652, "eval_runtime": 2.2344, "eval_samples_per_second": 59.971, "eval_steps_per_second": 7.608, "step": 45 }, { "epoch": 0.6056782334384858, "grad_norm": 0.717880368232727, "learning_rate": 6.209609477998338e-05, "loss": 1.6268, "step": 48 }, { "epoch": 0.6435331230283912, "grad_norm": 0.688407301902771, "learning_rate": 5.695865504800327e-05, "loss": 1.7635, "step": 51 }, { "epoch": 0.6813880126182965, "grad_norm": 0.7644420266151428, "learning_rate": 5.174497483512506e-05, "loss": 1.4975, "step": 54 }, { "epoch": 0.6813880126182965, "eval_loss": 1.6497859954833984, "eval_runtime": 2.2418, "eval_samples_per_second": 59.772, "eval_steps_per_second": 7.583, "step": 54 }, { "epoch": 0.7192429022082019, "grad_norm": 0.7091464400291443, "learning_rate": 4.6512176312793736e-05, "loss": 1.7363, "step": 57 }, { "epoch": 0.7570977917981072, "grad_norm": 0.7677245140075684, "learning_rate": 4.131759111665349e-05, "loss": 1.4822, "step": 60 }, { "epoch": 0.7949526813880127, "grad_norm": 0.6542797088623047, "learning_rate": 3.6218132209150045e-05, "loss": 1.5378, "step": 63 }, { "epoch": 0.7949526813880127, "eval_loss": 1.6406694650650024, "eval_runtime": 2.2416, "eval_samples_per_second": 59.778, "eval_steps_per_second": 7.584, "step": 63 }, { "epoch": 0.832807570977918, "grad_norm": 0.718596875667572, "learning_rate": 3.12696703292044e-05, "loss": 1.5353, "step": 66 }, { "epoch": 0.8706624605678234, "grad_norm": 0.6250401735305786, "learning_rate": 2.6526421860705473e-05, "loss": 1.5808, "step": 69 }, { "epoch": 0.9085173501577287, "grad_norm": 0.7185081243515015, "learning_rate": 2.2040354826462668e-05, "loss": 1.8177, "step": 72 }, { "epoch": 0.9085173501577287, "eval_loss": 1.6348599195480347, "eval_runtime": 2.2303, "eval_samples_per_second": 60.082, "eval_steps_per_second": 7.622, "step": 72 }, { "epoch": 0.9463722397476341, "grad_norm": 0.8165435194969177, "learning_rate": 1.7860619515673033e-05, "loss": 1.6547, "step": 75 }, { "epoch": 0.9842271293375394, "grad_norm": 0.7258487343788147, "learning_rate": 1.4033009983067452e-05, "loss": 1.5405, "step": 78 }, { "epoch": 1.0220820189274449, "grad_norm": 0.6014052629470825, "learning_rate": 1.0599462319663905e-05, "loss": 1.8766, "step": 81 }, { "epoch": 1.0220820189274449, "eval_loss": 1.6316182613372803, "eval_runtime": 2.2331, "eval_samples_per_second": 60.007, "eval_steps_per_second": 7.613, "step": 81 }, { "epoch": 1.0599369085173502, "grad_norm": 0.8883140683174133, "learning_rate": 7.597595192178702e-06, "loss": 1.7513, "step": 84 }, { "epoch": 1.0977917981072556, "grad_norm": 0.7024074196815491, "learning_rate": 5.060297685041659e-06, "loss": 1.3722, "step": 87 }, { "epoch": 1.135646687697161, "grad_norm": 0.8499208092689514, "learning_rate": 3.0153689607045845e-06, "loss": 1.4446, "step": 90 }, { "epoch": 1.135646687697161, "eval_loss": 1.630479097366333, "eval_runtime": 2.2459, "eval_samples_per_second": 59.663, "eval_steps_per_second": 7.569, "step": 90 }, { "epoch": 1.1735015772870663, "grad_norm": 0.800684928894043, "learning_rate": 1.4852136862001764e-06, "loss": 1.6495, "step": 93 }, { "epoch": 1.2113564668769716, "grad_norm": 0.615403950214386, "learning_rate": 4.865965629214819e-07, "loss": 1.5472, "step": 96 }, { "epoch": 1.249211356466877, "grad_norm": 0.7376525402069092, "learning_rate": 3.04586490452119e-08, "loss": 1.3958, "step": 99 }, { "epoch": 1.249211356466877, "eval_loss": 1.6302069425582886, "eval_runtime": 2.2327, "eval_samples_per_second": 60.017, "eval_steps_per_second": 7.614, "step": 99 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7874172754329600.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }