{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9988901220865705, "eval_steps": 500, "global_step": 225, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.022197558268590455, "grad_norm": 4.173152160656369, "learning_rate": 9.987820251299122e-05, "loss": 2.7367, "step": 5 }, { "epoch": 0.04439511653718091, "grad_norm": 3.2189546924605743, "learning_rate": 9.951340343707852e-05, "loss": 1.6236, "step": 10 }, { "epoch": 0.06659267480577137, "grad_norm": 2.5798875358644615, "learning_rate": 9.890738003669029e-05, "loss": 1.3355, "step": 15 }, { "epoch": 0.08879023307436182, "grad_norm": 3.169021801096128, "learning_rate": 9.806308479691595e-05, "loss": 1.2607, "step": 20 }, { "epoch": 0.11098779134295228, "grad_norm": 2.6869772888956365, "learning_rate": 9.698463103929542e-05, "loss": 1.2223, "step": 25 }, { "epoch": 0.13318534961154274, "grad_norm": 2.5246138578206296, "learning_rate": 9.567727288213005e-05, "loss": 1.1477, "step": 30 }, { "epoch": 0.15538290788013318, "grad_norm": 2.6296096882455298, "learning_rate": 9.414737964294636e-05, "loss": 1.0723, "step": 35 }, { "epoch": 0.17758046614872364, "grad_norm": 2.013904106905957, "learning_rate": 9.24024048078213e-05, "loss": 0.9628, "step": 40 }, { "epoch": 0.1997780244173141, "grad_norm": 2.9993599311041197, "learning_rate": 9.045084971874738e-05, "loss": 0.963, "step": 45 }, { "epoch": 0.22197558268590456, "grad_norm": 2.8591825849504353, "learning_rate": 8.83022221559489e-05, "loss": 0.9454, "step": 50 }, { "epoch": 0.244173140954495, "grad_norm": 2.7350211425782365, "learning_rate": 8.596699001693255e-05, "loss": 0.9329, "step": 55 }, { "epoch": 0.2663706992230855, "grad_norm": 2.1765633333645082, "learning_rate": 8.345653031794292e-05, "loss": 0.938, "step": 60 }, { "epoch": 0.2885682574916759, "grad_norm": 2.61036019611883, "learning_rate": 8.07830737662829e-05, "loss": 0.9445, "step": 65 }, { "epoch": 0.31076581576026635, "grad_norm": 2.272074478469258, "learning_rate": 7.795964517353735e-05, "loss": 0.9281, "step": 70 }, { "epoch": 0.33296337402885684, "grad_norm": 2.180048695923942, "learning_rate": 7.500000000000001e-05, "loss": 0.923, "step": 75 }, { "epoch": 0.3551609322974473, "grad_norm": 2.7343263244068567, "learning_rate": 7.191855733945387e-05, "loss": 0.9238, "step": 80 }, { "epoch": 0.37735849056603776, "grad_norm": 3.347285634649469, "learning_rate": 6.873032967079561e-05, "loss": 0.9262, "step": 85 }, { "epoch": 0.3995560488346282, "grad_norm": 1.9801889636127816, "learning_rate": 6.545084971874738e-05, "loss": 0.8858, "step": 90 }, { "epoch": 0.42175360710321863, "grad_norm": 2.5733487656351617, "learning_rate": 6.209609477998338e-05, "loss": 0.8976, "step": 95 }, { "epoch": 0.4439511653718091, "grad_norm": 2.6122649493371104, "learning_rate": 5.868240888334653e-05, "loss": 0.8718, "step": 100 }, { "epoch": 0.46614872364039955, "grad_norm": 2.0876959501211094, "learning_rate": 5.522642316338268e-05, "loss": 0.8649, "step": 105 }, { "epoch": 0.48834628190899, "grad_norm": 2.16104748727945, "learning_rate": 5.174497483512506e-05, "loss": 0.8705, "step": 110 }, { "epoch": 0.5105438401775805, "grad_norm": 2.324049561076729, "learning_rate": 4.825502516487497e-05, "loss": 0.8649, "step": 115 }, { "epoch": 0.532741398446171, "grad_norm": 2.2381634375296047, "learning_rate": 4.477357683661734e-05, "loss": 0.8514, "step": 120 }, { "epoch": 0.5549389567147613, "grad_norm": 2.1550933983565534, "learning_rate": 4.131759111665349e-05, "loss": 0.8541, "step": 125 }, { "epoch": 0.5771365149833518, "grad_norm": 2.0860600049733007, "learning_rate": 3.790390522001662e-05, "loss": 0.8443, "step": 130 }, { "epoch": 0.5993340732519423, "grad_norm": 2.8122996250620313, "learning_rate": 3.4549150281252636e-05, "loss": 0.8305, "step": 135 }, { "epoch": 0.6215316315205327, "grad_norm": 1.8994822037248742, "learning_rate": 3.12696703292044e-05, "loss": 0.8608, "step": 140 }, { "epoch": 0.6437291897891232, "grad_norm": 2.2555867483043848, "learning_rate": 2.8081442660546125e-05, "loss": 0.8134, "step": 145 }, { "epoch": 0.6659267480577137, "grad_norm": 1.8397515247154257, "learning_rate": 2.500000000000001e-05, "loss": 0.8128, "step": 150 }, { "epoch": 0.6881243063263041, "grad_norm": 2.4303983539129264, "learning_rate": 2.2040354826462668e-05, "loss": 0.8251, "step": 155 }, { "epoch": 0.7103218645948945, "grad_norm": 2.2313954875151016, "learning_rate": 1.9216926233717085e-05, "loss": 0.8023, "step": 160 }, { "epoch": 0.732519422863485, "grad_norm": 2.158630207915647, "learning_rate": 1.6543469682057106e-05, "loss": 0.8068, "step": 165 }, { "epoch": 0.7547169811320755, "grad_norm": 2.0984542929723227, "learning_rate": 1.4033009983067452e-05, "loss": 0.83, "step": 170 }, { "epoch": 0.7769145394006659, "grad_norm": 2.3913092010409445, "learning_rate": 1.1697777844051105e-05, "loss": 0.8317, "step": 175 }, { "epoch": 0.7991120976692564, "grad_norm": 2.2309830019482333, "learning_rate": 9.549150281252633e-06, "loss": 0.8319, "step": 180 }, { "epoch": 0.8213096559378469, "grad_norm": 2.3343796415351554, "learning_rate": 7.597595192178702e-06, "loss": 0.8383, "step": 185 }, { "epoch": 0.8435072142064373, "grad_norm": 2.058761898873254, "learning_rate": 5.852620357053651e-06, "loss": 0.8052, "step": 190 }, { "epoch": 0.8657047724750278, "grad_norm": 2.4171172190014416, "learning_rate": 4.322727117869951e-06, "loss": 0.8262, "step": 195 }, { "epoch": 0.8879023307436182, "grad_norm": 2.231323330105212, "learning_rate": 3.0153689607045845e-06, "loss": 0.8204, "step": 200 }, { "epoch": 0.9100998890122086, "grad_norm": 2.3595682400676714, "learning_rate": 1.9369152030840556e-06, "loss": 0.8168, "step": 205 }, { "epoch": 0.9322974472807991, "grad_norm": 2.229272501664809, "learning_rate": 1.0926199633097157e-06, "loss": 0.829, "step": 210 }, { "epoch": 0.9544950055493896, "grad_norm": 1.9609279896552483, "learning_rate": 4.865965629214819e-07, "loss": 0.8335, "step": 215 }, { "epoch": 0.97669256381798, "grad_norm": 2.100020381721619, "learning_rate": 1.2179748700879012e-07, "loss": 0.8238, "step": 220 }, { "epoch": 0.9988901220865705, "grad_norm": 2.933024766927132, "learning_rate": 0.0, "loss": 0.8168, "step": 225 }, { "epoch": 0.9988901220865705, "eval_loss": 0.8767702579498291, "eval_runtime": 116.7284, "eval_samples_per_second": 21.786, "eval_steps_per_second": 1.362, "step": 225 } ], "logging_steps": 5, "max_steps": 225, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 490142173233152.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }