{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2969561989606533, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014847809948032665, "eval_loss": 10.3683443069458, "eval_runtime": 0.8345, "eval_samples_per_second": 340.315, "eval_steps_per_second": 170.158, "step": 1 }, { "epoch": 0.007423904974016332, "grad_norm": 0.9056758284568787, "learning_rate": 5e-05, "loss": 41.5082, "step": 5 }, { "epoch": 0.014847809948032665, "grad_norm": 1.05762779712677, "learning_rate": 0.0001, "loss": 41.4939, "step": 10 }, { "epoch": 0.022271714922048998, "grad_norm": 0.9474515914916992, "learning_rate": 9.98292246503335e-05, "loss": 41.4681, "step": 15 }, { "epoch": 0.02969561989606533, "grad_norm": 1.002007007598877, "learning_rate": 9.931806517013612e-05, "loss": 41.4419, "step": 20 }, { "epoch": 0.03711952487008166, "grad_norm": 1.2783421277999878, "learning_rate": 9.847001329696653e-05, "loss": 41.4158, "step": 25 }, { "epoch": 0.044543429844097995, "grad_norm": 1.2331560850143433, "learning_rate": 9.729086208503174e-05, "loss": 41.4154, "step": 30 }, { "epoch": 0.05196733481811433, "grad_norm": 1.2927788496017456, "learning_rate": 9.578866633275288e-05, "loss": 41.4028, "step": 35 }, { "epoch": 0.05939123979213066, "grad_norm": 1.4856021404266357, "learning_rate": 9.397368756032445e-05, "loss": 41.3632, "step": 40 }, { "epoch": 0.066815144766147, "grad_norm": 1.2376281023025513, "learning_rate": 9.185832391312644e-05, "loss": 41.3726, "step": 45 }, { "epoch": 0.07423904974016332, "grad_norm": 1.6549413204193115, "learning_rate": 8.945702546981969e-05, "loss": 41.3998, "step": 50 }, { "epoch": 0.07423904974016332, "eval_loss": 10.339252471923828, "eval_runtime": 0.8174, "eval_samples_per_second": 347.451, "eval_steps_per_second": 173.725, "step": 50 }, { "epoch": 0.08166295471417966, "grad_norm": 1.2320919036865234, "learning_rate": 8.678619553365659e-05, "loss": 41.4036, "step": 55 }, { "epoch": 0.08908685968819599, "grad_norm": 1.3699207305908203, "learning_rate": 8.386407858128706e-05, "loss": 41.3627, "step": 60 }, { "epoch": 0.09651076466221233, "grad_norm": 1.235710859298706, "learning_rate": 8.07106356344834e-05, "loss": 41.3265, "step": 65 }, { "epoch": 0.10393466963622866, "grad_norm": 1.4450459480285645, "learning_rate": 7.734740790612136e-05, "loss": 41.2958, "step": 70 }, { "epoch": 0.111358574610245, "grad_norm": 1.3220700025558472, "learning_rate": 7.379736965185368e-05, "loss": 41.2793, "step": 75 }, { "epoch": 0.11878247958426132, "grad_norm": 1.340510606765747, "learning_rate": 7.008477123264848e-05, "loss": 41.27, "step": 80 }, { "epoch": 0.12620638455827765, "grad_norm": 1.6662790775299072, "learning_rate": 6.623497346023418e-05, "loss": 41.2258, "step": 85 }, { "epoch": 0.133630289532294, "grad_norm": 1.7347187995910645, "learning_rate": 6.227427435703997e-05, "loss": 41.2249, "step": 90 }, { "epoch": 0.14105419450631032, "grad_norm": 1.77666437625885, "learning_rate": 5.8229729514036705e-05, "loss": 41.2137, "step": 95 }, { "epoch": 0.14847809948032664, "grad_norm": 3.1678805351257324, "learning_rate": 5.4128967273616625e-05, "loss": 41.2879, "step": 100 }, { "epoch": 0.14847809948032664, "eval_loss": 10.302581787109375, "eval_runtime": 0.8248, "eval_samples_per_second": 344.316, "eval_steps_per_second": 172.158, "step": 100 }, { "epoch": 0.155902004454343, "grad_norm": 1.298153281211853, "learning_rate": 5e-05, "loss": 41.2814, "step": 105 }, { 
"epoch": 0.1633259094283593, "grad_norm": 1.5448156595230103, "learning_rate": 4.5871032726383386e-05, "loss": 41.226, "step": 110 }, { "epoch": 0.17074981440237566, "grad_norm": 1.4137225151062012, "learning_rate": 4.17702704859633e-05, "loss": 41.1952, "step": 115 }, { "epoch": 0.17817371937639198, "grad_norm": 1.5741055011749268, "learning_rate": 3.772572564296005e-05, "loss": 41.1606, "step": 120 }, { "epoch": 0.1855976243504083, "grad_norm": 1.565761685371399, "learning_rate": 3.3765026539765834e-05, "loss": 41.139, "step": 125 }, { "epoch": 0.19302152932442465, "grad_norm": 1.6328357458114624, "learning_rate": 2.991522876735154e-05, "loss": 41.131, "step": 130 }, { "epoch": 0.20044543429844097, "grad_norm": 1.9674378633499146, "learning_rate": 2.6202630348146324e-05, "loss": 41.1032, "step": 135 }, { "epoch": 0.20786933927245732, "grad_norm": 1.79765784740448, "learning_rate": 2.2652592093878666e-05, "loss": 41.0977, "step": 140 }, { "epoch": 0.21529324424647364, "grad_norm": 2.516068458557129, "learning_rate": 1.928936436551661e-05, "loss": 41.1421, "step": 145 }, { "epoch": 0.22271714922049, "grad_norm": 3.022280693054199, "learning_rate": 1.6135921418712956e-05, "loss": 41.1302, "step": 150 }, { "epoch": 0.22271714922049, "eval_loss": 10.28184986114502, "eval_runtime": 0.8175, "eval_samples_per_second": 347.416, "eval_steps_per_second": 173.708, "step": 150 }, { "epoch": 0.2301410541945063, "grad_norm": 1.6921619176864624, "learning_rate": 1.3213804466343421e-05, "loss": 41.1946, "step": 155 }, { "epoch": 0.23756495916852263, "grad_norm": 1.656246542930603, "learning_rate": 1.0542974530180327e-05, "loss": 41.1629, "step": 160 }, { "epoch": 0.24498886414253898, "grad_norm": 1.5233951807022095, "learning_rate": 8.141676086873572e-06, "loss": 41.1404, "step": 165 }, { "epoch": 0.2524127691165553, "grad_norm": 1.632401943206787, "learning_rate": 6.026312439675552e-06, "loss": 41.1259, "step": 170 }, { "epoch": 0.25983667409057165, "grad_norm": 1.4015792608261108, "learning_rate": 4.2113336672471245e-06, "loss": 41.1078, "step": 175 }, { "epoch": 0.267260579064588, "grad_norm": 1.4440001249313354, "learning_rate": 2.7091379149682685e-06, "loss": 41.096, "step": 180 }, { "epoch": 0.2746844840386043, "grad_norm": 1.8431029319763184, "learning_rate": 1.5299867030334814e-06, "loss": 41.0677, "step": 185 }, { "epoch": 0.28210838901262064, "grad_norm": 2.0052757263183594, "learning_rate": 6.819348298638839e-07, "loss": 41.0782, "step": 190 }, { "epoch": 0.289532293986637, "grad_norm": 1.7016942501068115, "learning_rate": 1.7077534966650766e-07, "loss": 41.0708, "step": 195 }, { "epoch": 0.2969561989606533, "grad_norm": 2.6630356311798096, "learning_rate": 0.0, "loss": 41.2053, "step": 200 }, { "epoch": 0.2969561989606533, "eval_loss": 10.27867603302002, "eval_runtime": 0.8177, "eval_samples_per_second": 347.296, "eval_steps_per_second": 173.648, "step": 200 } ], "logging_steps": 5, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 10265926041600.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }