{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.895572263993316, "eval_steps": 500, "global_step": 2384, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3341687552213868, "grad_norm": 203.0, "learning_rate": 1.9596977329974813e-05, "loss": 3.5144, "step": 50 }, { "epoch": 0.6683375104427736, "grad_norm": 272.0, "learning_rate": 1.9177162048698573e-05, "loss": 3.2008, "step": 100 }, { "epoch": 1.0, "grad_norm": 205.0, "learning_rate": 1.8757346767422336e-05, "loss": 3.156, "step": 150 }, { "epoch": 1.3341687552213868, "grad_norm": 366.0, "learning_rate": 1.8337531486146097e-05, "loss": 2.6074, "step": 200 }, { "epoch": 1.6683375104427736, "grad_norm": 524.0, "learning_rate": 1.791771620486986e-05, "loss": 2.1476, "step": 250 }, { "epoch": 2.0, "grad_norm": 310.0, "learning_rate": 1.749790092359362e-05, "loss": 1.7122, "step": 300 }, { "epoch": 2.334168755221387, "grad_norm": 332.0, "learning_rate": 1.707808564231738e-05, "loss": 0.9868, "step": 350 }, { "epoch": 2.6683375104427736, "grad_norm": 264.0, "learning_rate": 1.6658270361041144e-05, "loss": 0.8583, "step": 400 }, { "epoch": 3.0, "grad_norm": 221.0, "learning_rate": 1.6238455079764907e-05, "loss": 0.6614, "step": 450 }, { "epoch": 3.334168755221387, "grad_norm": 247.0, "learning_rate": 1.5818639798488667e-05, "loss": 0.4849, "step": 500 }, { "epoch": 3.6683375104427736, "grad_norm": 156.0, "learning_rate": 1.5398824517212427e-05, "loss": 0.4501, "step": 550 }, { "epoch": 4.0, "grad_norm": 132.0, "learning_rate": 1.4979009235936189e-05, "loss": 0.4275, "step": 600 }, { "epoch": 4.334168755221387, "grad_norm": 218.0, "learning_rate": 1.455919395465995e-05, "loss": 0.3567, "step": 650 }, { "epoch": 4.668337510442774, "grad_norm": 129.0, "learning_rate": 1.4139378673383712e-05, "loss": 0.3326, "step": 700 }, { "epoch": 5.0, "grad_norm": 115.0, "learning_rate": 1.3719563392107474e-05, "loss": 0.3078, "step": 750 }, { "epoch": 5.334168755221387, "grad_norm": 121.0, "learning_rate": 1.3299748110831234e-05, "loss": 0.283, "step": 800 }, { "epoch": 5.668337510442774, "grad_norm": 156.0, "learning_rate": 1.2879932829554998e-05, "loss": 0.2888, "step": 850 }, { "epoch": 6.0, "grad_norm": 108.5, "learning_rate": 1.2460117548278758e-05, "loss": 0.2469, "step": 900 }, { "epoch": 6.334168755221387, "grad_norm": 151.0, "learning_rate": 1.204030226700252e-05, "loss": 0.2764, "step": 950 }, { "epoch": 6.668337510442774, "grad_norm": 105.5, "learning_rate": 1.162048698572628e-05, "loss": 0.2275, "step": 1000 }, { "epoch": 7.0, "grad_norm": 97.0, "learning_rate": 1.1200671704450043e-05, "loss": 0.1984, "step": 1050 }, { "epoch": 7.334168755221387, "grad_norm": 102.0, "learning_rate": 1.0780856423173805e-05, "loss": 0.218, "step": 1100 }, { "epoch": 7.668337510442774, "grad_norm": 108.0, "learning_rate": 1.0361041141897565e-05, "loss": 0.2004, "step": 1150 }, { "epoch": 8.0, "grad_norm": 64.5, "learning_rate": 9.941225860621327e-06, "loss": 0.1896, "step": 1200 }, { "epoch": 8.334168755221388, "grad_norm": 95.5, "learning_rate": 9.521410579345088e-06, "loss": 0.1903, "step": 1250 }, { "epoch": 8.668337510442774, "grad_norm": 101.0, "learning_rate": 9.10159529806885e-06, "loss": 0.1831, "step": 1300 }, { "epoch": 9.0, "grad_norm": 68.0, "learning_rate": 8.681780016792612e-06, "loss": 0.1624, "step": 1350 }, { "epoch": 9.334168755221388, "grad_norm": 83.5, "learning_rate": 8.261964735516374e-06, "loss": 0.1639, "step": 1400 }, { "epoch": 9.668337510442774, "grad_norm": 74.5, "learning_rate": 7.842149454240135e-06, "loss": 0.1834, "step": 1450 }, { "epoch": 10.0, "grad_norm": 35.5, "learning_rate": 7.422334172963896e-06, "loss": 0.1441, "step": 1500 }, { "epoch": 10.334168755221388, "grad_norm": 71.5, "learning_rate": 7.002518891687659e-06, "loss": 0.1547, "step": 1550 }, { "epoch": 10.668337510442774, "grad_norm": 85.5, "learning_rate": 6.58270361041142e-06, "loss": 0.1532, "step": 1600 }, { "epoch": 11.0, "grad_norm": 70.5, "learning_rate": 6.162888329135182e-06, "loss": 0.1452, "step": 1650 }, { "epoch": 11.334168755221388, "grad_norm": 83.5, "learning_rate": 5.7430730478589425e-06, "loss": 0.1294, "step": 1700 }, { "epoch": 11.668337510442774, "grad_norm": 49.75, "learning_rate": 5.323257766582704e-06, "loss": 0.167, "step": 1750 }, { "epoch": 12.0, "grad_norm": 52.25, "learning_rate": 4.903442485306465e-06, "loss": 0.1306, "step": 1800 }, { "epoch": 12.334168755221388, "grad_norm": 63.5, "learning_rate": 4.483627204030227e-06, "loss": 0.1376, "step": 1850 }, { "epoch": 12.668337510442774, "grad_norm": 71.5, "learning_rate": 4.063811922753989e-06, "loss": 0.1307, "step": 1900 }, { "epoch": 13.0, "grad_norm": 48.5, "learning_rate": 3.64399664147775e-06, "loss": 0.1369, "step": 1950 }, { "epoch": 13.334168755221388, "grad_norm": 91.0, "learning_rate": 3.2241813602015114e-06, "loss": 0.1284, "step": 2000 }, { "epoch": 13.668337510442774, "grad_norm": 82.5, "learning_rate": 2.804366078925273e-06, "loss": 0.1313, "step": 2050 }, { "epoch": 14.0, "grad_norm": 53.25, "learning_rate": 2.3845507976490345e-06, "loss": 0.1377, "step": 2100 }, { "epoch": 14.334168755221388, "grad_norm": 81.0, "learning_rate": 1.9647355163727962e-06, "loss": 0.1286, "step": 2150 }, { "epoch": 14.668337510442774, "grad_norm": 71.5, "learning_rate": 1.5449202350965576e-06, "loss": 0.1481, "step": 2200 }, { "epoch": 15.0, "grad_norm": 41.0, "learning_rate": 1.1251049538203191e-06, "loss": 0.1215, "step": 2250 }, { "epoch": 15.334168755221388, "grad_norm": 77.5, "learning_rate": 7.052896725440807e-07, "loss": 0.1247, "step": 2300 }, { "epoch": 15.668337510442774, "grad_norm": 70.0, "learning_rate": 2.8547439126784216e-07, "loss": 0.1307, "step": 2350 } ], "logging_steps": 50, "max_steps": 2384, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.861877575078e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }