{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.400200100050025, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02001000500250125, "grad_norm": 1.2456663846969604, "learning_rate": 4.999447296060165e-05, "loss": 1.3318, "num_input_tokens_seen": 15648, "step": 5 }, { "epoch": 0.0400200100050025, "grad_norm": 0.8590693473815918, "learning_rate": 4.997789428625975e-05, "loss": 1.2773, "num_input_tokens_seen": 28720, "step": 10 }, { "epoch": 0.060030015007503754, "grad_norm": 0.8516742587089539, "learning_rate": 4.995027130745321e-05, "loss": 1.1401, "num_input_tokens_seen": 43808, "step": 15 }, { "epoch": 0.080040020010005, "grad_norm": 1.0357950925827026, "learning_rate": 4.99116162380454e-05, "loss": 1.089, "num_input_tokens_seen": 57472, "step": 20 }, { "epoch": 0.10005002501250625, "grad_norm": 0.744416356086731, "learning_rate": 4.986194616988364e-05, "loss": 1.1176, "num_input_tokens_seen": 71968, "step": 25 }, { "epoch": 0.12006003001500751, "grad_norm": 0.7087241411209106, "learning_rate": 4.980128306524183e-05, "loss": 0.9949, "num_input_tokens_seen": 85552, "step": 30 }, { "epoch": 0.14007003501750875, "grad_norm": 0.708070695400238, "learning_rate": 4.972965374710952e-05, "loss": 1.0264, "num_input_tokens_seen": 100512, "step": 35 }, { "epoch": 0.16008004002001, "grad_norm": 0.7232606410980225, "learning_rate": 4.964708988733178e-05, "loss": 1.0004, "num_input_tokens_seen": 113376, "step": 40 }, { "epoch": 0.18009004502251125, "grad_norm": 1.207189679145813, "learning_rate": 4.9553627992605066e-05, "loss": 1.1137, "num_input_tokens_seen": 127680, "step": 45 }, { "epoch": 0.2001000500250125, "grad_norm": 0.6889365315437317, "learning_rate": 4.944930938833535e-05, "loss": 0.9646, "num_input_tokens_seen": 143136, "step": 50 }, { "epoch": 0.22011005502751377, "grad_norm": 1.3263787031173706, "learning_rate": 4.9334180200365486e-05, "loss": 0.9757, "num_input_tokens_seen": 155488, "step": 55 }, { "epoch": 0.24012006003001501, "grad_norm": 1.0919948816299438, "learning_rate": 4.9208291334580104e-05, "loss": 0.9126, "num_input_tokens_seen": 171008, "step": 60 }, { "epoch": 0.26013006503251623, "grad_norm": 0.853539764881134, "learning_rate": 4.907169845439688e-05, "loss": 1.0119, "num_input_tokens_seen": 185536, "step": 65 }, { "epoch": 0.2801400700350175, "grad_norm": 0.9023884534835815, "learning_rate": 4.892446195615423e-05, "loss": 1.1192, "num_input_tokens_seen": 201728, "step": 70 }, { "epoch": 0.3001500750375188, "grad_norm": 1.3300387859344482, "learning_rate": 4.87666469424063e-05, "loss": 1.0167, "num_input_tokens_seen": 217584, "step": 75 }, { "epoch": 0.32016008004002, "grad_norm": 1.1315807104110718, "learning_rate": 4.859832319313697e-05, "loss": 0.8291, "num_input_tokens_seen": 230864, "step": 80 }, { "epoch": 0.3401700850425213, "grad_norm": 1.2459896802902222, "learning_rate": 4.841956513490577e-05, "loss": 0.9591, "num_input_tokens_seen": 245584, "step": 85 }, { "epoch": 0.3601800900450225, "grad_norm": 1.114758849143982, "learning_rate": 4.8230451807939135e-05, "loss": 0.9869, "num_input_tokens_seen": 259760, "step": 90 }, { "epoch": 0.38019009504752377, "grad_norm": 0.8792176842689514, "learning_rate": 4.803106683118177e-05, "loss": 1.0423, "num_input_tokens_seen": 274432, "step": 95 }, { "epoch": 0.400200100050025, "grad_norm": 1.6365342140197754, "learning_rate": 4.782149836532345e-05, "loss": 1.0122, "num_input_tokens_seen": 288256, "step": 100 } ], "logging_steps": 5, "max_steps": 747, "num_input_tokens_seen": 288256, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3016308396326912e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }