{ "best_metric": 1.273123025894165, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.006518267444513249, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.518267444513249e-05, "eval_loss": 2.850663900375366, "eval_runtime": 443.5942, "eval_samples_per_second": 14.563, "eval_steps_per_second": 3.641, "step": 1 }, { "epoch": 0.0006518267444513248, "grad_norm": 3.971789598464966, "learning_rate": 4.16e-05, "loss": 2.0815, "step": 10 }, { "epoch": 0.0013036534889026496, "grad_norm": 2.8578591346740723, "learning_rate": 8.32e-05, "loss": 1.8557, "step": 20 }, { "epoch": 0.0019554802333539745, "grad_norm": 3.0689802169799805, "learning_rate": 0.0001248, "loss": 1.5229, "step": 30 }, { "epoch": 0.002607306977805299, "grad_norm": 4.3151397705078125, "learning_rate": 0.0001664, "loss": 1.3099, "step": 40 }, { "epoch": 0.0032591337222566243, "grad_norm": 4.729147434234619, "learning_rate": 0.000208, "loss": 1.6873, "step": 50 }, { "epoch": 0.0032591337222566243, "eval_loss": 1.273123025894165, "eval_runtime": 445.0677, "eval_samples_per_second": 14.515, "eval_steps_per_second": 3.629, "step": 50 }, { "epoch": 0.003910960466707949, "grad_norm": 2.3496737480163574, "learning_rate": 0.0002077466612270217, "loss": 1.0568, "step": 60 }, { "epoch": 0.004562787211159274, "grad_norm": 2.499488353729248, "learning_rate": 0.0002069878791491233, "loss": 1.1359, "step": 70 }, { "epoch": 0.005214613955610598, "grad_norm": 3.1299209594726562, "learning_rate": 0.00020572735047631578, "loss": 1.2264, "step": 80 }, { "epoch": 0.005866440700061924, "grad_norm": 3.3837058544158936, "learning_rate": 0.00020397121637758515, "loss": 1.2004, "step": 90 }, { "epoch": 0.006518267444513249, "grad_norm": 2.973175287246704, "learning_rate": 0.00020172803256173445, "loss": 1.545, "step": 100 }, { "epoch": 0.006518267444513249, "eval_loss": 1.275679111480713, "eval_runtime": 446.3022, "eval_samples_per_second": 14.474, "eval_steps_per_second": 3.619, "step": 100 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.0633874937675776e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }