{ "best_metric": 1.1086701154708862, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.013036534889026497, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.518267444513249e-05, "eval_loss": 2.850663900375366, "eval_runtime": 443.5942, "eval_samples_per_second": 14.563, "eval_steps_per_second": 3.641, "step": 1 }, { "epoch": 0.0006518267444513248, "grad_norm": 3.971789598464966, "learning_rate": 4.16e-05, "loss": 2.0815, "step": 10 }, { "epoch": 0.0013036534889026496, "grad_norm": 2.8578591346740723, "learning_rate": 8.32e-05, "loss": 1.8557, "step": 20 }, { "epoch": 0.0019554802333539745, "grad_norm": 3.0689802169799805, "learning_rate": 0.0001248, "loss": 1.5229, "step": 30 }, { "epoch": 0.002607306977805299, "grad_norm": 4.3151397705078125, "learning_rate": 0.0001664, "loss": 1.3099, "step": 40 }, { "epoch": 0.0032591337222566243, "grad_norm": 4.729147434234619, "learning_rate": 0.000208, "loss": 1.6873, "step": 50 }, { "epoch": 0.0032591337222566243, "eval_loss": 1.273123025894165, "eval_runtime": 445.0677, "eval_samples_per_second": 14.515, "eval_steps_per_second": 3.629, "step": 50 }, { "epoch": 0.003910960466707949, "grad_norm": 2.3496737480163574, "learning_rate": 0.0002077466612270217, "loss": 1.0568, "step": 60 }, { "epoch": 0.004562787211159274, "grad_norm": 2.499488353729248, "learning_rate": 0.0002069878791491233, "loss": 1.1359, "step": 70 }, { "epoch": 0.005214613955610598, "grad_norm": 3.1299209594726562, "learning_rate": 0.00020572735047631578, "loss": 1.2264, "step": 80 }, { "epoch": 0.005866440700061924, "grad_norm": 3.3837058544158936, "learning_rate": 0.00020397121637758515, "loss": 1.2004, "step": 90 }, { "epoch": 0.006518267444513249, "grad_norm": 2.973175287246704, "learning_rate": 0.00020172803256173445, "loss": 1.545, "step": 100 }, { "epoch": 0.006518267444513249, "eval_loss": 1.275679111480713, "eval_runtime": 446.3022, "eval_samples_per_second": 14.474, "eval_steps_per_second": 3.619, "step": 100 }, { "epoch": 0.007170094188964573, "grad_norm": 2.3844895362854004, "learning_rate": 0.00019900872759483047, "loss": 1.0244, "step": 110 }, { "epoch": 0.007821920933415898, "grad_norm": 2.4352855682373047, "learning_rate": 0.0001958265496573284, "loss": 1.1891, "step": 120 }, { "epoch": 0.008473747677867223, "grad_norm": 3.231959819793701, "learning_rate": 0.00019219700200026827, "loss": 1.2308, "step": 130 }, { "epoch": 0.009125574422318547, "grad_norm": 2.5154852867126465, "learning_rate": 0.0001881377674149945, "loss": 1.2077, "step": 140 }, { "epoch": 0.009777401166769872, "grad_norm": 3.0093817710876465, "learning_rate": 0.00018366862208437368, "loss": 1.3988, "step": 150 }, { "epoch": 0.009777401166769872, "eval_loss": 1.1874332427978516, "eval_runtime": 447.657, "eval_samples_per_second": 14.431, "eval_steps_per_second": 3.608, "step": 150 }, { "epoch": 0.010429227911221197, "grad_norm": 2.5995352268218994, "learning_rate": 0.00017881133923521971, "loss": 1.0265, "step": 160 }, { "epoch": 0.011081054655672521, "grad_norm": 2.5440680980682373, "learning_rate": 0.00017358958306132124, "loss": 0.9651, "step": 170 }, { "epoch": 0.011732881400123848, "grad_norm": 2.6204676628112793, "learning_rate": 0.00016802879343386844, "loss": 1.0322, "step": 180 }, { "epoch": 0.012384708144575172, "grad_norm": 2.2899601459503174, "learning_rate": 0.00016215606196095766, "loss": 1.1977, "step": 190 }, { "epoch": 0.013036534889026497, "grad_norm": 3.2127535343170166, "learning_rate": 0.000156, "loss": 1.6102, "step": 200 }, { "epoch": 0.013036534889026497, "eval_loss": 1.1086701154708862, "eval_runtime": 446.4333, "eval_samples_per_second": 14.47, "eval_steps_per_second": 3.618, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.160326003693978e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }