{ "best_metric": 1.1258819103240967, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.05092514004413512, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00033950093362756747, "eval_loss": 1.233475923538208, "eval_runtime": 73.3685, "eval_samples_per_second": 16.915, "eval_steps_per_second": 4.239, "step": 1 }, { "epoch": 0.003395009336275675, "grad_norm": 8.238821983337402, "learning_rate": 4.34e-05, "loss": 1.651, "step": 10 }, { "epoch": 0.00679001867255135, "grad_norm": 8.250868797302246, "learning_rate": 8.68e-05, "loss": 1.7066, "step": 20 }, { "epoch": 0.010185028008827025, "grad_norm": 10.320452690124512, "learning_rate": 0.0001302, "loss": 1.6893, "step": 30 }, { "epoch": 0.0135800373451027, "grad_norm": 9.33686351776123, "learning_rate": 0.0001736, "loss": 1.6924, "step": 40 }, { "epoch": 0.016975046681378374, "grad_norm": 21.88421630859375, "learning_rate": 0.000217, "loss": 1.9382, "step": 50 }, { "epoch": 0.016975046681378374, "eval_loss": 1.1258819103240967, "eval_runtime": 73.3246, "eval_samples_per_second": 16.925, "eval_steps_per_second": 4.241, "step": 50 }, { "epoch": 0.02037005601765405, "grad_norm": 7.034121513366699, "learning_rate": 0.00021673569945319091, "loss": 1.7911, "step": 60 }, { "epoch": 0.023765065353929724, "grad_norm": 7.012728214263916, "learning_rate": 0.00021594408545846038, "loss": 2.0025, "step": 70 }, { "epoch": 0.0271600746902054, "grad_norm": 6.738110065460205, "learning_rate": 0.0002146290146796179, "loss": 2.0042, "step": 80 }, { "epoch": 0.030555084026481074, "grad_norm": 9.321654319763184, "learning_rate": 0.0002127968940093076, "loss": 2.0401, "step": 90 }, { "epoch": 0.03395009336275675, "grad_norm": 29.11263656616211, "learning_rate": 0.00021045664935527106, "loss": 2.1941, "step": 100 }, { "epoch": 0.03395009336275675, "eval_loss": 1.2958543300628662, "eval_runtime": 73.5899, "eval_samples_per_second": 16.864, "eval_steps_per_second": 4.226, "step": 100 }, { "epoch": 0.03734510269903242, "grad_norm": 6.313995838165283, "learning_rate": 0.00020761968215422217, "loss": 2.0996, "step": 110 }, { "epoch": 0.0407401120353081, "grad_norm": 7.566165924072266, "learning_rate": 0.00020429981382519356, "loss": 2.104, "step": 120 }, { "epoch": 0.04413512137158377, "grad_norm": 8.9271240234375, "learning_rate": 0.00020051321843297219, "loss": 2.0573, "step": 130 }, { "epoch": 0.04753013070785945, "grad_norm": 10.48095989227295, "learning_rate": 0.0001962783438896818, "loss": 2.1817, "step": 140 }, { "epoch": 0.05092514004413512, "grad_norm": 31.889493942260742, "learning_rate": 0.0001916158220784091, "loss": 2.0493, "step": 150 }, { "epoch": 0.05092514004413512, "eval_loss": 1.2741807699203491, "eval_runtime": 73.4449, "eval_samples_per_second": 16.897, "eval_steps_per_second": 4.234, "step": 150 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.692065004408013e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }