{ "best_metric": 11.059524536132812, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.00044076357882395464, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7630543152958186e-05, "grad_norm": 1.2864186763763428, "learning_rate": 2e-05, "loss": 44.3172, "step": 1 }, { "epoch": 1.7630543152958186e-05, "eval_loss": 11.080211639404297, "eval_runtime": 157.8291, "eval_samples_per_second": 151.316, "eval_steps_per_second": 75.658, "step": 1 }, { "epoch": 3.526108630591637e-05, "grad_norm": 1.1442649364471436, "learning_rate": 4e-05, "loss": 44.3623, "step": 2 }, { "epoch": 5.2891629458874556e-05, "grad_norm": 1.320364236831665, "learning_rate": 6e-05, "loss": 44.2724, "step": 3 }, { "epoch": 7.052217261183275e-05, "grad_norm": 1.1162445545196533, "learning_rate": 8e-05, "loss": 44.3297, "step": 4 }, { "epoch": 8.815271576479092e-05, "grad_norm": 1.2065627574920654, "learning_rate": 0.0001, "loss": 44.3505, "step": 5 }, { "epoch": 8.815271576479092e-05, "eval_loss": 11.079597473144531, "eval_runtime": 154.6193, "eval_samples_per_second": 154.457, "eval_steps_per_second": 77.228, "step": 5 }, { "epoch": 0.00010578325891774911, "grad_norm": 1.129872441291809, "learning_rate": 0.00012, "loss": 44.3087, "step": 6 }, { "epoch": 0.0001234138020707073, "grad_norm": 1.278194546699524, "learning_rate": 0.00014, "loss": 44.3065, "step": 7 }, { "epoch": 0.0001410443452236655, "grad_norm": 0.9755666851997375, "learning_rate": 0.00016, "loss": 44.3318, "step": 8 }, { "epoch": 0.00015867488837662365, "grad_norm": 1.103223204612732, "learning_rate": 0.00018, "loss": 44.3202, "step": 9 }, { "epoch": 0.00017630543152958184, "grad_norm": 1.0434162616729736, "learning_rate": 0.0002, "loss": 44.3092, "step": 10 }, { "epoch": 0.00017630543152958184, "eval_loss": 11.074006080627441, "eval_runtime": 154.9175, "eval_samples_per_second": 154.159, "eval_steps_per_second": 77.08, "step": 10 }, { "epoch": 0.00019393597468254003, "grad_norm": 1.1925216913223267, "learning_rate": 0.00019781476007338058, "loss": 44.2824, "step": 11 }, { "epoch": 0.00021156651783549822, "grad_norm": 1.1967883110046387, "learning_rate": 0.0001913545457642601, "loss": 44.287, "step": 12 }, { "epoch": 0.0002291970609884564, "grad_norm": 1.1843236684799194, "learning_rate": 0.00018090169943749476, "loss": 44.2499, "step": 13 }, { "epoch": 0.0002468276041414146, "grad_norm": 1.2098557949066162, "learning_rate": 0.00016691306063588583, "loss": 44.2345, "step": 14 }, { "epoch": 0.0002644581472943728, "grad_norm": 1.283496379852295, "learning_rate": 0.00015000000000000001, "loss": 44.2413, "step": 15 }, { "epoch": 0.0002644581472943728, "eval_loss": 11.064728736877441, "eval_runtime": 155.3564, "eval_samples_per_second": 153.724, "eval_steps_per_second": 76.862, "step": 15 }, { "epoch": 0.000282088690447331, "grad_norm": 1.2000702619552612, "learning_rate": 0.00013090169943749476, "loss": 44.2442, "step": 16 }, { "epoch": 0.0002997192336002891, "grad_norm": 1.1320544481277466, "learning_rate": 0.00011045284632676536, "loss": 44.2807, "step": 17 }, { "epoch": 0.0003173497767532473, "grad_norm": 1.0614330768585205, "learning_rate": 8.954715367323468e-05, "loss": 44.2788, "step": 18 }, { "epoch": 0.0003349803199062055, "grad_norm": 1.0958316326141357, "learning_rate": 6.909830056250527e-05, "loss": 44.2804, "step": 19 }, { "epoch": 0.0003526108630591637, "grad_norm": 1.0989373922348022, "learning_rate": 5.000000000000002e-05, "loss": 44.2736, "step": 20 }, { "epoch": 0.0003526108630591637, "eval_loss": 11.060052871704102, "eval_runtime": 156.1013, "eval_samples_per_second": 152.99, "eval_steps_per_second": 76.495, "step": 20 }, { "epoch": 0.0003702414062121219, "grad_norm": 1.2673594951629639, "learning_rate": 3.308693936411421e-05, "loss": 44.2516, "step": 21 }, { "epoch": 0.00038787194936508007, "grad_norm": 1.0727105140686035, "learning_rate": 1.9098300562505266e-05, "loss": 44.2384, "step": 22 }, { "epoch": 0.00040550249251803826, "grad_norm": 1.2554271221160889, "learning_rate": 8.645454235739903e-06, "loss": 44.2502, "step": 23 }, { "epoch": 0.00042313303567099645, "grad_norm": 1.1639366149902344, "learning_rate": 2.1852399266194314e-06, "loss": 44.2355, "step": 24 }, { "epoch": 0.00044076357882395464, "grad_norm": 1.2954105138778687, "learning_rate": 0.0, "loss": 44.2127, "step": 25 }, { "epoch": 0.00044076357882395464, "eval_loss": 11.059524536132812, "eval_runtime": 156.1425, "eval_samples_per_second": 152.95, "eval_steps_per_second": 76.475, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 37355520000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }