|
{ |
|
"best_metric": 11.059524536132812, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-25", |
|
"epoch": 0.00044076357882395464, |
|
"eval_steps": 5, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.7630543152958186e-05, |
|
"grad_norm": 1.2864186763763428, |
|
"learning_rate": 2e-05, |
|
"loss": 44.3172, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.7630543152958186e-05, |
|
"eval_loss": 11.080211639404297, |
|
"eval_runtime": 157.8291, |
|
"eval_samples_per_second": 151.316, |
|
"eval_steps_per_second": 75.658, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 3.526108630591637e-05, |
|
"grad_norm": 1.1442649364471436, |
|
"learning_rate": 4e-05, |
|
"loss": 44.3623, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 5.2891629458874556e-05, |
|
"grad_norm": 1.320364236831665, |
|
"learning_rate": 6e-05, |
|
"loss": 44.2724, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 7.052217261183275e-05, |
|
"grad_norm": 1.1162445545196533, |
|
"learning_rate": 8e-05, |
|
"loss": 44.3297, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 8.815271576479092e-05, |
|
"grad_norm": 1.2065627574920654, |
|
"learning_rate": 0.0001, |
|
"loss": 44.3505, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 8.815271576479092e-05, |
|
"eval_loss": 11.079597473144531, |
|
"eval_runtime": 154.6193, |
|
"eval_samples_per_second": 154.457, |
|
"eval_steps_per_second": 77.228, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00010578325891774911, |
|
"grad_norm": 1.129872441291809, |
|
"learning_rate": 0.00012, |
|
"loss": 44.3087, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0001234138020707073, |
|
"grad_norm": 1.278194546699524, |
|
"learning_rate": 0.00014, |
|
"loss": 44.3065, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0001410443452236655, |
|
"grad_norm": 0.9755666851997375, |
|
"learning_rate": 0.00016, |
|
"loss": 44.3318, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00015867488837662365, |
|
"grad_norm": 1.103223204612732, |
|
"learning_rate": 0.00018, |
|
"loss": 44.3202, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00017630543152958184, |
|
"grad_norm": 1.0434162616729736, |
|
"learning_rate": 0.0002, |
|
"loss": 44.3092, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00017630543152958184, |
|
"eval_loss": 11.074006080627441, |
|
"eval_runtime": 154.9175, |
|
"eval_samples_per_second": 154.159, |
|
"eval_steps_per_second": 77.08, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00019393597468254003, |
|
"grad_norm": 1.1925216913223267, |
|
"learning_rate": 0.00019781476007338058, |
|
"loss": 44.2824, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00021156651783549822, |
|
"grad_norm": 1.1967883110046387, |
|
"learning_rate": 0.0001913545457642601, |
|
"loss": 44.287, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0002291970609884564, |
|
"grad_norm": 1.1843236684799194, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 44.2499, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0002468276041414146, |
|
"grad_norm": 1.2098557949066162, |
|
"learning_rate": 0.00016691306063588583, |
|
"loss": 44.2345, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0002644581472943728, |
|
"grad_norm": 1.283496379852295, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 44.2413, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0002644581472943728, |
|
"eval_loss": 11.064728736877441, |
|
"eval_runtime": 155.3564, |
|
"eval_samples_per_second": 153.724, |
|
"eval_steps_per_second": 76.862, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.000282088690447331, |
|
"grad_norm": 1.2000702619552612, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 44.2442, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0002997192336002891, |
|
"grad_norm": 1.1320544481277466, |
|
"learning_rate": 0.00011045284632676536, |
|
"loss": 44.2807, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0003173497767532473, |
|
"grad_norm": 1.0614330768585205, |
|
"learning_rate": 8.954715367323468e-05, |
|
"loss": 44.2788, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0003349803199062055, |
|
"grad_norm": 1.0958316326141357, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 44.2804, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0003526108630591637, |
|
"grad_norm": 1.0989373922348022, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 44.2736, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0003526108630591637, |
|
"eval_loss": 11.060052871704102, |
|
"eval_runtime": 156.1013, |
|
"eval_samples_per_second": 152.99, |
|
"eval_steps_per_second": 76.495, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0003702414062121219, |
|
"grad_norm": 1.2673594951629639, |
|
"learning_rate": 3.308693936411421e-05, |
|
"loss": 44.2516, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.00038787194936508007, |
|
"grad_norm": 1.0727105140686035, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 44.2384, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.00040550249251803826, |
|
"grad_norm": 1.2554271221160889, |
|
"learning_rate": 8.645454235739903e-06, |
|
"loss": 44.2502, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.00042313303567099645, |
|
"grad_norm": 1.1639366149902344, |
|
"learning_rate": 2.1852399266194314e-06, |
|
"loss": 44.2355, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.00044076357882395464, |
|
"grad_norm": 1.2954105138778687, |
|
"learning_rate": 0.0, |
|
"loss": 44.2127, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.00044076357882395464, |
|
"eval_loss": 11.059524536132812, |
|
"eval_runtime": 156.1425, |
|
"eval_samples_per_second": 152.95, |
|
"eval_steps_per_second": 76.475, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 25, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 37355520000.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|