{ "best_metric": 2.090728282928467, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.08431703204047218, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003372681281618887, "grad_norm": 1.417541265487671, "learning_rate": 2e-05, "loss": 3.6623, "step": 1 }, { "epoch": 0.003372681281618887, "eval_loss": 3.225717067718506, "eval_runtime": 19.339, "eval_samples_per_second": 6.464, "eval_steps_per_second": 3.258, "step": 1 }, { "epoch": 0.006745362563237774, "grad_norm": 1.2096680402755737, "learning_rate": 4e-05, "loss": 2.796, "step": 2 }, { "epoch": 0.01011804384485666, "grad_norm": 1.092326283454895, "learning_rate": 6e-05, "loss": 2.9494, "step": 3 }, { "epoch": 0.013490725126475547, "grad_norm": 0.9202325344085693, "learning_rate": 8e-05, "loss": 2.6686, "step": 4 }, { "epoch": 0.016863406408094434, "grad_norm": 1.5893256664276123, "learning_rate": 0.0001, "loss": 3.4625, "step": 5 }, { "epoch": 0.016863406408094434, "eval_loss": 3.213568925857544, "eval_runtime": 18.3512, "eval_samples_per_second": 6.812, "eval_steps_per_second": 3.433, "step": 5 }, { "epoch": 0.02023608768971332, "grad_norm": 1.2070389986038208, "learning_rate": 0.00012, "loss": 2.904, "step": 6 }, { "epoch": 0.023608768971332208, "grad_norm": 1.7667227983474731, "learning_rate": 0.00014, "loss": 3.1713, "step": 7 }, { "epoch": 0.026981450252951095, "grad_norm": 1.2589635848999023, "learning_rate": 0.00016, "loss": 3.0576, "step": 8 }, { "epoch": 0.03035413153456998, "grad_norm": 1.7349282503128052, "learning_rate": 0.00018, "loss": 2.9782, "step": 9 }, { "epoch": 0.03372681281618887, "grad_norm": 2.6368513107299805, "learning_rate": 0.0002, "loss": 3.2474, "step": 10 }, { "epoch": 0.03372681281618887, "eval_loss": 2.887220859527588, "eval_runtime": 18.1428, "eval_samples_per_second": 6.89, "eval_steps_per_second": 3.472, "step": 10 }, { "epoch": 0.03709949409780776, "grad_norm": 2.249255895614624, "learning_rate": 0.00019781476007338058, "loss": 3.0063, "step": 11 }, { "epoch": 0.04047217537942664, "grad_norm": 2.5278384685516357, "learning_rate": 0.0001913545457642601, "loss": 2.845, "step": 12 }, { "epoch": 0.04384485666104553, "grad_norm": 2.5744972229003906, "learning_rate": 0.00018090169943749476, "loss": 2.4101, "step": 13 }, { "epoch": 0.047217537942664416, "grad_norm": 2.417795419692993, "learning_rate": 0.00016691306063588583, "loss": 2.247, "step": 14 }, { "epoch": 0.050590219224283306, "grad_norm": 2.620561361312866, "learning_rate": 0.00015000000000000001, "loss": 1.8745, "step": 15 }, { "epoch": 0.050590219224283306, "eval_loss": 2.2684640884399414, "eval_runtime": 18.1715, "eval_samples_per_second": 6.879, "eval_steps_per_second": 3.467, "step": 15 }, { "epoch": 0.05396290050590219, "grad_norm": 2.6527938842773438, "learning_rate": 0.00013090169943749476, "loss": 2.0356, "step": 16 }, { "epoch": 0.05733558178752108, "grad_norm": 3.06416654586792, "learning_rate": 0.00011045284632676536, "loss": 2.5972, "step": 17 }, { "epoch": 0.06070826306913996, "grad_norm": 4.44185733795166, "learning_rate": 8.954715367323468e-05, "loss": 2.0945, "step": 18 }, { "epoch": 0.06408094435075885, "grad_norm": 2.4905245304107666, "learning_rate": 6.909830056250527e-05, "loss": 2.054, "step": 19 }, { "epoch": 0.06745362563237774, "grad_norm": 2.5971834659576416, "learning_rate": 5.000000000000002e-05, "loss": 2.2086, "step": 20 }, { "epoch": 0.06745362563237774, "eval_loss": 2.112858772277832, "eval_runtime": 18.1213, "eval_samples_per_second": 6.898, "eval_steps_per_second": 3.477, "step": 20 }, { "epoch": 0.07082630691399663, "grad_norm": 2.9609220027923584, "learning_rate": 3.308693936411421e-05, "loss": 2.1288, "step": 21 }, { "epoch": 0.07419898819561552, "grad_norm": 2.6516270637512207, "learning_rate": 1.9098300562505266e-05, "loss": 2.2109, "step": 22 }, { "epoch": 0.0775716694772344, "grad_norm": 2.68251895904541, "learning_rate": 8.645454235739903e-06, "loss": 2.2448, "step": 23 }, { "epoch": 0.08094435075885328, "grad_norm": 2.136423110961914, "learning_rate": 2.1852399266194314e-06, "loss": 1.9205, "step": 24 }, { "epoch": 0.08431703204047218, "grad_norm": 2.8987932205200195, "learning_rate": 0.0, "loss": 2.1777, "step": 25 }, { "epoch": 0.08431703204047218, "eval_loss": 2.090728282928467, "eval_runtime": 18.0988, "eval_samples_per_second": 6.907, "eval_steps_per_second": 3.481, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4623910620364800.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }