{ "best_metric": 1.8633633852005005, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 1.0021141649048626, "eval_steps": 50, "global_step": 237, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004228329809725159, "eval_loss": 2.096205472946167, "eval_runtime": 6.3468, "eval_samples_per_second": 15.756, "eval_steps_per_second": 3.939, "step": 1 }, { "epoch": 0.042283298097251586, "grad_norm": 7.564020156860352, "learning_rate": 4.24e-05, "loss": 3.6687, "step": 10 }, { "epoch": 0.08456659619450317, "grad_norm": 5.9212799072265625, "learning_rate": 8.48e-05, "loss": 3.7253, "step": 20 }, { "epoch": 0.12684989429175475, "grad_norm": 6.140769004821777, "learning_rate": 0.0001272, "loss": 3.7108, "step": 30 }, { "epoch": 0.16913319238900634, "grad_norm": 8.776068687438965, "learning_rate": 0.0001696, "loss": 3.5971, "step": 40 }, { "epoch": 0.21141649048625794, "grad_norm": 44.07782745361328, "learning_rate": 0.000212, "loss": 3.6765, "step": 50 }, { "epoch": 0.21141649048625794, "eval_loss": 2.286642074584961, "eval_runtime": 6.5576, "eval_samples_per_second": 15.249, "eval_steps_per_second": 3.812, "step": 50 }, { "epoch": 0.2536997885835095, "grad_norm": 5.269192695617676, "learning_rate": 0.00021050764994571535, "loss": 3.8943, "step": 60 }, { "epoch": 0.2959830866807611, "grad_norm": 5.10400390625, "learning_rate": 0.00020607262070143734, "loss": 3.5937, "step": 70 }, { "epoch": 0.3382663847780127, "grad_norm": 6.93461275100708, "learning_rate": 0.00019881979181685374, "loss": 3.7963, "step": 80 }, { "epoch": 0.38054968287526425, "grad_norm": 10.116086959838867, "learning_rate": 0.00018895338517082536, "loss": 3.8322, "step": 90 }, { "epoch": 0.42283298097251587, "grad_norm": 48.350345611572266, "learning_rate": 0.00017675121458398993, "loss": 3.6459, "step": 100 }, { "epoch": 0.42283298097251587, "eval_loss": 2.329221248626709, "eval_runtime": 6.5526, "eval_samples_per_second": 15.261, "eval_steps_per_second": 3.815, "step": 100 }, { "epoch": 0.46511627906976744, "grad_norm": 5.142873287200928, "learning_rate": 0.0001625568632627182, "loss": 3.9402, "step": 110 }, { "epoch": 0.507399577167019, "grad_norm": 5.514346122741699, "learning_rate": 0.0001467700093384222, "loss": 3.8567, "step": 120 }, { "epoch": 0.5496828752642706, "grad_norm": 6.7151103019714355, "learning_rate": 0.00012983517191130775, "loss": 3.6157, "step": 130 }, { "epoch": 0.5919661733615222, "grad_norm": 7.746840000152588, "learning_rate": 0.00011222919448238718, "loss": 3.9139, "step": 140 }, { "epoch": 0.6342494714587738, "grad_norm": 28.235681533813477, "learning_rate": 9.444781820961864e-05, "loss": 3.6412, "step": 150 }, { "epoch": 0.6342494714587738, "eval_loss": 2.091932773590088, "eval_runtime": 6.5409, "eval_samples_per_second": 15.288, "eval_steps_per_second": 3.822, "step": 150 }, { "epoch": 0.6765327695560254, "grad_norm": 5.661881923675537, "learning_rate": 7.699172305235922e-05, "loss": 3.8396, "step": 160 }, { "epoch": 0.718816067653277, "grad_norm": 7.51119327545166, "learning_rate": 6.035242985127933e-05, "loss": 3.8834, "step": 170 }, { "epoch": 0.7610993657505285, "grad_norm": 7.847259998321533, "learning_rate": 4.4998460306603325e-05, "loss": 3.938, "step": 180 }, { "epoch": 0.8033826638477801, "grad_norm": 6.869880676269531, "learning_rate": 3.136214455575538e-05, "loss": 3.6543, "step": 190 }, { "epoch": 0.8456659619450317, "grad_norm": 22.58540916442871, "learning_rate": 1.9827447816679272e-05, "loss": 3.3961, "step": 200 }, { "epoch": 0.8456659619450317, "eval_loss": 1.8633633852005005, "eval_runtime": 6.5539, "eval_samples_per_second": 15.258, "eval_steps_per_second": 3.815, "step": 200 }, { "epoch": 0.8879492600422833, "grad_norm": 5.115078449249268, "learning_rate": 1.0719158868709316e-05, "loss": 3.8714, "step": 210 }, { "epoch": 0.9302325581395349, "grad_norm": 6.137579441070557, "learning_rate": 4.29374479686329e-06, "loss": 3.8645, "step": 220 }, { "epoch": 0.9725158562367865, "grad_norm": 7.499491214752197, "learning_rate": 7.321295075359002e-07, "loss": 3.5654, "step": 230 } ], "logging_steps": 10, "max_steps": 237, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.371259110457344e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }