{ "best_metric": 0.29707786440849304, "best_model_checkpoint": "miner_id_24/checkpoint-500", "epoch": 0.437636761487965, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00087527352297593, "eval_loss": 0.5409302115440369, "eval_runtime": 47.9717, "eval_samples_per_second": 10.027, "eval_steps_per_second": 2.522, "step": 1 }, { "epoch": 0.0087527352297593, "grad_norm": 0.6175008416175842, "learning_rate": 4.0600000000000004e-05, "loss": 1.102, "step": 10 }, { "epoch": 0.0175054704595186, "grad_norm": 0.5922743082046509, "learning_rate": 8.120000000000001e-05, "loss": 0.7527, "step": 20 }, { "epoch": 0.0262582056892779, "grad_norm": 0.18080410361289978, "learning_rate": 0.00012179999999999999, "loss": 0.2428, "step": 30 }, { "epoch": 0.0350109409190372, "grad_norm": 0.2408462017774582, "learning_rate": 0.00016240000000000002, "loss": 0.0084, "step": 40 }, { "epoch": 0.0437636761487965, "grad_norm": 0.16170749068260193, "learning_rate": 0.000203, "loss": 0.0081, "step": 50 }, { "epoch": 0.0437636761487965, "eval_loss": 0.41450178623199463, "eval_runtime": 48.0897, "eval_samples_per_second": 10.002, "eval_steps_per_second": 2.516, "step": 50 }, { "epoch": 0.0525164113785558, "grad_norm": 0.523928165435791, "learning_rate": 0.00020275275110137215, "loss": 0.6924, "step": 60 }, { "epoch": 0.061269146608315096, "grad_norm": 0.6150461435317993, "learning_rate": 0.00020201220897726938, "loss": 0.5178, "step": 70 }, { "epoch": 0.0700218818380744, "grad_norm": 0.3789159655570984, "learning_rate": 0.00020078198147448128, "loss": 0.2702, "step": 80 }, { "epoch": 0.0787746170678337, "grad_norm": 1.880570411682129, "learning_rate": 0.00019906806213773937, "loss": 0.0048, "step": 90 }, { "epoch": 0.087527352297593, "grad_norm": 0.6373544335365295, "learning_rate": 0.0001968788010097697, "loss": 0.0049, "step": 100 }, { "epoch": 0.087527352297593, "eval_loss": 0.3669174313545227, "eval_runtime": 48.1037, "eval_samples_per_second": 9.999, "eval_steps_per_second": 2.515, "step": 100 }, { "epoch": 0.0962800875273523, "grad_norm": 0.6095497608184814, "learning_rate": 0.00019422486395072398, "loss": 0.8385, "step": 110 }, { "epoch": 0.1050328227571116, "grad_norm": 0.44535937905311584, "learning_rate": 0.0001911191806751811, "loss": 0.5802, "step": 120 }, { "epoch": 0.1137855579868709, "grad_norm": 0.10592010617256165, "learning_rate": 0.00018757688175987723, "loss": 0.1586, "step": 130 }, { "epoch": 0.12253829321663019, "grad_norm": 0.025220032781362534, "learning_rate": 0.00018361522492905716, "loss": 0.0018, "step": 140 }, { "epoch": 0.13129102844638948, "grad_norm": 0.014781222678720951, "learning_rate": 0.00017925351097657625, "loss": 0.002, "step": 150 }, { "epoch": 0.13129102844638948, "eval_loss": 0.36381667852401733, "eval_runtime": 48.1198, "eval_samples_per_second": 9.996, "eval_steps_per_second": 2.515, "step": 150 }, { "epoch": 0.1400437636761488, "grad_norm": 0.5591445565223694, "learning_rate": 0.00017451298973437308, "loss": 0.9412, "step": 160 }, { "epoch": 0.1487964989059081, "grad_norm": 0.30725646018981934, "learning_rate": 0.0001694167565454241, "loss": 0.4974, "step": 170 }, { "epoch": 0.1575492341356674, "grad_norm": 0.11813530325889587, "learning_rate": 0.0001639896397455543, "loss": 0.0992, "step": 180 }, { "epoch": 0.16630196936542668, "grad_norm": 0.009045140817761421, "learning_rate": 0.0001582580797022808, "loss": 0.0109, "step": 190 }, { "epoch": 0.175054704595186, "grad_norm": 0.017792249098420143, "learning_rate": 0.00015225, "loss": 0.0007, "step": 200 }, { "epoch": 0.175054704595186, "eval_loss": 0.35675832629203796, "eval_runtime": 47.8961, "eval_samples_per_second": 10.043, "eval_steps_per_second": 2.526, "step": 200 }, { "epoch": 0.1838074398249453, "grad_norm": 0.5258771777153015, "learning_rate": 0.00014599467139909136, "loss": 0.7887, "step": 210 }, { "epoch": 0.1925601750547046, "grad_norm": 0.5954610109329224, "learning_rate": 0.0001395225692317151, "loss": 0.7499, "step": 220 }, { "epoch": 0.2013129102844639, "grad_norm": 0.0030906260944902897, "learning_rate": 0.00013286522492905717, "loss": 0.1606, "step": 230 }, { "epoch": 0.2100656455142232, "grad_norm": 0.0038282345049083233, "learning_rate": 0.00012605507240336626, "loss": 0.0009, "step": 240 }, { "epoch": 0.2188183807439825, "grad_norm": 0.01913132704794407, "learning_rate": 0.00011912529003319345, "loss": 0.0004, "step": 250 }, { "epoch": 0.2188183807439825, "eval_loss": 0.3523136377334595, "eval_runtime": 48.1668, "eval_samples_per_second": 9.986, "eval_steps_per_second": 2.512, "step": 250 }, { "epoch": 0.2275711159737418, "grad_norm": 0.5447108149528503, "learning_rate": 0.00011210963902166683, "loss": 0.8402, "step": 260 }, { "epoch": 0.2363238512035011, "grad_norm": 0.32077598571777344, "learning_rate": 0.00010504229891530386, "loss": 0.6314, "step": 270 }, { "epoch": 0.24507658643326038, "grad_norm": 0.22313565015792847, "learning_rate": 9.795770108469618e-05, "loss": 0.2108, "step": 280 }, { "epoch": 0.2538293216630197, "grad_norm": 0.03895167261362076, "learning_rate": 9.08903609783332e-05, "loss": 0.0013, "step": 290 }, { "epoch": 0.26258205689277897, "grad_norm": 0.004272861871868372, "learning_rate": 8.387470996680658e-05, "loss": 0.0024, "step": 300 }, { "epoch": 0.26258205689277897, "eval_loss": 0.3272861838340759, "eval_runtime": 47.9479, "eval_samples_per_second": 10.032, "eval_steps_per_second": 2.524, "step": 300 }, { "epoch": 0.2713347921225383, "grad_norm": 0.4647512137889862, "learning_rate": 7.694492759663374e-05, "loss": 0.7352, "step": 310 }, { "epoch": 0.2800875273522976, "grad_norm": 0.23608016967773438, "learning_rate": 7.013477507094284e-05, "loss": 0.5446, "step": 320 }, { "epoch": 0.2888402625820569, "grad_norm": 0.011121237650513649, "learning_rate": 6.347743076828492e-05, "loss": 0.1594, "step": 330 }, { "epoch": 0.2975929978118162, "grad_norm": 0.03790149837732315, "learning_rate": 5.700532860090863e-05, "loss": 0.0004, "step": 340 }, { "epoch": 0.3063457330415755, "grad_norm": 0.17843233048915863, "learning_rate": 5.075000000000002e-05, "loss": 0.0016, "step": 350 }, { "epoch": 0.3063457330415755, "eval_loss": 0.31218644976615906, "eval_runtime": 47.9567, "eval_samples_per_second": 10.03, "eval_steps_per_second": 2.523, "step": 350 }, { "epoch": 0.3150984682713348, "grad_norm": 0.3766857385635376, "learning_rate": 4.4741920297719214e-05, "loss": 0.6989, "step": 360 }, { "epoch": 0.3238512035010941, "grad_norm": 0.45434561371803284, "learning_rate": 3.901036025444568e-05, "loss": 0.6038, "step": 370 }, { "epoch": 0.33260393873085337, "grad_norm": 0.002181870862841606, "learning_rate": 3.358324345457592e-05, "loss": 0.1711, "step": 380 }, { "epoch": 0.3413566739606127, "grad_norm": 0.0015434159431606531, "learning_rate": 2.8487010265626928e-05, "loss": 0.0022, "step": 390 }, { "epoch": 0.350109409190372, "grad_norm": 0.07286681234836578, "learning_rate": 2.3746489023423744e-05, "loss": 0.0005, "step": 400 }, { "epoch": 0.350109409190372, "eval_loss": 0.3000512421131134, "eval_runtime": 47.9614, "eval_samples_per_second": 10.029, "eval_steps_per_second": 2.523, "step": 400 }, { "epoch": 0.3588621444201313, "grad_norm": 0.36653754115104675, "learning_rate": 1.9384775070942844e-05, "loss": 0.7181, "step": 410 }, { "epoch": 0.3676148796498906, "grad_norm": 0.229636549949646, "learning_rate": 1.5423118240122765e-05, "loss": 0.5265, "step": 420 }, { "epoch": 0.37636761487964987, "grad_norm": 0.2803157567977905, "learning_rate": 1.188081932481891e-05, "loss": 0.1247, "step": 430 }, { "epoch": 0.3851203501094092, "grad_norm": 0.01452224887907505, "learning_rate": 8.775136049276001e-06, "loss": 0.0002, "step": 440 }, { "epoch": 0.3938730853391685, "grad_norm": 0.00235812459141016, "learning_rate": 6.121198990230306e-06, "loss": 0.0002, "step": 450 }, { "epoch": 0.3938730853391685, "eval_loss": 0.29808175563812256, "eval_runtime": 47.9228, "eval_samples_per_second": 10.037, "eval_steps_per_second": 2.525, "step": 450 }, { "epoch": 0.4026258205689278, "grad_norm": 0.4591379463672638, "learning_rate": 3.931937862260632e-06, "loss": 0.6904, "step": 460 }, { "epoch": 0.4113785557986871, "grad_norm": 0.2666955292224884, "learning_rate": 2.2180185255187225e-06, "loss": 0.5246, "step": 470 }, { "epoch": 0.4201312910284464, "grad_norm": 0.004976064432412386, "learning_rate": 9.877910227306082e-07, "loss": 0.0667, "step": 480 }, { "epoch": 0.4288840262582057, "grad_norm": 0.0005102211725898087, "learning_rate": 2.472488986278439e-07, "loss": 0.0005, "step": 490 }, { "epoch": 0.437636761487965, "grad_norm": 0.05553697422146797, "learning_rate": 0.0, "loss": 0.0007, "step": 500 }, { "epoch": 0.437636761487965, "eval_loss": 0.29707786440849304, "eval_runtime": 47.9196, "eval_samples_per_second": 10.038, "eval_steps_per_second": 2.525, "step": 500 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6342833077157888e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }