{
  "best_metric": 0.29707786440849304,
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
  "epoch": 0.437636761487965,
  "eval_steps": 50,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00087527352297593,
      "eval_loss": 0.5409302115440369,
      "eval_runtime": 47.9717,
      "eval_samples_per_second": 10.027,
      "eval_steps_per_second": 2.522,
      "step": 1
    },
    {
      "epoch": 0.0087527352297593,
      "grad_norm": 0.6175008416175842,
      "learning_rate": 4.0600000000000004e-05,
      "loss": 1.102,
      "step": 10
    },
    {
      "epoch": 0.0175054704595186,
      "grad_norm": 0.5922743082046509,
      "learning_rate": 8.120000000000001e-05,
      "loss": 0.7527,
      "step": 20
    },
    {
      "epoch": 0.0262582056892779,
      "grad_norm": 0.18080410361289978,
      "learning_rate": 0.00012179999999999999,
      "loss": 0.2428,
      "step": 30
    },
    {
      "epoch": 0.0350109409190372,
      "grad_norm": 0.2408462017774582,
      "learning_rate": 0.00016240000000000002,
      "loss": 0.0084,
      "step": 40
    },
    {
      "epoch": 0.0437636761487965,
      "grad_norm": 0.16170749068260193,
      "learning_rate": 0.000203,
      "loss": 0.0081,
      "step": 50
    },
    {
      "epoch": 0.0437636761487965,
      "eval_loss": 0.41450178623199463,
      "eval_runtime": 48.0897,
      "eval_samples_per_second": 10.002,
      "eval_steps_per_second": 2.516,
      "step": 50
    },
    {
      "epoch": 0.0525164113785558,
      "grad_norm": 0.523928165435791,
      "learning_rate": 0.00020275275110137215,
      "loss": 0.6924,
      "step": 60
    },
    {
      "epoch": 0.061269146608315096,
      "grad_norm": 0.6150461435317993,
      "learning_rate": 0.00020201220897726938,
      "loss": 0.5178,
      "step": 70
    },
    {
      "epoch": 0.0700218818380744,
      "grad_norm": 0.3789159655570984,
      "learning_rate": 0.00020078198147448128,
      "loss": 0.2702,
      "step": 80
    },
    {
      "epoch": 0.0787746170678337,
      "grad_norm": 1.880570411682129,
      "learning_rate": 0.00019906806213773937,
      "loss": 0.0048,
      "step": 90
    },
    {
      "epoch": 0.087527352297593,
      "grad_norm": 0.6373544335365295,
      "learning_rate": 0.0001968788010097697,
      "loss": 0.0049,
      "step": 100
    },
    {
      "epoch": 0.087527352297593,
      "eval_loss": 0.3669174313545227,
      "eval_runtime": 48.1037,
      "eval_samples_per_second": 9.999,
      "eval_steps_per_second": 2.515,
      "step": 100
    },
    {
      "epoch": 0.0962800875273523,
      "grad_norm": 0.6095497608184814,
      "learning_rate": 0.00019422486395072398,
      "loss": 0.8385,
      "step": 110
    },
    {
      "epoch": 0.1050328227571116,
      "grad_norm": 0.44535937905311584,
      "learning_rate": 0.0001911191806751811,
      "loss": 0.5802,
      "step": 120
    },
    {
      "epoch": 0.1137855579868709,
      "grad_norm": 0.10592010617256165,
      "learning_rate": 0.00018757688175987723,
      "loss": 0.1586,
      "step": 130
    },
    {
      "epoch": 0.12253829321663019,
      "grad_norm": 0.025220032781362534,
      "learning_rate": 0.00018361522492905716,
      "loss": 0.0018,
      "step": 140
    },
    {
      "epoch": 0.13129102844638948,
      "grad_norm": 0.014781222678720951,
      "learning_rate": 0.00017925351097657625,
      "loss": 0.002,
      "step": 150
    },
    {
      "epoch": 0.13129102844638948,
      "eval_loss": 0.36381667852401733,
      "eval_runtime": 48.1198,
      "eval_samples_per_second": 9.996,
      "eval_steps_per_second": 2.515,
      "step": 150
    },
    {
      "epoch": 0.1400437636761488,
      "grad_norm": 0.5591445565223694,
      "learning_rate": 0.00017451298973437308,
      "loss": 0.9412,
      "step": 160
    },
    {
      "epoch": 0.1487964989059081,
      "grad_norm": 0.30725646018981934,
      "learning_rate": 0.0001694167565454241,
      "loss": 0.4974,
      "step": 170
    },
    {
      "epoch": 0.1575492341356674,
      "grad_norm": 0.11813530325889587,
      "learning_rate": 0.0001639896397455543,
      "loss": 0.0992,
      "step": 180
    },
    {
      "epoch": 0.16630196936542668,
      "grad_norm": 0.009045140817761421,
      "learning_rate": 0.0001582580797022808,
      "loss": 0.0109,
      "step": 190
    },
    {
      "epoch": 0.175054704595186,
      "grad_norm": 0.017792249098420143,
      "learning_rate": 0.00015225,
      "loss": 0.0007,
      "step": 200
    },
    {
      "epoch": 0.175054704595186,
      "eval_loss": 0.35675832629203796,
      "eval_runtime": 47.8961,
      "eval_samples_per_second": 10.043,
      "eval_steps_per_second": 2.526,
      "step": 200
    },
    {
      "epoch": 0.1838074398249453,
      "grad_norm": 0.5258771777153015,
      "learning_rate": 0.00014599467139909136,
      "loss": 0.7887,
      "step": 210
    },
    {
      "epoch": 0.1925601750547046,
      "grad_norm": 0.5954610109329224,
      "learning_rate": 0.0001395225692317151,
      "loss": 0.7499,
      "step": 220
    },
    {
      "epoch": 0.2013129102844639,
      "grad_norm": 0.0030906260944902897,
      "learning_rate": 0.00013286522492905717,
      "loss": 0.1606,
      "step": 230
    },
    {
      "epoch": 0.2100656455142232,
      "grad_norm": 0.0038282345049083233,
      "learning_rate": 0.00012605507240336626,
      "loss": 0.0009,
      "step": 240
    },
    {
      "epoch": 0.2188183807439825,
      "grad_norm": 0.01913132704794407,
      "learning_rate": 0.00011912529003319345,
      "loss": 0.0004,
      "step": 250
    },
    {
      "epoch": 0.2188183807439825,
      "eval_loss": 0.3523136377334595,
      "eval_runtime": 48.1668,
      "eval_samples_per_second": 9.986,
      "eval_steps_per_second": 2.512,
      "step": 250
    },
    {
      "epoch": 0.2275711159737418,
      "grad_norm": 0.5447108149528503,
      "learning_rate": 0.00011210963902166683,
      "loss": 0.8402,
      "step": 260
    },
    {
      "epoch": 0.2363238512035011,
      "grad_norm": 0.32077598571777344,
      "learning_rate": 0.00010504229891530386,
      "loss": 0.6314,
      "step": 270
    },
    {
      "epoch": 0.24507658643326038,
      "grad_norm": 0.22313565015792847,
      "learning_rate": 9.795770108469618e-05,
      "loss": 0.2108,
      "step": 280
    },
    {
      "epoch": 0.2538293216630197,
      "grad_norm": 0.03895167261362076,
      "learning_rate": 9.08903609783332e-05,
      "loss": 0.0013,
      "step": 290
    },
    {
      "epoch": 0.26258205689277897,
      "grad_norm": 0.004272861871868372,
      "learning_rate": 8.387470996680658e-05,
      "loss": 0.0024,
      "step": 300
    },
    {
      "epoch": 0.26258205689277897,
      "eval_loss": 0.3272861838340759,
      "eval_runtime": 47.9479,
      "eval_samples_per_second": 10.032,
      "eval_steps_per_second": 2.524,
      "step": 300
    },
    {
      "epoch": 0.2713347921225383,
      "grad_norm": 0.4647512137889862,
      "learning_rate": 7.694492759663374e-05,
      "loss": 0.7352,
      "step": 310
    },
    {
      "epoch": 0.2800875273522976,
      "grad_norm": 0.23608016967773438,
      "learning_rate": 7.013477507094284e-05,
      "loss": 0.5446,
      "step": 320
    },
    {
      "epoch": 0.2888402625820569,
      "grad_norm": 0.011121237650513649,
      "learning_rate": 6.347743076828492e-05,
      "loss": 0.1594,
      "step": 330
    },
    {
      "epoch": 0.2975929978118162,
      "grad_norm": 0.03790149837732315,
      "learning_rate": 5.700532860090863e-05,
      "loss": 0.0004,
      "step": 340
    },
    {
      "epoch": 0.3063457330415755,
      "grad_norm": 0.17843233048915863,
      "learning_rate": 5.075000000000002e-05,
      "loss": 0.0016,
      "step": 350
    },
    {
      "epoch": 0.3063457330415755,
      "eval_loss": 0.31218644976615906,
      "eval_runtime": 47.9567,
      "eval_samples_per_second": 10.03,
      "eval_steps_per_second": 2.523,
      "step": 350
    },
    {
      "epoch": 0.3150984682713348,
      "grad_norm": 0.3766857385635376,
      "learning_rate": 4.4741920297719214e-05,
      "loss": 0.6989,
      "step": 360
    },
    {
      "epoch": 0.3238512035010941,
      "grad_norm": 0.45434561371803284,
      "learning_rate": 3.901036025444568e-05,
      "loss": 0.6038,
      "step": 370
    },
    {
      "epoch": 0.33260393873085337,
      "grad_norm": 0.002181870862841606,
      "learning_rate": 3.358324345457592e-05,
      "loss": 0.1711,
      "step": 380
    },
    {
      "epoch": 0.3413566739606127,
      "grad_norm": 0.0015434159431606531,
      "learning_rate": 2.8487010265626928e-05,
      "loss": 0.0022,
      "step": 390
    },
    {
      "epoch": 0.350109409190372,
      "grad_norm": 0.07286681234836578,
      "learning_rate": 2.3746489023423744e-05,
      "loss": 0.0005,
      "step": 400
    },
    {
      "epoch": 0.350109409190372,
      "eval_loss": 0.3000512421131134,
      "eval_runtime": 47.9614,
      "eval_samples_per_second": 10.029,
      "eval_steps_per_second": 2.523,
      "step": 400
    },
    {
      "epoch": 0.3588621444201313,
      "grad_norm": 0.36653754115104675,
      "learning_rate": 1.9384775070942844e-05,
      "loss": 0.7181,
      "step": 410
    },
    {
      "epoch": 0.3676148796498906,
      "grad_norm": 0.229636549949646,
      "learning_rate": 1.5423118240122765e-05,
      "loss": 0.5265,
      "step": 420
    },
    {
      "epoch": 0.37636761487964987,
      "grad_norm": 0.2803157567977905,
      "learning_rate": 1.188081932481891e-05,
      "loss": 0.1247,
      "step": 430
    },
    {
      "epoch": 0.3851203501094092,
      "grad_norm": 0.01452224887907505,
      "learning_rate": 8.775136049276001e-06,
      "loss": 0.0002,
      "step": 440
    },
    {
      "epoch": 0.3938730853391685,
      "grad_norm": 0.00235812459141016,
      "learning_rate": 6.121198990230306e-06,
      "loss": 0.0002,
      "step": 450
    },
    {
      "epoch": 0.3938730853391685,
      "eval_loss": 0.29808175563812256,
      "eval_runtime": 47.9228,
      "eval_samples_per_second": 10.037,
      "eval_steps_per_second": 2.525,
      "step": 450
    },
    {
      "epoch": 0.4026258205689278,
      "grad_norm": 0.4591379463672638,
      "learning_rate": 3.931937862260632e-06,
      "loss": 0.6904,
      "step": 460
    },
    {
      "epoch": 0.4113785557986871,
      "grad_norm": 0.2666955292224884,
      "learning_rate": 2.2180185255187225e-06,
      "loss": 0.5246,
      "step": 470
    },
    {
      "epoch": 0.4201312910284464,
      "grad_norm": 0.004976064432412386,
      "learning_rate": 9.877910227306082e-07,
      "loss": 0.0667,
      "step": 480
    },
    {
      "epoch": 0.4288840262582057,
      "grad_norm": 0.0005102211725898087,
      "learning_rate": 2.472488986278439e-07,
      "loss": 0.0005,
      "step": 490
    },
    {
      "epoch": 0.437636761487965,
      "grad_norm": 0.05553697422146797,
      "learning_rate": 0.0,
      "loss": 0.0007,
      "step": 500
    },
    {
      "epoch": 0.437636761487965,
      "eval_loss": 0.29707786440849304,
      "eval_runtime": 47.9196,
      "eval_samples_per_second": 10.038,
      "eval_steps_per_second": 2.525,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6342833077157888e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}