{
  "best_metric": 0.14771892130374908,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.009848820603732702,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00019697641207465407,
      "grad_norm": 3.348949670791626,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 1.0606,
      "step": 1
    },
    {
      "epoch": 0.00019697641207465407,
      "eval_loss": 0.360129714012146,
      "eval_runtime": 10.5317,
      "eval_samples_per_second": 4.748,
      "eval_steps_per_second": 0.665,
      "step": 1
    },
    {
      "epoch": 0.00039395282414930814,
      "grad_norm": 3.371903657913208,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 1.0354,
      "step": 2
    },
    {
      "epoch": 0.0005909292362239622,
      "grad_norm": 2.8037610054016113,
      "learning_rate": 8.999999999999999e-05,
      "loss": 0.9175,
      "step": 3
    },
    {
      "epoch": 0.0007879056482986163,
      "grad_norm": 1.5464385747909546,
      "learning_rate": 0.00011999999999999999,
      "loss": 0.5709,
      "step": 4
    },
    {
      "epoch": 0.0009848820603732703,
      "grad_norm": 1.5053009986877441,
      "learning_rate": 0.00015,
      "loss": 0.5139,
      "step": 5
    },
    {
      "epoch": 0.0011818584724479244,
      "grad_norm": 1.4200072288513184,
      "learning_rate": 0.00017999999999999998,
      "loss": 0.5191,
      "step": 6
    },
    {
      "epoch": 0.0013788348845225785,
      "grad_norm": 1.2713923454284668,
      "learning_rate": 0.00020999999999999998,
      "loss": 0.5693,
      "step": 7
    },
    {
      "epoch": 0.0015758112965972325,
      "grad_norm": 2.9913175106048584,
      "learning_rate": 0.00023999999999999998,
      "loss": 0.4701,
      "step": 8
    },
    {
      "epoch": 0.0017727877086718866,
      "grad_norm": 3.1078169345855713,
      "learning_rate": 0.00027,
      "loss": 0.4946,
      "step": 9
    },
    {
      "epoch": 0.0019697641207465405,
      "grad_norm": 1.0796478986740112,
      "learning_rate": 0.0003,
      "loss": 0.552,
      "step": 10
    },
    {
      "epoch": 0.0021667405328211946,
      "grad_norm": 0.8546186685562134,
      "learning_rate": 0.0002999794957488703,
      "loss": 0.4502,
      "step": 11
    },
    {
      "epoch": 0.0023637169448958487,
      "grad_norm": 1.210360050201416,
      "learning_rate": 0.0002999179886011389,
      "loss": 0.4655,
      "step": 12
    },
    {
      "epoch": 0.002560693356970503,
      "grad_norm": 1.6150785684585571,
      "learning_rate": 0.0002998154953722457,
      "loss": 0.4661,
      "step": 13
    },
    {
      "epoch": 0.002757669769045157,
      "grad_norm": 0.7436849474906921,
      "learning_rate": 0.00029967204408281613,
      "loss": 0.366,
      "step": 14
    },
    {
      "epoch": 0.002954646181119811,
      "grad_norm": 0.7962251901626587,
      "learning_rate": 0.00029948767395100045,
      "loss": 0.4605,
      "step": 15
    },
    {
      "epoch": 0.003151622593194465,
      "grad_norm": 0.9352697134017944,
      "learning_rate": 0.0002992624353817517,
      "loss": 0.382,
      "step": 16
    },
    {
      "epoch": 0.003348599005269119,
      "grad_norm": 0.9323279857635498,
      "learning_rate": 0.0002989963899530457,
      "loss": 0.5812,
      "step": 17
    },
    {
      "epoch": 0.0035455754173437733,
      "grad_norm": 0.8441579937934875,
      "learning_rate": 0.00029868961039904624,
      "loss": 0.4787,
      "step": 18
    },
    {
      "epoch": 0.003742551829418427,
      "grad_norm": 1.0008450746536255,
      "learning_rate": 0.00029834218059022024,
      "loss": 0.3968,
      "step": 19
    },
    {
      "epoch": 0.003939528241493081,
      "grad_norm": 0.7684171199798584,
      "learning_rate": 0.00029795419551040833,
      "loss": 0.3796,
      "step": 20
    },
    {
      "epoch": 0.004136504653567736,
      "grad_norm": 1.0816248655319214,
      "learning_rate": 0.00029752576123085736,
      "loss": 0.4181,
      "step": 21
    },
    {
      "epoch": 0.004333481065642389,
      "grad_norm": 0.8110284209251404,
      "learning_rate": 0.0002970569948812214,
      "loss": 0.3604,
      "step": 22
    },
    {
      "epoch": 0.004530457477717044,
      "grad_norm": 2.4406509399414062,
      "learning_rate": 0.0002965480246175399,
      "loss": 0.4562,
      "step": 23
    },
    {
      "epoch": 0.004727433889791697,
      "grad_norm": 1.13535475730896,
      "learning_rate": 0.0002959989895872009,
      "loss": 0.4112,
      "step": 24
    },
    {
      "epoch": 0.004924410301866351,
      "grad_norm": 0.845367968082428,
      "learning_rate": 0.0002954100398908995,
      "loss": 0.4399,
      "step": 25
    },
    {
      "epoch": 0.004924410301866351,
      "eval_loss": 0.11439956724643707,
      "eval_runtime": 12.8261,
      "eval_samples_per_second": 3.898,
      "eval_steps_per_second": 0.546,
      "step": 25
    },
    {
      "epoch": 0.005121386713941006,
      "grad_norm": 0.7668648362159729,
      "learning_rate": 0.0002947813365416023,
      "loss": 0.4863,
      "step": 26
    },
    {
      "epoch": 0.005318363126015659,
      "grad_norm": 0.8414048552513123,
      "learning_rate": 0.0002941130514205272,
      "loss": 0.4708,
      "step": 27
    },
    {
      "epoch": 0.005515339538090314,
      "grad_norm": 0.9618905782699585,
      "learning_rate": 0.0002934053672301536,
      "loss": 0.4237,
      "step": 28
    },
    {
      "epoch": 0.0057123159501649675,
      "grad_norm": 0.7617946267127991,
      "learning_rate": 0.00029265847744427303,
      "loss": 0.3429,
      "step": 29
    },
    {
      "epoch": 0.005909292362239622,
      "grad_norm": 1.3151721954345703,
      "learning_rate": 0.00029187258625509513,
      "loss": 0.5637,
      "step": 30
    },
    {
      "epoch": 0.006106268774314276,
      "grad_norm": 1.2373560667037964,
      "learning_rate": 0.00029104790851742417,
      "loss": 0.4711,
      "step": 31
    },
    {
      "epoch": 0.00630324518638893,
      "grad_norm": 0.9699512124061584,
      "learning_rate": 0.0002901846696899191,
      "loss": 0.3827,
      "step": 32
    },
    {
      "epoch": 0.006500221598463584,
      "grad_norm": 1.1460570096969604,
      "learning_rate": 0.00028928310577345606,
      "loss": 0.4132,
      "step": 33
    },
    {
      "epoch": 0.006697198010538238,
      "grad_norm": 1.2482142448425293,
      "learning_rate": 0.0002883434632466077,
      "loss": 0.4066,
      "step": 34
    },
    {
      "epoch": 0.006894174422612892,
      "grad_norm": 1.0130342245101929,
      "learning_rate": 0.00028736599899825856,
      "loss": 0.4401,
      "step": 35
    },
    {
      "epoch": 0.007091150834687547,
      "grad_norm": 1.153891921043396,
      "learning_rate": 0.00028635098025737434,
      "loss": 0.3825,
      "step": 36
    },
    {
      "epoch": 0.0072881272467622,
      "grad_norm": 1.4242093563079834,
      "learning_rate": 0.00028529868451994384,
      "loss": 0.3514,
      "step": 37
    },
    {
      "epoch": 0.007485103658836854,
      "grad_norm": 0.8260576725006104,
      "learning_rate": 0.0002842093994731145,
      "loss": 0.38,
      "step": 38
    },
    {
      "epoch": 0.007682080070911508,
      "grad_norm": 1.6134790182113647,
      "learning_rate": 0.00028308342291654174,
      "loss": 0.3237,
      "step": 39
    },
    {
      "epoch": 0.007879056482986162,
      "grad_norm": 1.4775500297546387,
      "learning_rate": 0.00028192106268097334,
      "loss": 0.503,
      "step": 40
    },
    {
      "epoch": 0.008076032895060816,
      "grad_norm": 1.2248170375823975,
      "learning_rate": 0.00028072263654409154,
      "loss": 0.3367,
      "step": 41
    },
    {
      "epoch": 0.008273009307135471,
      "grad_norm": 5.347848415374756,
      "learning_rate": 0.0002794884721436361,
      "loss": 0.3759,
      "step": 42
    },
    {
      "epoch": 0.008469985719210125,
      "grad_norm": 1.2572146654129028,
      "learning_rate": 0.00027821890688783083,
      "loss": 0.3936,
      "step": 43
    },
    {
      "epoch": 0.008666962131284778,
      "grad_norm": 1.1421171426773071,
      "learning_rate": 0.0002769142878631403,
      "loss": 0.2144,
      "step": 44
    },
    {
      "epoch": 0.008863938543359432,
      "grad_norm": 0.9987139701843262,
      "learning_rate": 0.00027557497173937923,
      "loss": 0.217,
      "step": 45
    },
    {
      "epoch": 0.009060914955434088,
      "grad_norm": 1.9025821685791016,
      "learning_rate": 0.000274201324672203,
      "loss": 0.2224,
      "step": 46
    },
    {
      "epoch": 0.009257891367508741,
      "grad_norm": 0.7751299738883972,
      "learning_rate": 0.00027279372220300385,
      "loss": 0.1837,
      "step": 47
    },
    {
      "epoch": 0.009454867779583395,
      "grad_norm": 2.193005084991455,
      "learning_rate": 0.0002713525491562421,
      "loss": 0.1896,
      "step": 48
    },
    {
      "epoch": 0.009651844191658049,
      "grad_norm": 1.596818447113037,
      "learning_rate": 0.00026987819953423867,
      "loss": 0.2651,
      "step": 49
    },
    {
      "epoch": 0.009848820603732702,
      "grad_norm": 10.099899291992188,
      "learning_rate": 0.00026837107640945905,
      "loss": 0.4042,
      "step": 50
    },
    {
      "epoch": 0.009848820603732702,
      "eval_loss": 0.14771892130374908,
      "eval_runtime": 10.3847,
      "eval_samples_per_second": 4.815,
      "eval_steps_per_second": 0.674,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8.778132335099904e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}