{ "best_metric": 0.14771892130374908, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.009848820603732702, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00019697641207465407, "grad_norm": 3.348949670791626, "learning_rate": 2.9999999999999997e-05, "loss": 1.0606, "step": 1 }, { "epoch": 0.00019697641207465407, "eval_loss": 0.360129714012146, "eval_runtime": 10.5317, "eval_samples_per_second": 4.748, "eval_steps_per_second": 0.665, "step": 1 }, { "epoch": 0.00039395282414930814, "grad_norm": 3.371903657913208, "learning_rate": 5.9999999999999995e-05, "loss": 1.0354, "step": 2 }, { "epoch": 0.0005909292362239622, "grad_norm": 2.8037610054016113, "learning_rate": 8.999999999999999e-05, "loss": 0.9175, "step": 3 }, { "epoch": 0.0007879056482986163, "grad_norm": 1.5464385747909546, "learning_rate": 0.00011999999999999999, "loss": 0.5709, "step": 4 }, { "epoch": 0.0009848820603732703, "grad_norm": 1.5053009986877441, "learning_rate": 0.00015, "loss": 0.5139, "step": 5 }, { "epoch": 0.0011818584724479244, "grad_norm": 1.4200072288513184, "learning_rate": 0.00017999999999999998, "loss": 0.5191, "step": 6 }, { "epoch": 0.0013788348845225785, "grad_norm": 1.2713923454284668, "learning_rate": 0.00020999999999999998, "loss": 0.5693, "step": 7 }, { "epoch": 0.0015758112965972325, "grad_norm": 2.9913175106048584, "learning_rate": 0.00023999999999999998, "loss": 0.4701, "step": 8 }, { "epoch": 0.0017727877086718866, "grad_norm": 3.1078169345855713, "learning_rate": 0.00027, "loss": 0.4946, "step": 9 }, { "epoch": 0.0019697641207465405, "grad_norm": 1.0796478986740112, "learning_rate": 0.0003, "loss": 0.552, "step": 10 }, { "epoch": 0.0021667405328211946, "grad_norm": 0.8546186685562134, "learning_rate": 0.0002999794957488703, "loss": 0.4502, "step": 11 }, { "epoch": 0.0023637169448958487, "grad_norm": 1.210360050201416, "learning_rate": 0.0002999179886011389, "loss": 0.4655, "step": 12 }, { "epoch": 0.002560693356970503, "grad_norm": 1.6150785684585571, "learning_rate": 0.0002998154953722457, "loss": 0.4661, "step": 13 }, { "epoch": 0.002757669769045157, "grad_norm": 0.7436849474906921, "learning_rate": 0.00029967204408281613, "loss": 0.366, "step": 14 }, { "epoch": 0.002954646181119811, "grad_norm": 0.7962251901626587, "learning_rate": 0.00029948767395100045, "loss": 0.4605, "step": 15 }, { "epoch": 0.003151622593194465, "grad_norm": 0.9352697134017944, "learning_rate": 0.0002992624353817517, "loss": 0.382, "step": 16 }, { "epoch": 0.003348599005269119, "grad_norm": 0.9323279857635498, "learning_rate": 0.0002989963899530457, "loss": 0.5812, "step": 17 }, { "epoch": 0.0035455754173437733, "grad_norm": 0.8441579937934875, "learning_rate": 0.00029868961039904624, "loss": 0.4787, "step": 18 }, { "epoch": 0.003742551829418427, "grad_norm": 1.0008450746536255, "learning_rate": 0.00029834218059022024, "loss": 0.3968, "step": 19 }, { "epoch": 0.003939528241493081, "grad_norm": 0.7684171199798584, "learning_rate": 0.00029795419551040833, "loss": 0.3796, "step": 20 }, { "epoch": 0.004136504653567736, "grad_norm": 1.0816248655319214, "learning_rate": 0.00029752576123085736, "loss": 0.4181, "step": 21 }, { "epoch": 0.004333481065642389, "grad_norm": 0.8110284209251404, "learning_rate": 0.0002970569948812214, "loss": 0.3604, "step": 22 }, { "epoch": 0.004530457477717044, "grad_norm": 2.4406509399414062, "learning_rate": 0.0002965480246175399, "loss": 0.4562, "step": 23 }, { "epoch": 0.004727433889791697, "grad_norm": 1.13535475730896, "learning_rate": 0.0002959989895872009, "loss": 0.4112, "step": 24 }, { "epoch": 0.004924410301866351, "grad_norm": 0.845367968082428, "learning_rate": 0.0002954100398908995, "loss": 0.4399, "step": 25 }, { "epoch": 0.004924410301866351, "eval_loss": 0.11439956724643707, "eval_runtime": 12.8261, "eval_samples_per_second": 3.898, "eval_steps_per_second": 0.546, "step": 25 }, { "epoch": 0.005121386713941006, "grad_norm": 0.7668648362159729, "learning_rate": 0.0002947813365416023, "loss": 0.4863, "step": 26 }, { "epoch": 0.005318363126015659, "grad_norm": 0.8414048552513123, "learning_rate": 0.0002941130514205272, "loss": 0.4708, "step": 27 }, { "epoch": 0.005515339538090314, "grad_norm": 0.9618905782699585, "learning_rate": 0.0002934053672301536, "loss": 0.4237, "step": 28 }, { "epoch": 0.0057123159501649675, "grad_norm": 0.7617946267127991, "learning_rate": 0.00029265847744427303, "loss": 0.3429, "step": 29 }, { "epoch": 0.005909292362239622, "grad_norm": 1.3151721954345703, "learning_rate": 0.00029187258625509513, "loss": 0.5637, "step": 30 }, { "epoch": 0.006106268774314276, "grad_norm": 1.2373560667037964, "learning_rate": 0.00029104790851742417, "loss": 0.4711, "step": 31 }, { "epoch": 0.00630324518638893, "grad_norm": 0.9699512124061584, "learning_rate": 0.0002901846696899191, "loss": 0.3827, "step": 32 }, { "epoch": 0.006500221598463584, "grad_norm": 1.1460570096969604, "learning_rate": 0.00028928310577345606, "loss": 0.4132, "step": 33 }, { "epoch": 0.006697198010538238, "grad_norm": 1.2482142448425293, "learning_rate": 0.0002883434632466077, "loss": 0.4066, "step": 34 }, { "epoch": 0.006894174422612892, "grad_norm": 1.0130342245101929, "learning_rate": 0.00028736599899825856, "loss": 0.4401, "step": 35 }, { "epoch": 0.007091150834687547, "grad_norm": 1.153891921043396, "learning_rate": 0.00028635098025737434, "loss": 0.3825, "step": 36 }, { "epoch": 0.0072881272467622, "grad_norm": 1.4242093563079834, "learning_rate": 0.00028529868451994384, "loss": 0.3514, "step": 37 }, { "epoch": 0.007485103658836854, "grad_norm": 0.8260576725006104, "learning_rate": 0.0002842093994731145, "loss": 0.38, "step": 38 }, { "epoch": 0.007682080070911508, "grad_norm": 1.6134790182113647, "learning_rate": 0.00028308342291654174, "loss": 0.3237, "step": 39 }, { "epoch": 0.007879056482986162, "grad_norm": 1.4775500297546387, "learning_rate": 0.00028192106268097334, "loss": 0.503, "step": 40 }, { "epoch": 0.008076032895060816, "grad_norm": 1.2248170375823975, "learning_rate": 0.00028072263654409154, "loss": 0.3367, "step": 41 }, { "epoch": 0.008273009307135471, "grad_norm": 5.347848415374756, "learning_rate": 0.0002794884721436361, "loss": 0.3759, "step": 42 }, { "epoch": 0.008469985719210125, "grad_norm": 1.2572146654129028, "learning_rate": 0.00027821890688783083, "loss": 0.3936, "step": 43 }, { "epoch": 0.008666962131284778, "grad_norm": 1.1421171426773071, "learning_rate": 0.0002769142878631403, "loss": 0.2144, "step": 44 }, { "epoch": 0.008863938543359432, "grad_norm": 0.9987139701843262, "learning_rate": 0.00027557497173937923, "loss": 0.217, "step": 45 }, { "epoch": 0.009060914955434088, "grad_norm": 1.9025821685791016, "learning_rate": 0.000274201324672203, "loss": 0.2224, "step": 46 }, { "epoch": 0.009257891367508741, "grad_norm": 0.7751299738883972, "learning_rate": 0.00027279372220300385, "loss": 0.1837, "step": 47 }, { "epoch": 0.009454867779583395, "grad_norm": 2.193005084991455, "learning_rate": 0.0002713525491562421, "loss": 0.1896, "step": 48 }, { "epoch": 0.009651844191658049, "grad_norm": 1.596818447113037, "learning_rate": 0.00026987819953423867, "loss": 0.2651, "step": 49 }, { "epoch": 0.009848820603732702, "grad_norm": 10.099899291992188, "learning_rate": 0.00026837107640945905, "loss": 0.4042, "step": 50 }, { "epoch": 0.009848820603732702, "eval_loss": 0.14771892130374908, "eval_runtime": 10.3847, "eval_samples_per_second": 4.815, "eval_steps_per_second": 0.674, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.778132335099904e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }