{ "best_metric": 2.4503767490386963, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.11286681715575621, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007524454477050414, "grad_norm": 4.9338603019714355, "learning_rate": 1.018e-05, "loss": 4.511, "step": 1 }, { "epoch": 0.0007524454477050414, "eval_loss": 2.8468708992004395, "eval_runtime": 37.6411, "eval_samples_per_second": 14.877, "eval_steps_per_second": 3.719, "step": 1 }, { "epoch": 0.0015048908954100827, "grad_norm": 5.828423500061035, "learning_rate": 2.036e-05, "loss": 3.8614, "step": 2 }, { "epoch": 0.002257336343115124, "grad_norm": 7.995044708251953, "learning_rate": 3.0539999999999996e-05, "loss": 4.6689, "step": 3 }, { "epoch": 0.0030097817908201654, "grad_norm": 6.929337024688721, "learning_rate": 4.072e-05, "loss": 4.2038, "step": 4 }, { "epoch": 0.003762227238525207, "grad_norm": 6.514157772064209, "learning_rate": 5.09e-05, "loss": 4.6263, "step": 5 }, { "epoch": 0.004514672686230248, "grad_norm": 6.524606227874756, "learning_rate": 6.107999999999999e-05, "loss": 4.8234, "step": 6 }, { "epoch": 0.005267118133935289, "grad_norm": 9.753814697265625, "learning_rate": 7.125999999999999e-05, "loss": 4.544, "step": 7 }, { "epoch": 0.006019563581640331, "grad_norm": 6.565380096435547, "learning_rate": 8.144e-05, "loss": 4.2002, "step": 8 }, { "epoch": 0.006772009029345372, "grad_norm": 7.157611846923828, "learning_rate": 9.162e-05, "loss": 3.7329, "step": 9 }, { "epoch": 0.007524454477050414, "grad_norm": 8.59189510345459, "learning_rate": 0.0001018, "loss": 5.0335, "step": 10 }, { "epoch": 0.008276899924755455, "grad_norm": 10.278609275817871, "learning_rate": 0.00010126421052631578, "loss": 5.018, "step": 11 }, { "epoch": 0.009029345372460496, "grad_norm": 11.457633018493652, "learning_rate": 0.00010072842105263156, "loss": 5.185, "step": 12 }, { "epoch": 0.009781790820165538, "grad_norm": 8.473273277282715, "learning_rate": 0.00010019263157894736, "loss": 4.661, "step": 13 }, { "epoch": 0.010534236267870579, "grad_norm": 9.23606014251709, "learning_rate": 9.965684210526316e-05, "loss": 5.1647, "step": 14 }, { "epoch": 0.011286681715575621, "grad_norm": 10.785460472106934, "learning_rate": 9.912105263157895e-05, "loss": 5.2782, "step": 15 }, { "epoch": 0.012039127163280662, "grad_norm": 9.376898765563965, "learning_rate": 9.858526315789473e-05, "loss": 4.4393, "step": 16 }, { "epoch": 0.012791572610985704, "grad_norm": 10.466320037841797, "learning_rate": 9.804947368421052e-05, "loss": 5.3007, "step": 17 }, { "epoch": 0.013544018058690745, "grad_norm": 8.497576713562012, "learning_rate": 9.75136842105263e-05, "loss": 4.941, "step": 18 }, { "epoch": 0.014296463506395787, "grad_norm": 9.376507759094238, "learning_rate": 9.69778947368421e-05, "loss": 5.1504, "step": 19 }, { "epoch": 0.015048908954100828, "grad_norm": 9.785571098327637, "learning_rate": 9.644210526315789e-05, "loss": 4.6566, "step": 20 }, { "epoch": 0.01580135440180587, "grad_norm": 16.524185180664062, "learning_rate": 9.590631578947369e-05, "loss": 5.2649, "step": 21 }, { "epoch": 0.01655379984951091, "grad_norm": 9.190619468688965, "learning_rate": 9.537052631578947e-05, "loss": 4.9649, "step": 22 }, { "epoch": 0.01730624529721595, "grad_norm": 15.659117698669434, "learning_rate": 9.483473684210526e-05, "loss": 6.6098, "step": 23 }, { "epoch": 0.01805869074492099, "grad_norm": 9.867484092712402, "learning_rate": 9.429894736842104e-05, "loss": 5.5985, "step": 24 }, { "epoch": 0.018811136192626036, "grad_norm": 8.590432167053223, "learning_rate": 9.376315789473684e-05, "loss": 4.1838, "step": 25 }, { "epoch": 0.019563581640331076, "grad_norm": 8.883073806762695, "learning_rate": 9.322736842105262e-05, "loss": 4.8669, "step": 26 }, { "epoch": 0.020316027088036117, "grad_norm": 8.349895477294922, "learning_rate": 9.269157894736842e-05, "loss": 5.1803, "step": 27 }, { "epoch": 0.021068472535741158, "grad_norm": 11.773541450500488, "learning_rate": 9.215578947368421e-05, "loss": 5.0873, "step": 28 }, { "epoch": 0.0218209179834462, "grad_norm": 13.537124633789062, "learning_rate": 9.162e-05, "loss": 4.6728, "step": 29 }, { "epoch": 0.022573363431151242, "grad_norm": 12.410065650939941, "learning_rate": 9.108421052631578e-05, "loss": 5.1454, "step": 30 }, { "epoch": 0.023325808878856283, "grad_norm": 11.18578052520752, "learning_rate": 9.054842105263158e-05, "loss": 4.2279, "step": 31 }, { "epoch": 0.024078254326561323, "grad_norm": 38.089595794677734, "learning_rate": 9.001263157894736e-05, "loss": 5.0972, "step": 32 }, { "epoch": 0.024830699774266364, "grad_norm": 20.254100799560547, "learning_rate": 8.947684210526315e-05, "loss": 4.9312, "step": 33 }, { "epoch": 0.025583145221971408, "grad_norm": 32.83479309082031, "learning_rate": 8.894105263157895e-05, "loss": 5.4251, "step": 34 }, { "epoch": 0.02633559066967645, "grad_norm": 14.390934944152832, "learning_rate": 8.840526315789473e-05, "loss": 5.1836, "step": 35 }, { "epoch": 0.02708803611738149, "grad_norm": 17.68592071533203, "learning_rate": 8.786947368421052e-05, "loss": 5.4996, "step": 36 }, { "epoch": 0.02784048156508653, "grad_norm": 15.407584190368652, "learning_rate": 8.733368421052632e-05, "loss": 5.5683, "step": 37 }, { "epoch": 0.028592927012791574, "grad_norm": 13.758222579956055, "learning_rate": 8.67978947368421e-05, "loss": 5.6531, "step": 38 }, { "epoch": 0.029345372460496615, "grad_norm": 15.094158172607422, "learning_rate": 8.626210526315789e-05, "loss": 4.6223, "step": 39 }, { "epoch": 0.030097817908201655, "grad_norm": 14.733675003051758, "learning_rate": 8.572631578947367e-05, "loss": 5.6096, "step": 40 }, { "epoch": 0.030850263355906696, "grad_norm": 11.245537757873535, "learning_rate": 8.519052631578947e-05, "loss": 4.9552, "step": 41 }, { "epoch": 0.03160270880361174, "grad_norm": 15.303187370300293, "learning_rate": 8.465473684210527e-05, "loss": 6.245, "step": 42 }, { "epoch": 0.03235515425131678, "grad_norm": 12.705514907836914, "learning_rate": 8.411894736842105e-05, "loss": 5.5335, "step": 43 }, { "epoch": 0.03310759969902182, "grad_norm": 12.836231231689453, "learning_rate": 8.358315789473684e-05, "loss": 5.8837, "step": 44 }, { "epoch": 0.033860045146726865, "grad_norm": 12.260278701782227, "learning_rate": 8.304736842105262e-05, "loss": 4.22, "step": 45 }, { "epoch": 0.0346124905944319, "grad_norm": 15.98351764678955, "learning_rate": 8.251157894736841e-05, "loss": 4.8508, "step": 46 }, { "epoch": 0.035364936042136946, "grad_norm": 18.42877197265625, "learning_rate": 8.197578947368421e-05, "loss": 5.7646, "step": 47 }, { "epoch": 0.03611738148984198, "grad_norm": 15.042816162109375, "learning_rate": 8.144e-05, "loss": 6.2102, "step": 48 }, { "epoch": 0.03686982693754703, "grad_norm": 20.197011947631836, "learning_rate": 8.090421052631579e-05, "loss": 7.2784, "step": 49 }, { "epoch": 0.03762227238525207, "grad_norm": 22.858545303344727, "learning_rate": 8.036842105263158e-05, "loss": 6.3548, "step": 50 }, { "epoch": 0.03762227238525207, "eval_loss": 2.573653221130371, "eval_runtime": 37.6194, "eval_samples_per_second": 14.886, "eval_steps_per_second": 3.721, "step": 50 }, { "epoch": 0.03837471783295711, "grad_norm": 6.201414108276367, "learning_rate": 7.983263157894736e-05, "loss": 4.6951, "step": 51 }, { "epoch": 0.03912716328066215, "grad_norm": 4.830835342407227, "learning_rate": 7.929684210526315e-05, "loss": 5.02, "step": 52 }, { "epoch": 0.0398796087283672, "grad_norm": 7.732100009918213, "learning_rate": 7.876105263157895e-05, "loss": 4.9632, "step": 53 }, { "epoch": 0.040632054176072234, "grad_norm": 7.422882556915283, "learning_rate": 7.822526315789473e-05, "loss": 4.2894, "step": 54 }, { "epoch": 0.04138449962377728, "grad_norm": 6.019810676574707, "learning_rate": 7.768947368421053e-05, "loss": 4.5985, "step": 55 }, { "epoch": 0.042136945071482315, "grad_norm": 7.448675632476807, "learning_rate": 7.715368421052631e-05, "loss": 5.6458, "step": 56 }, { "epoch": 0.04288939051918736, "grad_norm": 6.525016784667969, "learning_rate": 7.66178947368421e-05, "loss": 4.4928, "step": 57 }, { "epoch": 0.0436418359668924, "grad_norm": 5.862019062042236, "learning_rate": 7.608210526315788e-05, "loss": 5.0442, "step": 58 }, { "epoch": 0.04439428141459744, "grad_norm": 6.698094844818115, "learning_rate": 7.554631578947368e-05, "loss": 5.1862, "step": 59 }, { "epoch": 0.045146726862302484, "grad_norm": 5.901148796081543, "learning_rate": 7.501052631578947e-05, "loss": 4.6401, "step": 60 }, { "epoch": 0.04589917231000752, "grad_norm": 8.506747245788574, "learning_rate": 7.447473684210527e-05, "loss": 5.6167, "step": 61 }, { "epoch": 0.046651617757712566, "grad_norm": 8.143284797668457, "learning_rate": 7.393894736842105e-05, "loss": 4.5655, "step": 62 }, { "epoch": 0.04740406320541761, "grad_norm": 5.302389621734619, "learning_rate": 7.340315789473684e-05, "loss": 4.4897, "step": 63 }, { "epoch": 0.04815650865312265, "grad_norm": 5.529751300811768, "learning_rate": 7.286736842105262e-05, "loss": 4.2722, "step": 64 }, { "epoch": 0.04890895410082769, "grad_norm": 8.762489318847656, "learning_rate": 7.233157894736842e-05, "loss": 5.1436, "step": 65 }, { "epoch": 0.04966139954853273, "grad_norm": 11.295607566833496, "learning_rate": 7.179578947368421e-05, "loss": 4.9496, "step": 66 }, { "epoch": 0.05041384499623777, "grad_norm": 9.971809387207031, "learning_rate": 7.125999999999999e-05, "loss": 5.4893, "step": 67 }, { "epoch": 0.051166290443942816, "grad_norm": 7.344180583953857, "learning_rate": 7.072421052631579e-05, "loss": 4.6742, "step": 68 }, { "epoch": 0.05191873589164785, "grad_norm": 9.844782829284668, "learning_rate": 7.018842105263158e-05, "loss": 5.1691, "step": 69 }, { "epoch": 0.0526711813393529, "grad_norm": 8.95771598815918, "learning_rate": 6.965263157894736e-05, "loss": 4.879, "step": 70 }, { "epoch": 0.05342362678705794, "grad_norm": 8.04973030090332, "learning_rate": 6.911684210526316e-05, "loss": 5.0598, "step": 71 }, { "epoch": 0.05417607223476298, "grad_norm": 8.579294204711914, "learning_rate": 6.858105263157894e-05, "loss": 4.8055, "step": 72 }, { "epoch": 0.05492851768246802, "grad_norm": 7.308038234710693, "learning_rate": 6.804526315789473e-05, "loss": 4.7293, "step": 73 }, { "epoch": 0.05568096313017306, "grad_norm": 7.7795233726501465, "learning_rate": 6.750947368421052e-05, "loss": 5.0379, "step": 74 }, { "epoch": 0.056433408577878104, "grad_norm": 8.343374252319336, "learning_rate": 6.697368421052631e-05, "loss": 4.4904, "step": 75 }, { "epoch": 0.05718585402558315, "grad_norm": 7.44524621963501, "learning_rate": 6.64378947368421e-05, "loss": 4.7685, "step": 76 }, { "epoch": 0.057938299473288185, "grad_norm": 10.718270301818848, "learning_rate": 6.59021052631579e-05, "loss": 5.2094, "step": 77 }, { "epoch": 0.05869074492099323, "grad_norm": 9.407214164733887, "learning_rate": 6.536631578947368e-05, "loss": 5.0866, "step": 78 }, { "epoch": 0.059443190368698266, "grad_norm": 8.373135566711426, "learning_rate": 6.483052631578947e-05, "loss": 4.8998, "step": 79 }, { "epoch": 0.06019563581640331, "grad_norm": 8.654524803161621, "learning_rate": 6.429473684210525e-05, "loss": 4.6364, "step": 80 }, { "epoch": 0.060948081264108354, "grad_norm": 9.621380805969238, "learning_rate": 6.375894736842104e-05, "loss": 5.1948, "step": 81 }, { "epoch": 0.06170052671181339, "grad_norm": 13.150786399841309, "learning_rate": 6.322315789473684e-05, "loss": 5.4827, "step": 82 }, { "epoch": 0.062452972159518436, "grad_norm": 10.533498764038086, "learning_rate": 6.268736842105264e-05, "loss": 4.9195, "step": 83 }, { "epoch": 0.06320541760722348, "grad_norm": 10.708104133605957, "learning_rate": 6.215157894736842e-05, "loss": 4.8833, "step": 84 }, { "epoch": 0.06395786305492852, "grad_norm": 7.892517566680908, "learning_rate": 6.16157894736842e-05, "loss": 5.0272, "step": 85 }, { "epoch": 0.06471030850263355, "grad_norm": 13.007377624511719, "learning_rate": 6.107999999999999e-05, "loss": 4.8137, "step": 86 }, { "epoch": 0.0654627539503386, "grad_norm": 11.001848220825195, "learning_rate": 6.054421052631578e-05, "loss": 5.8105, "step": 87 }, { "epoch": 0.06621519939804364, "grad_norm": 13.360245704650879, "learning_rate": 6.000842105263157e-05, "loss": 5.2308, "step": 88 }, { "epoch": 0.06696764484574869, "grad_norm": 8.78776741027832, "learning_rate": 5.947263157894737e-05, "loss": 4.3293, "step": 89 }, { "epoch": 0.06772009029345373, "grad_norm": 11.788161277770996, "learning_rate": 5.893684210526316e-05, "loss": 5.5094, "step": 90 }, { "epoch": 0.06847253574115876, "grad_norm": 13.817206382751465, "learning_rate": 5.8401052631578944e-05, "loss": 5.6535, "step": 91 }, { "epoch": 0.0692249811888638, "grad_norm": 10.35663890838623, "learning_rate": 5.7865263157894736e-05, "loss": 4.9656, "step": 92 }, { "epoch": 0.06997742663656885, "grad_norm": 12.754554748535156, "learning_rate": 5.732947368421052e-05, "loss": 6.037, "step": 93 }, { "epoch": 0.07072987208427389, "grad_norm": 13.788698196411133, "learning_rate": 5.6793684210526306e-05, "loss": 5.4732, "step": 94 }, { "epoch": 0.07148231753197894, "grad_norm": 10.369476318359375, "learning_rate": 5.6257894736842105e-05, "loss": 4.9698, "step": 95 }, { "epoch": 0.07223476297968397, "grad_norm": 11.039383888244629, "learning_rate": 5.57221052631579e-05, "loss": 5.0991, "step": 96 }, { "epoch": 0.07298720842738901, "grad_norm": 18.217975616455078, "learning_rate": 5.518631578947368e-05, "loss": 5.7764, "step": 97 }, { "epoch": 0.07373965387509406, "grad_norm": 13.361612319946289, "learning_rate": 5.4650526315789474e-05, "loss": 4.8924, "step": 98 }, { "epoch": 0.0744920993227991, "grad_norm": 24.20296287536621, "learning_rate": 5.411473684210526e-05, "loss": 6.2003, "step": 99 }, { "epoch": 0.07524454477050414, "grad_norm": 16.34416389465332, "learning_rate": 5.3578947368421044e-05, "loss": 5.3467, "step": 100 }, { "epoch": 0.07524454477050414, "eval_loss": 2.492374897003174, "eval_runtime": 37.6751, "eval_samples_per_second": 14.864, "eval_steps_per_second": 3.716, "step": 100 }, { "epoch": 0.07599699021820917, "grad_norm": 4.632258415222168, "learning_rate": 5.3043157894736836e-05, "loss": 4.8204, "step": 101 }, { "epoch": 0.07674943566591422, "grad_norm": 5.53971004486084, "learning_rate": 5.2507368421052635e-05, "loss": 5.2553, "step": 102 }, { "epoch": 0.07750188111361926, "grad_norm": 3.8668930530548096, "learning_rate": 5.197157894736842e-05, "loss": 4.3132, "step": 103 }, { "epoch": 0.0782543265613243, "grad_norm": 5.11984920501709, "learning_rate": 5.143578947368421e-05, "loss": 4.5635, "step": 104 }, { "epoch": 0.07900677200902935, "grad_norm": 4.6450347900390625, "learning_rate": 5.09e-05, "loss": 3.4141, "step": 105 }, { "epoch": 0.0797592174567344, "grad_norm": 5.824936389923096, "learning_rate": 5.036421052631578e-05, "loss": 4.7563, "step": 106 }, { "epoch": 0.08051166290443942, "grad_norm": 4.639711380004883, "learning_rate": 4.982842105263158e-05, "loss": 4.3026, "step": 107 }, { "epoch": 0.08126410835214447, "grad_norm": 8.702680587768555, "learning_rate": 4.9292631578947366e-05, "loss": 5.7537, "step": 108 }, { "epoch": 0.08201655379984951, "grad_norm": 5.901224613189697, "learning_rate": 4.875684210526315e-05, "loss": 5.0042, "step": 109 }, { "epoch": 0.08276899924755456, "grad_norm": 8.818628311157227, "learning_rate": 4.822105263157894e-05, "loss": 5.3414, "step": 110 }, { "epoch": 0.0835214446952596, "grad_norm": 7.103747367858887, "learning_rate": 4.7685263157894735e-05, "loss": 5.2836, "step": 111 }, { "epoch": 0.08427389014296463, "grad_norm": 6.621494770050049, "learning_rate": 4.714947368421052e-05, "loss": 4.9538, "step": 112 }, { "epoch": 0.08502633559066967, "grad_norm": 8.953717231750488, "learning_rate": 4.661368421052631e-05, "loss": 5.1009, "step": 113 }, { "epoch": 0.08577878103837472, "grad_norm": 8.522113800048828, "learning_rate": 4.6077894736842104e-05, "loss": 5.0785, "step": 114 }, { "epoch": 0.08653122648607976, "grad_norm": 6.302427291870117, "learning_rate": 4.554210526315789e-05, "loss": 4.6044, "step": 115 }, { "epoch": 0.0872836719337848, "grad_norm": 8.132070541381836, "learning_rate": 4.500631578947368e-05, "loss": 4.9639, "step": 116 }, { "epoch": 0.08803611738148984, "grad_norm": 7.749171733856201, "learning_rate": 4.447052631578947e-05, "loss": 5.2865, "step": 117 }, { "epoch": 0.08878856282919488, "grad_norm": 5.969038963317871, "learning_rate": 4.393473684210526e-05, "loss": 4.7642, "step": 118 }, { "epoch": 0.08954100827689992, "grad_norm": 6.512506484985352, "learning_rate": 4.339894736842105e-05, "loss": 2.9121, "step": 119 }, { "epoch": 0.09029345372460497, "grad_norm": 6.616455554962158, "learning_rate": 4.2863157894736835e-05, "loss": 5.1259, "step": 120 }, { "epoch": 0.09104589917231001, "grad_norm": 8.678909301757812, "learning_rate": 4.2327368421052634e-05, "loss": 5.4049, "step": 121 }, { "epoch": 0.09179834462001504, "grad_norm": 7.853146553039551, "learning_rate": 4.179157894736842e-05, "loss": 5.1167, "step": 122 }, { "epoch": 0.09255079006772009, "grad_norm": 6.326202392578125, "learning_rate": 4.1255789473684204e-05, "loss": 5.1124, "step": 123 }, { "epoch": 0.09330323551542513, "grad_norm": 6.513983726501465, "learning_rate": 4.072e-05, "loss": 4.8156, "step": 124 }, { "epoch": 0.09405568096313018, "grad_norm": 7.685911178588867, "learning_rate": 4.018421052631579e-05, "loss": 4.3464, "step": 125 }, { "epoch": 0.09480812641083522, "grad_norm": 8.669236183166504, "learning_rate": 3.9648421052631573e-05, "loss": 4.7814, "step": 126 }, { "epoch": 0.09556057185854025, "grad_norm": 7.881282806396484, "learning_rate": 3.9112631578947365e-05, "loss": 5.0522, "step": 127 }, { "epoch": 0.0963130173062453, "grad_norm": 6.99576473236084, "learning_rate": 3.857684210526316e-05, "loss": 4.4357, "step": 128 }, { "epoch": 0.09706546275395034, "grad_norm": 8.76285171508789, "learning_rate": 3.804105263157894e-05, "loss": 5.5137, "step": 129 }, { "epoch": 0.09781790820165538, "grad_norm": 7.629359245300293, "learning_rate": 3.7505263157894734e-05, "loss": 4.3788, "step": 130 }, { "epoch": 0.09857035364936043, "grad_norm": 9.962780952453613, "learning_rate": 3.6969473684210526e-05, "loss": 4.5262, "step": 131 }, { "epoch": 0.09932279909706546, "grad_norm": 7.68848180770874, "learning_rate": 3.643368421052631e-05, "loss": 4.803, "step": 132 }, { "epoch": 0.1000752445447705, "grad_norm": 12.457582473754883, "learning_rate": 3.5897894736842103e-05, "loss": 5.286, "step": 133 }, { "epoch": 0.10082768999247554, "grad_norm": 10.64576244354248, "learning_rate": 3.5362105263157895e-05, "loss": 5.5489, "step": 134 }, { "epoch": 0.10158013544018059, "grad_norm": 8.363515853881836, "learning_rate": 3.482631578947368e-05, "loss": 4.724, "step": 135 }, { "epoch": 0.10233258088788563, "grad_norm": 9.08327579498291, "learning_rate": 3.429052631578947e-05, "loss": 4.5964, "step": 136 }, { "epoch": 0.10308502633559068, "grad_norm": 10.975964546203613, "learning_rate": 3.375473684210526e-05, "loss": 4.7091, "step": 137 }, { "epoch": 0.1038374717832957, "grad_norm": 8.273202896118164, "learning_rate": 3.321894736842105e-05, "loss": 3.8967, "step": 138 }, { "epoch": 0.10458991723100075, "grad_norm": 10.234407424926758, "learning_rate": 3.268315789473684e-05, "loss": 5.5976, "step": 139 }, { "epoch": 0.1053423626787058, "grad_norm": 8.687202453613281, "learning_rate": 3.2147368421052627e-05, "loss": 4.7047, "step": 140 }, { "epoch": 0.10609480812641084, "grad_norm": 9.25235652923584, "learning_rate": 3.161157894736842e-05, "loss": 4.7048, "step": 141 }, { "epoch": 0.10684725357411588, "grad_norm": 10.904390335083008, "learning_rate": 3.107578947368421e-05, "loss": 5.0487, "step": 142 }, { "epoch": 0.10759969902182091, "grad_norm": 12.776407241821289, "learning_rate": 3.0539999999999996e-05, "loss": 5.6051, "step": 143 }, { "epoch": 0.10835214446952596, "grad_norm": 10.124897003173828, "learning_rate": 3.0004210526315784e-05, "loss": 5.6051, "step": 144 }, { "epoch": 0.109104589917231, "grad_norm": 10.322992324829102, "learning_rate": 2.946842105263158e-05, "loss": 4.5447, "step": 145 }, { "epoch": 0.10985703536493605, "grad_norm": 17.68702507019043, "learning_rate": 2.8932631578947368e-05, "loss": 5.1524, "step": 146 }, { "epoch": 0.11060948081264109, "grad_norm": 14.997350692749023, "learning_rate": 2.8396842105263153e-05, "loss": 5.7192, "step": 147 }, { "epoch": 0.11136192626034612, "grad_norm": 12.031723022460938, "learning_rate": 2.786105263157895e-05, "loss": 5.2456, "step": 148 }, { "epoch": 0.11211437170805116, "grad_norm": 17.6466007232666, "learning_rate": 2.7325263157894737e-05, "loss": 6.2655, "step": 149 }, { "epoch": 0.11286681715575621, "grad_norm": 18.988000869750977, "learning_rate": 2.6789473684210522e-05, "loss": 7.0167, "step": 150 }, { "epoch": 0.11286681715575621, "eval_loss": 2.4503767490386963, "eval_runtime": 37.6662, "eval_samples_per_second": 14.867, "eval_steps_per_second": 3.717, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.437792434153062e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }