|
{
  "best_metric": 2.4503767490386963,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 0.11286681715575621,
  "eval_steps": 50,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007524454477050414,
      "grad_norm": 4.9338603019714355,
      "learning_rate": 1.018e-05,
      "loss": 4.511,
      "step": 1
    },
    {
      "epoch": 0.0007524454477050414,
      "eval_loss": 2.8468708992004395,
      "eval_runtime": 37.6411,
      "eval_samples_per_second": 14.877,
      "eval_steps_per_second": 3.719,
      "step": 1
    },
    {
      "epoch": 0.0015048908954100827,
      "grad_norm": 5.828423500061035,
      "learning_rate": 2.036e-05,
      "loss": 3.8614,
      "step": 2
    },
    {
      "epoch": 0.002257336343115124,
      "grad_norm": 7.995044708251953,
      "learning_rate": 3.0539999999999996e-05,
      "loss": 4.6689,
      "step": 3
    },
    {
      "epoch": 0.0030097817908201654,
      "grad_norm": 6.929337024688721,
      "learning_rate": 4.072e-05,
      "loss": 4.2038,
      "step": 4
    },
    {
      "epoch": 0.003762227238525207,
      "grad_norm": 6.514157772064209,
      "learning_rate": 5.09e-05,
      "loss": 4.6263,
      "step": 5
    },
    {
      "epoch": 0.004514672686230248,
      "grad_norm": 6.524606227874756,
      "learning_rate": 6.107999999999999e-05,
      "loss": 4.8234,
      "step": 6
    },
    {
      "epoch": 0.005267118133935289,
      "grad_norm": 9.753814697265625,
      "learning_rate": 7.125999999999999e-05,
      "loss": 4.544,
      "step": 7
    },
    {
      "epoch": 0.006019563581640331,
      "grad_norm": 6.565380096435547,
      "learning_rate": 8.144e-05,
      "loss": 4.2002,
      "step": 8
    },
    {
      "epoch": 0.006772009029345372,
      "grad_norm": 7.157611846923828,
      "learning_rate": 9.162e-05,
      "loss": 3.7329,
      "step": 9
    },
    {
      "epoch": 0.007524454477050414,
      "grad_norm": 8.59189510345459,
      "learning_rate": 0.0001018,
      "loss": 5.0335,
      "step": 10
    },
    {
      "epoch": 0.008276899924755455,
      "grad_norm": 10.278609275817871,
      "learning_rate": 0.00010126421052631578,
      "loss": 5.018,
      "step": 11
    },
    {
      "epoch": 0.009029345372460496,
      "grad_norm": 11.457633018493652,
      "learning_rate": 0.00010072842105263156,
      "loss": 5.185,
      "step": 12
    },
    {
      "epoch": 0.009781790820165538,
      "grad_norm": 8.473273277282715,
      "learning_rate": 0.00010019263157894736,
      "loss": 4.661,
      "step": 13
    },
    {
      "epoch": 0.010534236267870579,
      "grad_norm": 9.23606014251709,
      "learning_rate": 9.965684210526316e-05,
      "loss": 5.1647,
      "step": 14
    },
    {
      "epoch": 0.011286681715575621,
      "grad_norm": 10.785460472106934,
      "learning_rate": 9.912105263157895e-05,
      "loss": 5.2782,
      "step": 15
    },
    {
      "epoch": 0.012039127163280662,
      "grad_norm": 9.376898765563965,
      "learning_rate": 9.858526315789473e-05,
      "loss": 4.4393,
      "step": 16
    },
    {
      "epoch": 0.012791572610985704,
      "grad_norm": 10.466320037841797,
      "learning_rate": 9.804947368421052e-05,
      "loss": 5.3007,
      "step": 17
    },
    {
      "epoch": 0.013544018058690745,
      "grad_norm": 8.497576713562012,
      "learning_rate": 9.75136842105263e-05,
      "loss": 4.941,
      "step": 18
    },
    {
      "epoch": 0.014296463506395787,
      "grad_norm": 9.376507759094238,
      "learning_rate": 9.69778947368421e-05,
      "loss": 5.1504,
      "step": 19
    },
    {
      "epoch": 0.015048908954100828,
      "grad_norm": 9.785571098327637,
      "learning_rate": 9.644210526315789e-05,
      "loss": 4.6566,
      "step": 20
    },
    {
      "epoch": 0.01580135440180587,
      "grad_norm": 16.524185180664062,
      "learning_rate": 9.590631578947369e-05,
      "loss": 5.2649,
      "step": 21
    },
    {
      "epoch": 0.01655379984951091,
      "grad_norm": 9.190619468688965,
      "learning_rate": 9.537052631578947e-05,
      "loss": 4.9649,
      "step": 22
    },
    {
      "epoch": 0.01730624529721595,
      "grad_norm": 15.659117698669434,
      "learning_rate": 9.483473684210526e-05,
      "loss": 6.6098,
      "step": 23
    },
    {
      "epoch": 0.01805869074492099,
      "grad_norm": 9.867484092712402,
      "learning_rate": 9.429894736842104e-05,
      "loss": 5.5985,
      "step": 24
    },
    {
      "epoch": 0.018811136192626036,
      "grad_norm": 8.590432167053223,
      "learning_rate": 9.376315789473684e-05,
      "loss": 4.1838,
      "step": 25
    },
    {
      "epoch": 0.019563581640331076,
      "grad_norm": 8.883073806762695,
      "learning_rate": 9.322736842105262e-05,
      "loss": 4.8669,
      "step": 26
    },
    {
      "epoch": 0.020316027088036117,
      "grad_norm": 8.349895477294922,
      "learning_rate": 9.269157894736842e-05,
      "loss": 5.1803,
      "step": 27
    },
    {
      "epoch": 0.021068472535741158,
      "grad_norm": 11.773541450500488,
      "learning_rate": 9.215578947368421e-05,
      "loss": 5.0873,
      "step": 28
    },
    {
      "epoch": 0.0218209179834462,
      "grad_norm": 13.537124633789062,
      "learning_rate": 9.162e-05,
      "loss": 4.6728,
      "step": 29
    },
    {
      "epoch": 0.022573363431151242,
      "grad_norm": 12.410065650939941,
      "learning_rate": 9.108421052631578e-05,
      "loss": 5.1454,
      "step": 30
    },
    {
      "epoch": 0.023325808878856283,
      "grad_norm": 11.18578052520752,
      "learning_rate": 9.054842105263158e-05,
      "loss": 4.2279,
      "step": 31
    },
    {
      "epoch": 0.024078254326561323,
      "grad_norm": 38.089595794677734,
      "learning_rate": 9.001263157894736e-05,
      "loss": 5.0972,
      "step": 32
    },
    {
      "epoch": 0.024830699774266364,
      "grad_norm": 20.254100799560547,
      "learning_rate": 8.947684210526315e-05,
      "loss": 4.9312,
      "step": 33
    },
    {
      "epoch": 0.025583145221971408,
      "grad_norm": 32.83479309082031,
      "learning_rate": 8.894105263157895e-05,
      "loss": 5.4251,
      "step": 34
    },
    {
      "epoch": 0.02633559066967645,
      "grad_norm": 14.390934944152832,
      "learning_rate": 8.840526315789473e-05,
      "loss": 5.1836,
      "step": 35
    },
    {
      "epoch": 0.02708803611738149,
      "grad_norm": 17.68592071533203,
      "learning_rate": 8.786947368421052e-05,
      "loss": 5.4996,
      "step": 36
    },
    {
      "epoch": 0.02784048156508653,
      "grad_norm": 15.407584190368652,
      "learning_rate": 8.733368421052632e-05,
      "loss": 5.5683,
      "step": 37
    },
    {
      "epoch": 0.028592927012791574,
      "grad_norm": 13.758222579956055,
      "learning_rate": 8.67978947368421e-05,
      "loss": 5.6531,
      "step": 38
    },
    {
      "epoch": 0.029345372460496615,
      "grad_norm": 15.094158172607422,
      "learning_rate": 8.626210526315789e-05,
      "loss": 4.6223,
      "step": 39
    },
    {
      "epoch": 0.030097817908201655,
      "grad_norm": 14.733675003051758,
      "learning_rate": 8.572631578947367e-05,
      "loss": 5.6096,
      "step": 40
    },
    {
      "epoch": 0.030850263355906696,
      "grad_norm": 11.245537757873535,
      "learning_rate": 8.519052631578947e-05,
      "loss": 4.9552,
      "step": 41
    },
    {
      "epoch": 0.03160270880361174,
      "grad_norm": 15.303187370300293,
      "learning_rate": 8.465473684210527e-05,
      "loss": 6.245,
      "step": 42
    },
    {
      "epoch": 0.03235515425131678,
      "grad_norm": 12.705514907836914,
      "learning_rate": 8.411894736842105e-05,
      "loss": 5.5335,
      "step": 43
    },
    {
      "epoch": 0.03310759969902182,
      "grad_norm": 12.836231231689453,
      "learning_rate": 8.358315789473684e-05,
      "loss": 5.8837,
      "step": 44
    },
    {
      "epoch": 0.033860045146726865,
      "grad_norm": 12.260278701782227,
      "learning_rate": 8.304736842105262e-05,
      "loss": 4.22,
      "step": 45
    },
    {
      "epoch": 0.0346124905944319,
      "grad_norm": 15.98351764678955,
      "learning_rate": 8.251157894736841e-05,
      "loss": 4.8508,
      "step": 46
    },
    {
      "epoch": 0.035364936042136946,
      "grad_norm": 18.42877197265625,
      "learning_rate": 8.197578947368421e-05,
      "loss": 5.7646,
      "step": 47
    },
    {
      "epoch": 0.03611738148984198,
      "grad_norm": 15.042816162109375,
      "learning_rate": 8.144e-05,
      "loss": 6.2102,
      "step": 48
    },
    {
      "epoch": 0.03686982693754703,
      "grad_norm": 20.197011947631836,
      "learning_rate": 8.090421052631579e-05,
      "loss": 7.2784,
      "step": 49
    },
    {
      "epoch": 0.03762227238525207,
      "grad_norm": 22.858545303344727,
      "learning_rate": 8.036842105263158e-05,
      "loss": 6.3548,
      "step": 50
    },
    {
      "epoch": 0.03762227238525207,
      "eval_loss": 2.573653221130371,
      "eval_runtime": 37.6194,
      "eval_samples_per_second": 14.886,
      "eval_steps_per_second": 3.721,
      "step": 50
    },
    {
      "epoch": 0.03837471783295711,
      "grad_norm": 6.201414108276367,
      "learning_rate": 7.983263157894736e-05,
      "loss": 4.6951,
      "step": 51
    },
    {
      "epoch": 0.03912716328066215,
      "grad_norm": 4.830835342407227,
      "learning_rate": 7.929684210526315e-05,
      "loss": 5.02,
      "step": 52
    },
    {
      "epoch": 0.0398796087283672,
      "grad_norm": 7.732100009918213,
      "learning_rate": 7.876105263157895e-05,
      "loss": 4.9632,
      "step": 53
    },
    {
      "epoch": 0.040632054176072234,
      "grad_norm": 7.422882556915283,
      "learning_rate": 7.822526315789473e-05,
      "loss": 4.2894,
      "step": 54
    },
    {
      "epoch": 0.04138449962377728,
      "grad_norm": 6.019810676574707,
      "learning_rate": 7.768947368421053e-05,
      "loss": 4.5985,
      "step": 55
    },
    {
      "epoch": 0.042136945071482315,
      "grad_norm": 7.448675632476807,
      "learning_rate": 7.715368421052631e-05,
      "loss": 5.6458,
      "step": 56
    },
    {
      "epoch": 0.04288939051918736,
      "grad_norm": 6.525016784667969,
      "learning_rate": 7.66178947368421e-05,
      "loss": 4.4928,
      "step": 57
    },
    {
      "epoch": 0.0436418359668924,
      "grad_norm": 5.862019062042236,
      "learning_rate": 7.608210526315788e-05,
      "loss": 5.0442,
      "step": 58
    },
    {
      "epoch": 0.04439428141459744,
      "grad_norm": 6.698094844818115,
      "learning_rate": 7.554631578947368e-05,
      "loss": 5.1862,
      "step": 59
    },
    {
      "epoch": 0.045146726862302484,
      "grad_norm": 5.901148796081543,
      "learning_rate": 7.501052631578947e-05,
      "loss": 4.6401,
      "step": 60
    },
    {
      "epoch": 0.04589917231000752,
      "grad_norm": 8.506747245788574,
      "learning_rate": 7.447473684210527e-05,
      "loss": 5.6167,
      "step": 61
    },
    {
      "epoch": 0.046651617757712566,
      "grad_norm": 8.143284797668457,
      "learning_rate": 7.393894736842105e-05,
      "loss": 4.5655,
      "step": 62
    },
    {
      "epoch": 0.04740406320541761,
      "grad_norm": 5.302389621734619,
      "learning_rate": 7.340315789473684e-05,
      "loss": 4.4897,
      "step": 63
    },
    {
      "epoch": 0.04815650865312265,
      "grad_norm": 5.529751300811768,
      "learning_rate": 7.286736842105262e-05,
      "loss": 4.2722,
      "step": 64
    },
    {
      "epoch": 0.04890895410082769,
      "grad_norm": 8.762489318847656,
      "learning_rate": 7.233157894736842e-05,
      "loss": 5.1436,
      "step": 65
    },
    {
      "epoch": 0.04966139954853273,
      "grad_norm": 11.295607566833496,
      "learning_rate": 7.179578947368421e-05,
      "loss": 4.9496,
      "step": 66
    },
    {
      "epoch": 0.05041384499623777,
      "grad_norm": 9.971809387207031,
      "learning_rate": 7.125999999999999e-05,
      "loss": 5.4893,
      "step": 67
    },
    {
      "epoch": 0.051166290443942816,
      "grad_norm": 7.344180583953857,
      "learning_rate": 7.072421052631579e-05,
      "loss": 4.6742,
      "step": 68
    },
    {
      "epoch": 0.05191873589164785,
      "grad_norm": 9.844782829284668,
      "learning_rate": 7.018842105263158e-05,
      "loss": 5.1691,
      "step": 69
    },
    {
      "epoch": 0.0526711813393529,
      "grad_norm": 8.95771598815918,
      "learning_rate": 6.965263157894736e-05,
      "loss": 4.879,
      "step": 70
    },
    {
      "epoch": 0.05342362678705794,
      "grad_norm": 8.04973030090332,
      "learning_rate": 6.911684210526316e-05,
      "loss": 5.0598,
      "step": 71
    },
    {
      "epoch": 0.05417607223476298,
      "grad_norm": 8.579294204711914,
      "learning_rate": 6.858105263157894e-05,
      "loss": 4.8055,
      "step": 72
    },
    {
      "epoch": 0.05492851768246802,
      "grad_norm": 7.308038234710693,
      "learning_rate": 6.804526315789473e-05,
      "loss": 4.7293,
      "step": 73
    },
    {
      "epoch": 0.05568096313017306,
      "grad_norm": 7.7795233726501465,
      "learning_rate": 6.750947368421052e-05,
      "loss": 5.0379,
      "step": 74
    },
    {
      "epoch": 0.056433408577878104,
      "grad_norm": 8.343374252319336,
      "learning_rate": 6.697368421052631e-05,
      "loss": 4.4904,
      "step": 75
    },
    {
      "epoch": 0.05718585402558315,
      "grad_norm": 7.44524621963501,
      "learning_rate": 6.64378947368421e-05,
      "loss": 4.7685,
      "step": 76
    },
    {
      "epoch": 0.057938299473288185,
      "grad_norm": 10.718270301818848,
      "learning_rate": 6.59021052631579e-05,
      "loss": 5.2094,
      "step": 77
    },
    {
      "epoch": 0.05869074492099323,
      "grad_norm": 9.407214164733887,
      "learning_rate": 6.536631578947368e-05,
      "loss": 5.0866,
      "step": 78
    },
    {
      "epoch": 0.059443190368698266,
      "grad_norm": 8.373135566711426,
      "learning_rate": 6.483052631578947e-05,
      "loss": 4.8998,
      "step": 79
    },
    {
      "epoch": 0.06019563581640331,
      "grad_norm": 8.654524803161621,
      "learning_rate": 6.429473684210525e-05,
      "loss": 4.6364,
      "step": 80
    },
    {
      "epoch": 0.060948081264108354,
      "grad_norm": 9.621380805969238,
      "learning_rate": 6.375894736842104e-05,
      "loss": 5.1948,
      "step": 81
    },
    {
      "epoch": 0.06170052671181339,
      "grad_norm": 13.150786399841309,
      "learning_rate": 6.322315789473684e-05,
      "loss": 5.4827,
      "step": 82
    },
    {
      "epoch": 0.062452972159518436,
      "grad_norm": 10.533498764038086,
      "learning_rate": 6.268736842105264e-05,
      "loss": 4.9195,
      "step": 83
    },
    {
      "epoch": 0.06320541760722348,
      "grad_norm": 10.708104133605957,
      "learning_rate": 6.215157894736842e-05,
      "loss": 4.8833,
      "step": 84
    },
    {
      "epoch": 0.06395786305492852,
      "grad_norm": 7.892517566680908,
      "learning_rate": 6.16157894736842e-05,
      "loss": 5.0272,
      "step": 85
    },
    {
      "epoch": 0.06471030850263355,
      "grad_norm": 13.007377624511719,
      "learning_rate": 6.107999999999999e-05,
      "loss": 4.8137,
      "step": 86
    },
    {
      "epoch": 0.0654627539503386,
      "grad_norm": 11.001848220825195,
      "learning_rate": 6.054421052631578e-05,
      "loss": 5.8105,
      "step": 87
    },
    {
      "epoch": 0.06621519939804364,
      "grad_norm": 13.360245704650879,
      "learning_rate": 6.000842105263157e-05,
      "loss": 5.2308,
      "step": 88
    },
    {
      "epoch": 0.06696764484574869,
      "grad_norm": 8.78776741027832,
      "learning_rate": 5.947263157894737e-05,
      "loss": 4.3293,
      "step": 89
    },
    {
      "epoch": 0.06772009029345373,
      "grad_norm": 11.788161277770996,
      "learning_rate": 5.893684210526316e-05,
      "loss": 5.5094,
      "step": 90
    },
    {
      "epoch": 0.06847253574115876,
      "grad_norm": 13.817206382751465,
      "learning_rate": 5.8401052631578944e-05,
      "loss": 5.6535,
      "step": 91
    },
    {
      "epoch": 0.0692249811888638,
      "grad_norm": 10.35663890838623,
      "learning_rate": 5.7865263157894736e-05,
      "loss": 4.9656,
      "step": 92
    },
    {
      "epoch": 0.06997742663656885,
      "grad_norm": 12.754554748535156,
      "learning_rate": 5.732947368421052e-05,
      "loss": 6.037,
      "step": 93
    },
    {
      "epoch": 0.07072987208427389,
      "grad_norm": 13.788698196411133,
      "learning_rate": 5.6793684210526306e-05,
      "loss": 5.4732,
      "step": 94
    },
    {
      "epoch": 0.07148231753197894,
      "grad_norm": 10.369476318359375,
      "learning_rate": 5.6257894736842105e-05,
      "loss": 4.9698,
      "step": 95
    },
    {
      "epoch": 0.07223476297968397,
      "grad_norm": 11.039383888244629,
      "learning_rate": 5.57221052631579e-05,
      "loss": 5.0991,
      "step": 96
    },
    {
      "epoch": 0.07298720842738901,
      "grad_norm": 18.217975616455078,
      "learning_rate": 5.518631578947368e-05,
      "loss": 5.7764,
      "step": 97
    },
    {
      "epoch": 0.07373965387509406,
      "grad_norm": 13.361612319946289,
      "learning_rate": 5.4650526315789474e-05,
      "loss": 4.8924,
      "step": 98
    },
    {
      "epoch": 0.0744920993227991,
      "grad_norm": 24.20296287536621,
      "learning_rate": 5.411473684210526e-05,
      "loss": 6.2003,
      "step": 99
    },
    {
      "epoch": 0.07524454477050414,
      "grad_norm": 16.34416389465332,
      "learning_rate": 5.3578947368421044e-05,
      "loss": 5.3467,
      "step": 100
    },
    {
      "epoch": 0.07524454477050414,
      "eval_loss": 2.492374897003174,
      "eval_runtime": 37.6751,
      "eval_samples_per_second": 14.864,
      "eval_steps_per_second": 3.716,
      "step": 100
    },
    {
      "epoch": 0.07599699021820917,
      "grad_norm": 4.632258415222168,
      "learning_rate": 5.3043157894736836e-05,
      "loss": 4.8204,
      "step": 101
    },
    {
      "epoch": 0.07674943566591422,
      "grad_norm": 5.53971004486084,
      "learning_rate": 5.2507368421052635e-05,
      "loss": 5.2553,
      "step": 102
    },
    {
      "epoch": 0.07750188111361926,
      "grad_norm": 3.8668930530548096,
      "learning_rate": 5.197157894736842e-05,
      "loss": 4.3132,
      "step": 103
    },
    {
      "epoch": 0.0782543265613243,
      "grad_norm": 5.11984920501709,
      "learning_rate": 5.143578947368421e-05,
      "loss": 4.5635,
      "step": 104
    },
    {
      "epoch": 0.07900677200902935,
      "grad_norm": 4.6450347900390625,
      "learning_rate": 5.09e-05,
      "loss": 3.4141,
      "step": 105
    },
    {
      "epoch": 0.0797592174567344,
      "grad_norm": 5.824936389923096,
      "learning_rate": 5.036421052631578e-05,
      "loss": 4.7563,
      "step": 106
    },
    {
      "epoch": 0.08051166290443942,
      "grad_norm": 4.639711380004883,
      "learning_rate": 4.982842105263158e-05,
      "loss": 4.3026,
      "step": 107
    },
    {
      "epoch": 0.08126410835214447,
      "grad_norm": 8.702680587768555,
      "learning_rate": 4.9292631578947366e-05,
      "loss": 5.7537,
      "step": 108
    },
    {
      "epoch": 0.08201655379984951,
      "grad_norm": 5.901224613189697,
      "learning_rate": 4.875684210526315e-05,
      "loss": 5.0042,
      "step": 109
    },
    {
      "epoch": 0.08276899924755456,
      "grad_norm": 8.818628311157227,
      "learning_rate": 4.822105263157894e-05,
      "loss": 5.3414,
      "step": 110
    },
    {
      "epoch": 0.0835214446952596,
      "grad_norm": 7.103747367858887,
      "learning_rate": 4.7685263157894735e-05,
      "loss": 5.2836,
      "step": 111
    },
    {
      "epoch": 0.08427389014296463,
      "grad_norm": 6.621494770050049,
      "learning_rate": 4.714947368421052e-05,
      "loss": 4.9538,
      "step": 112
    },
    {
      "epoch": 0.08502633559066967,
      "grad_norm": 8.953717231750488,
      "learning_rate": 4.661368421052631e-05,
      "loss": 5.1009,
      "step": 113
    },
    {
      "epoch": 0.08577878103837472,
      "grad_norm": 8.522113800048828,
      "learning_rate": 4.6077894736842104e-05,
      "loss": 5.0785,
      "step": 114
    },
    {
      "epoch": 0.08653122648607976,
      "grad_norm": 6.302427291870117,
      "learning_rate": 4.554210526315789e-05,
      "loss": 4.6044,
      "step": 115
    },
    {
      "epoch": 0.0872836719337848,
      "grad_norm": 8.132070541381836,
      "learning_rate": 4.500631578947368e-05,
      "loss": 4.9639,
      "step": 116
    },
    {
      "epoch": 0.08803611738148984,
      "grad_norm": 7.749171733856201,
      "learning_rate": 4.447052631578947e-05,
      "loss": 5.2865,
      "step": 117
    },
    {
      "epoch": 0.08878856282919488,
      "grad_norm": 5.969038963317871,
      "learning_rate": 4.393473684210526e-05,
      "loss": 4.7642,
      "step": 118
    },
    {
      "epoch": 0.08954100827689992,
      "grad_norm": 6.512506484985352,
      "learning_rate": 4.339894736842105e-05,
      "loss": 2.9121,
      "step": 119
    },
    {
      "epoch": 0.09029345372460497,
      "grad_norm": 6.616455554962158,
      "learning_rate": 4.2863157894736835e-05,
      "loss": 5.1259,
      "step": 120
    },
    {
      "epoch": 0.09104589917231001,
      "grad_norm": 8.678909301757812,
      "learning_rate": 4.2327368421052634e-05,
      "loss": 5.4049,
      "step": 121
    },
    {
      "epoch": 0.09179834462001504,
      "grad_norm": 7.853146553039551,
      "learning_rate": 4.179157894736842e-05,
      "loss": 5.1167,
      "step": 122
    },
    {
      "epoch": 0.09255079006772009,
      "grad_norm": 6.326202392578125,
      "learning_rate": 4.1255789473684204e-05,
      "loss": 5.1124,
      "step": 123
    },
    {
      "epoch": 0.09330323551542513,
      "grad_norm": 6.513983726501465,
      "learning_rate": 4.072e-05,
      "loss": 4.8156,
      "step": 124
    },
    {
      "epoch": 0.09405568096313018,
      "grad_norm": 7.685911178588867,
      "learning_rate": 4.018421052631579e-05,
      "loss": 4.3464,
      "step": 125
    },
    {
      "epoch": 0.09480812641083522,
      "grad_norm": 8.669236183166504,
      "learning_rate": 3.9648421052631573e-05,
      "loss": 4.7814,
      "step": 126
    },
    {
      "epoch": 0.09556057185854025,
      "grad_norm": 7.881282806396484,
      "learning_rate": 3.9112631578947365e-05,
      "loss": 5.0522,
      "step": 127
    },
    {
      "epoch": 0.0963130173062453,
      "grad_norm": 6.99576473236084,
      "learning_rate": 3.857684210526316e-05,
      "loss": 4.4357,
      "step": 128
    },
    {
      "epoch": 0.09706546275395034,
      "grad_norm": 8.76285171508789,
      "learning_rate": 3.804105263157894e-05,
      "loss": 5.5137,
      "step": 129
    },
    {
      "epoch": 0.09781790820165538,
      "grad_norm": 7.629359245300293,
      "learning_rate": 3.7505263157894734e-05,
      "loss": 4.3788,
      "step": 130
    },
    {
      "epoch": 0.09857035364936043,
      "grad_norm": 9.962780952453613,
      "learning_rate": 3.6969473684210526e-05,
      "loss": 4.5262,
      "step": 131
    },
    {
      "epoch": 0.09932279909706546,
      "grad_norm": 7.68848180770874,
      "learning_rate": 3.643368421052631e-05,
      "loss": 4.803,
      "step": 132
    },
    {
      "epoch": 0.1000752445447705,
      "grad_norm": 12.457582473754883,
      "learning_rate": 3.5897894736842103e-05,
      "loss": 5.286,
      "step": 133
    },
    {
      "epoch": 0.10082768999247554,
      "grad_norm": 10.64576244354248,
      "learning_rate": 3.5362105263157895e-05,
      "loss": 5.5489,
      "step": 134
    },
    {
      "epoch": 0.10158013544018059,
      "grad_norm": 8.363515853881836,
      "learning_rate": 3.482631578947368e-05,
      "loss": 4.724,
      "step": 135
    },
    {
      "epoch": 0.10233258088788563,
      "grad_norm": 9.08327579498291,
      "learning_rate": 3.429052631578947e-05,
      "loss": 4.5964,
      "step": 136
    },
    {
      "epoch": 0.10308502633559068,
      "grad_norm": 10.975964546203613,
      "learning_rate": 3.375473684210526e-05,
      "loss": 4.7091,
      "step": 137
    },
    {
      "epoch": 0.1038374717832957,
      "grad_norm": 8.273202896118164,
      "learning_rate": 3.321894736842105e-05,
      "loss": 3.8967,
      "step": 138
    },
    {
      "epoch": 0.10458991723100075,
      "grad_norm": 10.234407424926758,
      "learning_rate": 3.268315789473684e-05,
      "loss": 5.5976,
      "step": 139
    },
    {
      "epoch": 0.1053423626787058,
      "grad_norm": 8.687202453613281,
      "learning_rate": 3.2147368421052627e-05,
      "loss": 4.7047,
      "step": 140
    },
    {
      "epoch": 0.10609480812641084,
      "grad_norm": 9.25235652923584,
      "learning_rate": 3.161157894736842e-05,
      "loss": 4.7048,
      "step": 141
    },
    {
      "epoch": 0.10684725357411588,
      "grad_norm": 10.904390335083008,
      "learning_rate": 3.107578947368421e-05,
      "loss": 5.0487,
      "step": 142
    },
    {
      "epoch": 0.10759969902182091,
      "grad_norm": 12.776407241821289,
      "learning_rate": 3.0539999999999996e-05,
      "loss": 5.6051,
      "step": 143
    },
    {
      "epoch": 0.10835214446952596,
      "grad_norm": 10.124897003173828,
      "learning_rate": 3.0004210526315784e-05,
      "loss": 5.6051,
      "step": 144
    },
    {
      "epoch": 0.109104589917231,
      "grad_norm": 10.322992324829102,
      "learning_rate": 2.946842105263158e-05,
      "loss": 4.5447,
      "step": 145
    },
    {
      "epoch": 0.10985703536493605,
      "grad_norm": 17.68702507019043,
      "learning_rate": 2.8932631578947368e-05,
      "loss": 5.1524,
      "step": 146
    },
    {
      "epoch": 0.11060948081264109,
      "grad_norm": 14.997350692749023,
      "learning_rate": 2.8396842105263153e-05,
      "loss": 5.7192,
      "step": 147
    },
    {
      "epoch": 0.11136192626034612,
      "grad_norm": 12.031723022460938,
      "learning_rate": 2.786105263157895e-05,
      "loss": 5.2456,
      "step": 148
    },
    {
      "epoch": 0.11211437170805116,
      "grad_norm": 17.6466007232666,
      "learning_rate": 2.7325263157894737e-05,
      "loss": 6.2655,
      "step": 149
    },
    {
      "epoch": 0.11286681715575621,
      "grad_norm": 18.988000869750977,
      "learning_rate": 2.6789473684210522e-05,
      "loss": 7.0167,
      "step": 150
    },
    {
      "epoch": 0.11286681715575621,
      "eval_loss": 2.4503767490386963,
      "eval_runtime": 37.6662,
      "eval_samples_per_second": 14.867,
      "eval_steps_per_second": 3.717,
      "step": 150
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.437792434153062e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}