{
  "best_metric": 0.9860224723815918,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 3.0127659574468084,
  "eval_steps": 50,
  "global_step": 177,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01702127659574468,
      "grad_norm": 2.3956034183502197,
      "learning_rate": 1e-05,
      "loss": 1.5021,
      "step": 1
    },
    {
      "epoch": 0.01702127659574468,
      "eval_loss": 1.6579022407531738,
      "eval_runtime": 4.4292,
      "eval_samples_per_second": 22.352,
      "eval_steps_per_second": 5.644,
      "step": 1
    },
    {
      "epoch": 0.03404255319148936,
      "grad_norm": 2.8143370151519775,
      "learning_rate": 2e-05,
      "loss": 1.4469,
      "step": 2
    },
    {
      "epoch": 0.05106382978723404,
      "grad_norm": 2.780684232711792,
      "learning_rate": 3e-05,
      "loss": 1.5057,
      "step": 3
    },
    {
      "epoch": 0.06808510638297872,
      "grad_norm": 2.466012716293335,
      "learning_rate": 4e-05,
      "loss": 1.5455,
      "step": 4
    },
    {
      "epoch": 0.0851063829787234,
      "grad_norm": 2.0236899852752686,
      "learning_rate": 5e-05,
      "loss": 1.3597,
      "step": 5
    },
    {
      "epoch": 0.10212765957446808,
      "grad_norm": 1.7500122785568237,
      "learning_rate": 6e-05,
      "loss": 1.3609,
      "step": 6
    },
    {
      "epoch": 0.11914893617021277,
      "grad_norm": 2.0319266319274902,
      "learning_rate": 7e-05,
      "loss": 1.325,
      "step": 7
    },
    {
      "epoch": 0.13617021276595745,
      "grad_norm": 1.9214025735855103,
      "learning_rate": 8e-05,
      "loss": 1.2035,
      "step": 8
    },
    {
      "epoch": 0.15319148936170213,
      "grad_norm": 1.8180416822433472,
      "learning_rate": 9e-05,
      "loss": 1.1698,
      "step": 9
    },
    {
      "epoch": 0.1702127659574468,
      "grad_norm": 1.722793459892273,
      "learning_rate": 0.0001,
      "loss": 1.0561,
      "step": 10
    },
    {
      "epoch": 0.18723404255319148,
      "grad_norm": 1.8683631420135498,
      "learning_rate": 9.999115304121457e-05,
      "loss": 1.1662,
      "step": 11
    },
    {
      "epoch": 0.20425531914893616,
      "grad_norm": 2.0223588943481445,
      "learning_rate": 9.996461529560553e-05,
      "loss": 1.1261,
      "step": 12
    },
    {
      "epoch": 0.22127659574468084,
      "grad_norm": 1.9186768531799316,
      "learning_rate": 9.992039615430648e-05,
      "loss": 0.9619,
      "step": 13
    },
    {
      "epoch": 0.23829787234042554,
      "grad_norm": 2.4184582233428955,
      "learning_rate": 9.985851126551428e-05,
      "loss": 1.1466,
      "step": 14
    },
    {
      "epoch": 0.2553191489361702,
      "grad_norm": 1.746878743171692,
      "learning_rate": 9.977898252895134e-05,
      "loss": 1.0855,
      "step": 15
    },
    {
      "epoch": 0.2723404255319149,
      "grad_norm": 1.4888535737991333,
      "learning_rate": 9.968183808811586e-05,
      "loss": 1.0404,
      "step": 16
    },
    {
      "epoch": 0.28936170212765955,
      "grad_norm": 1.3668509721755981,
      "learning_rate": 9.95671123203224e-05,
      "loss": 1.1569,
      "step": 17
    },
    {
      "epoch": 0.30638297872340425,
      "grad_norm": 1.2972848415374756,
      "learning_rate": 9.943484582453653e-05,
      "loss": 1.0638,
      "step": 18
    },
    {
      "epoch": 0.32340425531914896,
      "grad_norm": 1.6263564825057983,
      "learning_rate": 9.928508540700774e-05,
      "loss": 1.1049,
      "step": 19
    },
    {
      "epoch": 0.3404255319148936,
      "grad_norm": 1.4598687887191772,
      "learning_rate": 9.911788406470569e-05,
      "loss": 1.0198,
      "step": 20
    },
    {
      "epoch": 0.3574468085106383,
      "grad_norm": 1.431780457496643,
      "learning_rate": 9.893330096656574e-05,
      "loss": 1.0425,
      "step": 21
    },
    {
      "epoch": 0.37446808510638296,
      "grad_norm": 1.275739073753357,
      "learning_rate": 9.873140143255036e-05,
      "loss": 1.0508,
      "step": 22
    },
    {
      "epoch": 0.39148936170212767,
      "grad_norm": 1.3290923833847046,
      "learning_rate": 9.85122569105338e-05,
      "loss": 1.0344,
      "step": 23
    },
    {
      "epoch": 0.4085106382978723,
      "grad_norm": 1.385582685470581,
      "learning_rate": 9.827594495101823e-05,
      "loss": 0.9878,
      "step": 24
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 1.3875707387924194,
      "learning_rate": 9.802254917969032e-05,
      "loss": 1.0025,
      "step": 25
    },
    {
      "epoch": 0.4425531914893617,
      "grad_norm": 1.3922779560089111,
      "learning_rate": 9.775215926782788e-05,
      "loss": 0.9845,
      "step": 26
    },
    {
      "epoch": 0.4595744680851064,
      "grad_norm": 1.487823247909546,
      "learning_rate": 9.746487090056713e-05,
      "loss": 0.8108,
      "step": 27
    },
    {
      "epoch": 0.4765957446808511,
      "grad_norm": 2.1065125465393066,
      "learning_rate": 9.716078574304189e-05,
      "loss": 1.0918,
      "step": 28
    },
    {
      "epoch": 0.49361702127659574,
      "grad_norm": 1.2851176261901855,
      "learning_rate": 9.684001140440639e-05,
      "loss": 1.1534,
      "step": 29
    },
    {
      "epoch": 0.5106382978723404,
      "grad_norm": 1.350062370300293,
      "learning_rate": 9.650266139975474e-05,
      "loss": 1.0002,
      "step": 30
    },
    {
      "epoch": 0.5276595744680851,
      "grad_norm": 1.263066291809082,
      "learning_rate": 9.614885510995047e-05,
      "loss": 1.0785,
      "step": 31
    },
    {
      "epoch": 0.5446808510638298,
      "grad_norm": 1.1791244745254517,
      "learning_rate": 9.577871773938011e-05,
      "loss": 1.0402,
      "step": 32
    },
    {
      "epoch": 0.5617021276595745,
      "grad_norm": 1.1864782571792603,
      "learning_rate": 9.539238027164619e-05,
      "loss": 1.091,
      "step": 33
    },
    {
      "epoch": 0.5787234042553191,
      "grad_norm": 1.2298381328582764,
      "learning_rate": 9.498997942321483e-05,
      "loss": 0.998,
      "step": 34
    },
    {
      "epoch": 0.5957446808510638,
      "grad_norm": 1.2237919569015503,
      "learning_rate": 9.457165759503493e-05,
      "loss": 0.9893,
      "step": 35
    },
    {
      "epoch": 0.6127659574468085,
      "grad_norm": 1.3423452377319336,
      "learning_rate": 9.413756282214537e-05,
      "loss": 0.955,
      "step": 36
    },
    {
      "epoch": 0.6297872340425532,
      "grad_norm": 1.3232065439224243,
      "learning_rate": 9.368784872128878e-05,
      "loss": 1.0207,
      "step": 37
    },
    {
      "epoch": 0.6468085106382979,
      "grad_norm": 1.2950204610824585,
      "learning_rate": 9.322267443654972e-05,
      "loss": 0.8997,
      "step": 38
    },
    {
      "epoch": 0.6638297872340425,
      "grad_norm": 1.3665379285812378,
      "learning_rate": 9.274220458303727e-05,
      "loss": 0.9627,
      "step": 39
    },
    {
      "epoch": 0.6808510638297872,
      "grad_norm": 1.5041331052780151,
      "learning_rate": 9.224660918863104e-05,
      "loss": 0.9353,
      "step": 40
    },
    {
      "epoch": 0.6978723404255319,
      "grad_norm": 1.484763264656067,
      "learning_rate": 9.173606363381219e-05,
      "loss": 0.9178,
      "step": 41
    },
    {
      "epoch": 0.7148936170212766,
      "grad_norm": 2.1842877864837646,
      "learning_rate": 9.121074858959997e-05,
      "loss": 0.912,
      "step": 42
    },
    {
      "epoch": 0.7319148936170212,
      "grad_norm": 1.146817922592163,
      "learning_rate": 9.067084995361623e-05,
      "loss": 1.0537,
      "step": 43
    },
    {
      "epoch": 0.7489361702127659,
      "grad_norm": 1.1362913846969604,
      "learning_rate": 9.011655878430019e-05,
      "loss": 0.9867,
      "step": 44
    },
    {
      "epoch": 0.7659574468085106,
      "grad_norm": 1.1847161054611206,
      "learning_rate": 8.954807123329704e-05,
      "loss": 1.0172,
      "step": 45
    },
    {
      "epoch": 0.7829787234042553,
      "grad_norm": 1.1899924278259277,
      "learning_rate": 8.896558847604414e-05,
      "loss": 1.0202,
      "step": 46
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.22841477394104,
      "learning_rate": 8.836931664057935e-05,
      "loss": 1.0795,
      "step": 47
    },
    {
      "epoch": 0.8170212765957446,
      "grad_norm": 1.1131497621536255,
      "learning_rate": 8.775946673459681e-05,
      "loss": 0.951,
      "step": 48
    },
    {
      "epoch": 0.8340425531914893,
      "grad_norm": 1.2665643692016602,
      "learning_rate": 8.713625457077585e-05,
      "loss": 0.936,
      "step": 49
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 1.2944315671920776,
      "learning_rate": 8.649990069040961e-05,
      "loss": 1.098,
      "step": 50
    },
    {
      "epoch": 0.851063829787234,
      "eval_loss": 0.9860224723815918,
      "eval_runtime": 4.4613,
      "eval_samples_per_second": 22.191,
      "eval_steps_per_second": 5.604,
      "step": 50
    },
    {
      "epoch": 0.8680851063829788,
      "grad_norm": 1.3318464756011963,
      "learning_rate": 8.585063028536016e-05,
      "loss": 0.9964,
      "step": 51
    },
    {
      "epoch": 0.8851063829787233,
      "grad_norm": 1.2892343997955322,
      "learning_rate": 8.518867311836808e-05,
      "loss": 0.8719,
      "step": 52
    },
    {
      "epoch": 0.902127659574468,
      "grad_norm": 1.4797875881195068,
      "learning_rate": 8.451426344174433e-05,
      "loss": 1.0485,
      "step": 53
    },
    {
      "epoch": 0.9191489361702128,
      "grad_norm": 1.351209044456482,
      "learning_rate": 8.382763991447344e-05,
      "loss": 0.8408,
      "step": 54
    },
    {
      "epoch": 0.9361702127659575,
      "grad_norm": 1.4917362928390503,
      "learning_rate": 8.312904551775731e-05,
      "loss": 0.9081,
      "step": 55
    },
    {
      "epoch": 0.9531914893617022,
      "grad_norm": 1.636767864227295,
      "learning_rate": 8.241872746902935e-05,
      "loss": 0.7751,
      "step": 56
    },
    {
      "epoch": 0.9702127659574468,
      "grad_norm": 1.0549850463867188,
      "learning_rate": 8.169693713446959e-05,
      "loss": 0.9314,
      "step": 57
    },
    {
      "epoch": 0.9872340425531915,
      "grad_norm": 1.174147367477417,
      "learning_rate": 8.096392994005177e-05,
      "loss": 0.839,
      "step": 58
    },
    {
      "epoch": 1.004255319148936,
      "grad_norm": 1.7790963649749756,
      "learning_rate": 8.021996528115335e-05,
      "loss": 1.0854,
      "step": 59
    },
    {
      "epoch": 1.0212765957446808,
      "grad_norm": 0.8404403924942017,
      "learning_rate": 7.946530643076138e-05,
      "loss": 0.791,
      "step": 60
    },
    {
      "epoch": 1.0382978723404255,
      "grad_norm": 0.9521711468696594,
      "learning_rate": 7.870022044630569e-05,
      "loss": 0.6483,
      "step": 61
    },
    {
      "epoch": 1.0553191489361702,
      "grad_norm": 0.9962537288665771,
      "learning_rate": 7.792497807515317e-05,
      "loss": 0.7106,
      "step": 62
    },
    {
      "epoch": 1.0723404255319149,
      "grad_norm": 0.9780325293540955,
      "learning_rate": 7.713985365879606e-05,
      "loss": 0.6272,
      "step": 63
    },
    {
      "epoch": 1.0893617021276596,
      "grad_norm": 0.9337742328643799,
      "learning_rate": 7.63451250357685e-05,
      "loss": 0.6399,
      "step": 64
    },
    {
      "epoch": 1.1063829787234043,
      "grad_norm": 0.9755467176437378,
      "learning_rate": 7.55410734433254e-05,
      "loss": 0.5694,
      "step": 65
    },
    {
      "epoch": 1.123404255319149,
      "grad_norm": 1.0322693586349487,
      "learning_rate": 7.472798341791877e-05,
      "loss": 0.5253,
      "step": 66
    },
    {
      "epoch": 1.1404255319148937,
      "grad_norm": 1.1534591913223267,
      "learning_rate": 7.390614269450634e-05,
      "loss": 0.6757,
      "step": 67
    },
    {
      "epoch": 1.1574468085106382,
      "grad_norm": 1.1756784915924072,
      "learning_rate": 7.307584210472844e-05,
      "loss": 0.5744,
      "step": 68
    },
    {
      "epoch": 1.174468085106383,
      "grad_norm": 1.227107048034668,
      "learning_rate": 7.223737547398898e-05,
      "loss": 0.471,
      "step": 69
    },
    {
      "epoch": 1.1914893617021276,
      "grad_norm": 1.4421412944793701,
      "learning_rate": 7.139103951747695e-05,
      "loss": 0.5671,
      "step": 70
    },
    {
      "epoch": 1.2085106382978723,
      "grad_norm": 1.5038673877716064,
      "learning_rate": 7.053713373516538e-05,
      "loss": 0.5429,
      "step": 71
    },
    {
      "epoch": 1.225531914893617,
      "grad_norm": 1.6070609092712402,
      "learning_rate": 6.967596030582478e-05,
      "loss": 0.3801,
      "step": 72
    },
    {
      "epoch": 1.2425531914893617,
      "grad_norm": 1.7779772281646729,
      "learning_rate": 6.880782398008862e-05,
      "loss": 0.6141,
      "step": 73
    },
    {
      "epoch": 1.2595744680851064,
      "grad_norm": 1.2830954790115356,
      "learning_rate": 6.793303197260864e-05,
      "loss": 0.667,
      "step": 74
    },
    {
      "epoch": 1.2765957446808511,
      "grad_norm": 1.3784350156784058,
      "learning_rate": 6.70518938533383e-05,
      "loss": 0.6144,
      "step": 75
    },
    {
      "epoch": 1.2936170212765958,
      "grad_norm": 1.772112488746643,
      "learning_rate": 6.616472143798261e-05,
      "loss": 0.8042,
      "step": 76
    },
    {
      "epoch": 1.3106382978723405,
      "grad_norm": 1.6115304231643677,
      "learning_rate": 6.527182867765332e-05,
      "loss": 0.6911,
      "step": 77
    },
    {
      "epoch": 1.327659574468085,
      "grad_norm": 1.4860713481903076,
      "learning_rate": 6.437353154776849e-05,
      "loss": 0.668,
      "step": 78
    },
    {
      "epoch": 1.3446808510638297,
      "grad_norm": 1.3420848846435547,
      "learning_rate": 6.347014793623547e-05,
      "loss": 0.5545,
      "step": 79
    },
    {
      "epoch": 1.3617021276595744,
      "grad_norm": 1.3645442724227905,
      "learning_rate": 6.256199753095745e-05,
      "loss": 0.6041,
      "step": 80
    },
    {
      "epoch": 1.3787234042553191,
      "grad_norm": 1.2304599285125732,
      "learning_rate": 6.164940170670266e-05,
      "loss": 0.4798,
      "step": 81
    },
    {
      "epoch": 1.3957446808510638,
      "grad_norm": 1.1637616157531738,
      "learning_rate": 6.0732683411376935e-05,
      "loss": 0.4231,
      "step": 82
    },
    {
      "epoch": 1.4127659574468086,
      "grad_norm": 1.2594172954559326,
      "learning_rate": 5.98121670517393e-05,
      "loss": 0.4704,
      "step": 83
    },
    {
      "epoch": 1.4297872340425533,
      "grad_norm": 1.3521147966384888,
      "learning_rate": 5.8888178378601565e-05,
      "loss": 0.4922,
      "step": 84
    },
    {
      "epoch": 1.4468085106382977,
      "grad_norm": 1.2671799659729004,
      "learning_rate": 5.796104437155213e-05,
      "loss": 0.3777,
      "step": 85
    },
    {
      "epoch": 1.4638297872340424,
      "grad_norm": 1.4365724325180054,
      "learning_rate": 5.7031093123244925e-05,
      "loss": 0.4116,
      "step": 86
    },
    {
      "epoch": 1.4808510638297872,
      "grad_norm": 1.3327573537826538,
      "learning_rate": 5.6098653723294604e-05,
      "loss": 0.4738,
      "step": 87
    },
    {
      "epoch": 1.4978723404255319,
      "grad_norm": 1.1536500453948975,
      "learning_rate": 5.516405614181883e-05,
      "loss": 0.6458,
      "step": 88
    },
    {
      "epoch": 1.5148936170212766,
      "grad_norm": 1.2099850177764893,
      "learning_rate": 5.4227631112668955e-05,
      "loss": 0.628,
      "step": 89
    },
    {
      "epoch": 1.5319148936170213,
      "grad_norm": 1.3336706161499023,
      "learning_rate": 5.3289710016390535e-05,
      "loss": 0.6473,
      "step": 90
    },
    {
      "epoch": 1.548936170212766,
      "grad_norm": 1.279130220413208,
      "learning_rate": 5.2350624762954884e-05,
      "loss": 0.6072,
      "step": 91
    },
    {
      "epoch": 1.5659574468085107,
      "grad_norm": 1.2410911321640015,
      "learning_rate": 5.14107076743033e-05,
      "loss": 0.5237,
      "step": 92
    },
    {
      "epoch": 1.5829787234042554,
      "grad_norm": 1.3553860187530518,
      "learning_rate": 5.047029136674563e-05,
      "loss": 0.6179,
      "step": 93
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.236146092414856,
      "learning_rate": 4.95297086332544e-05,
      "loss": 0.4814,
      "step": 94
    },
    {
      "epoch": 1.6170212765957448,
      "grad_norm": 1.4804574251174927,
      "learning_rate": 4.858929232569671e-05,
      "loss": 0.5876,
      "step": 95
    },
    {
      "epoch": 1.6340425531914895,
      "grad_norm": 1.3778893947601318,
      "learning_rate": 4.7649375237045135e-05,
      "loss": 0.4875,
      "step": 96
    },
    {
      "epoch": 1.6510638297872342,
      "grad_norm": 1.3673796653747559,
      "learning_rate": 4.671028998360947e-05,
      "loss": 0.4232,
      "step": 97
    },
    {
      "epoch": 1.6680851063829787,
      "grad_norm": 1.4746342897415161,
      "learning_rate": 4.577236888733105e-05,
      "loss": 0.4221,
      "step": 98
    },
    {
      "epoch": 1.6851063829787234,
      "grad_norm": 1.4111493825912476,
      "learning_rate": 4.483594385818118e-05,
      "loss": 0.4132,
      "step": 99
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 1.6582450866699219,
      "learning_rate": 4.39013462767054e-05,
      "loss": 0.3586,
      "step": 100
    },
    {
      "epoch": 1.702127659574468,
      "eval_loss": 1.0457570552825928,
      "eval_runtime": 4.456,
      "eval_samples_per_second": 22.217,
      "eval_steps_per_second": 5.61,
      "step": 100
    },
    {
      "epoch": 1.7191489361702128,
      "grad_norm": 1.557518720626831,
      "learning_rate": 4.29689068767551e-05,
      "loss": 0.4448,
      "step": 101
    },
    {
      "epoch": 1.7361702127659573,
      "grad_norm": 1.330199122428894,
      "learning_rate": 4.203895562844789e-05,
      "loss": 0.6906,
      "step": 102
    },
    {
      "epoch": 1.753191489361702,
      "grad_norm": 1.3650050163269043,
      "learning_rate": 4.1111821621398446e-05,
      "loss": 0.572,
      "step": 103
    },
    {
      "epoch": 1.7702127659574467,
      "grad_norm": 1.286249041557312,
      "learning_rate": 4.0187832948260705e-05,
      "loss": 0.5293,
      "step": 104
    },
    {
      "epoch": 1.7872340425531914,
      "grad_norm": 1.4000760316848755,
      "learning_rate": 3.926731658862307e-05,
      "loss": 0.5973,
      "step": 105
    },
    {
      "epoch": 1.804255319148936,
      "grad_norm": 1.450547456741333,
      "learning_rate": 3.835059829329735e-05,
      "loss": 0.5751,
      "step": 106
    },
    {
      "epoch": 1.8212765957446808,
      "grad_norm": 1.4450291395187378,
      "learning_rate": 3.7438002469042565e-05,
      "loss": 0.49,
      "step": 107
    },
    {
      "epoch": 1.8382978723404255,
      "grad_norm": 1.4160994291305542,
      "learning_rate": 3.6529852063764545e-05,
      "loss": 0.5344,
      "step": 108
    },
    {
      "epoch": 1.8553191489361702,
      "grad_norm": 1.4834502935409546,
      "learning_rate": 3.562646845223153e-05,
      "loss": 0.5433,
      "step": 109
    },
    {
      "epoch": 1.872340425531915,
      "grad_norm": 1.3811678886413574,
      "learning_rate": 3.4728171322346694e-05,
      "loss": 0.4107,
      "step": 110
    },
    {
      "epoch": 1.8893617021276596,
      "grad_norm": 1.3840794563293457,
      "learning_rate": 3.38352785620174e-05,
      "loss": 0.4713,
      "step": 111
    },
    {
      "epoch": 1.9063829787234043,
      "grad_norm": 1.5129518508911133,
      "learning_rate": 3.29481061466617e-05,
      "loss": 0.4653,
      "step": 112
    },
    {
      "epoch": 1.923404255319149,
      "grad_norm": 1.569916009902954,
      "learning_rate": 3.2066968027391374e-05,
      "loss": 0.3947,
      "step": 113
    },
    {
      "epoch": 1.9404255319148938,
      "grad_norm": 1.7366918325424194,
      "learning_rate": 3.119217601991139e-05,
      "loss": 0.3459,
      "step": 114
    },
    {
      "epoch": 1.9574468085106385,
      "grad_norm": 1.6125799417495728,
      "learning_rate": 3.0324039694175233e-05,
      "loss": 0.3711,
      "step": 115
    },
    {
      "epoch": 1.974468085106383,
      "grad_norm": 1.3394767045974731,
      "learning_rate": 2.946286626483463e-05,
      "loss": 0.6046,
      "step": 116
    },
    {
      "epoch": 1.9914893617021276,
      "grad_norm": 1.6290675401687622,
      "learning_rate": 2.8608960482523056e-05,
      "loss": 0.5723,
      "step": 117
    },
    {
      "epoch": 2.008510638297872,
      "grad_norm": 1.4972840547561646,
      "learning_rate": 2.7762624526011038e-05,
      "loss": 0.5839,
      "step": 118
    },
    {
      "epoch": 2.025531914893617,
      "grad_norm": 0.92827969789505,
      "learning_rate": 2.6924157895271563e-05,
      "loss": 0.414,
      "step": 119
    },
    {
      "epoch": 2.0425531914893615,
      "grad_norm": 0.9881418943405151,
      "learning_rate": 2.6093857305493664e-05,
      "loss": 0.338,
      "step": 120
    },
    {
      "epoch": 2.0595744680851062,
      "grad_norm": 1.0140328407287598,
      "learning_rate": 2.5272016582081236e-05,
      "loss": 0.3692,
      "step": 121
    },
    {
      "epoch": 2.076595744680851,
      "grad_norm": 1.0715774297714233,
      "learning_rate": 2.4458926556674615e-05,
      "loss": 0.3516,
      "step": 122
    },
    {
      "epoch": 2.0936170212765957,
      "grad_norm": 0.9538404941558838,
      "learning_rate": 2.3654874964231518e-05,
      "loss": 0.2303,
      "step": 123
    },
    {
      "epoch": 2.1106382978723404,
      "grad_norm": 1.1230480670928955,
      "learning_rate": 2.2860146341203937e-05,
      "loss": 0.293,
      "step": 124
    },
    {
      "epoch": 2.127659574468085,
      "grad_norm": 1.1339483261108398,
      "learning_rate": 2.207502192484685e-05,
      "loss": 0.3407,
      "step": 125
    },
    {
      "epoch": 2.1446808510638298,
      "grad_norm": 1.0703402757644653,
      "learning_rate": 2.1299779553694323e-05,
      "loss": 0.2584,
      "step": 126
    },
    {
      "epoch": 2.1617021276595745,
      "grad_norm": 1.0961896181106567,
      "learning_rate": 2.053469356923865e-05,
      "loss": 0.2334,
      "step": 127
    },
    {
      "epoch": 2.178723404255319,
      "grad_norm": 1.2202945947647095,
      "learning_rate": 1.978003471884665e-05,
      "loss": 0.2705,
      "step": 128
    },
    {
      "epoch": 2.195744680851064,
      "grad_norm": 1.281968593597412,
      "learning_rate": 1.9036070059948252e-05,
      "loss": 0.2222,
      "step": 129
    },
    {
      "epoch": 2.2127659574468086,
      "grad_norm": 1.530350685119629,
      "learning_rate": 1.8303062865530406e-05,
      "loss": 0.1816,
      "step": 130
    },
    {
      "epoch": 2.2297872340425533,
      "grad_norm": 1.5826750993728638,
      "learning_rate": 1.7581272530970667e-05,
      "loss": 0.2032,
      "step": 131
    },
    {
      "epoch": 2.246808510638298,
      "grad_norm": 1.333558440208435,
      "learning_rate": 1.6870954482242707e-05,
      "loss": 0.4571,
      "step": 132
    },
    {
      "epoch": 2.2638297872340427,
      "grad_norm": 1.2566792964935303,
      "learning_rate": 1.6172360085526565e-05,
      "loss": 0.394,
      "step": 133
    },
    {
      "epoch": 2.2808510638297874,
      "grad_norm": 1.2111644744873047,
      "learning_rate": 1.5485736558255697e-05,
      "loss": 0.3292,
      "step": 134
    },
    {
      "epoch": 2.297872340425532,
      "grad_norm": 1.3459227085113525,
      "learning_rate": 1.4811326881631937e-05,
      "loss": 0.3263,
      "step": 135
    },
    {
      "epoch": 2.3148936170212764,
      "grad_norm": 1.3436534404754639,
      "learning_rate": 1.4149369714639853e-05,
      "loss": 0.3349,
      "step": 136
    },
    {
      "epoch": 2.331914893617021,
      "grad_norm": 1.4782233238220215,
      "learning_rate": 1.3500099309590397e-05,
      "loss": 0.3526,
      "step": 137
    },
    {
      "epoch": 2.348936170212766,
      "grad_norm": 1.3280771970748901,
      "learning_rate": 1.2863745429224144e-05,
      "loss": 0.2997,
      "step": 138
    },
    {
      "epoch": 2.3659574468085105,
      "grad_norm": 1.3815573453903198,
      "learning_rate": 1.2240533265403198e-05,
      "loss": 0.2855,
      "step": 139
    },
    {
      "epoch": 2.382978723404255,
      "grad_norm": 1.4903278350830078,
      "learning_rate": 1.1630683359420652e-05,
      "loss": 0.2904,
      "step": 140
    },
    {
      "epoch": 2.4,
      "grad_norm": 1.3255101442337036,
      "learning_rate": 1.103441152395588e-05,
      "loss": 0.1762,
      "step": 141
    },
    {
      "epoch": 2.4170212765957446,
      "grad_norm": 1.3879741430282593,
      "learning_rate": 1.0451928766702979e-05,
      "loss": 0.2153,
      "step": 142
    },
    {
      "epoch": 2.4340425531914893,
      "grad_norm": 1.2487590312957764,
      "learning_rate": 9.883441215699823e-06,
      "loss": 0.1699,
      "step": 143
    },
    {
      "epoch": 2.451063829787234,
      "grad_norm": 1.142252802848816,
      "learning_rate": 9.329150046383772e-06,
      "loss": 0.1227,
      "step": 144
    },
    {
      "epoch": 2.4680851063829787,
      "grad_norm": 1.8175076246261597,
      "learning_rate": 8.789251410400023e-06,
      "loss": 0.1298,
      "step": 145
    },
    {
      "epoch": 2.4851063829787234,
      "grad_norm": 1.2175217866897583,
      "learning_rate": 8.263936366187824e-06,
      "loss": 0.3301,
      "step": 146
    },
    {
      "epoch": 2.502127659574468,
      "grad_norm": 1.314565896987915,
      "learning_rate": 7.753390811368971e-06,
      "loss": 0.3832,
      "step": 147
    },
    {
      "epoch": 2.519148936170213,
      "grad_norm": 1.393023133277893,
      "learning_rate": 7.257795416962753e-06,
      "loss": 0.331,
      "step": 148
    },
    {
      "epoch": 2.5361702127659576,
      "grad_norm": 1.4550484418869019,
      "learning_rate": 6.777325563450282e-06,
      "loss": 0.3105,
      "step": 149
    },
    {
      "epoch": 2.5531914893617023,
      "grad_norm": 1.555659294128418,
      "learning_rate": 6.312151278711237e-06,
      "loss": 0.3881,
      "step": 150
    },
    {
      "epoch": 2.5531914893617023,
      "eval_loss": 1.250982403755188,
      "eval_runtime": 4.4645,
      "eval_samples_per_second": 22.175,
      "eval_steps_per_second": 5.6,
      "step": 150
    },
    {
      "epoch": 2.570212765957447,
      "grad_norm": 1.4013265371322632,
      "learning_rate": 5.86243717785463e-06,
      "loss": 0.3459,
      "step": 151
    },
    {
      "epoch": 2.5872340425531917,
      "grad_norm": 1.444057822227478,
      "learning_rate": 5.428342404965076e-06,
      "loss": 0.277,
      "step": 152
    },
    {
      "epoch": 2.604255319148936,
      "grad_norm": 1.328897476196289,
      "learning_rate": 5.010020576785174e-06,
      "loss": 0.2324,
      "step": 153
    },
    {
      "epoch": 2.621276595744681,
      "grad_norm": 1.4151270389556885,
      "learning_rate": 4.607619728353818e-06,
      "loss": 0.2435,
      "step": 154
    },
    {
      "epoch": 2.6382978723404253,
      "grad_norm": 1.3859100341796875,
      "learning_rate": 4.221282260619891e-06,
      "loss": 0.2068,
      "step": 155
    },
    {
      "epoch": 2.65531914893617,
      "grad_norm": 1.1932073831558228,
      "learning_rate": 3.851144890049535e-06,
      "loss": 0.156,
      "step": 156
    },
    {
      "epoch": 2.6723404255319148,
      "grad_norm": 1.4522919654846191,
      "learning_rate": 3.4973386002452535e-06,
      "loss": 0.2544,
      "step": 157
    },
    {
      "epoch": 2.6893617021276595,
      "grad_norm": 1.2598811388015747,
      "learning_rate": 3.159988595593616e-06,
      "loss": 0.1501,
      "step": 158
    },
    {
      "epoch": 2.706382978723404,
      "grad_norm": 1.3632055521011353,
      "learning_rate": 2.839214256958106e-06,
      "loss": 0.1397,
      "step": 159
    },
    {
      "epoch": 2.723404255319149,
      "grad_norm": 1.353302240371704,
      "learning_rate": 2.53512909943287e-06,
      "loss": 0.305,
      "step": 160
    },
    {
      "epoch": 2.7404255319148936,
      "grad_norm": 1.339839220046997,
      "learning_rate": 2.2478407321721296e-06,
      "loss": 0.366,
      "step": 161
    },
    {
      "epoch": 2.7574468085106383,
      "grad_norm": 1.3590214252471924,
      "learning_rate": 1.977450820309684e-06,
      "loss": 0.3311,
      "step": 162
    },
    {
      "epoch": 2.774468085106383,
      "grad_norm": 1.4828448295593262,
      "learning_rate": 1.7240550489817653e-06,
      "loss": 0.3547,
      "step": 163
    },
    {
      "epoch": 2.7914893617021277,
      "grad_norm": 1.3542424440383911,
      "learning_rate": 1.4877430894662036e-06,
      "loss": 0.2935,
      "step": 164
    },
    {
      "epoch": 2.8085106382978724,
      "grad_norm": 1.304952621459961,
      "learning_rate": 1.268598567449647e-06,
      "loss": 0.2388,
      "step": 165
    },
    {
      "epoch": 2.825531914893617,
      "grad_norm": 1.453139066696167,
      "learning_rate": 1.0666990334342707e-06,
      "loss": 0.2971,
      "step": 166
    },
    {
      "epoch": 2.842553191489362,
      "grad_norm": 1.335195541381836,
      "learning_rate": 8.821159352943143e-07,
      "loss": 0.2004,
      "step": 167
    },
    {
      "epoch": 2.8595744680851065,
      "grad_norm": 1.482730746269226,
      "learning_rate": 7.149145929922607e-07,
      "loss": 0.2955,
      "step": 168
    },
    {
      "epoch": 2.876595744680851,
      "grad_norm": 1.3346716165542603,
      "learning_rate": 5.651541754634726e-07,
      "loss": 0.2225,
      "step": 169
    },
    {
      "epoch": 2.8936170212765955,
      "grad_norm": 1.362627625465393,
      "learning_rate": 4.3288767967760715e-07,
      "loss": 0.1993,
      "step": 170
    },
    {
      "epoch": 2.9106382978723406,
      "grad_norm": 1.4417704343795776,
      "learning_rate": 3.1816191188415166e-07,
      "loss": 0.2222,
      "step": 171
    },
    {
      "epoch": 2.927659574468085,
      "grad_norm": 1.2695422172546387,
      "learning_rate": 2.2101747104866788e-07,
      "loss": 0.1648,
      "step": 172
    },
    {
      "epoch": 2.94468085106383,
      "grad_norm": 1.325673222541809,
      "learning_rate": 1.4148873448573408e-07,
      "loss": 0.1262,
      "step": 173
    },
    {
      "epoch": 2.9617021276595743,
      "grad_norm": 1.330980896949768,
      "learning_rate": 7.960384569353219e-08,
      "loss": 0.2392,
      "step": 174
    },
    {
      "epoch": 2.978723404255319,
      "grad_norm": 1.4023689031600952,
      "learning_rate": 3.538470439448105e-08,
      "loss": 0.2823,
      "step": 175
    },
    {
      "epoch": 2.9957446808510637,
      "grad_norm": 1.465209722518921,
      "learning_rate": 8.846958785418968e-09,
      "loss": 0.192,
      "step": 176
    },
    {
      "epoch": 3.0127659574468084,
      "grad_norm": 1.1684350967407227,
      "learning_rate": 0.0,
      "loss": 0.345,
      "step": 177
    }
  ],
  "logging_steps": 1,
  "max_steps": 177,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 2
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.334314061384909e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}