{
  "best_metric": 0.636073112487793,
  "best_model_checkpoint": "miner_id_24/checkpoint-900",
  "epoch": 2.278893109061313,
  "eval_steps": 150,
  "global_step": 1050,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002170374389582203,
      "eval_loss": 3.8393607139587402,
      "eval_runtime": 52.8023,
      "eval_samples_per_second": 14.715,
      "eval_steps_per_second": 1.856,
      "step": 1
    },
    {
      "epoch": 0.02170374389582203,
      "grad_norm": 26.712209701538086,
      "learning_rate": 6e-06,
      "loss": 10.3362,
      "step": 10
    },
    {
      "epoch": 0.04340748779164406,
      "grad_norm": 38.7980842590332,
      "learning_rate": 1.2e-05,
      "loss": 12.0151,
      "step": 20
    },
    {
      "epoch": 0.06511123168746609,
      "grad_norm": 29.304401397705078,
      "learning_rate": 1.8e-05,
      "loss": 11.2638,
      "step": 30
    },
    {
      "epoch": 0.08681497558328811,
      "grad_norm": 39.74540710449219,
      "learning_rate": 2.4e-05,
      "loss": 9.229,
      "step": 40
    },
    {
      "epoch": 0.10851871947911014,
      "grad_norm": 65.20191955566406,
      "learning_rate": 3e-05,
      "loss": 6.7067,
      "step": 50
    },
    {
      "epoch": 0.13022246337493218,
      "grad_norm": 15.829483985900879,
      "learning_rate": 2.9996479470277262e-05,
      "loss": 5.6606,
      "step": 60
    },
    {
      "epoch": 0.1519262072707542,
      "grad_norm": 16.72377586364746,
      "learning_rate": 2.9985919533659653e-05,
      "loss": 4.3456,
      "step": 70
    },
    {
      "epoch": 0.17362995116657623,
      "grad_norm": 17.048402786254883,
      "learning_rate": 2.9968325147023267e-05,
      "loss": 3.5431,
      "step": 80
    },
    {
      "epoch": 0.19533369506239825,
      "grad_norm": 23.174785614013672,
      "learning_rate": 2.994370456924292e-05,
      "loss": 3.5592,
      "step": 90
    },
    {
      "epoch": 0.21703743895822028,
      "grad_norm": 63.94609832763672,
      "learning_rate": 2.9912069357315394e-05,
      "loss": 3.5889,
      "step": 100
    },
    {
      "epoch": 0.23874118285404233,
      "grad_norm": 17.54082679748535,
      "learning_rate": 2.9873434360934543e-05,
      "loss": 4.0375,
      "step": 110
    },
    {
      "epoch": 0.26044492674986436,
      "grad_norm": 13.922243118286133,
      "learning_rate": 2.9827817715520775e-05,
      "loss": 3.5151,
      "step": 120
    },
    {
      "epoch": 0.2821486706456864,
      "grad_norm": 18.978328704833984,
      "learning_rate": 2.977524083370823e-05,
      "loss": 3.1774,
      "step": 130
    },
    {
      "epoch": 0.3038524145415084,
      "grad_norm": 19.49057388305664,
      "learning_rate": 2.9715728395293587e-05,
      "loss": 3.2158,
      "step": 140
    },
    {
      "epoch": 0.32555615843733043,
      "grad_norm": 36.420654296875,
      "learning_rate": 2.96493083356513e-05,
      "loss": 3.1129,
      "step": 150
    },
    {
      "epoch": 0.32555615843733043,
      "eval_loss": 0.803920328617096,
      "eval_runtime": 53.8173,
      "eval_samples_per_second": 14.438,
      "eval_steps_per_second": 1.821,
      "step": 150
    },
    {
      "epoch": 0.34725990233315246,
      "grad_norm": 15.815438270568848,
      "learning_rate": 2.9576011832620583e-05,
      "loss": 3.6763,
      "step": 160
    },
    {
      "epoch": 0.3689636462289745,
      "grad_norm": 17.322349548339844,
      "learning_rate": 2.9495873291870436e-05,
      "loss": 3.2852,
      "step": 170
    },
    {
      "epoch": 0.3906673901247965,
      "grad_norm": 16.479698181152344,
      "learning_rate": 2.940893033074948e-05,
      "loss": 3.0177,
      "step": 180
    },
    {
      "epoch": 0.41237113402061853,
      "grad_norm": 20.874675750732422,
      "learning_rate": 2.9315223760628224e-05,
      "loss": 2.676,
      "step": 190
    },
    {
      "epoch": 0.43407487791644056,
      "grad_norm": 29.774669647216797,
      "learning_rate": 2.9214797567742036e-05,
      "loss": 3.227,
      "step": 200
    },
    {
      "epoch": 0.45577862181226264,
      "grad_norm": 14.984698295593262,
      "learning_rate": 2.9107698892543862e-05,
      "loss": 3.4,
      "step": 210
    },
    {
      "epoch": 0.47748236570808467,
      "grad_norm": 18.93448829650879,
      "learning_rate": 2.8993978007576263e-05,
      "loss": 2.9846,
      "step": 220
    },
    {
      "epoch": 0.4991861096039067,
      "grad_norm": 17.96265411376953,
      "learning_rate": 2.8873688293873336e-05,
      "loss": 3.0037,
      "step": 230
    },
    {
      "epoch": 0.5208898534997287,
      "grad_norm": 23.578723907470703,
      "learning_rate": 2.874688621590339e-05,
      "loss": 2.7363,
      "step": 240
    },
    {
      "epoch": 0.5425935973955507,
      "grad_norm": 39.76424026489258,
      "learning_rate": 2.861363129506436e-05,
      "loss": 3.1817,
      "step": 250
    },
    {
      "epoch": 0.5642973412913728,
      "grad_norm": 20.42775535583496,
      "learning_rate": 2.847398608174417e-05,
      "loss": 3.2541,
      "step": 260
    },
    {
      "epoch": 0.5860010851871947,
      "grad_norm": 16.566482543945312,
      "learning_rate": 2.832801612595937e-05,
      "loss": 2.8651,
      "step": 270
    },
    {
      "epoch": 0.6077048290830168,
      "grad_norm": 16.73906707763672,
      "learning_rate": 2.8175789946585697e-05,
      "loss": 2.8237,
      "step": 280
    },
    {
      "epoch": 0.6294085729788389,
      "grad_norm": 23.292736053466797,
      "learning_rate": 2.801737899919502e-05,
      "loss": 3.0393,
      "step": 290
    },
    {
      "epoch": 0.6511123168746609,
      "grad_norm": 38.29425048828125,
      "learning_rate": 2.7852857642513838e-05,
      "loss": 2.7109,
      "step": 300
    },
    {
      "epoch": 0.6511123168746609,
      "eval_loss": 0.7168570756912231,
      "eval_runtime": 53.8613,
      "eval_samples_per_second": 14.426,
      "eval_steps_per_second": 1.819,
      "step": 300
    },
    {
      "epoch": 0.672816060770483,
      "grad_norm": 13.968533515930176,
      "learning_rate": 2.768230310351898e-05,
      "loss": 3.3938,
      "step": 310
    },
    {
      "epoch": 0.6945198046663049,
      "grad_norm": 16.265430450439453,
      "learning_rate": 2.7505795441186953e-05,
      "loss": 2.9047,
      "step": 320
    },
    {
      "epoch": 0.716223548562127,
      "grad_norm": 14.983628273010254,
      "learning_rate": 2.7323417508913973e-05,
      "loss": 2.7784,
      "step": 330
    },
    {
      "epoch": 0.737927292457949,
      "grad_norm": 20.370397567749023,
      "learning_rate": 2.7135254915624213e-05,
      "loss": 2.7854,
      "step": 340
    },
    {
      "epoch": 0.759631036353771,
      "grad_norm": 50.72266387939453,
      "learning_rate": 2.6941395985584656e-05,
      "loss": 2.8735,
      "step": 350
    },
    {
      "epoch": 0.781334780249593,
      "grad_norm": 17.988285064697266,
      "learning_rate": 2.6741931716945336e-05,
      "loss": 3.0907,
      "step": 360
    },
    {
      "epoch": 0.8030385241454151,
      "grad_norm": 16.146488189697266,
      "learning_rate": 2.6536955739024436e-05,
      "loss": 2.7129,
      "step": 370
    },
    {
      "epoch": 0.8247422680412371,
      "grad_norm": 18.454801559448242,
      "learning_rate": 2.632656426835831e-05,
      "loss": 2.6485,
      "step": 380
    },
    {
      "epoch": 0.8464460119370592,
      "grad_norm": 19.638561248779297,
      "learning_rate": 2.6110856063537087e-05,
      "loss": 2.4677,
      "step": 390
    },
    {
      "epoch": 0.8681497558328811,
      "grad_norm": 35.427913665771484,
      "learning_rate": 2.5889932378846963e-05,
      "loss": 2.7575,
      "step": 400
    },
    {
      "epoch": 0.8898534997287032,
      "grad_norm": 17.886579513549805,
      "learning_rate": 2.5663896916741064e-05,
      "loss": 2.8667,
      "step": 410
    },
    {
      "epoch": 0.9115572436245253,
      "grad_norm": 17.810691833496094,
      "learning_rate": 2.543285577916108e-05,
      "loss": 2.7324,
      "step": 420
    },
    {
      "epoch": 0.9332609875203473,
      "grad_norm": 19.121427536010742,
      "learning_rate": 2.519691741773262e-05,
      "loss": 2.5549,
      "step": 430
    },
    {
      "epoch": 0.9549647314161693,
      "grad_norm": 21.621971130371094,
      "learning_rate": 2.495619258285757e-05,
      "loss": 2.4607,
      "step": 440
    },
    {
      "epoch": 0.9766684753119913,
      "grad_norm": 38.662139892578125,
      "learning_rate": 2.4710794271727415e-05,
      "loss": 2.7824,
      "step": 450
    },
    {
      "epoch": 0.9766684753119913,
      "eval_loss": 0.6706861853599548,
      "eval_runtime": 53.787,
      "eval_samples_per_second": 14.446,
      "eval_steps_per_second": 1.822,
      "step": 450
    },
    {
      "epoch": 0.9983722192078134,
      "grad_norm": 25.043182373046875,
      "learning_rate": 2.446083767528193e-05,
      "loss": 2.7357,
      "step": 460
    },
    {
      "epoch": 1.0200759631036354,
      "grad_norm": 15.549626350402832,
      "learning_rate": 2.4206440124138064e-05,
      "loss": 2.7512,
      "step": 470
    },
    {
      "epoch": 1.0417797069994574,
      "grad_norm": 18.95859146118164,
      "learning_rate": 2.3947721033514517e-05,
      "loss": 2.3525,
      "step": 480
    },
    {
      "epoch": 1.0634834508952795,
      "grad_norm": 18.817302703857422,
      "learning_rate": 2.3684801847177732e-05,
      "loss": 2.2039,
      "step": 490
    },
    {
      "epoch": 1.0851871947911014,
      "grad_norm": 23.182357788085938,
      "learning_rate": 2.341780598043574e-05,
      "loss": 2.0556,
      "step": 500
    },
    {
      "epoch": 1.1068909386869235,
      "grad_norm": 34.256813049316406,
      "learning_rate": 2.3146858762206493e-05,
      "loss": 2.2184,
      "step": 510
    },
    {
      "epoch": 1.1285946825827455,
      "grad_norm": 18.3436336517334,
      "learning_rate": 2.287208737618801e-05,
      "loss": 2.5032,
      "step": 520
    },
    {
      "epoch": 1.1502984264785676,
      "grad_norm": 18.193883895874023,
      "learning_rate": 2.259362080115781e-05,
      "loss": 2.1954,
      "step": 530
    },
    {
      "epoch": 1.1720021703743897,
      "grad_norm": 22.53719711303711,
      "learning_rate": 2.231158975042979e-05,
      "loss": 2.2031,
      "step": 540
    },
    {
      "epoch": 1.1937059142702116,
      "grad_norm": 21.267290115356445,
      "learning_rate": 2.2026126610496852e-05,
      "loss": 1.8531,
      "step": 550
    },
    {
      "epoch": 1.2154096581660336,
      "grad_norm": 29.71878433227539,
      "learning_rate": 2.173736537888819e-05,
      "loss": 1.9597,
      "step": 560
    },
    {
      "epoch": 1.2371134020618557,
      "grad_norm": 18.650861740112305,
      "learning_rate": 2.1445441601270276e-05,
      "loss": 2.6653,
      "step": 570
    },
    {
      "epoch": 1.2588171459576776,
      "grad_norm": 22.564220428466797,
      "learning_rate": 2.115049230782124e-05,
      "loss": 2.34,
      "step": 580
    },
    {
      "epoch": 1.2805208898534997,
      "grad_norm": 22.589075088500977,
      "learning_rate": 2.085265594890832e-05,
      "loss": 2.181,
      "step": 590
    },
    {
      "epoch": 1.3022246337493217,
      "grad_norm": 22.656047821044922,
      "learning_rate": 2.055207233009872e-05,
      "loss": 1.9121,
      "step": 600
    },
    {
      "epoch": 1.3022246337493217,
      "eval_loss": 0.6503757834434509,
      "eval_runtime": 53.7768,
      "eval_samples_per_second": 14.449,
      "eval_steps_per_second": 1.822,
      "step": 600
    },
    {
      "epoch": 1.3239283776451438,
      "grad_norm": 33.046363830566406,
      "learning_rate": 2.0248882546534327e-05,
      "loss": 1.9914,
      "step": 610
    },
    {
      "epoch": 1.345632121540966,
      "grad_norm": 21.024412155151367,
      "learning_rate": 1.9943228916701108e-05,
      "loss": 2.5415,
      "step": 620
    },
    {
      "epoch": 1.3673358654367878,
      "grad_norm": 20.984058380126953,
      "learning_rate": 1.963525491562421e-05,
      "loss": 2.329,
      "step": 630
    },
    {
      "epoch": 1.3890396093326098,
      "grad_norm": 23.811756134033203,
      "learning_rate": 1.9325105107520264e-05,
      "loss": 2.236,
      "step": 640
    },
    {
      "epoch": 1.410743353228432,
      "grad_norm": 24.431550979614258,
      "learning_rate": 1.9012925077938318e-05,
      "loss": 2.0522,
      "step": 650
    },
    {
      "epoch": 1.432447097124254,
      "grad_norm": 27.39398193359375,
      "learning_rate": 1.8698861365421433e-05,
      "loss": 1.8751,
      "step": 660
    },
    {
      "epoch": 1.454150841020076,
      "grad_norm": 18.39029312133789,
      "learning_rate": 1.8383061392720914e-05,
      "loss": 2.6245,
      "step": 670
    },
    {
      "epoch": 1.475854584915898,
      "grad_norm": 19.114816665649414,
      "learning_rate": 1.8065673397595475e-05,
      "loss": 2.1778,
      "step": 680
    },
    {
      "epoch": 1.49755832881172,
      "grad_norm": 20.83147621154785,
      "learning_rate": 1.7746846363227843e-05,
      "loss": 1.9417,
      "step": 690
    },
    {
      "epoch": 1.519262072707542,
      "grad_norm": 26.67376708984375,
      "learning_rate": 1.7426729948291474e-05,
      "loss": 1.9912,
      "step": 700
    },
    {
      "epoch": 1.540965816603364,
      "grad_norm": 29.251083374023438,
      "learning_rate": 1.7105474416700165e-05,
      "loss": 2.0298,
      "step": 710
    },
    {
      "epoch": 1.5626695604991863,
      "grad_norm": 17.292633056640625,
      "learning_rate": 1.6783230567073597e-05,
      "loss": 2.4696,
      "step": 720
    },
    {
      "epoch": 1.5843733043950081,
      "grad_norm": 20.139678955078125,
      "learning_rate": 1.646014966195185e-05,
      "loss": 2.2227,
      "step": 730
    },
    {
      "epoch": 1.6060770482908302,
      "grad_norm": 20.934541702270508,
      "learning_rate": 1.613638335679216e-05,
      "loss": 1.9018,
      "step": 740
    },
    {
      "epoch": 1.6277807921866523,
      "grad_norm": 24.31667709350586,
      "learning_rate": 1.5812083628781265e-05,
      "loss": 2.1797,
      "step": 750
    },
    {
      "epoch": 1.6277807921866523,
      "eval_loss": 0.647061824798584,
      "eval_runtime": 53.7706,
      "eval_samples_per_second": 14.45,
      "eval_steps_per_second": 1.823,
      "step": 750
    },
    {
      "epoch": 1.6494845360824741,
      "grad_norm": 36.196144104003906,
      "learning_rate": 1.548740270549671e-05,
      "loss": 2.0389,
      "step": 760
    },
    {
      "epoch": 1.6711882799782962,
      "grad_norm": 20.02777671813965,
      "learning_rate": 1.5162492993450599e-05,
      "loss": 2.4617,
      "step": 770
    },
    {
      "epoch": 1.6928920238741183,
      "grad_norm": 20.28060531616211,
      "learning_rate": 1.4837507006549403e-05,
      "loss": 2.1809,
      "step": 780
    },
    {
      "epoch": 1.7145957677699402,
      "grad_norm": 22.740869522094727,
      "learning_rate": 1.4512597294503295e-05,
      "loss": 2.0388,
      "step": 790
    },
    {
      "epoch": 1.7362995116657625,
      "grad_norm": 23.173282623291016,
      "learning_rate": 1.4187916371218739e-05,
      "loss": 2.0224,
      "step": 800
    },
    {
      "epoch": 1.7580032555615843,
      "grad_norm": 40.70626449584961,
      "learning_rate": 1.3863616643207844e-05,
      "loss": 1.9738,
      "step": 810
    },
    {
      "epoch": 1.7797069994574064,
      "grad_norm": 19.63157844543457,
      "learning_rate": 1.3539850338048156e-05,
      "loss": 2.5696,
      "step": 820
    },
    {
      "epoch": 1.8014107433532285,
      "grad_norm": 21.02994155883789,
      "learning_rate": 1.3216769432926405e-05,
      "loss": 2.2346,
      "step": 830
    },
    {
      "epoch": 1.8231144872490503,
      "grad_norm": 21.89013671875,
      "learning_rate": 1.2894525583299835e-05,
      "loss": 2.0149,
      "step": 840
    },
    {
      "epoch": 1.8448182311448726,
      "grad_norm": 20.857643127441406,
      "learning_rate": 1.2573270051708529e-05,
      "loss": 1.7583,
      "step": 850
    },
    {
      "epoch": 1.8665219750406945,
      "grad_norm": 48.44206237792969,
      "learning_rate": 1.2253153636772158e-05,
      "loss": 2.221,
      "step": 860
    },
    {
      "epoch": 1.8882257189365166,
      "grad_norm": 19.892345428466797,
      "learning_rate": 1.193432660240453e-05,
      "loss": 2.4434,
      "step": 870
    },
    {
      "epoch": 1.9099294628323387,
      "grad_norm": 22.947458267211914,
      "learning_rate": 1.1616938607279089e-05,
      "loss": 2.2474,
      "step": 880
    },
    {
      "epoch": 1.9316332067281605,
      "grad_norm": 21.839048385620117,
      "learning_rate": 1.1301138634578571e-05,
      "loss": 1.947,
      "step": 890
    },
    {
      "epoch": 1.9533369506239826,
      "grad_norm": 33.06451416015625,
      "learning_rate": 1.098707492206169e-05,
      "loss": 2.0453,
      "step": 900
    },
    {
      "epoch": 1.9533369506239826,
      "eval_loss": 0.636073112487793,
      "eval_runtime": 53.9004,
      "eval_samples_per_second": 14.415,
      "eval_steps_per_second": 1.818,
      "step": 900
    },
    {
      "epoch": 1.9750406945198047,
      "grad_norm": 33.91230773925781,
      "learning_rate": 1.067489489247974e-05,
      "loss": 1.9067,
      "step": 910
    },
    {
      "epoch": 1.9967444384156265,
      "grad_norm": 28.854825973510742,
      "learning_rate": 1.036474508437579e-05,
      "loss": 2.3281,
      "step": 920
    },
    {
      "epoch": 2.018448182311449,
      "grad_norm": 18.260103225708008,
      "learning_rate": 1.0056771083298894e-05,
      "loss": 2.1337,
      "step": 930
    },
    {
      "epoch": 2.0401519262072707,
      "grad_norm": 19.979646682739258,
      "learning_rate": 9.751117453465674e-06,
      "loss": 1.6345,
      "step": 940
    },
    {
      "epoch": 2.0618556701030926,
      "grad_norm": 22.703859329223633,
      "learning_rate": 9.447927669901284e-06,
      "loss": 1.634,
      "step": 950
    },
    {
      "epoch": 2.083559413998915,
      "grad_norm": 21.72873878479004,
      "learning_rate": 9.147344051091682e-06,
      "loss": 1.5881,
      "step": 960
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 31.957630157470703,
      "learning_rate": 8.849507692178758e-06,
      "loss": 1.3856,
      "step": 970
    },
    {
      "epoch": 2.126966901790559,
      "grad_norm": 21.75389862060547,
      "learning_rate": 8.554558398729726e-06,
      "loss": 1.9382,
      "step": 980
    },
    {
      "epoch": 2.148670645686381,
      "grad_norm": 23.939027786254883,
      "learning_rate": 8.262634621111819e-06,
      "loss": 1.8201,
      "step": 990
    },
    {
      "epoch": 2.1703743895822027,
      "grad_norm": 21.948673248291016,
      "learning_rate": 7.97387338950315e-06,
      "loss": 1.5186,
      "step": 1000
    },
    {
      "epoch": 2.192078133478025,
      "grad_norm": 26.39198112487793,
      "learning_rate": 7.688410249570214e-06,
      "loss": 1.4693,
      "step": 1010
    },
    {
      "epoch": 2.213781877373847,
      "grad_norm": 43.11570358276367,
      "learning_rate": 7.4063791988421905e-06,
      "loss": 1.3836,
      "step": 1020
    },
    {
      "epoch": 2.235485621269669,
      "grad_norm": 21.80547523498535,
      "learning_rate": 7.127912623811993e-06,
      "loss": 1.9962,
      "step": 1030
    },
    {
      "epoch": 2.257189365165491,
      "grad_norm": 20.71767234802246,
      "learning_rate": 6.853141237793507e-06,
      "loss": 1.6606,
      "step": 1040
    },
    {
      "epoch": 2.278893109061313,
      "grad_norm": 21.81035614013672,
      "learning_rate": 6.582194019564266e-06,
      "loss": 1.4825,
      "step": 1050
    },
    {
      "epoch": 2.278893109061313,
      "eval_loss": 0.6611286997795105,
      "eval_runtime": 53.8731,
      "eval_samples_per_second": 14.423,
      "eval_steps_per_second": 1.819,
      "step": 1050
    }
  ],
  "logging_steps": 10,
  "max_steps": 1500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 150,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4722426201964544e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}