|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.25157232704402516, |
|
"eval_steps": 120, |
|
"global_step": 120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0020964360587002098, |
|
"grad_norm": 0.6940059065818787, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5374, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0041928721174004195, |
|
"grad_norm": 0.7119749188423157, |
|
"learning_rate": 4e-05, |
|
"loss": 1.4412, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006289308176100629, |
|
"grad_norm": 0.6409623026847839, |
|
"learning_rate": 6e-05, |
|
"loss": 1.3945, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008385744234800839, |
|
"grad_norm": 0.7138105630874634, |
|
"learning_rate": 8e-05, |
|
"loss": 1.4504, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.010482180293501049, |
|
"grad_norm": 0.8199965357780457, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6434, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012578616352201259, |
|
"grad_norm": 0.7341210842132568, |
|
"learning_rate": 0.00012, |
|
"loss": 1.3584, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014675052410901468, |
|
"grad_norm": 0.8366342782974243, |
|
"learning_rate": 0.00014, |
|
"loss": 1.6133, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.016771488469601678, |
|
"grad_norm": 0.45160987973213196, |
|
"learning_rate": 0.00016, |
|
"loss": 1.5444, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.018867924528301886, |
|
"grad_norm": 0.564163863658905, |
|
"learning_rate": 0.00018, |
|
"loss": 1.4406, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.020964360587002098, |
|
"grad_norm": 1.0345866680145264, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4192, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.023060796645702306, |
|
"grad_norm": 0.7354846596717834, |
|
"learning_rate": 0.0001999977372615812, |
|
"loss": 1.386, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.025157232704402517, |
|
"grad_norm": 0.5225378274917603, |
|
"learning_rate": 0.00019999094914872442, |
|
"loss": 1.272, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.027253668763102725, |
|
"grad_norm": 0.3142692446708679, |
|
"learning_rate": 0.0001999796359686242, |
|
"loss": 1.3781, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.029350104821802937, |
|
"grad_norm": 0.33122241497039795, |
|
"learning_rate": 0.00019996379823325583, |
|
"loss": 1.5188, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.031446540880503145, |
|
"grad_norm": 0.2958654761314392, |
|
"learning_rate": 0.0001999434366593524, |
|
"loss": 0.999, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.033542976939203356, |
|
"grad_norm": 0.4279610812664032, |
|
"learning_rate": 0.00019991855216837224, |
|
"loss": 1.5178, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03563941299790356, |
|
"grad_norm": 0.36770907044410706, |
|
"learning_rate": 0.00019988914588645715, |
|
"loss": 1.2745, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03773584905660377, |
|
"grad_norm": 0.3166482150554657, |
|
"learning_rate": 0.00019985521914438165, |
|
"loss": 1.5023, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.039832285115303984, |
|
"grad_norm": 0.42765095829963684, |
|
"learning_rate": 0.0001998167734774926, |
|
"loss": 1.2504, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.041928721174004195, |
|
"grad_norm": 0.392689973115921, |
|
"learning_rate": 0.00019977381062563976, |
|
"loss": 1.2228, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0440251572327044, |
|
"grad_norm": 0.358163982629776, |
|
"learning_rate": 0.000199726332533097, |
|
"loss": 1.2634, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04612159329140461, |
|
"grad_norm": 0.3274112939834595, |
|
"learning_rate": 0.00019967434134847442, |
|
"loss": 1.4746, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04821802935010482, |
|
"grad_norm": 0.3587968945503235, |
|
"learning_rate": 0.00019961783942462104, |
|
"loss": 1.3947, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.050314465408805034, |
|
"grad_norm": 0.30727654695510864, |
|
"learning_rate": 0.00019955682931851833, |
|
"loss": 1.4815, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05241090146750524, |
|
"grad_norm": 0.4096279442310333, |
|
"learning_rate": 0.00019949131379116454, |
|
"loss": 1.3225, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05450733752620545, |
|
"grad_norm": 0.36623865365982056, |
|
"learning_rate": 0.00019942129580744966, |
|
"loss": 1.3904, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.05660377358490566, |
|
"grad_norm": 0.3568407893180847, |
|
"learning_rate": 0.00019934677853602133, |
|
"loss": 1.463, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.05870020964360587, |
|
"grad_norm": 0.4338196814060211, |
|
"learning_rate": 0.0001992677653491414, |
|
"loss": 1.4359, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06079664570230608, |
|
"grad_norm": 0.4408683180809021, |
|
"learning_rate": 0.00019918425982253334, |
|
"loss": 1.8015, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06289308176100629, |
|
"grad_norm": 0.3609876036643982, |
|
"learning_rate": 0.00019909626573522043, |
|
"loss": 1.3589, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0649895178197065, |
|
"grad_norm": 0.43560177087783813, |
|
"learning_rate": 0.0001990037870693547, |
|
"loss": 1.734, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.06708595387840671, |
|
"grad_norm": 0.37430861592292786, |
|
"learning_rate": 0.00019890682801003675, |
|
"loss": 1.3517, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.06918238993710692, |
|
"grad_norm": 0.4608246386051178, |
|
"learning_rate": 0.00019880539294512637, |
|
"loss": 1.4881, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07127882599580712, |
|
"grad_norm": 0.41597816348075867, |
|
"learning_rate": 0.0001986994864650439, |
|
"loss": 1.2676, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07337526205450734, |
|
"grad_norm": 0.561418354511261, |
|
"learning_rate": 0.00019858911336256257, |
|
"loss": 1.4233, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07547169811320754, |
|
"grad_norm": 0.9351180195808411, |
|
"learning_rate": 0.00019847427863259163, |
|
"loss": 1.2086, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07756813417190776, |
|
"grad_norm": 0.6147457957267761, |
|
"learning_rate": 0.00019835498747195008, |
|
"loss": 1.4909, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.07966457023060797, |
|
"grad_norm": 0.4514181315898895, |
|
"learning_rate": 0.00019823124527913185, |
|
"loss": 1.2649, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08176100628930817, |
|
"grad_norm": 0.49401888251304626, |
|
"learning_rate": 0.0001981030576540612, |
|
"loss": 1.5149, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.08385744234800839, |
|
"grad_norm": 0.6095734238624573, |
|
"learning_rate": 0.00019797043039783936, |
|
"loss": 1.4917, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0859538784067086, |
|
"grad_norm": 0.42444926500320435, |
|
"learning_rate": 0.0001978333695124821, |
|
"loss": 1.3691, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0880503144654088, |
|
"grad_norm": 0.47243213653564453, |
|
"learning_rate": 0.00019769188120064812, |
|
"loss": 1.7828, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09014675052410902, |
|
"grad_norm": 0.4187338650226593, |
|
"learning_rate": 0.00019754597186535814, |
|
"loss": 1.2147, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09224318658280922, |
|
"grad_norm": 0.4433446228504181, |
|
"learning_rate": 0.0001973956481097053, |
|
"loss": 1.1449, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.09433962264150944, |
|
"grad_norm": 0.5269142389297485, |
|
"learning_rate": 0.0001972409167365564, |
|
"loss": 1.4682, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09643605870020965, |
|
"grad_norm": 0.4906723201274872, |
|
"learning_rate": 0.0001970817847482439, |
|
"loss": 1.3701, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.09853249475890985, |
|
"grad_norm": 0.5275290608406067, |
|
"learning_rate": 0.000196918259346249, |
|
"loss": 1.3704, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.10062893081761007, |
|
"grad_norm": 0.5568628907203674, |
|
"learning_rate": 0.00019675034793087596, |
|
"loss": 1.068, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.10272536687631027, |
|
"grad_norm": 0.6039868593215942, |
|
"learning_rate": 0.000196578058100917, |
|
"loss": 1.2204, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.10482180293501048, |
|
"grad_norm": 0.9857679605484009, |
|
"learning_rate": 0.0001964013976533084, |
|
"loss": 1.0091, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1069182389937107, |
|
"grad_norm": 0.3437671959400177, |
|
"learning_rate": 0.00019622037458277784, |
|
"loss": 1.2225, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.1090146750524109, |
|
"grad_norm": 0.3308734893798828, |
|
"learning_rate": 0.00019603499708148244, |
|
"loss": 1.2099, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 0.353939026594162, |
|
"learning_rate": 0.0001958452735386381, |
|
"loss": 1.2554, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.11320754716981132, |
|
"grad_norm": 0.3151988089084625, |
|
"learning_rate": 0.00019565121254013979, |
|
"loss": 1.252, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11530398322851153, |
|
"grad_norm": 0.32421159744262695, |
|
"learning_rate": 0.00019545282286817303, |
|
"loss": 0.9776, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11740041928721175, |
|
"grad_norm": 0.3662404417991638, |
|
"learning_rate": 0.0001952501135008165, |
|
"loss": 1.3977, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.11949685534591195, |
|
"grad_norm": 0.3840480148792267, |
|
"learning_rate": 0.00019504309361163566, |
|
"loss": 1.2663, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.12159329140461216, |
|
"grad_norm": 0.37903356552124023, |
|
"learning_rate": 0.00019483177256926767, |
|
"loss": 1.5308, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.12368972746331237, |
|
"grad_norm": 0.346229612827301, |
|
"learning_rate": 0.0001946161599369973, |
|
"loss": 1.4319, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.12578616352201258, |
|
"grad_norm": 0.34781116247177124, |
|
"learning_rate": 0.00019439626547232433, |
|
"loss": 1.1933, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1278825995807128, |
|
"grad_norm": 0.3286825716495514, |
|
"learning_rate": 0.0001941720991265218, |
|
"loss": 1.1038, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.129979035639413, |
|
"grad_norm": 0.39212745428085327, |
|
"learning_rate": 0.00019394367104418576, |
|
"loss": 1.2789, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1320754716981132, |
|
"grad_norm": 0.3172178566455841, |
|
"learning_rate": 0.0001937109915627762, |
|
"loss": 1.1614, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.13417190775681342, |
|
"grad_norm": 0.371159166097641, |
|
"learning_rate": 0.00019347407121214914, |
|
"loss": 1.3819, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.13626834381551362, |
|
"grad_norm": 0.36089271306991577, |
|
"learning_rate": 0.00019323292071408017, |
|
"loss": 1.4392, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13836477987421383, |
|
"grad_norm": 0.42245927453041077, |
|
"learning_rate": 0.00019298755098177926, |
|
"loss": 1.2518, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.14046121593291405, |
|
"grad_norm": 0.3602246642112732, |
|
"learning_rate": 0.00019273797311939673, |
|
"loss": 1.3146, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.14255765199161424, |
|
"grad_norm": 0.3581138253211975, |
|
"learning_rate": 0.00019248419842152098, |
|
"loss": 1.2622, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.14465408805031446, |
|
"grad_norm": 0.391454815864563, |
|
"learning_rate": 0.0001922262383726672, |
|
"loss": 1.4421, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.14675052410901468, |
|
"grad_norm": 0.4634746313095093, |
|
"learning_rate": 0.00019196410464675766, |
|
"loss": 1.3862, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1488469601677149, |
|
"grad_norm": 0.35802096128463745, |
|
"learning_rate": 0.00019169780910659333, |
|
"loss": 1.4004, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1509433962264151, |
|
"grad_norm": 0.34099411964416504, |
|
"learning_rate": 0.00019142736380331726, |
|
"loss": 1.2887, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1530398322851153, |
|
"grad_norm": 0.37205106019973755, |
|
"learning_rate": 0.00019115278097586903, |
|
"loss": 1.518, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.15513626834381553, |
|
"grad_norm": 0.3985058665275574, |
|
"learning_rate": 0.00019087407305043086, |
|
"loss": 1.3483, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.15723270440251572, |
|
"grad_norm": 0.3541426956653595, |
|
"learning_rate": 0.0001905912526398654, |
|
"loss": 1.3036, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15932914046121593, |
|
"grad_norm": 0.44033437967300415, |
|
"learning_rate": 0.00019030433254314474, |
|
"loss": 1.3732, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.16142557651991615, |
|
"grad_norm": 0.40152212977409363, |
|
"learning_rate": 0.00019001332574477146, |
|
"loss": 1.479, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.16352201257861634, |
|
"grad_norm": 0.46172958612442017, |
|
"learning_rate": 0.00018971824541419083, |
|
"loss": 1.381, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.16561844863731656, |
|
"grad_norm": 0.40097662806510925, |
|
"learning_rate": 0.0001894191049051948, |
|
"loss": 1.1499, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.16771488469601678, |
|
"grad_norm": 0.49080637097358704, |
|
"learning_rate": 0.0001891159177553179, |
|
"loss": 1.664, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16981132075471697, |
|
"grad_norm": 0.45318862795829773, |
|
"learning_rate": 0.00018880869768522432, |
|
"loss": 1.3287, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.1719077568134172, |
|
"grad_norm": 0.4062664210796356, |
|
"learning_rate": 0.00018849745859808717, |
|
"loss": 1.2012, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.1740041928721174, |
|
"grad_norm": 0.4371073246002197, |
|
"learning_rate": 0.00018818221457895926, |
|
"loss": 1.4706, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.1761006289308176, |
|
"grad_norm": 0.41299256682395935, |
|
"learning_rate": 0.00018786297989413568, |
|
"loss": 1.2486, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.17819706498951782, |
|
"grad_norm": 0.44734108448028564, |
|
"learning_rate": 0.00018753976899050812, |
|
"loss": 1.1505, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.18029350104821804, |
|
"grad_norm": 0.552854597568512, |
|
"learning_rate": 0.00018721259649491113, |
|
"loss": 1.5622, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.18238993710691823, |
|
"grad_norm": 0.541213870048523, |
|
"learning_rate": 0.0001868814772134603, |
|
"loss": 1.5055, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.18448637316561844, |
|
"grad_norm": 0.48175540566444397, |
|
"learning_rate": 0.00018654642613088194, |
|
"loss": 1.2456, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.18658280922431866, |
|
"grad_norm": 0.5197116732597351, |
|
"learning_rate": 0.0001862074584098352, |
|
"loss": 1.4801, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.18867924528301888, |
|
"grad_norm": 0.46993735432624817, |
|
"learning_rate": 0.00018586458939022586, |
|
"loss": 1.5128, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19077568134171907, |
|
"grad_norm": 0.5093168616294861, |
|
"learning_rate": 0.00018551783458851189, |
|
"loss": 1.521, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.1928721174004193, |
|
"grad_norm": 0.4279519021511078, |
|
"learning_rate": 0.0001851672096970016, |
|
"loss": 1.0692, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.1949685534591195, |
|
"grad_norm": 0.48902031779289246, |
|
"learning_rate": 0.00018481273058314316, |
|
"loss": 1.3202, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.1970649895178197, |
|
"grad_norm": 0.5409737229347229, |
|
"learning_rate": 0.00018445441328880682, |
|
"loss": 1.6125, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.19916142557651992, |
|
"grad_norm": 0.5205714702606201, |
|
"learning_rate": 0.00018409227402955871, |
|
"loss": 1.1616, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.20125786163522014, |
|
"grad_norm": 0.5157482624053955, |
|
"learning_rate": 0.00018372632919392716, |
|
"loss": 1.3375, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.20335429769392033, |
|
"grad_norm": 0.5590908527374268, |
|
"learning_rate": 0.00018335659534266094, |
|
"loss": 1.6429, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.20545073375262055, |
|
"grad_norm": 0.5677520036697388, |
|
"learning_rate": 0.00018298308920797985, |
|
"loss": 1.1629, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.20754716981132076, |
|
"grad_norm": 0.6165626645088196, |
|
"learning_rate": 0.00018260582769281743, |
|
"loss": 1.0469, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.20964360587002095, |
|
"grad_norm": 0.7722473740577698, |
|
"learning_rate": 0.0001822248278700563, |
|
"loss": 1.7717, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21174004192872117, |
|
"grad_norm": 0.34500235319137573, |
|
"learning_rate": 0.00018184010698175506, |
|
"loss": 1.0338, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2138364779874214, |
|
"grad_norm": 0.4223347008228302, |
|
"learning_rate": 0.0001814516824383685, |
|
"loss": 1.384, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.21593291404612158, |
|
"grad_norm": 0.3532989025115967, |
|
"learning_rate": 0.0001810595718179593, |
|
"loss": 1.1763, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.2180293501048218, |
|
"grad_norm": 0.31655967235565186, |
|
"learning_rate": 0.00018066379286540277, |
|
"loss": 1.4366, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.22012578616352202, |
|
"grad_norm": 0.580037534236908, |
|
"learning_rate": 0.00018026436349158378, |
|
"loss": 1.4038, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 0.3317371606826782, |
|
"learning_rate": 0.00017986130177258608, |
|
"loss": 1.2701, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.22431865828092243, |
|
"grad_norm": 0.34435999393463135, |
|
"learning_rate": 0.00017945462594887445, |
|
"loss": 1.2306, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.22641509433962265, |
|
"grad_norm": 0.30907875299453735, |
|
"learning_rate": 0.000179044354424469, |
|
"loss": 1.0864, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.22851153039832284, |
|
"grad_norm": 0.3259734511375427, |
|
"learning_rate": 0.00017863050576611265, |
|
"loss": 1.1871, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.23060796645702306, |
|
"grad_norm": 0.3698357939720154, |
|
"learning_rate": 0.00017821309870243054, |
|
"loss": 1.2336, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.23270440251572327, |
|
"grad_norm": 0.3339691162109375, |
|
"learning_rate": 0.00017779215212308265, |
|
"loss": 1.1696, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.2348008385744235, |
|
"grad_norm": 0.333344429731369, |
|
"learning_rate": 0.0001773676850779089, |
|
"loss": 1.3809, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.23689727463312368, |
|
"grad_norm": 0.35278016328811646, |
|
"learning_rate": 0.00017693971677606714, |
|
"loss": 1.3156, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.2389937106918239, |
|
"grad_norm": 0.3800717890262604, |
|
"learning_rate": 0.00017650826658516375, |
|
"loss": 1.1809, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.24109014675052412, |
|
"grad_norm": 0.353089302778244, |
|
"learning_rate": 0.00017607335403037712, |
|
"loss": 1.5121, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2431865828092243, |
|
"grad_norm": 0.3874945044517517, |
|
"learning_rate": 0.00017563499879357425, |
|
"loss": 1.5124, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.24528301886792453, |
|
"grad_norm": 0.3635624945163727, |
|
"learning_rate": 0.00017519322071241983, |
|
"loss": 1.1454, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.24737945492662475, |
|
"grad_norm": 0.39976125955581665, |
|
"learning_rate": 0.0001747480397794786, |
|
"loss": 1.4797, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.24947589098532494, |
|
"grad_norm": 0.3654632866382599, |
|
"learning_rate": 0.0001742994761413105, |
|
"loss": 1.2913, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.25157232704402516, |
|
"grad_norm": 0.4034808874130249, |
|
"learning_rate": 0.0001738475500975592, |
|
"loss": 1.4904, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.25157232704402516, |
|
"eval_loss": 1.3252530097961426, |
|
"eval_runtime": 13.8389, |
|
"eval_samples_per_second": 14.524, |
|
"eval_steps_per_second": 7.298, |
|
"step": 120 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 120, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.141788609845658e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|