{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.250272034820457,
  "eval_steps": 230,
  "global_step": 230,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001088139281828074,
      "grad_norm": 0.04828796908259392,
      "learning_rate": 2e-05,
      "loss": 0.0762,
      "step": 1
    },
    {
      "epoch": 0.002176278563656148,
      "grad_norm": 0.06207922473549843,
      "learning_rate": 4e-05,
      "loss": 0.0973,
      "step": 2
    },
    {
      "epoch": 0.003264417845484222,
      "grad_norm": 0.06030188128352165,
      "learning_rate": 6e-05,
      "loss": 0.1003,
      "step": 3
    },
    {
      "epoch": 0.004352557127312296,
      "grad_norm": 0.05758730694651604,
      "learning_rate": 8e-05,
      "loss": 0.085,
      "step": 4
    },
    {
      "epoch": 0.00544069640914037,
      "grad_norm": 0.07111983001232147,
      "learning_rate": 0.0001,
      "loss": 0.1174,
      "step": 5
    },
    {
      "epoch": 0.006528835690968444,
      "grad_norm": 0.09522929042577744,
      "learning_rate": 0.00012,
      "loss": 0.1404,
      "step": 6
    },
    {
      "epoch": 0.007616974972796518,
      "grad_norm": 0.08600069582462311,
      "learning_rate": 0.00014,
      "loss": 0.1187,
      "step": 7
    },
    {
      "epoch": 0.008705114254624592,
      "grad_norm": 0.0808354914188385,
      "learning_rate": 0.00016,
      "loss": 0.1235,
      "step": 8
    },
    {
      "epoch": 0.009793253536452665,
      "grad_norm": 0.10051260888576508,
      "learning_rate": 0.00018,
      "loss": 0.135,
      "step": 9
    },
    {
      "epoch": 0.01088139281828074,
      "grad_norm": 0.08877796679735184,
      "learning_rate": 0.0002,
      "loss": 0.0803,
      "step": 10
    },
    {
      "epoch": 0.011969532100108813,
      "grad_norm": 0.10384293645620346,
      "learning_rate": 0.00019999940277008808,
      "loss": 0.1206,
      "step": 11
    },
    {
      "epoch": 0.013057671381936888,
      "grad_norm": 0.1314290463924408,
      "learning_rate": 0.00019999761108748597,
      "loss": 0.1249,
      "step": 12
    },
    {
      "epoch": 0.014145810663764961,
      "grad_norm": 0.12873488664627075,
      "learning_rate": 0.00019999462497359466,
      "loss": 0.0847,
      "step": 13
    },
    {
      "epoch": 0.015233949945593036,
      "grad_norm": 0.12493155896663666,
      "learning_rate": 0.000199990444464082,
      "loss": 0.0858,
      "step": 14
    },
    {
      "epoch": 0.01632208922742111,
      "grad_norm": 0.1131022647023201,
      "learning_rate": 0.00019998506960888256,
      "loss": 0.0737,
      "step": 15
    },
    {
      "epoch": 0.017410228509249184,
      "grad_norm": 0.1214890405535698,
      "learning_rate": 0.0001999785004721968,
      "loss": 0.1151,
      "step": 16
    },
    {
      "epoch": 0.018498367791077257,
      "grad_norm": 0.12649253010749817,
      "learning_rate": 0.0001999707371324904,
      "loss": 0.0913,
      "step": 17
    },
    {
      "epoch": 0.01958650707290533,
      "grad_norm": 0.12470237910747528,
      "learning_rate": 0.00019996177968249334,
      "loss": 0.1043,
      "step": 18
    },
    {
      "epoch": 0.020674646354733407,
      "grad_norm": 0.15574277937412262,
      "learning_rate": 0.00019995162822919883,
      "loss": 0.1092,
      "step": 19
    },
    {
      "epoch": 0.02176278563656148,
      "grad_norm": 0.1281992644071579,
      "learning_rate": 0.0001999402828938618,
      "loss": 0.0892,
      "step": 20
    },
    {
      "epoch": 0.022850924918389554,
      "grad_norm": 0.16906976699829102,
      "learning_rate": 0.00019992774381199778,
      "loss": 0.1633,
      "step": 21
    },
    {
      "epoch": 0.023939064200217627,
      "grad_norm": 0.12387573719024658,
      "learning_rate": 0.00019991401113338104,
      "loss": 0.0872,
      "step": 22
    },
    {
      "epoch": 0.025027203482045703,
      "grad_norm": 0.12343913316726685,
      "learning_rate": 0.00019989908502204292,
      "loss": 0.1084,
      "step": 23
    },
    {
      "epoch": 0.026115342763873776,
      "grad_norm": 0.11399204283952713,
      "learning_rate": 0.00019988296565626987,
      "loss": 0.0824,
      "step": 24
    },
    {
      "epoch": 0.02720348204570185,
      "grad_norm": 0.1402149796485901,
      "learning_rate": 0.00019986565322860115,
      "loss": 0.1289,
      "step": 25
    },
    {
      "epoch": 0.028291621327529923,
      "grad_norm": 0.13711000978946686,
      "learning_rate": 0.00019984714794582683,
      "loss": 0.097,
      "step": 26
    },
    {
      "epoch": 0.029379760609358,
      "grad_norm": 0.18544617295265198,
      "learning_rate": 0.000199827450028985,
      "loss": 0.1281,
      "step": 27
    },
    {
      "epoch": 0.030467899891186073,
      "grad_norm": 0.15856046974658966,
      "learning_rate": 0.00019980655971335945,
      "loss": 0.1027,
      "step": 28
    },
    {
      "epoch": 0.031556039173014146,
      "grad_norm": 0.12590578198432922,
      "learning_rate": 0.00019978447724847652,
      "loss": 0.0863,
      "step": 29
    },
    {
      "epoch": 0.03264417845484222,
      "grad_norm": 0.14784540235996246,
      "learning_rate": 0.00019976120289810247,
      "loss": 0.1125,
      "step": 30
    },
    {
      "epoch": 0.03373231773667029,
      "grad_norm": 0.19753308594226837,
      "learning_rate": 0.00019973673694024,
      "loss": 0.1389,
      "step": 31
    },
    {
      "epoch": 0.03482045701849837,
      "grad_norm": 0.17247086763381958,
      "learning_rate": 0.00019971107966712518,
      "loss": 0.0901,
      "step": 32
    },
    {
      "epoch": 0.035908596300326445,
      "grad_norm": 0.23573236167430878,
      "learning_rate": 0.0001996842313852238,
      "loss": 0.1141,
      "step": 33
    },
    {
      "epoch": 0.036996735582154515,
      "grad_norm": 0.1349520981311798,
      "learning_rate": 0.0001996561924152278,
      "loss": 0.077,
      "step": 34
    },
    {
      "epoch": 0.03808487486398259,
      "grad_norm": 0.2573637068271637,
      "learning_rate": 0.00019962696309205148,
      "loss": 0.0965,
      "step": 35
    },
    {
      "epoch": 0.03917301414581066,
      "grad_norm": 0.2688570022583008,
      "learning_rate": 0.0001995965437648273,
      "loss": 0.1754,
      "step": 36
    },
    {
      "epoch": 0.04026115342763874,
      "grad_norm": 0.2946501076221466,
      "learning_rate": 0.0001995649347969019,
      "loss": 0.1893,
      "step": 37
    },
    {
      "epoch": 0.041349292709466814,
      "grad_norm": 0.2942318320274353,
      "learning_rate": 0.00019953213656583168,
      "loss": 0.1461,
      "step": 38
    },
    {
      "epoch": 0.042437431991294884,
      "grad_norm": 0.3316934406757355,
      "learning_rate": 0.00019949814946337838,
      "loss": 0.1072,
      "step": 39
    },
    {
      "epoch": 0.04352557127312296,
      "grad_norm": 0.3863315284252167,
      "learning_rate": 0.00019946297389550433,
      "loss": 0.1517,
      "step": 40
    },
    {
      "epoch": 0.04461371055495103,
      "grad_norm": 0.3102675974369049,
      "learning_rate": 0.00019942661028236745,
      "loss": 0.1421,
      "step": 41
    },
    {
      "epoch": 0.04570184983677911,
      "grad_norm": 0.3270488679409027,
      "learning_rate": 0.00019938905905831654,
      "loss": 0.085,
      "step": 42
    },
    {
      "epoch": 0.046789989118607184,
      "grad_norm": 0.3799861669540405,
      "learning_rate": 0.0001993503206718859,
      "loss": 0.1333,
      "step": 43
    },
    {
      "epoch": 0.04787812840043525,
      "grad_norm": 0.45475858449935913,
      "learning_rate": 0.00019931039558578997,
      "loss": 0.1431,
      "step": 44
    },
    {
      "epoch": 0.04896626768226333,
      "grad_norm": 0.3995339870452881,
      "learning_rate": 0.00019926928427691786,
      "loss": 0.18,
      "step": 45
    },
    {
      "epoch": 0.05005440696409141,
      "grad_norm": 0.4547290503978729,
      "learning_rate": 0.00019922698723632767,
      "loss": 0.1578,
      "step": 46
    },
    {
      "epoch": 0.051142546245919476,
      "grad_norm": 0.44180116057395935,
      "learning_rate": 0.0001991835049692405,
      "loss": 0.2327,
      "step": 47
    },
    {
      "epoch": 0.05223068552774755,
      "grad_norm": 0.8059853911399841,
      "learning_rate": 0.0001991388379950346,
      "loss": 0.3089,
      "step": 48
    },
    {
      "epoch": 0.05331882480957562,
      "grad_norm": 0.6336686015129089,
      "learning_rate": 0.00019909298684723904,
      "loss": 0.2055,
      "step": 49
    },
    {
      "epoch": 0.0544069640914037,
      "grad_norm": 0.8279074430465698,
      "learning_rate": 0.00019904595207352737,
      "loss": 0.2626,
      "step": 50
    },
    {
      "epoch": 0.055495103373231776,
      "grad_norm": 0.07426032423973083,
      "learning_rate": 0.000198997734235711,
      "loss": 0.0632,
      "step": 51
    },
    {
      "epoch": 0.056583242655059846,
      "grad_norm": 0.09094005823135376,
      "learning_rate": 0.00019894833390973266,
      "loss": 0.0734,
      "step": 52
    },
    {
      "epoch": 0.05767138193688792,
      "grad_norm": 0.09561450034379959,
      "learning_rate": 0.00019889775168565943,
      "loss": 0.0972,
      "step": 53
    },
    {
      "epoch": 0.058759521218716,
      "grad_norm": 0.09174304455518723,
      "learning_rate": 0.00019884598816767563,
      "loss": 0.082,
      "step": 54
    },
    {
      "epoch": 0.05984766050054407,
      "grad_norm": 0.0911480113863945,
      "learning_rate": 0.0001987930439740757,
      "loss": 0.0712,
      "step": 55
    },
    {
      "epoch": 0.060935799782372145,
      "grad_norm": 0.1090071052312851,
      "learning_rate": 0.0001987389197372567,
      "loss": 0.09,
      "step": 56
    },
    {
      "epoch": 0.062023939064200215,
      "grad_norm": 0.09118974953889847,
      "learning_rate": 0.00019868361610371097,
      "loss": 0.0946,
      "step": 57
    },
    {
      "epoch": 0.06311207834602829,
      "grad_norm": 0.09903446584939957,
      "learning_rate": 0.0001986271337340182,
      "loss": 0.1074,
      "step": 58
    },
    {
      "epoch": 0.06420021762785637,
      "grad_norm": 0.08208192884922028,
      "learning_rate": 0.00019856947330283752,
      "loss": 0.0847,
      "step": 59
    },
    {
      "epoch": 0.06528835690968444,
      "grad_norm": 0.08504832535982132,
      "learning_rate": 0.0001985106354988997,
      "loss": 0.0852,
      "step": 60
    },
    {
      "epoch": 0.06637649619151251,
      "grad_norm": 0.07276565581560135,
      "learning_rate": 0.0001984506210249986,
      "loss": 0.061,
      "step": 61
    },
    {
      "epoch": 0.06746463547334058,
      "grad_norm": 0.08346979320049286,
      "learning_rate": 0.00019838943059798304,
      "loss": 0.0717,
      "step": 62
    },
    {
      "epoch": 0.06855277475516866,
      "grad_norm": 0.09144837409257889,
      "learning_rate": 0.0001983270649487481,
      "loss": 0.0817,
      "step": 63
    },
    {
      "epoch": 0.06964091403699674,
      "grad_norm": 0.09562050551176071,
      "learning_rate": 0.00019826352482222638,
      "loss": 0.0809,
      "step": 64
    },
    {
      "epoch": 0.07072905331882481,
      "grad_norm": 0.10410594195127487,
      "learning_rate": 0.00019819881097737915,
      "loss": 0.0801,
      "step": 65
    },
    {
      "epoch": 0.07181719260065289,
      "grad_norm": 0.0836932361125946,
      "learning_rate": 0.00019813292418718732,
      "loss": 0.0775,
      "step": 66
    },
    {
      "epoch": 0.07290533188248095,
      "grad_norm": 0.09397463500499725,
      "learning_rate": 0.0001980658652386421,
      "loss": 0.065,
      "step": 67
    },
    {
      "epoch": 0.07399347116430903,
      "grad_norm": 0.108340784907341,
      "learning_rate": 0.0001979976349327357,
      "loss": 0.0981,
      "step": 68
    },
    {
      "epoch": 0.0750816104461371,
      "grad_norm": 0.08717814087867737,
      "learning_rate": 0.00019792823408445174,
      "loss": 0.0798,
      "step": 69
    },
    {
      "epoch": 0.07616974972796518,
      "grad_norm": 0.1279960721731186,
      "learning_rate": 0.00019785766352275542,
      "loss": 0.0993,
      "step": 70
    },
    {
      "epoch": 0.07725788900979326,
      "grad_norm": 0.13422514498233795,
      "learning_rate": 0.00019778592409058378,
      "loss": 0.1023,
      "step": 71
    },
    {
      "epoch": 0.07834602829162132,
      "grad_norm": 0.10113417357206345,
      "learning_rate": 0.0001977130166448355,
      "loss": 0.0742,
      "step": 72
    },
    {
      "epoch": 0.0794341675734494,
      "grad_norm": 0.1026310920715332,
      "learning_rate": 0.00019763894205636072,
      "loss": 0.0988,
      "step": 73
    },
    {
      "epoch": 0.08052230685527748,
      "grad_norm": 0.09779265522956848,
      "learning_rate": 0.00019756370120995066,
      "loss": 0.0738,
      "step": 74
    },
    {
      "epoch": 0.08161044613710555,
      "grad_norm": 0.14643464982509613,
      "learning_rate": 0.000197487295004327,
      "loss": 0.09,
      "step": 75
    },
    {
      "epoch": 0.08269858541893363,
      "grad_norm": 0.11976644396781921,
      "learning_rate": 0.00019740972435213115,
      "loss": 0.0904,
      "step": 76
    },
    {
      "epoch": 0.08378672470076169,
      "grad_norm": 0.10904321819543839,
      "learning_rate": 0.00019733099017991341,
      "loss": 0.0766,
      "step": 77
    },
    {
      "epoch": 0.08487486398258977,
      "grad_norm": 0.1651339828968048,
      "learning_rate": 0.0001972510934281218,
      "loss": 0.1186,
      "step": 78
    },
    {
      "epoch": 0.08596300326441784,
      "grad_norm": 0.11781762540340424,
      "learning_rate": 0.00019717003505109095,
      "loss": 0.0833,
      "step": 79
    },
    {
      "epoch": 0.08705114254624592,
      "grad_norm": 0.15122370421886444,
      "learning_rate": 0.00019708781601703065,
      "loss": 0.1166,
      "step": 80
    },
    {
      "epoch": 0.088139281828074,
      "grad_norm": 0.1798838973045349,
      "learning_rate": 0.00019700443730801413,
      "loss": 0.1109,
      "step": 81
    },
    {
      "epoch": 0.08922742110990206,
      "grad_norm": 0.18025629222393036,
      "learning_rate": 0.00019691989991996663,
      "loss": 0.1243,
      "step": 82
    },
    {
      "epoch": 0.09031556039173014,
      "grad_norm": 0.1731874644756317,
      "learning_rate": 0.00019683420486265327,
      "loss": 0.1189,
      "step": 83
    },
    {
      "epoch": 0.09140369967355821,
      "grad_norm": 0.2220824509859085,
      "learning_rate": 0.0001967473531596671,
      "loss": 0.1451,
      "step": 84
    },
    {
      "epoch": 0.09249183895538629,
      "grad_norm": 0.1664338856935501,
      "learning_rate": 0.00019665934584841682,
      "loss": 0.0897,
      "step": 85
    },
    {
      "epoch": 0.09357997823721437,
      "grad_norm": 0.17619486153125763,
      "learning_rate": 0.00019657018398011434,
      "loss": 0.0935,
      "step": 86
    },
    {
      "epoch": 0.09466811751904244,
      "grad_norm": 0.24987219274044037,
      "learning_rate": 0.00019647986861976246,
      "loss": 0.1955,
      "step": 87
    },
    {
      "epoch": 0.0957562568008705,
      "grad_norm": 0.21318784356117249,
      "learning_rate": 0.00019638840084614182,
      "loss": 0.1131,
      "step": 88
    },
    {
      "epoch": 0.09684439608269858,
      "grad_norm": 0.3128167390823364,
      "learning_rate": 0.0001962957817517982,
      "loss": 0.2011,
      "step": 89
    },
    {
      "epoch": 0.09793253536452666,
      "grad_norm": 0.2833835184574127,
      "learning_rate": 0.00019620201244302952,
      "loss": 0.1592,
      "step": 90
    },
    {
      "epoch": 0.09902067464635474,
      "grad_norm": 0.3286789357662201,
      "learning_rate": 0.00019610709403987246,
      "loss": 0.1602,
      "step": 91
    },
    {
      "epoch": 0.10010881392818281,
      "grad_norm": 0.44117358326911926,
      "learning_rate": 0.00019601102767608923,
      "loss": 0.2323,
      "step": 92
    },
    {
      "epoch": 0.10119695321001088,
      "grad_norm": 0.3795579671859741,
      "learning_rate": 0.00019591381449915397,
      "loss": 0.1867,
      "step": 93
    },
    {
      "epoch": 0.10228509249183895,
      "grad_norm": 0.5780506730079651,
      "learning_rate": 0.000195815455670239,
      "loss": 0.1884,
      "step": 94
    },
    {
      "epoch": 0.10337323177366703,
      "grad_norm": 0.5124024748802185,
      "learning_rate": 0.00019571595236420102,
      "loss": 0.221,
      "step": 95
    },
    {
      "epoch": 0.1044613710554951,
      "grad_norm": 0.4628782272338867,
      "learning_rate": 0.00019561530576956703,
      "loss": 0.208,
      "step": 96
    },
    {
      "epoch": 0.10554951033732318,
      "grad_norm": 0.3904087543487549,
      "learning_rate": 0.0001955135170885202,
      "loss": 0.2029,
      "step": 97
    },
    {
      "epoch": 0.10663764961915125,
      "grad_norm": 0.513387143611908,
      "learning_rate": 0.00019541058753688538,
      "loss": 0.2248,
      "step": 98
    },
    {
      "epoch": 0.10772578890097932,
      "grad_norm": 0.6133597493171692,
      "learning_rate": 0.00019530651834411474,
      "loss": 0.3751,
      "step": 99
    },
    {
      "epoch": 0.1088139281828074,
      "grad_norm": 0.6515563726425171,
      "learning_rate": 0.00019520131075327298,
      "loss": 0.2096,
      "step": 100
    },
    {
      "epoch": 0.10990206746463548,
      "grad_norm": 0.07718291878700256,
      "learning_rate": 0.00019509496602102252,
      "loss": 0.0631,
      "step": 101
    },
    {
      "epoch": 0.11099020674646355,
      "grad_norm": 0.07896394282579422,
      "learning_rate": 0.00019498748541760846,
      "loss": 0.095,
      "step": 102
    },
    {
      "epoch": 0.11207834602829161,
      "grad_norm": 0.07955438643693924,
      "learning_rate": 0.00019487887022684336,
      "loss": 0.067,
      "step": 103
    },
    {
      "epoch": 0.11316648531011969,
      "grad_norm": 0.08391376584768295,
      "learning_rate": 0.0001947691217460921,
      "loss": 0.0637,
      "step": 104
    },
    {
      "epoch": 0.11425462459194777,
      "grad_norm": 0.07990088313817978,
      "learning_rate": 0.00019465824128625617,
      "loss": 0.0569,
      "step": 105
    },
    {
      "epoch": 0.11534276387377584,
      "grad_norm": 0.09000790864229202,
      "learning_rate": 0.00019454623017175812,
      "loss": 0.0639,
      "step": 106
    },
    {
      "epoch": 0.11643090315560392,
      "grad_norm": 0.0831453874707222,
      "learning_rate": 0.0001944330897405257,
      "loss": 0.0766,
      "step": 107
    },
    {
      "epoch": 0.117519042437432,
      "grad_norm": 0.08180610835552216,
      "learning_rate": 0.00019431882134397598,
      "loss": 0.0806,
      "step": 108
    },
    {
      "epoch": 0.11860718171926006,
      "grad_norm": 0.07601706683635712,
      "learning_rate": 0.0001942034263469989,
      "loss": 0.0727,
      "step": 109
    },
    {
      "epoch": 0.11969532100108814,
      "grad_norm": 0.08873546868562698,
      "learning_rate": 0.00019408690612794148,
      "loss": 0.0878,
      "step": 110
    },
    {
      "epoch": 0.12078346028291621,
      "grad_norm": 0.10206414759159088,
      "learning_rate": 0.00019396926207859084,
      "loss": 0.1171,
      "step": 111
    },
    {
      "epoch": 0.12187159956474429,
      "grad_norm": 0.07465587556362152,
      "learning_rate": 0.00019385049560415794,
      "loss": 0.0603,
      "step": 112
    },
    {
      "epoch": 0.12295973884657237,
      "grad_norm": 0.09952360391616821,
      "learning_rate": 0.00019373060812326052,
      "loss": 0.0923,
      "step": 113
    },
    {
      "epoch": 0.12404787812840043,
      "grad_norm": 0.09894778579473495,
      "learning_rate": 0.00019360960106790643,
      "loss": 0.0795,
      "step": 114
    },
    {
      "epoch": 0.1251360174102285,
      "grad_norm": 0.09721358120441437,
      "learning_rate": 0.00019348747588347637,
      "loss": 0.1103,
      "step": 115
    },
    {
      "epoch": 0.12622415669205658,
      "grad_norm": 0.10310002416372299,
      "learning_rate": 0.00019336423402870653,
      "loss": 0.1037,
      "step": 116
    },
    {
      "epoch": 0.12731229597388466,
      "grad_norm": 0.10904382914304733,
      "learning_rate": 0.0001932398769756714,
      "loss": 0.1063,
      "step": 117
    },
    {
      "epoch": 0.12840043525571274,
      "grad_norm": 0.11545544862747192,
      "learning_rate": 0.00019311440620976597,
      "loss": 0.096,
      "step": 118
    },
    {
      "epoch": 0.1294885745375408,
      "grad_norm": 0.08674886077642441,
      "learning_rate": 0.00019298782322968815,
      "loss": 0.0779,
      "step": 119
    },
    {
      "epoch": 0.1305767138193689,
      "grad_norm": 0.09437372535467148,
      "learning_rate": 0.0001928601295474208,
      "loss": 0.0975,
      "step": 120
    },
    {
      "epoch": 0.13166485310119697,
      "grad_norm": 0.1258208006620407,
      "learning_rate": 0.00019273132668821364,
      "loss": 0.1277,
      "step": 121
    },
    {
      "epoch": 0.13275299238302501,
      "grad_norm": 0.09919868409633636,
      "learning_rate": 0.00019260141619056507,
      "loss": 0.0993,
      "step": 122
    },
    {
      "epoch": 0.1338411316648531,
      "grad_norm": 0.12028370052576065,
      "learning_rate": 0.0001924703996062038,
      "loss": 0.0751,
      "step": 123
    },
    {
      "epoch": 0.13492927094668117,
      "grad_norm": 0.10702817142009735,
      "learning_rate": 0.00019233827850007027,
      "loss": 0.0949,
      "step": 124
    },
    {
      "epoch": 0.13601741022850924,
      "grad_norm": 0.10939855128526688,
      "learning_rate": 0.000192205054450298,
      "loss": 0.0977,
      "step": 125
    },
    {
      "epoch": 0.13710554951033732,
      "grad_norm": 0.11803679168224335,
      "learning_rate": 0.00019207072904819486,
      "loss": 0.0877,
      "step": 126
    },
    {
      "epoch": 0.1381936887921654,
      "grad_norm": 0.1382649838924408,
      "learning_rate": 0.00019193530389822363,
      "loss": 0.1017,
      "step": 127
    },
    {
      "epoch": 0.13928182807399347,
      "grad_norm": 0.1433139145374298,
      "learning_rate": 0.00019179878061798347,
      "loss": 0.0748,
      "step": 128
    },
    {
      "epoch": 0.14036996735582155,
      "grad_norm": 0.14679527282714844,
      "learning_rate": 0.00019166116083819002,
      "loss": 0.1164,
      "step": 129
    },
    {
      "epoch": 0.14145810663764963,
      "grad_norm": 0.13680118322372437,
      "learning_rate": 0.0001915224462026563,
      "loss": 0.1149,
      "step": 130
    },
    {
      "epoch": 0.1425462459194777,
      "grad_norm": 0.16263848543167114,
      "learning_rate": 0.00019138263836827288,
      "loss": 0.1192,
      "step": 131
    },
    {
      "epoch": 0.14363438520130578,
      "grad_norm": 0.16534928977489471,
      "learning_rate": 0.00019124173900498818,
      "loss": 0.1138,
      "step": 132
    },
    {
      "epoch": 0.14472252448313383,
      "grad_norm": 0.21276706457138062,
      "learning_rate": 0.0001910997497957885,
      "loss": 0.1461,
      "step": 133
    },
    {
      "epoch": 0.1458106637649619,
      "grad_norm": 0.2375650703907013,
      "learning_rate": 0.0001909566724366779,
      "loss": 0.2031,
      "step": 134
    },
    {
      "epoch": 0.14689880304678998,
      "grad_norm": 0.20974405109882355,
      "learning_rate": 0.00019081250863665794,
      "loss": 0.1285,
      "step": 135
    },
    {
      "epoch": 0.14798694232861806,
      "grad_norm": 0.308624267578125,
      "learning_rate": 0.00019066726011770726,
      "loss": 0.1717,
      "step": 136
    },
    {
      "epoch": 0.14907508161044614,
      "grad_norm": 0.21192695200443268,
      "learning_rate": 0.0001905209286147611,
      "loss": 0.1064,
      "step": 137
    },
    {
      "epoch": 0.1501632208922742,
      "grad_norm": 0.20596542954444885,
      "learning_rate": 0.0001903735158756905,
      "loss": 0.0975,
      "step": 138
    },
    {
      "epoch": 0.1512513601741023,
      "grad_norm": 0.21547934412956238,
      "learning_rate": 0.00019022502366128135,
      "loss": 0.1255,
      "step": 139
    },
    {
      "epoch": 0.15233949945593037,
      "grad_norm": 0.276815801858902,
      "learning_rate": 0.00019007545374521355,
      "loss": 0.0868,
      "step": 140
    },
    {
      "epoch": 0.15342763873775844,
      "grad_norm": 0.37251031398773193,
      "learning_rate": 0.00018992480791403958,
      "loss": 0.1078,
      "step": 141
    },
    {
      "epoch": 0.15451577801958652,
      "grad_norm": 0.328265517950058,
      "learning_rate": 0.0001897730879671634,
      "loss": 0.1842,
      "step": 142
    },
    {
      "epoch": 0.15560391730141457,
      "grad_norm": 0.4400005340576172,
      "learning_rate": 0.00018962029571681886,
      "loss": 0.1857,
      "step": 143
    },
    {
      "epoch": 0.15669205658324264,
      "grad_norm": 0.28378888964653015,
      "learning_rate": 0.00018946643298804793,
      "loss": 0.0955,
      "step": 144
    },
    {
      "epoch": 0.15778019586507072,
      "grad_norm": 0.609008252620697,
      "learning_rate": 0.00018931150161867916,
      "loss": 0.3827,
      "step": 145
    },
    {
      "epoch": 0.1588683351468988,
      "grad_norm": 0.3973180055618286,
      "learning_rate": 0.0001891555034593055,
      "loss": 0.1844,
      "step": 146
    },
    {
      "epoch": 0.15995647442872687,
      "grad_norm": 0.36245423555374146,
      "learning_rate": 0.00018899844037326225,
      "loss": 0.2005,
      "step": 147
    },
    {
      "epoch": 0.16104461371055495,
      "grad_norm": 0.5730637311935425,
      "learning_rate": 0.0001888403142366049,
      "loss": 0.2742,
      "step": 148
    },
    {
      "epoch": 0.16213275299238303,
      "grad_norm": 0.5383718013763428,
      "learning_rate": 0.00018868112693808665,
      "loss": 0.2249,
      "step": 149
    },
    {
      "epoch": 0.1632208922742111,
      "grad_norm": 0.9835379123687744,
      "learning_rate": 0.00018852088037913577,
      "loss": 0.344,
      "step": 150
    },
    {
      "epoch": 0.16430903155603918,
      "grad_norm": 0.09142426401376724,
      "learning_rate": 0.00018835957647383303,
      "loss": 0.0876,
      "step": 151
    },
    {
      "epoch": 0.16539717083786726,
      "grad_norm": 0.09199874103069305,
      "learning_rate": 0.00018819721714888877,
      "loss": 0.0798,
      "step": 152
    },
    {
      "epoch": 0.16648531011969533,
      "grad_norm": 0.08299195021390915,
      "learning_rate": 0.00018803380434362,
      "loss": 0.0746,
      "step": 153
    },
    {
      "epoch": 0.16757344940152338,
      "grad_norm": 0.10273315012454987,
      "learning_rate": 0.00018786934000992688,
      "loss": 0.101,
      "step": 154
    },
    {
      "epoch": 0.16866158868335146,
      "grad_norm": 0.08524151146411896,
      "learning_rate": 0.00018770382611226987,
      "loss": 0.0684,
      "step": 155
    },
    {
      "epoch": 0.16974972796517954,
      "grad_norm": 0.10515481233596802,
      "learning_rate": 0.000187537264627646,
      "loss": 0.0809,
      "step": 156
    },
    {
      "epoch": 0.1708378672470076,
      "grad_norm": 0.09009183198213577,
      "learning_rate": 0.00018736965754556528,
      "loss": 0.0955,
      "step": 157
    },
    {
      "epoch": 0.1719260065288357,
      "grad_norm": 0.10571747273206711,
      "learning_rate": 0.00018720100686802694,
      "loss": 0.0847,
      "step": 158
    },
    {
      "epoch": 0.17301414581066377,
      "grad_norm": 0.08275768160820007,
      "learning_rate": 0.00018703131460949554,
      "loss": 0.0799,
      "step": 159
    },
    {
      "epoch": 0.17410228509249184,
      "grad_norm": 0.08440782129764557,
      "learning_rate": 0.00018686058279687698,
      "loss": 0.0672,
      "step": 160
    },
    {
      "epoch": 0.17519042437431992,
      "grad_norm": 0.10261505097150803,
      "learning_rate": 0.00018668881346949417,
      "loss": 0.1004,
      "step": 161
    },
    {
      "epoch": 0.176278563656148,
      "grad_norm": 0.08730655908584595,
      "learning_rate": 0.00018651600867906272,
      "loss": 0.0711,
      "step": 162
    },
    {
      "epoch": 0.17736670293797607,
      "grad_norm": 0.10591359436511993,
      "learning_rate": 0.00018634217048966637,
      "loss": 0.0919,
      "step": 163
    },
    {
      "epoch": 0.17845484221980412,
      "grad_norm": 0.09251653403043747,
      "learning_rate": 0.0001861673009777325,
      "loss": 0.078,
      "step": 164
    },
    {
      "epoch": 0.1795429815016322,
      "grad_norm": 0.106822170317173,
      "learning_rate": 0.00018599140223200716,
      "loss": 0.0895,
      "step": 165
    },
    {
      "epoch": 0.18063112078346028,
      "grad_norm": 0.11222364753484726,
      "learning_rate": 0.0001858144763535302,
      "loss": 0.0918,
      "step": 166
    },
    {
      "epoch": 0.18171926006528835,
      "grad_norm": 0.1363314390182495,
      "learning_rate": 0.00018563652545561013,
      "loss": 0.1126,
      "step": 167
    },
    {
      "epoch": 0.18280739934711643,
      "grad_norm": 0.09316174685955048,
      "learning_rate": 0.000185457551663799,
      "loss": 0.0799,
      "step": 168
    },
    {
      "epoch": 0.1838955386289445,
      "grad_norm": 0.13098089396953583,
      "learning_rate": 0.00018527755711586678,
      "loss": 0.0994,
      "step": 169
    },
    {
      "epoch": 0.18498367791077258,
      "grad_norm": 0.11433115601539612,
      "learning_rate": 0.00018509654396177609,
      "loss": 0.1072,
      "step": 170
    },
    {
      "epoch": 0.18607181719260066,
      "grad_norm": 0.11261814087629318,
      "learning_rate": 0.00018491451436365627,
      "loss": 0.1011,
      "step": 171
    },
    {
      "epoch": 0.18715995647442873,
      "grad_norm": 0.1038559302687645,
      "learning_rate": 0.00018473147049577774,
      "loss": 0.0746,
      "step": 172
    },
    {
      "epoch": 0.1882480957562568,
      "grad_norm": 0.11395396292209625,
      "learning_rate": 0.00018454741454452603,
      "loss": 0.0792,
      "step": 173
    },
    {
      "epoch": 0.1893362350380849,
      "grad_norm": 0.13332821428775787,
      "learning_rate": 0.00018436234870837547,
      "loss": 0.1041,
      "step": 174
    },
    {
      "epoch": 0.19042437431991294,
      "grad_norm": 0.12438289821147919,
      "learning_rate": 0.00018417627519786315,
      "loss": 0.1066,
      "step": 175
    },
    {
      "epoch": 0.191512513601741,
      "grad_norm": 0.1353287398815155,
      "learning_rate": 0.00018398919623556238,
      "loss": 0.1193,
      "step": 176
    },
    {
      "epoch": 0.1926006528835691,
      "grad_norm": 0.13928581774234772,
      "learning_rate": 0.0001838011140560562,
      "loss": 0.1228,
      "step": 177
    },
    {
      "epoch": 0.19368879216539717,
      "grad_norm": 0.1474994421005249,
      "learning_rate": 0.00018361203090591071,
      "loss": 0.0812,
      "step": 178
    },
    {
      "epoch": 0.19477693144722524,
      "grad_norm": 0.1910678595304489,
      "learning_rate": 0.00018342194904364813,
      "loss": 0.1435,
      "step": 179
    },
    {
      "epoch": 0.19586507072905332,
      "grad_norm": 0.16526034474372864,
      "learning_rate": 0.00018323087073971993,
      "loss": 0.1136,
      "step": 180
    },
    {
      "epoch": 0.1969532100108814,
      "grad_norm": 0.1933068335056305,
      "learning_rate": 0.00018303879827647975,
      "loss": 0.1498,
      "step": 181
    },
    {
      "epoch": 0.19804134929270947,
      "grad_norm": 0.1647845059633255,
      "learning_rate": 0.00018284573394815597,
      "loss": 0.0764,
      "step": 182
    },
    {
      "epoch": 0.19912948857453755,
      "grad_norm": 0.19414739310741425,
      "learning_rate": 0.00018265168006082437,
      "loss": 0.1142,
      "step": 183
    },
    {
      "epoch": 0.20021762785636563,
      "grad_norm": 0.1872360110282898,
      "learning_rate": 0.00018245663893238075,
      "loss": 0.1169,
      "step": 184
    },
    {
      "epoch": 0.20130576713819368,
      "grad_norm": 0.19919492304325104,
      "learning_rate": 0.00018226061289251298,
      "loss": 0.0854,
      "step": 185
    },
    {
      "epoch": 0.20239390642002175,
      "grad_norm": 0.2233375757932663,
      "learning_rate": 0.00018206360428267332,
      "loss": 0.1271,
      "step": 186
    },
    {
      "epoch": 0.20348204570184983,
      "grad_norm": 0.22116345167160034,
      "learning_rate": 0.00018186561545605054,
      "loss": 0.1402,
      "step": 187
    },
    {
      "epoch": 0.2045701849836779,
      "grad_norm": 0.253864049911499,
      "learning_rate": 0.0001816666487775416,
      "loss": 0.1431,
      "step": 188
    },
    {
      "epoch": 0.20565832426550598,
      "grad_norm": 0.2945636212825775,
      "learning_rate": 0.00018146670662372354,
      "loss": 0.1284,
      "step": 189
    },
    {
      "epoch": 0.20674646354733406,
      "grad_norm": 0.24834126234054565,
      "learning_rate": 0.00018126579138282503,
      "loss": 0.098,
      "step": 190
    },
    {
      "epoch": 0.20783460282916214,
      "grad_norm": 0.26815730333328247,
      "learning_rate": 0.00018106390545469795,
      "loss": 0.0877,
      "step": 191
    },
    {
      "epoch": 0.2089227421109902,
      "grad_norm": 0.375293493270874,
      "learning_rate": 0.00018086105125078857,
      "loss": 0.1985,
      "step": 192
    },
    {
      "epoch": 0.2100108813928183,
      "grad_norm": 0.4025906026363373,
      "learning_rate": 0.00018065723119410884,
      "loss": 0.2082,
      "step": 193
    },
    {
      "epoch": 0.21109902067464636,
      "grad_norm": 0.3551553785800934,
      "learning_rate": 0.0001804524477192075,
      "loss": 0.2305,
      "step": 194
    },
    {
      "epoch": 0.21218715995647444,
      "grad_norm": 0.594780445098877,
      "learning_rate": 0.00018024670327214084,
      "loss": 0.2713,
      "step": 195
    },
    {
      "epoch": 0.2132752992383025,
      "grad_norm": 0.3940027356147766,
      "learning_rate": 0.0001800400003104436,
      "loss": 0.1623,
      "step": 196
    },
    {
      "epoch": 0.21436343852013057,
      "grad_norm": 0.51041579246521,
      "learning_rate": 0.00017983234130309968,
      "loss": 0.2236,
      "step": 197
    },
    {
      "epoch": 0.21545157780195864,
      "grad_norm": 0.6203753352165222,
      "learning_rate": 0.00017962372873051252,
      "loss": 0.2654,
      "step": 198
    },
    {
      "epoch": 0.21653971708378672,
      "grad_norm": 0.7527713179588318,
      "learning_rate": 0.00017941416508447536,
      "loss": 0.2088,
      "step": 199
    },
    {
      "epoch": 0.2176278563656148,
      "grad_norm": 1.1047406196594238,
      "learning_rate": 0.00017920365286814183,
      "loss": 0.3097,
      "step": 200
    },
    {
      "epoch": 0.21871599564744287,
      "grad_norm": 0.0492124930024147,
      "learning_rate": 0.0001789921945959958,
      "loss": 0.0344,
      "step": 201
    },
    {
      "epoch": 0.21980413492927095,
      "grad_norm": 0.07087790220975876,
      "learning_rate": 0.00017877979279382135,
      "loss": 0.0582,
      "step": 202
    },
    {
      "epoch": 0.22089227421109903,
      "grad_norm": 0.07622935622930527,
      "learning_rate": 0.00017856644999867264,
      "loss": 0.062,
      "step": 203
    },
    {
      "epoch": 0.2219804134929271,
      "grad_norm": 0.08792652189731598,
      "learning_rate": 0.00017835216875884368,
      "loss": 0.0511,
      "step": 204
    },
    {
      "epoch": 0.22306855277475518,
      "grad_norm": 0.08028998970985413,
      "learning_rate": 0.0001781369516338378,
      "loss": 0.0665,
      "step": 205
    },
    {
      "epoch": 0.22415669205658323,
      "grad_norm": 0.08997032046318054,
      "learning_rate": 0.0001779208011943371,
      "loss": 0.069,
      "step": 206
    },
    {
      "epoch": 0.2252448313384113,
      "grad_norm": 0.08684886246919632,
      "learning_rate": 0.00017770372002217172,
      "loss": 0.077,
      "step": 207
    },
    {
      "epoch": 0.22633297062023938,
      "grad_norm": 0.0965440422296524,
      "learning_rate": 0.000177485710710289,
      "loss": 0.0782,
      "step": 208
    },
    {
      "epoch": 0.22742110990206746,
      "grad_norm": 0.09060367196798325,
      "learning_rate": 0.00017726677586272263,
      "loss": 0.066,
      "step": 209
    },
    {
      "epoch": 0.22850924918389554,
      "grad_norm": 0.0900409147143364,
      "learning_rate": 0.00017704691809456143,
      "loss": 0.0707,
      "step": 210
    },
    {
      "epoch": 0.2295973884657236,
      "grad_norm": 0.10733999311923981,
      "learning_rate": 0.00017682614003191807,
      "loss": 0.0916,
      "step": 211
    },
    {
      "epoch": 0.2306855277475517,
      "grad_norm": 0.09372083842754364,
      "learning_rate": 0.0001766044443118978,
      "loss": 0.0872,
      "step": 212
    },
    {
      "epoch": 0.23177366702937977,
      "grad_norm": 0.10344577580690384,
      "learning_rate": 0.00017638183358256696,
      "loss": 0.0903,
      "step": 213
    },
    {
      "epoch": 0.23286180631120784,
      "grad_norm": 0.1084800437092781,
      "learning_rate": 0.0001761583105029213,
      "loss": 0.0926,
      "step": 214
    },
    {
      "epoch": 0.23394994559303592,
      "grad_norm": 0.08565113693475723,
      "learning_rate": 0.00017593387774285412,
      "loss": 0.0758,
      "step": 215
    },
    {
      "epoch": 0.235038084874864,
      "grad_norm": 0.11589045077562332,
      "learning_rate": 0.0001757085379831246,
      "loss": 0.0925,
      "step": 216
    },
    {
      "epoch": 0.23612622415669204,
      "grad_norm": 0.12087468057870865,
      "learning_rate": 0.00017548229391532572,
      "loss": 0.1012,
      "step": 217
    },
    {
      "epoch": 0.23721436343852012,
      "grad_norm": 0.1125798150897026,
      "learning_rate": 0.00017525514824185185,
      "loss": 0.109,
      "step": 218
    },
    {
      "epoch": 0.2383025027203482,
      "grad_norm": 0.12492644041776657,
      "learning_rate": 0.00017502710367586687,
      "loss": 0.1048,
      "step": 219
    },
    {
      "epoch": 0.23939064200217627,
      "grad_norm": 0.09837982058525085,
      "learning_rate": 0.00017479816294127152,
      "loss": 0.0803,
      "step": 220
    },
    {
      "epoch": 0.24047878128400435,
      "grad_norm": 0.099558524787426,
      "learning_rate": 0.00017456832877267084,
      "loss": 0.0552,
      "step": 221
    },
    {
      "epoch": 0.24156692056583243,
      "grad_norm": 0.095551498234272,
      "learning_rate": 0.00017433760391534167,
      "loss": 0.0905,
      "step": 222
    },
    {
      "epoch": 0.2426550598476605,
      "grad_norm": 0.11664412170648575,
      "learning_rate": 0.0001741059911251997,
      "loss": 0.1005,
      "step": 223
    },
    {
      "epoch": 0.24374319912948858,
      "grad_norm": 0.1248706802725792,
      "learning_rate": 0.00017387349316876666,
      "loss": 0.1135,
      "step": 224
    },
    {
      "epoch": 0.24483133841131666,
      "grad_norm": 0.13133874535560608,
      "learning_rate": 0.0001736401128231373,
      "loss": 0.121,
      "step": 225
    },
    {
      "epoch": 0.24591947769314473,
      "grad_norm": 0.12476039677858353,
      "learning_rate": 0.00017340585287594604,
      "loss": 0.1025,
      "step": 226
    },
    {
      "epoch": 0.2470076169749728,
      "grad_norm": 0.1645650863647461,
      "learning_rate": 0.0001731707161253338,
      "loss": 0.1313,
      "step": 227
    },
    {
      "epoch": 0.24809575625680086,
      "grad_norm": 0.1172671690583229,
      "learning_rate": 0.00017293470537991463,
      "loss": 0.0801,
      "step": 228
    },
    {
      "epoch": 0.24918389553862894,
      "grad_norm": 0.17031441628932953,
      "learning_rate": 0.00017269782345874203,
      "loss": 0.154,
      "step": 229
    },
    {
      "epoch": 0.250272034820457,
      "grad_norm": 0.16571593284606934,
      "learning_rate": 0.00017246007319127545,
      "loss": 0.1209,
      "step": 230
    },
    {
      "epoch": 0.250272034820457,
      "eval_loss": 0.12318640202283859,
      "eval_runtime": 24.4163,
      "eval_samples_per_second": 15.85,
      "eval_steps_per_second": 7.945,
      "step": 230
    }
  ],
  "logging_steps": 1,
  "max_steps": 919,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 230,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 9.305676486121882e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}