|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9900990099009901, |
|
"eval_steps": 500, |
|
"global_step": 800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 73.68818664550781, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 8.0388, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 71.36270904541016, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 8.0003, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 73.16751861572266, |
|
"learning_rate": 3e-06, |
|
"loss": 7.9032, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 74.18943786621094, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 7.921, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 70.63272857666016, |
|
"learning_rate": 5e-06, |
|
"loss": 8.032, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 64.6897964477539, |
|
"learning_rate": 6e-06, |
|
"loss": 7.68, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 65.79997253417969, |
|
"learning_rate": 7e-06, |
|
"loss": 7.5291, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 63.4569091796875, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 5.6132, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 63.28990173339844, |
|
"learning_rate": 9e-06, |
|
"loss": 5.0102, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 46.30258560180664, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2227, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 46.01011657714844, |
|
"learning_rate": 9.99999848074862e-06, |
|
"loss": 1.6679, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 42.595951080322266, |
|
"learning_rate": 9.9999939229954e-06, |
|
"loss": 1.5493, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.979974746704102, |
|
"learning_rate": 9.999986326743111e-06, |
|
"loss": 0.892, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 13.096778869628906, |
|
"learning_rate": 9.99997569199637e-06, |
|
"loss": 0.9386, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 35.61207962036133, |
|
"learning_rate": 9.99996201876164e-06, |
|
"loss": 1.3573, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 18.184959411621094, |
|
"learning_rate": 9.999945307047228e-06, |
|
"loss": 0.9778, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.461019992828369, |
|
"learning_rate": 9.99992555686329e-06, |
|
"loss": 1.0665, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 4.743849277496338, |
|
"learning_rate": 9.99990276822183e-06, |
|
"loss": 0.5975, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 5.654608726501465, |
|
"learning_rate": 9.999876941136697e-06, |
|
"loss": 0.856, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 5.488308906555176, |
|
"learning_rate": 9.999848075623584e-06, |
|
"loss": 0.7874, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.833119869232178, |
|
"learning_rate": 9.999816171700034e-06, |
|
"loss": 0.8777, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.678900718688965, |
|
"learning_rate": 9.999781229385433e-06, |
|
"loss": 0.5888, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.357454776763916, |
|
"learning_rate": 9.99974324870102e-06, |
|
"loss": 0.9263, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.7684149742126465, |
|
"learning_rate": 9.99970222966987e-06, |
|
"loss": 0.7734, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.77016019821167, |
|
"learning_rate": 9.999658172316915e-06, |
|
"loss": 0.7735, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.0211334228515625, |
|
"learning_rate": 9.999611076668926e-06, |
|
"loss": 0.5645, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 7.3770222663879395, |
|
"learning_rate": 9.999560942754525e-06, |
|
"loss": 1.0185, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.433741569519043, |
|
"learning_rate": 9.999507770604177e-06, |
|
"loss": 0.3547, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.0549492835998535, |
|
"learning_rate": 9.999451560250196e-06, |
|
"loss": 0.4961, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 7.8142619132995605, |
|
"learning_rate": 9.999392311726738e-06, |
|
"loss": 0.4398, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.785826683044434, |
|
"learning_rate": 9.999330025069812e-06, |
|
"loss": 0.6431, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.010104656219482, |
|
"learning_rate": 9.999264700317268e-06, |
|
"loss": 0.6129, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 8.289867401123047, |
|
"learning_rate": 9.999196337508804e-06, |
|
"loss": 0.3771, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.59083890914917, |
|
"learning_rate": 9.999124936685965e-06, |
|
"loss": 0.3964, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.018394947052002, |
|
"learning_rate": 9.99905049789214e-06, |
|
"loss": 0.4801, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 10.878011703491211, |
|
"learning_rate": 9.998973021172564e-06, |
|
"loss": 0.2996, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 31.676380157470703, |
|
"learning_rate": 9.998892506574325e-06, |
|
"loss": 0.5261, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 16.133407592773438, |
|
"learning_rate": 9.998808954146347e-06, |
|
"loss": 0.3843, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 8.785749435424805, |
|
"learning_rate": 9.998722363939407e-06, |
|
"loss": 0.2476, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.326422214508057, |
|
"learning_rate": 9.998632736006124e-06, |
|
"loss": 0.2334, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.171711444854736, |
|
"learning_rate": 9.998540070400966e-06, |
|
"loss": 0.1671, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.5893757343292236, |
|
"learning_rate": 9.998444367180247e-06, |
|
"loss": 0.1732, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.918233633041382, |
|
"learning_rate": 9.998345626402124e-06, |
|
"loss": 0.1127, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.620290517807007, |
|
"learning_rate": 9.998243848126604e-06, |
|
"loss": 0.1337, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.31186056137085, |
|
"learning_rate": 9.998139032415534e-06, |
|
"loss": 0.144, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.399256706237793, |
|
"learning_rate": 9.998031179332618e-06, |
|
"loss": 0.0878, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.658913016319275, |
|
"learning_rate": 9.997920288943388e-06, |
|
"loss": 0.0651, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.0306262969970703, |
|
"learning_rate": 9.99780636131524e-06, |
|
"loss": 0.1051, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.099931478500366, |
|
"learning_rate": 9.997689396517408e-06, |
|
"loss": 0.0937, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.8879055976867676, |
|
"learning_rate": 9.997569394620965e-06, |
|
"loss": 0.0701, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.2706345319747925, |
|
"learning_rate": 9.997446355698843e-06, |
|
"loss": 0.0559, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.7181872129440308, |
|
"learning_rate": 9.99732027982581e-06, |
|
"loss": 0.06, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.4794338941574097, |
|
"learning_rate": 9.997191167078479e-06, |
|
"loss": 0.0715, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.40012788772583, |
|
"learning_rate": 9.99705901753532e-06, |
|
"loss": 0.0608, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.034327983856201, |
|
"learning_rate": 9.996923831276632e-06, |
|
"loss": 0.0603, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.1301584243774414, |
|
"learning_rate": 9.996785608384573e-06, |
|
"loss": 0.0762, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.8218867778778076, |
|
"learning_rate": 9.996644348943141e-06, |
|
"loss": 0.0956, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.7874306440353394, |
|
"learning_rate": 9.996500053038176e-06, |
|
"loss": 0.0693, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.582387089729309, |
|
"learning_rate": 9.99635272075737e-06, |
|
"loss": 0.0679, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.9699870347976685, |
|
"learning_rate": 9.996202352190256e-06, |
|
"loss": 0.0565, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.0471878051757812, |
|
"learning_rate": 9.996048947428212e-06, |
|
"loss": 0.0725, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.3680695295333862, |
|
"learning_rate": 9.995892506564461e-06, |
|
"loss": 0.0586, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.124834060668945, |
|
"learning_rate": 9.995733029694077e-06, |
|
"loss": 0.0724, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.7247884273529053, |
|
"learning_rate": 9.995570516913971e-06, |
|
"loss": 0.095, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.306038856506348, |
|
"learning_rate": 9.995404968322902e-06, |
|
"loss": 0.0783, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.6850618124008179, |
|
"learning_rate": 9.995236384021474e-06, |
|
"loss": 0.0602, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.234889984130859, |
|
"learning_rate": 9.995064764112135e-06, |
|
"loss": 0.0852, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.0967376232147217, |
|
"learning_rate": 9.994890108699182e-06, |
|
"loss": 0.0905, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.0317909717559814, |
|
"learning_rate": 9.99471241788875e-06, |
|
"loss": 0.0728, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.1822967529296875, |
|
"learning_rate": 9.994531691788822e-06, |
|
"loss": 0.0919, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.137779474258423, |
|
"learning_rate": 9.994347930509225e-06, |
|
"loss": 0.0496, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.5484669208526611, |
|
"learning_rate": 9.994161134161635e-06, |
|
"loss": 0.0696, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.332581996917725, |
|
"learning_rate": 9.993971302859561e-06, |
|
"loss": 0.0769, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.838725209236145, |
|
"learning_rate": 9.99377843671837e-06, |
|
"loss": 0.1011, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.3594350814819336, |
|
"learning_rate": 9.993582535855265e-06, |
|
"loss": 0.075, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.8417227268218994, |
|
"learning_rate": 9.993383600389294e-06, |
|
"loss": 0.0529, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.550814151763916, |
|
"learning_rate": 9.993181630441352e-06, |
|
"loss": 0.1104, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.729711651802063, |
|
"learning_rate": 9.992976626134171e-06, |
|
"loss": 0.0601, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.1105282306671143, |
|
"learning_rate": 9.99276858759234e-06, |
|
"loss": 0.0423, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.180546522140503, |
|
"learning_rate": 9.992557514942278e-06, |
|
"loss": 0.0691, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.0761630535125732, |
|
"learning_rate": 9.992343408312258e-06, |
|
"loss": 0.0503, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.8641157150268555, |
|
"learning_rate": 9.992126267832392e-06, |
|
"loss": 0.0425, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 10.724833488464355, |
|
"learning_rate": 9.991906093634633e-06, |
|
"loss": 0.0603, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.1756705045700073, |
|
"learning_rate": 9.991682885852784e-06, |
|
"loss": 0.0392, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.1171228885650635, |
|
"learning_rate": 9.991456644622489e-06, |
|
"loss": 0.0454, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.6004431247711182, |
|
"learning_rate": 9.991227370081233e-06, |
|
"loss": 0.0496, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.264841318130493, |
|
"learning_rate": 9.990995062368346e-06, |
|
"loss": 0.0339, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.4765560626983643, |
|
"learning_rate": 9.990759721625005e-06, |
|
"loss": 0.0698, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 6.907183647155762, |
|
"learning_rate": 9.990521347994224e-06, |
|
"loss": 0.1026, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.384580135345459, |
|
"learning_rate": 9.990279941620861e-06, |
|
"loss": 0.0664, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.2060827016830444, |
|
"learning_rate": 9.990035502651624e-06, |
|
"loss": 0.0324, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.0956050157546997, |
|
"learning_rate": 9.989788031235054e-06, |
|
"loss": 0.0593, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.1994054317474365, |
|
"learning_rate": 9.98953752752154e-06, |
|
"loss": 0.0484, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.490142583847046, |
|
"learning_rate": 9.989283991663316e-06, |
|
"loss": 0.0561, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.274105072021484, |
|
"learning_rate": 9.989027423814454e-06, |
|
"loss": 0.1123, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.0847527980804443, |
|
"learning_rate": 9.98876782413087e-06, |
|
"loss": 0.0606, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.8111186027526855, |
|
"learning_rate": 9.988505192770324e-06, |
|
"loss": 0.0681, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.2713731527328491, |
|
"learning_rate": 9.988239529892416e-06, |
|
"loss": 0.0516, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.189513087272644, |
|
"learning_rate": 9.987970835658592e-06, |
|
"loss": 0.0768, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.9951283931732178, |
|
"learning_rate": 9.987699110232134e-06, |
|
"loss": 0.0416, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.4628676176071167, |
|
"learning_rate": 9.987424353778172e-06, |
|
"loss": 0.0751, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.41041100025177, |
|
"learning_rate": 9.987146566463677e-06, |
|
"loss": 0.0681, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.9383851289749146, |
|
"learning_rate": 9.986865748457457e-06, |
|
"loss": 0.1003, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.1434725522994995, |
|
"learning_rate": 9.986581899930167e-06, |
|
"loss": 0.049, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.613456964492798, |
|
"learning_rate": 9.986295021054302e-06, |
|
"loss": 0.0519, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.5484371185302734, |
|
"learning_rate": 9.986005112004198e-06, |
|
"loss": 0.0571, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.9423480033874512, |
|
"learning_rate": 9.985712172956035e-06, |
|
"loss": 0.039, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.0560059547424316, |
|
"learning_rate": 9.985416204087828e-06, |
|
"loss": 0.0904, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.695100784301758, |
|
"learning_rate": 9.985117205579442e-06, |
|
"loss": 0.1549, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.4656105041503906, |
|
"learning_rate": 9.984815177612574e-06, |
|
"loss": 0.079, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.974007487297058, |
|
"learning_rate": 9.984510120370771e-06, |
|
"loss": 0.0585, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.3341798782348633, |
|
"learning_rate": 9.984202034039414e-06, |
|
"loss": 0.0585, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.7250359058380127, |
|
"learning_rate": 9.983890918805727e-06, |
|
"loss": 0.0651, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.140810489654541, |
|
"learning_rate": 9.983576774858776e-06, |
|
"loss": 0.0748, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 6.119039058685303, |
|
"learning_rate": 9.983259602389469e-06, |
|
"loss": 0.0818, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.3782867193222046, |
|
"learning_rate": 9.982939401590545e-06, |
|
"loss": 0.0563, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.240810513496399, |
|
"learning_rate": 9.982616172656594e-06, |
|
"loss": 0.0555, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.0260303020477295, |
|
"learning_rate": 9.982289915784044e-06, |
|
"loss": 0.0554, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.1243703365325928, |
|
"learning_rate": 9.981960631171162e-06, |
|
"loss": 0.0584, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.7996938228607178, |
|
"learning_rate": 9.98162831901805e-06, |
|
"loss": 0.0854, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.3062973022460938, |
|
"learning_rate": 9.981292979526656e-06, |
|
"loss": 0.0821, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.2655537128448486, |
|
"learning_rate": 9.980954612900768e-06, |
|
"loss": 0.0643, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.0950798988342285, |
|
"learning_rate": 9.980613219346012e-06, |
|
"loss": 0.0994, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.522292971611023, |
|
"learning_rate": 9.980268799069848e-06, |
|
"loss": 0.0369, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.5451443195343018, |
|
"learning_rate": 9.979921352281585e-06, |
|
"loss": 0.0286, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.8015575408935547, |
|
"learning_rate": 9.979570879192365e-06, |
|
"loss": 0.0736, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.2620017528533936, |
|
"learning_rate": 9.979217380015173e-06, |
|
"loss": 0.0662, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.5585700273513794, |
|
"learning_rate": 9.978860854964827e-06, |
|
"loss": 0.0248, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.1841486692428589, |
|
"learning_rate": 9.978501304257991e-06, |
|
"loss": 0.0386, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.1351743936538696, |
|
"learning_rate": 9.97813872811316e-06, |
|
"loss": 0.0437, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.4472172260284424, |
|
"learning_rate": 9.977773126750677e-06, |
|
"loss": 0.074, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.9335076808929443, |
|
"learning_rate": 9.977404500392711e-06, |
|
"loss": 0.034, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.9846038818359375, |
|
"learning_rate": 9.977032849263284e-06, |
|
"loss": 0.0488, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.003464698791504, |
|
"learning_rate": 9.976658173588244e-06, |
|
"loss": 0.0199, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.1298803091049194, |
|
"learning_rate": 9.976280473595284e-06, |
|
"loss": 0.0507, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.0241546630859375, |
|
"learning_rate": 9.975899749513928e-06, |
|
"loss": 0.097, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.1224637031555176, |
|
"learning_rate": 9.975516001575549e-06, |
|
"loss": 0.0656, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.3180643320083618, |
|
"learning_rate": 9.975129230013347e-06, |
|
"loss": 0.0839, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.089977979660034, |
|
"learning_rate": 9.974739435062364e-06, |
|
"loss": 0.0571, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.773493766784668, |
|
"learning_rate": 9.974346616959476e-06, |
|
"loss": 0.025, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.1019980907440186, |
|
"learning_rate": 9.973950775943403e-06, |
|
"loss": 0.0447, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.4967840909957886, |
|
"learning_rate": 9.973551912254696e-06, |
|
"loss": 0.0422, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.1371103525161743, |
|
"learning_rate": 9.973150026135743e-06, |
|
"loss": 0.0648, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.8660270571708679, |
|
"learning_rate": 9.972745117830774e-06, |
|
"loss": 0.0344, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.05332088470459, |
|
"learning_rate": 9.972337187585848e-06, |
|
"loss": 0.1036, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.1562827825546265, |
|
"learning_rate": 9.971926235648868e-06, |
|
"loss": 0.041, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.426886558532715, |
|
"learning_rate": 9.971512262269568e-06, |
|
"loss": 0.127, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.173113465309143, |
|
"learning_rate": 9.97109526769952e-06, |
|
"loss": 0.0525, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.1487282514572144, |
|
"learning_rate": 9.970675252192133e-06, |
|
"loss": 0.052, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.5633060932159424, |
|
"learning_rate": 9.970252216002647e-06, |
|
"loss": 0.0389, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.445123314857483, |
|
"learning_rate": 9.969826159388145e-06, |
|
"loss": 0.0521, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.8425119519233704, |
|
"learning_rate": 9.96939708260754e-06, |
|
"loss": 0.0513, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.9555310606956482, |
|
"learning_rate": 9.968964985921584e-06, |
|
"loss": 0.0574, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.8024086952209473, |
|
"learning_rate": 9.96852986959286e-06, |
|
"loss": 0.058, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.4136022329330444, |
|
"learning_rate": 9.96809173388579e-06, |
|
"loss": 0.0402, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.9865325093269348, |
|
"learning_rate": 9.96765057906663e-06, |
|
"loss": 0.0555, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.3715591430664062, |
|
"learning_rate": 9.967206405403468e-06, |
|
"loss": 0.0549, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.10662841796875, |
|
"learning_rate": 9.966759213166231e-06, |
|
"loss": 0.0584, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.3035138845443726, |
|
"learning_rate": 9.966309002626676e-06, |
|
"loss": 0.0398, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.7275445461273193, |
|
"learning_rate": 9.965855774058395e-06, |
|
"loss": 0.0583, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.4070425033569336, |
|
"learning_rate": 9.965399527736819e-06, |
|
"loss": 0.0476, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.2913644313812256, |
|
"learning_rate": 9.964940263939206e-06, |
|
"loss": 0.0693, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 5.090683937072754, |
|
"learning_rate": 9.964477982944654e-06, |
|
"loss": 0.0737, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.244226455688477, |
|
"learning_rate": 9.964012685034087e-06, |
|
"loss": 0.0659, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.7967549562454224, |
|
"learning_rate": 9.96354437049027e-06, |
|
"loss": 0.0226, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.695214033126831, |
|
"learning_rate": 9.963073039597798e-06, |
|
"loss": 0.0772, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.0708000659942627, |
|
"learning_rate": 9.962598692643098e-06, |
|
"loss": 0.053, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.1509592533111572, |
|
"learning_rate": 9.962121329914432e-06, |
|
"loss": 0.0714, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.4323039054870605, |
|
"learning_rate": 9.961640951701892e-06, |
|
"loss": 0.0456, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.304720640182495, |
|
"learning_rate": 9.961157558297404e-06, |
|
"loss": 0.0854, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.8575959205627441, |
|
"learning_rate": 9.960671149994727e-06, |
|
"loss": 0.0374, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.106746792793274, |
|
"learning_rate": 9.960181727089455e-06, |
|
"loss": 0.0515, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.6459972858428955, |
|
"learning_rate": 9.959689289879003e-06, |
|
"loss": 0.0514, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.5684750080108643, |
|
"learning_rate": 9.959193838662634e-06, |
|
"loss": 0.0669, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.1011048555374146, |
|
"learning_rate": 9.958695373741428e-06, |
|
"loss": 0.0406, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.9976766109466553, |
|
"learning_rate": 9.958193895418305e-06, |
|
"loss": 0.0377, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.4583932161331177, |
|
"learning_rate": 9.957689403998012e-06, |
|
"loss": 0.06, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.1599044799804688, |
|
"learning_rate": 9.95718189978713e-06, |
|
"loss": 0.0406, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.9436582326889038, |
|
"learning_rate": 9.95667138309407e-06, |
|
"loss": 0.0361, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.8169147968292236, |
|
"learning_rate": 9.956157854229072e-06, |
|
"loss": 0.0597, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.9190147519111633, |
|
"learning_rate": 9.955641313504208e-06, |
|
"loss": 0.0258, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.8643155694007874, |
|
"learning_rate": 9.95512176123338e-06, |
|
"loss": 0.0327, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.2514710426330566, |
|
"learning_rate": 9.95459919773232e-06, |
|
"loss": 0.0723, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.3103550672531128, |
|
"learning_rate": 9.954073623318593e-06, |
|
"loss": 0.0576, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.092473268508911, |
|
"learning_rate": 9.953545038311587e-06, |
|
"loss": 0.0734, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.5062074661254883, |
|
"learning_rate": 9.953013443032524e-06, |
|
"loss": 0.0483, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.1158766746520996, |
|
"learning_rate": 9.952478837804459e-06, |
|
"loss": 0.0345, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.5865800380706787, |
|
"learning_rate": 9.951941222952264e-06, |
|
"loss": 0.0557, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.171496868133545, |
|
"learning_rate": 9.951400598802655e-06, |
|
"loss": 0.062, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.9497528076171875, |
|
"learning_rate": 9.950856965684167e-06, |
|
"loss": 0.0365, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.4575358629226685, |
|
"learning_rate": 9.950310323927165e-06, |
|
"loss": 0.0648, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.8335795402526855, |
|
"learning_rate": 9.949760673863846e-06, |
|
"loss": 0.0611, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.1269536018371582, |
|
"learning_rate": 9.949208015828232e-06, |
|
"loss": 0.0541, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.9925274848937988, |
|
"learning_rate": 9.948652350156172e-06, |
|
"loss": 0.0275, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.2717292308807373, |
|
"learning_rate": 9.948093677185345e-06, |
|
"loss": 0.041, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.1867843866348267, |
|
"learning_rate": 9.947531997255256e-06, |
|
"loss": 0.0517, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.1004167795181274, |
|
"learning_rate": 9.946967310707241e-06, |
|
"loss": 0.0503, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.8476804494857788, |
|
"learning_rate": 9.946399617884457e-06, |
|
"loss": 0.0419, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.3617258071899414, |
|
"learning_rate": 9.945828919131894e-06, |
|
"loss": 0.0273, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.4114432334899902, |
|
"learning_rate": 9.945255214796366e-06, |
|
"loss": 0.0448, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.4074312448501587, |
|
"learning_rate": 9.944678505226511e-06, |
|
"loss": 0.0637, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.2234091758728027, |
|
"learning_rate": 9.944098790772797e-06, |
|
"loss": 0.0497, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.3652763366699219, |
|
"learning_rate": 9.943516071787517e-06, |
|
"loss": 0.0555, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.020076036453247, |
|
"learning_rate": 9.942930348624788e-06, |
|
"loss": 0.0488, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.1463106870651245, |
|
"learning_rate": 9.942341621640558e-06, |
|
"loss": 0.0498, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.1451953649520874, |
|
"learning_rate": 9.941749891192594e-06, |
|
"loss": 0.0485, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.710951805114746, |
|
"learning_rate": 9.94115515764049e-06, |
|
"loss": 0.0485, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.6404072046279907, |
|
"learning_rate": 9.940557421345667e-06, |
|
"loss": 0.0387, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.1222543716430664, |
|
"learning_rate": 9.939956682671372e-06, |
|
"loss": 0.0586, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.6379327774047852, |
|
"learning_rate": 9.939352941982671e-06, |
|
"loss": 0.068, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.2636500597000122, |
|
"learning_rate": 9.938746199646458e-06, |
|
"loss": 0.0413, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.1981465816497803, |
|
"learning_rate": 9.938136456031454e-06, |
|
"loss": 0.0259, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.2407490015029907, |
|
"learning_rate": 9.937523711508196e-06, |
|
"loss": 0.0413, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.5851786136627197, |
|
"learning_rate": 9.93690796644905e-06, |
|
"loss": 0.0452, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.1833544969558716, |
|
"learning_rate": 9.936289221228207e-06, |
|
"loss": 0.0415, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 5.073670387268066, |
|
"learning_rate": 9.935667476221678e-06, |
|
"loss": 0.1248, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.5642805099487305, |
|
"learning_rate": 9.935042731807297e-06, |
|
"loss": 0.0708, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.680995464324951, |
|
"learning_rate": 9.934414988364722e-06, |
|
"loss": 0.0587, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.164574146270752, |
|
"learning_rate": 9.933784246275432e-06, |
|
"loss": 0.0532, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.1444894075393677, |
|
"learning_rate": 9.93315050592273e-06, |
|
"loss": 0.0486, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.9272328615188599, |
|
"learning_rate": 9.932513767691743e-06, |
|
"loss": 0.0465, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.0213119983673096, |
|
"learning_rate": 9.931874031969411e-06, |
|
"loss": 0.0679, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.7126073837280273, |
|
"learning_rate": 9.931231299144509e-06, |
|
"loss": 0.0849, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.2266963720321655, |
|
"learning_rate": 9.93058556960762e-06, |
|
"loss": 0.0722, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.530362844467163, |
|
"learning_rate": 9.929936843751158e-06, |
|
"loss": 0.0477, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.087737798690796, |
|
"learning_rate": 9.929285121969352e-06, |
|
"loss": 0.0698, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.2407419681549072, |
|
"learning_rate": 9.928630404658255e-06, |
|
"loss": 0.0501, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.7187033891677856, |
|
"learning_rate": 9.927972692215739e-06, |
|
"loss": 0.0537, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.143998861312866, |
|
"learning_rate": 9.927311985041495e-06, |
|
"loss": 0.0554, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.8843326568603516, |
|
"learning_rate": 9.926648283537037e-06, |
|
"loss": 0.0544, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.6308791637420654, |
|
"learning_rate": 9.925981588105695e-06, |
|
"loss": 0.0505, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.8796863555908203, |
|
"learning_rate": 9.92531189915262e-06, |
|
"loss": 0.0537, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.4090087413787842, |
|
"learning_rate": 9.924639217084783e-06, |
|
"loss": 0.0589, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.9706072807312012, |
|
"learning_rate": 9.923963542310975e-06, |
|
"loss": 0.049, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.9905783534049988, |
|
"learning_rate": 9.923284875241802e-06, |
|
"loss": 0.0537, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.5304461717605591, |
|
"learning_rate": 9.92260321628969e-06, |
|
"loss": 0.0291, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.2716902494430542, |
|
"learning_rate": 9.921918565868887e-06, |
|
"loss": 0.0652, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.9943916201591492, |
|
"learning_rate": 9.921230924395449e-06, |
|
"loss": 0.0543, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.3783643245697021, |
|
"learning_rate": 9.920540292287262e-06, |
|
"loss": 0.0536, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.389773964881897, |
|
"learning_rate": 9.91984666996402e-06, |
|
"loss": 0.0376, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.7887927293777466, |
|
"learning_rate": 9.91915005784724e-06, |
|
"loss": 0.0272, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.902744174003601, |
|
"learning_rate": 9.918450456360252e-06, |
|
"loss": 0.0543, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.6114033460617065, |
|
"learning_rate": 9.917747865928206e-06, |
|
"loss": 0.0262, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.1496695280075073, |
|
"learning_rate": 9.917042286978064e-06, |
|
"loss": 0.0643, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.8322230577468872, |
|
"learning_rate": 9.916333719938608e-06, |
|
"loss": 0.0435, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.9281955361366272, |
|
"learning_rate": 9.915622165240435e-06, |
|
"loss": 0.0399, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.7492028474807739, |
|
"learning_rate": 9.914907623315958e-06, |
|
"loss": 0.0367, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.0944385528564453, |
|
"learning_rate": 9.914190094599403e-06, |
|
"loss": 0.0488, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.0233027935028076, |
|
"learning_rate": 9.913469579526811e-06, |
|
"loss": 0.0475, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.9051103591918945, |
|
"learning_rate": 9.912746078536044e-06, |
|
"loss": 0.0374, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.6250872015953064, |
|
"learning_rate": 9.91201959206677e-06, |
|
"loss": 0.0236, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.0147565603256226, |
|
"learning_rate": 9.911290120560477e-06, |
|
"loss": 0.0408, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.8525872230529785, |
|
"learning_rate": 9.910557664460464e-06, |
|
"loss": 0.0485, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.040386915206909, |
|
"learning_rate": 9.909822224211845e-06, |
|
"loss": 0.0716, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.2481484413146973, |
|
"learning_rate": 9.90908380026155e-06, |
|
"loss": 0.0376, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.1175787448883057, |
|
"learning_rate": 9.908342393058317e-06, |
|
"loss": 0.0657, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.9903053641319275, |
|
"learning_rate": 9.907598003052701e-06, |
|
"loss": 0.0378, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.7109051942825317, |
|
"learning_rate": 9.906850630697068e-06, |
|
"loss": 0.0624, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.9067022800445557, |
|
"learning_rate": 9.906100276445596e-06, |
|
"loss": 0.0492, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.9397685527801514, |
|
"learning_rate": 9.905346940754274e-06, |
|
"loss": 0.0147, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.0456113815307617, |
|
"learning_rate": 9.90459062408091e-06, |
|
"loss": 0.0812, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.6053810119628906, |
|
"learning_rate": 9.903831326885112e-06, |
|
"loss": 0.0623, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.0448148250579834, |
|
"learning_rate": 9.90306904962831e-06, |
|
"loss": 0.0803, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.1430933475494385, |
|
"learning_rate": 9.902303792773736e-06, |
|
"loss": 0.0305, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.8864290714263916, |
|
"learning_rate": 9.90153555678644e-06, |
|
"loss": 0.0488, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.6222556829452515, |
|
"learning_rate": 9.900764342133277e-06, |
|
"loss": 0.021, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.0808035135269165, |
|
"learning_rate": 9.899990149282917e-06, |
|
"loss": 0.0326, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.029120683670044, |
|
"learning_rate": 9.899212978705836e-06, |
|
"loss": 0.0384, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.2418546676635742, |
|
"learning_rate": 9.898432830874324e-06, |
|
"loss": 0.0365, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.3441228866577148, |
|
"learning_rate": 9.897649706262474e-06, |
|
"loss": 0.0692, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.4092243909835815, |
|
"learning_rate": 9.896863605346191e-06, |
|
"loss": 0.0472, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.3884505033493042, |
|
"learning_rate": 9.89607452860319e-06, |
|
"loss": 0.088, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.6695573329925537, |
|
"learning_rate": 9.895282476512995e-06, |
|
"loss": 0.043, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.7949867248535156, |
|
"learning_rate": 9.894487449556934e-06, |
|
"loss": 0.0514, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.3810291290283203, |
|
"learning_rate": 9.893689448218146e-06, |
|
"loss": 0.0472, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.0681228637695312, |
|
"learning_rate": 9.892888472981577e-06, |
|
"loss": 0.0389, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.6548139452934265, |
|
"learning_rate": 9.89208452433398e-06, |
|
"loss": 0.0339, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.8944026231765747, |
|
"learning_rate": 9.891277602763916e-06, |
|
"loss": 0.037, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.7463440895080566, |
|
"learning_rate": 9.89046770876175e-06, |
|
"loss": 0.048, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.2079529762268066, |
|
"learning_rate": 9.889654842819658e-06, |
|
"loss": 0.0721, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.0868616104125977, |
|
"learning_rate": 9.888839005431615e-06, |
|
"loss": 0.0573, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.23513662815094, |
|
"learning_rate": 9.888020197093409e-06, |
|
"loss": 0.0542, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.7781217694282532, |
|
"learning_rate": 9.887198418302629e-06, |
|
"loss": 0.0386, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.390410304069519, |
|
"learning_rate": 9.886373669558669e-06, |
|
"loss": 0.0338, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.6135231256484985, |
|
"learning_rate": 9.885545951362733e-06, |
|
"loss": 0.0403, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.1802467107772827, |
|
"learning_rate": 9.884715264217823e-06, |
|
"loss": 0.0716, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.1783833503723145, |
|
"learning_rate": 9.883881608628748e-06, |
|
"loss": 0.0426, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.994340181350708, |
|
"learning_rate": 9.883044985102122e-06, |
|
"loss": 0.047, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.9849565625190735, |
|
"learning_rate": 9.882205394146362e-06, |
|
"loss": 0.0416, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.2525103092193604, |
|
"learning_rate": 9.881362836271686e-06, |
|
"loss": 0.0672, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.8505926728248596, |
|
"learning_rate": 9.880517311990118e-06, |
|
"loss": 0.0455, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.3629908561706543, |
|
"learning_rate": 9.879668821815484e-06, |
|
"loss": 0.0357, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.1365973949432373, |
|
"learning_rate": 9.878817366263412e-06, |
|
"loss": 0.0666, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.0324252843856812, |
|
"learning_rate": 9.87796294585133e-06, |
|
"loss": 0.0449, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.757729172706604, |
|
"learning_rate": 9.877105561098473e-06, |
|
"loss": 0.0248, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.2894716262817383, |
|
"learning_rate": 9.87624521252587e-06, |
|
"loss": 0.0382, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.5887492895126343, |
|
"learning_rate": 9.87538190065636e-06, |
|
"loss": 0.0459, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.5617096424102783, |
|
"learning_rate": 9.874515626014576e-06, |
|
"loss": 0.0673, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.4001352787017822, |
|
"learning_rate": 9.873646389126954e-06, |
|
"loss": 0.0937, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.1498814821243286, |
|
"learning_rate": 9.872774190521727e-06, |
|
"loss": 0.0609, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.620199680328369, |
|
"learning_rate": 9.871899030728932e-06, |
|
"loss": 0.078, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.5257648229599, |
|
"learning_rate": 9.871020910280408e-06, |
|
"loss": 0.0456, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.344609498977661, |
|
"learning_rate": 9.870139829709784e-06, |
|
"loss": 0.0579, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.6787387132644653, |
|
"learning_rate": 9.869255789552496e-06, |
|
"loss": 0.036, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.7965288162231445, |
|
"learning_rate": 9.868368790345777e-06, |
|
"loss": 0.0347, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.3934015035629272, |
|
"learning_rate": 9.867478832628652e-06, |
|
"loss": 0.0504, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.6102665662765503, |
|
"learning_rate": 9.866585916941951e-06, |
|
"loss": 0.0303, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.6944254636764526, |
|
"learning_rate": 9.865690043828302e-06, |
|
"loss": 0.0389, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.5572813153266907, |
|
"learning_rate": 9.864791213832125e-06, |
|
"loss": 0.0249, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.9218201041221619, |
|
"learning_rate": 9.863889427499641e-06, |
|
"loss": 0.0579, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.7617053985595703, |
|
"learning_rate": 9.862984685378864e-06, |
|
"loss": 0.0942, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.5800890922546387, |
|
"learning_rate": 9.862076988019609e-06, |
|
"loss": 0.0705, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.5009744763374329, |
|
"learning_rate": 9.86116633597348e-06, |
|
"loss": 0.0187, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.8876914381980896, |
|
"learning_rate": 9.860252729793885e-06, |
|
"loss": 0.0574, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.8853681087493896, |
|
"learning_rate": 9.859336170036022e-06, |
|
"loss": 0.0509, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.341853141784668, |
|
"learning_rate": 9.858416657256883e-06, |
|
"loss": 0.0697, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.9934710264205933, |
|
"learning_rate": 9.857494192015258e-06, |
|
"loss": 0.0531, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.259093165397644, |
|
"learning_rate": 9.85656877487173e-06, |
|
"loss": 0.0349, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.9945093393325806, |
|
"learning_rate": 9.855640406388673e-06, |
|
"loss": 0.0393, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.5558804273605347, |
|
"learning_rate": 9.854709087130261e-06, |
|
"loss": 0.0584, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.9720606803894043, |
|
"learning_rate": 9.853774817662453e-06, |
|
"loss": 0.0767, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.8328733444213867, |
|
"learning_rate": 9.85283759855301e-06, |
|
"loss": 0.0312, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.4241795539855957, |
|
"learning_rate": 9.851897430371475e-06, |
|
"loss": 0.0613, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.2547311782836914, |
|
"learning_rate": 9.850954313689193e-06, |
|
"loss": 0.0378, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.9641187191009521, |
|
"learning_rate": 9.850008249079295e-06, |
|
"loss": 0.0301, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.5166923999786377, |
|
"learning_rate": 9.849059237116702e-06, |
|
"loss": 0.0651, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.5394651889801025, |
|
"learning_rate": 9.848107278378136e-06, |
|
"loss": 0.0483, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.9585269689559937, |
|
"learning_rate": 9.847152373442096e-06, |
|
"loss": 0.0548, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.0429555177688599, |
|
"learning_rate": 9.846194522888884e-06, |
|
"loss": 0.0481, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.1581437587738037, |
|
"learning_rate": 9.84523372730058e-06, |
|
"loss": 0.0603, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.7063565850257874, |
|
"learning_rate": 9.844269987261066e-06, |
|
"loss": 0.0326, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.5360925197601318, |
|
"learning_rate": 9.843303303356005e-06, |
|
"loss": 0.0456, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.3182265758514404, |
|
"learning_rate": 9.84233367617285e-06, |
|
"loss": 0.0336, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.8530195951461792, |
|
"learning_rate": 9.841361106300846e-06, |
|
"loss": 0.0375, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.9681763052940369, |
|
"learning_rate": 9.840385594331022e-06, |
|
"loss": 0.0265, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.2474390268325806, |
|
"learning_rate": 9.839407140856199e-06, |
|
"loss": 0.0438, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.427484393119812, |
|
"learning_rate": 9.838425746470984e-06, |
|
"loss": 0.0506, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.8225058317184448, |
|
"learning_rate": 9.837441411771771e-06, |
|
"loss": 0.0355, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.9241979122161865, |
|
"learning_rate": 9.836454137356739e-06, |
|
"loss": 0.0386, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.8418800234794617, |
|
"learning_rate": 9.835463923825854e-06, |
|
"loss": 0.0392, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.9536418914794922, |
|
"learning_rate": 9.834470771780875e-06, |
|
"loss": 0.0577, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.7787923216819763, |
|
"learning_rate": 9.833474681825334e-06, |
|
"loss": 0.0325, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.5342555046081543, |
|
"learning_rate": 9.832475654564562e-06, |
|
"loss": 0.0413, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.160288691520691, |
|
"learning_rate": 9.831473690605664e-06, |
|
"loss": 0.0609, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0293076038360596, |
|
"learning_rate": 9.830468790557536e-06, |
|
"loss": 0.0376, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.1950795650482178, |
|
"learning_rate": 9.829460955030854e-06, |
|
"loss": 0.0285, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.130022644996643, |
|
"learning_rate": 9.828450184638082e-06, |
|
"loss": 0.0725, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.2049533128738403, |
|
"learning_rate": 9.827436479993468e-06, |
|
"loss": 0.0345, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.9585927724838257, |
|
"learning_rate": 9.826419841713038e-06, |
|
"loss": 0.0539, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.7200453281402588, |
|
"learning_rate": 9.825400270414602e-06, |
|
"loss": 0.0358, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.9681141972541809, |
|
"learning_rate": 9.824377766717758e-06, |
|
"loss": 0.0288, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.843163788318634, |
|
"learning_rate": 9.823352331243881e-06, |
|
"loss": 0.0396, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.8464294075965881, |
|
"learning_rate": 9.822323964616125e-06, |
|
"loss": 0.0394, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.6887583136558533, |
|
"learning_rate": 9.821292667459435e-06, |
|
"loss": 0.0295, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.815610408782959, |
|
"learning_rate": 9.820258440400525e-06, |
|
"loss": 0.0372, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.1596908569335938, |
|
"learning_rate": 9.8192212840679e-06, |
|
"loss": 0.0247, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.0240830183029175, |
|
"learning_rate": 9.818181199091838e-06, |
|
"loss": 0.0497, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.9827424883842468, |
|
"learning_rate": 9.817138186104401e-06, |
|
"loss": 0.0585, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.8876912593841553, |
|
"learning_rate": 9.816092245739426e-06, |
|
"loss": 0.039, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.8267855644226074, |
|
"learning_rate": 9.81504337863253e-06, |
|
"loss": 0.0393, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.7727996706962585, |
|
"learning_rate": 9.813991585421118e-06, |
|
"loss": 0.0442, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.0796356201171875, |
|
"learning_rate": 9.812936866744358e-06, |
|
"loss": 0.0525, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.8108832836151123, |
|
"learning_rate": 9.811879223243207e-06, |
|
"loss": 0.0367, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.9708784818649292, |
|
"learning_rate": 9.810818655560393e-06, |
|
"loss": 0.0436, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.442888855934143, |
|
"learning_rate": 9.809755164340423e-06, |
|
"loss": 0.0432, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.8913246989250183, |
|
"learning_rate": 9.808688750229584e-06, |
|
"loss": 0.046, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.196491003036499, |
|
"learning_rate": 9.807619413875937e-06, |
|
"loss": 0.0466, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.9138450622558594, |
|
"learning_rate": 9.806547155929315e-06, |
|
"loss": 0.0355, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.3624818027019501, |
|
"learning_rate": 9.80547197704133e-06, |
|
"loss": 0.0186, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.0726361274719238, |
|
"learning_rate": 9.804393877865373e-06, |
|
"loss": 0.0497, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.8961818218231201, |
|
"learning_rate": 9.8033128590566e-06, |
|
"loss": 0.0356, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.240262746810913, |
|
"learning_rate": 9.80222892127195e-06, |
|
"loss": 0.0794, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.4816982746124268, |
|
"learning_rate": 9.801142065170132e-06, |
|
"loss": 0.0631, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.1969040632247925, |
|
"learning_rate": 9.80005229141163e-06, |
|
"loss": 0.0559, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.4784609079360962, |
|
"learning_rate": 9.798959600658697e-06, |
|
"loss": 0.0746, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.7828866839408875, |
|
"learning_rate": 9.797863993575365e-06, |
|
"loss": 0.0396, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.7891765832901001, |
|
"learning_rate": 9.796765470827435e-06, |
|
"loss": 0.0567, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.7710642218589783, |
|
"learning_rate": 9.795664033082476e-06, |
|
"loss": 0.0442, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.8450149297714233, |
|
"learning_rate": 9.794559681009837e-06, |
|
"loss": 0.036, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.545617401599884, |
|
"learning_rate": 9.79345241528063e-06, |
|
"loss": 0.0302, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.7093480825424194, |
|
"learning_rate": 9.792342236567743e-06, |
|
"loss": 0.0494, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.8590899109840393, |
|
"learning_rate": 9.791229145545832e-06, |
|
"loss": 0.0389, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.1689053773880005, |
|
"learning_rate": 9.790113142891323e-06, |
|
"loss": 0.0505, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.6099830269813538, |
|
"learning_rate": 9.78899422928241e-06, |
|
"loss": 0.036, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.2200748920440674, |
|
"learning_rate": 9.787872405399059e-06, |
|
"loss": 0.0557, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.0489903688430786, |
|
"learning_rate": 9.786747671923003e-06, |
|
"loss": 0.0719, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.578433871269226, |
|
"learning_rate": 9.785620029537741e-06, |
|
"loss": 0.03, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.9253179430961609, |
|
"learning_rate": 9.784489478928545e-06, |
|
"loss": 0.0527, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.7473218441009521, |
|
"learning_rate": 9.783356020782448e-06, |
|
"loss": 0.035, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.4502854347229004, |
|
"learning_rate": 9.782219655788257e-06, |
|
"loss": 0.0423, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.946733295917511, |
|
"learning_rate": 9.781080384636539e-06, |
|
"loss": 0.0413, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.4826123714447021, |
|
"learning_rate": 9.77993820801963e-06, |
|
"loss": 0.0414, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.0471692085266113, |
|
"learning_rate": 9.778793126631632e-06, |
|
"loss": 0.0466, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.7681257724761963, |
|
"learning_rate": 9.777645141168411e-06, |
|
"loss": 0.0504, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.7187155485153198, |
|
"learning_rate": 9.776494252327597e-06, |
|
"loss": 0.0447, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.7922236323356628, |
|
"learning_rate": 9.775340460808589e-06, |
|
"loss": 0.0313, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.724630117416382, |
|
"learning_rate": 9.774183767312545e-06, |
|
"loss": 0.0616, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.47513461112976074, |
|
"learning_rate": 9.773024172542389e-06, |
|
"loss": 0.0163, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.6144838333129883, |
|
"learning_rate": 9.771861677202804e-06, |
|
"loss": 0.0271, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.0170230865478516, |
|
"learning_rate": 9.770696282000245e-06, |
|
"loss": 0.0438, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.5385282635688782, |
|
"learning_rate": 9.76952798764292e-06, |
|
"loss": 0.0169, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.6152381896972656, |
|
"learning_rate": 9.7683567948408e-06, |
|
"loss": 0.068, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.9734664559364319, |
|
"learning_rate": 9.767182704305625e-06, |
|
"loss": 0.0681, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.7027530670166016, |
|
"learning_rate": 9.766005716750884e-06, |
|
"loss": 0.04, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.7407202124595642, |
|
"learning_rate": 9.764825832891837e-06, |
|
"loss": 0.033, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.8196337223052979, |
|
"learning_rate": 9.7636430534455e-06, |
|
"loss": 0.0451, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.600836753845215, |
|
"learning_rate": 9.762457379130649e-06, |
|
"loss": 0.075, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.4206620454788208, |
|
"learning_rate": 9.761268810667817e-06, |
|
"loss": 0.0255, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.9220699071884155, |
|
"learning_rate": 9.760077348779298e-06, |
|
"loss": 0.0564, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.6927193999290466, |
|
"learning_rate": 9.758882994189145e-06, |
|
"loss": 0.0375, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.9594948291778564, |
|
"learning_rate": 9.757685747623169e-06, |
|
"loss": 0.0523, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.9151678085327148, |
|
"learning_rate": 9.756485609808934e-06, |
|
"loss": 0.0634, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.0471961498260498, |
|
"learning_rate": 9.755282581475769e-06, |
|
"loss": 0.027, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.2358285188674927, |
|
"learning_rate": 9.75407666335475e-06, |
|
"loss": 0.0705, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.8452746272087097, |
|
"learning_rate": 9.752867856178719e-06, |
|
"loss": 0.0485, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.2570796012878418, |
|
"learning_rate": 9.751656160682265e-06, |
|
"loss": 0.0375, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.8666393756866455, |
|
"learning_rate": 9.750441577601738e-06, |
|
"loss": 0.0418, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.7684221267700195, |
|
"learning_rate": 9.749224107675239e-06, |
|
"loss": 0.0477, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.430303931236267, |
|
"learning_rate": 9.748003751642628e-06, |
|
"loss": 0.0389, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 4.4301066398620605, |
|
"learning_rate": 9.746780510245512e-06, |
|
"loss": 0.0868, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.655571699142456, |
|
"learning_rate": 9.74555438422726e-06, |
|
"loss": 0.0423, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.7431411743164062, |
|
"learning_rate": 9.744325374332986e-06, |
|
"loss": 0.0235, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.7228596210479736, |
|
"learning_rate": 9.743093481309563e-06, |
|
"loss": 0.0361, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.5912590026855469, |
|
"learning_rate": 9.741858705905609e-06, |
|
"loss": 0.0254, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.8103305101394653, |
|
"learning_rate": 9.740621048871501e-06, |
|
"loss": 0.0159, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.466233253479004, |
|
"learning_rate": 9.739380510959365e-06, |
|
"loss": 0.0803, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.5837281942367554, |
|
"learning_rate": 9.738137092923072e-06, |
|
"loss": 0.0293, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.528012990951538, |
|
"learning_rate": 9.73689079551825e-06, |
|
"loss": 0.0549, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.025675058364868, |
|
"learning_rate": 9.735641619502277e-06, |
|
"loss": 0.0663, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.34830641746521, |
|
"learning_rate": 9.734389565634277e-06, |
|
"loss": 0.0483, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.644051194190979, |
|
"learning_rate": 9.73313463467512e-06, |
|
"loss": 0.053, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.6768667697906494, |
|
"learning_rate": 9.731876827387433e-06, |
|
"loss": 0.0626, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.0125842094421387, |
|
"learning_rate": 9.730616144535581e-06, |
|
"loss": 0.0424, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 4.256353378295898, |
|
"learning_rate": 9.729352586885687e-06, |
|
"loss": 0.0734, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.4163427352905273, |
|
"learning_rate": 9.728086155205614e-06, |
|
"loss": 0.0544, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.842038154602051, |
|
"learning_rate": 9.726816850264971e-06, |
|
"loss": 0.0465, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.0849742889404297, |
|
"learning_rate": 9.725544672835118e-06, |
|
"loss": 0.0684, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.677302360534668, |
|
"learning_rate": 9.724269623689158e-06, |
|
"loss": 0.0284, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.040449619293213, |
|
"learning_rate": 9.722991703601936e-06, |
|
"loss": 0.0384, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.6753067374229431, |
|
"learning_rate": 9.721710913350048e-06, |
|
"loss": 0.0436, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.006178617477417, |
|
"learning_rate": 9.720427253711831e-06, |
|
"loss": 0.046, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.1364405155181885, |
|
"learning_rate": 9.719140725467362e-06, |
|
"loss": 0.0512, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.7395780086517334, |
|
"learning_rate": 9.717851329398469e-06, |
|
"loss": 0.0239, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.4531809091567993, |
|
"learning_rate": 9.716559066288716e-06, |
|
"loss": 0.0505, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.9090608954429626, |
|
"learning_rate": 9.715263936923413e-06, |
|
"loss": 0.0272, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.9618948698043823, |
|
"learning_rate": 9.713965942089612e-06, |
|
"loss": 0.0491, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.5173948407173157, |
|
"learning_rate": 9.712665082576104e-06, |
|
"loss": 0.0264, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.5747056603431702, |
|
"learning_rate": 9.711361359173422e-06, |
|
"loss": 0.0231, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.7778929471969604, |
|
"learning_rate": 9.710054772673839e-06, |
|
"loss": 0.0492, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.290955066680908, |
|
"learning_rate": 9.708745323871369e-06, |
|
"loss": 0.0465, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.1455390453338623, |
|
"learning_rate": 9.707433013561765e-06, |
|
"loss": 0.0625, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.4170002937316895, |
|
"learning_rate": 9.706117842542517e-06, |
|
"loss": 0.0761, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.6311193704605103, |
|
"learning_rate": 9.704799811612858e-06, |
|
"loss": 0.0736, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.4031122922897339, |
|
"learning_rate": 9.703478921573753e-06, |
|
"loss": 0.0362, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.10888671875, |
|
"learning_rate": 9.702155173227911e-06, |
|
"loss": 0.0468, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.612172842025757, |
|
"learning_rate": 9.700828567379772e-06, |
|
"loss": 0.0709, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.2346030473709106, |
|
"learning_rate": 9.699499104835514e-06, |
|
"loss": 0.0587, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.7313090562820435, |
|
"learning_rate": 9.698166786403057e-06, |
|
"loss": 0.0372, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.303956389427185, |
|
"learning_rate": 9.696831612892048e-06, |
|
"loss": 0.0415, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.4627138674259186, |
|
"learning_rate": 9.695493585113873e-06, |
|
"loss": 0.0276, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.7128018140792847, |
|
"learning_rate": 9.694152703881653e-06, |
|
"loss": 0.0265, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.8362938165664673, |
|
"learning_rate": 9.69280897001024e-06, |
|
"loss": 0.0597, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.9412689208984375, |
|
"learning_rate": 9.691462384316226e-06, |
|
"loss": 0.062, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.3194217681884766, |
|
"learning_rate": 9.690112947617929e-06, |
|
"loss": 0.0526, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.3153883218765259, |
|
"learning_rate": 9.688760660735403e-06, |
|
"loss": 0.0497, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.290602684020996, |
|
"learning_rate": 9.687405524490433e-06, |
|
"loss": 0.0277, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.6527288556098938, |
|
"learning_rate": 9.686047539706536e-06, |
|
"loss": 0.0353, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.1408582925796509, |
|
"learning_rate": 9.684686707208962e-06, |
|
"loss": 0.0407, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.5641573071479797, |
|
"learning_rate": 9.683323027824687e-06, |
|
"loss": 0.0311, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.8712812066078186, |
|
"learning_rate": 9.681956502382423e-06, |
|
"loss": 0.0484, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.6026149988174438, |
|
"learning_rate": 9.680587131712605e-06, |
|
"loss": 0.0697, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.7954007983207703, |
|
"learning_rate": 9.6792149166474e-06, |
|
"loss": 0.0621, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8472158908843994, |
|
"learning_rate": 9.677839858020709e-06, |
|
"loss": 0.0437, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.9168758988380432, |
|
"learning_rate": 9.676461956668148e-06, |
|
"loss": 0.0535, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.1088653802871704, |
|
"learning_rate": 9.675081213427076e-06, |
|
"loss": 0.038, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.6966286301612854, |
|
"learning_rate": 9.673697629136566e-06, |
|
"loss": 0.0304, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.734716534614563, |
|
"learning_rate": 9.672311204637426e-06, |
|
"loss": 0.0705, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.8543561697006226, |
|
"learning_rate": 9.670921940772186e-06, |
|
"loss": 0.0585, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.6839298605918884, |
|
"learning_rate": 9.669529838385102e-06, |
|
"loss": 0.0381, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.794438362121582, |
|
"learning_rate": 9.668134898322157e-06, |
|
"loss": 0.0485, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.585090696811676, |
|
"learning_rate": 9.666737121431055e-06, |
|
"loss": 0.0295, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.14494788646698, |
|
"learning_rate": 9.665336508561225e-06, |
|
"loss": 0.0248, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.7456786632537842, |
|
"learning_rate": 9.663933060563824e-06, |
|
"loss": 0.0384, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.0646755695343018, |
|
"learning_rate": 9.662526778291725e-06, |
|
"loss": 0.056, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.6966055631637573, |
|
"learning_rate": 9.661117662599527e-06, |
|
"loss": 0.0279, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.8128595948219299, |
|
"learning_rate": 9.659705714343551e-06, |
|
"loss": 0.0421, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.1546441316604614, |
|
"learning_rate": 9.658290934381837e-06, |
|
"loss": 0.0527, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.7882161736488342, |
|
"learning_rate": 9.656873323574152e-06, |
|
"loss": 0.041, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.9414128065109253, |
|
"learning_rate": 9.655452882781972e-06, |
|
"loss": 0.0198, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.0596210956573486, |
|
"learning_rate": 9.654029612868507e-06, |
|
"loss": 0.0606, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.676780641078949, |
|
"learning_rate": 9.652603514698674e-06, |
|
"loss": 0.0232, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.8404201865196228, |
|
"learning_rate": 9.651174589139115e-06, |
|
"loss": 0.0314, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.47275248169898987, |
|
"learning_rate": 9.649742837058189e-06, |
|
"loss": 0.0169, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 3.815514087677002, |
|
"learning_rate": 9.648308259325973e-06, |
|
"loss": 0.0986, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.271995186805725, |
|
"learning_rate": 9.646870856814259e-06, |
|
"loss": 0.0271, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.6948990821838379, |
|
"learning_rate": 9.64543063039656e-06, |
|
"loss": 0.0224, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.3301115036010742, |
|
"learning_rate": 9.6439875809481e-06, |
|
"loss": 0.0375, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.6250678896903992, |
|
"learning_rate": 9.64254170934582e-06, |
|
"loss": 0.0184, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.9256348609924316, |
|
"learning_rate": 9.641093016468381e-06, |
|
"loss": 0.0375, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.3027982711791992, |
|
"learning_rate": 9.639641503196152e-06, |
|
"loss": 0.0276, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.560512065887451, |
|
"learning_rate": 9.638187170411218e-06, |
|
"loss": 0.0482, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.6088508367538452, |
|
"learning_rate": 9.63673001899738e-06, |
|
"loss": 0.0436, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.439906358718872, |
|
"learning_rate": 9.635270049840146e-06, |
|
"loss": 0.0772, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.1696199178695679, |
|
"learning_rate": 9.633807263826745e-06, |
|
"loss": 0.0388, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.6363476514816284, |
|
"learning_rate": 9.632341661846107e-06, |
|
"loss": 0.0592, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.1684820652008057, |
|
"learning_rate": 9.630873244788884e-06, |
|
"loss": 0.0696, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.787458658218384, |
|
"learning_rate": 9.629402013547432e-06, |
|
"loss": 0.0842, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.8504316806793213, |
|
"learning_rate": 9.627927969015817e-06, |
|
"loss": 0.0413, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.9233881235122681, |
|
"learning_rate": 9.62645111208982e-06, |
|
"loss": 0.0315, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.571606159210205, |
|
"learning_rate": 9.62497144366693e-06, |
|
"loss": 0.0716, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.602965831756592, |
|
"learning_rate": 9.623488964646334e-06, |
|
"loss": 0.0526, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.687855839729309, |
|
"learning_rate": 9.622003675928943e-06, |
|
"loss": 0.0517, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.535513162612915, |
|
"learning_rate": 9.620515578417364e-06, |
|
"loss": 0.0368, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.5331669449806213, |
|
"learning_rate": 9.619024673015916e-06, |
|
"loss": 0.0273, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.7347199320793152, |
|
"learning_rate": 9.617530960630624e-06, |
|
"loss": 0.022, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.8210560083389282, |
|
"learning_rate": 9.616034442169214e-06, |
|
"loss": 0.0625, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.0366301536560059, |
|
"learning_rate": 9.614535118541126e-06, |
|
"loss": 0.0409, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.8622118234634399, |
|
"learning_rate": 9.613032990657495e-06, |
|
"loss": 0.0529, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.1612430810928345, |
|
"learning_rate": 9.61152805943117e-06, |
|
"loss": 0.0298, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.6844496726989746, |
|
"learning_rate": 9.610020325776694e-06, |
|
"loss": 0.0306, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.7687200307846069, |
|
"learning_rate": 9.608509790610322e-06, |
|
"loss": 0.0416, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.7224605083465576, |
|
"learning_rate": 9.606996454850002e-06, |
|
"loss": 0.036, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.6508851051330566, |
|
"learning_rate": 9.605480319415391e-06, |
|
"loss": 0.0368, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.3081005811691284, |
|
"learning_rate": 9.603961385227848e-06, |
|
"loss": 0.0284, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.5530818700790405, |
|
"learning_rate": 9.602439653210426e-06, |
|
"loss": 0.0273, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.5170778036117554, |
|
"learning_rate": 9.600915124287886e-06, |
|
"loss": 0.0181, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.5652095079421997, |
|
"learning_rate": 9.599387799386684e-06, |
|
"loss": 0.0213, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.0414352416992188, |
|
"learning_rate": 9.597857679434974e-06, |
|
"loss": 0.0389, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.6755688786506653, |
|
"learning_rate": 9.596324765362614e-06, |
|
"loss": 0.0343, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.5740824937820435, |
|
"learning_rate": 9.594789058101154e-06, |
|
"loss": 0.0562, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.410057544708252, |
|
"learning_rate": 9.593250558583846e-06, |
|
"loss": 0.0394, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.4377081394195557, |
|
"learning_rate": 9.591709267745635e-06, |
|
"loss": 0.0255, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.9751909971237183, |
|
"learning_rate": 9.590165186523166e-06, |
|
"loss": 0.0395, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.8450660109519958, |
|
"learning_rate": 9.588618315854779e-06, |
|
"loss": 0.0331, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8118575811386108, |
|
"learning_rate": 9.587068656680506e-06, |
|
"loss": 0.0346, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.7216983437538147, |
|
"learning_rate": 9.585516209942077e-06, |
|
"loss": 0.0242, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.0194247961044312, |
|
"learning_rate": 9.583960976582914e-06, |
|
"loss": 0.0478, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.1861456632614136, |
|
"learning_rate": 9.582402957548132e-06, |
|
"loss": 0.0224, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.8888005614280701, |
|
"learning_rate": 9.580842153784542e-06, |
|
"loss": 0.0393, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.0420960187911987, |
|
"learning_rate": 9.579278566240646e-06, |
|
"loss": 0.035, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.7932503819465637, |
|
"learning_rate": 9.577712195866634e-06, |
|
"loss": 0.0361, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.295933246612549, |
|
"learning_rate": 9.576143043614393e-06, |
|
"loss": 0.0798, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.795536458492279, |
|
"learning_rate": 9.574571110437496e-06, |
|
"loss": 0.034, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.269714117050171, |
|
"learning_rate": 9.572996397291209e-06, |
|
"loss": 0.0308, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.7194578051567078, |
|
"learning_rate": 9.571418905132486e-06, |
|
"loss": 0.0303, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.9299863576889038, |
|
"learning_rate": 9.569838634919968e-06, |
|
"loss": 0.0549, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.1913076639175415, |
|
"learning_rate": 9.568255587613986e-06, |
|
"loss": 0.0419, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.6721378564834595, |
|
"learning_rate": 9.566669764176562e-06, |
|
"loss": 0.0227, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.9450292587280273, |
|
"learning_rate": 9.5650811655714e-06, |
|
"loss": 0.0272, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.6691453456878662, |
|
"learning_rate": 9.56348979276389e-06, |
|
"loss": 0.0506, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.0706772804260254, |
|
"learning_rate": 9.561895646721113e-06, |
|
"loss": 0.0438, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.0017832517623901, |
|
"learning_rate": 9.560298728411833e-06, |
|
"loss": 0.0604, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.9847087860107422, |
|
"learning_rate": 9.558699038806494e-06, |
|
"loss": 0.0827, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.05272376537323, |
|
"learning_rate": 9.557096578877232e-06, |
|
"loss": 0.0315, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.6529170274734497, |
|
"learning_rate": 9.555491349597862e-06, |
|
"loss": 0.0438, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.5359541177749634, |
|
"learning_rate": 9.553883351943882e-06, |
|
"loss": 0.0453, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.7716813087463379, |
|
"learning_rate": 9.552272586892475e-06, |
|
"loss": 0.0395, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.0042527914047241, |
|
"learning_rate": 9.550659055422502e-06, |
|
"loss": 0.0524, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.9220654368400574, |
|
"learning_rate": 9.549042758514505e-06, |
|
"loss": 0.052, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.202533483505249, |
|
"learning_rate": 9.547423697150714e-06, |
|
"loss": 0.0315, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.441113829612732, |
|
"learning_rate": 9.545801872315028e-06, |
|
"loss": 0.0406, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.1032451391220093, |
|
"learning_rate": 9.544177284993035e-06, |
|
"loss": 0.0562, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.613166332244873, |
|
"learning_rate": 9.542549936171994e-06, |
|
"loss": 0.0264, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.6434498429298401, |
|
"learning_rate": 9.540919826840848e-06, |
|
"loss": 0.0326, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.4755064845085144, |
|
"learning_rate": 9.539286957990215e-06, |
|
"loss": 0.0271, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.6659818887710571, |
|
"learning_rate": 9.53765133061239e-06, |
|
"loss": 0.0493, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.9639627933502197, |
|
"learning_rate": 9.536012945701345e-06, |
|
"loss": 0.0384, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.8150410056114197, |
|
"learning_rate": 9.534371804252727e-06, |
|
"loss": 0.0306, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.4704219102859497, |
|
"learning_rate": 9.532727907263861e-06, |
|
"loss": 0.0563, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.6380606889724731, |
|
"learning_rate": 9.53108125573374e-06, |
|
"loss": 0.0183, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.7984311580657959, |
|
"learning_rate": 9.529431850663036e-06, |
|
"loss": 0.0469, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.8775026798248291, |
|
"learning_rate": 9.527779693054095e-06, |
|
"loss": 0.0285, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.5551888346672058, |
|
"learning_rate": 9.526124783910935e-06, |
|
"loss": 0.0322, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.0795842409133911, |
|
"learning_rate": 9.524467124239243e-06, |
|
"loss": 0.0478, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.2850500345230103, |
|
"learning_rate": 9.52280671504638e-06, |
|
"loss": 0.0223, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.5365849733352661, |
|
"learning_rate": 9.521143557341378e-06, |
|
"loss": 0.0285, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.7505818605422974, |
|
"learning_rate": 9.519477652134938e-06, |
|
"loss": 0.0301, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.4962819516658783, |
|
"learning_rate": 9.517809000439432e-06, |
|
"loss": 0.0299, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.9355813264846802, |
|
"learning_rate": 9.516137603268903e-06, |
|
"loss": 0.0715, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.3954781293869019, |
|
"learning_rate": 9.514463461639055e-06, |
|
"loss": 0.0512, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.0368856191635132, |
|
"learning_rate": 9.51278657656727e-06, |
|
"loss": 0.0445, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.7911268472671509, |
|
"learning_rate": 9.511106949072588e-06, |
|
"loss": 0.0475, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.1066776514053345, |
|
"learning_rate": 9.509424580175724e-06, |
|
"loss": 0.049, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.1990307569503784, |
|
"learning_rate": 9.507739470899048e-06, |
|
"loss": 0.0574, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.1048943996429443, |
|
"learning_rate": 9.506051622266608e-06, |
|
"loss": 0.08, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.8120594024658203, |
|
"learning_rate": 9.504361035304106e-06, |
|
"loss": 0.0443, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.6603597402572632, |
|
"learning_rate": 9.502667711038917e-06, |
|
"loss": 0.0366, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 2.3819870948791504, |
|
"learning_rate": 9.500971650500072e-06, |
|
"loss": 0.0692, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.7831990718841553, |
|
"learning_rate": 9.499272854718268e-06, |
|
"loss": 0.0506, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.1036359071731567, |
|
"learning_rate": 9.497571324725865e-06, |
|
"loss": 0.0435, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.2589616775512695, |
|
"learning_rate": 9.495867061556884e-06, |
|
"loss": 0.0412, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.78188556432724, |
|
"learning_rate": 9.494160066247006e-06, |
|
"loss": 0.0534, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.7451815605163574, |
|
"learning_rate": 9.492450339833573e-06, |
|
"loss": 0.0287, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.3252469301223755, |
|
"learning_rate": 9.490737883355587e-06, |
|
"loss": 0.0334, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.8932815194129944, |
|
"learning_rate": 9.48902269785371e-06, |
|
"loss": 0.036, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.6676141023635864, |
|
"learning_rate": 9.487304784370257e-06, |
|
"loss": 0.0538, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.9928424954414368, |
|
"learning_rate": 9.48558414394921e-06, |
|
"loss": 0.0558, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.130738377571106, |
|
"learning_rate": 9.4838607776362e-06, |
|
"loss": 0.0454, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.8108890056610107, |
|
"learning_rate": 9.48213468647852e-06, |
|
"loss": 0.0265, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.0491758584976196, |
|
"learning_rate": 9.480405871525114e-06, |
|
"loss": 0.0518, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.0204825401306152, |
|
"learning_rate": 9.478674333826586e-06, |
|
"loss": 0.0339, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.026297926902771, |
|
"learning_rate": 9.476940074435189e-06, |
|
"loss": 0.0508, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.4111378192901611, |
|
"learning_rate": 9.475203094404836e-06, |
|
"loss": 0.0553, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.8152147531509399, |
|
"learning_rate": 9.473463394791093e-06, |
|
"loss": 0.0512, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.5428625345230103, |
|
"learning_rate": 9.471720976651173e-06, |
|
"loss": 0.0274, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.789997398853302, |
|
"learning_rate": 9.469975841043946e-06, |
|
"loss": 0.0456, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.5263166427612305, |
|
"learning_rate": 9.468227989029929e-06, |
|
"loss": 0.0912, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.9473277926445007, |
|
"learning_rate": 9.466477421671296e-06, |
|
"loss": 0.0445, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.9322047829627991, |
|
"learning_rate": 9.464724140031866e-06, |
|
"loss": 0.0473, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.0073190927505493, |
|
"learning_rate": 9.462968145177112e-06, |
|
"loss": 0.0506, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.5902945399284363, |
|
"learning_rate": 9.461209438174148e-06, |
|
"loss": 0.0391, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 2.0115785598754883, |
|
"learning_rate": 9.459448020091746e-06, |
|
"loss": 0.0614, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.8103097677230835, |
|
"learning_rate": 9.457683892000318e-06, |
|
"loss": 0.0481, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.718271017074585, |
|
"learning_rate": 9.455917054971929e-06, |
|
"loss": 0.0277, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.948197066783905, |
|
"learning_rate": 9.45414751008028e-06, |
|
"loss": 0.0424, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.613114356994629, |
|
"learning_rate": 9.452375258400732e-06, |
|
"loss": 0.0444, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.5611456632614136, |
|
"learning_rate": 9.450600301010279e-06, |
|
"loss": 0.0278, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.0461411476135254, |
|
"learning_rate": 9.448822638987564e-06, |
|
"loss": 0.062, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.203861951828003, |
|
"learning_rate": 9.447042273412873e-06, |
|
"loss": 0.0335, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.0347965955734253, |
|
"learning_rate": 9.445259205368138e-06, |
|
"loss": 0.0499, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.2198740243911743, |
|
"learning_rate": 9.44347343593693e-06, |
|
"loss": 0.0441, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.7504235506057739, |
|
"learning_rate": 9.441684966204456e-06, |
|
"loss": 0.0483, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.7221031188964844, |
|
"learning_rate": 9.439893797257578e-06, |
|
"loss": 0.0369, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.0137180089950562, |
|
"learning_rate": 9.438099930184783e-06, |
|
"loss": 0.0242, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.7642596364021301, |
|
"learning_rate": 9.436303366076213e-06, |
|
"loss": 0.0476, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.0482991933822632, |
|
"learning_rate": 9.434504106023634e-06, |
|
"loss": 0.0717, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.7821680903434753, |
|
"learning_rate": 9.432702151120464e-06, |
|
"loss": 0.0395, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.8012223839759827, |
|
"learning_rate": 9.430897502461745e-06, |
|
"loss": 0.0501, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.960848867893219, |
|
"learning_rate": 9.429090161144166e-06, |
|
"loss": 0.0194, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.9573109745979309, |
|
"learning_rate": 9.427280128266049e-06, |
|
"loss": 0.0485, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.6235270500183105, |
|
"learning_rate": 9.425467404927356e-06, |
|
"loss": 0.0354, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.024781346321106, |
|
"learning_rate": 9.423651992229673e-06, |
|
"loss": 0.0356, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.7387573719024658, |
|
"learning_rate": 9.421833891276233e-06, |
|
"loss": 0.0576, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.5336031913757324, |
|
"learning_rate": 9.420013103171893e-06, |
|
"loss": 0.0387, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.2542508840560913, |
|
"learning_rate": 9.418189629023149e-06, |
|
"loss": 0.0415, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.6477981805801392, |
|
"learning_rate": 9.416363469938128e-06, |
|
"loss": 0.0725, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.7093968391418457, |
|
"learning_rate": 9.414534627026586e-06, |
|
"loss": 0.0361, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.8406978845596313, |
|
"learning_rate": 9.412703101399912e-06, |
|
"loss": 0.0248, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.7647954821586609, |
|
"learning_rate": 9.410868894171126e-06, |
|
"loss": 0.0734, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.5869340300559998, |
|
"learning_rate": 9.409032006454877e-06, |
|
"loss": 0.0322, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.6841743588447571, |
|
"learning_rate": 9.407192439367443e-06, |
|
"loss": 0.0217, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.1286256313323975, |
|
"learning_rate": 9.405350194026728e-06, |
|
"loss": 0.0432, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.9575207233428955, |
|
"learning_rate": 9.403505271552267e-06, |
|
"loss": 0.0623, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.1534059047698975, |
|
"learning_rate": 9.401657673065218e-06, |
|
"loss": 0.0682, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.6419281959533691, |
|
"learning_rate": 9.399807399688371e-06, |
|
"loss": 0.0271, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.8669396638870239, |
|
"learning_rate": 9.397954452546139e-06, |
|
"loss": 0.0438, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.168561339378357, |
|
"learning_rate": 9.396098832764555e-06, |
|
"loss": 0.0456, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.2432861328125, |
|
"learning_rate": 9.394240541471282e-06, |
|
"loss": 0.0666, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.9158250093460083, |
|
"learning_rate": 9.392379579795605e-06, |
|
"loss": 0.0452, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.2606102228164673, |
|
"learning_rate": 9.39051594886843e-06, |
|
"loss": 0.0288, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.0844234228134155, |
|
"learning_rate": 9.388649649822289e-06, |
|
"loss": 0.0374, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.0901192426681519, |
|
"learning_rate": 9.386780683791331e-06, |
|
"loss": 0.0498, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.03596830368042, |
|
"learning_rate": 9.384909051911329e-06, |
|
"loss": 0.0544, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.7338258028030396, |
|
"learning_rate": 9.383034755319673e-06, |
|
"loss": 0.0389, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.973031759262085, |
|
"learning_rate": 9.381157795155374e-06, |
|
"loss": 0.0534, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.6111584305763245, |
|
"learning_rate": 9.379278172559065e-06, |
|
"loss": 0.0279, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.7228569388389587, |
|
"learning_rate": 9.37739588867299e-06, |
|
"loss": 0.0397, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.4140815734863281, |
|
"learning_rate": 9.375510944641017e-06, |
|
"loss": 0.0476, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.1325860023498535, |
|
"learning_rate": 9.373623341608624e-06, |
|
"loss": 0.0697, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.155360221862793, |
|
"learning_rate": 9.371733080722911e-06, |
|
"loss": 0.0493, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.2202762365341187, |
|
"learning_rate": 9.36984016313259e-06, |
|
"loss": 0.0425, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.9276245832443237, |
|
"learning_rate": 9.36794458998799e-06, |
|
"loss": 0.0324, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.8629313707351685, |
|
"learning_rate": 9.366046362441047e-06, |
|
"loss": 0.0551, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.3723730742931366, |
|
"learning_rate": 9.36414548164532e-06, |
|
"loss": 0.0157, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.9178370833396912, |
|
"learning_rate": 9.36224194875597e-06, |
|
"loss": 0.0467, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.7394289374351501, |
|
"learning_rate": 9.360335764929781e-06, |
|
"loss": 0.0303, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.757675290107727, |
|
"learning_rate": 9.358426931325137e-06, |
|
"loss": 0.0302, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.3911486864089966, |
|
"learning_rate": 9.356515449102041e-06, |
|
"loss": 0.0544, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.451570063829422, |
|
"learning_rate": 9.354601319422099e-06, |
|
"loss": 0.0207, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.43002304434776306, |
|
"learning_rate": 9.352684543448532e-06, |
|
"loss": 0.0186, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.37833526730537415, |
|
"learning_rate": 9.350765122346162e-06, |
|
"loss": 0.0146, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.9775627255439758, |
|
"learning_rate": 9.348843057281423e-06, |
|
"loss": 0.0451, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.626708447933197, |
|
"learning_rate": 9.346918349422356e-06, |
|
"loss": 0.0301, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.5922341346740723, |
|
"learning_rate": 9.344990999938609e-06, |
|
"loss": 0.0501, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.1948060989379883, |
|
"learning_rate": 9.343061010001428e-06, |
|
"loss": 0.0394, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.9602558016777039, |
|
"learning_rate": 9.341128380783674e-06, |
|
"loss": 0.0429, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.0513089895248413, |
|
"learning_rate": 9.339193113459805e-06, |
|
"loss": 0.0391, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.1344138383865356, |
|
"learning_rate": 9.337255209205884e-06, |
|
"loss": 0.0274, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.1134185791015625, |
|
"learning_rate": 9.335314669199576e-06, |
|
"loss": 0.0604, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.0586154460906982, |
|
"learning_rate": 9.33337149462015e-06, |
|
"loss": 0.0325, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.0996270179748535, |
|
"learning_rate": 9.331425686648472e-06, |
|
"loss": 0.0332, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.7945778369903564, |
|
"learning_rate": 9.32947724646701e-06, |
|
"loss": 0.0664, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.8699554204940796, |
|
"learning_rate": 9.327526175259837e-06, |
|
"loss": 0.0592, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.0859918594360352, |
|
"learning_rate": 9.325572474212615e-06, |
|
"loss": 0.0434, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.2848424911499023, |
|
"learning_rate": 9.323616144512612e-06, |
|
"loss": 0.0343, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.860479474067688, |
|
"learning_rate": 9.321657187348689e-06, |
|
"loss": 0.0581, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.3358099460601807, |
|
"learning_rate": 9.319695603911306e-06, |
|
"loss": 0.059, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.8692423701286316, |
|
"learning_rate": 9.317731395392517e-06, |
|
"loss": 0.0332, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.4998887777328491, |
|
"learning_rate": 9.315764562985976e-06, |
|
"loss": 0.0485, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.5280508995056152, |
|
"learning_rate": 9.313795107886925e-06, |
|
"loss": 0.0249, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.7580534219741821, |
|
"learning_rate": 9.311823031292205e-06, |
|
"loss": 0.0372, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.7582796216011047, |
|
"learning_rate": 9.309848334400247e-06, |
|
"loss": 0.0326, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.6401865482330322, |
|
"learning_rate": 9.307871018411074e-06, |
|
"loss": 0.0301, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.024916410446167, |
|
"learning_rate": 9.305891084526306e-06, |
|
"loss": 0.0723, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.180551767349243, |
|
"learning_rate": 9.303908533949146e-06, |
|
"loss": 0.0639, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.7816917896270752, |
|
"learning_rate": 9.301923367884393e-06, |
|
"loss": 0.0366, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.7270790934562683, |
|
"learning_rate": 9.299935587538432e-06, |
|
"loss": 0.0421, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.8784447312355042, |
|
"learning_rate": 9.29794519411924e-06, |
|
"loss": 0.043, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.6736301779747009, |
|
"learning_rate": 9.29595218883638e-06, |
|
"loss": 0.047, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0458660125732422, |
|
"learning_rate": 9.293956572900999e-06, |
|
"loss": 0.0295, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.8319834470748901, |
|
"learning_rate": 9.29195834752584e-06, |
|
"loss": 0.0606, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.5236587524414062, |
|
"learning_rate": 9.28995751392522e-06, |
|
"loss": 0.0405, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.4151524305343628, |
|
"learning_rate": 9.28795407331505e-06, |
|
"loss": 0.0397, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.9959708452224731, |
|
"learning_rate": 9.285948026912822e-06, |
|
"loss": 0.0715, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.5822674632072449, |
|
"learning_rate": 9.283939375937609e-06, |
|
"loss": 0.0281, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.7008696794509888, |
|
"learning_rate": 9.28192812161007e-06, |
|
"loss": 0.0486, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.7523006796836853, |
|
"learning_rate": 9.279914265152448e-06, |
|
"loss": 0.0505, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.051295518875122, |
|
"learning_rate": 9.277897807788562e-06, |
|
"loss": 0.0499, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.8184940218925476, |
|
"learning_rate": 9.275878750743818e-06, |
|
"loss": 0.0422, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.372441291809082, |
|
"learning_rate": 9.273857095245192e-06, |
|
"loss": 0.0633, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.6757863759994507, |
|
"learning_rate": 9.271832842521249e-06, |
|
"loss": 0.0366, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.7655669450759888, |
|
"learning_rate": 9.26980599380213e-06, |
|
"loss": 0.0389, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.1087899208068848, |
|
"learning_rate": 9.267776550319548e-06, |
|
"loss": 0.0433, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.6310410499572754, |
|
"learning_rate": 9.265744513306798e-06, |
|
"loss": 0.0471, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.9184622764587402, |
|
"learning_rate": 9.263709883998753e-06, |
|
"loss": 0.0679, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 2.0910892486572266, |
|
"learning_rate": 9.261672663631854e-06, |
|
"loss": 0.0551, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 2.9525444507598877, |
|
"learning_rate": 9.259632853444126e-06, |
|
"loss": 0.0682, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.773461103439331, |
|
"learning_rate": 9.257590454675159e-06, |
|
"loss": 0.0441, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.9130051136016846, |
|
"learning_rate": 9.255545468566119e-06, |
|
"loss": 0.0454, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.34200993180274963, |
|
"learning_rate": 9.253497896359749e-06, |
|
"loss": 0.0119, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.0717602968215942, |
|
"learning_rate": 9.251447739300356e-06, |
|
"loss": 0.0552, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.0619879961013794, |
|
"learning_rate": 9.249394998633825e-06, |
|
"loss": 0.0568, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.8811701536178589, |
|
"learning_rate": 9.247339675607606e-06, |
|
"loss": 0.034, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.974205493927002, |
|
"learning_rate": 9.24528177147072e-06, |
|
"loss": 0.0398, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.8818910717964172, |
|
"learning_rate": 9.243221287473755e-06, |
|
"loss": 0.048, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.6580934524536133, |
|
"learning_rate": 9.241158224868871e-06, |
|
"loss": 0.042, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.4452764987945557, |
|
"learning_rate": 9.23909258490979e-06, |
|
"loss": 0.0438, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.6177107095718384, |
|
"learning_rate": 9.237024368851805e-06, |
|
"loss": 0.0434, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.6715316772460938, |
|
"learning_rate": 9.23495357795177e-06, |
|
"loss": 0.0242, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.8438655138015747, |
|
"learning_rate": 9.232880213468106e-06, |
|
"loss": 0.0421, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.011062741279602, |
|
"learning_rate": 9.230804276660799e-06, |
|
"loss": 0.0465, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.2409260272979736, |
|
"learning_rate": 9.228725768791394e-06, |
|
"loss": 0.029, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.2052364349365234, |
|
"learning_rate": 9.226644691123006e-06, |
|
"loss": 0.0465, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.60611891746521, |
|
"learning_rate": 9.224561044920303e-06, |
|
"loss": 0.0328, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.4640844464302063, |
|
"learning_rate": 9.222474831449519e-06, |
|
"loss": 0.0202, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.9622972011566162, |
|
"learning_rate": 9.220386051978449e-06, |
|
"loss": 0.0651, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.8986101150512695, |
|
"learning_rate": 9.218294707776441e-06, |
|
"loss": 0.0556, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.158408284187317, |
|
"learning_rate": 9.216200800114412e-06, |
|
"loss": 0.0368, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.9851293563842773, |
|
"learning_rate": 9.214104330264826e-06, |
|
"loss": 0.053, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.1018086671829224, |
|
"learning_rate": 9.212005299501712e-06, |
|
"loss": 0.0597, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.84424889087677, |
|
"learning_rate": 9.20990370910065e-06, |
|
"loss": 0.0497, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.2366299629211426, |
|
"learning_rate": 9.207799560338779e-06, |
|
"loss": 0.0602, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.1586567163467407, |
|
"learning_rate": 9.20569285449479e-06, |
|
"loss": 0.0316, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.6110067367553711, |
|
"learning_rate": 9.20358359284893e-06, |
|
"loss": 0.0305, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.6773253679275513, |
|
"learning_rate": 9.201471776682999e-06, |
|
"loss": 0.036, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.9832028150558472, |
|
"learning_rate": 9.199357407280349e-06, |
|
"loss": 0.0381, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.0233718156814575, |
|
"learning_rate": 9.197240485925883e-06, |
|
"loss": 0.0549, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.125337839126587, |
|
"learning_rate": 9.195121013906055e-06, |
|
"loss": 0.0776, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.2079508304595947, |
|
"learning_rate": 9.19299899250887e-06, |
|
"loss": 0.0384, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0452898740768433, |
|
"learning_rate": 9.19087442302388e-06, |
|
"loss": 0.0387, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.8497399687767029, |
|
"learning_rate": 9.18874730674219e-06, |
|
"loss": 0.0386, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.1464147567749023, |
|
"learning_rate": 9.186617644956445e-06, |
|
"loss": 0.0725, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.4441956579685211, |
|
"learning_rate": 9.184485438960846e-06, |
|
"loss": 0.0214, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.818230390548706, |
|
"learning_rate": 9.182350690051134e-06, |
|
"loss": 0.0256, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0162849426269531, |
|
"learning_rate": 9.180213399524599e-06, |
|
"loss": 0.0592, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.9444966316223145, |
|
"learning_rate": 9.178073568680071e-06, |
|
"loss": 0.0293, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.7616766691207886, |
|
"learning_rate": 9.175931198817926e-06, |
|
"loss": 0.0481, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.47808611392974854, |
|
"learning_rate": 9.173786291240085e-06, |
|
"loss": 0.0287, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.6669220328330994, |
|
"learning_rate": 9.17163884725001e-06, |
|
"loss": 0.0324, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.8807569146156311, |
|
"learning_rate": 9.169488868152704e-06, |
|
"loss": 0.0425, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.2071596384048462, |
|
"learning_rate": 9.16733635525471e-06, |
|
"loss": 0.046, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.2434258460998535, |
|
"learning_rate": 9.165181309864108e-06, |
|
"loss": 0.0383, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.7151886820793152, |
|
"learning_rate": 9.163023733290525e-06, |
|
"loss": 0.0381, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.6364666223526001, |
|
"learning_rate": 9.16086362684512e-06, |
|
"loss": 0.0328, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.2846086025238037, |
|
"learning_rate": 9.15870099184059e-06, |
|
"loss": 0.0317, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.7031409740447998, |
|
"learning_rate": 9.15653582959117e-06, |
|
"loss": 0.0416, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.8931663036346436, |
|
"learning_rate": 9.154368141412632e-06, |
|
"loss": 0.0544, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.5589671730995178, |
|
"learning_rate": 9.152197928622278e-06, |
|
"loss": 0.0204, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.7534042596817017, |
|
"learning_rate": 9.15002519253895e-06, |
|
"loss": 0.0291, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.8194689750671387, |
|
"learning_rate": 9.147849934483019e-06, |
|
"loss": 0.0363, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.4425467252731323, |
|
"learning_rate": 9.145672155776392e-06, |
|
"loss": 0.0583, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.4742876291275024, |
|
"learning_rate": 9.143491857742505e-06, |
|
"loss": 0.0577, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.5303352475166321, |
|
"learning_rate": 9.14130904170633e-06, |
|
"loss": 0.0311, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.7389684915542603, |
|
"learning_rate": 9.13912370899436e-06, |
|
"loss": 0.028, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.5198121070861816, |
|
"learning_rate": 9.136935860934628e-06, |
|
"loss": 0.0461, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.799206256866455, |
|
"learning_rate": 9.134745498856685e-06, |
|
"loss": 0.0478, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.1272491216659546, |
|
"learning_rate": 9.13255262409162e-06, |
|
"loss": 0.0495, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.0748385190963745, |
|
"learning_rate": 9.130357237972044e-06, |
|
"loss": 0.0388, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.8800269961357117, |
|
"learning_rate": 9.128159341832092e-06, |
|
"loss": 0.0233, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.6652606129646301, |
|
"learning_rate": 9.125958937007427e-06, |
|
"loss": 0.0401, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.7951803207397461, |
|
"learning_rate": 9.123756024835237e-06, |
|
"loss": 0.0194, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.6082125902175903, |
|
"learning_rate": 9.121550606654232e-06, |
|
"loss": 0.0221, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.656269907951355, |
|
"learning_rate": 9.119342683804649e-06, |
|
"loss": 0.0267, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.3084255456924438, |
|
"learning_rate": 9.11713225762824e-06, |
|
"loss": 0.0476, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.8326955437660217, |
|
"learning_rate": 9.114919329468283e-06, |
|
"loss": 0.0223, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.612882673740387, |
|
"learning_rate": 9.112703900669577e-06, |
|
"loss": 0.0186, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.0400992631912231, |
|
"learning_rate": 9.110485972578439e-06, |
|
"loss": 0.0494, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.9465930461883545, |
|
"learning_rate": 9.108265546542705e-06, |
|
"loss": 0.0336, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.8121449947357178, |
|
"learning_rate": 9.106042623911728e-06, |
|
"loss": 0.0392, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.7355393171310425, |
|
"learning_rate": 9.103817206036383e-06, |
|
"loss": 0.0492, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.5920339822769165, |
|
"learning_rate": 9.101589294269054e-06, |
|
"loss": 0.0354, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.1976126432418823, |
|
"learning_rate": 9.099358889963643e-06, |
|
"loss": 0.0618, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.0642493963241577, |
|
"learning_rate": 9.097125994475572e-06, |
|
"loss": 0.0555, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.2092516422271729, |
|
"learning_rate": 9.09489060916177e-06, |
|
"loss": 0.0391, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.67398601770401, |
|
"learning_rate": 9.092652735380683e-06, |
|
"loss": 0.0196, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.8952963948249817, |
|
"learning_rate": 9.09041237449227e-06, |
|
"loss": 0.0246, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.7937426567077637, |
|
"learning_rate": 9.088169527857996e-06, |
|
"loss": 0.0449, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.0983673334121704, |
|
"learning_rate": 9.085924196840841e-06, |
|
"loss": 0.0577, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.7625383138656616, |
|
"learning_rate": 9.083676382805295e-06, |
|
"loss": 0.0609, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.6659592390060425, |
|
"learning_rate": 9.081426087117356e-06, |
|
"loss": 0.0453, |
|
"step": 800 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 4040, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 100, |
|
"total_flos": 2.335555778196275e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|