MS2501-24b-Ink-apollo-ep2 / trainer_state.json
Fizzarolli's picture
Upload folder using huggingface_hub
a0bfad6 verified
raw
history blame
172 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1088,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001838235294117647,
"grad_norm": 0.0,
"learning_rate": 2.0000000000000002e-07,
"loss": 1.9627,
"step": 1
},
{
"epoch": 0.003676470588235294,
"grad_norm": 0.0,
"learning_rate": 4.0000000000000003e-07,
"loss": 1.6036,
"step": 2
},
{
"epoch": 0.0055147058823529415,
"grad_norm": 0.0,
"learning_rate": 6.000000000000001e-07,
"loss": 1.7332,
"step": 3
},
{
"epoch": 0.007352941176470588,
"grad_norm": 0.0,
"learning_rate": 8.000000000000001e-07,
"loss": 1.8943,
"step": 4
},
{
"epoch": 0.009191176470588236,
"grad_norm": 0.0,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.9555,
"step": 5
},
{
"epoch": 0.011029411764705883,
"grad_norm": 0.0,
"learning_rate": 1.2000000000000002e-06,
"loss": 1.972,
"step": 6
},
{
"epoch": 0.012867647058823529,
"grad_norm": 0.0,
"learning_rate": 1.4000000000000001e-06,
"loss": 1.7141,
"step": 7
},
{
"epoch": 0.014705882352941176,
"grad_norm": 0.0,
"learning_rate": 1.6000000000000001e-06,
"loss": 1.8038,
"step": 8
},
{
"epoch": 0.016544117647058824,
"grad_norm": 0.0,
"learning_rate": 1.8000000000000001e-06,
"loss": 1.9202,
"step": 9
},
{
"epoch": 0.01838235294117647,
"grad_norm": 0.0,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.6748,
"step": 10
},
{
"epoch": 0.02022058823529412,
"grad_norm": 0.0,
"learning_rate": 2.2e-06,
"loss": 1.8197,
"step": 11
},
{
"epoch": 0.022058823529411766,
"grad_norm": 0.0,
"learning_rate": 2.4000000000000003e-06,
"loss": 1.7396,
"step": 12
},
{
"epoch": 0.02389705882352941,
"grad_norm": 0.0,
"learning_rate": 2.6e-06,
"loss": 1.654,
"step": 13
},
{
"epoch": 0.025735294117647058,
"grad_norm": 0.0,
"learning_rate": 2.8000000000000003e-06,
"loss": 1.7685,
"step": 14
},
{
"epoch": 0.027573529411764705,
"grad_norm": 0.0,
"learning_rate": 3e-06,
"loss": 1.5536,
"step": 15
},
{
"epoch": 0.029411764705882353,
"grad_norm": 0.0,
"learning_rate": 3.2000000000000003e-06,
"loss": 1.7035,
"step": 16
},
{
"epoch": 0.03125,
"grad_norm": 0.0,
"learning_rate": 3.4000000000000005e-06,
"loss": 1.9268,
"step": 17
},
{
"epoch": 0.03308823529411765,
"grad_norm": 0.0,
"learning_rate": 3.6000000000000003e-06,
"loss": 2.0697,
"step": 18
},
{
"epoch": 0.034926470588235295,
"grad_norm": 0.0,
"learning_rate": 3.8000000000000005e-06,
"loss": 1.9225,
"step": 19
},
{
"epoch": 0.03676470588235294,
"grad_norm": 0.0,
"learning_rate": 4.000000000000001e-06,
"loss": 1.8257,
"step": 20
},
{
"epoch": 0.03860294117647059,
"grad_norm": 0.0,
"learning_rate": 4.2000000000000004e-06,
"loss": 1.8509,
"step": 21
},
{
"epoch": 0.04044117647058824,
"grad_norm": 0.0,
"learning_rate": 4.4e-06,
"loss": 1.8072,
"step": 22
},
{
"epoch": 0.042279411764705885,
"grad_norm": 0.0,
"learning_rate": 4.600000000000001e-06,
"loss": 1.7604,
"step": 23
},
{
"epoch": 0.04411764705882353,
"grad_norm": 0.0,
"learning_rate": 4.800000000000001e-06,
"loss": 1.7735,
"step": 24
},
{
"epoch": 0.04595588235294118,
"grad_norm": 0.0,
"learning_rate": 5e-06,
"loss": 1.8244,
"step": 25
},
{
"epoch": 0.04779411764705882,
"grad_norm": 0.0,
"learning_rate": 4.999989082004443e-06,
"loss": 1.9756,
"step": 26
},
{
"epoch": 0.04963235294117647,
"grad_norm": 0.0,
"learning_rate": 4.999956328113134e-06,
"loss": 1.6347,
"step": 27
},
{
"epoch": 0.051470588235294115,
"grad_norm": 0.0,
"learning_rate": 4.999901738612159e-06,
"loss": 1.817,
"step": 28
},
{
"epoch": 0.05330882352941176,
"grad_norm": 0.0,
"learning_rate": 4.999825313978322e-06,
"loss": 1.6143,
"step": 29
},
{
"epoch": 0.05514705882352941,
"grad_norm": 0.0,
"learning_rate": 4.999727054879149e-06,
"loss": 1.7087,
"step": 30
},
{
"epoch": 0.05698529411764706,
"grad_norm": 0.0,
"learning_rate": 4.999606962172872e-06,
"loss": 1.9148,
"step": 31
},
{
"epoch": 0.058823529411764705,
"grad_norm": 0.0,
"learning_rate": 4.999465036908429e-06,
"loss": 1.7264,
"step": 32
},
{
"epoch": 0.06066176470588235,
"grad_norm": 0.0,
"learning_rate": 4.999301280325452e-06,
"loss": 1.5771,
"step": 33
},
{
"epoch": 0.0625,
"grad_norm": 0.0,
"learning_rate": 4.999115693854255e-06,
"loss": 1.5012,
"step": 34
},
{
"epoch": 0.06433823529411764,
"grad_norm": 0.0,
"learning_rate": 4.998908279115825e-06,
"loss": 1.8459,
"step": 35
},
{
"epoch": 0.0661764705882353,
"grad_norm": 0.0,
"learning_rate": 4.998679037921803e-06,
"loss": 1.7001,
"step": 36
},
{
"epoch": 0.06801470588235294,
"grad_norm": 0.0,
"learning_rate": 4.998427972274473e-06,
"loss": 1.719,
"step": 37
},
{
"epoch": 0.06985294117647059,
"grad_norm": 0.0,
"learning_rate": 4.998155084366744e-06,
"loss": 1.9945,
"step": 38
},
{
"epoch": 0.07169117647058823,
"grad_norm": 0.0,
"learning_rate": 4.997860376582123e-06,
"loss": 1.8024,
"step": 39
},
{
"epoch": 0.07352941176470588,
"grad_norm": 0.0,
"learning_rate": 4.997543851494709e-06,
"loss": 1.7099,
"step": 40
},
{
"epoch": 0.07536764705882353,
"grad_norm": 0.0,
"learning_rate": 4.9972055118691545e-06,
"loss": 1.5121,
"step": 41
},
{
"epoch": 0.07720588235294118,
"grad_norm": 0.0,
"learning_rate": 4.996845360660652e-06,
"loss": 1.7949,
"step": 42
},
{
"epoch": 0.07904411764705882,
"grad_norm": 0.0,
"learning_rate": 4.996463401014908e-06,
"loss": 1.6773,
"step": 43
},
{
"epoch": 0.08088235294117647,
"grad_norm": 0.0,
"learning_rate": 4.9960596362681054e-06,
"loss": 1.7022,
"step": 44
},
{
"epoch": 0.08272058823529412,
"grad_norm": 0.0,
"learning_rate": 4.9956340699468896e-06,
"loss": 1.7003,
"step": 45
},
{
"epoch": 0.08455882352941177,
"grad_norm": 0.0,
"learning_rate": 4.995186705768322e-06,
"loss": 1.5486,
"step": 46
},
{
"epoch": 0.08639705882352941,
"grad_norm": 0.0,
"learning_rate": 4.9947175476398606e-06,
"loss": 1.7186,
"step": 47
},
{
"epoch": 0.08823529411764706,
"grad_norm": 0.0,
"learning_rate": 4.994226599659319e-06,
"loss": 1.6925,
"step": 48
},
{
"epoch": 0.0900735294117647,
"grad_norm": 0.0,
"learning_rate": 4.993713866114829e-06,
"loss": 1.9749,
"step": 49
},
{
"epoch": 0.09191176470588236,
"grad_norm": 0.0,
"learning_rate": 4.993179351484811e-06,
"loss": 2.0403,
"step": 50
},
{
"epoch": 0.09375,
"grad_norm": 0.0,
"learning_rate": 4.9926230604379275e-06,
"loss": 1.8667,
"step": 51
},
{
"epoch": 0.09558823529411764,
"grad_norm": 0.0,
"learning_rate": 4.992044997833044e-06,
"loss": 1.8817,
"step": 52
},
{
"epoch": 0.0974264705882353,
"grad_norm": 0.0,
"learning_rate": 4.991445168719189e-06,
"loss": 1.909,
"step": 53
},
{
"epoch": 0.09926470588235294,
"grad_norm": 0.0,
"learning_rate": 4.9908235783355075e-06,
"loss": 1.6816,
"step": 54
},
{
"epoch": 0.10110294117647059,
"grad_norm": 0.0,
"learning_rate": 4.990180232111217e-06,
"loss": 1.627,
"step": 55
},
{
"epoch": 0.10294117647058823,
"grad_norm": 0.0,
"learning_rate": 4.989515135665558e-06,
"loss": 1.7085,
"step": 56
},
{
"epoch": 0.10477941176470588,
"grad_norm": 0.0,
"learning_rate": 4.988828294807746e-06,
"loss": 1.884,
"step": 57
},
{
"epoch": 0.10661764705882353,
"grad_norm": 0.0,
"learning_rate": 4.988119715536922e-06,
"loss": 1.7246,
"step": 58
},
{
"epoch": 0.10845588235294118,
"grad_norm": 0.0,
"learning_rate": 4.987389404042098e-06,
"loss": 1.7751,
"step": 59
},
{
"epoch": 0.11029411764705882,
"grad_norm": 0.0,
"learning_rate": 4.986637366702105e-06,
"loss": 1.7463,
"step": 60
},
{
"epoch": 0.11213235294117647,
"grad_norm": 0.0,
"learning_rate": 4.985863610085534e-06,
"loss": 1.695,
"step": 61
},
{
"epoch": 0.11397058823529412,
"grad_norm": 0.0,
"learning_rate": 4.985068140950683e-06,
"loss": 1.8484,
"step": 62
},
{
"epoch": 0.11580882352941177,
"grad_norm": 0.0,
"learning_rate": 4.984250966245495e-06,
"loss": 1.9041,
"step": 63
},
{
"epoch": 0.11764705882352941,
"grad_norm": 0.0,
"learning_rate": 4.983412093107496e-06,
"loss": 1.9226,
"step": 64
},
{
"epoch": 0.11948529411764706,
"grad_norm": 0.0,
"learning_rate": 4.982551528863738e-06,
"loss": 1.8654,
"step": 65
},
{
"epoch": 0.1213235294117647,
"grad_norm": 0.0,
"learning_rate": 4.981669281030731e-06,
"loss": 1.7118,
"step": 66
},
{
"epoch": 0.12316176470588236,
"grad_norm": 0.0,
"learning_rate": 4.980765357314376e-06,
"loss": 1.7896,
"step": 67
},
{
"epoch": 0.125,
"grad_norm": 0.0,
"learning_rate": 4.9798397656099005e-06,
"loss": 1.5778,
"step": 68
},
{
"epoch": 0.12683823529411764,
"grad_norm": 0.0,
"learning_rate": 4.978892514001792e-06,
"loss": 1.9112,
"step": 69
},
{
"epoch": 0.12867647058823528,
"grad_norm": 0.0,
"learning_rate": 4.977923610763719e-06,
"loss": 1.8526,
"step": 70
},
{
"epoch": 0.13051470588235295,
"grad_norm": 0.0,
"learning_rate": 4.976933064358467e-06,
"loss": 1.6893,
"step": 71
},
{
"epoch": 0.1323529411764706,
"grad_norm": 0.0,
"learning_rate": 4.975920883437862e-06,
"loss": 1.8083,
"step": 72
},
{
"epoch": 0.13419117647058823,
"grad_norm": 0.0,
"learning_rate": 4.974887076842694e-06,
"loss": 1.9307,
"step": 73
},
{
"epoch": 0.13602941176470587,
"grad_norm": 0.0,
"learning_rate": 4.973831653602637e-06,
"loss": 1.828,
"step": 74
},
{
"epoch": 0.13786764705882354,
"grad_norm": 0.0,
"learning_rate": 4.972754622936178e-06,
"loss": 1.7038,
"step": 75
},
{
"epoch": 0.13970588235294118,
"grad_norm": 0.0,
"learning_rate": 4.971655994250529e-06,
"loss": 1.6064,
"step": 76
},
{
"epoch": 0.14154411764705882,
"grad_norm": 0.0,
"learning_rate": 4.97053577714155e-06,
"loss": 1.7252,
"step": 77
},
{
"epoch": 0.14338235294117646,
"grad_norm": 0.0,
"learning_rate": 4.96939398139366e-06,
"loss": 1.979,
"step": 78
},
{
"epoch": 0.14522058823529413,
"grad_norm": 0.0,
"learning_rate": 4.968230616979755e-06,
"loss": 1.9525,
"step": 79
},
{
"epoch": 0.14705882352941177,
"grad_norm": 0.0,
"learning_rate": 4.967045694061122e-06,
"loss": 1.912,
"step": 80
},
{
"epoch": 0.1488970588235294,
"grad_norm": 0.0,
"learning_rate": 4.965839222987348e-06,
"loss": 1.6298,
"step": 81
},
{
"epoch": 0.15073529411764705,
"grad_norm": 0.0,
"learning_rate": 4.9646112142962295e-06,
"loss": 1.6537,
"step": 82
},
{
"epoch": 0.15257352941176472,
"grad_norm": 0.0,
"learning_rate": 4.96336167871368e-06,
"loss": 1.9089,
"step": 83
},
{
"epoch": 0.15441176470588236,
"grad_norm": 0.0,
"learning_rate": 4.96209062715364e-06,
"loss": 1.8703,
"step": 84
},
{
"epoch": 0.15625,
"grad_norm": 0.0,
"learning_rate": 4.960798070717977e-06,
"loss": 1.9559,
"step": 85
},
{
"epoch": 0.15808823529411764,
"grad_norm": 0.0,
"learning_rate": 4.959484020696392e-06,
"loss": 1.8444,
"step": 86
},
{
"epoch": 0.15992647058823528,
"grad_norm": 0.0,
"learning_rate": 4.9581484885663175e-06,
"loss": 1.6396,
"step": 87
},
{
"epoch": 0.16176470588235295,
"grad_norm": 0.0,
"learning_rate": 4.956791485992823e-06,
"loss": 1.9488,
"step": 88
},
{
"epoch": 0.1636029411764706,
"grad_norm": 0.0,
"learning_rate": 4.955413024828504e-06,
"loss": 1.8038,
"step": 89
},
{
"epoch": 0.16544117647058823,
"grad_norm": 0.0,
"learning_rate": 4.9540131171133884e-06,
"loss": 1.7477,
"step": 90
},
{
"epoch": 0.16727941176470587,
"grad_norm": 0.0,
"learning_rate": 4.952591775074825e-06,
"loss": 1.7757,
"step": 91
},
{
"epoch": 0.16911764705882354,
"grad_norm": 0.0,
"learning_rate": 4.951149011127379e-06,
"loss": 1.7452,
"step": 92
},
{
"epoch": 0.17095588235294118,
"grad_norm": 0.0,
"learning_rate": 4.949684837872723e-06,
"loss": 1.6137,
"step": 93
},
{
"epoch": 0.17279411764705882,
"grad_norm": 0.0,
"learning_rate": 4.948199268099525e-06,
"loss": 1.8074,
"step": 94
},
{
"epoch": 0.17463235294117646,
"grad_norm": 0.0,
"learning_rate": 4.946692314783342e-06,
"loss": 1.7006,
"step": 95
},
{
"epoch": 0.17647058823529413,
"grad_norm": 0.0,
"learning_rate": 4.9451639910865016e-06,
"loss": 1.6746,
"step": 96
},
{
"epoch": 0.17830882352941177,
"grad_norm": 0.0,
"learning_rate": 4.943614310357987e-06,
"loss": 1.5338,
"step": 97
},
{
"epoch": 0.1801470588235294,
"grad_norm": 0.0,
"learning_rate": 4.942043286133326e-06,
"loss": 1.639,
"step": 98
},
{
"epoch": 0.18198529411764705,
"grad_norm": 0.0,
"learning_rate": 4.940450932134467e-06,
"loss": 1.8445,
"step": 99
},
{
"epoch": 0.18382352941176472,
"grad_norm": 0.0,
"learning_rate": 4.9388372622696605e-06,
"loss": 1.6577,
"step": 100
},
{
"epoch": 0.18566176470588236,
"grad_norm": 0.0,
"learning_rate": 4.937202290633337e-06,
"loss": 1.9322,
"step": 101
},
{
"epoch": 0.1875,
"grad_norm": 0.0,
"learning_rate": 4.935546031505991e-06,
"loss": 1.8592,
"step": 102
},
{
"epoch": 0.18933823529411764,
"grad_norm": 0.0,
"learning_rate": 4.933868499354043e-06,
"loss": 1.8238,
"step": 103
},
{
"epoch": 0.19117647058823528,
"grad_norm": 0.0,
"learning_rate": 4.932169708829725e-06,
"loss": 1.8892,
"step": 104
},
{
"epoch": 0.19301470588235295,
"grad_norm": 0.0,
"learning_rate": 4.930449674770947e-06,
"loss": 1.6668,
"step": 105
},
{
"epoch": 0.1948529411764706,
"grad_norm": 0.0,
"learning_rate": 4.928708412201169e-06,
"loss": 2.0112,
"step": 106
},
{
"epoch": 0.19669117647058823,
"grad_norm": 0.0,
"learning_rate": 4.926945936329266e-06,
"loss": 1.8705,
"step": 107
},
{
"epoch": 0.19852941176470587,
"grad_norm": 0.0,
"learning_rate": 4.925162262549405e-06,
"loss": 1.8025,
"step": 108
},
{
"epoch": 0.20036764705882354,
"grad_norm": 0.0,
"learning_rate": 4.923357406440896e-06,
"loss": 1.9824,
"step": 109
},
{
"epoch": 0.20220588235294118,
"grad_norm": 0.0,
"learning_rate": 4.921531383768071e-06,
"loss": 1.6375,
"step": 110
},
{
"epoch": 0.20404411764705882,
"grad_norm": 0.0,
"learning_rate": 4.919684210480134e-06,
"loss": 1.9491,
"step": 111
},
{
"epoch": 0.20588235294117646,
"grad_norm": 0.0,
"learning_rate": 4.917815902711029e-06,
"loss": 2.0238,
"step": 112
},
{
"epoch": 0.20772058823529413,
"grad_norm": 0.0,
"learning_rate": 4.915926476779297e-06,
"loss": 1.8125,
"step": 113
},
{
"epoch": 0.20955882352941177,
"grad_norm": 0.0,
"learning_rate": 4.914015949187934e-06,
"loss": 1.7428,
"step": 114
},
{
"epoch": 0.2113970588235294,
"grad_norm": 0.0,
"learning_rate": 4.912084336624243e-06,
"loss": 1.6849,
"step": 115
},
{
"epoch": 0.21323529411764705,
"grad_norm": 0.0,
"learning_rate": 4.910131655959697e-06,
"loss": 1.8793,
"step": 116
},
{
"epoch": 0.21507352941176472,
"grad_norm": 0.0,
"learning_rate": 4.908157924249781e-06,
"loss": 1.9526,
"step": 117
},
{
"epoch": 0.21691176470588236,
"grad_norm": 0.0,
"learning_rate": 4.906163158733851e-06,
"loss": 1.7916,
"step": 118
},
{
"epoch": 0.21875,
"grad_norm": 0.0,
"learning_rate": 4.904147376834979e-06,
"loss": 1.8987,
"step": 119
},
{
"epoch": 0.22058823529411764,
"grad_norm": 0.0,
"learning_rate": 4.9021105961598046e-06,
"loss": 1.877,
"step": 120
},
{
"epoch": 0.22242647058823528,
"grad_norm": 0.0,
"learning_rate": 4.900052834498377e-06,
"loss": 1.7896,
"step": 121
},
{
"epoch": 0.22426470588235295,
"grad_norm": 0.0,
"learning_rate": 4.897974109824002e-06,
"loss": 1.8914,
"step": 122
},
{
"epoch": 0.2261029411764706,
"grad_norm": 0.0,
"learning_rate": 4.895874440293085e-06,
"loss": 1.7302,
"step": 123
},
{
"epoch": 0.22794117647058823,
"grad_norm": 0.0,
"learning_rate": 4.8937538442449724e-06,
"loss": 1.807,
"step": 124
},
{
"epoch": 0.22977941176470587,
"grad_norm": 0.0,
"learning_rate": 4.891612340201791e-06,
"loss": 1.6097,
"step": 125
},
{
"epoch": 0.23161764705882354,
"grad_norm": 0.0,
"learning_rate": 4.8894499468682865e-06,
"loss": 1.9383,
"step": 126
},
{
"epoch": 0.23345588235294118,
"grad_norm": 0.0,
"learning_rate": 4.887266683131659e-06,
"loss": 1.6959,
"step": 127
},
{
"epoch": 0.23529411764705882,
"grad_norm": 0.0,
"learning_rate": 4.885062568061399e-06,
"loss": 1.9403,
"step": 128
},
{
"epoch": 0.23713235294117646,
"grad_norm": 0.0,
"learning_rate": 4.882837620909121e-06,
"loss": 1.6888,
"step": 129
},
{
"epoch": 0.23897058823529413,
"grad_norm": 0.0,
"learning_rate": 4.880591861108397e-06,
"loss": 1.7798,
"step": 130
},
{
"epoch": 0.24080882352941177,
"grad_norm": 0.0,
"learning_rate": 4.878325308274583e-06,
"loss": 1.796,
"step": 131
},
{
"epoch": 0.2426470588235294,
"grad_norm": 0.0,
"learning_rate": 4.876037982204649e-06,
"loss": 1.6234,
"step": 132
},
{
"epoch": 0.24448529411764705,
"grad_norm": 0.0,
"learning_rate": 4.873729902877009e-06,
"loss": 1.8065,
"step": 133
},
{
"epoch": 0.24632352941176472,
"grad_norm": 0.0,
"learning_rate": 4.871401090451342e-06,
"loss": 1.9266,
"step": 134
},
{
"epoch": 0.24816176470588236,
"grad_norm": 0.0,
"learning_rate": 4.869051565268419e-06,
"loss": 1.772,
"step": 135
},
{
"epoch": 0.25,
"grad_norm": 0.0,
"learning_rate": 4.866681347849925e-06,
"loss": 1.5869,
"step": 136
},
{
"epoch": 0.25183823529411764,
"grad_norm": 0.0,
"learning_rate": 4.8642904588982785e-06,
"loss": 1.9012,
"step": 137
},
{
"epoch": 0.2536764705882353,
"grad_norm": 0.0,
"learning_rate": 4.861878919296451e-06,
"loss": 1.9242,
"step": 138
},
{
"epoch": 0.2555147058823529,
"grad_norm": 0.0,
"learning_rate": 4.859446750107786e-06,
"loss": 2.0885,
"step": 139
},
{
"epoch": 0.25735294117647056,
"grad_norm": 0.0,
"learning_rate": 4.856993972575813e-06,
"loss": 1.5305,
"step": 140
},
{
"epoch": 0.25919117647058826,
"grad_norm": 0.0,
"learning_rate": 4.854520608124063e-06,
"loss": 1.8923,
"step": 141
},
{
"epoch": 0.2610294117647059,
"grad_norm": 0.0,
"learning_rate": 4.8520266783558825e-06,
"loss": 1.8581,
"step": 142
},
{
"epoch": 0.26286764705882354,
"grad_norm": 0.0,
"learning_rate": 4.849512205054242e-06,
"loss": 1.6467,
"step": 143
},
{
"epoch": 0.2647058823529412,
"grad_norm": 0.0,
"learning_rate": 4.846977210181549e-06,
"loss": 1.8146,
"step": 144
},
{
"epoch": 0.2665441176470588,
"grad_norm": 0.0,
"learning_rate": 4.844421715879453e-06,
"loss": 1.555,
"step": 145
},
{
"epoch": 0.26838235294117646,
"grad_norm": 0.0,
"learning_rate": 4.841845744468655e-06,
"loss": 1.7029,
"step": 146
},
{
"epoch": 0.2702205882352941,
"grad_norm": 0.0,
"learning_rate": 4.83924931844871e-06,
"loss": 1.7241,
"step": 147
},
{
"epoch": 0.27205882352941174,
"grad_norm": 0.0,
"learning_rate": 4.836632460497832e-06,
"loss": 1.667,
"step": 148
},
{
"epoch": 0.27389705882352944,
"grad_norm": 0.0,
"learning_rate": 4.833995193472697e-06,
"loss": 1.5294,
"step": 149
},
{
"epoch": 0.2757352941176471,
"grad_norm": 0.0,
"learning_rate": 4.831337540408239e-06,
"loss": 1.7341,
"step": 150
},
{
"epoch": 0.2775735294117647,
"grad_norm": 0.0,
"learning_rate": 4.828659524517455e-06,
"loss": 1.7731,
"step": 151
},
{
"epoch": 0.27941176470588236,
"grad_norm": 0.0,
"learning_rate": 4.825961169191196e-06,
"loss": 1.891,
"step": 152
},
{
"epoch": 0.28125,
"grad_norm": 0.0,
"learning_rate": 4.8232424979979684e-06,
"loss": 1.5459,
"step": 153
},
{
"epoch": 0.28308823529411764,
"grad_norm": 0.0,
"learning_rate": 4.820503534683725e-06,
"loss": 1.7663,
"step": 154
},
{
"epoch": 0.2849264705882353,
"grad_norm": 0.0,
"learning_rate": 4.8177443031716545e-06,
"loss": 1.9843,
"step": 155
},
{
"epoch": 0.2867647058823529,
"grad_norm": 0.0,
"learning_rate": 4.814964827561981e-06,
"loss": 1.9345,
"step": 156
},
{
"epoch": 0.28860294117647056,
"grad_norm": 0.0,
"learning_rate": 4.812165132131746e-06,
"loss": 1.651,
"step": 157
},
{
"epoch": 0.29044117647058826,
"grad_norm": 0.0,
"learning_rate": 4.809345241334598e-06,
"loss": 1.7562,
"step": 158
},
{
"epoch": 0.2922794117647059,
"grad_norm": 0.0,
"learning_rate": 4.806505179800583e-06,
"loss": 1.7144,
"step": 159
},
{
"epoch": 0.29411764705882354,
"grad_norm": 0.0,
"learning_rate": 4.803644972335925e-06,
"loss": 1.8868,
"step": 160
},
{
"epoch": 0.2959558823529412,
"grad_norm": 0.0,
"learning_rate": 4.800764643922806e-06,
"loss": 1.7201,
"step": 161
},
{
"epoch": 0.2977941176470588,
"grad_norm": 0.0,
"learning_rate": 4.797864219719161e-06,
"loss": 2.0389,
"step": 162
},
{
"epoch": 0.29963235294117646,
"grad_norm": 0.0,
"learning_rate": 4.794943725058441e-06,
"loss": 1.7262,
"step": 163
},
{
"epoch": 0.3014705882352941,
"grad_norm": 0.0,
"learning_rate": 4.792003185449406e-06,
"loss": 1.8069,
"step": 164
},
{
"epoch": 0.30330882352941174,
"grad_norm": 0.0,
"learning_rate": 4.789042626575895e-06,
"loss": 1.8573,
"step": 165
},
{
"epoch": 0.30514705882352944,
"grad_norm": 0.0,
"learning_rate": 4.786062074296602e-06,
"loss": 1.7839,
"step": 166
},
{
"epoch": 0.3069852941176471,
"grad_norm": 0.0,
"learning_rate": 4.783061554644853e-06,
"loss": 1.8461,
"step": 167
},
{
"epoch": 0.3088235294117647,
"grad_norm": 0.0,
"learning_rate": 4.780041093828376e-06,
"loss": 1.7538,
"step": 168
},
{
"epoch": 0.31066176470588236,
"grad_norm": 0.0,
"learning_rate": 4.777000718229072e-06,
"loss": 1.6497,
"step": 169
},
{
"epoch": 0.3125,
"grad_norm": 0.0,
"learning_rate": 4.773940454402789e-06,
"loss": 1.6723,
"step": 170
},
{
"epoch": 0.31433823529411764,
"grad_norm": 0.0,
"learning_rate": 4.770860329079083e-06,
"loss": 1.8927,
"step": 171
},
{
"epoch": 0.3161764705882353,
"grad_norm": 0.0,
"learning_rate": 4.7677603691609905e-06,
"loss": 1.7236,
"step": 172
},
{
"epoch": 0.3180147058823529,
"grad_norm": 0.0,
"learning_rate": 4.7646406017247895e-06,
"loss": 1.971,
"step": 173
},
{
"epoch": 0.31985294117647056,
"grad_norm": 0.0,
"learning_rate": 4.761501054019766e-06,
"loss": 1.8082,
"step": 174
},
{
"epoch": 0.32169117647058826,
"grad_norm": 0.0,
"learning_rate": 4.758341753467975e-06,
"loss": 1.9078,
"step": 175
},
{
"epoch": 0.3235294117647059,
"grad_norm": 0.0,
"learning_rate": 4.755162727663998e-06,
"loss": 1.6387,
"step": 176
},
{
"epoch": 0.32536764705882354,
"grad_norm": 0.0,
"learning_rate": 4.751964004374709e-06,
"loss": 2.0215,
"step": 177
},
{
"epoch": 0.3272058823529412,
"grad_norm": 0.0,
"learning_rate": 4.748745611539024e-06,
"loss": 1.8042,
"step": 178
},
{
"epoch": 0.3290441176470588,
"grad_norm": 0.0,
"learning_rate": 4.745507577267663e-06,
"loss": 2.1742,
"step": 179
},
{
"epoch": 0.33088235294117646,
"grad_norm": 0.0,
"learning_rate": 4.7422499298429e-06,
"loss": 1.8744,
"step": 180
},
{
"epoch": 0.3327205882352941,
"grad_norm": 0.0,
"learning_rate": 4.738972697718319e-06,
"loss": 1.7443,
"step": 181
},
{
"epoch": 0.33455882352941174,
"grad_norm": 0.0,
"learning_rate": 4.735675909518565e-06,
"loss": 1.9355,
"step": 182
},
{
"epoch": 0.33639705882352944,
"grad_norm": 0.0,
"learning_rate": 4.732359594039094e-06,
"loss": 1.5572,
"step": 183
},
{
"epoch": 0.3382352941176471,
"grad_norm": 0.0,
"learning_rate": 4.729023780245919e-06,
"loss": 1.6816,
"step": 184
},
{
"epoch": 0.3400735294117647,
"grad_norm": 0.0,
"learning_rate": 4.725668497275361e-06,
"loss": 1.8776,
"step": 185
},
{
"epoch": 0.34191176470588236,
"grad_norm": 0.0,
"learning_rate": 4.72229377443379e-06,
"loss": 1.7296,
"step": 186
},
{
"epoch": 0.34375,
"grad_norm": 0.0,
"learning_rate": 4.718899641197375e-06,
"loss": 1.8298,
"step": 187
},
{
"epoch": 0.34558823529411764,
"grad_norm": 0.0,
"learning_rate": 4.71548612721182e-06,
"loss": 1.7406,
"step": 188
},
{
"epoch": 0.3474264705882353,
"grad_norm": 0.0,
"learning_rate": 4.712053262292111e-06,
"loss": 1.953,
"step": 189
},
{
"epoch": 0.3492647058823529,
"grad_norm": 0.0,
"learning_rate": 4.70860107642225e-06,
"loss": 1.8692,
"step": 190
},
{
"epoch": 0.35110294117647056,
"grad_norm": 0.0,
"learning_rate": 4.7051295997549964e-06,
"loss": 1.8754,
"step": 191
},
{
"epoch": 0.35294117647058826,
"grad_norm": 0.0,
"learning_rate": 4.701638862611605e-06,
"loss": 1.8684,
"step": 192
},
{
"epoch": 0.3547794117647059,
"grad_norm": 0.0,
"learning_rate": 4.698128895481557e-06,
"loss": 1.5358,
"step": 193
},
{
"epoch": 0.35661764705882354,
"grad_norm": 0.0,
"learning_rate": 4.694599729022297e-06,
"loss": 1.56,
"step": 194
},
{
"epoch": 0.3584558823529412,
"grad_norm": 0.0,
"learning_rate": 4.691051394058965e-06,
"loss": 1.7223,
"step": 195
},
{
"epoch": 0.3602941176470588,
"grad_norm": 0.0,
"learning_rate": 4.687483921584124e-06,
"loss": 1.6848,
"step": 196
},
{
"epoch": 0.36213235294117646,
"grad_norm": 0.0,
"learning_rate": 4.683897342757493e-06,
"loss": 1.654,
"step": 197
},
{
"epoch": 0.3639705882352941,
"grad_norm": 0.0,
"learning_rate": 4.680291688905674e-06,
"loss": 1.6973,
"step": 198
},
{
"epoch": 0.36580882352941174,
"grad_norm": 0.0,
"learning_rate": 4.676666991521876e-06,
"loss": 1.6474,
"step": 199
},
{
"epoch": 0.36764705882352944,
"grad_norm": 0.0,
"learning_rate": 4.673023282265645e-06,
"loss": 1.5936,
"step": 200
},
{
"epoch": 0.3694852941176471,
"grad_norm": 0.0,
"learning_rate": 4.669360592962581e-06,
"loss": 1.6647,
"step": 201
},
{
"epoch": 0.3713235294117647,
"grad_norm": 0.0,
"learning_rate": 4.665678955604064e-06,
"loss": 1.9738,
"step": 202
},
{
"epoch": 0.37316176470588236,
"grad_norm": 0.0,
"learning_rate": 4.661978402346974e-06,
"loss": 1.7933,
"step": 203
},
{
"epoch": 0.375,
"grad_norm": 0.0,
"learning_rate": 4.658258965513412e-06,
"loss": 1.9133,
"step": 204
},
{
"epoch": 0.37683823529411764,
"grad_norm": 0.0,
"learning_rate": 4.654520677590412e-06,
"loss": 1.8377,
"step": 205
},
{
"epoch": 0.3786764705882353,
"grad_norm": 0.0,
"learning_rate": 4.650763571229664e-06,
"loss": 1.79,
"step": 206
},
{
"epoch": 0.3805147058823529,
"grad_norm": 0.0,
"learning_rate": 4.646987679247223e-06,
"loss": 1.5877,
"step": 207
},
{
"epoch": 0.38235294117647056,
"grad_norm": 0.0,
"learning_rate": 4.643193034623229e-06,
"loss": 1.7125,
"step": 208
},
{
"epoch": 0.38419117647058826,
"grad_norm": 0.0,
"learning_rate": 4.6393796705016105e-06,
"loss": 1.7207,
"step": 209
},
{
"epoch": 0.3860294117647059,
"grad_norm": 0.0,
"learning_rate": 4.635547620189802e-06,
"loss": 1.6849,
"step": 210
},
{
"epoch": 0.38786764705882354,
"grad_norm": 0.0,
"learning_rate": 4.631696917158449e-06,
"loss": 1.716,
"step": 211
},
{
"epoch": 0.3897058823529412,
"grad_norm": 0.0,
"learning_rate": 4.62782759504112e-06,
"loss": 1.7206,
"step": 212
},
{
"epoch": 0.3915441176470588,
"grad_norm": 0.0,
"learning_rate": 4.623939687634009e-06,
"loss": 1.4938,
"step": 213
},
{
"epoch": 0.39338235294117646,
"grad_norm": 0.0,
"learning_rate": 4.620033228895639e-06,
"loss": 1.9391,
"step": 214
},
{
"epoch": 0.3952205882352941,
"grad_norm": 0.0,
"learning_rate": 4.616108252946568e-06,
"loss": 1.688,
"step": 215
},
{
"epoch": 0.39705882352941174,
"grad_norm": 0.0,
"learning_rate": 4.612164794069096e-06,
"loss": 1.9585,
"step": 216
},
{
"epoch": 0.39889705882352944,
"grad_norm": 0.0,
"learning_rate": 4.608202886706953e-06,
"loss": 1.6469,
"step": 217
},
{
"epoch": 0.4007352941176471,
"grad_norm": 0.0,
"learning_rate": 4.6042225654650096e-06,
"loss": 1.8181,
"step": 218
},
{
"epoch": 0.4025735294117647,
"grad_norm": 0.0,
"learning_rate": 4.60022386510897e-06,
"loss": 1.8259,
"step": 219
},
{
"epoch": 0.40441176470588236,
"grad_norm": 0.0,
"learning_rate": 4.5962068205650674e-06,
"loss": 1.8962,
"step": 220
},
{
"epoch": 0.40625,
"grad_norm": 0.0,
"learning_rate": 4.592171466919762e-06,
"loss": 1.868,
"step": 221
},
{
"epoch": 0.40808823529411764,
"grad_norm": 0.0,
"learning_rate": 4.588117839419432e-06,
"loss": 1.7946,
"step": 222
},
{
"epoch": 0.4099264705882353,
"grad_norm": 0.0,
"learning_rate": 4.584045973470067e-06,
"loss": 1.6068,
"step": 223
},
{
"epoch": 0.4117647058823529,
"grad_norm": 0.0,
"learning_rate": 4.579955904636959e-06,
"loss": 1.8194,
"step": 224
},
{
"epoch": 0.41360294117647056,
"grad_norm": 0.0,
"learning_rate": 4.5758476686443905e-06,
"loss": 1.8958,
"step": 225
},
{
"epoch": 0.41544117647058826,
"grad_norm": 0.0,
"learning_rate": 4.571721301375323e-06,
"loss": 1.5318,
"step": 226
},
{
"epoch": 0.4172794117647059,
"grad_norm": 0.0,
"learning_rate": 4.5675768388710855e-06,
"loss": 1.6046,
"step": 227
},
{
"epoch": 0.41911764705882354,
"grad_norm": 0.0,
"learning_rate": 4.563414317331053e-06,
"loss": 1.6724,
"step": 228
},
{
"epoch": 0.4209558823529412,
"grad_norm": 0.0,
"learning_rate": 4.559233773112343e-06,
"loss": 1.8096,
"step": 229
},
{
"epoch": 0.4227941176470588,
"grad_norm": 0.0,
"learning_rate": 4.5550352427294836e-06,
"loss": 1.4821,
"step": 230
},
{
"epoch": 0.42463235294117646,
"grad_norm": 0.0,
"learning_rate": 4.550818762854105e-06,
"loss": 1.6695,
"step": 231
},
{
"epoch": 0.4264705882352941,
"grad_norm": 0.0,
"learning_rate": 4.546584370314613e-06,
"loss": 1.6973,
"step": 232
},
{
"epoch": 0.42830882352941174,
"grad_norm": 0.0,
"learning_rate": 4.542332102095871e-06,
"loss": 1.8328,
"step": 233
},
{
"epoch": 0.43014705882352944,
"grad_norm": 0.0,
"learning_rate": 4.538061995338875e-06,
"loss": 1.6589,
"step": 234
},
{
"epoch": 0.4319852941176471,
"grad_norm": 0.0,
"learning_rate": 4.533774087340431e-06,
"loss": 1.7145,
"step": 235
},
{
"epoch": 0.4338235294117647,
"grad_norm": 0.0,
"learning_rate": 4.529468415552829e-06,
"loss": 1.4717,
"step": 236
},
{
"epoch": 0.43566176470588236,
"grad_norm": 0.0,
"learning_rate": 4.52514501758351e-06,
"loss": 1.7362,
"step": 237
},
{
"epoch": 0.4375,
"grad_norm": 0.0,
"learning_rate": 4.520803931194747e-06,
"loss": 1.8571,
"step": 238
},
{
"epoch": 0.43933823529411764,
"grad_norm": 0.0,
"learning_rate": 4.5164451943033105e-06,
"loss": 1.9605,
"step": 239
},
{
"epoch": 0.4411764705882353,
"grad_norm": 0.0,
"learning_rate": 4.512068844980136e-06,
"loss": 1.9368,
"step": 240
},
{
"epoch": 0.4430147058823529,
"grad_norm": 0.0,
"learning_rate": 4.507674921449994e-06,
"loss": 1.45,
"step": 241
},
{
"epoch": 0.44485294117647056,
"grad_norm": 0.0,
"learning_rate": 4.503263462091153e-06,
"loss": 1.6417,
"step": 242
},
{
"epoch": 0.44669117647058826,
"grad_norm": 0.0,
"learning_rate": 4.49883450543505e-06,
"loss": 1.7531,
"step": 243
},
{
"epoch": 0.4485294117647059,
"grad_norm": 0.0,
"learning_rate": 4.494388090165947e-06,
"loss": 1.826,
"step": 244
},
{
"epoch": 0.45036764705882354,
"grad_norm": 0.0,
"learning_rate": 4.489924255120597e-06,
"loss": 1.5047,
"step": 245
},
{
"epoch": 0.4522058823529412,
"grad_norm": 0.0,
"learning_rate": 4.485443039287907e-06,
"loss": 1.7405,
"step": 246
},
{
"epoch": 0.4540441176470588,
"grad_norm": 0.0,
"learning_rate": 4.48094448180859e-06,
"loss": 1.7201,
"step": 247
},
{
"epoch": 0.45588235294117646,
"grad_norm": 0.0,
"learning_rate": 4.476428621974833e-06,
"loss": 1.9913,
"step": 248
},
{
"epoch": 0.4577205882352941,
"grad_norm": 0.0,
"learning_rate": 4.471895499229946e-06,
"loss": 1.5852,
"step": 249
},
{
"epoch": 0.45955882352941174,
"grad_norm": 0.0,
"learning_rate": 4.467345153168018e-06,
"loss": 1.5358,
"step": 250
},
{
"epoch": 0.46139705882352944,
"grad_norm": 0.0,
"learning_rate": 4.462777623533577e-06,
"loss": 1.7271,
"step": 251
},
{
"epoch": 0.4632352941176471,
"grad_norm": 0.0,
"learning_rate": 4.458192950221237e-06,
"loss": 1.5025,
"step": 252
},
{
"epoch": 0.4650735294117647,
"grad_norm": 0.0,
"learning_rate": 4.4535911732753535e-06,
"loss": 1.9186,
"step": 253
},
{
"epoch": 0.46691176470588236,
"grad_norm": 0.0,
"learning_rate": 4.448972332889669e-06,
"loss": 1.8936,
"step": 254
},
{
"epoch": 0.46875,
"grad_norm": 0.0,
"learning_rate": 4.444336469406968e-06,
"loss": 1.699,
"step": 255
},
{
"epoch": 0.47058823529411764,
"grad_norm": 0.0,
"learning_rate": 4.4396836233187195e-06,
"loss": 1.9617,
"step": 256
},
{
"epoch": 0.4724264705882353,
"grad_norm": 0.0,
"learning_rate": 4.435013835264725e-06,
"loss": 1.9323,
"step": 257
},
{
"epoch": 0.4742647058823529,
"grad_norm": 0.0,
"learning_rate": 4.4303271460327655e-06,
"loss": 1.6515,
"step": 258
},
{
"epoch": 0.47610294117647056,
"grad_norm": 0.0,
"learning_rate": 4.425623596558243e-06,
"loss": 1.6436,
"step": 259
},
{
"epoch": 0.47794117647058826,
"grad_norm": 0.0,
"learning_rate": 4.420903227923823e-06,
"loss": 1.9221,
"step": 260
},
{
"epoch": 0.4797794117647059,
"grad_norm": 0.0,
"learning_rate": 4.416166081359077e-06,
"loss": 1.9025,
"step": 261
},
{
"epoch": 0.48161764705882354,
"grad_norm": 0.0,
"learning_rate": 4.411412198240119e-06,
"loss": 1.866,
"step": 262
},
{
"epoch": 0.4834558823529412,
"grad_norm": 0.0,
"learning_rate": 4.406641620089252e-06,
"loss": 1.6989,
"step": 263
},
{
"epoch": 0.4852941176470588,
"grad_norm": 0.0,
"learning_rate": 4.401854388574595e-06,
"loss": 1.7039,
"step": 264
},
{
"epoch": 0.48713235294117646,
"grad_norm": 0.0,
"learning_rate": 4.397050545509726e-06,
"loss": 1.9074,
"step": 265
},
{
"epoch": 0.4889705882352941,
"grad_norm": 0.0,
"learning_rate": 4.392230132853316e-06,
"loss": 1.5768,
"step": 266
},
{
"epoch": 0.49080882352941174,
"grad_norm": 0.0,
"learning_rate": 4.387393192708758e-06,
"loss": 1.6722,
"step": 267
},
{
"epoch": 0.49264705882352944,
"grad_norm": 0.0,
"learning_rate": 4.382539767323805e-06,
"loss": 1.8256,
"step": 268
},
{
"epoch": 0.4944852941176471,
"grad_norm": 0.0,
"learning_rate": 4.377669899090202e-06,
"loss": 1.6908,
"step": 269
},
{
"epoch": 0.4963235294117647,
"grad_norm": 0.0,
"learning_rate": 4.372783630543305e-06,
"loss": 1.6795,
"step": 270
},
{
"epoch": 0.49816176470588236,
"grad_norm": 0.0,
"learning_rate": 4.3678810043617215e-06,
"loss": 1.7813,
"step": 271
},
{
"epoch": 0.5,
"grad_norm": 0.0,
"learning_rate": 4.362962063366933e-06,
"loss": 1.6982,
"step": 272
},
{
"epoch": 0.5018382352941176,
"grad_norm": 0.0,
"learning_rate": 4.358026850522919e-06,
"loss": 2.0328,
"step": 273
},
{
"epoch": 0.5036764705882353,
"grad_norm": 0.0,
"learning_rate": 4.353075408935787e-06,
"loss": 1.7353,
"step": 274
},
{
"epoch": 0.5055147058823529,
"grad_norm": 0.0,
"learning_rate": 4.348107781853389e-06,
"loss": 1.7333,
"step": 275
},
{
"epoch": 0.5073529411764706,
"grad_norm": 0.0,
"learning_rate": 4.34312401266495e-06,
"loss": 1.914,
"step": 276
},
{
"epoch": 0.5091911764705882,
"grad_norm": 0.0,
"learning_rate": 4.338124144900685e-06,
"loss": 1.6887,
"step": 277
},
{
"epoch": 0.5110294117647058,
"grad_norm": 0.0,
"learning_rate": 4.333108222231423e-06,
"loss": 1.6988,
"step": 278
},
{
"epoch": 0.5128676470588235,
"grad_norm": 0.0,
"learning_rate": 4.32807628846822e-06,
"loss": 1.5326,
"step": 279
},
{
"epoch": 0.5147058823529411,
"grad_norm": 0.0,
"learning_rate": 4.3230283875619815e-06,
"loss": 1.9346,
"step": 280
},
{
"epoch": 0.5165441176470589,
"grad_norm": 0.0,
"learning_rate": 4.317964563603073e-06,
"loss": 1.6371,
"step": 281
},
{
"epoch": 0.5183823529411765,
"grad_norm": 0.0,
"learning_rate": 4.312884860820942e-06,
"loss": 2.1047,
"step": 282
},
{
"epoch": 0.5202205882352942,
"grad_norm": 0.0,
"learning_rate": 4.307789323583727e-06,
"loss": 1.5355,
"step": 283
},
{
"epoch": 0.5220588235294118,
"grad_norm": 0.0,
"learning_rate": 4.302677996397868e-06,
"loss": 1.5629,
"step": 284
},
{
"epoch": 0.5238970588235294,
"grad_norm": 0.0,
"learning_rate": 4.297550923907726e-06,
"loss": 1.9965,
"step": 285
},
{
"epoch": 0.5257352941176471,
"grad_norm": 0.0,
"learning_rate": 4.2924081508951824e-06,
"loss": 1.8088,
"step": 286
},
{
"epoch": 0.5275735294117647,
"grad_norm": 0.0,
"learning_rate": 4.287249722279257e-06,
"loss": 1.8258,
"step": 287
},
{
"epoch": 0.5294117647058824,
"grad_norm": 0.0,
"learning_rate": 4.28207568311571e-06,
"loss": 1.7162,
"step": 288
},
{
"epoch": 0.53125,
"grad_norm": 0.0,
"learning_rate": 4.27688607859665e-06,
"loss": 1.6649,
"step": 289
},
{
"epoch": 0.5330882352941176,
"grad_norm": 0.0,
"learning_rate": 4.27168095405014e-06,
"loss": 1.8048,
"step": 290
},
{
"epoch": 0.5349264705882353,
"grad_norm": 0.0,
"learning_rate": 4.266460354939803e-06,
"loss": 1.7108,
"step": 291
},
{
"epoch": 0.5367647058823529,
"grad_norm": 0.0,
"learning_rate": 4.26122432686442e-06,
"loss": 1.9229,
"step": 292
},
{
"epoch": 0.5386029411764706,
"grad_norm": 0.0,
"learning_rate": 4.255972915557537e-06,
"loss": 1.9534,
"step": 293
},
{
"epoch": 0.5404411764705882,
"grad_norm": 0.0,
"learning_rate": 4.250706166887061e-06,
"loss": 1.9423,
"step": 294
},
{
"epoch": 0.5422794117647058,
"grad_norm": 0.0,
"learning_rate": 4.245424126854864e-06,
"loss": 1.7946,
"step": 295
},
{
"epoch": 0.5441176470588235,
"grad_norm": 0.0,
"learning_rate": 4.240126841596377e-06,
"loss": 1.6288,
"step": 296
},
{
"epoch": 0.5459558823529411,
"grad_norm": 0.0,
"learning_rate": 4.234814357380189e-06,
"loss": 1.9226,
"step": 297
},
{
"epoch": 0.5477941176470589,
"grad_norm": 0.0,
"learning_rate": 4.229486720607645e-06,
"loss": 1.5787,
"step": 298
},
{
"epoch": 0.5496323529411765,
"grad_norm": 0.0,
"learning_rate": 4.224143977812435e-06,
"loss": 1.6782,
"step": 299
},
{
"epoch": 0.5514705882352942,
"grad_norm": 0.0,
"learning_rate": 4.218786175660194e-06,
"loss": 1.4888,
"step": 300
},
{
"epoch": 0.5533088235294118,
"grad_norm": 0.0,
"learning_rate": 4.213413360948089e-06,
"loss": 1.7777,
"step": 301
},
{
"epoch": 0.5551470588235294,
"grad_norm": 0.0,
"learning_rate": 4.208025580604413e-06,
"loss": 1.7884,
"step": 302
},
{
"epoch": 0.5569852941176471,
"grad_norm": 0.0,
"learning_rate": 4.202622881688178e-06,
"loss": 1.6578,
"step": 303
},
{
"epoch": 0.5588235294117647,
"grad_norm": 0.0,
"learning_rate": 4.197205311388698e-06,
"loss": 1.991,
"step": 304
},
{
"epoch": 0.5606617647058824,
"grad_norm": 0.0,
"learning_rate": 4.1917729170251765e-06,
"loss": 1.7002,
"step": 305
},
{
"epoch": 0.5625,
"grad_norm": 0.0,
"learning_rate": 4.186325746046302e-06,
"loss": 1.9256,
"step": 306
},
{
"epoch": 0.5643382352941176,
"grad_norm": 0.0,
"learning_rate": 4.180863846029825e-06,
"loss": 1.8477,
"step": 307
},
{
"epoch": 0.5661764705882353,
"grad_norm": 0.0,
"learning_rate": 4.175387264682146e-06,
"loss": 1.9161,
"step": 308
},
{
"epoch": 0.5680147058823529,
"grad_norm": 0.0,
"learning_rate": 4.169896049837899e-06,
"loss": 1.6926,
"step": 309
},
{
"epoch": 0.5698529411764706,
"grad_norm": 0.0,
"learning_rate": 4.164390249459526e-06,
"loss": 1.8339,
"step": 310
},
{
"epoch": 0.5716911764705882,
"grad_norm": 0.0,
"learning_rate": 4.158869911636876e-06,
"loss": 1.8295,
"step": 311
},
{
"epoch": 0.5735294117647058,
"grad_norm": 0.0,
"learning_rate": 4.153335084586766e-06,
"loss": 1.8681,
"step": 312
},
{
"epoch": 0.5753676470588235,
"grad_norm": 0.0,
"learning_rate": 4.147785816652569e-06,
"loss": 1.6268,
"step": 313
},
{
"epoch": 0.5772058823529411,
"grad_norm": 0.0,
"learning_rate": 4.142222156303792e-06,
"loss": 1.773,
"step": 314
},
{
"epoch": 0.5790441176470589,
"grad_norm": 0.0,
"learning_rate": 4.13664415213565e-06,
"loss": 2.0425,
"step": 315
},
{
"epoch": 0.5808823529411765,
"grad_norm": 0.0,
"learning_rate": 4.131051852868643e-06,
"loss": 1.8064,
"step": 316
},
{
"epoch": 0.5827205882352942,
"grad_norm": 0.0,
"learning_rate": 4.125445307348129e-06,
"loss": 1.5052,
"step": 317
},
{
"epoch": 0.5845588235294118,
"grad_norm": 0.0,
"learning_rate": 4.119824564543901e-06,
"loss": 1.7783,
"step": 318
},
{
"epoch": 0.5863970588235294,
"grad_norm": 0.0,
"learning_rate": 4.114189673549752e-06,
"loss": 1.6945,
"step": 319
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.0,
"learning_rate": 4.108540683583057e-06,
"loss": 1.7935,
"step": 320
},
{
"epoch": 0.5900735294117647,
"grad_norm": 0.0,
"learning_rate": 4.102877643984332e-06,
"loss": 2.0515,
"step": 321
},
{
"epoch": 0.5919117647058824,
"grad_norm": 0.0,
"learning_rate": 4.097200604216811e-06,
"loss": 1.7803,
"step": 322
},
{
"epoch": 0.59375,
"grad_norm": 0.0,
"learning_rate": 4.09150961386601e-06,
"loss": 1.7374,
"step": 323
},
{
"epoch": 0.5955882352941176,
"grad_norm": 0.0,
"learning_rate": 4.085804722639293e-06,
"loss": 1.6242,
"step": 324
},
{
"epoch": 0.5974264705882353,
"grad_norm": 0.0,
"learning_rate": 4.0800859803654436e-06,
"loss": 1.8858,
"step": 325
},
{
"epoch": 0.5992647058823529,
"grad_norm": 0.0,
"learning_rate": 4.074353436994223e-06,
"loss": 1.8843,
"step": 326
},
{
"epoch": 0.6011029411764706,
"grad_norm": 0.0,
"learning_rate": 4.068607142595939e-06,
"loss": 1.4963,
"step": 327
},
{
"epoch": 0.6029411764705882,
"grad_norm": 0.0,
"learning_rate": 4.062847147361003e-06,
"loss": 1.6638,
"step": 328
},
{
"epoch": 0.6047794117647058,
"grad_norm": 0.0,
"learning_rate": 4.0570735015994986e-06,
"loss": 1.9207,
"step": 329
},
{
"epoch": 0.6066176470588235,
"grad_norm": 0.0,
"learning_rate": 4.0512862557407365e-06,
"loss": 1.5746,
"step": 330
},
{
"epoch": 0.6084558823529411,
"grad_norm": 0.0,
"learning_rate": 4.045485460332815e-06,
"loss": 1.8553,
"step": 331
},
{
"epoch": 0.6102941176470589,
"grad_norm": 0.0,
"learning_rate": 4.0396711660421825e-06,
"loss": 1.8915,
"step": 332
},
{
"epoch": 0.6121323529411765,
"grad_norm": 0.0,
"learning_rate": 4.03384342365319e-06,
"loss": 1.8034,
"step": 333
},
{
"epoch": 0.6139705882352942,
"grad_norm": 0.0,
"learning_rate": 4.02800228406765e-06,
"loss": 1.9337,
"step": 334
},
{
"epoch": 0.6158088235294118,
"grad_norm": 0.0,
"learning_rate": 4.02214779830439e-06,
"loss": 1.9172,
"step": 335
},
{
"epoch": 0.6176470588235294,
"grad_norm": 0.0,
"learning_rate": 4.016280017498812e-06,
"loss": 1.5344,
"step": 336
},
{
"epoch": 0.6194852941176471,
"grad_norm": 0.0,
"learning_rate": 4.010398992902437e-06,
"loss": 1.6145,
"step": 337
},
{
"epoch": 0.6213235294117647,
"grad_norm": 0.0,
"learning_rate": 4.004504775882467e-06,
"loss": 1.6857,
"step": 338
},
{
"epoch": 0.6231617647058824,
"grad_norm": 0.0,
"learning_rate": 3.998597417921331e-06,
"loss": 1.6453,
"step": 339
},
{
"epoch": 0.625,
"grad_norm": 0.0,
"learning_rate": 3.992676970616233e-06,
"loss": 1.9115,
"step": 340
},
{
"epoch": 0.6268382352941176,
"grad_norm": 0.0,
"learning_rate": 3.98674348567871e-06,
"loss": 1.7092,
"step": 341
},
{
"epoch": 0.6286764705882353,
"grad_norm": 0.0,
"learning_rate": 3.980797014934169e-06,
"loss": 1.7614,
"step": 342
},
{
"epoch": 0.6305147058823529,
"grad_norm": 0.0,
"learning_rate": 3.974837610321445e-06,
"loss": 1.6805,
"step": 343
},
{
"epoch": 0.6323529411764706,
"grad_norm": 0.0,
"learning_rate": 3.968865323892339e-06,
"loss": 1.9099,
"step": 344
},
{
"epoch": 0.6341911764705882,
"grad_norm": 0.0,
"learning_rate": 3.962880207811168e-06,
"loss": 1.5895,
"step": 345
},
{
"epoch": 0.6360294117647058,
"grad_norm": 0.0,
"learning_rate": 3.95688231435431e-06,
"loss": 1.6966,
"step": 346
},
{
"epoch": 0.6378676470588235,
"grad_norm": 0.0,
"learning_rate": 3.950871695909744e-06,
"loss": 1.6833,
"step": 347
},
{
"epoch": 0.6397058823529411,
"grad_norm": 0.0,
"learning_rate": 3.944848404976593e-06,
"loss": 1.8615,
"step": 348
},
{
"epoch": 0.6415441176470589,
"grad_norm": 0.0,
"learning_rate": 3.93881249416467e-06,
"loss": 1.93,
"step": 349
},
{
"epoch": 0.6433823529411765,
"grad_norm": 0.0,
"learning_rate": 3.932764016194013e-06,
"loss": 1.8436,
"step": 350
},
{
"epoch": 0.6452205882352942,
"grad_norm": 0.0,
"learning_rate": 3.926703023894424e-06,
"loss": 1.4891,
"step": 351
},
{
"epoch": 0.6470588235294118,
"grad_norm": 0.0,
"learning_rate": 3.920629570205014e-06,
"loss": 2.0484,
"step": 352
},
{
"epoch": 0.6488970588235294,
"grad_norm": 0.0,
"learning_rate": 3.914543708173735e-06,
"loss": 1.7981,
"step": 353
},
{
"epoch": 0.6507352941176471,
"grad_norm": 0.0,
"learning_rate": 3.90844549095692e-06,
"loss": 1.8744,
"step": 354
},
{
"epoch": 0.6525735294117647,
"grad_norm": 0.0,
"learning_rate": 3.9023349718188155e-06,
"loss": 1.9961,
"step": 355
},
{
"epoch": 0.6544117647058824,
"grad_norm": 0.0,
"learning_rate": 3.8962122041311155e-06,
"loss": 1.8839,
"step": 356
},
{
"epoch": 0.65625,
"grad_norm": 0.0,
"learning_rate": 3.890077241372503e-06,
"loss": 1.7395,
"step": 357
},
{
"epoch": 0.6580882352941176,
"grad_norm": 0.0,
"learning_rate": 3.883930137128175e-06,
"loss": 1.6163,
"step": 358
},
{
"epoch": 0.6599264705882353,
"grad_norm": 0.0,
"learning_rate": 3.877770945089377e-06,
"loss": 1.7127,
"step": 359
},
{
"epoch": 0.6617647058823529,
"grad_norm": 0.0,
"learning_rate": 3.871599719052931e-06,
"loss": 1.6822,
"step": 360
},
{
"epoch": 0.6636029411764706,
"grad_norm": 0.0,
"learning_rate": 3.865416512920776e-06,
"loss": 2.0061,
"step": 361
},
{
"epoch": 0.6654411764705882,
"grad_norm": 0.0,
"learning_rate": 3.859221380699482e-06,
"loss": 1.4916,
"step": 362
},
{
"epoch": 0.6672794117647058,
"grad_norm": 0.0,
"learning_rate": 3.853014376499792e-06,
"loss": 1.5192,
"step": 363
},
{
"epoch": 0.6691176470588235,
"grad_norm": 0.0,
"learning_rate": 3.846795554536141e-06,
"loss": 1.8608,
"step": 364
},
{
"epoch": 0.6709558823529411,
"grad_norm": 0.0,
"learning_rate": 3.840564969126186e-06,
"loss": 1.7084,
"step": 365
},
{
"epoch": 0.6727941176470589,
"grad_norm": 0.0,
"learning_rate": 3.834322674690329e-06,
"loss": 1.6686,
"step": 366
},
{
"epoch": 0.6746323529411765,
"grad_norm": 0.0,
"learning_rate": 3.828068725751245e-06,
"loss": 1.7066,
"step": 367
},
{
"epoch": 0.6764705882352942,
"grad_norm": 0.0,
"learning_rate": 3.8218031769334024e-06,
"loss": 1.7413,
"step": 368
},
{
"epoch": 0.6783088235294118,
"grad_norm": 0.0,
"learning_rate": 3.81552608296259e-06,
"loss": 1.9639,
"step": 369
},
{
"epoch": 0.6801470588235294,
"grad_norm": 0.0,
"learning_rate": 3.809237498665434e-06,
"loss": 1.6569,
"step": 370
},
{
"epoch": 0.6819852941176471,
"grad_norm": 0.0,
"learning_rate": 3.8029374789689234e-06,
"loss": 1.7029,
"step": 371
},
{
"epoch": 0.6838235294117647,
"grad_norm": 0.0,
"learning_rate": 3.7966260788999278e-06,
"loss": 1.4536,
"step": 372
},
{
"epoch": 0.6856617647058824,
"grad_norm": 0.0,
"learning_rate": 3.7903033535847167e-06,
"loss": 1.7632,
"step": 373
},
{
"epoch": 0.6875,
"grad_norm": 0.0,
"learning_rate": 3.7839693582484806e-06,
"loss": 1.698,
"step": 374
},
{
"epoch": 0.6893382352941176,
"grad_norm": 0.0,
"learning_rate": 3.7776241482148452e-06,
"loss": 1.6655,
"step": 375
},
{
"epoch": 0.6911764705882353,
"grad_norm": 0.0,
"learning_rate": 3.771267778905391e-06,
"loss": 1.8655,
"step": 376
},
{
"epoch": 0.6930147058823529,
"grad_norm": 0.0,
"learning_rate": 3.7649003058391664e-06,
"loss": 1.7093,
"step": 377
},
{
"epoch": 0.6948529411764706,
"grad_norm": 0.0,
"learning_rate": 3.7585217846322075e-06,
"loss": 1.6746,
"step": 378
},
{
"epoch": 0.6966911764705882,
"grad_norm": 0.0,
"learning_rate": 3.7521322709970454e-06,
"loss": 1.9697,
"step": 379
},
{
"epoch": 0.6985294117647058,
"grad_norm": 0.0,
"learning_rate": 3.745731820742227e-06,
"loss": 2.0496,
"step": 380
},
{
"epoch": 0.7003676470588235,
"grad_norm": 0.0,
"learning_rate": 3.7393204897718194e-06,
"loss": 1.8899,
"step": 381
},
{
"epoch": 0.7022058823529411,
"grad_norm": 0.0,
"learning_rate": 3.7328983340849324e-06,
"loss": 1.9481,
"step": 382
},
{
"epoch": 0.7040441176470589,
"grad_norm": 0.0,
"learning_rate": 3.7264654097752173e-06,
"loss": 1.8767,
"step": 383
},
{
"epoch": 0.7058823529411765,
"grad_norm": 0.0,
"learning_rate": 3.7200217730303865e-06,
"loss": 1.7622,
"step": 384
},
{
"epoch": 0.7077205882352942,
"grad_norm": 0.0,
"learning_rate": 3.713567480131718e-06,
"loss": 1.8596,
"step": 385
},
{
"epoch": 0.7095588235294118,
"grad_norm": 0.0,
"learning_rate": 3.7071025874535643e-06,
"loss": 1.7558,
"step": 386
},
{
"epoch": 0.7113970588235294,
"grad_norm": 0.0,
"learning_rate": 3.7006271514628617e-06,
"loss": 2.0891,
"step": 387
},
{
"epoch": 0.7132352941176471,
"grad_norm": 0.0,
"learning_rate": 3.694141228718634e-06,
"loss": 1.5486,
"step": 388
},
{
"epoch": 0.7150735294117647,
"grad_norm": 0.0,
"learning_rate": 3.6876448758715028e-06,
"loss": 1.6308,
"step": 389
},
{
"epoch": 0.7169117647058824,
"grad_norm": 0.0,
"learning_rate": 3.681138149663189e-06,
"loss": 1.9086,
"step": 390
},
{
"epoch": 0.71875,
"grad_norm": 0.0,
"learning_rate": 3.6746211069260197e-06,
"loss": 1.9397,
"step": 391
},
{
"epoch": 0.7205882352941176,
"grad_norm": 0.0,
"learning_rate": 3.6680938045824284e-06,
"loss": 1.6673,
"step": 392
},
{
"epoch": 0.7224264705882353,
"grad_norm": 0.0,
"learning_rate": 3.661556299644462e-06,
"loss": 1.7074,
"step": 393
},
{
"epoch": 0.7242647058823529,
"grad_norm": 0.0,
"learning_rate": 3.6550086492132804e-06,
"loss": 1.298,
"step": 394
},
{
"epoch": 0.7261029411764706,
"grad_norm": 0.0,
"learning_rate": 3.6484509104786582e-06,
"loss": 1.711,
"step": 395
},
{
"epoch": 0.7279411764705882,
"grad_norm": 0.0,
"learning_rate": 3.6418831407184856e-06,
"loss": 1.5623,
"step": 396
},
{
"epoch": 0.7297794117647058,
"grad_norm": 0.0,
"learning_rate": 3.6353053972982676e-06,
"loss": 1.7687,
"step": 397
},
{
"epoch": 0.7316176470588235,
"grad_norm": 0.0,
"learning_rate": 3.628717737670623e-06,
"loss": 1.6687,
"step": 398
},
{
"epoch": 0.7334558823529411,
"grad_norm": 0.0,
"learning_rate": 3.6221202193747818e-06,
"loss": 1.6686,
"step": 399
},
{
"epoch": 0.7352941176470589,
"grad_norm": 0.0,
"learning_rate": 3.6155129000360846e-06,
"loss": 1.4463,
"step": 400
},
{
"epoch": 0.7371323529411765,
"grad_norm": 0.0,
"learning_rate": 3.6088958373654794e-06,
"loss": 1.8492,
"step": 401
},
{
"epoch": 0.7389705882352942,
"grad_norm": 0.0,
"learning_rate": 3.602269089159013e-06,
"loss": 1.7782,
"step": 402
},
{
"epoch": 0.7408088235294118,
"grad_norm": 0.0,
"learning_rate": 3.5956327132973313e-06,
"loss": 1.7213,
"step": 403
},
{
"epoch": 0.7426470588235294,
"grad_norm": 0.0,
"learning_rate": 3.588986767745174e-06,
"loss": 1.6712,
"step": 404
},
{
"epoch": 0.7444852941176471,
"grad_norm": 0.0,
"learning_rate": 3.5823313105508626e-06,
"loss": 1.679,
"step": 405
},
{
"epoch": 0.7463235294117647,
"grad_norm": 0.0,
"learning_rate": 3.575666399845799e-06,
"loss": 1.8439,
"step": 406
},
{
"epoch": 0.7481617647058824,
"grad_norm": 0.0,
"learning_rate": 3.568992093843956e-06,
"loss": 2.0623,
"step": 407
},
{
"epoch": 0.75,
"grad_norm": 0.0,
"learning_rate": 3.5623084508413685e-06,
"loss": 1.6133,
"step": 408
},
{
"epoch": 0.7518382352941176,
"grad_norm": 0.0,
"learning_rate": 3.555615529215623e-06,
"loss": 1.8713,
"step": 409
},
{
"epoch": 0.7536764705882353,
"grad_norm": 0.0,
"learning_rate": 3.5489133874253516e-06,
"loss": 1.8986,
"step": 410
},
{
"epoch": 0.7555147058823529,
"grad_norm": 0.0,
"learning_rate": 3.5422020840097173e-06,
"loss": 1.6793,
"step": 411
},
{
"epoch": 0.7573529411764706,
"grad_norm": 0.0,
"learning_rate": 3.535481677587904e-06,
"loss": 1.6225,
"step": 412
},
{
"epoch": 0.7591911764705882,
"grad_norm": 0.0,
"learning_rate": 3.5287522268586074e-06,
"loss": 1.7254,
"step": 413
},
{
"epoch": 0.7610294117647058,
"grad_norm": 0.0,
"learning_rate": 3.5220137905995165e-06,
"loss": 1.7691,
"step": 414
},
{
"epoch": 0.7628676470588235,
"grad_norm": 0.0,
"learning_rate": 3.515266427666806e-06,
"loss": 1.7055,
"step": 415
},
{
"epoch": 0.7647058823529411,
"grad_norm": 0.0,
"learning_rate": 3.508510196994618e-06,
"loss": 1.7593,
"step": 416
},
{
"epoch": 0.7665441176470589,
"grad_norm": 0.0,
"learning_rate": 3.50174515759455e-06,
"loss": 1.7012,
"step": 417
},
{
"epoch": 0.7683823529411765,
"grad_norm": 0.0,
"learning_rate": 3.4949713685551377e-06,
"loss": 1.887,
"step": 418
},
{
"epoch": 0.7702205882352942,
"grad_norm": 0.0,
"learning_rate": 3.488188889041341e-06,
"loss": 1.7187,
"step": 419
},
{
"epoch": 0.7720588235294118,
"grad_norm": 0.0,
"learning_rate": 3.4813977782940234e-06,
"loss": 1.8475,
"step": 420
},
{
"epoch": 0.7738970588235294,
"grad_norm": 0.0,
"learning_rate": 3.4745980956294396e-06,
"loss": 1.6388,
"step": 421
},
{
"epoch": 0.7757352941176471,
"grad_norm": 0.0,
"learning_rate": 3.4677899004387134e-06,
"loss": 1.9088,
"step": 422
},
{
"epoch": 0.7775735294117647,
"grad_norm": 0.0,
"learning_rate": 3.460973252187321e-06,
"loss": 1.6816,
"step": 423
},
{
"epoch": 0.7794117647058824,
"grad_norm": 0.0,
"learning_rate": 3.4541482104145695e-06,
"loss": 1.6671,
"step": 424
},
{
"epoch": 0.78125,
"grad_norm": 0.0,
"learning_rate": 3.447314834733081e-06,
"loss": 1.6012,
"step": 425
},
{
"epoch": 0.7830882352941176,
"grad_norm": 0.0,
"learning_rate": 3.440473184828266e-06,
"loss": 1.6968,
"step": 426
},
{
"epoch": 0.7849264705882353,
"grad_norm": 0.0,
"learning_rate": 3.433623320457809e-06,
"loss": 1.5466,
"step": 427
},
{
"epoch": 0.7867647058823529,
"grad_norm": 0.0,
"learning_rate": 3.4267653014511405e-06,
"loss": 1.8788,
"step": 428
},
{
"epoch": 0.7886029411764706,
"grad_norm": 0.0,
"learning_rate": 3.419899187708917e-06,
"loss": 1.8398,
"step": 429
},
{
"epoch": 0.7904411764705882,
"grad_norm": 0.0,
"learning_rate": 3.4130250392024973e-06,
"loss": 1.7668,
"step": 430
},
{
"epoch": 0.7922794117647058,
"grad_norm": 0.0,
"learning_rate": 3.4061429159734207e-06,
"loss": 1.7916,
"step": 431
},
{
"epoch": 0.7941176470588235,
"grad_norm": 0.0,
"learning_rate": 3.3992528781328793e-06,
"loss": 1.628,
"step": 432
},
{
"epoch": 0.7959558823529411,
"grad_norm": 0.0,
"learning_rate": 3.3923549858611958e-06,
"loss": 1.6921,
"step": 433
},
{
"epoch": 0.7977941176470589,
"grad_norm": 0.0,
"learning_rate": 3.385449299407296e-06,
"loss": 1.7011,
"step": 434
},
{
"epoch": 0.7996323529411765,
"grad_norm": 0.0,
"learning_rate": 3.378535879088182e-06,
"loss": 1.6465,
"step": 435
},
{
"epoch": 0.8014705882352942,
"grad_norm": 0.0,
"learning_rate": 3.3716147852884073e-06,
"loss": 1.7443,
"step": 436
},
{
"epoch": 0.8033088235294118,
"grad_norm": 0.0,
"learning_rate": 3.3646860784595512e-06,
"loss": 1.7987,
"step": 437
},
{
"epoch": 0.8051470588235294,
"grad_norm": 0.0,
"learning_rate": 3.357749819119685e-06,
"loss": 1.9102,
"step": 438
},
{
"epoch": 0.8069852941176471,
"grad_norm": 0.0,
"learning_rate": 3.3508060678528464e-06,
"loss": 1.6303,
"step": 439
},
{
"epoch": 0.8088235294117647,
"grad_norm": 0.0,
"learning_rate": 3.3438548853085135e-06,
"loss": 1.5065,
"step": 440
},
{
"epoch": 0.8106617647058824,
"grad_norm": 0.0,
"learning_rate": 3.3368963322010695e-06,
"loss": 1.7563,
"step": 441
},
{
"epoch": 0.8125,
"grad_norm": 0.0,
"learning_rate": 3.329930469309276e-06,
"loss": 1.6226,
"step": 442
},
{
"epoch": 0.8143382352941176,
"grad_norm": 0.0,
"learning_rate": 3.322957357475741e-06,
"loss": 1.8419,
"step": 443
},
{
"epoch": 0.8161764705882353,
"grad_norm": 0.0,
"learning_rate": 3.315977057606388e-06,
"loss": 1.7456,
"step": 444
},
{
"epoch": 0.8180147058823529,
"grad_norm": 0.0,
"learning_rate": 3.3089896306699233e-06,
"loss": 1.9603,
"step": 445
},
{
"epoch": 0.8198529411764706,
"grad_norm": 0.0,
"learning_rate": 3.301995137697304e-06,
"loss": 1.6238,
"step": 446
},
{
"epoch": 0.8216911764705882,
"grad_norm": 0.0,
"learning_rate": 3.2949936397812055e-06,
"loss": 1.6546,
"step": 447
},
{
"epoch": 0.8235294117647058,
"grad_norm": 0.0,
"learning_rate": 3.287985198075484e-06,
"loss": 1.5644,
"step": 448
},
{
"epoch": 0.8253676470588235,
"grad_norm": 0.0,
"learning_rate": 3.2809698737946494e-06,
"loss": 1.9652,
"step": 449
},
{
"epoch": 0.8272058823529411,
"grad_norm": 0.0,
"learning_rate": 3.2739477282133253e-06,
"loss": 1.6981,
"step": 450
},
{
"epoch": 0.8290441176470589,
"grad_norm": 0.0,
"learning_rate": 3.266918822665715e-06,
"loss": 1.8254,
"step": 451
},
{
"epoch": 0.8308823529411765,
"grad_norm": 0.0,
"learning_rate": 3.259883218545065e-06,
"loss": 1.5648,
"step": 452
},
{
"epoch": 0.8327205882352942,
"grad_norm": 0.0,
"learning_rate": 3.2528409773031322e-06,
"loss": 1.649,
"step": 453
},
{
"epoch": 0.8345588235294118,
"grad_norm": 0.0,
"learning_rate": 3.2457921604496435e-06,
"loss": 1.9707,
"step": 454
},
{
"epoch": 0.8363970588235294,
"grad_norm": 0.0,
"learning_rate": 3.2387368295517586e-06,
"loss": 1.4134,
"step": 455
},
{
"epoch": 0.8382352941176471,
"grad_norm": 0.0,
"learning_rate": 3.231675046233536e-06,
"loss": 1.834,
"step": 456
},
{
"epoch": 0.8400735294117647,
"grad_norm": 0.0,
"learning_rate": 3.22460687217539e-06,
"loss": 1.7787,
"step": 457
},
{
"epoch": 0.8419117647058824,
"grad_norm": 0.0,
"learning_rate": 3.217532369113555e-06,
"loss": 1.7776,
"step": 458
},
{
"epoch": 0.84375,
"grad_norm": 0.0,
"learning_rate": 3.2104515988395456e-06,
"loss": 1.8862,
"step": 459
},
{
"epoch": 0.8455882352941176,
"grad_norm": 0.0,
"learning_rate": 3.2033646231996167e-06,
"loss": 1.5536,
"step": 460
},
{
"epoch": 0.8474264705882353,
"grad_norm": 0.0,
"learning_rate": 3.196271504094223e-06,
"loss": 1.6952,
"step": 461
},
{
"epoch": 0.8492647058823529,
"grad_norm": 0.0,
"learning_rate": 3.189172303477478e-06,
"loss": 1.8626,
"step": 462
},
{
"epoch": 0.8511029411764706,
"grad_norm": 0.0,
"learning_rate": 3.182067083356616e-06,
"loss": 1.7898,
"step": 463
},
{
"epoch": 0.8529411764705882,
"grad_norm": 0.0,
"learning_rate": 3.174955905791444e-06,
"loss": 1.7797,
"step": 464
},
{
"epoch": 0.8547794117647058,
"grad_norm": 0.0,
"learning_rate": 3.1678388328938093e-06,
"loss": 1.6529,
"step": 465
},
{
"epoch": 0.8566176470588235,
"grad_norm": 0.0,
"learning_rate": 3.1607159268270447e-06,
"loss": 1.6602,
"step": 466
},
{
"epoch": 0.8584558823529411,
"grad_norm": 0.0,
"learning_rate": 3.153587249805438e-06,
"loss": 1.6258,
"step": 467
},
{
"epoch": 0.8602941176470589,
"grad_norm": 0.0,
"learning_rate": 3.1464528640936797e-06,
"loss": 1.7756,
"step": 468
},
{
"epoch": 0.8621323529411765,
"grad_norm": 0.0,
"learning_rate": 3.139312832006323e-06,
"loss": 1.771,
"step": 469
},
{
"epoch": 0.8639705882352942,
"grad_norm": 0.0,
"learning_rate": 3.132167215907238e-06,
"loss": 1.9377,
"step": 470
},
{
"epoch": 0.8658088235294118,
"grad_norm": 0.0,
"learning_rate": 3.12501607820907e-06,
"loss": 1.7851,
"step": 471
},
{
"epoch": 0.8676470588235294,
"grad_norm": 0.0,
"learning_rate": 3.11785948137269e-06,
"loss": 1.9384,
"step": 472
},
{
"epoch": 0.8694852941176471,
"grad_norm": 0.0,
"learning_rate": 3.1106974879066514e-06,
"loss": 1.4842,
"step": 473
},
{
"epoch": 0.8713235294117647,
"grad_norm": 0.0,
"learning_rate": 3.1035301603666456e-06,
"loss": 1.7289,
"step": 474
},
{
"epoch": 0.8731617647058824,
"grad_norm": 0.0,
"learning_rate": 3.0963575613549523e-06,
"loss": 1.8963,
"step": 475
},
{
"epoch": 0.875,
"grad_norm": 0.0,
"learning_rate": 3.089179753519894e-06,
"loss": 1.8238,
"step": 476
},
{
"epoch": 0.8768382352941176,
"grad_norm": 0.0,
"learning_rate": 3.0819967995552913e-06,
"loss": 2.1243,
"step": 477
},
{
"epoch": 0.8786764705882353,
"grad_norm": 0.0,
"learning_rate": 3.074808762199911e-06,
"loss": 1.7607,
"step": 478
},
{
"epoch": 0.8805147058823529,
"grad_norm": 0.0,
"learning_rate": 3.0676157042369213e-06,
"loss": 1.7313,
"step": 479
},
{
"epoch": 0.8823529411764706,
"grad_norm": 0.0,
"learning_rate": 3.0604176884933422e-06,
"loss": 1.6726,
"step": 480
},
{
"epoch": 0.8841911764705882,
"grad_norm": 0.0,
"learning_rate": 3.053214777839496e-06,
"loss": 1.7602,
"step": 481
},
{
"epoch": 0.8860294117647058,
"grad_norm": 0.0,
"learning_rate": 3.0460070351884614e-06,
"loss": 1.7777,
"step": 482
},
{
"epoch": 0.8878676470588235,
"grad_norm": 0.0,
"learning_rate": 3.0387945234955187e-06,
"loss": 1.759,
"step": 483
},
{
"epoch": 0.8897058823529411,
"grad_norm": 0.0,
"learning_rate": 3.031577305757605e-06,
"loss": 2.0917,
"step": 484
},
{
"epoch": 0.8915441176470589,
"grad_norm": 0.0,
"learning_rate": 3.024355445012761e-06,
"loss": 1.7402,
"step": 485
},
{
"epoch": 0.8933823529411765,
"grad_norm": 0.0,
"learning_rate": 3.0171290043395823e-06,
"loss": 1.9261,
"step": 486
},
{
"epoch": 0.8952205882352942,
"grad_norm": 0.0,
"learning_rate": 3.0098980468566663e-06,
"loss": 1.6524,
"step": 487
},
{
"epoch": 0.8970588235294118,
"grad_norm": 0.0,
"learning_rate": 3.0026626357220623e-06,
"loss": 1.8296,
"step": 488
},
{
"epoch": 0.8988970588235294,
"grad_norm": 0.0,
"learning_rate": 2.9954228341327192e-06,
"loss": 1.8665,
"step": 489
},
{
"epoch": 0.9007352941176471,
"grad_norm": 0.0,
"learning_rate": 2.988178705323934e-06,
"loss": 1.8146,
"step": 490
},
{
"epoch": 0.9025735294117647,
"grad_norm": 0.0,
"learning_rate": 2.9809303125688004e-06,
"loss": 1.7391,
"step": 491
},
{
"epoch": 0.9044117647058824,
"grad_norm": 0.0,
"learning_rate": 2.9736777191776543e-06,
"loss": 1.6417,
"step": 492
},
{
"epoch": 0.90625,
"grad_norm": 0.0,
"learning_rate": 2.966420988497522e-06,
"loss": 1.8464,
"step": 493
},
{
"epoch": 0.9080882352941176,
"grad_norm": 0.0,
"learning_rate": 2.959160183911565e-06,
"loss": 1.8636,
"step": 494
},
{
"epoch": 0.9099264705882353,
"grad_norm": 0.0,
"learning_rate": 2.9518953688385298e-06,
"loss": 1.6568,
"step": 495
},
{
"epoch": 0.9117647058823529,
"grad_norm": 0.0,
"learning_rate": 2.9446266067321904e-06,
"loss": 1.9179,
"step": 496
},
{
"epoch": 0.9136029411764706,
"grad_norm": 0.0,
"learning_rate": 2.9373539610807983e-06,
"loss": 1.9894,
"step": 497
},
{
"epoch": 0.9154411764705882,
"grad_norm": 0.0,
"learning_rate": 2.930077495406523e-06,
"loss": 1.8537,
"step": 498
},
{
"epoch": 0.9172794117647058,
"grad_norm": 0.0,
"learning_rate": 2.9227972732649e-06,
"loss": 1.8176,
"step": 499
},
{
"epoch": 0.9191176470588235,
"grad_norm": 0.0,
"learning_rate": 2.915513358244276e-06,
"loss": 1.7762,
"step": 500
},
{
"epoch": 0.9209558823529411,
"grad_norm": 0.0,
"learning_rate": 2.9082258139652536e-06,
"loss": 1.7569,
"step": 501
},
{
"epoch": 0.9227941176470589,
"grad_norm": 0.0,
"learning_rate": 2.900934704080133e-06,
"loss": 1.7657,
"step": 502
},
{
"epoch": 0.9246323529411765,
"grad_norm": 0.0,
"learning_rate": 2.893640092272357e-06,
"loss": 1.7845,
"step": 503
},
{
"epoch": 0.9264705882352942,
"grad_norm": 0.0,
"learning_rate": 2.8863420422559577e-06,
"loss": 1.4962,
"step": 504
},
{
"epoch": 0.9283088235294118,
"grad_norm": 0.0,
"learning_rate": 2.8790406177749985e-06,
"loss": 1.6051,
"step": 505
},
{
"epoch": 0.9301470588235294,
"grad_norm": 0.0,
"learning_rate": 2.8717358826030158e-06,
"loss": 1.8549,
"step": 506
},
{
"epoch": 0.9319852941176471,
"grad_norm": 0.0,
"learning_rate": 2.86442790054246e-06,
"loss": 1.7021,
"step": 507
},
{
"epoch": 0.9338235294117647,
"grad_norm": 0.0,
"learning_rate": 2.8571167354241445e-06,
"loss": 1.6309,
"step": 508
},
{
"epoch": 0.9356617647058824,
"grad_norm": 0.0,
"learning_rate": 2.849802451106685e-06,
"loss": 1.77,
"step": 509
},
{
"epoch": 0.9375,
"grad_norm": 0.0,
"learning_rate": 2.84248511147594e-06,
"loss": 1.7613,
"step": 510
},
{
"epoch": 0.9393382352941176,
"grad_norm": 0.0,
"learning_rate": 2.835164780444455e-06,
"loss": 1.9886,
"step": 511
},
{
"epoch": 0.9411764705882353,
"grad_norm": 0.0,
"learning_rate": 2.8278415219509025e-06,
"loss": 1.6941,
"step": 512
},
{
"epoch": 0.9430147058823529,
"grad_norm": 0.0,
"learning_rate": 2.8205153999595253e-06,
"loss": 1.641,
"step": 513
},
{
"epoch": 0.9448529411764706,
"grad_norm": 0.0,
"learning_rate": 2.8131864784595788e-06,
"loss": 1.8998,
"step": 514
},
{
"epoch": 0.9466911764705882,
"grad_norm": 0.0,
"learning_rate": 2.8058548214647674e-06,
"loss": 1.7034,
"step": 515
},
{
"epoch": 0.9485294117647058,
"grad_norm": 0.0,
"learning_rate": 2.798520493012691e-06,
"loss": 1.7346,
"step": 516
},
{
"epoch": 0.9503676470588235,
"grad_norm": 0.0,
"learning_rate": 2.7911835571642816e-06,
"loss": 2.0461,
"step": 517
},
{
"epoch": 0.9522058823529411,
"grad_norm": 0.0,
"learning_rate": 2.783844078003245e-06,
"loss": 1.7676,
"step": 518
},
{
"epoch": 0.9540441176470589,
"grad_norm": 0.0,
"learning_rate": 2.7765021196355023e-06,
"loss": 1.592,
"step": 519
},
{
"epoch": 0.9558823529411765,
"grad_norm": 0.0,
"learning_rate": 2.76915774618863e-06,
"loss": 1.5685,
"step": 520
},
{
"epoch": 0.9577205882352942,
"grad_norm": 0.0,
"learning_rate": 2.761811021811295e-06,
"loss": 1.7379,
"step": 521
},
{
"epoch": 0.9595588235294118,
"grad_norm": 0.0,
"learning_rate": 2.754462010672701e-06,
"loss": 1.9914,
"step": 522
},
{
"epoch": 0.9613970588235294,
"grad_norm": 0.0,
"learning_rate": 2.7471107769620258e-06,
"loss": 1.8213,
"step": 523
},
{
"epoch": 0.9632352941176471,
"grad_norm": 0.0,
"learning_rate": 2.739757384887859e-06,
"loss": 1.6564,
"step": 524
},
{
"epoch": 0.9650735294117647,
"grad_norm": 0.0,
"learning_rate": 2.732401898677642e-06,
"loss": 1.8431,
"step": 525
},
{
"epoch": 0.9669117647058824,
"grad_norm": 0.0,
"learning_rate": 2.725044382577107e-06,
"loss": 1.8911,
"step": 526
},
{
"epoch": 0.96875,
"grad_norm": 0.0,
"learning_rate": 2.7176849008497165e-06,
"loss": 1.6735,
"step": 527
},
{
"epoch": 0.9705882352941176,
"grad_norm": 0.0,
"learning_rate": 2.7103235177761018e-06,
"loss": 1.6467,
"step": 528
},
{
"epoch": 0.9724264705882353,
"grad_norm": 0.0,
"learning_rate": 2.702960297653501e-06,
"loss": 1.7016,
"step": 529
},
{
"epoch": 0.9742647058823529,
"grad_norm": 0.0,
"learning_rate": 2.695595304795197e-06,
"loss": 1.8497,
"step": 530
},
{
"epoch": 0.9761029411764706,
"grad_norm": 0.0,
"learning_rate": 2.688228603529959e-06,
"loss": 1.9022,
"step": 531
},
{
"epoch": 0.9779411764705882,
"grad_norm": 0.0,
"learning_rate": 2.680860258201475e-06,
"loss": 1.6943,
"step": 532
},
{
"epoch": 0.9797794117647058,
"grad_norm": 0.0,
"learning_rate": 2.6734903331677946e-06,
"loss": 1.886,
"step": 533
},
{
"epoch": 0.9816176470588235,
"grad_norm": 0.0,
"learning_rate": 2.666118892800765e-06,
"loss": 1.715,
"step": 534
},
{
"epoch": 0.9834558823529411,
"grad_norm": 0.0,
"learning_rate": 2.658746001485469e-06,
"loss": 1.7098,
"step": 535
},
{
"epoch": 0.9852941176470589,
"grad_norm": 0.0,
"learning_rate": 2.651371723619661e-06,
"loss": 1.6282,
"step": 536
},
{
"epoch": 0.9871323529411765,
"grad_norm": 0.0,
"learning_rate": 2.6439961236132083e-06,
"loss": 1.8106,
"step": 537
},
{
"epoch": 0.9889705882352942,
"grad_norm": 0.0,
"learning_rate": 2.6366192658875256e-06,
"loss": 1.95,
"step": 538
},
{
"epoch": 0.9908088235294118,
"grad_norm": 0.0,
"learning_rate": 2.629241214875013e-06,
"loss": 1.5364,
"step": 539
},
{
"epoch": 0.9926470588235294,
"grad_norm": 0.0,
"learning_rate": 2.621862035018492e-06,
"loss": 1.8866,
"step": 540
},
{
"epoch": 0.9944852941176471,
"grad_norm": 0.0,
"learning_rate": 2.6144817907706453e-06,
"loss": 1.6631,
"step": 541
},
{
"epoch": 0.9963235294117647,
"grad_norm": 0.0,
"learning_rate": 2.607100546593453e-06,
"loss": 1.7325,
"step": 542
},
{
"epoch": 0.9981617647058824,
"grad_norm": 0.0,
"learning_rate": 2.5997183669576264e-06,
"loss": 1.6731,
"step": 543
},
{
"epoch": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.59233531634205e-06,
"loss": 1.481,
"step": 544
},
{
"epoch": 1.0018382352941178,
"grad_norm": 0.0,
"learning_rate": 2.584951459233215e-06,
"loss": 1.864,
"step": 545
},
{
"epoch": 1.0036764705882353,
"grad_norm": 0.0,
"learning_rate": 2.5775668601246555e-06,
"loss": 1.8555,
"step": 546
},
{
"epoch": 1.005514705882353,
"grad_norm": 0.0,
"learning_rate": 2.5701815835163896e-06,
"loss": 1.8919,
"step": 547
},
{
"epoch": 1.0073529411764706,
"grad_norm": 0.0,
"learning_rate": 2.5627956939143507e-06,
"loss": 1.9853,
"step": 548
},
{
"epoch": 1.0091911764705883,
"grad_norm": 0.0,
"learning_rate": 2.555409255829825e-06,
"loss": 2.0538,
"step": 549
},
{
"epoch": 1.0110294117647058,
"grad_norm": 0.0,
"learning_rate": 2.548022333778892e-06,
"loss": 1.632,
"step": 550
},
{
"epoch": 1.0128676470588236,
"grad_norm": 0.0,
"learning_rate": 2.540634992281858e-06,
"loss": 1.5268,
"step": 551
},
{
"epoch": 1.0147058823529411,
"grad_norm": 0.0,
"learning_rate": 2.5332472958626923e-06,
"loss": 1.6835,
"step": 552
},
{
"epoch": 1.0165441176470589,
"grad_norm": 0.0,
"learning_rate": 2.525859309048463e-06,
"loss": 1.7067,
"step": 553
},
{
"epoch": 1.0183823529411764,
"grad_norm": 0.0,
"learning_rate": 2.518471096368777e-06,
"loss": 1.8755,
"step": 554
},
{
"epoch": 1.0202205882352942,
"grad_norm": 0.0,
"learning_rate": 2.511082722355212e-06,
"loss": 1.8055,
"step": 555
},
{
"epoch": 1.0220588235294117,
"grad_norm": 0.0,
"learning_rate": 2.503694251540757e-06,
"loss": 1.5165,
"step": 556
},
{
"epoch": 1.0238970588235294,
"grad_norm": 0.0,
"learning_rate": 2.496305748459244e-06,
"loss": 1.4758,
"step": 557
},
{
"epoch": 1.025735294117647,
"grad_norm": 0.0,
"learning_rate": 2.4889172776447885e-06,
"loss": 1.811,
"step": 558
},
{
"epoch": 1.0275735294117647,
"grad_norm": 0.0,
"learning_rate": 2.4815289036312236e-06,
"loss": 1.8183,
"step": 559
},
{
"epoch": 1.0294117647058822,
"grad_norm": 0.0,
"learning_rate": 2.474140690951538e-06,
"loss": 1.9046,
"step": 560
},
{
"epoch": 1.03125,
"grad_norm": 0.0,
"learning_rate": 2.4667527041373085e-06,
"loss": 1.5942,
"step": 561
},
{
"epoch": 1.0330882352941178,
"grad_norm": 0.0,
"learning_rate": 2.459365007718143e-06,
"loss": 1.5341,
"step": 562
},
{
"epoch": 1.0349264705882353,
"grad_norm": 0.0,
"learning_rate": 2.4519776662211083e-06,
"loss": 1.8902,
"step": 563
},
{
"epoch": 1.036764705882353,
"grad_norm": 0.0,
"learning_rate": 2.444590744170176e-06,
"loss": 1.9901,
"step": 564
},
{
"epoch": 1.0386029411764706,
"grad_norm": 0.0,
"learning_rate": 2.437204306085651e-06,
"loss": 1.421,
"step": 565
},
{
"epoch": 1.0404411764705883,
"grad_norm": 0.0,
"learning_rate": 2.4298184164836104e-06,
"loss": 1.8848,
"step": 566
},
{
"epoch": 1.0422794117647058,
"grad_norm": 0.0,
"learning_rate": 2.422433139875345e-06,
"loss": 1.5938,
"step": 567
},
{
"epoch": 1.0441176470588236,
"grad_norm": 0.0,
"learning_rate": 2.4150485407667855e-06,
"loss": 1.9612,
"step": 568
},
{
"epoch": 1.0459558823529411,
"grad_norm": 0.0,
"learning_rate": 2.4076646836579508e-06,
"loss": 1.7775,
"step": 569
},
{
"epoch": 1.0477941176470589,
"grad_norm": 0.0,
"learning_rate": 2.4002816330423744e-06,
"loss": 1.6265,
"step": 570
},
{
"epoch": 1.0496323529411764,
"grad_norm": 0.0,
"learning_rate": 2.392899453406548e-06,
"loss": 1.9559,
"step": 571
},
{
"epoch": 1.0514705882352942,
"grad_norm": 0.0,
"learning_rate": 2.385518209229355e-06,
"loss": 1.6096,
"step": 572
},
{
"epoch": 1.0533088235294117,
"grad_norm": 0.0,
"learning_rate": 2.3781379649815094e-06,
"loss": 2.0393,
"step": 573
},
{
"epoch": 1.0551470588235294,
"grad_norm": 0.0,
"learning_rate": 2.3707587851249875e-06,
"loss": 1.7581,
"step": 574
},
{
"epoch": 1.056985294117647,
"grad_norm": 0.0,
"learning_rate": 2.3633807341124753e-06,
"loss": 1.857,
"step": 575
},
{
"epoch": 1.0588235294117647,
"grad_norm": 0.0,
"learning_rate": 2.3560038763867913e-06,
"loss": 1.8291,
"step": 576
},
{
"epoch": 1.0606617647058822,
"grad_norm": 0.0,
"learning_rate": 2.3486282763803397e-06,
"loss": 1.6881,
"step": 577
},
{
"epoch": 1.0625,
"grad_norm": 0.0,
"learning_rate": 2.3412539985145324e-06,
"loss": 1.5025,
"step": 578
},
{
"epoch": 1.0643382352941178,
"grad_norm": 0.0,
"learning_rate": 2.3338811071992353e-06,
"loss": 1.7593,
"step": 579
},
{
"epoch": 1.0661764705882353,
"grad_norm": 0.0,
"learning_rate": 2.3265096668322063e-06,
"loss": 1.8755,
"step": 580
},
{
"epoch": 1.068014705882353,
"grad_norm": 0.0,
"learning_rate": 2.319139741798525e-06,
"loss": 1.7058,
"step": 581
},
{
"epoch": 1.0698529411764706,
"grad_norm": 0.0,
"learning_rate": 2.3117713964700415e-06,
"loss": 2.0831,
"step": 582
},
{
"epoch": 1.0716911764705883,
"grad_norm": 0.0,
"learning_rate": 2.304404695204804e-06,
"loss": 1.8701,
"step": 583
},
{
"epoch": 1.0735294117647058,
"grad_norm": 0.0,
"learning_rate": 2.2970397023465e-06,
"loss": 1.8427,
"step": 584
},
{
"epoch": 1.0753676470588236,
"grad_norm": 0.0,
"learning_rate": 2.289676482223899e-06,
"loss": 1.4616,
"step": 585
},
{
"epoch": 1.0772058823529411,
"grad_norm": 0.0,
"learning_rate": 2.2823150991502844e-06,
"loss": 1.6877,
"step": 586
},
{
"epoch": 1.0790441176470589,
"grad_norm": 0.0,
"learning_rate": 2.274955617422894e-06,
"loss": 1.7195,
"step": 587
},
{
"epoch": 1.0808823529411764,
"grad_norm": 0.0,
"learning_rate": 2.267598101322359e-06,
"loss": 1.7431,
"step": 588
},
{
"epoch": 1.0827205882352942,
"grad_norm": 0.0,
"learning_rate": 2.2602426151121413e-06,
"loss": 1.2884,
"step": 589
},
{
"epoch": 1.0845588235294117,
"grad_norm": 0.0,
"learning_rate": 2.2528892230379746e-06,
"loss": 1.7814,
"step": 590
},
{
"epoch": 1.0863970588235294,
"grad_norm": 0.0,
"learning_rate": 2.245537989327299e-06,
"loss": 1.6408,
"step": 591
},
{
"epoch": 1.088235294117647,
"grad_norm": 0.0,
"learning_rate": 2.2381889781887063e-06,
"loss": 1.708,
"step": 592
},
{
"epoch": 1.0900735294117647,
"grad_norm": 0.0,
"learning_rate": 2.2308422538113718e-06,
"loss": 1.4733,
"step": 593
},
{
"epoch": 1.0919117647058822,
"grad_norm": 0.0,
"learning_rate": 2.2234978803644972e-06,
"loss": 1.7233,
"step": 594
},
{
"epoch": 1.09375,
"grad_norm": 0.0,
"learning_rate": 2.2161559219967555e-06,
"loss": 1.6656,
"step": 595
},
{
"epoch": 1.0955882352941178,
"grad_norm": 0.0,
"learning_rate": 2.2088164428357196e-06,
"loss": 1.7206,
"step": 596
},
{
"epoch": 1.0974264705882353,
"grad_norm": 0.0,
"learning_rate": 2.2014795069873092e-06,
"loss": 1.915,
"step": 597
},
{
"epoch": 1.099264705882353,
"grad_norm": 0.0,
"learning_rate": 2.1941451785352334e-06,
"loss": 1.7666,
"step": 598
},
{
"epoch": 1.1011029411764706,
"grad_norm": 0.0,
"learning_rate": 2.1868135215404216e-06,
"loss": 1.5042,
"step": 599
},
{
"epoch": 1.1029411764705883,
"grad_norm": 0.0,
"learning_rate": 2.1794846000404756e-06,
"loss": 1.9193,
"step": 600
},
{
"epoch": 1.1047794117647058,
"grad_norm": 0.0,
"learning_rate": 2.1721584780490988e-06,
"loss": 1.9347,
"step": 601
},
{
"epoch": 1.1066176470588236,
"grad_norm": 0.0,
"learning_rate": 2.1648352195555456e-06,
"loss": 1.8926,
"step": 602
},
{
"epoch": 1.1084558823529411,
"grad_norm": 0.0,
"learning_rate": 2.157514888524061e-06,
"loss": 1.73,
"step": 603
},
{
"epoch": 1.1102941176470589,
"grad_norm": 0.0,
"learning_rate": 2.150197548893315e-06,
"loss": 1.5678,
"step": 604
},
{
"epoch": 1.1121323529411764,
"grad_norm": 0.0,
"learning_rate": 2.142883264575856e-06,
"loss": 1.837,
"step": 605
},
{
"epoch": 1.1139705882352942,
"grad_norm": 0.0,
"learning_rate": 2.1355720994575414e-06,
"loss": 1.6828,
"step": 606
},
{
"epoch": 1.1158088235294117,
"grad_norm": 0.0,
"learning_rate": 2.1282641173969855e-06,
"loss": 1.6306,
"step": 607
},
{
"epoch": 1.1176470588235294,
"grad_norm": 0.0,
"learning_rate": 2.120959382225002e-06,
"loss": 1.8238,
"step": 608
},
{
"epoch": 1.119485294117647,
"grad_norm": 0.0,
"learning_rate": 2.113657957744042e-06,
"loss": 1.9431,
"step": 609
},
{
"epoch": 1.1213235294117647,
"grad_norm": 0.0,
"learning_rate": 2.106359907727644e-06,
"loss": 1.754,
"step": 610
},
{
"epoch": 1.1231617647058822,
"grad_norm": 0.0,
"learning_rate": 2.099065295919869e-06,
"loss": 1.8101,
"step": 611
},
{
"epoch": 1.125,
"grad_norm": 0.0,
"learning_rate": 2.0917741860347463e-06,
"loss": 1.8708,
"step": 612
},
{
"epoch": 1.1268382352941178,
"grad_norm": 0.0,
"learning_rate": 2.0844866417557243e-06,
"loss": 1.8487,
"step": 613
},
{
"epoch": 1.1286764705882353,
"grad_norm": 0.0,
"learning_rate": 2.0772027267351e-06,
"loss": 1.9463,
"step": 614
},
{
"epoch": 1.130514705882353,
"grad_norm": 0.0,
"learning_rate": 2.069922504593478e-06,
"loss": 1.5628,
"step": 615
},
{
"epoch": 1.1323529411764706,
"grad_norm": 0.0,
"learning_rate": 2.062646038919202e-06,
"loss": 1.6557,
"step": 616
},
{
"epoch": 1.1341911764705883,
"grad_norm": 0.0,
"learning_rate": 2.0553733932678096e-06,
"loss": 1.5397,
"step": 617
},
{
"epoch": 1.1360294117647058,
"grad_norm": 0.0,
"learning_rate": 2.048104631161471e-06,
"loss": 1.6991,
"step": 618
},
{
"epoch": 1.1378676470588236,
"grad_norm": 0.0,
"learning_rate": 2.040839816088436e-06,
"loss": 1.8088,
"step": 619
},
{
"epoch": 1.1397058823529411,
"grad_norm": 0.0,
"learning_rate": 2.0335790115024787e-06,
"loss": 1.7756,
"step": 620
},
{
"epoch": 1.1415441176470589,
"grad_norm": 0.0,
"learning_rate": 2.026322280822346e-06,
"loss": 2.0245,
"step": 621
},
{
"epoch": 1.1433823529411764,
"grad_norm": 0.0,
"learning_rate": 2.019069687431199e-06,
"loss": 1.7583,
"step": 622
},
{
"epoch": 1.1452205882352942,
"grad_norm": 0.0,
"learning_rate": 2.0118212946760664e-06,
"loss": 1.7974,
"step": 623
},
{
"epoch": 1.1470588235294117,
"grad_norm": 0.0,
"learning_rate": 2.004577165867282e-06,
"loss": 1.4694,
"step": 624
},
{
"epoch": 1.1488970588235294,
"grad_norm": 0.0,
"learning_rate": 1.9973373642779385e-06,
"loss": 1.7528,
"step": 625
},
{
"epoch": 1.150735294117647,
"grad_norm": 0.0,
"learning_rate": 1.9901019531433345e-06,
"loss": 1.5792,
"step": 626
},
{
"epoch": 1.1525735294117647,
"grad_norm": 0.0,
"learning_rate": 1.9828709956604176e-06,
"loss": 1.5808,
"step": 627
},
{
"epoch": 1.1544117647058822,
"grad_norm": 0.0,
"learning_rate": 1.9756445549872396e-06,
"loss": 1.8014,
"step": 628
},
{
"epoch": 1.15625,
"grad_norm": 0.0,
"learning_rate": 1.968422694242396e-06,
"loss": 1.7312,
"step": 629
},
{
"epoch": 1.1580882352941178,
"grad_norm": 0.0,
"learning_rate": 1.961205476504482e-06,
"loss": 1.5888,
"step": 630
},
{
"epoch": 1.1599264705882353,
"grad_norm": 0.0,
"learning_rate": 1.9539929648115395e-06,
"loss": 1.7464,
"step": 631
},
{
"epoch": 1.161764705882353,
"grad_norm": 0.0,
"learning_rate": 1.9467852221605044e-06,
"loss": 1.7115,
"step": 632
},
{
"epoch": 1.1636029411764706,
"grad_norm": 0.0,
"learning_rate": 1.939582311506658e-06,
"loss": 1.7716,
"step": 633
},
{
"epoch": 1.1654411764705883,
"grad_norm": 0.0,
"learning_rate": 1.9323842957630795e-06,
"loss": 1.7229,
"step": 634
},
{
"epoch": 1.1672794117647058,
"grad_norm": 0.0,
"learning_rate": 1.925191237800089e-06,
"loss": 1.7373,
"step": 635
},
{
"epoch": 1.1691176470588236,
"grad_norm": 0.0,
"learning_rate": 1.9180032004447095e-06,
"loss": 1.5658,
"step": 636
},
{
"epoch": 1.1709558823529411,
"grad_norm": 0.0,
"learning_rate": 1.910820246480106e-06,
"loss": 1.8198,
"step": 637
},
{
"epoch": 1.1727941176470589,
"grad_norm": 0.0,
"learning_rate": 1.9036424386450487e-06,
"loss": 1.6393,
"step": 638
},
{
"epoch": 1.1746323529411764,
"grad_norm": 0.0,
"learning_rate": 1.8964698396333555e-06,
"loss": 1.872,
"step": 639
},
{
"epoch": 1.1764705882352942,
"grad_norm": 0.0,
"learning_rate": 1.8893025120933488e-06,
"loss": 1.5913,
"step": 640
},
{
"epoch": 1.1783088235294117,
"grad_norm": 0.0,
"learning_rate": 1.882140518627311e-06,
"loss": 1.7481,
"step": 641
},
{
"epoch": 1.1801470588235294,
"grad_norm": 0.0,
"learning_rate": 1.874983921790931e-06,
"loss": 1.7131,
"step": 642
},
{
"epoch": 1.181985294117647,
"grad_norm": 0.0,
"learning_rate": 1.8678327840927627e-06,
"loss": 1.5675,
"step": 643
},
{
"epoch": 1.1838235294117647,
"grad_norm": 0.0,
"learning_rate": 1.860687167993678e-06,
"loss": 1.889,
"step": 644
},
{
"epoch": 1.1856617647058822,
"grad_norm": 0.0,
"learning_rate": 1.8535471359063211e-06,
"loss": 1.8815,
"step": 645
},
{
"epoch": 1.1875,
"grad_norm": 0.0,
"learning_rate": 1.8464127501945625e-06,
"loss": 1.5414,
"step": 646
},
{
"epoch": 1.1893382352941178,
"grad_norm": 0.0,
"learning_rate": 1.8392840731729561e-06,
"loss": 1.8284,
"step": 647
},
{
"epoch": 1.1911764705882353,
"grad_norm": 0.0,
"learning_rate": 1.8321611671061915e-06,
"loss": 1.712,
"step": 648
},
{
"epoch": 1.193014705882353,
"grad_norm": 0.0,
"learning_rate": 1.8250440942085562e-06,
"loss": 1.6119,
"step": 649
},
{
"epoch": 1.1948529411764706,
"grad_norm": 0.0,
"learning_rate": 1.8179329166433846e-06,
"loss": 1.6625,
"step": 650
},
{
"epoch": 1.1966911764705883,
"grad_norm": 0.0,
"learning_rate": 1.8108276965225225e-06,
"loss": 1.9299,
"step": 651
},
{
"epoch": 1.1985294117647058,
"grad_norm": 0.0,
"learning_rate": 1.8037284959057782e-06,
"loss": 1.7874,
"step": 652
},
{
"epoch": 1.2003676470588236,
"grad_norm": 0.0,
"learning_rate": 1.7966353768003838e-06,
"loss": 1.6945,
"step": 653
},
{
"epoch": 1.2022058823529411,
"grad_norm": 0.0,
"learning_rate": 1.7895484011604553e-06,
"loss": 1.7856,
"step": 654
},
{
"epoch": 1.2040441176470589,
"grad_norm": 0.0,
"learning_rate": 1.7824676308864452e-06,
"loss": 1.516,
"step": 655
},
{
"epoch": 1.2058823529411764,
"grad_norm": 0.0,
"learning_rate": 1.7753931278246109e-06,
"loss": 1.5837,
"step": 656
},
{
"epoch": 1.2077205882352942,
"grad_norm": 0.0,
"learning_rate": 1.768324953766465e-06,
"loss": 1.8389,
"step": 657
},
{
"epoch": 1.2095588235294117,
"grad_norm": 0.0,
"learning_rate": 1.761263170448242e-06,
"loss": 1.5752,
"step": 658
},
{
"epoch": 1.2113970588235294,
"grad_norm": 0.0,
"learning_rate": 1.7542078395503574e-06,
"loss": 1.5576,
"step": 659
},
{
"epoch": 1.213235294117647,
"grad_norm": 0.0,
"learning_rate": 1.7471590226968682e-06,
"loss": 1.7369,
"step": 660
},
{
"epoch": 1.2150735294117647,
"grad_norm": 0.0,
"learning_rate": 1.7401167814549353e-06,
"loss": 1.7101,
"step": 661
},
{
"epoch": 1.2169117647058822,
"grad_norm": 0.0,
"learning_rate": 1.7330811773342864e-06,
"loss": 1.7831,
"step": 662
},
{
"epoch": 1.21875,
"grad_norm": 0.0,
"learning_rate": 1.7260522717866751e-06,
"loss": 1.8079,
"step": 663
},
{
"epoch": 1.2205882352941178,
"grad_norm": 0.0,
"learning_rate": 1.719030126205351e-06,
"loss": 1.9275,
"step": 664
},
{
"epoch": 1.2224264705882353,
"grad_norm": 0.0,
"learning_rate": 1.7120148019245173e-06,
"loss": 1.7212,
"step": 665
},
{
"epoch": 1.224264705882353,
"grad_norm": 0.0,
"learning_rate": 1.7050063602187956e-06,
"loss": 1.6668,
"step": 666
},
{
"epoch": 1.2261029411764706,
"grad_norm": 0.0,
"learning_rate": 1.6980048623026967e-06,
"loss": 1.756,
"step": 667
},
{
"epoch": 1.2279411764705883,
"grad_norm": 0.0,
"learning_rate": 1.6910103693300767e-06,
"loss": 2.0557,
"step": 668
},
{
"epoch": 1.2297794117647058,
"grad_norm": 0.0,
"learning_rate": 1.6840229423936127e-06,
"loss": 1.5587,
"step": 669
},
{
"epoch": 1.2316176470588236,
"grad_norm": 0.0,
"learning_rate": 1.6770426425242603e-06,
"loss": 1.9321,
"step": 670
},
{
"epoch": 1.2334558823529411,
"grad_norm": 0.0,
"learning_rate": 1.670069530690725e-06,
"loss": 1.8981,
"step": 671
},
{
"epoch": 1.2352941176470589,
"grad_norm": 0.0,
"learning_rate": 1.6631036677989315e-06,
"loss": 1.7275,
"step": 672
},
{
"epoch": 1.2371323529411764,
"grad_norm": 0.0,
"learning_rate": 1.6561451146914873e-06,
"loss": 1.6767,
"step": 673
},
{
"epoch": 1.2389705882352942,
"grad_norm": 0.0,
"learning_rate": 1.649193932147154e-06,
"loss": 1.7657,
"step": 674
},
{
"epoch": 1.2408088235294117,
"grad_norm": 0.0,
"learning_rate": 1.6422501808803165e-06,
"loss": 1.9796,
"step": 675
},
{
"epoch": 1.2426470588235294,
"grad_norm": 0.0,
"learning_rate": 1.635313921540449e-06,
"loss": 1.7294,
"step": 676
},
{
"epoch": 1.244485294117647,
"grad_norm": 0.0,
"learning_rate": 1.6283852147115931e-06,
"loss": 1.6732,
"step": 677
},
{
"epoch": 1.2463235294117647,
"grad_norm": 0.0,
"learning_rate": 1.6214641209118186e-06,
"loss": 1.8132,
"step": 678
},
{
"epoch": 1.2481617647058822,
"grad_norm": 0.0,
"learning_rate": 1.6145507005927052e-06,
"loss": 1.7224,
"step": 679
},
{
"epoch": 1.25,
"grad_norm": 0.0,
"learning_rate": 1.607645014138805e-06,
"loss": 1.9829,
"step": 680
},
{
"epoch": 1.2518382352941178,
"grad_norm": 0.0,
"learning_rate": 1.6007471218671209e-06,
"loss": 1.7143,
"step": 681
},
{
"epoch": 1.2536764705882353,
"grad_norm": 0.0,
"learning_rate": 1.59385708402658e-06,
"loss": 1.7415,
"step": 682
},
{
"epoch": 1.2555147058823528,
"grad_norm": 0.0,
"learning_rate": 1.586974960797503e-06,
"loss": 1.7169,
"step": 683
},
{
"epoch": 1.2573529411764706,
"grad_norm": 0.0,
"learning_rate": 1.5801008122910838e-06,
"loss": 1.865,
"step": 684
},
{
"epoch": 1.2591911764705883,
"grad_norm": 0.0,
"learning_rate": 1.5732346985488605e-06,
"loss": 1.9046,
"step": 685
},
{
"epoch": 1.2610294117647058,
"grad_norm": 0.0,
"learning_rate": 1.5663766795421912e-06,
"loss": 1.8258,
"step": 686
},
{
"epoch": 1.2628676470588236,
"grad_norm": 0.0,
"learning_rate": 1.5595268151717347e-06,
"loss": 1.666,
"step": 687
},
{
"epoch": 1.2647058823529411,
"grad_norm": 0.0,
"learning_rate": 1.55268516526692e-06,
"loss": 1.9431,
"step": 688
},
{
"epoch": 1.2665441176470589,
"grad_norm": 0.0,
"learning_rate": 1.5458517895854309e-06,
"loss": 1.7691,
"step": 689
},
{
"epoch": 1.2683823529411764,
"grad_norm": 0.0,
"learning_rate": 1.5390267478126802e-06,
"loss": 1.9737,
"step": 690
},
{
"epoch": 1.2702205882352942,
"grad_norm": 0.0,
"learning_rate": 1.5322100995612868e-06,
"loss": 1.8369,
"step": 691
},
{
"epoch": 1.2720588235294117,
"grad_norm": 0.0,
"learning_rate": 1.525401904370561e-06,
"loss": 1.8232,
"step": 692
},
{
"epoch": 1.2738970588235294,
"grad_norm": 0.0,
"learning_rate": 1.5186022217059777e-06,
"loss": 1.5475,
"step": 693
},
{
"epoch": 1.2757352941176472,
"grad_norm": 0.0,
"learning_rate": 1.5118111109586598e-06,
"loss": 1.9996,
"step": 694
},
{
"epoch": 1.2775735294117647,
"grad_norm": 0.0,
"learning_rate": 1.5050286314448632e-06,
"loss": 1.7864,
"step": 695
},
{
"epoch": 1.2794117647058822,
"grad_norm": 0.0,
"learning_rate": 1.4982548424054505e-06,
"loss": 1.7143,
"step": 696
},
{
"epoch": 1.28125,
"grad_norm": 0.0,
"learning_rate": 1.4914898030053827e-06,
"loss": 1.7523,
"step": 697
},
{
"epoch": 1.2830882352941178,
"grad_norm": 0.0,
"learning_rate": 1.4847335723331952e-06,
"loss": 1.8499,
"step": 698
},
{
"epoch": 1.2849264705882353,
"grad_norm": 0.0,
"learning_rate": 1.4779862094004837e-06,
"loss": 1.8997,
"step": 699
},
{
"epoch": 1.2867647058823528,
"grad_norm": 0.0,
"learning_rate": 1.4712477731413935e-06,
"loss": 1.825,
"step": 700
},
{
"epoch": 1.2886029411764706,
"grad_norm": 0.0,
"learning_rate": 1.4645183224120956e-06,
"loss": 1.9566,
"step": 701
},
{
"epoch": 1.2904411764705883,
"grad_norm": 0.0,
"learning_rate": 1.457797915990283e-06,
"loss": 1.6981,
"step": 702
},
{
"epoch": 1.2922794117647058,
"grad_norm": 0.0,
"learning_rate": 1.4510866125746497e-06,
"loss": 1.7447,
"step": 703
},
{
"epoch": 1.2941176470588236,
"grad_norm": 0.0,
"learning_rate": 1.4443844707843767e-06,
"loss": 1.8724,
"step": 704
},
{
"epoch": 1.2959558823529411,
"grad_norm": 0.0,
"learning_rate": 1.437691549158633e-06,
"loss": 1.967,
"step": 705
},
{
"epoch": 1.2977941176470589,
"grad_norm": 0.0,
"learning_rate": 1.4310079061560438e-06,
"loss": 1.7536,
"step": 706
},
{
"epoch": 1.2996323529411764,
"grad_norm": 0.0,
"learning_rate": 1.424333600154202e-06,
"loss": 1.9413,
"step": 707
},
{
"epoch": 1.3014705882352942,
"grad_norm": 0.0,
"learning_rate": 1.4176686894491386e-06,
"loss": 2.1014,
"step": 708
},
{
"epoch": 1.3033088235294117,
"grad_norm": 0.0,
"learning_rate": 1.411013232254827e-06,
"loss": 1.7457,
"step": 709
},
{
"epoch": 1.3051470588235294,
"grad_norm": 0.0,
"learning_rate": 1.4043672867026693e-06,
"loss": 1.7354,
"step": 710
},
{
"epoch": 1.3069852941176472,
"grad_norm": 0.0,
"learning_rate": 1.3977309108409882e-06,
"loss": 1.8861,
"step": 711
},
{
"epoch": 1.3088235294117647,
"grad_norm": 0.0,
"learning_rate": 1.3911041626345215e-06,
"loss": 1.4903,
"step": 712
},
{
"epoch": 1.3106617647058822,
"grad_norm": 0.0,
"learning_rate": 1.3844870999639154e-06,
"loss": 1.788,
"step": 713
},
{
"epoch": 1.3125,
"grad_norm": 0.0,
"learning_rate": 1.3778797806252186e-06,
"loss": 1.7226,
"step": 714
},
{
"epoch": 1.3143382352941178,
"grad_norm": 0.0,
"learning_rate": 1.3712822623293776e-06,
"loss": 1.7147,
"step": 715
},
{
"epoch": 1.3161764705882353,
"grad_norm": 0.0,
"learning_rate": 1.3646946027017333e-06,
"loss": 1.7169,
"step": 716
},
{
"epoch": 1.3180147058823528,
"grad_norm": 0.0,
"learning_rate": 1.3581168592815142e-06,
"loss": 1.7809,
"step": 717
},
{
"epoch": 1.3198529411764706,
"grad_norm": 0.0,
"learning_rate": 1.3515490895213428e-06,
"loss": 1.6852,
"step": 718
},
{
"epoch": 1.3216911764705883,
"grad_norm": 0.0,
"learning_rate": 1.3449913507867202e-06,
"loss": 1.8936,
"step": 719
},
{
"epoch": 1.3235294117647058,
"grad_norm": 0.0,
"learning_rate": 1.3384437003555394e-06,
"loss": 1.8982,
"step": 720
},
{
"epoch": 1.3253676470588236,
"grad_norm": 0.0,
"learning_rate": 1.3319061954175733e-06,
"loss": 1.5213,
"step": 721
},
{
"epoch": 1.3272058823529411,
"grad_norm": 0.0,
"learning_rate": 1.3253788930739816e-06,
"loss": 1.5643,
"step": 722
},
{
"epoch": 1.3290441176470589,
"grad_norm": 0.0,
"learning_rate": 1.3188618503368117e-06,
"loss": 1.6885,
"step": 723
},
{
"epoch": 1.3308823529411764,
"grad_norm": 0.0,
"learning_rate": 1.3123551241284976e-06,
"loss": 1.8524,
"step": 724
},
{
"epoch": 1.3327205882352942,
"grad_norm": 0.0,
"learning_rate": 1.3058587712813665e-06,
"loss": 1.9121,
"step": 725
},
{
"epoch": 1.3345588235294117,
"grad_norm": 0.0,
"learning_rate": 1.299372848537139e-06,
"loss": 1.8104,
"step": 726
},
{
"epoch": 1.3363970588235294,
"grad_norm": 0.0,
"learning_rate": 1.292897412546436e-06,
"loss": 1.7105,
"step": 727
},
{
"epoch": 1.3382352941176472,
"grad_norm": 0.0,
"learning_rate": 1.286432519868282e-06,
"loss": 1.7432,
"step": 728
},
{
"epoch": 1.3400735294117647,
"grad_norm": 0.0,
"learning_rate": 1.2799782269696137e-06,
"loss": 1.8036,
"step": 729
},
{
"epoch": 1.3419117647058822,
"grad_norm": 0.0,
"learning_rate": 1.2735345902247831e-06,
"loss": 1.8589,
"step": 730
},
{
"epoch": 1.34375,
"grad_norm": 0.0,
"learning_rate": 1.2671016659150693e-06,
"loss": 1.5688,
"step": 731
},
{
"epoch": 1.3455882352941178,
"grad_norm": 0.0,
"learning_rate": 1.2606795102281806e-06,
"loss": 1.6113,
"step": 732
},
{
"epoch": 1.3474264705882353,
"grad_norm": 0.0,
"learning_rate": 1.2542681792577749e-06,
"loss": 1.799,
"step": 733
},
{
"epoch": 1.3492647058823528,
"grad_norm": 0.0,
"learning_rate": 1.2478677290029554e-06,
"loss": 1.6972,
"step": 734
},
{
"epoch": 1.3511029411764706,
"grad_norm": 0.0,
"learning_rate": 1.2414782153677935e-06,
"loss": 1.586,
"step": 735
},
{
"epoch": 1.3529411764705883,
"grad_norm": 0.0,
"learning_rate": 1.235099694160834e-06,
"loss": 1.88,
"step": 736
},
{
"epoch": 1.3547794117647058,
"grad_norm": 0.0,
"learning_rate": 1.2287322210946098e-06,
"loss": 1.6245,
"step": 737
},
{
"epoch": 1.3566176470588236,
"grad_norm": 0.0,
"learning_rate": 1.2223758517851556e-06,
"loss": 1.8224,
"step": 738
},
{
"epoch": 1.3584558823529411,
"grad_norm": 0.0,
"learning_rate": 1.21603064175152e-06,
"loss": 1.9156,
"step": 739
},
{
"epoch": 1.3602941176470589,
"grad_norm": 0.0,
"learning_rate": 1.2096966464152837e-06,
"loss": 1.8021,
"step": 740
},
{
"epoch": 1.3621323529411764,
"grad_norm": 0.0,
"learning_rate": 1.2033739211000729e-06,
"loss": 1.9186,
"step": 741
},
{
"epoch": 1.3639705882352942,
"grad_norm": 0.0,
"learning_rate": 1.1970625210310768e-06,
"loss": 1.7245,
"step": 742
},
{
"epoch": 1.3658088235294117,
"grad_norm": 0.0,
"learning_rate": 1.190762501334566e-06,
"loss": 1.6118,
"step": 743
},
{
"epoch": 1.3676470588235294,
"grad_norm": 0.0,
"learning_rate": 1.184473917037411e-06,
"loss": 1.8043,
"step": 744
},
{
"epoch": 1.3694852941176472,
"grad_norm": 0.0,
"learning_rate": 1.1781968230665978e-06,
"loss": 1.7861,
"step": 745
},
{
"epoch": 1.3713235294117647,
"grad_norm": 0.0,
"learning_rate": 1.1719312742487565e-06,
"loss": 1.6989,
"step": 746
},
{
"epoch": 1.3731617647058822,
"grad_norm": 0.0,
"learning_rate": 1.1656773253096716e-06,
"loss": 1.7684,
"step": 747
},
{
"epoch": 1.375,
"grad_norm": 0.0,
"learning_rate": 1.1594350308738154e-06,
"loss": 1.7307,
"step": 748
},
{
"epoch": 1.3768382352941178,
"grad_norm": 0.0,
"learning_rate": 1.1532044454638598e-06,
"loss": 1.733,
"step": 749
},
{
"epoch": 1.3786764705882353,
"grad_norm": 0.0,
"learning_rate": 1.1469856235002088e-06,
"loss": 1.7285,
"step": 750
},
{
"epoch": 1.3805147058823528,
"grad_norm": 0.0,
"learning_rate": 1.140778619300519e-06,
"loss": 1.8647,
"step": 751
},
{
"epoch": 1.3823529411764706,
"grad_norm": 0.0,
"learning_rate": 1.1345834870792256e-06,
"loss": 1.6412,
"step": 752
},
{
"epoch": 1.3841911764705883,
"grad_norm": 0.0,
"learning_rate": 1.1284002809470695e-06,
"loss": 1.6432,
"step": 753
},
{
"epoch": 1.3860294117647058,
"grad_norm": 0.0,
"learning_rate": 1.1222290549106245e-06,
"loss": 1.779,
"step": 754
},
{
"epoch": 1.3878676470588236,
"grad_norm": 0.0,
"learning_rate": 1.1160698628718253e-06,
"loss": 1.8016,
"step": 755
},
{
"epoch": 1.3897058823529411,
"grad_norm": 0.0,
"learning_rate": 1.1099227586274965e-06,
"loss": 1.708,
"step": 756
},
{
"epoch": 1.3915441176470589,
"grad_norm": 0.0,
"learning_rate": 1.1037877958688856e-06,
"loss": 1.5641,
"step": 757
},
{
"epoch": 1.3933823529411764,
"grad_norm": 0.0,
"learning_rate": 1.0976650281811855e-06,
"loss": 1.6455,
"step": 758
},
{
"epoch": 1.3952205882352942,
"grad_norm": 0.0,
"learning_rate": 1.091554509043081e-06,
"loss": 1.6056,
"step": 759
},
{
"epoch": 1.3970588235294117,
"grad_norm": 0.0,
"learning_rate": 1.0854562918262645e-06,
"loss": 1.669,
"step": 760
},
{
"epoch": 1.3988970588235294,
"grad_norm": 0.0,
"learning_rate": 1.0793704297949872e-06,
"loss": 1.8561,
"step": 761
},
{
"epoch": 1.4007352941176472,
"grad_norm": 0.0,
"learning_rate": 1.0732969761055774e-06,
"loss": 1.608,
"step": 762
},
{
"epoch": 1.4025735294117647,
"grad_norm": 0.0,
"learning_rate": 1.0672359838059884e-06,
"loss": 1.681,
"step": 763
},
{
"epoch": 1.4044117647058822,
"grad_norm": 0.0,
"learning_rate": 1.0611875058353301e-06,
"loss": 1.8089,
"step": 764
},
{
"epoch": 1.40625,
"grad_norm": 0.0,
"learning_rate": 1.055151595023407e-06,
"loss": 1.8458,
"step": 765
},
{
"epoch": 1.4080882352941178,
"grad_norm": 0.0,
"learning_rate": 1.0491283040902567e-06,
"loss": 2.0752,
"step": 766
},
{
"epoch": 1.4099264705882353,
"grad_norm": 0.0,
"learning_rate": 1.0431176856456904e-06,
"loss": 1.7032,
"step": 767
},
{
"epoch": 1.4117647058823528,
"grad_norm": 0.0,
"learning_rate": 1.0371197921888324e-06,
"loss": 1.9582,
"step": 768
},
{
"epoch": 1.4136029411764706,
"grad_norm": 0.0,
"learning_rate": 1.0311346761076619e-06,
"loss": 1.7644,
"step": 769
},
{
"epoch": 1.4154411764705883,
"grad_norm": 0.0,
"learning_rate": 1.0251623896785553e-06,
"loss": 1.8227,
"step": 770
},
{
"epoch": 1.4172794117647058,
"grad_norm": 0.0,
"learning_rate": 1.0192029850658306e-06,
"loss": 1.9582,
"step": 771
},
{
"epoch": 1.4191176470588236,
"grad_norm": 0.0,
"learning_rate": 1.0132565143212907e-06,
"loss": 1.9154,
"step": 772
},
{
"epoch": 1.4209558823529411,
"grad_norm": 0.0,
"learning_rate": 1.007323029383766e-06,
"loss": 1.769,
"step": 773
},
{
"epoch": 1.4227941176470589,
"grad_norm": 0.0,
"learning_rate": 1.00140258207867e-06,
"loss": 1.7419,
"step": 774
},
{
"epoch": 1.4246323529411764,
"grad_norm": 0.0,
"learning_rate": 9.954952241175325e-07,
"loss": 1.7485,
"step": 775
},
{
"epoch": 1.4264705882352942,
"grad_norm": 0.0,
"learning_rate": 9.896010070975638e-07,
"loss": 1.8709,
"step": 776
},
{
"epoch": 1.4283088235294117,
"grad_norm": 0.0,
"learning_rate": 9.837199825011895e-07,
"loss": 1.7689,
"step": 777
},
{
"epoch": 1.4301470588235294,
"grad_norm": 0.0,
"learning_rate": 9.778522016956105e-07,
"loss": 1.7562,
"step": 778
},
{
"epoch": 1.4319852941176472,
"grad_norm": 0.0,
"learning_rate": 9.71997715932351e-07,
"loss": 1.6869,
"step": 779
},
{
"epoch": 1.4338235294117647,
"grad_norm": 0.0,
"learning_rate": 9.661565763468106e-07,
"loss": 1.8674,
"step": 780
},
{
"epoch": 1.4356617647058822,
"grad_norm": 0.0,
"learning_rate": 9.603288339578177e-07,
"loss": 1.6821,
"step": 781
},
{
"epoch": 1.4375,
"grad_norm": 0.0,
"learning_rate": 9.54514539667185e-07,
"loss": 1.7868,
"step": 782
},
{
"epoch": 1.4393382352941178,
"grad_norm": 0.0,
"learning_rate": 9.487137442592642e-07,
"loss": 1.8007,
"step": 783
},
{
"epoch": 1.4411764705882353,
"grad_norm": 0.0,
"learning_rate": 9.429264984005015e-07,
"loss": 1.8453,
"step": 784
},
{
"epoch": 1.4430147058823528,
"grad_norm": 0.0,
"learning_rate": 9.371528526389978e-07,
"loss": 1.9375,
"step": 785
},
{
"epoch": 1.4448529411764706,
"grad_norm": 0.0,
"learning_rate": 9.313928574040615e-07,
"loss": 1.8563,
"step": 786
},
{
"epoch": 1.4466911764705883,
"grad_norm": 0.0,
"learning_rate": 9.256465630057778e-07,
"loss": 1.7986,
"step": 787
},
{
"epoch": 1.4485294117647058,
"grad_norm": 0.0,
"learning_rate": 9.199140196345569e-07,
"loss": 1.7898,
"step": 788
},
{
"epoch": 1.4503676470588236,
"grad_norm": 0.0,
"learning_rate": 9.141952773607082e-07,
"loss": 1.8142,
"step": 789
},
{
"epoch": 1.4522058823529411,
"grad_norm": 0.0,
"learning_rate": 9.084903861339916e-07,
"loss": 1.7731,
"step": 790
},
{
"epoch": 1.4540441176470589,
"grad_norm": 0.0,
"learning_rate": 9.027993957831898e-07,
"loss": 1.5631,
"step": 791
},
{
"epoch": 1.4558823529411764,
"grad_norm": 0.0,
"learning_rate": 8.971223560156686e-07,
"loss": 1.7644,
"step": 792
},
{
"epoch": 1.4577205882352942,
"grad_norm": 0.0,
"learning_rate": 8.914593164169438e-07,
"loss": 1.7418,
"step": 793
},
{
"epoch": 1.4595588235294117,
"grad_norm": 0.0,
"learning_rate": 8.858103264502482e-07,
"loss": 1.7843,
"step": 794
},
{
"epoch": 1.4613970588235294,
"grad_norm": 0.0,
"learning_rate": 8.801754354561001e-07,
"loss": 1.6739,
"step": 795
},
{
"epoch": 1.4632352941176472,
"grad_norm": 0.0,
"learning_rate": 8.745546926518714e-07,
"loss": 1.8804,
"step": 796
},
{
"epoch": 1.4650735294117647,
"grad_norm": 0.0,
"learning_rate": 8.689481471313574e-07,
"loss": 1.8066,
"step": 797
},
{
"epoch": 1.4669117647058822,
"grad_norm": 0.0,
"learning_rate": 8.633558478643503e-07,
"loss": 1.6212,
"step": 798
},
{
"epoch": 1.46875,
"grad_norm": 0.0,
"learning_rate": 8.577778436962081e-07,
"loss": 1.7998,
"step": 799
},
{
"epoch": 1.4705882352941178,
"grad_norm": 0.0,
"learning_rate": 8.522141833474318e-07,
"loss": 1.693,
"step": 800
},
{
"epoch": 1.4724264705882353,
"grad_norm": 0.0,
"learning_rate": 8.466649154132342e-07,
"loss": 1.729,
"step": 801
},
{
"epoch": 1.4742647058823528,
"grad_norm": 0.0,
"learning_rate": 8.411300883631246e-07,
"loss": 1.722,
"step": 802
},
{
"epoch": 1.4761029411764706,
"grad_norm": 0.0,
"learning_rate": 8.356097505404742e-07,
"loss": 1.6582,
"step": 803
},
{
"epoch": 1.4779411764705883,
"grad_norm": 0.0,
"learning_rate": 8.301039501621028e-07,
"loss": 1.6609,
"step": 804
},
{
"epoch": 1.4797794117647058,
"grad_norm": 0.0,
"learning_rate": 8.24612735317854e-07,
"loss": 1.8046,
"step": 805
},
{
"epoch": 1.4816176470588236,
"grad_norm": 0.0,
"learning_rate": 8.191361539701748e-07,
"loss": 1.7152,
"step": 806
},
{
"epoch": 1.4834558823529411,
"grad_norm": 0.0,
"learning_rate": 8.136742539536979e-07,
"loss": 1.7417,
"step": 807
},
{
"epoch": 1.4852941176470589,
"grad_norm": 0.0,
"learning_rate": 8.082270829748245e-07,
"loss": 1.6673,
"step": 808
},
{
"epoch": 1.4871323529411764,
"grad_norm": 0.0,
"learning_rate": 8.02794688611303e-07,
"loss": 1.9246,
"step": 809
},
{
"epoch": 1.4889705882352942,
"grad_norm": 0.0,
"learning_rate": 7.973771183118222e-07,
"loss": 1.5252,
"step": 810
},
{
"epoch": 1.4908088235294117,
"grad_norm": 0.0,
"learning_rate": 7.919744193955864e-07,
"loss": 1.6546,
"step": 811
},
{
"epoch": 1.4926470588235294,
"grad_norm": 0.0,
"learning_rate": 7.865866390519114e-07,
"loss": 1.7019,
"step": 812
},
{
"epoch": 1.4944852941176472,
"grad_norm": 0.0,
"learning_rate": 7.812138243398071e-07,
"loss": 1.8009,
"step": 813
},
{
"epoch": 1.4963235294117647,
"grad_norm": 0.0,
"learning_rate": 7.75856022187565e-07,
"loss": 1.8821,
"step": 814
},
{
"epoch": 1.4981617647058822,
"grad_norm": 0.0,
"learning_rate": 7.705132793923559e-07,
"loss": 1.4378,
"step": 815
},
{
"epoch": 1.5,
"grad_norm": 0.0,
"learning_rate": 7.651856426198106e-07,
"loss": 1.6485,
"step": 816
},
{
"epoch": 1.5018382352941178,
"grad_norm": 0.0,
"learning_rate": 7.59873158403624e-07,
"loss": 1.8485,
"step": 817
},
{
"epoch": 1.5036764705882353,
"grad_norm": 0.0,
"learning_rate": 7.54575873145137e-07,
"loss": 1.5154,
"step": 818
},
{
"epoch": 1.5055147058823528,
"grad_norm": 0.0,
"learning_rate": 7.492938331129393e-07,
"loss": 1.6477,
"step": 819
},
{
"epoch": 1.5073529411764706,
"grad_norm": 0.0,
"learning_rate": 7.440270844424635e-07,
"loss": 1.6652,
"step": 820
},
{
"epoch": 1.5091911764705883,
"grad_norm": 0.0,
"learning_rate": 7.3877567313558e-07,
"loss": 1.8875,
"step": 821
},
{
"epoch": 1.5110294117647058,
"grad_norm": 0.0,
"learning_rate": 7.335396450601972e-07,
"loss": 1.7319,
"step": 822
},
{
"epoch": 1.5128676470588234,
"grad_norm": 0.0,
"learning_rate": 7.283190459498607e-07,
"loss": 1.6376,
"step": 823
},
{
"epoch": 1.5147058823529411,
"grad_norm": 0.0,
"learning_rate": 7.231139214033505e-07,
"loss": 1.6051,
"step": 824
},
{
"epoch": 1.5165441176470589,
"grad_norm": 0.0,
"learning_rate": 7.179243168842911e-07,
"loss": 1.8559,
"step": 825
},
{
"epoch": 1.5183823529411766,
"grad_norm": 0.0,
"learning_rate": 7.127502777207437e-07,
"loss": 1.6349,
"step": 826
},
{
"epoch": 1.5202205882352942,
"grad_norm": 0.0,
"learning_rate": 7.075918491048172e-07,
"loss": 1.4663,
"step": 827
},
{
"epoch": 1.5220588235294117,
"grad_norm": 0.0,
"learning_rate": 7.024490760922748e-07,
"loss": 1.6839,
"step": 828
},
{
"epoch": 1.5238970588235294,
"grad_norm": 0.0,
"learning_rate": 6.973220036021313e-07,
"loss": 1.5657,
"step": 829
},
{
"epoch": 1.5257352941176472,
"grad_norm": 0.0,
"learning_rate": 6.922106764162742e-07,
"loss": 1.9338,
"step": 830
},
{
"epoch": 1.5275735294117647,
"grad_norm": 0.0,
"learning_rate": 6.871151391790584e-07,
"loss": 1.9015,
"step": 831
},
{
"epoch": 1.5294117647058822,
"grad_norm": 0.0,
"learning_rate": 6.820354363969276e-07,
"loss": 1.4708,
"step": 832
},
{
"epoch": 1.53125,
"grad_norm": 0.0,
"learning_rate": 6.769716124380193e-07,
"loss": 1.9289,
"step": 833
},
{
"epoch": 1.5330882352941178,
"grad_norm": 0.0,
"learning_rate": 6.7192371153178e-07,
"loss": 1.6729,
"step": 834
},
{
"epoch": 1.5349264705882353,
"grad_norm": 0.0,
"learning_rate": 6.668917777685771e-07,
"loss": 1.828,
"step": 835
},
{
"epoch": 1.5367647058823528,
"grad_norm": 0.0,
"learning_rate": 6.618758550993157e-07,
"loss": 1.8761,
"step": 836
},
{
"epoch": 1.5386029411764706,
"grad_norm": 0.0,
"learning_rate": 6.568759873350505e-07,
"loss": 1.7344,
"step": 837
},
{
"epoch": 1.5404411764705883,
"grad_norm": 0.0,
"learning_rate": 6.518922181466123e-07,
"loss": 1.7223,
"step": 838
},
{
"epoch": 1.5422794117647058,
"grad_norm": 0.0,
"learning_rate": 6.469245910642135e-07,
"loss": 1.5391,
"step": 839
},
{
"epoch": 1.5441176470588234,
"grad_norm": 0.0,
"learning_rate": 6.419731494770814e-07,
"loss": 1.7717,
"step": 840
},
{
"epoch": 1.5459558823529411,
"grad_norm": 0.0,
"learning_rate": 6.370379366330682e-07,
"loss": 1.7384,
"step": 841
},
{
"epoch": 1.5477941176470589,
"grad_norm": 0.0,
"learning_rate": 6.321189956382795e-07,
"loss": 1.7945,
"step": 842
},
{
"epoch": 1.5496323529411766,
"grad_norm": 0.0,
"learning_rate": 6.27216369456696e-07,
"loss": 1.8308,
"step": 843
},
{
"epoch": 1.5514705882352942,
"grad_norm": 0.0,
"learning_rate": 6.223301009097982e-07,
"loss": 1.8692,
"step": 844
},
{
"epoch": 1.5533088235294117,
"grad_norm": 0.0,
"learning_rate": 6.174602326761947e-07,
"loss": 1.8698,
"step": 845
},
{
"epoch": 1.5551470588235294,
"grad_norm": 0.0,
"learning_rate": 6.126068072912431e-07,
"loss": 2.0008,
"step": 846
},
{
"epoch": 1.5569852941176472,
"grad_norm": 0.0,
"learning_rate": 6.077698671466852e-07,
"loss": 1.7407,
"step": 847
},
{
"epoch": 1.5588235294117647,
"grad_norm": 0.0,
"learning_rate": 6.029494544902742e-07,
"loss": 1.79,
"step": 848
},
{
"epoch": 1.5606617647058822,
"grad_norm": 0.0,
"learning_rate": 5.981456114254061e-07,
"loss": 1.719,
"step": 849
},
{
"epoch": 1.5625,
"grad_norm": 0.0,
"learning_rate": 5.933583799107482e-07,
"loss": 1.7807,
"step": 850
},
{
"epoch": 1.5643382352941178,
"grad_norm": 0.0,
"learning_rate": 5.885878017598815e-07,
"loss": 1.9422,
"step": 851
},
{
"epoch": 1.5661764705882353,
"grad_norm": 0.0,
"learning_rate": 5.838339186409236e-07,
"loss": 1.9177,
"step": 852
},
{
"epoch": 1.5680147058823528,
"grad_norm": 0.0,
"learning_rate": 5.790967720761778e-07,
"loss": 1.5679,
"step": 853
},
{
"epoch": 1.5698529411764706,
"grad_norm": 0.0,
"learning_rate": 5.743764034417579e-07,
"loss": 1.8958,
"step": 854
},
{
"epoch": 1.5716911764705883,
"grad_norm": 0.0,
"learning_rate": 5.696728539672353e-07,
"loss": 1.978,
"step": 855
},
{
"epoch": 1.5735294117647058,
"grad_norm": 0.0,
"learning_rate": 5.649861647352758e-07,
"loss": 1.5037,
"step": 856
},
{
"epoch": 1.5753676470588234,
"grad_norm": 0.0,
"learning_rate": 5.603163766812817e-07,
"loss": 1.7701,
"step": 857
},
{
"epoch": 1.5772058823529411,
"grad_norm": 0.0,
"learning_rate": 5.556635305930327e-07,
"loss": 1.4404,
"step": 858
},
{
"epoch": 1.5790441176470589,
"grad_norm": 0.0,
"learning_rate": 5.510276671103315e-07,
"loss": 1.8623,
"step": 859
},
{
"epoch": 1.5808823529411766,
"grad_norm": 0.0,
"learning_rate": 5.464088267246473e-07,
"loss": 1.7948,
"step": 860
},
{
"epoch": 1.5827205882352942,
"grad_norm": 0.0,
"learning_rate": 5.418070497787634e-07,
"loss": 1.8761,
"step": 861
},
{
"epoch": 1.5845588235294117,
"grad_norm": 0.0,
"learning_rate": 5.372223764664236e-07,
"loss": 1.8479,
"step": 862
},
{
"epoch": 1.5863970588235294,
"grad_norm": 0.0,
"learning_rate": 5.326548468319825e-07,
"loss": 1.7426,
"step": 863
},
{
"epoch": 1.5882352941176472,
"grad_norm": 0.0,
"learning_rate": 5.281045007700555e-07,
"loss": 1.821,
"step": 864
},
{
"epoch": 1.5900735294117647,
"grad_norm": 0.0,
"learning_rate": 5.235713780251669e-07,
"loss": 1.8196,
"step": 865
},
{
"epoch": 1.5919117647058822,
"grad_norm": 0.0,
"learning_rate": 5.190555181914106e-07,
"loss": 1.6626,
"step": 866
},
{
"epoch": 1.59375,
"grad_norm": 0.0,
"learning_rate": 5.14556960712094e-07,
"loss": 1.7291,
"step": 867
},
{
"epoch": 1.5955882352941178,
"grad_norm": 0.0,
"learning_rate": 5.100757448794039e-07,
"loss": 1.5247,
"step": 868
},
{
"epoch": 1.5974264705882353,
"grad_norm": 0.0,
"learning_rate": 5.056119098340545e-07,
"loss": 1.5109,
"step": 869
},
{
"epoch": 1.5992647058823528,
"grad_norm": 0.0,
"learning_rate": 5.01165494564951e-07,
"loss": 1.9381,
"step": 870
},
{
"epoch": 1.6011029411764706,
"grad_norm": 0.0,
"learning_rate": 4.967365379088473e-07,
"loss": 1.7729,
"step": 871
},
{
"epoch": 1.6029411764705883,
"grad_norm": 0.0,
"learning_rate": 4.923250785500069e-07,
"loss": 1.7253,
"step": 872
},
{
"epoch": 1.6047794117647058,
"grad_norm": 0.0,
"learning_rate": 4.879311550198643e-07,
"loss": 1.7911,
"step": 873
},
{
"epoch": 1.6066176470588234,
"grad_norm": 0.0,
"learning_rate": 4.835548056966899e-07,
"loss": 1.8662,
"step": 874
},
{
"epoch": 1.6084558823529411,
"grad_norm": 0.0,
"learning_rate": 4.791960688052533e-07,
"loss": 1.1291,
"step": 875
},
{
"epoch": 1.6102941176470589,
"grad_norm": 0.0,
"learning_rate": 4.748549824164908e-07,
"loss": 1.6652,
"step": 876
},
{
"epoch": 1.6121323529411766,
"grad_norm": 0.0,
"learning_rate": 4.705315844471728e-07,
"loss": 1.8267,
"step": 877
},
{
"epoch": 1.6139705882352942,
"grad_norm": 0.0,
"learning_rate": 4.6622591265956863e-07,
"loss": 1.9036,
"step": 878
},
{
"epoch": 1.6158088235294117,
"grad_norm": 0.0,
"learning_rate": 4.6193800466112576e-07,
"loss": 1.884,
"step": 879
},
{
"epoch": 1.6176470588235294,
"grad_norm": 0.0,
"learning_rate": 4.5766789790412963e-07,
"loss": 1.7381,
"step": 880
},
{
"epoch": 1.6194852941176472,
"grad_norm": 0.0,
"learning_rate": 4.534156296853884e-07,
"loss": 1.8417,
"step": 881
},
{
"epoch": 1.6213235294117647,
"grad_norm": 0.0,
"learning_rate": 4.491812371458962e-07,
"loss": 1.5894,
"step": 882
},
{
"epoch": 1.6231617647058822,
"grad_norm": 0.0,
"learning_rate": 4.449647572705171e-07,
"loss": 1.8028,
"step": 883
},
{
"epoch": 1.625,
"grad_norm": 0.0,
"learning_rate": 4.407662268876578e-07,
"loss": 1.9584,
"step": 884
},
{
"epoch": 1.6268382352941178,
"grad_norm": 0.0,
"learning_rate": 4.3658568266894716e-07,
"loss": 1.7494,
"step": 885
},
{
"epoch": 1.6286764705882353,
"grad_norm": 0.0,
"learning_rate": 4.324231611289159e-07,
"loss": 1.7979,
"step": 886
},
{
"epoch": 1.6305147058823528,
"grad_norm": 0.0,
"learning_rate": 4.282786986246773e-07,
"loss": 1.7872,
"step": 887
},
{
"epoch": 1.6323529411764706,
"grad_norm": 0.0,
"learning_rate": 4.2415233135560977e-07,
"loss": 2.0073,
"step": 888
},
{
"epoch": 1.6341911764705883,
"grad_norm": 0.0,
"learning_rate": 4.200440953630411e-07,
"loss": 1.7227,
"step": 889
},
{
"epoch": 1.6360294117647058,
"grad_norm": 0.0,
"learning_rate": 4.1595402652993304e-07,
"loss": 1.6261,
"step": 890
},
{
"epoch": 1.6378676470588234,
"grad_norm": 0.0,
"learning_rate": 4.118821605805684e-07,
"loss": 1.9185,
"step": 891
},
{
"epoch": 1.6397058823529411,
"grad_norm": 0.0,
"learning_rate": 4.0782853308023895e-07,
"loss": 1.6901,
"step": 892
},
{
"epoch": 1.6415441176470589,
"grad_norm": 0.0,
"learning_rate": 4.0379317943493287e-07,
"loss": 2.045,
"step": 893
},
{
"epoch": 1.6433823529411766,
"grad_norm": 0.0,
"learning_rate": 3.997761348910309e-07,
"loss": 1.8319,
"step": 894
},
{
"epoch": 1.6452205882352942,
"grad_norm": 0.0,
"learning_rate": 3.95777434534991e-07,
"loss": 1.5535,
"step": 895
},
{
"epoch": 1.6470588235294117,
"grad_norm": 0.0,
"learning_rate": 3.917971132930476e-07,
"loss": 1.8543,
"step": 896
},
{
"epoch": 1.6488970588235294,
"grad_norm": 0.0,
"learning_rate": 3.8783520593090486e-07,
"loss": 1.8723,
"step": 897
},
{
"epoch": 1.6507352941176472,
"grad_norm": 0.0,
"learning_rate": 3.83891747053432e-07,
"loss": 1.8325,
"step": 898
},
{
"epoch": 1.6525735294117647,
"grad_norm": 0.0,
"learning_rate": 3.7996677110436226e-07,
"loss": 1.841,
"step": 899
},
{
"epoch": 1.6544117647058822,
"grad_norm": 0.0,
"learning_rate": 3.76060312365992e-07,
"loss": 1.7471,
"step": 900
},
{
"epoch": 1.65625,
"grad_norm": 0.0,
"learning_rate": 3.721724049588801e-07,
"loss": 1.592,
"step": 901
},
{
"epoch": 1.6580882352941178,
"grad_norm": 0.0,
"learning_rate": 3.6830308284155126e-07,
"loss": 1.7401,
"step": 902
},
{
"epoch": 1.6599264705882353,
"grad_norm": 0.0,
"learning_rate": 3.64452379810199e-07,
"loss": 1.6918,
"step": 903
},
{
"epoch": 1.6617647058823528,
"grad_norm": 0.0,
"learning_rate": 3.606203294983901e-07,
"loss": 1.9804,
"step": 904
},
{
"epoch": 1.6636029411764706,
"grad_norm": 0.0,
"learning_rate": 3.5680696537677193e-07,
"loss": 1.7059,
"step": 905
},
{
"epoch": 1.6654411764705883,
"grad_norm": 0.0,
"learning_rate": 3.5301232075277635e-07,
"loss": 1.7507,
"step": 906
},
{
"epoch": 1.6672794117647058,
"grad_norm": 0.0,
"learning_rate": 3.4923642877033674e-07,
"loss": 1.5373,
"step": 907
},
{
"epoch": 1.6691176470588234,
"grad_norm": 0.0,
"learning_rate": 3.454793224095879e-07,
"loss": 1.5712,
"step": 908
},
{
"epoch": 1.6709558823529411,
"grad_norm": 0.0,
"learning_rate": 3.4174103448658875e-07,
"loss": 1.8673,
"step": 909
},
{
"epoch": 1.6727941176470589,
"grad_norm": 0.0,
"learning_rate": 3.3802159765302627e-07,
"loss": 1.6926,
"step": 910
},
{
"epoch": 1.6746323529411766,
"grad_norm": 0.0,
"learning_rate": 3.343210443959369e-07,
"loss": 1.9163,
"step": 911
},
{
"epoch": 1.6764705882352942,
"grad_norm": 0.0,
"learning_rate": 3.3063940703742e-07,
"loss": 1.7858,
"step": 912
},
{
"epoch": 1.6783088235294117,
"grad_norm": 0.0,
"learning_rate": 3.269767177343555e-07,
"loss": 1.6493,
"step": 913
},
{
"epoch": 1.6801470588235294,
"grad_norm": 0.0,
"learning_rate": 3.2333300847812387e-07,
"loss": 1.9067,
"step": 914
},
{
"epoch": 1.6819852941176472,
"grad_norm": 0.0,
"learning_rate": 3.1970831109432666e-07,
"loss": 1.7049,
"step": 915
},
{
"epoch": 1.6838235294117647,
"grad_norm": 0.0,
"learning_rate": 3.161026572425074e-07,
"loss": 2.0241,
"step": 916
},
{
"epoch": 1.6856617647058822,
"grad_norm": 0.0,
"learning_rate": 3.125160784158765e-07,
"loss": 1.4931,
"step": 917
},
{
"epoch": 1.6875,
"grad_norm": 0.0,
"learning_rate": 3.0894860594103597e-07,
"loss": 1.8086,
"step": 918
},
{
"epoch": 1.6893382352941178,
"grad_norm": 0.0,
"learning_rate": 3.054002709777029e-07,
"loss": 1.9141,
"step": 919
},
{
"epoch": 1.6911764705882353,
"grad_norm": 0.0,
"learning_rate": 3.018711045184436e-07,
"loss": 1.5003,
"step": 920
},
{
"epoch": 1.6930147058823528,
"grad_norm": 0.0,
"learning_rate": 2.9836113738839544e-07,
"loss": 2.0187,
"step": 921
},
{
"epoch": 1.6948529411764706,
"grad_norm": 0.0,
"learning_rate": 2.9487040024500424e-07,
"loss": 1.723,
"step": 922
},
{
"epoch": 1.6966911764705883,
"grad_norm": 0.0,
"learning_rate": 2.9139892357775115e-07,
"loss": 1.881,
"step": 923
},
{
"epoch": 1.6985294117647058,
"grad_norm": 0.0,
"learning_rate": 2.879467377078896e-07,
"loss": 1.6959,
"step": 924
},
{
"epoch": 1.7003676470588234,
"grad_norm": 0.0,
"learning_rate": 2.845138727881802e-07,
"loss": 1.5516,
"step": 925
},
{
"epoch": 1.7022058823529411,
"grad_norm": 0.0,
"learning_rate": 2.8110035880262563e-07,
"loss": 1.5723,
"step": 926
},
{
"epoch": 1.7040441176470589,
"grad_norm": 0.0,
"learning_rate": 2.7770622556621054e-07,
"loss": 1.7377,
"step": 927
},
{
"epoch": 1.7058823529411766,
"grad_norm": 0.0,
"learning_rate": 2.7433150272463985e-07,
"loss": 1.5755,
"step": 928
},
{
"epoch": 1.7077205882352942,
"grad_norm": 0.0,
"learning_rate": 2.7097621975408136e-07,
"loss": 1.5451,
"step": 929
},
{
"epoch": 1.7095588235294117,
"grad_norm": 0.0,
"learning_rate": 2.676404059609061e-07,
"loss": 1.7694,
"step": 930
},
{
"epoch": 1.7113970588235294,
"grad_norm": 0.0,
"learning_rate": 2.6432409048143483e-07,
"loss": 2.0309,
"step": 931
},
{
"epoch": 1.7132352941176472,
"grad_norm": 0.0,
"learning_rate": 2.610273022816812e-07,
"loss": 1.6251,
"step": 932
},
{
"epoch": 1.7150735294117647,
"grad_norm": 0.0,
"learning_rate": 2.577500701571009e-07,
"loss": 1.8236,
"step": 933
},
{
"epoch": 1.7169117647058822,
"grad_norm": 0.0,
"learning_rate": 2.5449242273233753e-07,
"loss": 1.6628,
"step": 934
},
{
"epoch": 1.71875,
"grad_norm": 0.0,
"learning_rate": 2.5125438846097636e-07,
"loss": 1.9572,
"step": 935
},
{
"epoch": 1.7205882352941178,
"grad_norm": 0.0,
"learning_rate": 2.480359956252912e-07,
"loss": 1.7628,
"step": 936
},
{
"epoch": 1.7224264705882353,
"grad_norm": 0.0,
"learning_rate": 2.4483727233600237e-07,
"loss": 1.5424,
"step": 937
},
{
"epoch": 1.7242647058823528,
"grad_norm": 0.0,
"learning_rate": 2.4165824653202605e-07,
"loss": 1.772,
"step": 938
},
{
"epoch": 1.7261029411764706,
"grad_norm": 0.0,
"learning_rate": 2.384989459802345e-07,
"loss": 1.806,
"step": 939
},
{
"epoch": 1.7279411764705883,
"grad_norm": 0.0,
"learning_rate": 2.3535939827521114e-07,
"loss": 1.9861,
"step": 940
},
{
"epoch": 1.7297794117647058,
"grad_norm": 0.0,
"learning_rate": 2.3223963083901014e-07,
"loss": 1.7898,
"step": 941
},
{
"epoch": 1.7316176470588234,
"grad_norm": 0.0,
"learning_rate": 2.2913967092091706e-07,
"loss": 1.9501,
"step": 942
},
{
"epoch": 1.7334558823529411,
"grad_norm": 0.0,
"learning_rate": 2.2605954559721116e-07,
"loss": 1.4893,
"step": 943
},
{
"epoch": 1.7352941176470589,
"grad_norm": 0.0,
"learning_rate": 2.2299928177092784e-07,
"loss": 1.9125,
"step": 944
},
{
"epoch": 1.7371323529411766,
"grad_norm": 0.0,
"learning_rate": 2.199589061716245e-07,
"loss": 1.9624,
"step": 945
},
{
"epoch": 1.7389705882352942,
"grad_norm": 0.0,
"learning_rate": 2.1693844535514763e-07,
"loss": 1.8543,
"step": 946
},
{
"epoch": 1.7408088235294117,
"grad_norm": 0.0,
"learning_rate": 2.13937925703398e-07,
"loss": 1.5834,
"step": 947
},
{
"epoch": 1.7426470588235294,
"grad_norm": 0.0,
"learning_rate": 2.1095737342410577e-07,
"loss": 1.5379,
"step": 948
},
{
"epoch": 1.7444852941176472,
"grad_norm": 0.0,
"learning_rate": 2.0799681455059423e-07,
"loss": 1.8171,
"step": 949
},
{
"epoch": 1.7463235294117647,
"grad_norm": 0.0,
"learning_rate": 2.0505627494155994e-07,
"loss": 1.9249,
"step": 950
},
{
"epoch": 1.7481617647058822,
"grad_norm": 0.0,
"learning_rate": 2.0213578028084018e-07,
"loss": 1.8258,
"step": 951
},
{
"epoch": 1.75,
"grad_norm": 0.0,
"learning_rate": 1.9923535607719423e-07,
"loss": 1.5903,
"step": 952
},
{
"epoch": 1.7518382352941178,
"grad_norm": 0.0,
"learning_rate": 1.9635502766407632e-07,
"loss": 1.7791,
"step": 953
},
{
"epoch": 1.7536764705882353,
"grad_norm": 0.0,
"learning_rate": 1.934948201994169e-07,
"loss": 1.687,
"step": 954
},
{
"epoch": 1.7555147058823528,
"grad_norm": 0.0,
"learning_rate": 1.9065475866540172e-07,
"loss": 1.8509,
"step": 955
},
{
"epoch": 1.7573529411764706,
"grad_norm": 0.0,
"learning_rate": 1.8783486786825455e-07,
"loss": 1.8326,
"step": 956
},
{
"epoch": 1.7591911764705883,
"grad_norm": 0.0,
"learning_rate": 1.8503517243801922e-07,
"loss": 1.8198,
"step": 957
},
{
"epoch": 1.7610294117647058,
"grad_norm": 0.0,
"learning_rate": 1.8225569682834565e-07,
"loss": 1.7581,
"step": 958
},
{
"epoch": 1.7628676470588234,
"grad_norm": 0.0,
"learning_rate": 1.794964653162759e-07,
"loss": 1.9452,
"step": 959
},
{
"epoch": 1.7647058823529411,
"grad_norm": 0.0,
"learning_rate": 1.7675750200203152e-07,
"loss": 1.7043,
"step": 960
},
{
"epoch": 1.7665441176470589,
"grad_norm": 0.0,
"learning_rate": 1.7403883080880424e-07,
"loss": 1.6634,
"step": 961
},
{
"epoch": 1.7683823529411766,
"grad_norm": 0.0,
"learning_rate": 1.713404754825454e-07,
"loss": 1.8329,
"step": 962
},
{
"epoch": 1.7702205882352942,
"grad_norm": 0.0,
"learning_rate": 1.6866245959176157e-07,
"loss": 1.3605,
"step": 963
},
{
"epoch": 1.7720588235294117,
"grad_norm": 0.0,
"learning_rate": 1.66004806527304e-07,
"loss": 1.8235,
"step": 964
},
{
"epoch": 1.7738970588235294,
"grad_norm": 0.0,
"learning_rate": 1.6336753950216833e-07,
"loss": 1.6423,
"step": 965
},
{
"epoch": 1.7757352941176472,
"grad_norm": 0.0,
"learning_rate": 1.6075068155129075e-07,
"loss": 1.5276,
"step": 966
},
{
"epoch": 1.7775735294117647,
"grad_norm": 0.0,
"learning_rate": 1.5815425553134562e-07,
"loss": 1.6792,
"step": 967
},
{
"epoch": 1.7794117647058822,
"grad_norm": 0.0,
"learning_rate": 1.555782841205475e-07,
"loss": 1.7641,
"step": 968
},
{
"epoch": 1.78125,
"grad_norm": 0.0,
"learning_rate": 1.5302278981845136e-07,
"loss": 1.6337,
"step": 969
},
{
"epoch": 1.7830882352941178,
"grad_norm": 0.0,
"learning_rate": 1.5048779494575838e-07,
"loss": 1.6087,
"step": 970
},
{
"epoch": 1.7849264705882353,
"grad_norm": 0.0,
"learning_rate": 1.4797332164411816e-07,
"loss": 1.8162,
"step": 971
},
{
"epoch": 1.7867647058823528,
"grad_norm": 0.0,
"learning_rate": 1.454793918759373e-07,
"loss": 1.7674,
"step": 972
},
{
"epoch": 1.7886029411764706,
"grad_norm": 0.0,
"learning_rate": 1.430060274241876e-07,
"loss": 1.8653,
"step": 973
},
{
"epoch": 1.7904411764705883,
"grad_norm": 0.0,
"learning_rate": 1.4055324989221464e-07,
"loss": 1.7376,
"step": 974
},
{
"epoch": 1.7922794117647058,
"grad_norm": 0.0,
"learning_rate": 1.3812108070354908e-07,
"loss": 1.7221,
"step": 975
},
{
"epoch": 1.7941176470588234,
"grad_norm": 0.0,
"learning_rate": 1.3570954110172203e-07,
"loss": 1.9376,
"step": 976
},
{
"epoch": 1.7959558823529411,
"grad_norm": 0.0,
"learning_rate": 1.3331865215007482e-07,
"loss": 1.7548,
"step": 977
},
{
"epoch": 1.7977941176470589,
"grad_norm": 0.0,
"learning_rate": 1.309484347315812e-07,
"loss": 1.8472,
"step": 978
},
{
"epoch": 1.7996323529411766,
"grad_norm": 0.0,
"learning_rate": 1.2859890954865867e-07,
"loss": 1.6442,
"step": 979
},
{
"epoch": 1.8014705882352942,
"grad_norm": 0.0,
"learning_rate": 1.262700971229916e-07,
"loss": 1.6644,
"step": 980
},
{
"epoch": 1.8033088235294117,
"grad_norm": 0.0,
"learning_rate": 1.2396201779535154e-07,
"loss": 1.6133,
"step": 981
},
{
"epoch": 1.8051470588235294,
"grad_norm": 0.0,
"learning_rate": 1.2167469172541773e-07,
"loss": 1.7667,
"step": 982
},
{
"epoch": 1.8069852941176472,
"grad_norm": 0.0,
"learning_rate": 1.194081388916035e-07,
"loss": 1.8097,
"step": 983
},
{
"epoch": 1.8088235294117647,
"grad_norm": 0.0,
"learning_rate": 1.1716237909087991e-07,
"loss": 1.9465,
"step": 984
},
{
"epoch": 1.8106617647058822,
"grad_norm": 0.0,
"learning_rate": 1.1493743193860207e-07,
"loss": 1.8992,
"step": 985
},
{
"epoch": 1.8125,
"grad_norm": 0.0,
"learning_rate": 1.12733316868342e-07,
"loss": 1.8624,
"step": 986
},
{
"epoch": 1.8143382352941178,
"grad_norm": 0.0,
"learning_rate": 1.1055005313171413e-07,
"loss": 1.7256,
"step": 987
},
{
"epoch": 1.8161764705882353,
"grad_norm": 0.0,
"learning_rate": 1.0838765979820892e-07,
"loss": 1.7664,
"step": 988
},
{
"epoch": 1.8180147058823528,
"grad_norm": 0.0,
"learning_rate": 1.0624615575502789e-07,
"loss": 1.6467,
"step": 989
},
{
"epoch": 1.8198529411764706,
"grad_norm": 0.0,
"learning_rate": 1.0412555970691519e-07,
"loss": 1.7614,
"step": 990
},
{
"epoch": 1.8216911764705883,
"grad_norm": 0.0,
"learning_rate": 1.0202589017599878e-07,
"loss": 1.8051,
"step": 991
},
{
"epoch": 1.8235294117647058,
"grad_norm": 0.0,
"learning_rate": 9.994716550162376e-08,
"loss": 2.0208,
"step": 992
},
{
"epoch": 1.8253676470588234,
"grad_norm": 0.0,
"learning_rate": 9.788940384019591e-08,
"loss": 1.8248,
"step": 993
},
{
"epoch": 1.8272058823529411,
"grad_norm": 0.0,
"learning_rate": 9.585262316502114e-08,
"loss": 1.7762,
"step": 994
},
{
"epoch": 1.8290441176470589,
"grad_norm": 0.0,
"learning_rate": 9.383684126614945e-08,
"loss": 1.6026,
"step": 995
},
{
"epoch": 1.8308823529411766,
"grad_norm": 0.0,
"learning_rate": 9.184207575021947e-08,
"loss": 1.5231,
"step": 996
},
{
"epoch": 1.8327205882352942,
"grad_norm": 0.0,
"learning_rate": 8.986834404030364e-08,
"loss": 1.6845,
"step": 997
},
{
"epoch": 1.8345588235294117,
"grad_norm": 0.0,
"learning_rate": 8.79156633757569e-08,
"loss": 1.6766,
"step": 998
},
{
"epoch": 1.8363970588235294,
"grad_norm": 0.0,
"learning_rate": 8.598405081206712e-08,
"loss": 1.7158,
"step": 999
},
{
"epoch": 1.8382352941176472,
"grad_norm": 0.0,
"learning_rate": 8.407352322070323e-08,
"loss": 1.9128,
"step": 1000
},
{
"epoch": 1.8400735294117647,
"grad_norm": 0.0,
"learning_rate": 8.218409728897148e-08,
"loss": 1.892,
"step": 1001
},
{
"epoch": 1.8419117647058822,
"grad_norm": 0.0,
"learning_rate": 8.031578951986697e-08,
"loss": 1.699,
"step": 1002
},
{
"epoch": 1.84375,
"grad_norm": 0.0,
"learning_rate": 7.846861623192953e-08,
"loss": 1.5973,
"step": 1003
},
{
"epoch": 1.8455882352941178,
"grad_norm": 0.0,
"learning_rate": 7.66425935591042e-08,
"loss": 1.8389,
"step": 1004
},
{
"epoch": 1.8474264705882353,
"grad_norm": 0.0,
"learning_rate": 7.483773745059569e-08,
"loss": 1.7224,
"step": 1005
},
{
"epoch": 1.8492647058823528,
"grad_norm": 0.0,
"learning_rate": 7.305406367073387e-08,
"loss": 1.7114,
"step": 1006
},
{
"epoch": 1.8511029411764706,
"grad_norm": 0.0,
"learning_rate": 7.129158779883211e-08,
"loss": 1.8995,
"step": 1007
},
{
"epoch": 1.8529411764705883,
"grad_norm": 0.0,
"learning_rate": 6.955032522905331e-08,
"loss": 1.8587,
"step": 1008
},
{
"epoch": 1.8547794117647058,
"grad_norm": 0.0,
"learning_rate": 6.78302911702755e-08,
"loss": 1.934,
"step": 1009
},
{
"epoch": 1.8566176470588234,
"grad_norm": 0.0,
"learning_rate": 6.613150064595786e-08,
"loss": 1.9598,
"step": 1010
},
{
"epoch": 1.8584558823529411,
"grad_norm": 0.0,
"learning_rate": 6.445396849400987e-08,
"loss": 1.5031,
"step": 1011
},
{
"epoch": 1.8602941176470589,
"grad_norm": 0.0,
"learning_rate": 6.27977093666629e-08,
"loss": 1.8191,
"step": 1012
},
{
"epoch": 1.8621323529411766,
"grad_norm": 0.0,
"learning_rate": 6.11627377303406e-08,
"loss": 1.856,
"step": 1013
},
{
"epoch": 1.8639705882352942,
"grad_norm": 0.0,
"learning_rate": 5.954906786553361e-08,
"loss": 1.4916,
"step": 1014
},
{
"epoch": 1.8658088235294117,
"grad_norm": 0.0,
"learning_rate": 5.795671386667423e-08,
"loss": 1.7956,
"step": 1015
},
{
"epoch": 1.8676470588235294,
"grad_norm": 0.0,
"learning_rate": 5.638568964201313e-08,
"loss": 1.8146,
"step": 1016
},
{
"epoch": 1.8694852941176472,
"grad_norm": 0.0,
"learning_rate": 5.4836008913499164e-08,
"loss": 1.6043,
"step": 1017
},
{
"epoch": 1.8713235294117647,
"grad_norm": 0.0,
"learning_rate": 5.33076852166578e-08,
"loss": 1.8612,
"step": 1018
},
{
"epoch": 1.8731617647058822,
"grad_norm": 0.0,
"learning_rate": 5.1800731900474855e-08,
"loss": 1.6387,
"step": 1019
},
{
"epoch": 1.875,
"grad_norm": 0.0,
"learning_rate": 5.0315162127277384e-08,
"loss": 1.7442,
"step": 1020
},
{
"epoch": 1.8768382352941178,
"grad_norm": 0.0,
"learning_rate": 4.885098887262074e-08,
"loss": 1.9966,
"step": 1021
},
{
"epoch": 1.8786764705882353,
"grad_norm": 0.0,
"learning_rate": 4.7408224925174776e-08,
"loss": 1.7452,
"step": 1022
},
{
"epoch": 1.8805147058823528,
"grad_norm": 0.0,
"learning_rate": 4.598688288661196e-08,
"loss": 1.6622,
"step": 1023
},
{
"epoch": 1.8823529411764706,
"grad_norm": 0.0,
"learning_rate": 4.4586975171496686e-08,
"loss": 1.6588,
"step": 1024
},
{
"epoch": 1.8841911764705883,
"grad_norm": 0.0,
"learning_rate": 4.320851400717835e-08,
"loss": 1.6102,
"step": 1025
},
{
"epoch": 1.8860294117647058,
"grad_norm": 0.0,
"learning_rate": 4.18515114336826e-08,
"loss": 1.6396,
"step": 1026
},
{
"epoch": 1.8878676470588234,
"grad_norm": 0.0,
"learning_rate": 4.05159793036089e-08,
"loss": 1.7462,
"step": 1027
},
{
"epoch": 1.8897058823529411,
"grad_norm": 0.0,
"learning_rate": 3.9201929282023375e-08,
"loss": 1.7489,
"step": 1028
},
{
"epoch": 1.8915441176470589,
"grad_norm": 0.0,
"learning_rate": 3.7909372846360593e-08,
"loss": 1.8584,
"step": 1029
},
{
"epoch": 1.8933823529411766,
"grad_norm": 0.0,
"learning_rate": 3.663832128632028e-08,
"loss": 1.9724,
"step": 1030
},
{
"epoch": 1.8952205882352942,
"grad_norm": 0.0,
"learning_rate": 3.53887857037713e-08,
"loss": 1.9278,
"step": 1031
},
{
"epoch": 1.8970588235294117,
"grad_norm": 0.0,
"learning_rate": 3.416077701265203e-08,
"loss": 1.8119,
"step": 1032
},
{
"epoch": 1.8988970588235294,
"grad_norm": 0.0,
"learning_rate": 3.295430593887788e-08,
"loss": 1.7219,
"step": 1033
},
{
"epoch": 1.9007352941176472,
"grad_norm": 0.0,
"learning_rate": 3.176938302024535e-08,
"loss": 1.7584,
"step": 1034
},
{
"epoch": 1.9025735294117647,
"grad_norm": 0.0,
"learning_rate": 3.060601860634088e-08,
"loss": 1.6304,
"step": 1035
},
{
"epoch": 1.9044117647058822,
"grad_norm": 0.0,
"learning_rate": 2.9464222858450465e-08,
"loss": 1.8839,
"step": 1036
},
{
"epoch": 1.90625,
"grad_norm": 0.0,
"learning_rate": 2.834400574947077e-08,
"loss": 1.8399,
"step": 1037
},
{
"epoch": 1.9080882352941178,
"grad_norm": 0.0,
"learning_rate": 2.72453770638223e-08,
"loss": 1.656,
"step": 1038
},
{
"epoch": 1.9099264705882353,
"grad_norm": 0.0,
"learning_rate": 2.616834639736332e-08,
"loss": 1.7035,
"step": 1039
},
{
"epoch": 1.9117647058823528,
"grad_norm": 0.0,
"learning_rate": 2.5112923157306902e-08,
"loss": 1.9063,
"step": 1040
},
{
"epoch": 1.9136029411764706,
"grad_norm": 0.0,
"learning_rate": 2.4079116562137906e-08,
"loss": 1.4835,
"step": 1041
},
{
"epoch": 1.9154411764705883,
"grad_norm": 0.0,
"learning_rate": 2.3066935641533063e-08,
"loss": 1.7912,
"step": 1042
},
{
"epoch": 1.9172794117647058,
"grad_norm": 0.0,
"learning_rate": 2.207638923628158e-08,
"loss": 1.7931,
"step": 1043
},
{
"epoch": 1.9191176470588234,
"grad_norm": 0.0,
"learning_rate": 2.110748599820883e-08,
"loss": 1.7911,
"step": 1044
},
{
"epoch": 1.9209558823529411,
"grad_norm": 0.0,
"learning_rate": 2.0160234390099454e-08,
"loss": 1.8429,
"step": 1045
},
{
"epoch": 1.9227941176470589,
"grad_norm": 0.0,
"learning_rate": 1.9234642685624915e-08,
"loss": 1.4734,
"step": 1046
},
{
"epoch": 1.9246323529411766,
"grad_norm": 0.0,
"learning_rate": 1.833071896926969e-08,
"loss": 1.7346,
"step": 1047
},
{
"epoch": 1.9264705882352942,
"grad_norm": 0.0,
"learning_rate": 1.744847113626186e-08,
"loss": 1.6973,
"step": 1048
},
{
"epoch": 1.9283088235294117,
"grad_norm": 0.0,
"learning_rate": 1.658790689250428e-08,
"loss": 1.9301,
"step": 1049
},
{
"epoch": 1.9301470588235294,
"grad_norm": 0.0,
"learning_rate": 1.574903375450576e-08,
"loss": 1.7079,
"step": 1050
},
{
"epoch": 1.9319852941176472,
"grad_norm": 0.0,
"learning_rate": 1.493185904931721e-08,
"loss": 1.4348,
"step": 1051
},
{
"epoch": 1.9338235294117647,
"grad_norm": 0.0,
"learning_rate": 1.4136389914466142e-08,
"loss": 1.9293,
"step": 1052
},
{
"epoch": 1.9356617647058822,
"grad_norm": 0.0,
"learning_rate": 1.3362633297895334e-08,
"loss": 1.5467,
"step": 1053
},
{
"epoch": 1.9375,
"grad_norm": 0.0,
"learning_rate": 1.2610595957902039e-08,
"loss": 1.8591,
"step": 1054
},
{
"epoch": 1.9393382352941178,
"grad_norm": 0.0,
"learning_rate": 1.1880284463078596e-08,
"loss": 1.871,
"step": 1055
},
{
"epoch": 1.9411764705882353,
"grad_norm": 0.0,
"learning_rate": 1.1171705192254689e-08,
"loss": 1.966,
"step": 1056
},
{
"epoch": 1.9430147058823528,
"grad_norm": 0.0,
"learning_rate": 1.0484864334442956e-08,
"loss": 1.5263,
"step": 1057
},
{
"epoch": 1.9448529411764706,
"grad_norm": 0.0,
"learning_rate": 9.819767888783749e-09,
"loss": 1.6543,
"step": 1058
},
{
"epoch": 1.9466911764705883,
"grad_norm": 0.0,
"learning_rate": 9.176421664492952e-09,
"loss": 1.7169,
"step": 1059
},
{
"epoch": 1.9485294117647058,
"grad_norm": 0.0,
"learning_rate": 8.554831280811748e-09,
"loss": 1.7802,
"step": 1060
},
{
"epoch": 1.9503676470588234,
"grad_norm": 0.0,
"learning_rate": 7.95500216695666e-09,
"loss": 1.6599,
"step": 1061
},
{
"epoch": 1.9522058823529411,
"grad_norm": 0.0,
"learning_rate": 7.3769395620731934e-09,
"loss": 1.7335,
"step": 1062
},
{
"epoch": 1.9540441176470589,
"grad_norm": 0.0,
"learning_rate": 6.82064851518921e-09,
"loss": 1.8277,
"step": 1063
},
{
"epoch": 1.9558823529411766,
"grad_norm": 0.0,
"learning_rate": 6.286133885171075e-09,
"loss": 1.938,
"step": 1064
},
{
"epoch": 1.9577205882352942,
"grad_norm": 0.0,
"learning_rate": 5.773400340682023e-09,
"loss": 1.8225,
"step": 1065
},
{
"epoch": 1.9595588235294117,
"grad_norm": 0.0,
"learning_rate": 5.282452360139689e-09,
"loss": 1.7666,
"step": 1066
},
{
"epoch": 1.9613970588235294,
"grad_norm": 0.0,
"learning_rate": 4.813294231678367e-09,
"loss": 1.5706,
"step": 1067
},
{
"epoch": 1.9632352941176472,
"grad_norm": 0.0,
"learning_rate": 4.3659300531112555e-09,
"loss": 1.9548,
"step": 1068
},
{
"epoch": 1.9650735294117647,
"grad_norm": 0.0,
"learning_rate": 3.9403637318943814e-09,
"loss": 1.8425,
"step": 1069
},
{
"epoch": 1.9669117647058822,
"grad_norm": 0.0,
"learning_rate": 3.536598985092732e-09,
"loss": 1.8042,
"step": 1070
},
{
"epoch": 1.96875,
"grad_norm": 0.0,
"learning_rate": 3.154639339347787e-09,
"loss": 1.6248,
"step": 1071
},
{
"epoch": 1.9705882352941178,
"grad_norm": 0.0,
"learning_rate": 2.7944881308464288e-09,
"loss": 1.7392,
"step": 1072
},
{
"epoch": 1.9724264705882353,
"grad_norm": 0.0,
"learning_rate": 2.4561485052920774e-09,
"loss": 1.6152,
"step": 1073
},
{
"epoch": 1.9742647058823528,
"grad_norm": 0.0,
"learning_rate": 2.1396234178769347e-09,
"loss": 1.8047,
"step": 1074
},
{
"epoch": 1.9761029411764706,
"grad_norm": 0.0,
"learning_rate": 1.844915633257005e-09,
"loss": 1.8002,
"step": 1075
},
{
"epoch": 1.9779411764705883,
"grad_norm": 0.0,
"learning_rate": 1.5720277255268368e-09,
"loss": 1.874,
"step": 1076
},
{
"epoch": 1.9797794117647058,
"grad_norm": 0.0,
"learning_rate": 1.3209620781973188e-09,
"loss": 1.8465,
"step": 1077
},
{
"epoch": 1.9816176470588234,
"grad_norm": 0.0,
"learning_rate": 1.0917208841756954e-09,
"loss": 1.7619,
"step": 1078
},
{
"epoch": 1.9834558823529411,
"grad_norm": 0.0,
"learning_rate": 8.843061457450286e-10,
"loss": 1.8742,
"step": 1079
},
{
"epoch": 1.9852941176470589,
"grad_norm": 0.0,
"learning_rate": 6.98719674548376e-10,
"loss": 1.6781,
"step": 1080
},
{
"epoch": 1.9871323529411766,
"grad_norm": 0.0,
"learning_rate": 5.349630915710279e-10,
"loss": 1.6849,
"step": 1081
},
{
"epoch": 1.9889705882352942,
"grad_norm": 0.0,
"learning_rate": 3.930378271282953e-10,
"loss": 1.6883,
"step": 1082
},
{
"epoch": 1.9908088235294117,
"grad_norm": 0.0,
"learning_rate": 2.7294512085163093e-10,
"loss": 1.8295,
"step": 1083
},
{
"epoch": 1.9926470588235294,
"grad_norm": 0.0,
"learning_rate": 1.746860216783608e-10,
"loss": 1.7199,
"step": 1084
},
{
"epoch": 1.9944852941176472,
"grad_norm": 0.0,
"learning_rate": 9.82613878422467e-11,
"loss": 1.8275,
"step": 1085
},
{
"epoch": 1.9963235294117647,
"grad_norm": 0.0,
"learning_rate": 4.3671886866269995e-11,
"loss": 1.9304,
"step": 1086
},
{
"epoch": 1.9981617647058822,
"grad_norm": 0.0,
"learning_rate": 1.0917995557080397e-11,
"loss": 1.8221,
"step": 1087
},
{
"epoch": 2.0,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 1.4433,
"step": 1088
}
],
"logging_steps": 1,
"max_steps": 1088,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 272,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.797617676955156e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}