q-align-iqa / trainer_state.json
teowu's picture
Upload folder using huggingface_hub
982a182
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"global_step": 376,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.1764705882352942e-06,
"loss": 1.3193,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 2.3529411764705885e-06,
"loss": 1.2891,
"step": 2
},
{
"epoch": 0.02,
"learning_rate": 3.529411764705883e-06,
"loss": 1.3105,
"step": 3
},
{
"epoch": 0.02,
"learning_rate": 4.705882352941177e-06,
"loss": 1.2383,
"step": 4
},
{
"epoch": 0.03,
"learning_rate": 5.882352941176471e-06,
"loss": 0.8359,
"step": 5
},
{
"epoch": 0.03,
"learning_rate": 7.058823529411766e-06,
"loss": 0.49,
"step": 6
},
{
"epoch": 0.04,
"learning_rate": 8.23529411764706e-06,
"loss": 0.1881,
"step": 7
},
{
"epoch": 0.04,
"learning_rate": 9.411764705882354e-06,
"loss": 0.2035,
"step": 8
},
{
"epoch": 0.05,
"learning_rate": 1.0588235294117648e-05,
"loss": 0.1893,
"step": 9
},
{
"epoch": 0.05,
"learning_rate": 1.1764705882352942e-05,
"loss": 0.2448,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 1.2941176470588238e-05,
"loss": 0.1973,
"step": 11
},
{
"epoch": 0.06,
"learning_rate": 1.4117647058823532e-05,
"loss": 0.2133,
"step": 12
},
{
"epoch": 0.07,
"learning_rate": 1.5294117647058822e-05,
"loss": 0.2087,
"step": 13
},
{
"epoch": 0.07,
"learning_rate": 1.647058823529412e-05,
"loss": 0.2408,
"step": 14
},
{
"epoch": 0.08,
"learning_rate": 1.7647058823529414e-05,
"loss": 0.1686,
"step": 15
},
{
"epoch": 0.09,
"learning_rate": 1.8823529411764708e-05,
"loss": 0.1917,
"step": 16
},
{
"epoch": 0.09,
"learning_rate": 2e-05,
"loss": 0.2241,
"step": 17
},
{
"epoch": 0.1,
"learning_rate": 1.9999835072185805e-05,
"loss": 0.2195,
"step": 18
},
{
"epoch": 0.1,
"learning_rate": 1.999934029418346e-05,
"loss": 0.1847,
"step": 19
},
{
"epoch": 0.11,
"learning_rate": 1.9998515682313485e-05,
"loss": 0.1765,
"step": 20
},
{
"epoch": 0.11,
"learning_rate": 1.999736126377618e-05,
"loss": 0.1572,
"step": 21
},
{
"epoch": 0.12,
"learning_rate": 1.999587707665068e-05,
"loss": 0.1677,
"step": 22
},
{
"epoch": 0.12,
"learning_rate": 1.999406316989374e-05,
"loss": 0.1906,
"step": 23
},
{
"epoch": 0.13,
"learning_rate": 1.9991919603338088e-05,
"loss": 0.153,
"step": 24
},
{
"epoch": 0.13,
"learning_rate": 1.998944644769048e-05,
"loss": 0.1473,
"step": 25
},
{
"epoch": 0.14,
"learning_rate": 1.9986643784529346e-05,
"loss": 0.1345,
"step": 26
},
{
"epoch": 0.14,
"learning_rate": 1.9983511706302102e-05,
"loss": 0.1476,
"step": 27
},
{
"epoch": 0.15,
"learning_rate": 1.9980050316322118e-05,
"loss": 0.1219,
"step": 28
},
{
"epoch": 0.15,
"learning_rate": 1.997625972876529e-05,
"loss": 0.1336,
"step": 29
},
{
"epoch": 0.16,
"learning_rate": 1.997214006866628e-05,
"loss": 0.1133,
"step": 30
},
{
"epoch": 0.16,
"learning_rate": 1.9967691471914392e-05,
"loss": 0.1424,
"step": 31
},
{
"epoch": 0.17,
"learning_rate": 1.99629140852491e-05,
"loss": 0.123,
"step": 32
},
{
"epoch": 0.18,
"learning_rate": 1.9957808066255187e-05,
"loss": 0.1199,
"step": 33
},
{
"epoch": 0.18,
"learning_rate": 1.9952373583357566e-05,
"loss": 0.1308,
"step": 34
},
{
"epoch": 0.19,
"learning_rate": 1.994661081581571e-05,
"loss": 0.1172,
"step": 35
},
{
"epoch": 0.19,
"learning_rate": 1.9940519953717762e-05,
"loss": 0.1299,
"step": 36
},
{
"epoch": 0.2,
"learning_rate": 1.993410119797422e-05,
"loss": 0.1358,
"step": 37
},
{
"epoch": 0.2,
"learning_rate": 1.9927354760311365e-05,
"loss": 0.121,
"step": 38
},
{
"epoch": 0.21,
"learning_rate": 1.992028086326424e-05,
"loss": 0.1273,
"step": 39
},
{
"epoch": 0.21,
"learning_rate": 1.991287974016932e-05,
"loss": 0.1215,
"step": 40
},
{
"epoch": 0.22,
"learning_rate": 1.9905151635156813e-05,
"loss": 0.1239,
"step": 41
},
{
"epoch": 0.22,
"learning_rate": 1.9897096803142616e-05,
"loss": 0.1178,
"step": 42
},
{
"epoch": 0.23,
"learning_rate": 1.988871550981989e-05,
"loss": 0.1135,
"step": 43
},
{
"epoch": 0.23,
"learning_rate": 1.988000803165032e-05,
"loss": 0.1103,
"step": 44
},
{
"epoch": 0.24,
"learning_rate": 1.9870974655854974e-05,
"loss": 0.124,
"step": 45
},
{
"epoch": 0.24,
"learning_rate": 1.9861615680404833e-05,
"loss": 0.1031,
"step": 46
},
{
"epoch": 0.25,
"learning_rate": 1.985193141401097e-05,
"loss": 0.1032,
"step": 47
},
{
"epoch": 0.26,
"learning_rate": 1.9841922176114366e-05,
"loss": 0.1151,
"step": 48
},
{
"epoch": 0.26,
"learning_rate": 1.9831588296875367e-05,
"loss": 0.1061,
"step": 49
},
{
"epoch": 0.27,
"learning_rate": 1.982093011716279e-05,
"loss": 0.1019,
"step": 50
},
{
"epoch": 0.27,
"learning_rate": 1.9809947988542696e-05,
"loss": 0.1004,
"step": 51
},
{
"epoch": 0.28,
"learning_rate": 1.979864227326678e-05,
"loss": 0.0971,
"step": 52
},
{
"epoch": 0.28,
"learning_rate": 1.9787013344260422e-05,
"loss": 0.1137,
"step": 53
},
{
"epoch": 0.29,
"learning_rate": 1.9775061585110387e-05,
"loss": 0.1052,
"step": 54
},
{
"epoch": 0.29,
"learning_rate": 1.976278739005218e-05,
"loss": 0.1034,
"step": 55
},
{
"epoch": 0.3,
"learning_rate": 1.9750191163957042e-05,
"loss": 0.0911,
"step": 56
},
{
"epoch": 0.3,
"learning_rate": 1.9737273322318565e-05,
"loss": 0.1107,
"step": 57
},
{
"epoch": 0.31,
"learning_rate": 1.972403429123904e-05,
"loss": 0.1055,
"step": 58
},
{
"epoch": 0.31,
"learning_rate": 1.971047450741535e-05,
"loss": 0.106,
"step": 59
},
{
"epoch": 0.32,
"learning_rate": 1.9696594418124598e-05,
"loss": 0.0934,
"step": 60
},
{
"epoch": 0.32,
"learning_rate": 1.9682394481209338e-05,
"loss": 0.0986,
"step": 61
},
{
"epoch": 0.33,
"learning_rate": 1.966787516506249e-05,
"loss": 0.1112,
"step": 62
},
{
"epoch": 0.34,
"learning_rate": 1.9653036948611864e-05,
"loss": 0.1075,
"step": 63
},
{
"epoch": 0.34,
"learning_rate": 1.9637880321304387e-05,
"loss": 0.1089,
"step": 64
},
{
"epoch": 0.35,
"learning_rate": 1.962240578308993e-05,
"loss": 0.0972,
"step": 65
},
{
"epoch": 0.35,
"learning_rate": 1.9606613844404853e-05,
"loss": 0.0898,
"step": 66
},
{
"epoch": 0.36,
"learning_rate": 1.9590505026155146e-05,
"loss": 0.088,
"step": 67
},
{
"epoch": 0.36,
"learning_rate": 1.9574079859699236e-05,
"loss": 0.1041,
"step": 68
},
{
"epoch": 0.37,
"learning_rate": 1.955733888683049e-05,
"loss": 0.1028,
"step": 69
},
{
"epoch": 0.37,
"learning_rate": 1.9540282659759317e-05,
"loss": 0.0876,
"step": 70
},
{
"epoch": 0.38,
"learning_rate": 1.9522911741094966e-05,
"loss": 0.088,
"step": 71
},
{
"epoch": 0.38,
"learning_rate": 1.9505226703826973e-05,
"loss": 0.0905,
"step": 72
},
{
"epoch": 0.39,
"learning_rate": 1.948722813130624e-05,
"loss": 0.0945,
"step": 73
},
{
"epoch": 0.39,
"learning_rate": 1.9468916617225814e-05,
"loss": 0.0978,
"step": 74
},
{
"epoch": 0.4,
"learning_rate": 1.9450292765601287e-05,
"loss": 0.1132,
"step": 75
},
{
"epoch": 0.4,
"learning_rate": 1.94313571907509e-05,
"loss": 0.0826,
"step": 76
},
{
"epoch": 0.41,
"learning_rate": 1.941211051727524e-05,
"loss": 0.1053,
"step": 77
},
{
"epoch": 0.41,
"learning_rate": 1.939255338003666e-05,
"loss": 0.1049,
"step": 78
},
{
"epoch": 0.42,
"learning_rate": 1.937268642413835e-05,
"loss": 0.1079,
"step": 79
},
{
"epoch": 0.43,
"learning_rate": 1.9352510304903017e-05,
"loss": 0.1078,
"step": 80
},
{
"epoch": 0.43,
"learning_rate": 1.9332025687851325e-05,
"loss": 0.0937,
"step": 81
},
{
"epoch": 0.44,
"learning_rate": 1.931123324867989e-05,
"loss": 0.0822,
"step": 82
},
{
"epoch": 0.44,
"learning_rate": 1.929013367323902e-05,
"loss": 0.1162,
"step": 83
},
{
"epoch": 0.45,
"learning_rate": 1.926872765751009e-05,
"loss": 0.0896,
"step": 84
},
{
"epoch": 0.45,
"learning_rate": 1.9247015907582574e-05,
"loss": 0.1021,
"step": 85
},
{
"epoch": 0.46,
"learning_rate": 1.9224999139630766e-05,
"loss": 0.1036,
"step": 86
},
{
"epoch": 0.46,
"learning_rate": 1.920267807989015e-05,
"loss": 0.0952,
"step": 87
},
{
"epoch": 0.47,
"learning_rate": 1.918005346463344e-05,
"loss": 0.0869,
"step": 88
},
{
"epoch": 0.47,
"learning_rate": 1.9157126040146307e-05,
"loss": 0.0923,
"step": 89
},
{
"epoch": 0.48,
"learning_rate": 1.9133896562702746e-05,
"loss": 0.0983,
"step": 90
},
{
"epoch": 0.48,
"learning_rate": 1.911036579854016e-05,
"loss": 0.1044,
"step": 91
},
{
"epoch": 0.49,
"learning_rate": 1.9086534523834032e-05,
"loss": 0.0901,
"step": 92
},
{
"epoch": 0.49,
"learning_rate": 1.906240352467238e-05,
"loss": 0.0774,
"step": 93
},
{
"epoch": 0.5,
"learning_rate": 1.9037973597029796e-05,
"loss": 0.0863,
"step": 94
},
{
"epoch": 0.51,
"learning_rate": 1.901324554674119e-05,
"loss": 0.0906,
"step": 95
},
{
"epoch": 0.51,
"learning_rate": 1.8988220189475216e-05,
"loss": 0.11,
"step": 96
},
{
"epoch": 0.52,
"learning_rate": 1.896289835070737e-05,
"loss": 0.1056,
"step": 97
},
{
"epoch": 0.52,
"learning_rate": 1.893728086569276e-05,
"loss": 0.0897,
"step": 98
},
{
"epoch": 0.53,
"learning_rate": 1.891136857943854e-05,
"loss": 0.0954,
"step": 99
},
{
"epoch": 0.53,
"learning_rate": 1.8885162346676063e-05,
"loss": 0.095,
"step": 100
},
{
"epoch": 0.54,
"learning_rate": 1.8858663031832665e-05,
"loss": 0.0786,
"step": 101
},
{
"epoch": 0.54,
"learning_rate": 1.8831871509003164e-05,
"loss": 0.0933,
"step": 102
},
{
"epoch": 0.55,
"learning_rate": 1.8804788661921012e-05,
"loss": 0.0833,
"step": 103
},
{
"epoch": 0.55,
"learning_rate": 1.877741538392917e-05,
"loss": 0.0784,
"step": 104
},
{
"epoch": 0.56,
"learning_rate": 1.8749752577950614e-05,
"loss": 0.0851,
"step": 105
},
{
"epoch": 0.56,
"learning_rate": 1.8721801156458573e-05,
"loss": 0.0891,
"step": 106
},
{
"epoch": 0.57,
"learning_rate": 1.869356204144642e-05,
"loss": 0.0812,
"step": 107
},
{
"epoch": 0.57,
"learning_rate": 1.866503616439725e-05,
"loss": 0.1109,
"step": 108
},
{
"epoch": 0.58,
"learning_rate": 1.8636224466253177e-05,
"loss": 0.0948,
"step": 109
},
{
"epoch": 0.59,
"learning_rate": 1.860712789738428e-05,
"loss": 0.0933,
"step": 110
},
{
"epoch": 0.59,
"learning_rate": 1.857774741755726e-05,
"loss": 0.0872,
"step": 111
},
{
"epoch": 0.6,
"learning_rate": 1.854808399590378e-05,
"loss": 0.0995,
"step": 112
},
{
"epoch": 0.6,
"learning_rate": 1.8518138610888505e-05,
"loss": 0.0862,
"step": 113
},
{
"epoch": 0.61,
"learning_rate": 1.8487912250276805e-05,
"loss": 0.0716,
"step": 114
},
{
"epoch": 0.61,
"learning_rate": 1.8457405911102202e-05,
"loss": 0.0977,
"step": 115
},
{
"epoch": 0.62,
"learning_rate": 1.8426620599633464e-05,
"loss": 0.0782,
"step": 116
},
{
"epoch": 0.62,
"learning_rate": 1.8395557331341413e-05,
"loss": 0.0891,
"step": 117
},
{
"epoch": 0.63,
"learning_rate": 1.836421713086544e-05,
"loss": 0.096,
"step": 118
},
{
"epoch": 0.63,
"learning_rate": 1.83326010319797e-05,
"loss": 0.0912,
"step": 119
},
{
"epoch": 0.64,
"learning_rate": 1.830071007755901e-05,
"loss": 0.0807,
"step": 120
},
{
"epoch": 0.64,
"learning_rate": 1.8268545319544443e-05,
"loss": 0.092,
"step": 121
},
{
"epoch": 0.65,
"learning_rate": 1.823610781890865e-05,
"loss": 0.0931,
"step": 122
},
{
"epoch": 0.65,
"learning_rate": 1.820339864562085e-05,
"loss": 0.0859,
"step": 123
},
{
"epoch": 0.66,
"learning_rate": 1.817041887861153e-05,
"loss": 0.089,
"step": 124
},
{
"epoch": 0.66,
"learning_rate": 1.8137169605736867e-05,
"loss": 0.0991,
"step": 125
},
{
"epoch": 0.67,
"learning_rate": 1.8103651923742846e-05,
"loss": 0.0741,
"step": 126
},
{
"epoch": 0.68,
"learning_rate": 1.8069866938229066e-05,
"loss": 0.087,
"step": 127
},
{
"epoch": 0.68,
"learning_rate": 1.8035815763612293e-05,
"loss": 0.0969,
"step": 128
},
{
"epoch": 0.69,
"learning_rate": 1.8001499523089683e-05,
"loss": 0.071,
"step": 129
},
{
"epoch": 0.69,
"learning_rate": 1.7966919348601754e-05,
"loss": 0.0804,
"step": 130
},
{
"epoch": 0.7,
"learning_rate": 1.7932076380795017e-05,
"loss": 0.0895,
"step": 131
},
{
"epoch": 0.7,
"learning_rate": 1.7896971768984373e-05,
"loss": 0.085,
"step": 132
},
{
"epoch": 0.71,
"learning_rate": 1.7861606671115207e-05,
"loss": 0.1048,
"step": 133
},
{
"epoch": 0.71,
"learning_rate": 1.7825982253725175e-05,
"loss": 0.0858,
"step": 134
},
{
"epoch": 0.72,
"learning_rate": 1.7790099691905736e-05,
"loss": 0.1055,
"step": 135
},
{
"epoch": 0.72,
"learning_rate": 1.7753960169263387e-05,
"loss": 0.0835,
"step": 136
},
{
"epoch": 0.73,
"learning_rate": 1.7717564877880623e-05,
"loss": 0.0967,
"step": 137
},
{
"epoch": 0.73,
"learning_rate": 1.7680915018276613e-05,
"loss": 0.0972,
"step": 138
},
{
"epoch": 0.74,
"learning_rate": 1.764401179936761e-05,
"loss": 0.0823,
"step": 139
},
{
"epoch": 0.74,
"learning_rate": 1.7606856438427054e-05,
"loss": 0.0803,
"step": 140
},
{
"epoch": 0.75,
"learning_rate": 1.7569450161045444e-05,
"loss": 0.0926,
"step": 141
},
{
"epoch": 0.76,
"learning_rate": 1.7531794201089888e-05,
"loss": 0.11,
"step": 142
},
{
"epoch": 0.76,
"learning_rate": 1.749388980066342e-05,
"loss": 0.0889,
"step": 143
},
{
"epoch": 0.77,
"learning_rate": 1.745573821006403e-05,
"loss": 0.093,
"step": 144
},
{
"epoch": 0.77,
"learning_rate": 1.7417340687743393e-05,
"loss": 0.0857,
"step": 145
},
{
"epoch": 0.78,
"learning_rate": 1.7378698500265402e-05,
"loss": 0.0871,
"step": 146
},
{
"epoch": 0.78,
"learning_rate": 1.7339812922264366e-05,
"loss": 0.0911,
"step": 147
},
{
"epoch": 0.79,
"learning_rate": 1.730068523640295e-05,
"loss": 0.0938,
"step": 148
},
{
"epoch": 0.79,
"learning_rate": 1.72613167333299e-05,
"loss": 0.0883,
"step": 149
},
{
"epoch": 0.8,
"learning_rate": 1.7221708711637455e-05,
"loss": 0.0897,
"step": 150
},
{
"epoch": 0.8,
"learning_rate": 1.718186247781849e-05,
"loss": 0.0814,
"step": 151
},
{
"epoch": 0.81,
"learning_rate": 1.7141779346223465e-05,
"loss": 0.0793,
"step": 152
},
{
"epoch": 0.81,
"learning_rate": 1.7101460639017034e-05,
"loss": 0.0717,
"step": 153
},
{
"epoch": 0.82,
"learning_rate": 1.7060907686134445e-05,
"loss": 0.0847,
"step": 154
},
{
"epoch": 0.82,
"learning_rate": 1.7020121825237672e-05,
"loss": 0.0798,
"step": 155
},
{
"epoch": 0.83,
"learning_rate": 1.6979104401671296e-05,
"loss": 0.0865,
"step": 156
},
{
"epoch": 0.84,
"learning_rate": 1.693785676841812e-05,
"loss": 0.0914,
"step": 157
},
{
"epoch": 0.84,
"learning_rate": 1.6896380286054537e-05,
"loss": 0.0797,
"step": 158
},
{
"epoch": 0.85,
"learning_rate": 1.6854676322705673e-05,
"loss": 0.0736,
"step": 159
},
{
"epoch": 0.85,
"learning_rate": 1.6812746254000222e-05,
"loss": 0.0765,
"step": 160
},
{
"epoch": 0.86,
"learning_rate": 1.67705914630251e-05,
"loss": 0.0843,
"step": 161
},
{
"epoch": 0.86,
"learning_rate": 1.6728213340279822e-05,
"loss": 0.0772,
"step": 162
},
{
"epoch": 0.87,
"learning_rate": 1.668561328363061e-05,
"loss": 0.0944,
"step": 163
},
{
"epoch": 0.87,
"learning_rate": 1.6642792698264313e-05,
"loss": 0.0915,
"step": 164
},
{
"epoch": 0.88,
"learning_rate": 1.6599752996642044e-05,
"loss": 0.0805,
"step": 165
},
{
"epoch": 0.88,
"learning_rate": 1.655649559845258e-05,
"loss": 0.0925,
"step": 166
},
{
"epoch": 0.89,
"learning_rate": 1.651302193056555e-05,
"loss": 0.0811,
"step": 167
},
{
"epoch": 0.89,
"learning_rate": 1.6469333426984357e-05,
"loss": 0.0787,
"step": 168
},
{
"epoch": 0.9,
"learning_rate": 1.6425431528798883e-05,
"loss": 0.0858,
"step": 169
},
{
"epoch": 0.9,
"learning_rate": 1.6381317684137946e-05,
"loss": 0.0768,
"step": 170
},
{
"epoch": 0.91,
"learning_rate": 1.6336993348121543e-05,
"loss": 0.0818,
"step": 171
},
{
"epoch": 0.91,
"learning_rate": 1.6292459982812845e-05,
"loss": 0.0868,
"step": 172
},
{
"epoch": 0.92,
"learning_rate": 1.624771905716997e-05,
"loss": 0.0828,
"step": 173
},
{
"epoch": 0.93,
"learning_rate": 1.620277204699754e-05,
"loss": 0.0821,
"step": 174
},
{
"epoch": 0.93,
"learning_rate": 1.615762043489797e-05,
"loss": 0.0858,
"step": 175
},
{
"epoch": 0.94,
"learning_rate": 1.611226571022261e-05,
"loss": 0.0711,
"step": 176
},
{
"epoch": 0.94,
"learning_rate": 1.6066709369022576e-05,
"loss": 0.0771,
"step": 177
},
{
"epoch": 0.95,
"learning_rate": 1.6020952913999423e-05,
"loss": 0.0788,
"step": 178
},
{
"epoch": 0.95,
"learning_rate": 1.5974997854455575e-05,
"loss": 0.0716,
"step": 179
},
{
"epoch": 0.96,
"learning_rate": 1.5928845706244537e-05,
"loss": 0.0894,
"step": 180
},
{
"epoch": 0.96,
"learning_rate": 1.588249799172089e-05,
"loss": 0.085,
"step": 181
},
{
"epoch": 0.97,
"learning_rate": 1.583595623969009e-05,
"loss": 0.0832,
"step": 182
},
{
"epoch": 0.97,
"learning_rate": 1.5789221985358017e-05,
"loss": 0.0787,
"step": 183
},
{
"epoch": 0.98,
"learning_rate": 1.574229677028036e-05,
"loss": 0.0792,
"step": 184
},
{
"epoch": 0.98,
"learning_rate": 1.5695182142311743e-05,
"loss": 0.0798,
"step": 185
},
{
"epoch": 0.99,
"learning_rate": 1.564787965555469e-05,
"loss": 0.0978,
"step": 186
},
{
"epoch": 0.99,
"learning_rate": 1.560039087030836e-05,
"loss": 0.0909,
"step": 187
},
{
"epoch": 1.0,
"learning_rate": 1.5552717353017045e-05,
"loss": 0.0749,
"step": 188
},
{
"epoch": 1.01,
"learning_rate": 1.5504860676218557e-05,
"loss": 0.0788,
"step": 189
},
{
"epoch": 1.01,
"learning_rate": 1.5456822418492312e-05,
"loss": 0.07,
"step": 190
},
{
"epoch": 1.02,
"learning_rate": 1.540860416440728e-05,
"loss": 0.073,
"step": 191
},
{
"epoch": 1.02,
"learning_rate": 1.5360207504469715e-05,
"loss": 0.0621,
"step": 192
},
{
"epoch": 1.03,
"learning_rate": 1.5311634035070678e-05,
"loss": 0.0738,
"step": 193
},
{
"epoch": 1.03,
"learning_rate": 1.5262885358433404e-05,
"loss": 0.0698,
"step": 194
},
{
"epoch": 1.04,
"learning_rate": 1.5213963082560424e-05,
"loss": 0.0666,
"step": 195
},
{
"epoch": 1.04,
"learning_rate": 1.5164868821180538e-05,
"loss": 0.0663,
"step": 196
},
{
"epoch": 1.05,
"learning_rate": 1.5115604193695599e-05,
"loss": 0.0686,
"step": 197
},
{
"epoch": 1.05,
"learning_rate": 1.5066170825127069e-05,
"loss": 0.0612,
"step": 198
},
{
"epoch": 1.06,
"learning_rate": 1.5016570346062432e-05,
"loss": 0.0657,
"step": 199
},
{
"epoch": 1.06,
"learning_rate": 1.496680439260141e-05,
"loss": 0.0626,
"step": 200
},
{
"epoch": 1.07,
"learning_rate": 1.4916874606301989e-05,
"loss": 0.068,
"step": 201
},
{
"epoch": 1.07,
"learning_rate": 1.4866782634126266e-05,
"loss": 0.0637,
"step": 202
},
{
"epoch": 1.08,
"learning_rate": 1.4816530128386144e-05,
"loss": 0.0698,
"step": 203
},
{
"epoch": 1.09,
"learning_rate": 1.4766118746688805e-05,
"loss": 0.0714,
"step": 204
},
{
"epoch": 1.09,
"learning_rate": 1.471555015188205e-05,
"loss": 0.061,
"step": 205
},
{
"epoch": 1.1,
"learning_rate": 1.4664826011999436e-05,
"loss": 0.0645,
"step": 206
},
{
"epoch": 1.1,
"learning_rate": 1.4613948000205272e-05,
"loss": 0.0714,
"step": 207
},
{
"epoch": 1.11,
"learning_rate": 1.4562917794739412e-05,
"loss": 0.0583,
"step": 208
},
{
"epoch": 1.11,
"learning_rate": 1.4511737078861903e-05,
"loss": 0.0609,
"step": 209
},
{
"epoch": 1.12,
"learning_rate": 1.4460407540797467e-05,
"loss": 0.0695,
"step": 210
},
{
"epoch": 1.12,
"learning_rate": 1.4408930873679805e-05,
"loss": 0.0824,
"step": 211
},
{
"epoch": 1.13,
"learning_rate": 1.4357308775495757e-05,
"loss": 0.0541,
"step": 212
},
{
"epoch": 1.13,
"learning_rate": 1.4305542949029286e-05,
"loss": 0.0706,
"step": 213
},
{
"epoch": 1.14,
"learning_rate": 1.4253635101805313e-05,
"loss": 0.0812,
"step": 214
},
{
"epoch": 1.14,
"learning_rate": 1.4201586946033397e-05,
"loss": 0.0605,
"step": 215
},
{
"epoch": 1.15,
"learning_rate": 1.4149400198551247e-05,
"loss": 0.0674,
"step": 216
},
{
"epoch": 1.15,
"learning_rate": 1.4097076580768103e-05,
"loss": 0.0709,
"step": 217
},
{
"epoch": 1.16,
"learning_rate": 1.4044617818607949e-05,
"loss": 0.0727,
"step": 218
},
{
"epoch": 1.16,
"learning_rate": 1.3992025642452579e-05,
"loss": 0.0734,
"step": 219
},
{
"epoch": 1.17,
"learning_rate": 1.3939301787084522e-05,
"loss": 0.0593,
"step": 220
},
{
"epoch": 1.18,
"learning_rate": 1.3886447991629828e-05,
"loss": 0.0762,
"step": 221
},
{
"epoch": 1.18,
"learning_rate": 1.3833465999500689e-05,
"loss": 0.0668,
"step": 222
},
{
"epoch": 1.19,
"learning_rate": 1.3780357558337927e-05,
"loss": 0.0658,
"step": 223
},
{
"epoch": 1.19,
"learning_rate": 1.372712441995337e-05,
"loss": 0.0823,
"step": 224
},
{
"epoch": 1.2,
"learning_rate": 1.3673768340272053e-05,
"loss": 0.066,
"step": 225
},
{
"epoch": 1.2,
"learning_rate": 1.362029107927429e-05,
"loss": 0.0658,
"step": 226
},
{
"epoch": 1.21,
"learning_rate": 1.3566694400937635e-05,
"loss": 0.0618,
"step": 227
},
{
"epoch": 1.21,
"learning_rate": 1.3512980073178693e-05,
"loss": 0.0536,
"step": 228
},
{
"epoch": 1.22,
"learning_rate": 1.3459149867794794e-05,
"loss": 0.0676,
"step": 229
},
{
"epoch": 1.22,
"learning_rate": 1.3405205560405558e-05,
"loss": 0.0597,
"step": 230
},
{
"epoch": 1.23,
"learning_rate": 1.3351148930394333e-05,
"loss": 0.0618,
"step": 231
},
{
"epoch": 1.23,
"learning_rate": 1.329698176084948e-05,
"loss": 0.0642,
"step": 232
},
{
"epoch": 1.24,
"learning_rate": 1.3242705838505577e-05,
"loss": 0.0508,
"step": 233
},
{
"epoch": 1.24,
"learning_rate": 1.3188322953684467e-05,
"loss": 0.0688,
"step": 234
},
{
"epoch": 1.25,
"learning_rate": 1.3133834900236217e-05,
"loss": 0.0589,
"step": 235
},
{
"epoch": 1.26,
"learning_rate": 1.3079243475479942e-05,
"loss": 0.0569,
"step": 236
},
{
"epoch": 1.26,
"learning_rate": 1.3024550480144506e-05,
"loss": 0.0825,
"step": 237
},
{
"epoch": 1.27,
"learning_rate": 1.296975771830915e-05,
"loss": 0.0652,
"step": 238
},
{
"epoch": 1.27,
"learning_rate": 1.2914866997343957e-05,
"loss": 0.0654,
"step": 239
},
{
"epoch": 1.28,
"learning_rate": 1.2859880127850258e-05,
"loss": 0.0629,
"step": 240
},
{
"epoch": 1.28,
"learning_rate": 1.2804798923600888e-05,
"loss": 0.0704,
"step": 241
},
{
"epoch": 1.29,
"learning_rate": 1.2749625201480375e-05,
"loss": 0.0688,
"step": 242
},
{
"epoch": 1.29,
"learning_rate": 1.2694360781424994e-05,
"loss": 0.0623,
"step": 243
},
{
"epoch": 1.3,
"learning_rate": 1.2639007486362745e-05,
"loss": 0.0948,
"step": 244
},
{
"epoch": 1.3,
"learning_rate": 1.2583567142153224e-05,
"loss": 0.0744,
"step": 245
},
{
"epoch": 1.31,
"learning_rate": 1.2528041577527384e-05,
"loss": 0.0526,
"step": 246
},
{
"epoch": 1.31,
"learning_rate": 1.2472432624027228e-05,
"loss": 0.0624,
"step": 247
},
{
"epoch": 1.32,
"learning_rate": 1.2416742115945391e-05,
"loss": 0.0732,
"step": 248
},
{
"epoch": 1.32,
"learning_rate": 1.2360971890264621e-05,
"loss": 0.0645,
"step": 249
},
{
"epoch": 1.33,
"learning_rate": 1.2305123786597202e-05,
"loss": 0.0678,
"step": 250
},
{
"epoch": 1.34,
"learning_rate": 1.224919964712427e-05,
"loss": 0.0802,
"step": 251
},
{
"epoch": 1.34,
"learning_rate": 1.219320131653504e-05,
"loss": 0.0568,
"step": 252
},
{
"epoch": 1.35,
"learning_rate": 1.2137130641965964e-05,
"loss": 0.0526,
"step": 253
},
{
"epoch": 1.35,
"learning_rate": 1.20809894729398e-05,
"loss": 0.0632,
"step": 254
},
{
"epoch": 1.36,
"learning_rate": 1.2024779661304614e-05,
"loss": 0.0667,
"step": 255
},
{
"epoch": 1.36,
"learning_rate": 1.1968503061172674e-05,
"loss": 0.0727,
"step": 256
},
{
"epoch": 1.37,
"learning_rate": 1.1912161528859308e-05,
"loss": 0.0641,
"step": 257
},
{
"epoch": 1.37,
"learning_rate": 1.1855756922821675e-05,
"loss": 0.0522,
"step": 258
},
{
"epoch": 1.38,
"learning_rate": 1.179929110359745e-05,
"loss": 0.0599,
"step": 259
},
{
"epoch": 1.38,
"learning_rate": 1.1742765933743459e-05,
"loss": 0.0681,
"step": 260
},
{
"epoch": 1.39,
"learning_rate": 1.168618327777425e-05,
"loss": 0.0576,
"step": 261
},
{
"epoch": 1.39,
"learning_rate": 1.1629545002100573e-05,
"loss": 0.0647,
"step": 262
},
{
"epoch": 1.4,
"learning_rate": 1.157285297496783e-05,
"loss": 0.0646,
"step": 263
},
{
"epoch": 1.4,
"learning_rate": 1.1516109066394445e-05,
"loss": 0.0587,
"step": 264
},
{
"epoch": 1.41,
"learning_rate": 1.1459315148110179e-05,
"loss": 0.064,
"step": 265
},
{
"epoch": 1.41,
"learning_rate": 1.1402473093494395e-05,
"loss": 0.0677,
"step": 266
},
{
"epoch": 1.42,
"learning_rate": 1.1345584777514253e-05,
"loss": 0.0599,
"step": 267
},
{
"epoch": 1.43,
"learning_rate": 1.1288652076662878e-05,
"loss": 0.0674,
"step": 268
},
{
"epoch": 1.43,
"learning_rate": 1.1231676868897452e-05,
"loss": 0.0716,
"step": 269
},
{
"epoch": 1.44,
"learning_rate": 1.1174661033577267e-05,
"loss": 0.0797,
"step": 270
},
{
"epoch": 1.44,
"learning_rate": 1.1117606451401745e-05,
"loss": 0.0577,
"step": 271
},
{
"epoch": 1.45,
"learning_rate": 1.1060515004348394e-05,
"loss": 0.0643,
"step": 272
},
{
"epoch": 1.45,
"learning_rate": 1.1003388575610724e-05,
"loss": 0.0735,
"step": 273
},
{
"epoch": 1.46,
"learning_rate": 1.0946229049536136e-05,
"loss": 0.0619,
"step": 274
},
{
"epoch": 1.46,
"learning_rate": 1.088903831156378e-05,
"loss": 0.0667,
"step": 275
},
{
"epoch": 1.47,
"learning_rate": 1.0831818248162328e-05,
"loss": 0.0721,
"step": 276
},
{
"epoch": 1.47,
"learning_rate": 1.0774570746767785e-05,
"loss": 0.0646,
"step": 277
},
{
"epoch": 1.48,
"learning_rate": 1.0717297695721199e-05,
"loss": 0.0604,
"step": 278
},
{
"epoch": 1.48,
"learning_rate": 1.0660000984206395e-05,
"loss": 0.0623,
"step": 279
},
{
"epoch": 1.49,
"learning_rate": 1.0602682502187655e-05,
"loss": 0.0648,
"step": 280
},
{
"epoch": 1.49,
"learning_rate": 1.0545344140347365e-05,
"loss": 0.0605,
"step": 281
},
{
"epoch": 1.5,
"learning_rate": 1.0487987790023665e-05,
"loss": 0.0667,
"step": 282
},
{
"epoch": 1.51,
"learning_rate": 1.0430615343148054e-05,
"loss": 0.0638,
"step": 283
},
{
"epoch": 1.51,
"learning_rate": 1.0373228692182982e-05,
"loss": 0.0598,
"step": 284
},
{
"epoch": 1.52,
"learning_rate": 1.031582973005943e-05,
"loss": 0.0805,
"step": 285
},
{
"epoch": 1.52,
"learning_rate": 1.0258420350114473e-05,
"loss": 0.0682,
"step": 286
},
{
"epoch": 1.53,
"learning_rate": 1.0201002446028815e-05,
"loss": 0.059,
"step": 287
},
{
"epoch": 1.53,
"learning_rate": 1.0143577911764341e-05,
"loss": 0.0558,
"step": 288
},
{
"epoch": 1.54,
"learning_rate": 1.008614864150164e-05,
"loss": 0.0685,
"step": 289
},
{
"epoch": 1.54,
"learning_rate": 1.002871652957751e-05,
"loss": 0.0539,
"step": 290
},
{
"epoch": 1.55,
"learning_rate": 9.97128347042249e-06,
"loss": 0.0588,
"step": 291
},
{
"epoch": 1.55,
"learning_rate": 9.91385135849836e-06,
"loss": 0.0513,
"step": 292
},
{
"epoch": 1.56,
"learning_rate": 9.85642208823566e-06,
"loss": 0.0616,
"step": 293
},
{
"epoch": 1.56,
"learning_rate": 9.79899755397119e-06,
"loss": 0.0588,
"step": 294
},
{
"epoch": 1.57,
"learning_rate": 9.741579649885532e-06,
"loss": 0.0645,
"step": 295
},
{
"epoch": 1.57,
"learning_rate": 9.684170269940573e-06,
"loss": 0.0644,
"step": 296
},
{
"epoch": 1.58,
"learning_rate": 9.62677130781702e-06,
"loss": 0.0638,
"step": 297
},
{
"epoch": 1.59,
"learning_rate": 9.569384656851948e-06,
"loss": 0.085,
"step": 298
},
{
"epoch": 1.59,
"learning_rate": 9.512012209976335e-06,
"loss": 0.0567,
"step": 299
},
{
"epoch": 1.6,
"learning_rate": 9.454655859652637e-06,
"loss": 0.0639,
"step": 300
},
{
"epoch": 1.6,
"learning_rate": 9.39731749781235e-06,
"loss": 0.073,
"step": 301
},
{
"epoch": 1.61,
"learning_rate": 9.339999015793606e-06,
"loss": 0.0703,
"step": 302
},
{
"epoch": 1.61,
"learning_rate": 9.282702304278806e-06,
"loss": 0.0711,
"step": 303
},
{
"epoch": 1.62,
"learning_rate": 9.225429253232218e-06,
"loss": 0.0641,
"step": 304
},
{
"epoch": 1.62,
"learning_rate": 9.168181751837673e-06,
"loss": 0.0591,
"step": 305
},
{
"epoch": 1.63,
"learning_rate": 9.110961688436222e-06,
"loss": 0.0684,
"step": 306
},
{
"epoch": 1.63,
"learning_rate": 9.053770950463865e-06,
"loss": 0.0775,
"step": 307
},
{
"epoch": 1.64,
"learning_rate": 8.996611424389283e-06,
"loss": 0.0688,
"step": 308
},
{
"epoch": 1.64,
"learning_rate": 8.93948499565161e-06,
"loss": 0.0785,
"step": 309
},
{
"epoch": 1.65,
"learning_rate": 8.882393548598258e-06,
"loss": 0.0737,
"step": 310
},
{
"epoch": 1.65,
"learning_rate": 8.825338966422735e-06,
"loss": 0.062,
"step": 311
},
{
"epoch": 1.66,
"learning_rate": 8.768323131102552e-06,
"loss": 0.0685,
"step": 312
},
{
"epoch": 1.66,
"learning_rate": 8.711347923337122e-06,
"loss": 0.0618,
"step": 313
},
{
"epoch": 1.67,
"learning_rate": 8.65441522248575e-06,
"loss": 0.0728,
"step": 314
},
{
"epoch": 1.68,
"learning_rate": 8.59752690650561e-06,
"loss": 0.0562,
"step": 315
},
{
"epoch": 1.68,
"learning_rate": 8.540684851889823e-06,
"loss": 0.0772,
"step": 316
},
{
"epoch": 1.69,
"learning_rate": 8.483890933605558e-06,
"loss": 0.0711,
"step": 317
},
{
"epoch": 1.69,
"learning_rate": 8.427147025032171e-06,
"loss": 0.0579,
"step": 318
},
{
"epoch": 1.7,
"learning_rate": 8.37045499789943e-06,
"loss": 0.0607,
"step": 319
},
{
"epoch": 1.7,
"learning_rate": 8.313816722225751e-06,
"loss": 0.0692,
"step": 320
},
{
"epoch": 1.71,
"learning_rate": 8.257234066256543e-06,
"loss": 0.0632,
"step": 321
},
{
"epoch": 1.71,
"learning_rate": 8.200708896402557e-06,
"loss": 0.0706,
"step": 322
},
{
"epoch": 1.72,
"learning_rate": 8.144243077178329e-06,
"loss": 0.0612,
"step": 323
},
{
"epoch": 1.72,
"learning_rate": 8.087838471140696e-06,
"loss": 0.0684,
"step": 324
},
{
"epoch": 1.73,
"learning_rate": 8.031496938827329e-06,
"loss": 0.0712,
"step": 325
},
{
"epoch": 1.73,
"learning_rate": 7.97522033869539e-06,
"loss": 0.0676,
"step": 326
},
{
"epoch": 1.74,
"learning_rate": 7.9190105270602e-06,
"loss": 0.0643,
"step": 327
},
{
"epoch": 1.74,
"learning_rate": 7.86286935803404e-06,
"loss": 0.0602,
"step": 328
},
{
"epoch": 1.75,
"learning_rate": 7.806798683464965e-06,
"loss": 0.0594,
"step": 329
},
{
"epoch": 1.76,
"learning_rate": 7.750800352875734e-06,
"loss": 0.0677,
"step": 330
},
{
"epoch": 1.76,
"learning_rate": 7.694876213402801e-06,
"loss": 0.0684,
"step": 331
},
{
"epoch": 1.77,
"learning_rate": 7.63902810973538e-06,
"loss": 0.0599,
"step": 332
},
{
"epoch": 1.77,
"learning_rate": 7.583257884054613e-06,
"loss": 0.0711,
"step": 333
},
{
"epoch": 1.78,
"learning_rate": 7.527567375972772e-06,
"loss": 0.0724,
"step": 334
},
{
"epoch": 1.78,
"learning_rate": 7.471958422472618e-06,
"loss": 0.0579,
"step": 335
},
{
"epoch": 1.79,
"learning_rate": 7.416432857846783e-06,
"loss": 0.0544,
"step": 336
},
{
"epoch": 1.79,
"learning_rate": 7.360992513637257e-06,
"loss": 0.0559,
"step": 337
},
{
"epoch": 1.8,
"learning_rate": 7.305639218575009e-06,
"loss": 0.0681,
"step": 338
},
{
"epoch": 1.8,
"learning_rate": 7.250374798519626e-06,
"loss": 0.0668,
"step": 339
},
{
"epoch": 1.81,
"learning_rate": 7.1952010763991146e-06,
"loss": 0.0787,
"step": 340
},
{
"epoch": 1.81,
"learning_rate": 7.140119872149743e-06,
"loss": 0.0739,
"step": 341
},
{
"epoch": 1.82,
"learning_rate": 7.085133002656044e-06,
"loss": 0.0616,
"step": 342
},
{
"epoch": 1.82,
"learning_rate": 7.030242281690856e-06,
"loss": 0.052,
"step": 343
},
{
"epoch": 1.83,
"learning_rate": 6.975449519855495e-06,
"loss": 0.0676,
"step": 344
},
{
"epoch": 1.84,
"learning_rate": 6.9207565245200614e-06,
"loss": 0.0576,
"step": 345
},
{
"epoch": 1.84,
"learning_rate": 6.866165099763782e-06,
"loss": 0.0547,
"step": 346
},
{
"epoch": 1.85,
"learning_rate": 6.811677046315535e-06,
"loss": 0.0615,
"step": 347
},
{
"epoch": 1.85,
"learning_rate": 6.757294161494426e-06,
"loss": 0.0533,
"step": 348
},
{
"epoch": 1.86,
"learning_rate": 6.70301823915052e-06,
"loss": 0.0654,
"step": 349
},
{
"epoch": 1.86,
"learning_rate": 6.64885106960567e-06,
"loss": 0.0616,
"step": 350
},
{
"epoch": 1.87,
"learning_rate": 6.594794439594443e-06,
"loss": 0.0602,
"step": 351
},
{
"epoch": 1.87,
"learning_rate": 6.54085013220521e-06,
"loss": 0.0565,
"step": 352
},
{
"epoch": 1.88,
"learning_rate": 6.48701992682131e-06,
"loss": 0.0481,
"step": 353
},
{
"epoch": 1.88,
"learning_rate": 6.4333055990623674e-06,
"loss": 0.0583,
"step": 354
},
{
"epoch": 1.89,
"learning_rate": 6.379708920725713e-06,
"loss": 0.0598,
"step": 355
},
{
"epoch": 1.89,
"learning_rate": 6.3262316597279506e-06,
"loss": 0.0565,
"step": 356
},
{
"epoch": 1.9,
"learning_rate": 6.272875580046633e-06,
"loss": 0.0684,
"step": 357
},
{
"epoch": 1.9,
"learning_rate": 6.219642441662077e-06,
"loss": 0.0593,
"step": 358
},
{
"epoch": 1.91,
"learning_rate": 6.1665340004993164e-06,
"loss": 0.0567,
"step": 359
},
{
"epoch": 1.91,
"learning_rate": 6.113552008370172e-06,
"loss": 0.064,
"step": 360
},
{
"epoch": 1.92,
"learning_rate": 6.06069821291548e-06,
"loss": 0.0613,
"step": 361
},
{
"epoch": 1.93,
"learning_rate": 6.007974357547424e-06,
"loss": 0.0671,
"step": 362
},
{
"epoch": 1.93,
"learning_rate": 5.9553821813920545e-06,
"loss": 0.0572,
"step": 363
},
{
"epoch": 1.94,
"learning_rate": 5.902923419231902e-06,
"loss": 0.0432,
"step": 364
},
{
"epoch": 1.94,
"learning_rate": 5.850599801448757e-06,
"loss": 0.0624,
"step": 365
},
{
"epoch": 1.95,
"learning_rate": 5.798413053966607e-06,
"loss": 0.0717,
"step": 366
},
{
"epoch": 1.95,
"learning_rate": 5.74636489819469e-06,
"loss": 0.064,
"step": 367
},
{
"epoch": 1.96,
"learning_rate": 5.6944570509707185e-06,
"loss": 0.0546,
"step": 368
},
{
"epoch": 1.96,
"learning_rate": 5.6426912245042435e-06,
"loss": 0.0653,
"step": 369
},
{
"epoch": 1.97,
"learning_rate": 5.5910691263201985e-06,
"loss": 0.0492,
"step": 370
},
{
"epoch": 1.97,
"learning_rate": 5.5395924592025384e-06,
"loss": 0.0595,
"step": 371
},
{
"epoch": 1.98,
"learning_rate": 5.488262921138098e-06,
"loss": 0.0607,
"step": 372
},
{
"epoch": 1.98,
"learning_rate": 5.437082205260593e-06,
"loss": 0.0571,
"step": 373
},
{
"epoch": 1.99,
"learning_rate": 5.3860519997947295e-06,
"loss": 0.0818,
"step": 374
},
{
"epoch": 1.99,
"learning_rate": 5.335173988000566e-06,
"loss": 0.0599,
"step": 375
},
{
"epoch": 2.0,
"learning_rate": 5.284449848117954e-06,
"loss": 0.052,
"step": 376
}
],
"max_steps": 564,
"num_train_epochs": 3,
"total_flos": 20423908786176.0,
"trial_name": null,
"trial_params": null
}