sujithatz/finbot-quen2.5-merged_adapter

Commit 10cfcac (verified) · committed by sujithatz · Parent: 2f72c67

README.md CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 
  This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.7650
 
  ## Model description
 
@@ -37,216 +37,808 @@ More information needed
  ### Training hyperparameters
 
  The following hyperparameters were used during training:
- - learning_rate: 0.0001
  - train_batch_size: 4
  - eval_batch_size: 4
  - seed: 0
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_ratio: 0.01
- - num_epochs: 8
 
  ### Training results
 
51
- | Training Loss | Epoch | Step | Validation Loss |
52
- |:-------------:|:------:|:----:|:---------------:|
53
- | 3.1119 | 0.0405 | 3 | 3.1017 |
54
- | 2.8734 | 0.0811 | 6 | 2.6094 |
55
- | 2.4188 | 0.1216 | 9 | 2.1546 |
56
- | 1.9335 | 0.1622 | 12 | 1.7862 |
57
- | 1.5361 | 0.2027 | 15 | 1.4732 |
58
- | 1.314 | 0.2432 | 18 | 1.2791 |
59
- | 1.1747 | 0.2838 | 21 | 1.1653 |
60
- | 1.1407 | 0.3243 | 24 | 1.1104 |
61
- | 1.1734 | 0.3649 | 27 | 1.0887 |
62
- | 1.0495 | 0.4054 | 30 | 1.0401 |
63
- | 1.0401 | 0.4459 | 33 | 0.9906 |
64
- | 1.0623 | 0.4865 | 36 | 0.9594 |
65
- | 0.8152 | 0.5270 | 39 | 0.9367 |
66
- | 0.9261 | 0.5676 | 42 | 0.9248 |
67
- | 0.9469 | 0.6081 | 45 | 0.9135 |
68
- | 0.8381 | 0.6486 | 48 | 0.8871 |
69
- | 0.8456 | 0.6892 | 51 | 0.8553 |
70
- | 0.908 | 0.7297 | 54 | 0.8272 |
71
- | 0.999 | 0.7703 | 57 | 0.8152 |
72
- | 0.897 | 0.8108 | 60 | 0.8053 |
73
- | 0.8747 | 0.8514 | 63 | 0.7915 |
74
- | 1.0646 | 0.8919 | 66 | 0.7873 |
75
- | 0.6771 | 0.9324 | 69 | 0.7815 |
76
- | 0.9386 | 0.9730 | 72 | 0.7635 |
77
- | 0.6348 | 1.0135 | 75 | 0.7552 |
78
- | 0.5763 | 1.0541 | 78 | 0.7474 |
79
- | 0.7059 | 1.0946 | 81 | 0.7431 |
80
- | 0.6111 | 1.1351 | 84 | 0.7394 |
81
- | 0.6937 | 1.1757 | 87 | 0.7416 |
82
- | 0.6408 | 1.2162 | 90 | 0.7332 |
83
- | 0.5931 | 1.2568 | 93 | 0.7319 |
84
- | 0.5792 | 1.2973 | 96 | 0.7179 |
85
- | 0.5502 | 1.3378 | 99 | 0.7083 |
86
- | 0.7356 | 1.3784 | 102 | 0.7049 |
87
- | 0.5204 | 1.4189 | 105 | 0.6983 |
88
- | 0.6132 | 1.4595 | 108 | 0.6858 |
89
- | 0.7527 | 1.5 | 111 | 0.6744 |
90
- | 0.6798 | 1.5405 | 114 | 0.6716 |
91
- | 0.7266 | 1.5811 | 117 | 0.6656 |
92
- | 0.5347 | 1.6216 | 120 | 0.6711 |
93
- | 0.6522 | 1.6622 | 123 | 0.6680 |
94
- | 0.8567 | 1.7027 | 126 | 0.6558 |
95
- | 0.5204 | 1.7432 | 129 | 0.6488 |
96
- | 0.6443 | 1.7838 | 132 | 0.6393 |
97
- | 0.5436 | 1.8243 | 135 | 0.6322 |
98
- | 0.6054 | 1.8649 | 138 | 0.6346 |
99
- | 0.4488 | 1.9054 | 141 | 0.6389 |
100
- | 0.7171 | 1.9459 | 144 | 0.6322 |
101
- | 0.5383 | 1.9865 | 147 | 0.6273 |
102
- | 0.4812 | 2.0270 | 150 | 0.6235 |
103
- | 0.7911 | 2.0676 | 153 | 0.6296 |
104
- | 0.514 | 2.1081 | 156 | 0.6444 |
105
- | 0.4481 | 2.1486 | 159 | 0.6453 |
106
- | 0.3983 | 2.1892 | 162 | 0.6357 |
107
- | 0.2386 | 2.2297 | 165 | 0.6327 |
108
- | 0.4447 | 2.2703 | 168 | 0.6359 |
109
- | 0.4302 | 2.3108 | 171 | 0.6387 |
110
- | 0.4514 | 2.3514 | 174 | 0.6402 |
111
- | 0.2813 | 2.3919 | 177 | 0.6313 |
112
- | 0.4747 | 2.4324 | 180 | 0.6259 |
113
- | 0.547 | 2.4730 | 183 | 0.6257 |
114
- | 0.3154 | 2.5135 | 186 | 0.6247 |
115
- | 0.5583 | 2.5541 | 189 | 0.6130 |
116
- | 0.5726 | 2.5946 | 192 | 0.6087 |
117
- | 0.4701 | 2.6351 | 195 | 0.6086 |
118
- | 0.4866 | 2.6757 | 198 | 0.6133 |
119
- | 0.4015 | 2.7162 | 201 | 0.6213 |
120
- | 0.4277 | 2.7568 | 204 | 0.6238 |
121
- | 0.3157 | 2.7973 | 207 | 0.6249 |
122
- | 0.3883 | 2.8378 | 210 | 0.6210 |
123
- | 0.5436 | 2.8784 | 213 | 0.6127 |
124
- | 0.526 | 2.9189 | 216 | 0.6056 |
125
- | 0.4465 | 2.9595 | 219 | 0.5982 |
126
- | 0.3692 | 3.0 | 222 | 0.5917 |
127
- | 0.3022 | 3.0405 | 225 | 0.5956 |
128
- | 0.2875 | 3.0811 | 228 | 0.6153 |
129
- | 0.5544 | 3.1216 | 231 | 0.6343 |
130
- | 0.4159 | 3.1622 | 234 | 0.6370 |
131
- | 0.4963 | 3.2027 | 237 | 0.6289 |
132
- | 0.3248 | 3.2432 | 240 | 0.6190 |
133
- | 0.349 | 3.2838 | 243 | 0.6111 |
134
- | 0.3107 | 3.3243 | 246 | 0.6121 |
135
- | 0.2219 | 3.3649 | 249 | 0.6196 |
136
- | 0.251 | 3.4054 | 252 | 0.6273 |
137
- | 0.3366 | 3.4459 | 255 | 0.6259 |
138
- | 0.3182 | 3.4865 | 258 | 0.6243 |
139
- | 0.4133 | 3.5270 | 261 | 0.6260 |
140
- | 0.3297 | 3.5676 | 264 | 0.6303 |
141
- | 0.2276 | 3.6081 | 267 | 0.6365 |
142
- | 0.1842 | 3.6486 | 270 | 0.6383 |
143
- | 0.4961 | 3.6892 | 273 | 0.6314 |
144
- | 0.3785 | 3.7297 | 276 | 0.6240 |
145
- | 0.2834 | 3.7703 | 279 | 0.6189 |
146
- | 0.3397 | 3.8108 | 282 | 0.6139 |
147
- | 0.2795 | 3.8514 | 285 | 0.6104 |
148
- | 0.3114 | 3.8919 | 288 | 0.6078 |
149
- | 0.2395 | 3.9324 | 291 | 0.6111 |
150
- | 0.1948 | 3.9730 | 294 | 0.6142 |
151
- | 0.2284 | 4.0135 | 297 | 0.6194 |
152
- | 0.2638 | 4.0541 | 300 | 0.6303 |
153
- | 0.252 | 4.0946 | 303 | 0.6517 |
154
- | 0.183 | 4.1351 | 306 | 0.6645 |
155
- | 0.2415 | 4.1757 | 309 | 0.6604 |
156
- | 0.1791 | 4.2162 | 312 | 0.6530 |
157
- | 0.2013 | 4.2568 | 315 | 0.6489 |
158
- | 0.1827 | 4.2973 | 318 | 0.6532 |
159
- | 0.1792 | 4.3378 | 321 | 0.6585 |
160
- | 0.2568 | 4.3784 | 324 | 0.6628 |
161
- | 0.1682 | 4.4189 | 327 | 0.6650 |
162
- | 0.25 | 4.4595 | 330 | 0.6730 |
163
- | 0.2664 | 4.5 | 333 | 0.6741 |
164
- | 0.2316 | 4.5405 | 336 | 0.6773 |
165
- | 0.3467 | 4.5811 | 339 | 0.6705 |
166
- | 0.2426 | 4.6216 | 342 | 0.6681 |
167
- | 0.2421 | 4.6622 | 345 | 0.6622 |
168
- | 0.4063 | 4.7027 | 348 | 0.6609 |
169
- | 0.2255 | 4.7432 | 351 | 0.6569 |
170
- | 0.1933 | 4.7838 | 354 | 0.6534 |
171
- | 0.1975 | 4.8243 | 357 | 0.6497 |
172
- | 0.3065 | 4.8649 | 360 | 0.6491 |
173
- | 0.1641 | 4.9054 | 363 | 0.6513 |
174
- | 0.2032 | 4.9459 | 366 | 0.6553 |
175
- | 0.1886 | 4.9865 | 369 | 0.6590 |
176
- | 0.2539 | 5.0270 | 372 | 0.6677 |
177
- | 0.1464 | 5.0676 | 375 | 0.6879 |
178
- | 0.2295 | 5.1081 | 378 | 0.7066 |
179
- | 0.1438 | 5.1486 | 381 | 0.7167 |
180
- | 0.1603 | 5.1892 | 384 | 0.7135 |
181
- | 0.1379 | 5.2297 | 387 | 0.7095 |
182
- | 0.2783 | 5.2703 | 390 | 0.7103 |
183
- | 0.1575 | 5.3108 | 393 | 0.7042 |
184
- | 0.1743 | 5.3514 | 396 | 0.7114 |
185
- | 0.1168 | 5.3919 | 399 | 0.7083 |
186
- | 0.284 | 5.4324 | 402 | 0.7072 |
187
- | 0.1013 | 5.4730 | 405 | 0.7063 |
188
- | 0.2612 | 5.5135 | 408 | 0.7051 |
189
- | 0.1343 | 5.5541 | 411 | 0.7103 |
190
- | 0.1009 | 5.5946 | 414 | 0.7105 |
191
- | 0.3127 | 5.6351 | 417 | 0.7090 |
192
- | 0.1355 | 5.6757 | 420 | 0.7060 |
193
- | 0.1345 | 5.7162 | 423 | 0.7057 |
194
- | 0.1865 | 5.7568 | 426 | 0.7039 |
195
- | 0.119 | 5.7973 | 429 | 0.7067 |
196
- | 0.149 | 5.8378 | 432 | 0.7120 |
197
- | 0.1748 | 5.8784 | 435 | 0.7123 |
198
- | 0.1781 | 5.9189 | 438 | 0.7113 |
199
- | 0.2075 | 5.9595 | 441 | 0.7118 |
200
- | 0.1591 | 6.0 | 444 | 0.7112 |
201
- | 0.1224 | 6.0405 | 447 | 0.7118 |
202
- | 0.2234 | 6.0811 | 450 | 0.7171 |
203
- | 0.1165 | 6.1216 | 453 | 0.7227 |
204
- | 0.2518 | 6.1622 | 456 | 0.7326 |
205
- | 0.1288 | 6.2027 | 459 | 0.7431 |
206
- | 0.1176 | 6.2432 | 462 | 0.7481 |
207
- | 0.1231 | 6.2838 | 465 | 0.7511 |
208
- | 0.146 | 6.3243 | 468 | 0.7530 |
209
- | 0.1169 | 6.3649 | 471 | 0.7518 |
210
- | 0.1048 | 6.4054 | 474 | 0.7541 |
211
- | 0.1092 | 6.4459 | 477 | 0.7507 |
212
- | 0.1458 | 6.4865 | 480 | 0.7518 |
213
- | 0.2177 | 6.5270 | 483 | 0.7509 |
214
- | 0.1585 | 6.5676 | 486 | 0.7510 |
215
- | 0.1337 | 6.6081 | 489 | 0.7546 |
216
- | 0.122 | 6.6486 | 492 | 0.7524 |
217
- | 0.0793 | 6.6892 | 495 | 0.7554 |
218
- | 0.1544 | 6.7297 | 498 | 0.7521 |
219
- | 0.1387 | 6.7703 | 501 | 0.7529 |
220
- | 0.1253 | 6.8108 | 504 | 0.7529 |
221
- | 0.1296 | 6.8514 | 507 | 0.7542 |
222
- | 0.1094 | 6.8919 | 510 | 0.7516 |
223
- | 0.1009 | 6.9324 | 513 | 0.7512 |
224
- | 0.1871 | 6.9730 | 516 | 0.7526 |
225
- | 0.1017 | 7.0135 | 519 | 0.7538 |
226
- | 0.1536 | 7.0541 | 522 | 0.7529 |
227
- | 0.0886 | 7.0946 | 525 | 0.7565 |
228
- | 0.0983 | 7.1351 | 528 | 0.7576 |
229
- | 0.084 | 7.1757 | 531 | 0.7586 |
230
- | 0.099 | 7.2162 | 534 | 0.7564 |
231
- | 0.1089 | 7.2568 | 537 | 0.7592 |
232
- | 0.1145 | 7.2973 | 540 | 0.7612 |
233
- | 0.1823 | 7.3378 | 543 | 0.7615 |
234
- | 0.2784 | 7.3784 | 546 | 0.7602 |
235
- | 0.0887 | 7.4189 | 549 | 0.7617 |
236
- | 0.1133 | 7.4595 | 552 | 0.7640 |
237
- | 0.1207 | 7.5 | 555 | 0.7630 |
238
- | 0.0954 | 7.5405 | 558 | 0.7643 |
239
- | 0.1135 | 7.5811 | 561 | 0.7629 |
240
- | 0.096 | 7.6216 | 564 | 0.7637 |
241
- | 0.1088 | 7.6622 | 567 | 0.7655 |
242
- | 0.1628 | 7.7027 | 570 | 0.7655 |
243
- | 0.1431 | 7.7432 | 573 | 0.7650 |
244
- | 0.1034 | 7.7838 | 576 | 0.7639 |
245
- | 0.181 | 7.8243 | 579 | 0.7637 |
246
- | 0.062 | 7.8649 | 582 | 0.7649 |
247
- | 0.0769 | 7.9054 | 585 | 0.7654 |
248
- | 0.1951 | 7.9459 | 588 | 0.7647 |
249
- | 0.1175 | 7.9865 | 591 | 0.7650 |
 
 
  ### Framework versions

 
  This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on an unknown dataset.
  It achieves the following results on the evaluation set:
+ - Loss: 0.7387
 
  ## Model description
 
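For reference alongside the updated card, here is a minimal inference sketch. It assumes this repository is loaded as a PEFT adapter on top of the base model (consistent with the adapter_config.json and adapter_model.safetensors changes later in this commit), with the tokenizer taken from the base checkpoint; the example prompt is illustrative only.

```python
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Adapter repository from this commit; the base model is resolved from adapter_config.json.
adapter_id = "sujithatz/finbot-quen2.5-merged_adapter"

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
model = AutoPeftModelForCausalLM.from_pretrained(adapter_id)

# Illustrative prompt only; the training dataset is listed as unknown.
messages = [{"role": "user", "content": "Explain what an expense ratio is."}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
output_ids = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```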
 
  ### Training hyperparameters
 
  The following hyperparameters were used during training:
+ - learning_rate: 4e-05
  - train_batch_size: 4
  - eval_batch_size: 4
  - seed: 0
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_ratio: 0.01
+ - num_epochs: 32
 
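For readers reproducing this run, the hyperparameters above map onto Hugging Face `TrainingArguments` roughly as sketched below. This is a sketch, not the training script from this repo: `output_dir` is a placeholder, `train_batch_size` is assumed to mean the per-device batch size (the usual convention in auto-generated cards), and any argument not listed in the card keeps its default.

```python
from transformers import TrainingArguments

# Sketch of the hyperparameters listed above; unlisted arguments keep their defaults.
training_args = TrainingArguments(
    output_dir="finbot-qwen2.5-lora",   # placeholder, not taken from the card
    learning_rate=4e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    seed=0,
    optim="adamw_torch",                # betas=(0.9, 0.999) and eps=1e-08 are the AdamW defaults
    lr_scheduler_type="cosine",
    warmup_ratio=0.01,
    num_train_epochs=32,
)
```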
  ### Training results
 
51
+ | Training Loss | Epoch | Step | Validation Loss |
52
+ |:-------------:|:-------:|:----:|:---------------:|
53
+ | 3.1208 | 0.0405 | 3 | 3.2027 |
54
+ | 3.1051 | 0.0811 | 6 | 3.1913 |
55
+ | 3.1475 | 0.1216 | 9 | 3.1761 |
56
+ | 2.9958 | 0.1622 | 12 | 3.1458 |
57
+ | 2.8187 | 0.2027 | 15 | 3.1096 |
58
+ | 2.8758 | 0.2432 | 18 | 3.0628 |
59
+ | 2.8679 | 0.2838 | 21 | 3.0088 |
60
+ | 2.8846 | 0.3243 | 24 | 2.9546 |
61
+ | 2.7974 | 0.3649 | 27 | 2.8958 |
62
+ | 2.8269 | 0.4054 | 30 | 2.8389 |
63
+ | 2.7379 | 0.4459 | 33 | 2.7776 |
64
+ | 2.6123 | 0.4865 | 36 | 2.7106 |
65
+ | 2.5641 | 0.5270 | 39 | 2.6495 |
66
+ | 2.5012 | 0.5676 | 42 | 2.5868 |
67
+ | 2.4825 | 0.6081 | 45 | 2.5148 |
68
+ | 2.336 | 0.6486 | 48 | 2.4660 |
69
+ | 2.4145 | 0.6892 | 51 | 2.4062 |
70
+ | 2.3906 | 0.7297 | 54 | 2.3530 |
71
+ | 2.2689 | 0.7703 | 57 | 2.2990 |
72
+ | 2.2372 | 0.8108 | 60 | 2.2181 |
73
+ | 2.2054 | 0.8514 | 63 | 2.1602 |
74
+ | 2.1776 | 0.8919 | 66 | 2.0944 |
75
+ | 1.9452 | 0.9324 | 69 | 2.0230 |
76
+ | 2.0305 | 0.9730 | 72 | 1.9619 |
77
+ | 1.8456 | 1.0135 | 75 | 1.8966 |
78
+ | 1.8186 | 1.0541 | 78 | 1.8361 |
79
+ | 1.7897 | 1.0946 | 81 | 1.7808 |
80
+ | 1.7574 | 1.1351 | 84 | 1.7257 |
81
+ | 1.7225 | 1.1757 | 87 | 1.6673 |
82
+ | 1.5421 | 1.2162 | 90 | 1.6174 |
83
+ | 1.5612 | 1.2568 | 93 | 1.5772 |
84
+ | 1.5226 | 1.2973 | 96 | 1.5207 |
85
+ | 1.4257 | 1.3378 | 99 | 1.4864 |
86
+ | 1.5004 | 1.3784 | 102 | 1.4500 |
87
+ | 1.3826 | 1.4189 | 105 | 1.4225 |
88
+ | 1.4051 | 1.4595 | 108 | 1.3967 |
89
+ | 1.4726 | 1.5 | 111 | 1.3684 |
90
+ | 1.2813 | 1.5405 | 114 | 1.3477 |
91
+ | 1.3158 | 1.5811 | 117 | 1.3245 |
92
+ | 1.2704 | 1.6216 | 120 | 1.3132 |
93
+ | 1.2899 | 1.6622 | 123 | 1.2953 |
94
+ | 1.5335 | 1.7027 | 126 | 1.2824 |
95
+ | 1.2311 | 1.7432 | 129 | 1.2727 |
96
+ | 1.2331 | 1.7838 | 132 | 1.2568 |
97
+ | 1.2152 | 1.8243 | 135 | 1.2386 |
98
+ | 1.2849 | 1.8649 | 138 | 1.2285 |
99
+ | 1.1681 | 1.9054 | 141 | 1.2190 |
100
+ | 1.2107 | 1.9459 | 144 | 1.2137 |
101
+ | 1.122 | 1.9865 | 147 | 1.2030 |
102
+ | 1.2358 | 2.0270 | 150 | 1.1939 |
103
+ | 1.444 | 2.0676 | 153 | 1.1884 |
104
+ | 1.2996 | 2.1081 | 156 | 1.1801 |
105
+ | 1.1422 | 2.1486 | 159 | 1.1748 |
106
+ | 1.1755 | 2.1892 | 162 | 1.1672 |
107
+ | 1.0404 | 2.2297 | 165 | 1.1653 |
108
+ | 1.1468 | 2.2703 | 168 | 1.1548 |
109
+ | 1.1091 | 2.3108 | 171 | 1.1488 |
110
+ | 1.1161 | 2.3514 | 174 | 1.1533 |
111
+ | 1.0319 | 2.3919 | 177 | 1.1421 |
112
+ | 1.0984 | 2.4324 | 180 | 1.1380 |
113
+ | 1.1694 | 2.4730 | 183 | 1.1273 |
114
+ | 0.9938 | 2.5135 | 186 | 1.1238 |
115
+ | 1.1249 | 2.5541 | 189 | 1.1151 |
116
+ | 1.2545 | 2.5946 | 192 | 1.1133 |
117
+ | 1.1458 | 2.6351 | 195 | 1.1104 |
118
+ | 1.1756 | 2.6757 | 198 | 1.1031 |
119
+ | 1.0413 | 2.7162 | 201 | 1.0978 |
120
+ | 1.0521 | 2.7568 | 204 | 1.0915 |
121
+ | 1.0209 | 2.7973 | 207 | 1.0917 |
122
+ | 0.9818 | 2.8378 | 210 | 1.0888 |
123
+ | 1.1486 | 2.8784 | 213 | 1.0861 |
124
+ | 1.0615 | 2.9189 | 216 | 1.0838 |
125
+ | 1.0094 | 2.9595 | 219 | 1.0781 |
126
+ | 1.027 | 3.0 | 222 | 1.0788 |
127
+ | 1.0367 | 3.0405 | 225 | 1.0744 |
128
+ | 1.0732 | 3.0811 | 228 | 1.0692 |
129
+ | 1.2963 | 3.1216 | 231 | 1.0681 |
130
+ | 1.0756 | 3.1622 | 234 | 1.0666 |
131
+ | 1.281 | 3.2027 | 237 | 1.0622 |
132
+ | 1.0219 | 3.2432 | 240 | 1.0607 |
133
+ | 1.0859 | 3.2838 | 243 | 1.0617 |
134
+ | 1.0362 | 3.3243 | 246 | 1.0554 |
135
+ | 0.9374 | 3.3649 | 249 | 1.0506 |
136
+ | 0.9561 | 3.4054 | 252 | 1.0439 |
137
+ | 1.0258 | 3.4459 | 255 | 1.0428 |
138
+ | 1.0305 | 3.4865 | 258 | 1.0417 |
139
+ | 1.0936 | 3.5270 | 261 | 1.0401 |
140
+ | 1.0279 | 3.5676 | 264 | 1.0381 |
141
+ | 0.9374 | 3.6081 | 267 | 1.0337 |
142
+ | 0.9181 | 3.6486 | 270 | 1.0314 |
143
+ | 1.1563 | 3.6892 | 273 | 1.0308 |
144
+ | 1.0048 | 3.7297 | 276 | 1.0329 |
145
+ | 1.0085 | 3.7703 | 279 | 1.0318 |
146
+ | 0.9796 | 3.8108 | 282 | 1.0305 |
147
+ | 1.0081 | 3.8514 | 285 | 1.0257 |
148
+ | 0.9767 | 3.8919 | 288 | 1.0221 |
149
+ | 0.9264 | 3.9324 | 291 | 1.0196 |
150
+ | 0.8994 | 3.9730 | 294 | 1.0177 |
151
+ | 0.8931 | 4.0135 | 297 | 1.0175 |
152
+ | 1.0456 | 4.0541 | 300 | 1.0140 |
153
+ | 0.9918 | 4.0946 | 303 | 1.0127 |
154
+ | 0.9371 | 4.1351 | 306 | 1.0089 |
155
+ | 0.9584 | 4.1757 | 309 | 1.0143 |
156
+ | 0.869 | 4.2162 | 312 | 1.0034 |
157
+ | 0.9225 | 4.2568 | 315 | 0.9995 |
158
+ | 0.9197 | 4.2973 | 318 | 1.0017 |
159
+ | 0.9189 | 4.3378 | 321 | 0.9999 |
160
+ | 0.9804 | 4.3784 | 324 | 0.9972 |
161
+ | 0.875 | 4.4189 | 327 | 0.9972 |
162
+ | 0.9624 | 4.4595 | 330 | 0.9968 |
163
+ | 1.0302 | 4.5 | 333 | 0.9937 |
164
+ | 0.9522 | 4.5405 | 336 | 0.9913 |
165
+ | 1.1925 | 4.5811 | 339 | 0.9910 |
166
+ | 0.9591 | 4.6216 | 342 | 0.9883 |
167
+ | 0.9859 | 4.6622 | 345 | 0.9852 |
168
+ | 1.1778 | 4.7027 | 348 | 0.9862 |
169
+ | 0.9507 | 4.7432 | 351 | 0.9867 |
170
+ | 0.9281 | 4.7838 | 354 | 0.9822 |
171
+ | 0.8874 | 4.8243 | 357 | 0.9779 |
172
+ | 1.0883 | 4.8649 | 360 | 0.9748 |
173
+ | 0.8251 | 4.9054 | 363 | 0.9764 |
174
+ | 0.9055 | 4.9459 | 366 | 0.9728 |
175
+ | 0.8919 | 4.9865 | 369 | 0.9768 |
176
+ | 1.0278 | 5.0270 | 372 | 0.9694 |
177
+ | 0.8941 | 5.0676 | 375 | 0.9679 |
178
+ | 1.0327 | 5.1081 | 378 | 0.9690 |
179
+ | 0.9175 | 5.1486 | 381 | 0.9655 |
180
+ | 0.8924 | 5.1892 | 384 | 0.9606 |
181
+ | 0.8728 | 5.2297 | 387 | 0.9610 |
182
+ | 1.1136 | 5.2703 | 390 | 0.9620 |
183
+ | 0.9395 | 5.3108 | 393 | 0.9580 |
184
+ | 0.9548 | 5.3514 | 396 | 0.9563 |
185
+ | 0.8094 | 5.3919 | 399 | 0.9535 |
186
+ | 1.1105 | 5.4324 | 402 | 0.9547 |
187
+ | 0.7559 | 5.4730 | 405 | 0.9552 |
188
+ | 1.0868 | 5.5135 | 408 | 0.9564 |
189
+ | 0.8924 | 5.5541 | 411 | 0.9534 |
190
+ | 0.7555 | 5.5946 | 414 | 0.9531 |
191
+ | 1.1301 | 5.6351 | 417 | 0.9530 |
192
+ | 0.853 | 5.6757 | 420 | 0.9504 |
193
+ | 0.8389 | 5.7162 | 423 | 0.9505 |
194
+ | 0.9227 | 5.7568 | 426 | 0.9475 |
195
+ | 0.8284 | 5.7973 | 429 | 0.9471 |
196
+ | 0.8861 | 5.8378 | 432 | 0.9462 |
197
+ | 0.8685 | 5.8784 | 435 | 0.9439 |
198
+ | 0.9224 | 5.9189 | 438 | 0.9397 |
199
+ | 1.0131 | 5.9595 | 441 | 0.9377 |
200
+ | 0.8987 | 6.0 | 444 | 0.9383 |
201
+ | 0.8799 | 6.0405 | 447 | 0.9382 |
202
+ | 1.0713 | 6.0811 | 450 | 0.9383 |
203
+ | 0.8875 | 6.1216 | 453 | 0.9358 |
204
+ | 1.1155 | 6.1622 | 456 | 0.9374 |
205
+ | 0.9545 | 6.2027 | 459 | 0.9332 |
206
+ | 0.856 | 6.2432 | 462 | 0.9301 |
207
+ | 0.8397 | 6.2838 | 465 | 0.9293 |
208
+ | 0.8901 | 6.3243 | 468 | 0.9270 |
209
+ | 0.8582 | 6.3649 | 471 | 0.9296 |
210
+ | 0.8374 | 6.4054 | 474 | 0.9270 |
211
+ | 0.8233 | 6.4459 | 477 | 0.9228 |
212
+ | 0.8611 | 6.4865 | 480 | 0.9205 |
213
+ | 0.9882 | 6.5270 | 483 | 0.9222 |
214
+ | 0.878 | 6.5676 | 486 | 0.9231 |
215
+ | 0.8198 | 6.6081 | 489 | 0.9216 |
216
+ | 0.8485 | 6.6486 | 492 | 0.9220 |
217
+ | 0.7551 | 6.6892 | 495 | 0.9232 |
218
+ | 0.9539 | 6.7297 | 498 | 0.9212 |
219
+ | 0.9046 | 6.7703 | 501 | 0.9219 |
220
+ | 0.8941 | 6.8108 | 504 | 0.9213 |
221
+ | 0.8578 | 6.8514 | 507 | 0.9151 |
222
+ | 0.8106 | 6.8919 | 510 | 0.9157 |
223
+ | 0.7412 | 6.9324 | 513 | 0.9164 |
224
+ | 0.9782 | 6.9730 | 516 | 0.9154 |
225
+ | 0.8391 | 7.0135 | 519 | 0.9178 |
226
+ | 1.0248 | 7.0541 | 522 | 0.9198 |
227
+ | 0.7815 | 7.0946 | 525 | 0.9139 |
228
+ | 0.7832 | 7.1351 | 528 | 0.9081 |
229
+ | 0.7286 | 7.1757 | 531 | 0.9074 |
230
+ | 0.7792 | 7.2162 | 534 | 0.9132 |
231
+ | 0.8675 | 7.2568 | 537 | 0.9078 |
232
+ | 0.8072 | 7.2973 | 540 | 0.9065 |
233
+ | 0.9848 | 7.3378 | 543 | 0.9038 |
234
+ | 1.1688 | 7.3784 | 546 | 0.9072 |
235
+ | 0.7637 | 7.4189 | 549 | 0.9032 |
236
+ | 0.8637 | 7.4595 | 552 | 0.8992 |
237
+ | 0.873 | 7.5 | 555 | 0.9019 |
238
+ | 0.8521 | 7.5405 | 558 | 0.9017 |
239
+ | 0.8397 | 7.5811 | 561 | 0.9028 |
240
+ | 0.8654 | 7.6216 | 564 | 0.8940 |
241
+ | 0.7825 | 7.6622 | 567 | 0.8960 |
242
+ | 0.929 | 7.7027 | 570 | 0.8933 |
243
+ | 0.8989 | 7.7432 | 573 | 0.8914 |
244
+ | 0.757 | 7.7838 | 576 | 0.8910 |
245
+ | 1.0276 | 7.8243 | 579 | 0.8913 |
246
+ | 0.6477 | 7.8649 | 582 | 0.8910 |
247
+ | 0.7152 | 7.9054 | 585 | 0.8939 |
248
+ | 0.9926 | 7.9459 | 588 | 0.8940 |
249
+ | 0.9186 | 7.9865 | 591 | 0.8911 |
250
+ | 0.7522 | 8.0270 | 594 | 0.8887 |
251
+ | 0.741 | 8.0676 | 597 | 0.8860 |
252
+ | 0.704 | 8.1081 | 600 | 0.8850 |
253
+ | 0.6462 | 8.1486 | 603 | 0.8892 |
254
+ | 0.8187 | 8.1892 | 606 | 0.8826 |
255
+ | 0.8307 | 8.2297 | 609 | 0.8789 |
256
+ | 0.851 | 8.2703 | 612 | 0.8809 |
257
+ | 1.031 | 8.3108 | 615 | 0.8828 |
258
+ | 1.0176 | 8.3514 | 618 | 0.8813 |
259
+ | 0.7549 | 8.3919 | 621 | 0.8778 |
260
+ | 0.8189 | 8.4324 | 624 | 0.8763 |
261
+ | 1.1007 | 8.4730 | 627 | 0.8750 |
262
+ | 0.8216 | 8.5135 | 630 | 0.8762 |
263
+ | 0.715 | 8.5541 | 633 | 0.8743 |
264
+ | 0.8203 | 8.5946 | 636 | 0.8763 |
265
+ | 0.7866 | 8.6351 | 639 | 0.8748 |
266
+ | 0.9153 | 8.6757 | 642 | 0.8728 |
267
+ | 0.9688 | 8.7162 | 645 | 0.8685 |
268
+ | 0.6317 | 8.7568 | 648 | 0.8710 |
269
+ | 0.7267 | 8.7973 | 651 | 0.8707 |
270
+ | 0.8292 | 8.8378 | 654 | 0.8691 |
271
+ | 0.8516 | 8.8784 | 657 | 0.8648 |
272
+ | 0.7637 | 8.9189 | 660 | 0.8676 |
273
+ | 0.7873 | 8.9595 | 663 | 0.8703 |
274
+ | 0.9375 | 9.0 | 666 | 0.8711 |
275
+ | 0.7844 | 9.0405 | 669 | 0.8634 |
276
+ | 0.7969 | 9.0811 | 672 | 0.8639 |
277
+ | 1.0146 | 9.1216 | 675 | 0.8647 |
278
+ | 0.9386 | 9.1622 | 678 | 0.8636 |
279
+ | 0.7381 | 9.2027 | 681 | 0.8611 |
280
+ | 0.7708 | 9.2432 | 684 | 0.8619 |
281
+ | 0.8501 | 9.2838 | 687 | 0.8636 |
282
+ | 1.0895 | 9.3243 | 690 | 0.8624 |
283
+ | 0.7784 | 9.3649 | 693 | 0.8624 |
284
+ | 0.7794 | 9.4054 | 696 | 0.8637 |
285
+ | 0.6984 | 9.4459 | 699 | 0.8590 |
286
+ | 0.9442 | 9.4865 | 702 | 0.8558 |
287
+ | 0.8026 | 9.5270 | 705 | 0.8558 |
288
+ | 0.8767 | 9.5676 | 708 | 0.8536 |
289
+ | 0.7298 | 9.6081 | 711 | 0.8511 |
290
+ | 0.6928 | 9.6486 | 714 | 0.8524 |
291
+ | 0.7068 | 9.6892 | 717 | 0.8532 |
292
+ | 0.7159 | 9.7297 | 720 | 0.8539 |
293
+ | 0.627 | 9.7703 | 723 | 0.8534 |
294
+ | 0.7097 | 9.8108 | 726 | 0.8557 |
295
+ | 0.6333 | 9.8514 | 729 | 0.8602 |
296
+ | 0.858 | 9.8919 | 732 | 0.8563 |
297
+ | 0.8599 | 9.9324 | 735 | 0.8547 |
298
+ | 0.8943 | 9.9730 | 738 | 0.8533 |
299
+ | 0.6119 | 10.0135 | 741 | 0.8536 |
300
+ | 0.7747 | 10.0541 | 744 | 0.8496 |
301
+ | 0.6964 | 10.0946 | 747 | 0.8481 |
302
+ | 0.8405 | 10.1351 | 750 | 0.8477 |
303
+ | 0.7406 | 10.1757 | 753 | 0.8468 |
304
+ | 0.8602 | 10.2162 | 756 | 0.8464 |
305
+ | 0.7174 | 10.2568 | 759 | 0.8424 |
306
+ | 0.8197 | 10.2973 | 762 | 0.8421 |
307
+ | 0.9546 | 10.3378 | 765 | 0.8408 |
308
+ | 0.8244 | 10.3784 | 768 | 0.8390 |
309
+ | 0.8684 | 10.4189 | 771 | 0.8381 |
310
+ | 0.6375 | 10.4595 | 774 | 0.8381 |
311
+ | 1.0448 | 10.5 | 777 | 0.8379 |
312
+ | 0.7016 | 10.5405 | 780 | 0.8392 |
313
+ | 0.7816 | 10.5811 | 783 | 0.8380 |
314
+ | 0.7377 | 10.6216 | 786 | 0.8390 |
315
+ | 0.8123 | 10.6622 | 789 | 0.8397 |
316
+ | 0.7813 | 10.7027 | 792 | 0.8403 |
317
+ | 0.7057 | 10.7432 | 795 | 0.8377 |
318
+ | 0.8193 | 10.7838 | 798 | 0.8391 |
319
+ | 0.641 | 10.8243 | 801 | 0.8395 |
320
+ | 0.7123 | 10.8649 | 804 | 0.8363 |
321
+ | 0.895 | 10.9054 | 807 | 0.8356 |
322
+ | 0.7314 | 10.9459 | 810 | 0.8318 |
323
+ | 0.6816 | 10.9865 | 813 | 0.8328 |
324
+ | 0.7839 | 11.0270 | 816 | 0.8333 |
325
+ | 0.6903 | 11.0676 | 819 | 0.8403 |
326
+ | 0.8589 | 11.1081 | 822 | 0.8388 |
327
+ | 0.7616 | 11.1486 | 825 | 0.8345 |
328
+ | 0.8326 | 11.1892 | 828 | 0.8337 |
329
+ | 0.6695 | 11.2297 | 831 | 0.8311 |
330
+ | 0.6722 | 11.2703 | 834 | 0.8330 |
331
+ | 0.6469 | 11.3108 | 837 | 0.8343 |
332
+ | 0.7524 | 11.3514 | 840 | 0.8335 |
333
+ | 0.9929 | 11.3919 | 843 | 0.8263 |
334
+ | 0.9125 | 11.4324 | 846 | 0.8253 |
335
+ | 0.7264 | 11.4730 | 849 | 0.8281 |
336
+ | 0.7659 | 11.5135 | 852 | 0.8258 |
337
+ | 0.5996 | 11.5541 | 855 | 0.8266 |
338
+ | 0.9027 | 11.5946 | 858 | 0.8246 |
339
+ | 0.7925 | 11.6351 | 861 | 0.8287 |
340
+ | 0.7139 | 11.6757 | 864 | 0.8277 |
341
+ | 0.8313 | 11.7162 | 867 | 0.8277 |
342
+ | 0.8107 | 11.7568 | 870 | 0.8232 |
343
+ | 0.6551 | 11.7973 | 873 | 0.8238 |
344
+ | 0.7706 | 11.8378 | 876 | 0.8243 |
345
+ | 0.8224 | 11.8784 | 879 | 0.8218 |
346
+ | 0.5772 | 11.9189 | 882 | 0.8190 |
347
+ | 0.7463 | 11.9595 | 885 | 0.8195 |
348
+ | 0.7346 | 12.0 | 888 | 0.8215 |
349
+ | 0.7386 | 12.0405 | 891 | 0.8193 |
350
+ | 0.6977 | 12.0811 | 894 | 0.8177 |
351
+ | 0.7275 | 12.1216 | 897 | 0.8163 |
352
+ | 0.834 | 12.1622 | 900 | 0.8178 |
353
+ | 0.6453 | 12.2027 | 903 | 0.8172 |
354
+ | 0.6808 | 12.2432 | 906 | 0.8179 |
355
+ | 0.8235 | 12.2838 | 909 | 0.8196 |
356
+ | 0.6978 | 12.3243 | 912 | 0.8143 |
357
+ | 0.6775 | 12.3649 | 915 | 0.8141 |
358
+ | 0.7675 | 12.4054 | 918 | 0.8159 |
359
+ | 0.7184 | 12.4459 | 921 | 0.8149 |
360
+ | 0.8134 | 12.4865 | 924 | 0.8155 |
361
+ | 0.7627 | 12.5270 | 927 | 0.8142 |
362
+ | 0.6658 | 12.5676 | 930 | 0.8138 |
363
+ | 0.7203 | 12.6081 | 933 | 0.8094 |
364
+ | 0.8712 | 12.6486 | 936 | 0.8102 |
365
+ | 0.7376 | 12.6892 | 939 | 0.8110 |
366
+ | 0.6303 | 12.7297 | 942 | 0.8112 |
367
+ | 0.8266 | 12.7703 | 945 | 0.8112 |
368
+ | 0.6992 | 12.8108 | 948 | 0.8112 |
369
+ | 0.6973 | 12.8514 | 951 | 0.8204 |
370
+ | 0.7004 | 12.8919 | 954 | 0.8187 |
371
+ | 0.8472 | 12.9324 | 957 | 0.8104 |
372
+ | 0.6852 | 12.9730 | 960 | 0.8076 |
373
+ | 0.7859 | 13.0135 | 963 | 0.8034 |
374
+ | 0.748 | 13.0541 | 966 | 0.8005 |
375
+ | 0.647 | 13.0946 | 969 | 0.8017 |
376
+ | 0.5999 | 13.1351 | 972 | 0.8068 |
377
+ | 0.8251 | 13.1757 | 975 | 0.8085 |
378
+ | 0.751 | 13.2162 | 978 | 0.8019 |
379
+ | 0.8143 | 13.2568 | 981 | 0.8029 |
380
+ | 0.8602 | 13.2973 | 984 | 0.8021 |
381
+ | 0.7271 | 13.3378 | 987 | 0.8047 |
382
+ | 0.6583 | 13.3784 | 990 | 0.8032 |
383
+ | 0.7526 | 13.4189 | 993 | 0.8040 |
384
+ | 0.9622 | 13.4595 | 996 | 0.8047 |
385
+ | 0.7012 | 13.5 | 999 | 0.8016 |
386
+ | 0.7386 | 13.5405 | 1002 | 0.8023 |
387
+ | 0.7437 | 13.5811 | 1005 | 0.7979 |
388
+ | 0.7107 | 13.6216 | 1008 | 0.7971 |
389
+ | 0.5084 | 13.6622 | 1011 | 0.7955 |
390
+ | 0.7172 | 13.7027 | 1014 | 0.7958 |
391
+ | 0.6663 | 13.7432 | 1017 | 0.7957 |
392
+ | 0.6369 | 13.7838 | 1020 | 0.7937 |
393
+ | 0.6689 | 13.8243 | 1023 | 0.7912 |
394
+ | 0.7544 | 13.8649 | 1026 | 0.7928 |
395
+ | 0.7894 | 13.9054 | 1029 | 0.7913 |
396
+ | 0.7585 | 13.9459 | 1032 | 0.7911 |
397
+ | 0.7564 | 13.9865 | 1035 | 0.7915 |
398
+ | 0.7037 | 14.0270 | 1038 | 0.7942 |
399
+ | 0.6665 | 14.0676 | 1041 | 0.7924 |
400
+ | 0.8052 | 14.1081 | 1044 | 0.7961 |
401
+ | 0.7059 | 14.1486 | 1047 | 0.7966 |
402
+ | 0.9295 | 14.1892 | 1050 | 0.7922 |
403
+ | 0.7731 | 14.2297 | 1053 | 0.7905 |
404
+ | 0.7104 | 14.2703 | 1056 | 0.7953 |
405
+ | 0.7656 | 14.3108 | 1059 | 0.7908 |
406
+ | 0.6772 | 14.3514 | 1062 | 0.7932 |
407
+ | 0.564 | 14.3919 | 1065 | 0.7949 |
408
+ | 0.7368 | 14.4324 | 1068 | 0.7923 |
409
+ | 0.7671 | 14.4730 | 1071 | 0.7978 |
410
+ | 0.6184 | 14.5135 | 1074 | 0.7965 |
411
+ | 0.7065 | 14.5541 | 1077 | 0.7918 |
412
+ | 0.611 | 14.5946 | 1080 | 0.7901 |
413
+ | 0.5999 | 14.6351 | 1083 | 0.7906 |
414
+ | 0.7597 | 14.6757 | 1086 | 0.7887 |
415
+ | 0.5979 | 14.7162 | 1089 | 0.7868 |
416
+ | 0.6442 | 14.7568 | 1092 | 0.7880 |
417
+ | 0.7898 | 14.7973 | 1095 | 0.7863 |
418
+ | 0.7596 | 14.8378 | 1098 | 0.7869 |
419
+ | 0.7963 | 14.8784 | 1101 | 0.7873 |
420
+ | 0.7001 | 14.9189 | 1104 | 0.7863 |
421
+ | 0.613 | 14.9595 | 1107 | 0.7827 |
422
+ | 0.6938 | 15.0 | 1110 | 0.7820 |
423
+ | 0.5834 | 15.0405 | 1113 | 0.7846 |
424
+ | 0.8899 | 15.0811 | 1116 | 0.7872 |
425
+ | 0.7885 | 15.1216 | 1119 | 0.7838 |
426
+ | 0.7913 | 15.1622 | 1122 | 0.7838 |
427
+ | 0.6819 | 15.2027 | 1125 | 0.7847 |
428
+ | 0.627 | 15.2432 | 1128 | 0.7884 |
429
+ | 0.7899 | 15.2838 | 1131 | 0.7823 |
430
+ | 0.8729 | 15.3243 | 1134 | 0.7810 |
431
+ | 0.5565 | 15.3649 | 1137 | 0.7767 |
432
+ | 0.65 | 15.4054 | 1140 | 0.7798 |
433
+ | 0.8706 | 15.4459 | 1143 | 0.7759 |
434
+ | 0.6454 | 15.4865 | 1146 | 0.7752 |
435
+ | 0.4782 | 15.5270 | 1149 | 0.7768 |
436
+ | 0.5503 | 15.5676 | 1152 | 0.7770 |
437
+ | 0.6918 | 15.6081 | 1155 | 0.7785 |
438
+ | 0.6653 | 15.6486 | 1158 | 0.7786 |
439
+ | 0.9263 | 15.6892 | 1161 | 0.7767 |
440
+ | 0.555 | 15.7297 | 1164 | 0.7771 |
441
+ | 0.6206 | 15.7703 | 1167 | 0.7828 |
442
+ | 0.8373 | 15.8108 | 1170 | 0.7792 |
443
+ | 0.6903 | 15.8514 | 1173 | 0.7757 |
444
+ | 0.6952 | 15.8919 | 1176 | 0.7767 |
445
+ | 0.6991 | 15.9324 | 1179 | 0.7761 |
446
+ | 0.6659 | 15.9730 | 1182 | 0.7763 |
447
+ | 0.6286 | 16.0135 | 1185 | 0.7761 |
448
+ | 0.5775 | 16.0541 | 1188 | 0.7767 |
449
+ | 0.6576 | 16.0946 | 1191 | 0.7734 |
450
+ | 0.6216 | 16.1351 | 1194 | 0.7765 |
451
+ | 0.6129 | 16.1757 | 1197 | 0.7764 |
452
+ | 0.7025 | 16.2162 | 1200 | 0.7759 |
453
+ | 0.8607 | 16.2568 | 1203 | 0.7742 |
454
+ | 0.4131 | 16.2973 | 1206 | 0.7718 |
455
+ | 0.6074 | 16.3378 | 1209 | 0.7712 |
456
+ | 0.6138 | 16.3784 | 1212 | 0.7735 |
457
+ | 0.6341 | 16.4189 | 1215 | 0.7719 |
458
+ | 0.6118 | 16.4595 | 1218 | 0.7723 |
459
+ | 0.7456 | 16.5 | 1221 | 0.7731 |
460
+ | 0.7303 | 16.5405 | 1224 | 0.7744 |
461
+ | 0.9041 | 16.5811 | 1227 | 0.7728 |
462
+ | 0.6858 | 16.6216 | 1230 | 0.7696 |
463
+ | 0.7813 | 16.6622 | 1233 | 0.7709 |
464
+ | 0.6222 | 16.7027 | 1236 | 0.7718 |
465
+ | 0.756 | 16.7432 | 1239 | 0.7699 |
466
+ | 0.6971 | 16.7838 | 1242 | 0.7691 |
467
+ | 0.685 | 16.8243 | 1245 | 0.7671 |
468
+ | 0.8077 | 16.8649 | 1248 | 0.7700 |
469
+ | 0.5113 | 16.9054 | 1251 | 0.7725 |
470
+ | 0.863 | 16.9459 | 1254 | 0.7723 |
471
+ | 0.5907 | 16.9865 | 1257 | 0.7732 |
472
+ | 0.7326 | 17.0270 | 1260 | 0.7705 |
473
+ | 0.6305 | 17.0676 | 1263 | 0.7704 |
474
+ | 0.5333 | 17.1081 | 1266 | 0.7696 |
475
+ | 0.6263 | 17.1486 | 1269 | 0.7736 |
476
+ | 0.6031 | 17.1892 | 1272 | 0.7688 |
477
+ | 0.7785 | 17.2297 | 1275 | 0.7714 |
478
+ | 0.6821 | 17.2703 | 1278 | 0.7698 |
479
+ | 0.7371 | 17.3108 | 1281 | 0.7650 |
480
+ | 0.53 | 17.3514 | 1284 | 0.7681 |
481
+ | 0.7413 | 17.3919 | 1287 | 0.7674 |
482
+ | 0.5742 | 17.4324 | 1290 | 0.7650 |
483
+ | 0.5972 | 17.4730 | 1293 | 0.7647 |
484
+ | 0.6501 | 17.5135 | 1296 | 0.7638 |
485
+ | 0.7033 | 17.5541 | 1299 | 0.7623 |
486
+ | 0.9062 | 17.5946 | 1302 | 0.7605 |
487
+ | 0.6346 | 17.6351 | 1305 | 0.7623 |
488
+ | 0.6128 | 17.6757 | 1308 | 0.7610 |
489
+ | 0.5238 | 17.7162 | 1311 | 0.7649 |
490
+ | 0.7174 | 17.7568 | 1314 | 0.7638 |
491
+ | 0.5736 | 17.7973 | 1317 | 0.7613 |
492
+ | 0.6981 | 17.8378 | 1320 | 0.7663 |
493
+ | 0.7606 | 17.8784 | 1323 | 0.7639 |
494
+ | 0.8921 | 17.9189 | 1326 | 0.7627 |
495
+ | 0.8137 | 17.9595 | 1329 | 0.7596 |
496
+ | 0.5805 | 18.0 | 1332 | 0.7611 |
497
+ | 0.7555 | 18.0405 | 1335 | 0.7630 |
498
+ | 0.7198 | 18.0811 | 1338 | 0.7657 |
499
+ | 0.6985 | 18.1216 | 1341 | 0.7668 |
500
+ | 0.751 | 18.1622 | 1344 | 0.7647 |
501
+ | 0.6312 | 18.2027 | 1347 | 0.7666 |
502
+ | 0.5514 | 18.2432 | 1350 | 0.7633 |
503
+ | 0.7886 | 18.2838 | 1353 | 0.7598 |
504
+ | 0.6188 | 18.3243 | 1356 | 0.7591 |
505
+ | 0.9161 | 18.3649 | 1359 | 0.7609 |
506
+ | 0.7103 | 18.4054 | 1362 | 0.7597 |
507
+ | 0.5801 | 18.4459 | 1365 | 0.7600 |
508
+ | 0.5811 | 18.4865 | 1368 | 0.7568 |
509
+ | 0.7084 | 18.5270 | 1371 | 0.7583 |
510
+ | 0.6469 | 18.5676 | 1374 | 0.7567 |
511
+ | 0.4795 | 18.6081 | 1377 | 0.7577 |
512
+ | 0.5333 | 18.6486 | 1380 | 0.7608 |
513
+ | 0.6912 | 18.6892 | 1383 | 0.7599 |
514
+ | 0.5693 | 18.7297 | 1386 | 0.7605 |
515
+ | 0.765 | 18.7703 | 1389 | 0.7586 |
516
+ | 0.6908 | 18.8108 | 1392 | 0.7576 |
517
+ | 0.554 | 18.8514 | 1395 | 0.7588 |
518
+ | 0.5517 | 18.8919 | 1398 | 0.7593 |
519
+ | 0.6006 | 18.9324 | 1401 | 0.7624 |
520
+ | 0.8661 | 18.9730 | 1404 | 0.7610 |
521
+ | 0.5183 | 19.0135 | 1407 | 0.7586 |
522
+ | 0.7951 | 19.0541 | 1410 | 0.7585 |
523
+ | 0.6838 | 19.0946 | 1413 | 0.7561 |
524
+ | 0.7278 | 19.1351 | 1416 | 0.7545 |
525
+ | 0.6053 | 19.1757 | 1419 | 0.7541 |
526
+ | 0.5119 | 19.2162 | 1422 | 0.7537 |
527
+ | 0.5743 | 19.2568 | 1425 | 0.7566 |
528
+ | 0.8562 | 19.2973 | 1428 | 0.7532 |
529
+ | 0.4632 | 19.3378 | 1431 | 0.7539 |
530
+ | 0.8029 | 19.3784 | 1434 | 0.7568 |
531
+ | 0.8046 | 19.4189 | 1437 | 0.7553 |
532
+ | 0.6308 | 19.4595 | 1440 | 0.7547 |
533
+ | 0.5366 | 19.5 | 1443 | 0.7549 |
534
+ | 0.5249 | 19.5405 | 1446 | 0.7557 |
535
+ | 0.7144 | 19.5811 | 1449 | 0.7550 |
536
+ | 0.7054 | 19.6216 | 1452 | 0.7563 |
537
+ | 0.6575 | 19.6622 | 1455 | 0.7541 |
538
+ | 0.5057 | 19.7027 | 1458 | 0.7544 |
539
+ | 0.6339 | 19.7432 | 1461 | 0.7568 |
540
+ | 0.5965 | 19.7838 | 1464 | 0.7566 |
541
+ | 1.0225 | 19.8243 | 1467 | 0.7582 |
542
+ | 0.5466 | 19.8649 | 1470 | 0.7534 |
543
+ | 0.5946 | 19.9054 | 1473 | 0.7574 |
544
+ | 0.7015 | 19.9459 | 1476 | 0.7558 |
545
+ | 0.6772 | 19.9865 | 1479 | 0.7527 |
546
+ | 0.628 | 20.0270 | 1482 | 0.7520 |
547
+ | 0.8432 | 20.0676 | 1485 | 0.7546 |
548
+ | 0.5012 | 20.1081 | 1488 | 0.7524 |
549
+ | 0.7843 | 20.1486 | 1491 | 0.7560 |
550
+ | 0.6263 | 20.1892 | 1494 | 0.7533 |
551
+ | 0.5751 | 20.2297 | 1497 | 0.7530 |
552
+ | 0.5538 | 20.2703 | 1500 | 0.7574 |
553
+ | 0.4866 | 20.3108 | 1503 | 0.7526 |
554
+ | 0.5303 | 20.3514 | 1506 | 0.7558 |
555
+ | 0.8348 | 20.3919 | 1509 | 0.7519 |
556
+ | 0.6568 | 20.4324 | 1512 | 0.7540 |
557
+ | 0.587 | 20.4730 | 1515 | 0.7505 |
558
+ | 0.5837 | 20.5135 | 1518 | 0.7523 |
559
+ | 0.7225 | 20.5541 | 1521 | 0.7493 |
560
+ | 0.6323 | 20.5946 | 1524 | 0.7533 |
561
+ | 0.7393 | 20.6351 | 1527 | 0.7542 |
562
+ | 0.9057 | 20.6757 | 1530 | 0.7537 |
563
+ | 0.5939 | 20.7162 | 1533 | 0.7554 |
564
+ | 0.7613 | 20.7568 | 1536 | 0.7528 |
565
+ | 0.4678 | 20.7973 | 1539 | 0.7524 |
566
+ | 0.6316 | 20.8378 | 1542 | 0.7501 |
567
+ | 0.5362 | 20.8784 | 1545 | 0.7522 |
568
+ | 0.653 | 20.9189 | 1548 | 0.7495 |
569
+ | 0.8229 | 20.9595 | 1551 | 0.7499 |
570
+ | 0.5539 | 21.0 | 1554 | 0.7486 |
571
+ | 0.586 | 21.0405 | 1557 | 0.7498 |
572
+ | 0.5707 | 21.0811 | 1560 | 0.7505 |
573
+ | 0.6017 | 21.1216 | 1563 | 0.7497 |
574
+ | 0.5576 | 21.1622 | 1566 | 0.7489 |
575
+ | 0.7408 | 21.2027 | 1569 | 0.7503 |
576
+ | 0.542 | 21.2432 | 1572 | 0.7490 |
577
+ | 0.576 | 21.2838 | 1575 | 0.7478 |
578
+ | 0.5323 | 21.3243 | 1578 | 0.7525 |
579
+ | 0.6224 | 21.3649 | 1581 | 0.7499 |
580
+ | 0.5921 | 21.4054 | 1584 | 0.7521 |
581
+ | 0.5888 | 21.4459 | 1587 | 0.7480 |
582
+ | 0.493 | 21.4865 | 1590 | 0.7492 |
583
+ | 0.6878 | 21.5270 | 1593 | 0.7491 |
584
+ | 0.5939 | 21.5676 | 1596 | 0.7487 |
585
+ | 0.6568 | 21.6081 | 1599 | 0.7464 |
586
+ | 0.7762 | 21.6486 | 1602 | 0.7491 |
587
+ | 0.7653 | 21.6892 | 1605 | 0.7495 |
588
+ | 1.048 | 21.7297 | 1608 | 0.7442 |
589
+ | 0.4252 | 21.7703 | 1611 | 0.7448 |
590
+ | 0.7077 | 21.8108 | 1614 | 0.7462 |
591
+ | 0.7929 | 21.8514 | 1617 | 0.7441 |
592
+ | 0.7359 | 21.8919 | 1620 | 0.7431 |
593
+ | 0.579 | 21.9324 | 1623 | 0.7404 |
594
+ | 0.5986 | 21.9730 | 1626 | 0.7424 |
595
+ | 0.6294 | 22.0135 | 1629 | 0.7483 |
596
+ | 0.6028 | 22.0541 | 1632 | 0.7487 |
597
+ | 0.71 | 22.0946 | 1635 | 0.7486 |
598
+ | 0.7272 | 22.1351 | 1638 | 0.7474 |
599
+ | 0.7421 | 22.1757 | 1641 | 0.7484 |
600
+ | 0.6425 | 22.2162 | 1644 | 0.7485 |
601
+ | 0.7581 | 22.2568 | 1647 | 0.7454 |
602
+ | 0.6693 | 22.2973 | 1650 | 0.7461 |
603
+ | 0.7514 | 22.3378 | 1653 | 0.7439 |
604
+ | 0.4701 | 22.3784 | 1656 | 0.7446 |
605
+ | 0.5123 | 22.4189 | 1659 | 0.7474 |
606
+ | 0.5777 | 22.4595 | 1662 | 0.7448 |
607
+ | 0.611 | 22.5 | 1665 | 0.7442 |
608
+ | 0.5888 | 22.5405 | 1668 | 0.7456 |
609
+ | 0.5861 | 22.5811 | 1671 | 0.7462 |
610
+ | 0.7648 | 22.6216 | 1674 | 0.7442 |
611
+ | 0.5827 | 22.6622 | 1677 | 0.7464 |
612
+ | 0.7285 | 22.7027 | 1680 | 0.7477 |
613
+ | 0.5688 | 22.7432 | 1683 | 0.7465 |
614
+ | 0.65 | 22.7838 | 1686 | 0.7466 |
615
+ | 0.4758 | 22.8243 | 1689 | 0.7461 |
616
+ | 0.6542 | 22.8649 | 1692 | 0.7446 |
617
+ | 0.5368 | 22.9054 | 1695 | 0.7479 |
618
+ | 0.8234 | 22.9459 | 1698 | 0.7471 |
619
+ | 0.5605 | 22.9865 | 1701 | 0.7457 |
620
+ | 0.6373 | 23.0270 | 1704 | 0.7468 |
621
+ | 0.5194 | 23.0676 | 1707 | 0.7462 |
622
+ | 0.7406 | 23.1081 | 1710 | 0.7447 |
623
+ | 0.5238 | 23.1486 | 1713 | 0.7434 |
624
+ | 0.6407 | 23.1892 | 1716 | 0.7431 |
625
+ | 0.4933 | 23.2297 | 1719 | 0.7436 |
626
+ | 0.8002 | 23.2703 | 1722 | 0.7450 |
627
+ | 0.6489 | 23.3108 | 1725 | 0.7458 |
628
+ | 0.6325 | 23.3514 | 1728 | 0.7433 |
629
+ | 0.5101 | 23.3919 | 1731 | 0.7440 |
630
+ | 0.6503 | 23.4324 | 1734 | 0.7442 |
631
+ | 0.8555 | 23.4730 | 1737 | 0.7455 |
632
+ | 0.4222 | 23.5135 | 1740 | 0.7447 |
633
+ | 0.843 | 23.5541 | 1743 | 0.7446 |
634
+ | 0.8313 | 23.5946 | 1746 | 0.7441 |
635
+ | 0.7178 | 23.6351 | 1749 | 0.7439 |
636
+ | 0.6409 | 23.6757 | 1752 | 0.7431 |
637
+ | 0.6184 | 23.7162 | 1755 | 0.7437 |
638
+ | 0.5277 | 23.7568 | 1758 | 0.7432 |
639
+ | 0.5987 | 23.7973 | 1761 | 0.7445 |
640
+ | 0.7011 | 23.8378 | 1764 | 0.7407 |
641
+ | 0.6693 | 23.8784 | 1767 | 0.7429 |
642
+ | 0.6101 | 23.9189 | 1770 | 0.7423 |
643
+ | 0.5883 | 23.9595 | 1773 | 0.7437 |
644
+ | 0.4803 | 24.0 | 1776 | 0.7473 |
645
+ | 0.6667 | 24.0405 | 1779 | 0.7445 |
646
+ | 0.6156 | 24.0811 | 1782 | 0.7445 |
647
+ | 0.7909 | 24.1216 | 1785 | 0.7416 |
648
+ | 0.449 | 24.1622 | 1788 | 0.7397 |
649
+ | 0.6064 | 24.2027 | 1791 | 0.7412 |
650
+ | 0.5522 | 24.2432 | 1794 | 0.7404 |
651
+ | 0.5244 | 24.2838 | 1797 | 0.7440 |
652
+ | 0.6211 | 24.3243 | 1800 | 0.7421 |
653
+ | 0.572 | 24.3649 | 1803 | 0.7422 |
654
+ | 0.7 | 24.4054 | 1806 | 0.7425 |
655
+ | 0.6095 | 24.4459 | 1809 | 0.7447 |
656
+ | 0.9109 | 24.4865 | 1812 | 0.7447 |
657
+ | 0.5852 | 24.5270 | 1815 | 0.7445 |
658
+ | 0.8397 | 24.5676 | 1818 | 0.7408 |
659
+ | 0.6186 | 24.6081 | 1821 | 0.7393 |
660
+ | 0.7605 | 24.6486 | 1824 | 0.7419 |
661
+ | 0.4131 | 24.6892 | 1827 | 0.7393 |
662
+ | 0.8149 | 24.7297 | 1830 | 0.7434 |
663
+ | 0.5877 | 24.7703 | 1833 | 0.7435 |
664
+ | 0.5594 | 24.8108 | 1836 | 0.7420 |
665
+ | 0.6607 | 24.8514 | 1839 | 0.7433 |
666
+ | 0.5369 | 24.8919 | 1842 | 0.7437 |
667
+ | 0.5268 | 24.9324 | 1845 | 0.7410 |
668
+ | 0.5273 | 24.9730 | 1848 | 0.7440 |
669
+ | 0.6146 | 25.0135 | 1851 | 0.7427 |
670
+ | 0.8054 | 25.0541 | 1854 | 0.7440 |
671
+ | 0.8785 | 25.0946 | 1857 | 0.7451 |
672
+ | 0.7208 | 25.1351 | 1860 | 0.7450 |
673
+ | 0.6592 | 25.1757 | 1863 | 0.7434 |
674
+ | 0.7174 | 25.2162 | 1866 | 0.7430 |
675
+ | 0.5552 | 25.2568 | 1869 | 0.7398 |
676
+ | 0.7374 | 25.2973 | 1872 | 0.7411 |
677
+ | 0.6694 | 25.3378 | 1875 | 0.7400 |
678
+ | 0.5145 | 25.3784 | 1878 | 0.7406 |
679
+ | 0.6904 | 25.4189 | 1881 | 0.7405 |
680
+ | 0.6441 | 25.4595 | 1884 | 0.7425 |
681
+ | 0.4699 | 25.5 | 1887 | 0.7410 |
682
+ | 0.6629 | 25.5405 | 1890 | 0.7400 |
683
+ | 0.5895 | 25.5811 | 1893 | 0.7434 |
684
+ | 0.6364 | 25.6216 | 1896 | 0.7407 |
685
+ | 0.7567 | 25.6622 | 1899 | 0.7440 |
686
+ | 0.7204 | 25.7027 | 1902 | 0.7409 |
687
+ | 0.5391 | 25.7432 | 1905 | 0.7427 |
688
+ | 0.5649 | 25.7838 | 1908 | 0.7411 |
689
+ | 0.7254 | 25.8243 | 1911 | 0.7401 |
690
+ | 0.5305 | 25.8649 | 1914 | 0.7390 |
691
+ | 0.523 | 25.9054 | 1917 | 0.7389 |
692
+ | 0.4508 | 25.9459 | 1920 | 0.7394 |
693
+ | 0.4191 | 25.9865 | 1923 | 0.7414 |
694
+ | 0.5966 | 26.0270 | 1926 | 0.7438 |
695
+ | 0.6259 | 26.0676 | 1929 | 0.7404 |
696
+ | 0.5717 | 26.1081 | 1932 | 0.7429 |
697
+ | 0.5566 | 26.1486 | 1935 | 0.7413 |
698
+ | 0.612 | 26.1892 | 1938 | 0.7405 |
699
+ | 0.7154 | 26.2297 | 1941 | 0.7419 |
700
+ | 0.7489 | 26.2703 | 1944 | 0.7417 |
701
+ | 0.5922 | 26.3108 | 1947 | 0.7421 |
702
+ | 0.6247 | 26.3514 | 1950 | 0.7420 |
703
+ | 0.5243 | 26.3919 | 1953 | 0.7407 |
704
+ | 1.0104 | 26.4324 | 1956 | 0.7427 |
705
+ | 0.516 | 26.4730 | 1959 | 0.7419 |
706
+ | 0.5302 | 26.5135 | 1962 | 0.7393 |
707
+ | 0.742 | 26.5541 | 1965 | 0.7415 |
708
+ | 0.5358 | 26.5946 | 1968 | 0.7421 |
709
+ | 0.4858 | 26.6351 | 1971 | 0.7450 |
710
+ | 0.6545 | 26.6757 | 1974 | 0.7425 |
711
+ | 0.6161 | 26.7162 | 1977 | 0.7417 |
712
+ | 0.7481 | 26.7568 | 1980 | 0.7404 |
713
+ | 0.5783 | 26.7973 | 1983 | 0.7436 |
714
+ | 0.457 | 26.8378 | 1986 | 0.7422 |
715
+ | 0.5594 | 26.8784 | 1989 | 0.7424 |
716
+ | 0.609 | 26.9189 | 1992 | 0.7435 |
717
+ | 0.6018 | 26.9595 | 1995 | 0.7434 |
718
+ | 0.7986 | 27.0 | 1998 | 0.7404 |
719
+ | 0.7093 | 27.0405 | 2001 | 0.7419 |
720
+ | 0.8197 | 27.0811 | 2004 | 0.7427 |
721
+ | 0.5748 | 27.1216 | 2007 | 0.7434 |
722
+ | 0.446 | 27.1622 | 2010 | 0.7386 |
723
+ | 0.5958 | 27.2027 | 2013 | 0.7387 |
724
+ | 0.7049 | 27.2432 | 2016 | 0.7400 |
725
+ | 0.6765 | 27.2838 | 2019 | 0.7419 |
726
+ | 0.5303 | 27.3243 | 2022 | 0.7398 |
727
+ | 0.7938 | 27.3649 | 2025 | 0.7409 |
728
+ | 0.5543 | 27.4054 | 2028 | 0.7388 |
729
+ | 0.9195 | 27.4459 | 2031 | 0.7379 |
730
+ | 0.6682 | 27.4865 | 2034 | 0.7404 |
731
+ | 0.4695 | 27.5270 | 2037 | 0.7394 |
732
+ | 0.5242 | 27.5676 | 2040 | 0.7410 |
733
+ | 0.7289 | 27.6081 | 2043 | 0.7399 |
734
+ | 0.5307 | 27.6486 | 2046 | 0.7396 |
735
+ | 0.5228 | 27.6892 | 2049 | 0.7408 |
736
+ | 0.5756 | 27.7297 | 2052 | 0.7400 |
737
+ | 0.5873 | 27.7703 | 2055 | 0.7404 |
738
+ | 0.5304 | 27.8108 | 2058 | 0.7376 |
739
+ | 0.6231 | 27.8514 | 2061 | 0.7370 |
740
+ | 0.6331 | 27.8919 | 2064 | 0.7389 |
741
+ | 0.6596 | 27.9324 | 2067 | 0.7410 |
742
+ | 0.6554 | 27.9730 | 2070 | 0.7415 |
743
+ | 0.4172 | 28.0135 | 2073 | 0.7400 |
744
+ | 0.8645 | 28.0541 | 2076 | 0.7394 |
745
+ | 0.5343 | 28.0946 | 2079 | 0.7404 |
746
+ | 0.6244 | 28.1351 | 2082 | 0.7403 |
747
+ | 0.7855 | 28.1757 | 2085 | 0.7382 |
748
+ | 0.5522 | 28.2162 | 2088 | 0.7408 |
749
+ | 0.5947 | 28.2568 | 2091 | 0.7402 |
750
+ | 0.7008 | 28.2973 | 2094 | 0.7388 |
751
+ | 0.7432 | 28.3378 | 2097 | 0.7404 |
752
+ | 0.3971 | 28.3784 | 2100 | 0.7404 |
753
+ | 0.5977 | 28.4189 | 2103 | 0.7413 |
754
+ | 0.6171 | 28.4595 | 2106 | 0.7421 |
755
+ | 0.63 | 28.5 | 2109 | 0.7418 |
756
+ | 0.6669 | 28.5405 | 2112 | 0.7402 |
757
+ | 0.739 | 28.5811 | 2115 | 0.7413 |
758
+ | 0.7795 | 28.6216 | 2118 | 0.7426 |
759
+ | 0.49 | 28.6622 | 2121 | 0.7389 |
760
+ | 0.5843 | 28.7027 | 2124 | 0.7412 |
761
+ | 0.3649 | 28.7432 | 2127 | 0.7419 |
762
+ | 0.5686 | 28.7838 | 2130 | 0.7379 |
763
+ | 0.8602 | 28.8243 | 2133 | 0.7396 |
764
+ | 0.6258 | 28.8649 | 2136 | 0.7380 |
765
+ | 0.5918 | 28.9054 | 2139 | 0.7397 |
766
+ | 0.5472 | 28.9459 | 2142 | 0.7382 |
767
+ | 0.5999 | 28.9865 | 2145 | 0.7389 |
768
+ | 0.637 | 29.0270 | 2148 | 0.7395 |
769
+ | 0.7309 | 29.0676 | 2151 | 0.7386 |
770
+ | 0.5701 | 29.1081 | 2154 | 0.7396 |
771
+ | 0.5048 | 29.1486 | 2157 | 0.7398 |
772
+ | 0.4754 | 29.1892 | 2160 | 0.7370 |
773
+ | 0.5792 | 29.2297 | 2163 | 0.7381 |
774
+ | 0.7897 | 29.2703 | 2166 | 0.7387 |
775
+ | 0.6935 | 29.3108 | 2169 | 0.7407 |
776
+ | 0.7357 | 29.3514 | 2172 | 0.7412 |
777
+ | 0.5114 | 29.3919 | 2175 | 0.7389 |
778
+ | 0.7875 | 29.4324 | 2178 | 0.7399 |
779
+ | 0.5673 | 29.4730 | 2181 | 0.7385 |
780
+ | 0.6423 | 29.5135 | 2184 | 0.7382 |
781
+ | 0.7393 | 29.5541 | 2187 | 0.7396 |
782
+ | 0.5183 | 29.5946 | 2190 | 0.7395 |
783
+ | 0.5972 | 29.6351 | 2193 | 0.7389 |
784
+ | 0.6461 | 29.6757 | 2196 | 0.7398 |
785
+ | 0.427 | 29.7162 | 2199 | 0.7396 |
786
+ | 0.7357 | 29.7568 | 2202 | 0.7381 |
787
+ | 0.603 | 29.7973 | 2205 | 0.7394 |
788
+ | 0.6142 | 29.8378 | 2208 | 0.7406 |
789
+ | 0.4877 | 29.8784 | 2211 | 0.7408 |
790
+ | 0.7774 | 29.9189 | 2214 | 0.7401 |
791
+ | 0.6909 | 29.9595 | 2217 | 0.7413 |
792
+ | 0.5127 | 30.0 | 2220 | 0.7398 |
793
+ | 0.4667 | 30.0405 | 2223 | 0.7414 |
794
+ | 0.5583 | 30.0811 | 2226 | 0.7412 |
795
+ | 0.6727 | 30.1216 | 2229 | 0.7378 |
796
+ | 0.5809 | 30.1622 | 2232 | 0.7419 |
797
+ | 0.6331 | 30.2027 | 2235 | 0.7411 |
798
+ | 0.5906 | 30.2432 | 2238 | 0.7416 |
799
+ | 0.7849 | 30.2838 | 2241 | 0.7441 |
800
+ | 0.5856 | 30.3243 | 2244 | 0.7413 |
801
+ | 0.5886 | 30.3649 | 2247 | 0.7389 |
802
+ | 0.5763 | 30.4054 | 2250 | 0.7402 |
803
+ | 0.5239 | 30.4459 | 2253 | 0.7397 |
804
+ | 0.614 | 30.4865 | 2256 | 0.7412 |
805
+ | 0.6579 | 30.5270 | 2259 | 0.7418 |
806
+ | 0.7466 | 30.5676 | 2262 | 0.7408 |
807
+ | 0.7607 | 30.6081 | 2265 | 0.7414 |
808
+ | 0.6251 | 30.6486 | 2268 | 0.7414 |
809
+ | 0.9175 | 30.6892 | 2271 | 0.7396 |
810
+ | 0.5611 | 30.7297 | 2274 | 0.7390 |
811
+ | 0.7051 | 30.7703 | 2277 | 0.7403 |
812
+ | 0.6331 | 30.8108 | 2280 | 0.7381 |
813
+ | 0.5536 | 30.8514 | 2283 | 0.7380 |
814
+ | 0.6293 | 30.8919 | 2286 | 0.7393 |
815
+ | 0.4456 | 30.9324 | 2289 | 0.7400 |
816
+ | 0.5521 | 30.9730 | 2292 | 0.7393 |
817
+ | 0.5012 | 31.0135 | 2295 | 0.7403 |
818
+ | 0.5571 | 31.0541 | 2298 | 0.7413 |
819
+ | 0.6326 | 31.0946 | 2301 | 0.7420 |
820
+ | 0.7587 | 31.1351 | 2304 | 0.7394 |
821
+ | 0.6534 | 31.1757 | 2307 | 0.7400 |
822
+ | 0.5276 | 31.2162 | 2310 | 0.7402 |
823
+ | 0.6514 | 31.2568 | 2313 | 0.7392 |
824
+ | 0.8247 | 31.2973 | 2316 | 0.7392 |
825
+ | 0.7972 | 31.3378 | 2319 | 0.7409 |
826
+ | 0.4689 | 31.3784 | 2322 | 0.7420 |
827
+ | 0.5428 | 31.4189 | 2325 | 0.7398 |
828
+ | 0.7894 | 31.4595 | 2328 | 0.7409 |
829
+ | 0.5278 | 31.5 | 2331 | 0.7411 |
830
+ | 0.5384 | 31.5405 | 2334 | 0.7419 |
831
+ | 0.5847 | 31.5811 | 2337 | 0.7414 |
832
+ | 0.5634 | 31.6216 | 2340 | 0.7415 |
833
+ | 0.6 | 31.6622 | 2343 | 0.7418 |
834
+ | 0.7443 | 31.7027 | 2346 | 0.7425 |
835
+ | 0.5771 | 31.7432 | 2349 | 0.7392 |
836
+ | 0.6404 | 31.7838 | 2352 | 0.7381 |
837
+ | 0.5441 | 31.8243 | 2355 | 0.7406 |
838
+ | 0.5123 | 31.8649 | 2358 | 0.7375 |
839
+ | 0.7296 | 31.9054 | 2361 | 0.7383 |
840
+ | 0.4944 | 31.9459 | 2364 | 0.7399 |
841
+ | 0.7002 | 31.9865 | 2367 | 0.7387 |
842
 
843
 
844
  ### Framework versions
adapter_config.json CHANGED
@@ -11,7 +11,7 @@
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 32,
- "lora_dropout": 0.05,
+ "lora_dropout": 0.01,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
@@ -20,13 +20,8 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
- "gate_proj",
- "o_proj",
- "up_proj",
  "q_proj",
- "v_proj",
- "k_proj"
+ "v_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2b036d3c4044bc3c7152cfdc35c1edc67b951d0c35baaea7351ca546fb5e6283
- size 35237104
+ oid sha256:476a889673f1c2afe73af44a62f4f650acafa30b0fadec371509f3c66d8a583b
+ size 4338000
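For context, the adapter_config.json changes above correspond roughly to the PEFT `LoraConfig` sketched below. Fields not shown in that diff (for example the LoRA rank `r`) are omitted here and would come from the rest of adapter_config.json.

```python
from peft import LoraConfig

# Reconstruction of only the fields visible in the diff above; omitted fields
# (e.g. the rank `r`) are not shown in that hunk and keep their configured values.
lora_config = LoraConfig(
    lora_alpha=32,
    lora_dropout=0.01,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
    use_dora=False,
)
```

Narrowing `target_modules` from seven projection matrices to just `q_proj` and `v_proj` is consistent with the adapter_model.safetensors update above, where the adapter file shrinks from 35,237,104 to 4,338,000 bytes.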
all_results.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "epoch": 8.0,
+ "eval_loss": 0.7665330171585083,
+ "eval_runtime": 1.0749,
+ "eval_samples": 2,
+ "eval_samples_per_second": 14.885,
+ "eval_steps_per_second": 3.721,
+ "total_flos": 1188976147968000.0,
+ "train_loss": 0.4161318518926163,
+ "train_runtime": 741.424,
+ "train_samples_per_second": 3.194,
+ "train_steps_per_second": 0.798
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 8.0,
+ "eval_loss": 0.7665330171585083,
+ "eval_runtime": 1.0749,
+ "eval_samples": 2,
+ "eval_samples_per_second": 14.885,
+ "eval_steps_per_second": 3.721
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 8.0,
+ "total_flos": 1188976147968000.0,
+ "train_loss": 0.4161318518926163,
+ "train_runtime": 741.424,
+ "train_samples_per_second": 3.194,
+ "train_steps_per_second": 0.798
+ }
trainer_state.json ADDED
@@ -0,0 +1,2997 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 8.0,
5
+ "eval_steps": 3,
6
+ "global_step": 592,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04054054054054054,
13
+ "grad_norm": 5.807250022888184,
14
+ "learning_rate": 5e-05,
15
+ "loss": 3.1119,
16
+ "step": 3
17
+ },
18
+ {
19
+ "epoch": 0.04054054054054054,
20
+ "eval_loss": 3.1016640663146973,
21
+ "eval_runtime": 1.0551,
22
+ "eval_samples_per_second": 15.164,
23
+ "eval_steps_per_second": 3.791,
24
+ "step": 3
25
+ },
26
+ {
27
+ "epoch": 0.08108108108108109,
28
+ "grad_norm": 4.004100322723389,
29
+ "learning_rate": 0.0001,
30
+ "loss": 2.8734,
31
+ "step": 6
32
+ },
33
+ {
34
+ "epoch": 0.08108108108108109,
35
+ "eval_loss": 2.6094236373901367,
36
+ "eval_runtime": 1.0592,
37
+ "eval_samples_per_second": 15.106,
38
+ "eval_steps_per_second": 3.777,
39
+ "step": 6
40
+ },
41
+ {
42
+ "epoch": 0.12162162162162163,
43
+ "grad_norm": 3.935053586959839,
44
+ "learning_rate": 9.999353337510526e-05,
45
+ "loss": 2.4188,
46
+ "step": 9
47
+ },
48
+ {
49
+ "epoch": 0.12162162162162163,
50
+ "eval_loss": 2.1545872688293457,
51
+ "eval_runtime": 1.0511,
52
+ "eval_samples_per_second": 15.222,
53
+ "eval_steps_per_second": 3.805,
54
+ "step": 9
55
+ },
56
+ {
57
+ "epoch": 0.16216216216216217,
58
+ "grad_norm": 5.741048812866211,
59
+ "learning_rate": 9.997413517311055e-05,
60
+ "loss": 1.9335,
61
+ "step": 12
62
+ },
63
+ {
64
+ "epoch": 0.16216216216216217,
65
+ "eval_loss": 1.786160945892334,
66
+ "eval_runtime": 1.0532,
67
+ "eval_samples_per_second": 15.192,
68
+ "eval_steps_per_second": 3.798,
69
+ "step": 12
70
+ },
71
+ {
72
+ "epoch": 0.20270270270270271,
73
+ "grad_norm": 4.155601978302002,
74
+ "learning_rate": 9.99418104116517e-05,
75
+ "loss": 1.5361,
76
+ "step": 15
77
+ },
78
+ {
79
+ "epoch": 0.20270270270270271,
80
+ "eval_loss": 1.4731855392456055,
81
+ "eval_runtime": 1.0511,
82
+ "eval_samples_per_second": 15.222,
83
+ "eval_steps_per_second": 3.805,
84
+ "step": 15
85
+ },
86
+ {
87
+ "epoch": 0.24324324324324326,
88
+ "grad_norm": 2.4831109046936035,
89
+ "learning_rate": 9.989656745201298e-05,
90
+ "loss": 1.314,
91
+ "step": 18
92
+ },
93
+ {
94
+ "epoch": 0.24324324324324326,
95
+ "eval_loss": 1.2790606021881104,
96
+ "eval_runtime": 1.0553,
97
+ "eval_samples_per_second": 15.161,
98
+ "eval_steps_per_second": 3.79,
99
+ "step": 18
100
+ },
101
+ {
102
+ "epoch": 0.28378378378378377,
103
+ "grad_norm": 1.9509971141815186,
104
+ "learning_rate": 9.983841799696438e-05,
105
+ "loss": 1.1747,
106
+ "step": 21
107
+ },
108
+ {
109
+ "epoch": 0.28378378378378377,
110
+ "eval_loss": 1.1653475761413574,
111
+ "eval_runtime": 1.0512,
112
+ "eval_samples_per_second": 15.221,
113
+ "eval_steps_per_second": 3.805,
114
+ "step": 21
115
+ },
116
+ {
117
+ "epoch": 0.32432432432432434,
118
+ "grad_norm": 2.245741367340088,
119
+ "learning_rate": 9.976737708773445e-05,
120
+ "loss": 1.1407,
121
+ "step": 24
122
+ },
123
+ {
124
+ "epoch": 0.32432432432432434,
125
+ "eval_loss": 1.110356092453003,
126
+ "eval_runtime": 1.0534,
127
+ "eval_samples_per_second": 15.188,
128
+ "eval_steps_per_second": 3.797,
129
+ "step": 24
130
+ },
131
+ {
132
+ "epoch": 0.36486486486486486,
133
+ "grad_norm": 2.0690531730651855,
134
+ "learning_rate": 9.968346310011964e-05,
135
+ "loss": 1.1734,
136
+ "step": 27
137
+ },
138
+ {
139
+ "epoch": 0.36486486486486486,
140
+ "eval_loss": 1.088733434677124,
141
+ "eval_runtime": 1.0508,
142
+ "eval_samples_per_second": 15.226,
143
+ "eval_steps_per_second": 3.806,
144
+ "step": 27
145
+ },
146
+ {
147
+ "epoch": 0.40540540540540543,
148
+ "grad_norm": 1.8963656425476074,
149
+ "learning_rate": 9.958669773973123e-05,
150
+ "loss": 1.0495,
151
+ "step": 30
152
+ },
153
+ {
154
+ "epoch": 0.40540540540540543,
155
+ "eval_loss": 1.0401344299316406,
156
+ "eval_runtime": 1.0528,
157
+ "eval_samples_per_second": 15.197,
158
+ "eval_steps_per_second": 3.799,
159
+ "step": 30
160
+ },
161
+ {
162
+ "epoch": 0.44594594594594594,
163
+ "grad_norm": 1.753909945487976,
164
+ "learning_rate": 9.947710603638078e-05,
165
+ "loss": 1.0401,
166
+ "step": 33
167
+ },
168
+ {
169
+ "epoch": 0.44594594594594594,
170
+ "eval_loss": 0.990611732006073,
171
+ "eval_runtime": 1.0507,
172
+ "eval_samples_per_second": 15.227,
173
+ "eval_steps_per_second": 3.807,
174
+ "step": 33
175
+ },
176
+ {
177
+ "epoch": 0.4864864864864865,
178
+ "grad_norm": 2.1073760986328125,
179
+ "learning_rate": 9.935471633760573e-05,
180
+ "loss": 1.0623,
181
+ "step": 36
182
+ },
183
+ {
184
+ "epoch": 0.4864864864864865,
185
+ "eval_loss": 0.9593618512153625,
186
+ "eval_runtime": 1.0535,
187
+ "eval_samples_per_second": 15.188,
188
+ "eval_steps_per_second": 3.797,
189
+ "step": 36
190
+ },
191
+ {
192
+ "epoch": 0.527027027027027,
193
+ "grad_norm": 1.5675249099731445,
194
+ "learning_rate": 9.921956030133701e-05,
195
+ "loss": 0.8152,
196
+ "step": 39
197
+ },
198
+ {
199
+ "epoch": 0.527027027027027,
200
+ "eval_loss": 0.9366932511329651,
201
+ "eval_runtime": 1.0514,
202
+ "eval_samples_per_second": 15.218,
203
+ "eval_steps_per_second": 3.805,
204
+ "step": 39
205
+ },
206
+ {
207
+ "epoch": 0.5675675675675675,
208
+ "grad_norm": 2.219888210296631,
209
+ "learning_rate": 9.907167288771019e-05,
210
+ "loss": 0.9261,
211
+ "step": 42
212
+ },
213
+ {
214
+ "epoch": 0.5675675675675675,
215
+ "eval_loss": 0.9247606992721558,
216
+ "eval_runtime": 1.0532,
217
+ "eval_samples_per_second": 15.192,
218
+ "eval_steps_per_second": 3.798,
219
+ "step": 42
220
+ },
221
+ {
222
+ "epoch": 0.6081081081081081,
223
+ "grad_norm": 1.6866446733474731,
224
+ "learning_rate": 9.891109235002249e-05,
225
+ "loss": 0.9469,
226
+ "step": 45
227
+ },
228
+ {
229
+ "epoch": 0.6081081081081081,
230
+ "eval_loss": 0.9134540557861328,
231
+ "eval_runtime": 1.0562,
232
+ "eval_samples_per_second": 15.149,
233
+ "eval_steps_per_second": 3.787,
234
+ "step": 45
235
+ },
236
+ {
237
+ "epoch": 0.6486486486486487,
238
+ "grad_norm": 1.7272800207138062,
239
+ "learning_rate": 9.8737860224838e-05,
240
+ "loss": 0.8381,
241
+ "step": 48
242
+ },
243
+ {
244
+ "epoch": 0.6486486486486487,
245
+ "eval_loss": 0.8871217370033264,
246
+ "eval_runtime": 1.0527,
247
+ "eval_samples_per_second": 15.199,
248
+ "eval_steps_per_second": 3.8,
249
+ "step": 48
250
+ },
251
+ {
252
+ "epoch": 0.6891891891891891,
253
+ "grad_norm": 2.6152303218841553,
254
+ "learning_rate": 9.855202132124365e-05,
255
+ "loss": 0.8456,
256
+ "step": 51
257
+ },
258
+ {
259
+ "epoch": 0.6891891891891891,
260
+ "eval_loss": 0.8553087711334229,
261
+ "eval_runtime": 1.0521,
262
+ "eval_samples_per_second": 15.208,
263
+ "eval_steps_per_second": 3.802,
264
+ "step": 51
265
+ },
266
+ {
267
+ "epoch": 0.7297297297297297,
268
+ "grad_norm": 1.8282960653305054,
269
+ "learning_rate": 9.835362370925868e-05,
270
+ "loss": 0.908,
271
+ "step": 54
272
+ },
273
+ {
274
+ "epoch": 0.7297297297297297,
275
+ "eval_loss": 0.8271682858467102,
276
+ "eval_runtime": 1.052,
277
+ "eval_samples_per_second": 15.21,
278
+ "eval_steps_per_second": 3.802,
279
+ "step": 54
280
+ },
281
+ {
282
+ "epoch": 0.7702702702702703,
283
+ "grad_norm": 2.466750383377075,
284
+ "learning_rate": 9.814271870740054e-05,
285
+ "loss": 0.999,
286
+ "step": 57
287
+ },
288
+ {
289
+ "epoch": 0.7702702702702703,
290
+ "eval_loss": 0.8151593208312988,
291
+ "eval_runtime": 1.0549,
292
+ "eval_samples_per_second": 15.167,
293
+ "eval_steps_per_second": 3.792,
294
+ "step": 57
295
+ },
296
+ {
297
+ "epoch": 0.8108108108108109,
298
+ "grad_norm": 1.8908120393753052,
299
+ "learning_rate": 9.791936086941064e-05,
300
+ "loss": 0.897,
301
+ "step": 60
302
+ },
303
+ {
304
+ "epoch": 0.8108108108108109,
305
+ "eval_loss": 0.8052847981452942,
306
+ "eval_runtime": 1.0512,
307
+ "eval_samples_per_second": 15.22,
308
+ "eval_steps_per_second": 3.805,
309
+ "step": 60
310
+ },
311
+ {
312
+ "epoch": 0.8513513513513513,
313
+ "grad_norm": 1.9563689231872559,
314
+ "learning_rate": 9.768360797014324e-05,
315
+ "loss": 0.8747,
316
+ "step": 63
317
+ },
318
+ {
319
+ "epoch": 0.8513513513513513,
320
+ "eval_loss": 0.7914941906929016,
321
+ "eval_runtime": 1.0519,
322
+ "eval_samples_per_second": 15.21,
323
+ "eval_steps_per_second": 3.803,
324
+ "step": 63
325
+ },
326
+ {
327
+ "epoch": 0.8918918918918919,
328
+ "grad_norm": 1.9292480945587158,
329
+ "learning_rate": 9.7435520990621e-05,
330
+ "loss": 1.0646,
331
+ "step": 66
332
+ },
333
+ {
334
+ "epoch": 0.8918918918918919,
335
+ "eval_loss": 0.7872657179832458,
336
+ "eval_runtime": 1.0526,
337
+ "eval_samples_per_second": 15.201,
338
+ "eval_steps_per_second": 3.8,
339
+ "step": 66
340
+ },
341
+ {
342
+ "epoch": 0.9324324324324325,
343
+ "grad_norm": 1.7248555421829224,
344
+ "learning_rate": 9.717516410226145e-05,
345
+ "loss": 0.6771,
346
+ "step": 69
347
+ },
348
+ {
349
+ "epoch": 0.9324324324324325,
350
+ "eval_loss": 0.7814666628837585,
351
+ "eval_runtime": 1.0522,
352
+ "eval_samples_per_second": 15.207,
353
+ "eval_steps_per_second": 3.802,
354
+ "step": 69
355
+ },
356
+ {
357
+ "epoch": 0.972972972972973,
358
+ "grad_norm": 2.171896457672119,
359
+ "learning_rate": 9.690260465027801e-05,
360
+ "loss": 0.9386,
361
+ "step": 72
362
+ },
363
+ {
364
+ "epoch": 0.972972972972973,
365
+ "eval_loss": 0.7634860873222351,
366
+ "eval_runtime": 1.0498,
367
+ "eval_samples_per_second": 15.241,
368
+ "eval_steps_per_second": 3.81,
369
+ "step": 72
370
+ },
371
+ {
372
+ "epoch": 1.0135135135135136,
373
+ "grad_norm": 1.625179409980774,
374
+ "learning_rate": 9.661791313626018e-05,
375
+ "loss": 0.6348,
376
+ "step": 75
377
+ },
378
+ {
379
+ "epoch": 1.0135135135135136,
380
+ "eval_loss": 0.75515216588974,
381
+ "eval_runtime": 1.0536,
382
+ "eval_samples_per_second": 15.186,
383
+ "eval_steps_per_second": 3.796,
384
+ "step": 75
385
+ },
386
+ {
387
+ "epoch": 1.054054054054054,
388
+ "grad_norm": 1.4293404817581177,
389
+ "learning_rate": 9.632116319993725e-05,
390
+ "loss": 0.5763,
391
+ "step": 78
392
+ },
393
+ {
394
+ "epoch": 1.054054054054054,
395
+ "eval_loss": 0.7473800182342529,
396
+ "eval_runtime": 1.0524,
397
+ "eval_samples_per_second": 15.203,
398
+ "eval_steps_per_second": 3.801,
399
+ "step": 78
400
+ },
401
+ {
402
+ "epoch": 1.0945945945945945,
403
+ "grad_norm": 1.9279707670211792,
404
+ "learning_rate": 9.601243160013023e-05,
405
+ "loss": 0.7059,
406
+ "step": 81
407
+ },
408
+ {
409
+ "epoch": 1.0945945945945945,
410
+ "eval_loss": 0.7430617213249207,
411
+ "eval_runtime": 1.0539,
412
+ "eval_samples_per_second": 15.181,
413
+ "eval_steps_per_second": 3.795,
414
+ "step": 81
415
+ },
416
+ {
417
+ "epoch": 1.135135135135135,
418
+ "grad_norm": 1.7644144296646118,
419
+ "learning_rate": 9.56917981948971e-05,
420
+ "loss": 0.6111,
421
+ "step": 84
422
+ },
423
+ {
424
+ "epoch": 1.135135135135135,
425
+ "eval_loss": 0.7393875122070312,
426
+ "eval_runtime": 1.0525,
427
+ "eval_samples_per_second": 15.202,
428
+ "eval_steps_per_second": 3.8,
429
+ "step": 84
430
+ },
431
+ {
432
+ "epoch": 1.1756756756756757,
433
+ "grad_norm": 1.4910467863082886,
434
+ "learning_rate": 9.535934592087627e-05,
435
+ "loss": 0.6937,
436
+ "step": 87
437
+ },
438
+ {
439
+ "epoch": 1.1756756756756757,
440
+ "eval_loss": 0.7415614724159241,
441
+ "eval_runtime": 1.0533,
442
+ "eval_samples_per_second": 15.191,
443
+ "eval_steps_per_second": 3.798,
444
+ "step": 87
445
+ },
446
+ {
447
+ "epoch": 1.2162162162162162,
448
+ "grad_norm": 1.989018440246582,
449
+ "learning_rate": 9.50151607718338e-05,
450
+ "loss": 0.6408,
451
+ "step": 90
452
+ },
453
+ {
454
+ "epoch": 1.2162162162162162,
455
+ "eval_loss": 0.7331891059875488,
456
+ "eval_runtime": 1.0504,
457
+ "eval_samples_per_second": 15.232,
458
+ "eval_steps_per_second": 3.808,
459
+ "step": 90
460
+ },
461
+ {
462
+ "epoch": 1.2567567567567568,
463
+ "grad_norm": 1.5546590089797974,
464
+ "learning_rate": 9.465933177641982e-05,
465
+ "loss": 0.5931,
466
+ "step": 93
467
+ },
468
+ {
469
+ "epoch": 1.2567567567567568,
470
+ "eval_loss": 0.7319458723068237,
471
+ "eval_runtime": 1.0532,
472
+ "eval_samples_per_second": 15.191,
473
+ "eval_steps_per_second": 3.798,
474
+ "step": 93
475
+ },
476
+ {
477
+ "epoch": 1.2972972972972974,
478
+ "grad_norm": 2.128746271133423,
479
+ "learning_rate": 9.429195097513993e-05,
480
+ "loss": 0.5792,
481
+ "step": 96
482
+ },
483
+ {
484
+ "epoch": 1.2972972972972974,
485
+ "eval_loss": 0.7179479598999023,
486
+ "eval_runtime": 1.0504,
487
+ "eval_samples_per_second": 15.232,
488
+ "eval_steps_per_second": 3.808,
489
+ "step": 96
490
+ },
491
+ {
492
+ "epoch": 1.3378378378378377,
493
+ "grad_norm": 2.069204092025757,
494
+ "learning_rate": 9.391311339654753e-05,
495
+ "loss": 0.5502,
496
+ "step": 99
497
+ },
498
+ {
499
+ "epoch": 1.3378378378378377,
500
+ "eval_loss": 0.7083268165588379,
501
+ "eval_runtime": 1.0531,
502
+ "eval_samples_per_second": 15.193,
503
+ "eval_steps_per_second": 3.798,
504
+ "step": 99
505
+ },
506
+ {
507
+ "epoch": 1.3783783783783785,
508
+ "grad_norm": 2.069469928741455,
509
+ "learning_rate": 9.352291703266331e-05,
510
+ "loss": 0.7356,
511
+ "step": 102
512
+ },
513
+ {
514
+ "epoch": 1.3783783783783785,
515
+ "eval_loss": 0.7048563957214355,
516
+ "eval_runtime": 1.0519,
517
+ "eval_samples_per_second": 15.21,
518
+ "eval_steps_per_second": 3.803,
519
+ "step": 102
520
+ },
521
+ {
522
+ "epoch": 1.4189189189189189,
523
+ "grad_norm": 1.507051706314087,
524
+ "learning_rate": 9.31214628136281e-05,
525
+ "loss": 0.5204,
526
+ "step": 105
527
+ },
528
+ {
529
+ "epoch": 1.4189189189189189,
530
+ "eval_loss": 0.6983195543289185,
531
+ "eval_runtime": 1.0543,
532
+ "eval_samples_per_second": 15.176,
533
+ "eval_steps_per_second": 3.794,
534
+ "step": 105
535
+ },
536
+ {
537
+ "epoch": 1.4594594594594594,
538
+ "grad_norm": 1.918865442276001,
539
+ "learning_rate": 9.270885458159575e-05,
540
+ "loss": 0.6132,
541
+ "step": 108
542
+ },
543
+ {
544
+ "epoch": 1.4594594594594594,
545
+ "eval_loss": 0.6857842803001404,
546
+ "eval_runtime": 1.0525,
547
+ "eval_samples_per_second": 15.202,
548
+ "eval_steps_per_second": 3.8,
549
+ "step": 108
550
+ },
551
+ {
552
+ "epoch": 1.5,
553
+ "grad_norm": 2.062997341156006,
554
+ "learning_rate": 9.228519906387288e-05,
555
+ "loss": 0.7527,
556
+ "step": 111
557
+ },
558
+ {
559
+ "epoch": 1.5,
560
+ "eval_loss": 0.6743776798248291,
561
+ "eval_runtime": 1.0512,
562
+ "eval_samples_per_second": 15.221,
563
+ "eval_steps_per_second": 3.805,
564
+ "step": 111
565
+ },
566
+ {
567
+ "epoch": 1.5405405405405406,
568
+ "grad_norm": 1.8099018335342407,
569
+ "learning_rate": 9.185060584531217e-05,
570
+ "loss": 0.6798,
571
+ "step": 114
572
+ },
573
+ {
574
+ "epoch": 1.5405405405405406,
575
+ "eval_loss": 0.6715844869613647,
576
+ "eval_runtime": 1.0529,
577
+ "eval_samples_per_second": 15.196,
578
+ "eval_steps_per_second": 3.799,
579
+ "step": 114
580
+ },
581
+ {
582
+ "epoch": 1.5810810810810811,
583
+ "grad_norm": 2.0540611743927,
584
+ "learning_rate": 9.140518733996672e-05,
585
+ "loss": 0.7266,
586
+ "step": 117
587
+ },
588
+ {
589
+ "epoch": 1.5810810810810811,
590
+ "eval_loss": 0.6656138896942139,
591
+ "eval_runtime": 1.0523,
592
+ "eval_samples_per_second": 15.204,
593
+ "eval_steps_per_second": 3.801,
594
+ "step": 117
595
+ },
596
+ {
597
+ "epoch": 1.6216216216216215,
598
+ "grad_norm": 2.3945634365081787,
599
+ "learning_rate": 9.094905876201229e-05,
600
+ "loss": 0.5347,
601
+ "step": 120
602
+ },
603
+ {
604
+ "epoch": 1.6216216216216215,
605
+ "eval_loss": 0.6710730791091919,
606
+ "eval_runtime": 1.053,
607
+ "eval_samples_per_second": 15.195,
608
+ "eval_steps_per_second": 3.799,
609
+ "step": 120
610
+ },
611
+ {
612
+ "epoch": 1.6621621621621623,
613
+ "grad_norm": 2.006612777709961,
614
+ "learning_rate": 9.048233809594561e-05,
615
+ "loss": 0.6522,
616
+ "step": 123
617
+ },
618
+ {
619
+ "epoch": 1.6621621621621623,
620
+ "eval_loss": 0.6679877042770386,
621
+ "eval_runtime": 1.0519,
622
+ "eval_samples_per_second": 15.211,
623
+ "eval_steps_per_second": 3.803,
624
+ "step": 123
625
+ },
626
+ {
627
+ "epoch": 1.7027027027027026,
628
+ "grad_norm": 1.751696228981018,
629
+ "learning_rate": 9.000514606606581e-05,
630
+ "loss": 0.8567,
631
+ "step": 126
632
+ },
633
+ {
634
+ "epoch": 1.7027027027027026,
635
+ "eval_loss": 0.6558159589767456,
636
+ "eval_runtime": 1.0531,
637
+ "eval_samples_per_second": 15.193,
638
+ "eval_steps_per_second": 3.798,
639
+ "step": 126
640
+ },
641
+ {
642
+ "epoch": 1.7432432432432432,
643
+ "grad_norm": 1.5286139249801636,
644
+ "learning_rate": 8.951760610524724e-05,
645
+ "loss": 0.5204,
646
+ "step": 129
647
+ },
648
+ {
649
+ "epoch": 1.7432432432432432,
650
+ "eval_loss": 0.6488269567489624,
651
+ "eval_runtime": 1.0516,
652
+ "eval_samples_per_second": 15.215,
653
+ "eval_steps_per_second": 3.804,
654
+ "step": 129
655
+ },
656
+ {
657
+ "epoch": 1.7837837837837838,
658
+ "grad_norm": 2.1092898845672607,
659
+ "learning_rate": 8.901984432301185e-05,
660
+ "loss": 0.6443,
661
+ "step": 132
662
+ },
663
+ {
664
+ "epoch": 1.7837837837837838,
665
+ "eval_loss": 0.6392868161201477,
666
+ "eval_runtime": 1.053,
667
+ "eval_samples_per_second": 15.195,
668
+ "eval_steps_per_second": 3.799,
669
+ "step": 132
670
+ },
671
+ {
672
+ "epoch": 1.8243243243243243,
673
+ "grad_norm": 1.7279053926467896,
674
+ "learning_rate": 8.851198947290894e-05,
675
+ "loss": 0.5436,
676
+ "step": 135
677
+ },
678
+ {
679
+ "epoch": 1.8243243243243243,
680
+ "eval_loss": 0.6321672201156616,
681
+ "eval_runtime": 1.0499,
682
+ "eval_samples_per_second": 15.239,
683
+ "eval_steps_per_second": 3.81,
684
+ "step": 135
685
+ },
686
+ {
687
+ "epoch": 1.864864864864865,
688
+ "grad_norm": 2.6842877864837646,
689
+ "learning_rate": 8.799417291921117e-05,
690
+ "loss": 0.6054,
691
+ "step": 138
692
+ },
693
+ {
694
+ "epoch": 1.864864864864865,
695
+ "eval_loss": 0.6346270442008972,
696
+ "eval_runtime": 1.0528,
697
+ "eval_samples_per_second": 15.198,
698
+ "eval_steps_per_second": 3.799,
699
+ "step": 138
700
+ },
701
+ {
702
+ "epoch": 1.9054054054054053,
703
+ "grad_norm": 1.9958398342132568,
704
+ "learning_rate": 8.746652860293523e-05,
705
+ "loss": 0.4488,
706
+ "step": 141
707
+ },
708
+ {
709
+ "epoch": 1.9054054054054053,
710
+ "eval_loss": 0.6389164924621582,
711
+ "eval_runtime": 1.0505,
712
+ "eval_samples_per_second": 15.231,
713
+ "eval_steps_per_second": 3.808,
714
+ "step": 141
715
+ },
716
+ {
717
+ "epoch": 1.945945945945946,
718
+ "grad_norm": 2.0705783367156982,
719
+ "learning_rate": 8.692919300719595e-05,
720
+ "loss": 0.7171,
721
+ "step": 144
722
+ },
723
+ {
724
+ "epoch": 1.945945945945946,
725
+ "eval_loss": 0.632194995880127,
726
+ "eval_runtime": 1.0537,
727
+ "eval_samples_per_second": 15.184,
728
+ "eval_steps_per_second": 3.796,
729
+ "step": 144
730
+ },
731
+ {
732
+ "epoch": 1.9864864864864864,
733
+ "grad_norm": 2.0737218856811523,
734
+ "learning_rate": 8.638230512190298e-05,
735
+ "loss": 0.5383,
736
+ "step": 147
737
+ },
738
+ {
739
+ "epoch": 1.9864864864864864,
740
+ "eval_loss": 0.6272808313369751,
741
+ "eval_runtime": 1.0507,
742
+ "eval_samples_per_second": 15.228,
743
+ "eval_steps_per_second": 3.807,
744
+ "step": 147
745
+ },
746
+ {
747
+ "epoch": 2.027027027027027,
748
+ "grad_norm": 1.6119190454483032,
749
+ "learning_rate": 8.58260064078088e-05,
750
+ "loss": 0.4812,
751
+ "step": 150
752
+ },
753
+ {
754
+ "epoch": 2.027027027027027,
755
+ "eval_loss": 0.6234598755836487,
756
+ "eval_runtime": 1.0541,
757
+ "eval_samples_per_second": 15.179,
758
+ "eval_steps_per_second": 3.795,
759
+ "step": 150
760
+ },
761
+ {
762
+ "epoch": 2.0675675675675675,
763
+ "grad_norm": 2.104738712310791,
764
+ "learning_rate": 8.526044075991802e-05,
765
+ "loss": 0.7911,
766
+ "step": 153
767
+ },
768
+ {
769
+ "epoch": 2.0675675675675675,
770
+ "eval_loss": 0.6295649409294128,
771
+ "eval_runtime": 1.0504,
772
+ "eval_samples_per_second": 15.232,
773
+ "eval_steps_per_second": 3.808,
774
+ "step": 153
775
+ },
776
+ {
777
+ "epoch": 2.108108108108108,
778
+ "grad_norm": 2.041696786880493,
779
+ "learning_rate": 8.468575447026651e-05,
780
+ "loss": 0.514,
781
+ "step": 156
782
+ },
783
+ {
784
+ "epoch": 2.108108108108108,
785
+ "eval_loss": 0.6444165706634521,
786
+ "eval_runtime": 1.0539,
787
+ "eval_samples_per_second": 15.182,
788
+ "eval_steps_per_second": 3.795,
789
+ "step": 156
790
+ },
791
+ {
792
+ "epoch": 2.1486486486486487,
793
+ "grad_norm": 1.7887616157531738,
794
+ "learning_rate": 8.410209619008101e-05,
795
+ "loss": 0.4481,
796
+ "step": 159
797
+ },
798
+ {
799
+ "epoch": 2.1486486486486487,
800
+ "eval_loss": 0.6452795267105103,
801
+ "eval_runtime": 1.0508,
802
+ "eval_samples_per_second": 15.227,
803
+ "eval_steps_per_second": 3.807,
804
+ "step": 159
805
+ },
806
+ {
807
+ "epoch": 2.189189189189189,
808
+ "grad_norm": 2.2852938175201416,
809
+ "learning_rate": 8.350961689132808e-05,
810
+ "loss": 0.3983,
811
+ "step": 162
812
+ },
813
+ {
814
+ "epoch": 2.189189189189189,
815
+ "eval_loss": 0.6356573104858398,
816
+ "eval_runtime": 1.0538,
817
+ "eval_samples_per_second": 15.183,
818
+ "eval_steps_per_second": 3.796,
819
+ "step": 162
820
+ },
821
+ {
822
+ "epoch": 2.22972972972973,
823
+ "grad_norm": 1.3814259767532349,
824
+ "learning_rate": 8.290846982766305e-05,
825
+ "loss": 0.2386,
826
+ "step": 165
827
+ },
828
+ {
829
+ "epoch": 2.22972972972973,
830
+ "eval_loss": 0.632733166217804,
831
+ "eval_runtime": 1.053,
832
+ "eval_samples_per_second": 15.195,
833
+ "eval_steps_per_second": 3.799,
834
+ "step": 165
835
+ },
836
+ {
837
+ "epoch": 2.27027027027027,
838
+ "grad_norm": 2.624509572982788,
839
+ "learning_rate": 8.22988104947886e-05,
840
+ "loss": 0.4447,
841
+ "step": 168
842
+ },
843
+ {
844
+ "epoch": 2.27027027027027,
845
+ "eval_loss": 0.6358802318572998,
846
+ "eval_runtime": 1.0518,
847
+ "eval_samples_per_second": 15.212,
848
+ "eval_steps_per_second": 3.803,
849
+ "step": 168
850
+ },
851
+ {
852
+ "epoch": 2.310810810810811,
853
+ "grad_norm": 2.1006217002868652,
854
+ "learning_rate": 8.168079659023349e-05,
855
+ "loss": 0.4302,
856
+ "step": 171
857
+ },
858
+ {
859
+ "epoch": 2.310810810810811,
860
+ "eval_loss": 0.6386667490005493,
861
+ "eval_runtime": 1.0534,
862
+ "eval_samples_per_second": 15.188,
863
+ "eval_steps_per_second": 3.797,
864
+ "step": 171
865
+ },
866
+ {
867
+ "epoch": 2.3513513513513513,
868
+ "grad_norm": 2.631301164627075,
869
+ "learning_rate": 8.105458797256178e-05,
870
+ "loss": 0.4514,
871
+ "step": 174
872
+ },
873
+ {
874
+ "epoch": 2.3513513513513513,
875
+ "eval_loss": 0.6402238607406616,
876
+ "eval_runtime": 1.0545,
877
+ "eval_samples_per_second": 15.174,
878
+ "eval_steps_per_second": 3.793,
879
+ "step": 174
880
+ },
881
+ {
882
+ "epoch": 2.391891891891892,
883
+ "grad_norm": 1.4005826711654663,
884
+ "learning_rate": 8.04203466200229e-05,
885
+ "loss": 0.2813,
886
+ "step": 177
887
+ },
888
+ {
889
+ "epoch": 2.391891891891892,
890
+ "eval_loss": 0.6313220262527466,
891
+ "eval_runtime": 1.0541,
892
+ "eval_samples_per_second": 15.178,
893
+ "eval_steps_per_second": 3.795,
894
+ "step": 177
895
+ },
896
+ {
897
+ "epoch": 2.4324324324324325,
898
+ "grad_norm": 2.4380390644073486,
899
+ "learning_rate": 7.977823658865364e-05,
900
+ "loss": 0.4747,
901
+ "step": 180
902
+ },
903
+ {
904
+ "epoch": 2.4324324324324325,
905
+ "eval_loss": 0.6258513927459717,
906
+ "eval_runtime": 1.0533,
907
+ "eval_samples_per_second": 15.191,
908
+ "eval_steps_per_second": 3.798,
909
+ "step": 180
910
+ },
911
+ {
912
+ "epoch": 2.472972972972973,
913
+ "grad_norm": 2.3655426502227783,
914
+ "learning_rate": 7.912842396984254e-05,
915
+ "loss": 0.547,
916
+ "step": 183
917
+ },
918
+ {
919
+ "epoch": 2.472972972972973,
920
+ "eval_loss": 0.6256988048553467,
921
+ "eval_runtime": 1.053,
922
+ "eval_samples_per_second": 15.195,
923
+ "eval_steps_per_second": 3.799,
924
+ "step": 183
925
+ },
926
+ {
927
+ "epoch": 2.5135135135135136,
928
+ "grad_norm": 1.9949471950531006,
929
+ "learning_rate": 7.847107684736792e-05,
930
+ "loss": 0.3154,
931
+ "step": 186
932
+ },
933
+ {
934
+ "epoch": 2.5135135135135136,
935
+ "eval_loss": 0.6247289776802063,
936
+ "eval_runtime": 1.0523,
937
+ "eval_samples_per_second": 15.205,
938
+ "eval_steps_per_second": 3.801,
939
+ "step": 186
940
+ },
941
+ {
942
+ "epoch": 2.554054054054054,
943
+ "grad_norm": 3.2453622817993164,
944
+ "learning_rate": 7.780636525392046e-05,
945
+ "loss": 0.5583,
946
+ "step": 189
947
+ },
948
+ {
949
+ "epoch": 2.554054054054054,
950
+ "eval_loss": 0.6129618883132935,
951
+ "eval_runtime": 1.0519,
952
+ "eval_samples_per_second": 15.21,
953
+ "eval_steps_per_second": 3.803,
954
+ "step": 189
955
+ },
956
+ {
957
+ "epoch": 2.5945945945945947,
958
+ "grad_norm": 2.022986888885498,
959
+ "learning_rate": 7.713446112712169e-05,
960
+ "loss": 0.5726,
961
+ "step": 192
962
+ },
963
+ {
964
+ "epoch": 2.5945945945945947,
965
+ "eval_loss": 0.6086827516555786,
966
+ "eval_runtime": 1.0543,
967
+ "eval_samples_per_second": 15.175,
968
+ "eval_steps_per_second": 3.794,
969
+ "step": 192
970
+ },
971
+ {
972
+ "epoch": 2.635135135135135,
973
+ "grad_norm": 2.429865598678589,
974
+ "learning_rate": 7.645553826504969e-05,
975
+ "loss": 0.4701,
976
+ "step": 195
977
+ },
978
+ {
979
+ "epoch": 2.635135135135135,
980
+ "eval_loss": 0.6085944175720215,
981
+ "eval_runtime": 1.0521,
982
+ "eval_samples_per_second": 15.208,
983
+ "eval_steps_per_second": 3.802,
984
+ "step": 195
985
+ },
986
+ {
987
+ "epoch": 2.6756756756756754,
988
+ "grad_norm": 1.991803526878357,
989
+ "learning_rate": 7.576977228128376e-05,
990
+ "loss": 0.4866,
991
+ "step": 198
992
+ },
993
+ {
994
+ "epoch": 2.6756756756756754,
995
+ "eval_loss": 0.6133272647857666,
996
+ "eval_runtime": 1.0535,
997
+ "eval_samples_per_second": 15.187,
998
+ "eval_steps_per_second": 3.797,
999
+ "step": 198
1000
+ },
1001
+ {
1002
+ "epoch": 2.7162162162162162,
1003
+ "grad_norm": 2.537832021713257,
1004
+ "learning_rate": 7.50773405594792e-05,
1005
+ "loss": 0.4015,
1006
+ "step": 201
1007
+ },
1008
+ {
1009
+ "epoch": 2.7162162162162162,
1010
+ "eval_loss": 0.6213403940200806,
1011
+ "eval_runtime": 1.0524,
1012
+ "eval_samples_per_second": 15.203,
1013
+ "eval_steps_per_second": 3.801,
1014
+ "step": 201
1015
+ },
1016
+ {
1017
+ "epoch": 2.756756756756757,
1018
+ "grad_norm": 1.758016586303711,
1019
+ "learning_rate": 7.437842220748441e-05,
1020
+ "loss": 0.4277,
1021
+ "step": 204
1022
+ },
1023
+ {
1024
+ "epoch": 2.756756756756757,
1025
+ "eval_loss": 0.623763382434845,
1026
+ "eval_runtime": 1.0527,
1027
+ "eval_samples_per_second": 15.198,
1028
+ "eval_steps_per_second": 3.8,
1029
+ "step": 204
1030
+ },
1031
+ {
1032
+ "epoch": 2.7972972972972974,
1033
+ "grad_norm": 1.8930737972259521,
1034
+ "learning_rate": 7.367319801101196e-05,
1035
+ "loss": 0.3157,
1036
+ "step": 207
1037
+ },
1038
+ {
1039
+ "epoch": 2.7972972972972974,
1040
+ "eval_loss": 0.6248853206634521,
1041
+ "eval_runtime": 1.0562,
1042
+ "eval_samples_per_second": 15.149,
1043
+ "eval_steps_per_second": 3.787,
1044
+ "step": 207
1045
+ },
1046
+ {
1047
+ "epoch": 2.8378378378378377,
1048
+ "grad_norm": 2.071988105773926,
1049
+ "learning_rate": 7.296185038687566e-05,
1050
+ "loss": 0.3883,
1051
+ "step": 210
1052
+ },
1053
+ {
1054
+ "epoch": 2.8378378378378377,
1055
+ "eval_loss": 0.6209710240364075,
1056
+ "eval_runtime": 1.0518,
1057
+ "eval_samples_per_second": 15.212,
1058
+ "eval_steps_per_second": 3.803,
1059
+ "step": 210
1060
+ },
1061
+ {
1062
+ "epoch": 2.8783783783783785,
1063
+ "grad_norm": 1.579237937927246,
1064
+ "learning_rate": 7.224456333580573e-05,
1065
+ "loss": 0.5436,
1066
+ "step": 213
1067
+ },
1068
+ {
1069
+ "epoch": 2.8783783783783785,
1070
+ "eval_loss": 0.6127223968505859,
1071
+ "eval_runtime": 1.0524,
1072
+ "eval_samples_per_second": 15.204,
1073
+ "eval_steps_per_second": 3.801,
1074
+ "step": 213
1075
+ },
1076
+ {
1077
+ "epoch": 2.918918918918919,
1078
+ "grad_norm": 2.4129927158355713,
1079
+ "learning_rate": 7.152152239485419e-05,
1080
+ "loss": 0.526,
1081
+ "step": 216
1082
+ },
1083
+ {
1084
+ "epoch": 2.918918918918919,
1085
+ "eval_loss": 0.6055560111999512,
1086
+ "eval_runtime": 1.0502,
1087
+ "eval_samples_per_second": 15.236,
1088
+ "eval_steps_per_second": 3.809,
1089
+ "step": 216
1090
+ },
1091
+ {
1092
+ "epoch": 2.9594594594594597,
1093
+ "grad_norm": 2.252251148223877,
1094
+ "learning_rate": 7.079291458940301e-05,
1095
+ "loss": 0.4465,
1096
+ "step": 219
1097
+ },
1098
+ {
1099
+ "epoch": 2.9594594594594597,
1100
+ "eval_loss": 0.5982283353805542,
1101
+ "eval_runtime": 1.0529,
1102
+ "eval_samples_per_second": 15.197,
1103
+ "eval_steps_per_second": 3.799,
1104
+ "step": 219
1105
+ },
1106
+ {
1107
+ "epoch": 3.0,
1108
+ "grad_norm": 1.9773114919662476,
1109
+ "learning_rate": 7.005892838478711e-05,
1110
+ "loss": 0.3692,
1111
+ "step": 222
1112
+ },
1113
+ {
1114
+ "epoch": 3.0,
1115
+ "eval_loss": 0.5916565656661987,
1116
+ "eval_runtime": 1.0501,
1117
+ "eval_samples_per_second": 15.237,
1118
+ "eval_steps_per_second": 3.809,
1119
+ "step": 222
1120
+ },
1121
+ {
1122
+ "epoch": 3.0405405405405403,
1123
+ "grad_norm": 1.1434626579284668,
1124
+ "learning_rate": 6.931975363754502e-05,
1125
+ "loss": 0.3022,
1126
+ "step": 225
1127
+ },
1128
+ {
1129
+ "epoch": 3.0405405405405403,
1130
+ "eval_loss": 0.5955583453178406,
1131
+ "eval_runtime": 1.0535,
1132
+ "eval_samples_per_second": 15.187,
1133
+ "eval_steps_per_second": 3.797,
1134
+ "step": 225
1135
+ },
1136
+ {
1137
+ "epoch": 3.081081081081081,
1138
+ "grad_norm": 1.9162238836288452,
1139
+ "learning_rate": 6.85755815463096e-05,
1140
+ "loss": 0.2875,
1141
+ "step": 228
1142
+ },
1143
+ {
1144
+ "epoch": 3.081081081081081,
1145
+ "eval_loss": 0.6152929067611694,
1146
+ "eval_runtime": 1.0516,
1147
+ "eval_samples_per_second": 15.215,
1148
+ "eval_steps_per_second": 3.804,
1149
+ "step": 228
1150
+ },
1151
+ {
1152
+ "epoch": 3.1216216216216215,
1153
+ "grad_norm": 2.688631057739258,
1154
+ "learning_rate": 6.782660460235174e-05,
1155
+ "loss": 0.5544,
1156
+ "step": 231
1157
+ },
1158
+ {
1159
+ "epoch": 3.1216216216216215,
1160
+ "eval_loss": 0.6343094110488892,
1161
+ "eval_runtime": 1.052,
1162
+ "eval_samples_per_second": 15.21,
1163
+ "eval_steps_per_second": 3.802,
1164
+ "step": 231
1165
+ },
1166
+ {
1167
+ "epoch": 3.1621621621621623,
1168
+ "grad_norm": 2.58313250541687,
1169
+ "learning_rate": 6.707301653978945e-05,
1170
+ "loss": 0.4159,
1171
+ "step": 234
1172
+ },
1173
+ {
1174
+ "epoch": 3.1621621621621623,
1175
+ "eval_loss": 0.6369538307189941,
1176
+ "eval_runtime": 1.0524,
1177
+ "eval_samples_per_second": 15.203,
1178
+ "eval_steps_per_second": 3.801,
1179
+ "step": 234
1180
+ },
1181
+ {
1182
+ "epoch": 3.2027027027027026,
1183
+ "grad_norm": 2.2415409088134766,
1184
+ "learning_rate": 6.63150122854758e-05,
1185
+ "loss": 0.4963,
1186
+ "step": 237
1187
+ },
1188
+ {
1189
+ "epoch": 3.2027027027027026,
1190
+ "eval_loss": 0.6289186477661133,
1191
+ "eval_runtime": 1.0528,
1192
+ "eval_samples_per_second": 15.198,
1193
+ "eval_steps_per_second": 3.799,
1194
+ "step": 237
1195
+ },
1196
+ {
1197
+ "epoch": 3.2432432432432434,
1198
+ "grad_norm": 2.974931240081787,
1199
+ "learning_rate": 6.5552787908578e-05,
1200
+ "loss": 0.3248,
1201
+ "step": 240
1202
+ },
1203
+ {
1204
+ "epoch": 3.2432432432432434,
1205
+ "eval_loss": 0.6189987659454346,
1206
+ "eval_runtime": 1.0515,
1207
+ "eval_samples_per_second": 15.217,
1208
+ "eval_steps_per_second": 3.804,
1209
+ "step": 240
1210
+ },
1211
+ {
1212
+ "epoch": 3.2837837837837838,
1213
+ "grad_norm": 2.0078535079956055,
1214
+ "learning_rate": 6.478654056986131e-05,
1215
+ "loss": 0.349,
1216
+ "step": 243
1217
+ },
1218
+ {
1219
+ "epoch": 3.2837837837837838,
1220
+ "eval_loss": 0.6110680103302002,
1221
+ "eval_runtime": 1.0532,
1222
+ "eval_samples_per_second": 15.192,
1223
+ "eval_steps_per_second": 3.798,
1224
+ "step": 243
1225
+ },
1226
+ {
1227
+ "epoch": 3.3243243243243246,
1228
+ "grad_norm": 2.6236143112182617,
1229
+ "learning_rate": 6.401646847069039e-05,
1230
+ "loss": 0.3107,
1231
+ "step": 246
1232
+ },
1233
+ {
1234
+ "epoch": 3.3243243243243246,
1235
+ "eval_loss": 0.6120755672454834,
1236
+ "eval_runtime": 1.0508,
1237
+ "eval_samples_per_second": 15.227,
1238
+ "eval_steps_per_second": 3.807,
1239
+ "step": 246
1240
+ },
1241
+ {
1242
+ "epoch": 3.364864864864865,
1243
+ "grad_norm": 1.75555419921875,
1244
+ "learning_rate": 6.32427708017615e-05,
1245
+ "loss": 0.2219,
1246
+ "step": 249
1247
+ },
1248
+ {
1249
+ "epoch": 3.364864864864865,
1250
+ "eval_loss": 0.6196171641349792,
1251
+ "eval_runtime": 1.0523,
1252
+ "eval_samples_per_second": 15.204,
1253
+ "eval_steps_per_second": 3.801,
1254
+ "step": 249
1255
+ },
1256
+ {
1257
+ "epoch": 3.4054054054054053,
1258
+ "grad_norm": 3.003138303756714,
1259
+ "learning_rate": 6.246564769157894e-05,
1260
+ "loss": 0.251,
1261
+ "step": 252
1262
+ },
1263
+ {
1264
+ "epoch": 3.4054054054054053,
1265
+ "eval_loss": 0.6273298263549805,
1266
+ "eval_runtime": 1.0546,
1267
+ "eval_samples_per_second": 15.171,
1268
+ "eval_steps_per_second": 3.793,
1269
+ "step": 252
1270
+ },
1271
+ {
1272
+ "epoch": 3.445945945945946,
1273
+ "grad_norm": 2.2066917419433594,
1274
+ "learning_rate": 6.168530015468872e-05,
1275
+ "loss": 0.3366,
1276
+ "step": 255
1277
+ },
1278
+ {
1279
+ "epoch": 3.445945945945946,
1280
+ "eval_loss": 0.6258885860443115,
1281
+ "eval_runtime": 1.0514,
1282
+ "eval_samples_per_second": 15.217,
1283
+ "eval_steps_per_second": 3.804,
1284
+ "step": 255
1285
+ },
1286
+ {
1287
+ "epoch": 3.4864864864864864,
1288
+ "grad_norm": 1.7121000289916992,
1289
+ "learning_rate": 6.0901930039683184e-05,
1290
+ "loss": 0.3182,
1291
+ "step": 258
1292
+ },
1293
+ {
1294
+ "epoch": 3.4864864864864864,
1295
+ "eval_loss": 0.6243223547935486,
1296
+ "eval_runtime": 1.0739,
1297
+ "eval_samples_per_second": 14.898,
1298
+ "eval_steps_per_second": 3.725,
1299
+ "step": 258
1300
+ },
1301
+ {
1302
+ "epoch": 3.527027027027027,
1303
+ "grad_norm": 2.7600913047790527,
1304
+ "learning_rate": 6.011573997698985e-05,
1305
+ "loss": 0.4133,
1306
+ "step": 261
1307
+ },
1308
+ {
1309
+ "epoch": 3.527027027027027,
1310
+ "eval_loss": 0.6259996294975281,
1311
+ "eval_runtime": 1.0561,
1312
+ "eval_samples_per_second": 15.151,
1313
+ "eval_steps_per_second": 3.788,
1314
+ "step": 261
1315
+ },
1316
+ {
1317
+ "epoch": 3.5675675675675675,
1318
+ "grad_norm": 2.611302614212036,
1319
+ "learning_rate": 5.9326933326457956e-05,
1320
+ "loss": 0.3297,
1321
+ "step": 264
1322
+ },
1323
+ {
1324
+ "epoch": 3.5675675675675675,
1325
+ "eval_loss": 0.6303350925445557,
1326
+ "eval_runtime": 1.0534,
1327
+ "eval_samples_per_second": 15.189,
1328
+ "eval_steps_per_second": 3.797,
1329
+ "step": 264
1330
+ },
1331
+ {
1332
+ "epoch": 3.608108108108108,
1333
+ "grad_norm": 1.6527258157730103,
1334
+ "learning_rate": 5.8535714124756434e-05,
1335
+ "loss": 0.2276,
1336
+ "step": 267
1337
+ },
1338
+ {
1339
+ "epoch": 3.608108108108108,
1340
+ "eval_loss": 0.6364917159080505,
1341
+ "eval_runtime": 1.052,
1342
+ "eval_samples_per_second": 15.209,
1343
+ "eval_steps_per_second": 3.802,
1344
+ "step": 267
1345
+ },
1346
+ {
1347
+ "epoch": 3.6486486486486487,
1348
+ "grad_norm": 1.1108059883117676,
1349
+ "learning_rate": 5.774228703259678e-05,
1350
+ "loss": 0.1842,
1351
+ "step": 270
1352
+ },
1353
+ {
1354
+ "epoch": 3.6486486486486487,
1355
+ "eval_loss": 0.6382502317428589,
1356
+ "eval_runtime": 1.0549,
1357
+ "eval_samples_per_second": 15.168,
1358
+ "eval_steps_per_second": 3.792,
1359
+ "step": 270
1360
+ },
1361
+ {
1362
+ "epoch": 3.689189189189189,
1363
+ "grad_norm": 2.822380781173706,
1364
+ "learning_rate": 5.694685728179442e-05,
1365
+ "loss": 0.4961,
1366
+ "step": 273
1367
+ },
1368
+ {
1369
+ "epoch": 3.689189189189189,
1370
+ "eval_loss": 0.6313918828964233,
1371
+ "eval_runtime": 1.0523,
1372
+ "eval_samples_per_second": 15.205,
1373
+ "eval_steps_per_second": 3.801,
1374
+ "step": 273
1375
+ },
1376
+ {
1377
+ "epoch": 3.72972972972973,
1378
+ "grad_norm": 2.4894397258758545,
1379
+ "learning_rate": 5.6149630622182526e-05,
1380
+ "loss": 0.3785,
1381
+ "step": 276
1382
+ },
1383
+ {
1384
+ "epoch": 3.72972972972973,
1385
+ "eval_loss": 0.6239753365516663,
1386
+ "eval_runtime": 1.053,
1387
+ "eval_samples_per_second": 15.195,
1388
+ "eval_steps_per_second": 3.799,
1389
+ "step": 276
1390
+ },
1391
+ {
1392
+ "epoch": 3.77027027027027,
1393
+ "grad_norm": 2.1039986610412598,
1394
+ "learning_rate": 5.535081326839165e-05,
1395
+ "loss": 0.2834,
1396
+ "step": 279
1397
+ },
1398
+ {
1399
+ "epoch": 3.77027027027027,
1400
+ "eval_loss": 0.6189073920249939,
1401
+ "eval_runtime": 1.0515,
1402
+ "eval_samples_per_second": 15.217,
1403
+ "eval_steps_per_second": 3.804,
1404
+ "step": 279
1405
+ },
1406
+ {
1407
+ "epoch": 3.810810810810811,
1408
+ "grad_norm": 2.7096340656280518,
1409
+ "learning_rate": 5.455061184650921e-05,
1410
+ "loss": 0.3397,
1411
+ "step": 282
1412
+ },
1413
+ {
1414
+ "epoch": 3.810810810810811,
1415
+ "eval_loss": 0.6138538122177124,
1416
+ "eval_runtime": 1.0521,
1417
+ "eval_samples_per_second": 15.208,
1418
+ "eval_steps_per_second": 3.802,
1419
+ "step": 282
1420
+ },
1421
+ {
1422
+ "epoch": 3.8513513513513513,
1423
+ "grad_norm": 2.030907154083252,
1424
+ "learning_rate": 5.3749233340632674e-05,
1425
+ "loss": 0.2795,
1426
+ "step": 285
1427
+ },
1428
+ {
1429
+ "epoch": 3.8513513513513513,
1430
+ "eval_loss": 0.6104437708854675,
1431
+ "eval_runtime": 1.0581,
1432
+ "eval_samples_per_second": 15.122,
1433
+ "eval_steps_per_second": 3.78,
1434
+ "step": 285
1435
+ },
1436
+ {
1437
+ "epoch": 3.891891891891892,
1438
+ "grad_norm": 2.061206340789795,
1439
+ "learning_rate": 5.2946885039329866e-05,
1440
+ "loss": 0.3114,
1441
+ "step": 288
1442
+ },
1443
+ {
1444
+ "epoch": 3.891891891891892,
1445
+ "eval_loss": 0.6077687740325928,
1446
+ "eval_runtime": 1.0527,
1447
+ "eval_samples_per_second": 15.199,
1448
+ "eval_steps_per_second": 3.8,
1449
+ "step": 288
1450
+ },
1451
+ {
1452
+ "epoch": 3.9324324324324325,
1453
+ "grad_norm": 2.062087059020996,
1454
+ "learning_rate": 5.2143774482020744e-05,
1455
+ "loss": 0.2395,
1456
+ "step": 291
1457
+ },
1458
+ {
1459
+ "epoch": 3.9324324324324325,
1460
+ "eval_loss": 0.6111433506011963,
1461
+ "eval_runtime": 1.0517,
1462
+ "eval_samples_per_second": 15.214,
1463
+ "eval_steps_per_second": 3.804,
1464
+ "step": 291
1465
+ },
1466
+ {
1467
+ "epoch": 3.972972972972973,
1468
+ "grad_norm": 1.6344010829925537,
1469
+ "learning_rate": 5.134010940529429e-05,
1470
+ "loss": 0.1948,
1471
+ "step": 294
1472
+ },
1473
+ {
1474
+ "epoch": 3.972972972972973,
1475
+ "eval_loss": 0.6142452955245972,
1476
+ "eval_runtime": 1.0529,
1477
+ "eval_samples_per_second": 15.196,
1478
+ "eval_steps_per_second": 3.799,
1479
+ "step": 294
1480
+ },
1481
+ {
1482
+ "epoch": 4.013513513513513,
1483
+ "grad_norm": 1.9017384052276611,
1484
+ "learning_rate": 5.053609768917413e-05,
1485
+ "loss": 0.2284,
1486
+ "step": 297
1487
+ },
1488
+ {
1489
+ "epoch": 4.013513513513513,
1490
+ "eval_loss": 0.6194114685058594,
1491
+ "eval_runtime": 1.0515,
1492
+ "eval_samples_per_second": 15.217,
1493
+ "eval_steps_per_second": 3.804,
1494
+ "step": 297
1495
+ },
1496
+ {
1497
+ "epoch": 4.054054054054054,
1498
+ "grad_norm": 2.1609394550323486,
1499
+ "learning_rate": 4.973194730334748e-05,
1500
+ "loss": 0.2638,
1501
+ "step": 300
1502
+ },
1503
+ {
1504
+ "epoch": 4.054054054054054,
1505
+ "eval_loss": 0.6303145885467529,
1506
+ "eval_runtime": 1.053,
1507
+ "eval_samples_per_second": 15.194,
1508
+ "eval_steps_per_second": 3.798,
1509
+ "step": 300
1510
+ },
1511
+ {
1512
+ "epoch": 4.094594594594595,
1513
+ "grad_norm": 1.5275555849075317,
1514
+ "learning_rate": 4.892786625337047e-05,
1515
+ "loss": 0.252,
1516
+ "step": 303
1517
+ },
1518
+ {
1519
+ "epoch": 4.094594594594595,
1520
+ "eval_loss": 0.6517325639724731,
1521
+ "eval_runtime": 1.051,
1522
+ "eval_samples_per_second": 15.224,
1523
+ "eval_steps_per_second": 3.806,
1524
+ "step": 303
1525
+ },
1526
+ {
1527
+ "epoch": 4.135135135135135,
1528
+ "grad_norm": 2.807483434677124,
1529
+ "learning_rate": 4.8124062526864534e-05,
1530
+ "loss": 0.183,
1531
+ "step": 306
1532
+ },
1533
+ {
1534
+ "epoch": 4.135135135135135,
1535
+ "eval_loss": 0.6644703149795532,
1536
+ "eval_runtime": 1.0531,
1537
+ "eval_samples_per_second": 15.193,
1538
+ "eval_steps_per_second": 3.798,
1539
+ "step": 306
1540
+ },
1541
+ {
1542
+ "epoch": 4.175675675675675,
1543
+ "grad_norm": 2.6279256343841553,
1544
+ "learning_rate": 4.7320744039717154e-05,
1545
+ "loss": 0.2415,
1546
+ "step": 309
1547
+ },
1548
+ {
1549
+ "epoch": 4.175675675675675,
1550
+ "eval_loss": 0.6603893041610718,
1551
+ "eval_runtime": 1.0531,
1552
+ "eval_samples_per_second": 15.193,
1553
+ "eval_steps_per_second": 3.798,
1554
+ "step": 309
1555
+ },
1556
+ {
1557
+ "epoch": 4.216216216216216,
1558
+ "grad_norm": 0.42106354236602783,
1559
+ "learning_rate": 4.651811858230149e-05,
1560
+ "loss": 0.1791,
1561
+ "step": 312
1562
+ },
1563
+ {
1564
+ "epoch": 4.216216216216216,
1565
+ "eval_loss": 0.652984082698822,
1566
+ "eval_runtime": 1.053,
1567
+ "eval_samples_per_second": 15.195,
1568
+ "eval_steps_per_second": 3.799,
1569
+ "step": 312
1570
+ },
1571
+ {
1572
+ "epoch": 4.256756756756757,
1573
+ "grad_norm": 2.064615249633789,
1574
+ "learning_rate": 4.571639376572806e-05,
1575
+ "loss": 0.2013,
1576
+ "step": 315
1577
+ },
1578
+ {
1579
+ "epoch": 4.256756756756757,
1580
+ "eval_loss": 0.6488903760910034,
1581
+ "eval_runtime": 1.0505,
1582
+ "eval_samples_per_second": 15.23,
1583
+ "eval_steps_per_second": 3.808,
1584
+ "step": 315
1585
+ },
1586
+ {
1587
+ "epoch": 4.297297297297297,
1588
+ "grad_norm": 2.4248170852661133,
1589
+ "learning_rate": 4.491577696814318e-05,
1590
+ "loss": 0.1827,
1591
+ "step": 318
1592
+ },
1593
+ {
1594
+ "epoch": 4.297297297297297,
1595
+ "eval_loss": 0.653176486492157,
1596
+ "eval_runtime": 1.0536,
1597
+ "eval_samples_per_second": 15.186,
1598
+ "eval_steps_per_second": 3.797,
1599
+ "step": 318
1600
+ },
1601
+ {
1602
+ "epoch": 4.337837837837838,
1603
+ "grad_norm": 2.055769443511963,
1604
+ "learning_rate": 4.411647528108743e-05,
1605
+ "loss": 0.1792,
1606
+ "step": 321
1607
+ },
1608
+ {
1609
+ "epoch": 4.337837837837838,
1610
+ "eval_loss": 0.6584765315055847,
1611
+ "eval_runtime": 1.052,
1612
+ "eval_samples_per_second": 15.209,
1613
+ "eval_steps_per_second": 3.802,
1614
+ "step": 321
1615
+ },
1616
+ {
1617
+ "epoch": 4.378378378378378,
1618
+ "grad_norm": 3.4611449241638184,
1619
+ "learning_rate": 4.331869545592834e-05,
1620
+ "loss": 0.2568,
1621
+ "step": 324
1622
+ },
1623
+ {
1624
+ "epoch": 4.378378378378378,
1625
+ "eval_loss": 0.6628451347351074,
1626
+ "eval_runtime": 1.055,
1627
+ "eval_samples_per_second": 15.166,
1628
+ "eval_steps_per_second": 3.791,
1629
+ "step": 324
1630
+ },
1631
+ {
1632
+ "epoch": 4.418918918918919,
1633
+ "grad_norm": 1.6108025312423706,
1634
+ "learning_rate": 4.252264385038098e-05,
1635
+ "loss": 0.1682,
1636
+ "step": 327
1637
+ },
1638
+ {
1639
+ "epoch": 4.418918918918919,
1640
+ "eval_loss": 0.66502845287323,
1641
+ "eval_runtime": 1.0508,
1642
+ "eval_samples_per_second": 15.227,
1643
+ "eval_steps_per_second": 3.807,
1644
+ "step": 327
1645
+ },
1646
+ {
1647
+ "epoch": 4.45945945945946,
1648
+ "grad_norm": 1.828131914138794,
1649
+ "learning_rate": 4.1728526375130614e-05,
1650
+ "loss": 0.25,
1651
+ "step": 330
1652
+ },
1653
+ {
1654
+ "epoch": 4.45945945945946,
1655
+ "eval_loss": 0.6729562282562256,
1656
+ "eval_runtime": 1.0534,
1657
+ "eval_samples_per_second": 15.189,
1658
+ "eval_steps_per_second": 3.797,
1659
+ "step": 330
1660
+ },
1661
+ {
1662
+ "epoch": 4.5,
1663
+ "grad_norm": 2.5057499408721924,
1664
+ "learning_rate": 4.093654844057059e-05,
1665
+ "loss": 0.2664,
1666
+ "step": 333
1667
+ },
1668
+ {
1669
+ "epoch": 4.5,
1670
+ "eval_loss": 0.6741403937339783,
1671
+ "eval_runtime": 1.052,
1672
+ "eval_samples_per_second": 15.209,
1673
+ "eval_steps_per_second": 3.802,
1674
+ "step": 333
1675
+ },
1676
+ {
1677
+ "epoch": 4.54054054054054,
1678
+ "grad_norm": 1.6008535623550415,
1679
+ "learning_rate": 4.014691490367e-05,
1680
+ "loss": 0.2316,
1681
+ "step": 336
1682
+ },
1683
+ {
1684
+ "epoch": 4.54054054054054,
1685
+ "eval_loss": 0.6773088574409485,
1686
+ "eval_runtime": 1.053,
1687
+ "eval_samples_per_second": 15.194,
1688
+ "eval_steps_per_second": 3.799,
1689
+ "step": 336
1690
+ },
1691
+ {
1692
+ "epoch": 4.581081081081081,
1693
+ "grad_norm": 2.551591157913208,
1694
+ "learning_rate": 3.935983001498439e-05,
1695
+ "loss": 0.3467,
1696
+ "step": 339
1697
+ },
1698
+ {
1699
+ "epoch": 4.581081081081081,
1700
+ "eval_loss": 0.6705477237701416,
1701
+ "eval_runtime": 1.0509,
1702
+ "eval_samples_per_second": 15.226,
1703
+ "eval_steps_per_second": 3.806,
1704
+ "step": 339
1705
+ },
1706
+ {
1707
+ "epoch": 4.621621621621622,
1708
+ "grad_norm": 2.130202054977417,
1709
+ "learning_rate": 3.857549736582316e-05,
1710
+ "loss": 0.2426,
1711
+ "step": 342
1712
+ },
1713
+ {
1714
+ "epoch": 4.621621621621622,
1715
+ "eval_loss": 0.6681296825408936,
1716
+ "eval_runtime": 1.0529,
1717
+ "eval_samples_per_second": 15.196,
1718
+ "eval_steps_per_second": 3.799,
1719
+ "step": 342
1720
+ },
1721
+ {
1722
+ "epoch": 4.662162162162162,
1723
+ "grad_norm": 2.043670415878296,
1724
+ "learning_rate": 3.7794119835587685e-05,
1725
+ "loss": 0.2421,
1726
+ "step": 345
1727
+ },
1728
+ {
1729
+ "epoch": 4.662162162162162,
1730
+ "eval_loss": 0.6622060537338257,
1731
+ "eval_runtime": 1.0519,
1732
+ "eval_samples_per_second": 15.21,
1733
+ "eval_steps_per_second": 3.803,
1734
+ "step": 345
1735
+ },
1736
+ {
1737
+ "epoch": 4.702702702702703,
1738
+ "grad_norm": 1.9365885257720947,
1739
+ "learning_rate": 3.701589953929354e-05,
1740
+ "loss": 0.4063,
1741
+ "step": 348
1742
+ },
1743
+ {
1744
+ "epoch": 4.702702702702703,
1745
+ "eval_loss": 0.6608781814575195,
1746
+ "eval_runtime": 1.0528,
1747
+ "eval_samples_per_second": 15.197,
1748
+ "eval_steps_per_second": 3.799,
1749
+ "step": 348
1750
+ },
1751
+ {
1752
+ "epoch": 4.743243243243243,
1753
+ "grad_norm": 2.596634864807129,
1754
+ "learning_rate": 3.62410377752904e-05,
1755
+ "loss": 0.2255,
1756
+ "step": 351
1757
+ },
1758
+ {
1759
+ "epoch": 4.743243243243243,
1760
+ "eval_loss": 0.6569182276725769,
1761
+ "eval_runtime": 1.0522,
1762
+ "eval_samples_per_second": 15.206,
1763
+ "eval_steps_per_second": 3.802,
1764
+ "step": 351
1765
+ },
1766
+ {
1767
+ "epoch": 4.783783783783784,
1768
+ "grad_norm": 2.039332628250122,
1769
+ "learning_rate": 3.546973497319319e-05,
1770
+ "loss": 0.1933,
1771
+ "step": 354
1772
+ },
1773
+ {
1774
+ "epoch": 4.783783783783784,
1775
+ "eval_loss": 0.6534222364425659,
1776
+ "eval_runtime": 1.0498,
1777
+ "eval_samples_per_second": 15.241,
1778
+ "eval_steps_per_second": 3.81,
1779
+ "step": 354
1780
+ },
1781
+ {
1782
+ "epoch": 4.824324324324325,
1783
+ "grad_norm": 1.994629144668579,
1784
+ "learning_rate": 3.4702190642037944e-05,
1785
+ "loss": 0.1975,
1786
+ "step": 357
1787
+ },
1788
+ {
1789
+ "epoch": 4.824324324324325,
1790
+ "eval_loss": 0.649687647819519,
1791
+ "eval_runtime": 1.0523,
1792
+ "eval_samples_per_second": 15.204,
1793
+ "eval_steps_per_second": 3.801,
1794
+ "step": 357
1795
+ },
1796
+ {
1797
+ "epoch": 4.864864864864865,
1798
+ "grad_norm": 2.154684543609619,
1799
+ "learning_rate": 3.393860331867589e-05,
1800
+ "loss": 0.3065,
1801
+ "step": 360
1802
+ },
1803
+ {
1804
+ "epoch": 4.864864864864865,
1805
+ "eval_loss": 0.6491411924362183,
1806
+ "eval_runtime": 1.0519,
1807
+ "eval_samples_per_second": 15.21,
1808
+ "eval_steps_per_second": 3.803,
1809
+ "step": 360
1810
+ },
1811
+ {
1812
+ "epoch": 4.905405405405405,
1813
+ "grad_norm": 1.61858069896698,
1814
+ "learning_rate": 3.317917051641877e-05,
1815
+ "loss": 0.1641,
1816
+ "step": 363
1817
+ },
1818
+ {
1819
+ "epoch": 4.905405405405405,
1820
+ "eval_loss": 0.651297926902771,
1821
+ "eval_runtime": 1.0521,
1822
+ "eval_samples_per_second": 15.208,
1823
+ "eval_steps_per_second": 3.802,
1824
+ "step": 363
1825
+ },
1826
+ {
1827
+ "epoch": 4.945945945945946,
1828
+ "grad_norm": 2.7362637519836426,
1829
+ "learning_rate": 3.242408867394919e-05,
1830
+ "loss": 0.2032,
1831
+ "step": 366
1832
+ },
1833
+ {
1834
+ "epoch": 4.945945945945946,
1835
+ "eval_loss": 0.6552869081497192,
1836
+ "eval_runtime": 1.0506,
1837
+ "eval_samples_per_second": 15.229,
1838
+ "eval_steps_per_second": 3.807,
1839
+ "step": 366
1840
+ },
1841
+ {
1842
+ "epoch": 4.986486486486487,
1843
+ "grad_norm": 2.0567097663879395,
1844
+ "learning_rate": 3.167355310450877e-05,
1845
+ "loss": 0.1886,
1846
+ "step": 369
1847
+ },
1848
+ {
1849
+ "epoch": 4.986486486486487,
1850
+ "eval_loss": 0.6590157747268677,
1851
+ "eval_runtime": 1.0528,
1852
+ "eval_samples_per_second": 15.197,
1853
+ "eval_steps_per_second": 3.799,
1854
+ "step": 369
1855
+ },
1856
+ {
1857
+ "epoch": 5.027027027027027,
1858
+ "grad_norm": 1.5418853759765625,
1859
+ "learning_rate": 3.092775794537741e-05,
1860
+ "loss": 0.2539,
1861
+ "step": 372
1862
+ },
1863
+ {
1864
+ "epoch": 5.027027027027027,
1865
+ "eval_loss": 0.6676727533340454,
1866
+ "eval_runtime": 1.0516,
1867
+ "eval_samples_per_second": 15.215,
1868
+ "eval_steps_per_second": 3.804,
1869
+ "step": 372
1870
+ },
1871
+ {
1872
+ "epoch": 5.0675675675675675,
1873
+ "grad_norm": 1.229972004890442,
1874
+ "learning_rate": 3.0186896107656803e-05,
1875
+ "loss": 0.1464,
1876
+ "step": 375
1877
+ },
1878
+ {
1879
+ "epoch": 5.0675675675675675,
1880
+ "eval_loss": 0.687861979007721,
1881
+ "eval_runtime": 1.0539,
1882
+ "eval_samples_per_second": 15.182,
1883
+ "eval_steps_per_second": 3.796,
1884
+ "step": 375
1885
+ },
1886
+ {
1887
+ "epoch": 5.108108108108108,
1888
+ "grad_norm": 2.421496868133545,
1889
+ "learning_rate": 2.9451159226371095e-05,
1890
+ "loss": 0.2295,
1891
+ "step": 378
1892
+ },
1893
+ {
1894
+ "epoch": 5.108108108108108,
1895
+ "eval_loss": 0.7066453695297241,
1896
+ "eval_runtime": 1.0503,
1897
+ "eval_samples_per_second": 15.233,
1898
+ "eval_steps_per_second": 3.808,
1899
+ "step": 378
1900
+ },
1901
+ {
1902
+ "epoch": 5.148648648648648,
1903
+ "grad_norm": 2.3475804328918457,
1904
+ "learning_rate": 2.8720737610897575e-05,
1905
+ "loss": 0.1438,
1906
+ "step": 381
1907
+ },
1908
+ {
1909
+ "epoch": 5.148648648648648,
1910
+ "eval_loss": 0.7166962623596191,
1911
+ "eval_runtime": 1.0534,
1912
+ "eval_samples_per_second": 15.189,
1913
+ "eval_steps_per_second": 3.797,
1914
+ "step": 381
1915
+ },
1916
+ {
1917
+ "epoch": 5.1891891891891895,
1918
+ "grad_norm": 2.2746946811676025,
1919
+ "learning_rate": 2.799582019574033e-05,
1920
+ "loss": 0.1603,
1921
+ "step": 384
1922
+ },
1923
+ {
1924
+ "epoch": 5.1891891891891895,
1925
+ "eval_loss": 0.7134541273117065,
1926
+ "eval_runtime": 1.0519,
1927
+ "eval_samples_per_second": 15.211,
1928
+ "eval_steps_per_second": 3.803,
1929
+ "step": 384
1930
+ },
1931
+ {
1932
+ "epoch": 5.22972972972973,
1933
+ "grad_norm": 1.2550048828125,
1934
+ "learning_rate": 2.7276594491659525e-05,
1935
+ "loss": 0.1379,
1936
+ "step": 387
1937
+ },
1938
+ {
1939
+ "epoch": 5.22972972972973,
1940
+ "eval_loss": 0.7095359563827515,
1941
+ "eval_runtime": 1.0543,
1942
+ "eval_samples_per_second": 15.176,
1943
+ "eval_steps_per_second": 3.794,
1944
+ "step": 387
1945
+ },
1946
+ {
1947
+ "epoch": 5.27027027027027,
1948
+ "grad_norm": 1.7738205194473267,
1949
+ "learning_rate": 2.656324653716884e-05,
1950
+ "loss": 0.2783,
1951
+ "step": 390
1952
+ },
1953
+ {
1954
+ "epoch": 5.27027027027027,
1955
+ "eval_loss": 0.7103461623191833,
1956
+ "eval_runtime": 1.0515,
1957
+ "eval_samples_per_second": 15.216,
1958
+ "eval_steps_per_second": 3.804,
1959
+ "step": 390
1960
+ },
1961
+ {
1962
+ "epoch": 5.3108108108108105,
1963
+ "grad_norm": 2.2887580394744873,
1964
+ "learning_rate": 2.5855960850413935e-05,
1965
+ "loss": 0.1575,
1966
+ "step": 393
1967
+ },
1968
+ {
1969
+ "epoch": 5.3108108108108105,
1970
+ "eval_loss": 0.7042403817176819,
1971
+ "eval_runtime": 1.0523,
1972
+ "eval_samples_per_second": 15.204,
1973
+ "eval_steps_per_second": 3.801,
1974
+ "step": 393
1975
+ },
1976
+ {
1977
+ "epoch": 5.351351351351352,
1978
+ "grad_norm": 2.6281135082244873,
1979
+ "learning_rate": 2.5154920381444025e-05,
1980
+ "loss": 0.1743,
1981
+ "step": 396
1982
+ },
1983
+ {
1984
+ "epoch": 5.351351351351352,
1985
+ "eval_loss": 0.7114053964614868,
1986
+ "eval_runtime": 1.0527,
1987
+ "eval_samples_per_second": 15.199,
1988
+ "eval_steps_per_second": 3.8,
1989
+ "step": 396
1990
+ },
1991
+ {
1992
+ "epoch": 5.391891891891892,
1993
+ "grad_norm": 1.8125991821289062,
1994
+ "learning_rate": 2.4460306464889022e-05,
1995
+ "loss": 0.1168,
1996
+ "step": 399
1997
+ },
1998
+ {
1999
+ "epoch": 5.391891891891892,
2000
+ "eval_loss": 0.7083012461662292,
2001
+ "eval_runtime": 1.0506,
2002
+ "eval_samples_per_second": 15.23,
2003
+ "eval_steps_per_second": 3.807,
2004
+ "step": 399
2005
+ },
2006
+ {
2007
+ "epoch": 5.4324324324324325,
2008
+ "grad_norm": 2.5157058238983154,
2009
+ "learning_rate": 2.3772298773054757e-05,
2010
+ "loss": 0.284,
2011
+ "step": 402
2012
+ },
2013
+ {
2014
+ "epoch": 5.4324324324324325,
2015
+ "eval_loss": 0.7072416543960571,
2016
+ "eval_runtime": 1.0524,
2017
+ "eval_samples_per_second": 15.204,
2018
+ "eval_steps_per_second": 3.801,
2019
+ "step": 402
2020
+ },
2021
+ {
2022
+ "epoch": 5.472972972972973,
2023
+ "grad_norm": 0.8739199042320251,
2024
+ "learning_rate": 2.309107526944792e-05,
2025
+ "loss": 0.1013,
2026
+ "step": 405
2027
+ },
2028
+ {
2029
+ "epoch": 5.472972972972973,
2030
+ "eval_loss": 0.7062889933586121,
2031
+ "eval_runtime": 1.051,
2032
+ "eval_samples_per_second": 15.223,
2033
+ "eval_steps_per_second": 3.806,
2034
+ "step": 405
2035
+ },
2036
+ {
2037
+ "epoch": 5.513513513513513,
2038
+ "grad_norm": 2.2809295654296875,
2039
+ "learning_rate": 2.2416812162743223e-05,
2040
+ "loss": 0.2612,
2041
+ "step": 408
2042
+ },
2043
+ {
2044
+ "epoch": 5.513513513513513,
2045
+ "eval_loss": 0.70506751537323,
2046
+ "eval_runtime": 1.053,
2047
+ "eval_samples_per_second": 15.195,
2048
+ "eval_steps_per_second": 3.799,
2049
+ "step": 408
2050
+ },
2051
+ {
2052
+ "epoch": 5.554054054054054,
2053
+ "grad_norm": 2.2030365467071533,
2054
+ "learning_rate": 2.17496838612043e-05,
2055
+ "loss": 0.1343,
2056
+ "step": 411
2057
+ },
2058
+ {
2059
+ "epoch": 5.554054054054054,
2060
+ "eval_loss": 0.7102519273757935,
2061
+ "eval_runtime": 1.0534,
2062
+ "eval_samples_per_second": 15.188,
2063
+ "eval_steps_per_second": 3.797,
2064
+ "step": 411
2065
+ },
2066
+ {
2067
+ "epoch": 5.594594594594595,
2068
+ "grad_norm": 1.4592159986495972,
2069
+ "learning_rate": 2.1089862927570475e-05,
2070
+ "loss": 0.1009,
2071
+ "step": 414
2072
+ },
2073
+ {
2074
+ "epoch": 5.594594594594595,
2075
+ "eval_loss": 0.7105306386947632,
2076
+ "eval_runtime": 1.0533,
2077
+ "eval_samples_per_second": 15.19,
2078
+ "eval_steps_per_second": 3.797,
2079
+ "step": 414
2080
+ },
2081
+ {
2082
+ "epoch": 5.635135135135135,
2083
+ "grad_norm": 2.2018954753875732,
2084
+ "learning_rate": 2.0437520034420776e-05,
2085
+ "loss": 0.3127,
2086
+ "step": 417
2087
+ },
2088
+ {
2089
+ "epoch": 5.635135135135135,
2090
+ "eval_loss": 0.7089606523513794,
2091
+ "eval_runtime": 1.0533,
2092
+ "eval_samples_per_second": 15.191,
2093
+ "eval_steps_per_second": 3.798,
2094
+ "step": 417
2095
+ },
2096
+ {
2097
+ "epoch": 5.675675675675675,
2098
+ "grad_norm": 1.8359624147415161,
2099
+ "learning_rate": 1.979282392002691e-05,
2100
+ "loss": 0.1355,
2101
+ "step": 420
2102
+ },
2103
+ {
2104
+ "epoch": 5.675675675675675,
2105
+ "eval_loss": 0.7059516906738281,
2106
+ "eval_runtime": 1.0526,
2107
+ "eval_samples_per_second": 15.201,
2108
+ "eval_steps_per_second": 3.8,
2109
+ "step": 420
2110
+ },
2111
+ {
2112
+ "epoch": 5.716216216216216,
2113
+ "grad_norm": 2.3145079612731934,
2114
+ "learning_rate": 1.9155941344706546e-05,
2115
+ "loss": 0.1345,
2116
+ "step": 423
2117
+ },
2118
+ {
2119
+ "epoch": 5.716216216216216,
2120
+ "eval_loss": 0.705683171749115,
2121
+ "eval_runtime": 1.0519,
2122
+ "eval_samples_per_second": 15.21,
2123
+ "eval_steps_per_second": 3.802,
2124
+ "step": 423
2125
+ },
2126
+ {
2127
+ "epoch": 5.756756756756757,
2128
+ "grad_norm": 1.7434961795806885,
2129
+ "learning_rate": 1.852703704768842e-05,
2130
+ "loss": 0.1865,
2131
+ "step": 426
2132
+ },
2133
+ {
2134
+ "epoch": 5.756756756756757,
2135
+ "eval_loss": 0.7038547396659851,
2136
+ "eval_runtime": 1.0535,
2137
+ "eval_samples_per_second": 15.188,
2138
+ "eval_steps_per_second": 3.797,
2139
+ "step": 426
2140
+ },
2141
+ {
2142
+ "epoch": 5.797297297297297,
2143
+ "grad_norm": 1.5850327014923096,
2144
+ "learning_rate": 1.7906273704499845e-05,
2145
+ "loss": 0.119,
2146
+ "step": 429
2147
+ },
2148
+ {
2149
+ "epoch": 5.797297297297297,
2150
+ "eval_loss": 0.7066537737846375,
2151
+ "eval_runtime": 1.0521,
2152
+ "eval_samples_per_second": 15.208,
2153
+ "eval_steps_per_second": 3.802,
2154
+ "step": 429
2155
+ },
2156
+ {
2157
+ "epoch": 5.837837837837838,
2158
+ "grad_norm": 1.599552035331726,
2159
+ "learning_rate": 1.7293811884888344e-05,
2160
+ "loss": 0.149,
2161
+ "step": 432
2162
+ },
2163
+ {
2164
+ "epoch": 5.837837837837838,
2165
+ "eval_loss": 0.7120293974876404,
2166
+ "eval_runtime": 1.0536,
2167
+ "eval_samples_per_second": 15.185,
2168
+ "eval_steps_per_second": 3.796,
2169
+ "step": 432
2170
+ },
2171
+ {
2172
+ "epoch": 5.878378378378378,
2173
+ "grad_norm": 1.8353303670883179,
2174
+ "learning_rate": 1.6689810011287932e-05,
2175
+ "loss": 0.1748,
2176
+ "step": 435
2177
+ },
2178
+ {
2179
+ "epoch": 5.878378378378378,
2180
+ "eval_loss": 0.7123138308525085,
2181
+ "eval_runtime": 1.0524,
2182
+ "eval_samples_per_second": 15.203,
2183
+ "eval_steps_per_second": 3.801,
2184
+ "step": 435
2185
+ },
2186
+ {
2187
+ "epoch": 5.918918918918919,
2188
+ "grad_norm": 1.4937026500701904,
2189
+ "learning_rate": 1.6094424317840723e-05,
2190
+ "loss": 0.1781,
2191
+ "step": 438
2192
+ },
2193
+ {
2194
+ "epoch": 5.918918918918919,
2195
+ "eval_loss": 0.7113088965415955,
2196
+ "eval_runtime": 1.0528,
2197
+ "eval_samples_per_second": 15.198,
2198
+ "eval_steps_per_second": 3.799,
2199
+ "step": 438
2200
+ },
2201
+ {
2202
+ "epoch": 5.95945945945946,
2203
+ "grad_norm": 2.0092716217041016,
2204
+ "learning_rate": 1.550780880998456e-05,
2205
+ "loss": 0.2075,
2206
+ "step": 441
2207
+ },
2208
+ {
2209
+ "epoch": 5.95945945945946,
2210
+ "eval_loss": 0.7117879390716553,
2211
+ "eval_runtime": 1.0532,
2212
+ "eval_samples_per_second": 15.192,
2213
+ "eval_steps_per_second": 3.798,
2214
+ "step": 441
2215
+ },
2216
+ {
2217
+ "epoch": 6.0,
2218
+ "grad_norm": 2.762338161468506,
2219
+ "learning_rate": 1.4930115224617353e-05,
2220
+ "loss": 0.1591,
2221
+ "step": 444
2222
+ },
2223
+ {
2224
+ "epoch": 6.0,
2225
+ "eval_loss": 0.7111848592758179,
2226
+ "eval_runtime": 1.0522,
2227
+ "eval_samples_per_second": 15.206,
2228
+ "eval_steps_per_second": 3.801,
2229
+ "step": 444
2230
+ },
2231
+ {
2232
+ "epoch": 6.04054054054054,
2233
+ "grad_norm": 1.825244665145874,
2234
+ "learning_rate": 1.436149299084789e-05,
2235
+ "loss": 0.1224,
2236
+ "step": 447
2237
+ },
2238
+ {
2239
+ "epoch": 6.04054054054054,
2240
+ "eval_loss": 0.7117843627929688,
2241
+ "eval_runtime": 1.0529,
2242
+ "eval_samples_per_second": 15.195,
2243
+ "eval_steps_per_second": 3.799,
2244
+ "step": 447
2245
+ },
2246
+ {
2247
+ "epoch": 6.081081081081081,
2248
+ "grad_norm": 0.9274085760116577,
2249
+ "learning_rate": 1.380208919134392e-05,
2250
+ "loss": 0.2234,
2251
+ "step": 450
2252
+ },
2253
+ {
2254
+ "epoch": 6.081081081081081,
2255
+ "eval_loss": 0.7170644402503967,
2256
+ "eval_runtime": 1.0513,
2257
+ "eval_samples_per_second": 15.219,
2258
+ "eval_steps_per_second": 3.805,
2259
+ "step": 450
2260
+ },
2261
+ {
2262
+ "epoch": 6.121621621621622,
2263
+ "grad_norm": 1.5220532417297363,
2264
+ "learning_rate": 1.3252048524286842e-05,
2265
+ "loss": 0.1165,
2266
+ "step": 453
2267
+ },
2268
+ {
2269
+ "epoch": 6.121621621621622,
2270
+ "eval_loss": 0.7227377891540527,
2271
+ "eval_runtime": 1.0532,
2272
+ "eval_samples_per_second": 15.191,
2273
+ "eval_steps_per_second": 3.798,
2274
+ "step": 453
2275
+ },
2276
+ {
2277
+ "epoch": 6.162162162162162,
2278
+ "grad_norm": 1.669662594795227,
2279
+ "learning_rate": 1.271151326594352e-05,
2280
+ "loss": 0.2518,
2281
+ "step": 456
2282
+ },
2283
+ {
2284
+ "epoch": 6.162162162162162,
2285
+ "eval_loss": 0.7325636148452759,
2286
+ "eval_runtime": 1.0523,
2287
+ "eval_samples_per_second": 15.205,
2288
+ "eval_steps_per_second": 3.801,
2289
+ "step": 456
2290
+ },
2291
+ {
2292
+ "epoch": 6.202702702702703,
2293
+ "grad_norm": 1.6538748741149902,
2294
+ "learning_rate": 1.2180623233864253e-05,
2295
+ "loss": 0.1288,
2296
+ "step": 459
2297
+ },
2298
+ {
2299
+ "epoch": 6.202702702702703,
2300
+ "eval_loss": 0.7430564165115356,
2301
+ "eval_runtime": 1.0597,
2302
+ "eval_samples_per_second": 15.099,
2303
+ "eval_steps_per_second": 3.775,
2304
+ "step": 459
2305
+ },
2306
+ {
2307
+ "epoch": 6.243243243243243,
2308
+ "grad_norm": 1.5836577415466309,
2309
+ "learning_rate": 1.1659515750716955e-05,
2310
+ "loss": 0.1176,
2311
+ "step": 462
2312
+ },
2313
+ {
2314
+ "epoch": 6.243243243243243,
2315
+ "eval_loss": 0.7481391429901123,
2316
+ "eval_runtime": 1.0512,
2317
+ "eval_samples_per_second": 15.221,
2318
+ "eval_steps_per_second": 3.805,
2319
+ "step": 462
2320
+ },
2321
+ {
2322
+ "epoch": 6.283783783783784,
2323
+ "grad_norm": 1.0982418060302734,
2324
+ "learning_rate": 1.1148325608766585e-05,
2325
+ "loss": 0.1231,
2326
+ "step": 465
2327
+ },
2328
+ {
2329
+ "epoch": 6.283783783783784,
2330
+ "eval_loss": 0.7511347532272339,
2331
+ "eval_runtime": 1.0552,
2332
+ "eval_samples_per_second": 15.163,
2333
+ "eval_steps_per_second": 3.791,
2334
+ "step": 465
2335
+ },
2336
+ {
2337
+ "epoch": 6.324324324324325,
2338
+ "grad_norm": 1.9232176542282104,
2339
+ "learning_rate": 1.0647185035009038e-05,
2340
+ "loss": 0.146,
2341
+ "step": 468
2342
+ },
2343
+ {
2344
+ "epoch": 6.324324324324325,
2345
+ "eval_loss": 0.7529792785644531,
2346
+ "eval_runtime": 1.0535,
2347
+ "eval_samples_per_second": 15.188,
2348
+ "eval_steps_per_second": 3.797,
2349
+ "step": 468
2350
+ },
2351
+ {
2352
+ "epoch": 6.364864864864865,
2353
+ "grad_norm": 2.5786333084106445,
2354
+ "learning_rate": 1.0156223656968694e-05,
2355
+ "loss": 0.1169,
2356
+ "step": 471
2357
+ },
2358
+ {
2359
+ "epoch": 6.364864864864865,
2360
+ "eval_loss": 0.7518468499183655,
2361
+ "eval_runtime": 1.0523,
2362
+ "eval_samples_per_second": 15.205,
2363
+ "eval_steps_per_second": 3.801,
2364
+ "step": 471
2365
+ },
2366
+ {
2367
+ "epoch": 6.405405405405405,
2368
+ "grad_norm": 1.4718759059906006,
2369
+ "learning_rate": 9.675568469168388e-06,
2370
+ "loss": 0.1048,
2371
+ "step": 474
2372
+ },
2373
+ {
2374
+ "epoch": 6.405405405405405,
2375
+ "eval_loss": 0.7540909051895142,
2376
+ "eval_runtime": 1.049,
2377
+ "eval_samples_per_second": 15.253,
2378
+ "eval_steps_per_second": 3.813,
2379
+ "step": 474
2380
+ },
2381
+ {
2382
+ "epoch": 6.445945945945946,
2383
+ "grad_norm": 1.3492368459701538,
2384
+ "learning_rate": 9.205343800280219e-06,
2385
+ "loss": 0.1092,
2386
+ "step": 477
2387
+ },
2388
+ {
2389
+ "epoch": 6.445945945945946,
2390
+ "eval_loss": 0.750686764717102,
2391
+ "eval_runtime": 1.0533,
2392
+ "eval_samples_per_second": 15.19,
2393
+ "eval_steps_per_second": 3.798,
2394
+ "step": 477
2395
+ },
2396
+ {
2397
+ "epoch": 6.486486486486487,
2398
+ "grad_norm": 2.10587739944458,
2399
+ "learning_rate": 8.745671280966177e-06,
2400
+ "loss": 0.1458,
2401
+ "step": 480
2402
+ },
2403
+ {
2404
+ "epoch": 6.486486486486487,
2405
+ "eval_loss": 0.7518497705459595,
2406
+ "eval_runtime": 1.0499,
2407
+ "eval_samples_per_second": 15.239,
2408
+ "eval_steps_per_second": 3.81,
2409
+ "step": 480
2410
+ },
2411
+ {
2412
+ "epoch": 6.527027027027027,
2413
+ "grad_norm": 0.8871177434921265,
2414
+ "learning_rate": 8.296669812416547e-06,
2415
+ "loss": 0.2177,
2416
+ "step": 483
2417
+ },
2418
+ {
2419
+ "epoch": 6.527027027027027,
2420
+ "eval_loss": 0.7509324550628662,
2421
+ "eval_runtime": 1.0528,
2422
+ "eval_samples_per_second": 15.198,
2423
+ "eval_steps_per_second": 3.8,
2424
+ "step": 483
2425
+ },
2426
+ {
2427
+ "epoch": 6.5675675675675675,
2428
+ "grad_norm": 1.299116611480713,
2429
+ "learning_rate": 7.858455535594306e-06,
2430
+ "loss": 0.1585,
2431
+ "step": 486
2432
+ },
2433
+ {
2434
+ "epoch": 6.5675675675675675,
2435
+ "eval_loss": 0.7509753108024597,
2436
+ "eval_runtime": 1.0507,
2437
+ "eval_samples_per_second": 15.228,
2438
+ "eval_steps_per_second": 3.807,
2439
+ "step": 486
2440
+ },
2441
+ {
2442
+ "epoch": 6.608108108108108,
2443
+ "grad_norm": 1.8996071815490723,
2444
+ "learning_rate": 7.431141801193508e-06,
2445
+ "loss": 0.1337,
2446
+ "step": 489
2447
+ },
2448
+ {
2449
+ "epoch": 6.608108108108108,
2450
+ "eval_loss": 0.7546273469924927,
2451
+ "eval_runtime": 1.0538,
2452
+ "eval_samples_per_second": 15.183,
2453
+ "eval_steps_per_second": 3.796,
2454
+ "step": 489
2455
+ },
2456
+ {
2457
+ "epoch": 6.648648648648649,
2458
+ "grad_norm": 2.193199634552002,
2459
+ "learning_rate": 7.014839140319485e-06,
2460
+ "loss": 0.122,
2461
+ "step": 492
2462
+ },
2463
+ {
2464
+ "epoch": 6.648648648648649,
2465
+ "eval_loss": 0.7523775100708008,
2466
+ "eval_runtime": 1.0517,
2467
+ "eval_samples_per_second": 15.213,
2468
+ "eval_steps_per_second": 3.803,
2469
+ "step": 492
2470
+ },
2471
+ {
2472
+ "epoch": 6.6891891891891895,
2473
+ "grad_norm": 1.310517430305481,
2474
+ "learning_rate": 6.609655235898227e-06,
2475
+ "loss": 0.0793,
2476
+ "step": 495
2477
+ },
2478
+ {
2479
+ "epoch": 6.6891891891891895,
2480
+ "eval_loss": 0.7553800344467163,
2481
+ "eval_runtime": 1.0524,
2482
+ "eval_samples_per_second": 15.203,
2483
+ "eval_steps_per_second": 3.801,
2484
+ "step": 495
2485
+ },
2486
+ {
2487
+ "epoch": 6.72972972972973,
2488
+ "grad_norm": 1.7615861892700195,
2489
+ "learning_rate": 6.215694894822699e-06,
2490
+ "loss": 0.1544,
2491
+ "step": 498
2492
+ },
2493
+ {
2494
+ "epoch": 6.72972972972973,
2495
+ "eval_loss": 0.7521288394927979,
2496
+ "eval_runtime": 1.0505,
2497
+ "eval_samples_per_second": 15.231,
2498
+ "eval_steps_per_second": 3.808,
2499
+ "step": 498
2500
+ },
2501
+ {
2502
+ "epoch": 6.77027027027027,
2503
+ "grad_norm": 1.4952490329742432,
2504
+ "learning_rate": 5.83306002084284e-06,
2505
+ "loss": 0.1387,
2506
+ "step": 501
2507
+ },
2508
+ {
2509
+ "epoch": 6.77027027027027,
2510
+ "eval_loss": 0.7528640627861023,
2511
+ "eval_runtime": 1.052,
2512
+ "eval_samples_per_second": 15.209,
2513
+ "eval_steps_per_second": 3.802,
2514
+ "step": 501
2515
+ },
2516
+ {
2517
+ "epoch": 6.8108108108108105,
2518
+ "grad_norm": 1.7409045696258545,
2519
+ "learning_rate": 5.461849588206724e-06,
2520
+ "loss": 0.1253,
2521
+ "step": 504
2522
+ },
2523
+ {
2524
+ "epoch": 6.8108108108108105,
2525
+ "eval_loss": 0.7528926134109497,
2526
+ "eval_runtime": 1.059,
2527
+ "eval_samples_per_second": 15.108,
2528
+ "eval_steps_per_second": 3.777,
2529
+ "step": 504
2530
+ },
2531
+ {
2532
+ "epoch": 6.851351351351351,
2533
+ "grad_norm": 0.7362686395645142,
2534
+ "learning_rate": 5.102159616059365e-06,
2535
+ "loss": 0.1296,
2536
+ "step": 507
2537
+ },
2538
+ {
2539
+ "epoch": 6.851351351351351,
2540
+ "eval_loss": 0.7542049884796143,
2541
+ "eval_runtime": 1.0521,
2542
+ "eval_samples_per_second": 15.207,
2543
+ "eval_steps_per_second": 3.802,
2544
+ "step": 507
2545
+ },
2546
+ {
2547
+ "epoch": 6.891891891891892,
2548
+ "grad_norm": 0.806505560874939,
2549
+ "learning_rate": 4.754083143605869e-06,
2550
+ "loss": 0.1094,
2551
+ "step": 510
2552
+ },
2553
+ {
2554
+ "epoch": 6.891891891891892,
2555
+ "eval_loss": 0.7515612840652466,
2556
+ "eval_runtime": 1.0559,
2557
+ "eval_samples_per_second": 15.152,
2558
+ "eval_steps_per_second": 3.788,
2559
+ "step": 510
2560
+ },
2561
+ {
2562
+ "epoch": 6.9324324324324325,
2563
+ "grad_norm": 1.5709373950958252,
2564
+ "learning_rate": 4.417710206045533e-06,
2565
+ "loss": 0.1009,
2566
+ "step": 513
2567
+ },
2568
+ {
2569
+ "epoch": 6.9324324324324325,
2570
+ "eval_loss": 0.751240611076355,
2571
+ "eval_runtime": 1.0523,
2572
+ "eval_samples_per_second": 15.205,
2573
+ "eval_steps_per_second": 3.801,
2574
+ "step": 513
2575
+ },
2576
+ {
2577
+ "epoch": 6.972972972972973,
2578
+ "grad_norm": 1.2641761302947998,
2579
+ "learning_rate": 4.093127811282821e-06,
2580
+ "loss": 0.1871,
2581
+ "step": 516
2582
+ },
2583
+ {
2584
+ "epoch": 6.972972972972973,
2585
+ "eval_loss": 0.7525576949119568,
2586
+ "eval_runtime": 1.0539,
2587
+ "eval_samples_per_second": 15.181,
2588
+ "eval_steps_per_second": 3.795,
2589
+ "step": 516
2590
+ },
2591
+ {
2592
+ "epoch": 7.013513513513513,
2593
+ "grad_norm": 0.9734938144683838,
2594
+ "learning_rate": 3.7804199174215183e-06,
2595
+ "loss": 0.1017,
2596
+ "step": 519
2597
+ },
2598
+ {
2599
+ "epoch": 7.013513513513513,
2600
+ "eval_loss": 0.7537960410118103,
2601
+ "eval_runtime": 1.0511,
2602
+ "eval_samples_per_second": 15.222,
2603
+ "eval_steps_per_second": 3.805,
2604
+ "step": 519
2605
+ },
2606
+ {
2607
+ "epoch": 7.054054054054054,
2608
+ "grad_norm": 1.4745818376541138,
2609
+ "learning_rate": 3.479667411047677e-06,
2610
+ "loss": 0.1536,
2611
+ "step": 522
2612
+ },
2613
+ {
2614
+ "epoch": 7.054054054054054,
2615
+ "eval_loss": 0.7529079914093018,
2616
+ "eval_runtime": 1.0543,
2617
+ "eval_samples_per_second": 15.176,
2618
+ "eval_steps_per_second": 3.794,
2619
+ "step": 522
2620
+ },
2621
+ {
2622
+ "epoch": 7.094594594594595,
2623
+ "grad_norm": 1.0725492238998413,
2624
+ "learning_rate": 3.1909480863070884e-06,
2625
+ "loss": 0.0886,
2626
+ "step": 525
2627
+ },
2628
+ {
2629
+ "epoch": 7.094594594594595,
2630
+ "eval_loss": 0.7565038204193115,
2631
+ "eval_runtime": 1.0511,
2632
+ "eval_samples_per_second": 15.222,
2633
+ "eval_steps_per_second": 3.806,
2634
+ "step": 525
2635
+ },
2636
+ {
2637
+ "epoch": 7.135135135135135,
2638
+ "grad_norm": 1.1345540285110474,
2639
+ "learning_rate": 2.9143366247826598e-06,
2640
+ "loss": 0.0983,
2641
+ "step": 528
2642
+ },
2643
+ {
2644
+ "epoch": 7.135135135135135,
2645
+ "eval_loss": 0.7576066255569458,
2646
+ "eval_runtime": 1.0528,
2647
+ "eval_samples_per_second": 15.198,
2648
+ "eval_steps_per_second": 3.799,
2649
+ "step": 528
2650
+ },
2651
+ {
2652
+ "epoch": 7.175675675675675,
2653
+ "grad_norm": 1.122189998626709,
2654
+ "learning_rate": 2.6499045761769315e-06,
2655
+ "loss": 0.084,
2656
+ "step": 531
2657
+ },
2658
+ {
2659
+ "epoch": 7.175675675675675,
2660
+ "eval_loss": 0.758578896522522,
2661
+ "eval_runtime": 1.0508,
2662
+ "eval_samples_per_second": 15.227,
2663
+ "eval_steps_per_second": 3.807,
2664
+ "step": 531
2665
+ },
2666
+ {
2667
+ "epoch": 7.216216216216216,
2668
+ "grad_norm": 1.6193064451217651,
2669
+ "learning_rate": 2.397720339804649e-06,
2670
+ "loss": 0.099,
2671
+ "step": 534
2672
+ },
2673
+ {
2674
+ "epoch": 7.216216216216216,
2675
+ "eval_loss": 0.7563527822494507,
2676
+ "eval_runtime": 1.0563,
2677
+ "eval_samples_per_second": 15.147,
2678
+ "eval_steps_per_second": 3.787,
2679
+ "step": 534
2680
+ },
2681
+ {
2682
+ "epoch": 7.256756756756757,
2683
+ "grad_norm": 1.373356580734253,
2684
+ "learning_rate": 2.1578491469002373e-06,
2685
+ "loss": 0.1089,
2686
+ "step": 537
2687
+ },
2688
+ {
2689
+ "epoch": 7.256756756756757,
2690
+ "eval_loss": 0.7592064142227173,
2691
+ "eval_runtime": 1.0528,
2692
+ "eval_samples_per_second": 15.197,
2693
+ "eval_steps_per_second": 3.799,
2694
+ "step": 537
2695
+ },
2696
+ {
2697
+ "epoch": 7.297297297297297,
2698
+ "grad_norm": 1.1875869035720825,
2699
+ "learning_rate": 1.9303530437448035e-06,
2700
+ "loss": 0.1145,
2701
+ "step": 540
2702
+ },
2703
+ {
2704
+ "epoch": 7.297297297297297,
2705
+ "eval_loss": 0.7611518502235413,
2706
+ "eval_runtime": 1.0529,
2707
+ "eval_samples_per_second": 15.196,
2708
+ "eval_steps_per_second": 3.799,
2709
+ "step": 540
2710
+ },
2711
+ {
2712
+ "epoch": 7.337837837837838,
2713
+ "grad_norm": 1.8787821531295776,
2714
+ "learning_rate": 1.7152908756169262e-06,
2715
+ "loss": 0.1823,
2716
+ "step": 543
2717
+ },
2718
+ {
2719
+ "epoch": 7.337837837837838,
2720
+ "eval_loss": 0.7614726424217224,
2721
+ "eval_runtime": 1.0548,
2722
+ "eval_samples_per_second": 15.168,
2723
+ "eval_steps_per_second": 3.792,
2724
+ "step": 543
2725
+ },
2726
+ {
2727
+ "epoch": 7.378378378378378,
2728
+ "grad_norm": 1.9469506740570068,
2729
+ "learning_rate": 1.5127182715714006e-06,
2730
+ "loss": 0.2784,
2731
+ "step": 546
2732
+ },
2733
+ {
2734
+ "epoch": 7.378378378378378,
2735
+ "eval_loss": 0.7602246999740601,
2736
+ "eval_runtime": 1.053,
2737
+ "eval_samples_per_second": 15.194,
2738
+ "eval_steps_per_second": 3.799,
2739
+ "step": 546
2740
+ },
2741
+ {
2742
+ "epoch": 7.418918918918919,
2743
+ "grad_norm": 1.6328327655792236,
2744
+ "learning_rate": 1.3226876300500123e-06,
2745
+ "loss": 0.0887,
2746
+ "step": 549
2747
+ },
2748
+ {
2749
+ "epoch": 7.418918918918919,
2750
+ "eval_loss": 0.7616763114929199,
2751
+ "eval_runtime": 1.0504,
2752
+ "eval_samples_per_second": 15.232,
2753
+ "eval_steps_per_second": 3.808,
2754
+ "step": 549
2755
+ },
2756
+ {
2757
+ "epoch": 7.45945945945946,
2758
+ "grad_norm": 1.5713064670562744,
2759
+ "learning_rate": 1.1452481053278396e-06,
2760
+ "loss": 0.1133,
2761
+ "step": 552
2762
+ },
2763
+ {
2764
+ "epoch": 7.45945945945946,
2765
+ "eval_loss": 0.7640103101730347,
2766
+ "eval_runtime": 1.053,
2767
+ "eval_samples_per_second": 15.195,
2768
+ "eval_steps_per_second": 3.799,
2769
+ "step": 552
2770
+ },
2771
+ {
2772
+ "epoch": 7.5,
2773
+ "grad_norm": 1.5901539325714111,
2774
+ "learning_rate": 9.804455947988067e-07,
2775
+ "loss": 0.1207,
2776
+ "step": 555
2777
+ },
2778
+ {
2779
+ "epoch": 7.5,
2780
+ "eval_loss": 0.7629836797714233,
2781
+ "eval_runtime": 1.0516,
2782
+ "eval_samples_per_second": 15.216,
2783
+ "eval_steps_per_second": 3.804,
2784
+ "step": 555
2785
+ },
2786
+ {
2787
+ "epoch": 7.54054054054054,
2788
+ "grad_norm": 1.5648808479309082,
2789
+ "learning_rate": 8.283227271035976e-07,
2790
+ "loss": 0.0954,
2791
+ "step": 558
2792
+ },
2793
+ {
2794
+ "epoch": 7.54054054054054,
2795
+ "eval_loss": 0.7643275260925293,
2796
+ "eval_runtime": 1.0548,
2797
+ "eval_samples_per_second": 15.169,
2798
+ "eval_steps_per_second": 3.792,
2799
+ "step": 558
2800
+ },
2801
+ {
2802
+ "epoch": 7.581081081081081,
2803
+ "grad_norm": 1.6403340101242065,
2804
+ "learning_rate": 6.889188511031542e-07,
2805
+ "loss": 0.1135,
2806
+ "step": 561
2807
+ },
2808
+ {
2809
+ "epoch": 7.581081081081081,
2810
+ "eval_loss": 0.7628697156906128,
2811
+ "eval_runtime": 1.0531,
2812
+ "eval_samples_per_second": 15.194,
2813
+ "eval_steps_per_second": 3.798,
2814
+ "step": 561
2815
+ },
2816
+ {
2817
+ "epoch": 7.621621621621622,
2818
+ "grad_norm": 1.393983244895935,
2819
+ "learning_rate": 5.622700257004676e-07,
2820
+ "loss": 0.096,
2821
+ "step": 564
2822
+ },
2823
+ {
2824
+ "epoch": 7.621621621621622,
2825
+ "eval_loss": 0.7637063264846802,
2826
+ "eval_runtime": 1.0544,
2827
+ "eval_samples_per_second": 15.174,
2828
+ "eval_steps_per_second": 3.793,
2829
+ "step": 564
2830
+ },
2831
+ {
2832
+ "epoch": 7.662162162162162,
2833
+ "grad_norm": 1.2016361951828003,
2834
+ "learning_rate": 4.484090105134231e-07,
2835
+ "loss": 0.1088,
2836
+ "step": 567
2837
+ },
2838
+ {
2839
+ "epoch": 7.662162162162162,
2840
+ "eval_loss": 0.7655338048934937,
2841
+ "eval_runtime": 1.0534,
2842
+ "eval_samples_per_second": 15.189,
2843
+ "eval_steps_per_second": 3.797,
2844
+ "step": 567
2845
+ },
2846
+ {
2847
+ "epoch": 7.702702702702703,
2848
+ "grad_norm": 1.1388864517211914,
2849
+ "learning_rate": 3.4736525740104444e-07,
2850
+ "loss": 0.1628,
2851
+ "step": 570
2852
+ },
2853
+ {
2854
+ "epoch": 7.702702702702703,
2855
+ "eval_loss": 0.7655097842216492,
2856
+ "eval_runtime": 1.053,
2857
+ "eval_samples_per_second": 15.195,
2858
+ "eval_steps_per_second": 3.799,
2859
+ "step": 570
2860
+ },
2861
+ {
2862
+ "epoch": 7.743243243243243,
2863
+ "grad_norm": 1.9650497436523438,
2864
+ "learning_rate": 2.591649028453047e-07,
2865
+ "loss": 0.1431,
2866
+ "step": 573
2867
+ },
2868
+ {
2869
+ "epoch": 7.743243243243243,
2870
+ "eval_loss": 0.7649960517883301,
2871
+ "eval_runtime": 1.0519,
2872
+ "eval_samples_per_second": 15.211,
2873
+ "eval_steps_per_second": 3.803,
2874
+ "step": 573
2875
+ },
2876
+ {
2877
+ "epoch": 7.783783783783784,
2878
+ "grad_norm": 1.7549225091934204,
2879
+ "learning_rate": 1.8383076119053432e-07,
2880
+ "loss": 0.1034,
2881
+ "step": 576
2882
+ },
2883
+ {
2884
+ "epoch": 7.783783783783784,
2885
+ "eval_loss": 0.763870358467102,
2886
+ "eval_runtime": 1.0529,
2887
+ "eval_samples_per_second": 15.196,
2888
+ "eval_steps_per_second": 3.799,
2889
+ "step": 576
2890
+ },
2891
+ {
2892
+ "epoch": 7.824324324324325,
2893
+ "grad_norm": 1.7549595832824707,
2894
+ "learning_rate": 1.2138231874217475e-07,
2895
+ "loss": 0.181,
2896
+ "step": 579
2897
+ },
2898
+ {
2899
+ "epoch": 7.824324324324325,
2900
+ "eval_loss": 0.7637079358100891,
2901
+ "eval_runtime": 1.0546,
2902
+ "eval_samples_per_second": 15.172,
2903
+ "eval_steps_per_second": 3.793,
2904
+ "step": 579
2905
+ },
2906
+ {
2907
+ "epoch": 7.864864864864865,
2908
+ "grad_norm": 1.3891515731811523,
2909
+ "learning_rate": 7.183572872632715e-08,
2910
+ "loss": 0.062,
2911
+ "step": 582
2912
+ },
2913
+ {
2914
+ "epoch": 7.864864864864865,
2915
+ "eval_loss": 0.7649126052856445,
2916
+ "eval_runtime": 1.0509,
2917
+ "eval_samples_per_second": 15.225,
2918
+ "eval_steps_per_second": 3.806,
2919
+ "step": 582
2920
+ },
2921
+ {
2922
+ "epoch": 7.905405405405405,
2923
+ "grad_norm": 1.0669249296188354,
2924
+ "learning_rate": 3.5203807111489074e-08,
2925
+ "loss": 0.0769,
2926
+ "step": 585
2927
+ },
2928
+ {
2929
+ "epoch": 7.905405405405405,
2930
+ "eval_loss": 0.7653980255126953,
2931
+ "eval_runtime": 1.0536,
2932
+ "eval_samples_per_second": 15.185,
2933
+ "eval_steps_per_second": 3.796,
2934
+ "step": 585
2935
+ },
2936
+ {
2937
+ "epoch": 7.945945945945946,
2938
+ "grad_norm": 2.3302104473114014,
2939
+ "learning_rate": 1.1496029293511789e-08,
2940
+ "loss": 0.1951,
2941
+ "step": 588
2942
+ },
2943
+ {
2944
+ "epoch": 7.945945945945946,
2945
+ "eval_loss": 0.7646524906158447,
2946
+ "eval_runtime": 1.0566,
2947
+ "eval_samples_per_second": 15.143,
2948
+ "eval_steps_per_second": 3.786,
2949
+ "step": 588
2950
+ },
2951
+ {
2952
+ "epoch": 7.986486486486487,
2953
+ "grad_norm": 1.9744952917099,
2954
+ "learning_rate": 7.185276446441958e-10,
2955
+ "loss": 0.1175,
2956
+ "step": 591
2957
+ },
2958
+ {
2959
+ "epoch": 7.986486486486487,
2960
+ "eval_loss": 0.765015721321106,
2961
+ "eval_runtime": 1.0522,
2962
+ "eval_samples_per_second": 15.206,
2963
+ "eval_steps_per_second": 3.801,
2964
+ "step": 591
2965
+ },
2966
+ {
2967
+ "epoch": 8.0,
2968
+ "step": 592,
2969
+ "total_flos": 1188976147968000.0,
2970
+ "train_loss": 0.4161318518926163,
2971
+ "train_runtime": 741.424,
2972
+ "train_samples_per_second": 3.194,
2973
+ "train_steps_per_second": 0.798
2974
+ }
2975
+ ],
2976
+ "logging_steps": 3,
2977
+ "max_steps": 592,
2978
+ "num_input_tokens_seen": 0,
2979
+ "num_train_epochs": 8,
2980
+ "save_steps": 50,
2981
+ "stateful_callbacks": {
2982
+ "TrainerControl": {
2983
+ "args": {
2984
+ "should_epoch_stop": false,
2985
+ "should_evaluate": false,
2986
+ "should_log": false,
2987
+ "should_save": true,
2988
+ "should_training_stop": true
2989
+ },
2990
+ "attributes": {}
2991
+ }
2992
+ },
2993
+ "total_flos": 1188976147968000.0,
2994
+ "train_batch_size": 4,
2995
+ "trial_name": null,
2996
+ "trial_params": null
2997
+ }
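
The records above are the tail of the trainer-state log that `transformers.Trainer` writes out at the end of a run: each training record carries `loss`, `grad_norm` and `learning_rate` at a logging step, and each evaluation record carries `eval_loss` plus runtime/throughput figures for the same step. As a minimal sketch (not part of this commit), assuming the file has been downloaded from this repo as `trainer_state.json` and that `matplotlib` is installed, the log could be turned into loss curves like this:

```python
import json

import matplotlib.pyplot as plt

# Load the state file written by transformers.Trainer at the end of training.
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history interleaves training records (key "loss") and eval records (key "eval_loss").
train = [(r["step"], r["loss"]) for r in state["log_history"] if "loss" in r]
evals = [(r["step"], r["eval_loss"]) for r in state["log_history"] if "eval_loss" in r]

plt.plot(*zip(*train), label="train loss")
plt.plot(*zip(*evals), label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curves.png")
```

Plotted this way, the late-training picture in the appended entries is easy to see: training loss keeps dropping toward roughly 0.1 while `eval_loss` flattens out around 0.75–0.77.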
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a5d717faee84e567649c08b8e6fc776fbd52823ae864a482fe19d98642a27aca
+ oid sha256:ada886b0a7db7fa910beb2db1b673de82468f97a7444c0403b79df92cd914bcd
  size 5496
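
Only the Git LFS pointer changes here (a new `oid`), meaning the serialized `TrainingArguments` object was re-saved. A quick way to inspect the stored arguments, as a sketch assuming the LFS object has actually been pulled to disk and `transformers` is importable so the pickled class can be resolved, would be:

```python
import torch

# training_args.bin is a torch.save() pickle of a transformers.TrainingArguments object,
# so weights_only=False is needed on recent PyTorch versions (it is not a plain tensor file).
args = torch.load("training_args.bin", weights_only=False)

# Spot-check a few of the recorded hyperparameters.
print(args.learning_rate, args.num_train_epochs, args.lr_scheduler_type, args.per_device_train_batch_size)
```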