DeepDream2045 committed
Commit 4182f3d · verified · 1 Parent(s): 3eebec3

Training in progress, step 25, checkpoint

last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
 
24
  "k_proj",
 
25
  "up_proj",
26
  "v_proj",
27
- "q_proj",
28
- "down_proj",
29
- "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "o_proj",
24
+ "down_proj",
25
  "k_proj",
26
+ "q_proj",
27
  "up_proj",
28
  "v_proj",
29
+ "gate_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c85530c8829c972c803678f4e61f994ba11f32568c95b4ce7d189759a42c21ca
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de2f393ba7d3307f8201765e52ce0a815d5c2a1308f1d79b80c8b0a10eaf7505
3
  size 335604696
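
This file, like the other binary checkpoint files in this commit (optimizer.pt, the rng_state_*.pth files, scheduler.pt, training_args.bin), is stored as a Git LFS pointer: a three-line stub carrying the spec version, the SHA-256 object id, and the byte size, while the payload lives in LFS storage. A minimal sketch for inspecting such a pointer (parse_lfs_pointer is an illustrative helper, not part of any tool in this repo):

```python
# Parse a Git LFS pointer file of the form shown above:
#   version https://git-lfs.github.com/spec/v1
#   oid sha256:<hash>
#   size <bytes>
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    """Return the version, oid and size fields of a Git LFS pointer file."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

ptr = parse_lfs_pointer("last-checkpoint/adapter_model.safetensors")
print(ptr["oid"], ptr["size"])  # e.g. sha256:de2f... 335604696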
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be4c182d0a59789e68ffb1c10dc74475d6f5676656825b0b691f5227f0492fb3
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:355747ebcb892880883f348859a1427ce13bd528006f22bf5e453e46e5c1bf38
3
  size 671466706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6eb8d5c147ad9f15f58d642bd9a80dcfa788cef5f4c846030459c947252abe11
3
- size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87ed465b8131ade14ccc40a75420ec5291bdb48b0617b0af2e06cae9f1980e34
3
+ size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:691c181ae1aaa6fa73f587c7e924f10984925bc0671fca98e57f83b274ababe8
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937b3c763de285bf8e7976c73ee06ad859d8bffcb0e083004bb3c438d6bf5d1c
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec17bc49e5b067fb69153fc378af21855884587be985ef4fdf200934460cc8a2
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f23b5c8be9c199811b18b21da2310d649f6bb783653318cb5d79b5bdb05a3478
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54eef41e231db63f3f2d49b345aa1a99ba729a652a120d8a8825f26878c98d0f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a2a6911123be9d1d2e369e6d11060a86b30a5641e6b182922c97d232b2ff96e
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,386 +1,203 @@
1
  {
2
- "best_metric": 2.1860828399658203,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 0.017588989292702768,
5
  "eval_steps": 25,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.00035177978585405537,
13
- "grad_norm": 5.452480792999268,
14
  "learning_rate": 5e-05,
15
- "loss": 32.5319,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.00035177978585405537,
20
- "eval_loss": 2.299668550491333,
21
- "eval_runtime": 824.925,
22
- "eval_samples_per_second": 23.215,
23
- "eval_steps_per_second": 2.902,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.0007035595717081107,
28
- "grad_norm": 7.733003616333008,
29
  "learning_rate": 0.0001,
30
  "loss": 33.1713,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.001055339357562166,
35
- "grad_norm": 7.30857515335083,
36
  "learning_rate": 9.989294616193017e-05,
37
- "loss": 32.8704,
38
  "step": 3
39
  },
40
  {
41
  "epoch": 0.0014071191434162215,
42
- "grad_norm": 6.937483310699463,
43
  "learning_rate": 9.957224306869053e-05,
44
- "loss": 33.1679,
45
  "step": 4
46
  },
47
  {
48
  "epoch": 0.0017588989292702769,
49
- "grad_norm": 7.842650890350342,
50
  "learning_rate": 9.903926402016153e-05,
51
- "loss": 33.91,
52
  "step": 5
53
  },
54
  {
55
  "epoch": 0.002110678715124332,
56
- "grad_norm": 9.340250015258789,
57
  "learning_rate": 9.829629131445342e-05,
58
- "loss": 33.5161,
59
  "step": 6
60
  },
61
  {
62
  "epoch": 0.0024624585009783875,
63
- "grad_norm": 9.52157211303711,
64
  "learning_rate": 9.73465064747553e-05,
65
- "loss": 33.634,
66
  "step": 7
67
  },
68
  {
69
  "epoch": 0.002814238286832443,
70
- "grad_norm": 9.34510612487793,
71
  "learning_rate": 9.619397662556435e-05,
72
- "loss": 34.761,
73
  "step": 8
74
  },
75
  {
76
  "epoch": 0.0031660180726864984,
77
- "grad_norm": 13.48401165008545,
78
  "learning_rate": 9.484363707663442e-05,
79
- "loss": 36.0081,
80
  "step": 9
81
  },
82
  {
83
  "epoch": 0.0035177978585405538,
84
- "grad_norm": 15.09841537475586,
85
  "learning_rate": 9.330127018922194e-05,
86
- "loss": 36.3556,
87
  "step": 10
88
  },
89
  {
90
  "epoch": 0.0038695776443946088,
91
- "grad_norm": 17.29768180847168,
92
  "learning_rate": 9.157348061512727e-05,
93
- "loss": 38.7349,
94
  "step": 11
95
  },
96
  {
97
  "epoch": 0.004221357430248664,
98
- "grad_norm": 22.131227493286133,
99
  "learning_rate": 8.966766701456177e-05,
100
- "loss": 41.6837,
101
  "step": 12
102
  },
103
  {
104
  "epoch": 0.00457313721610272,
105
- "grad_norm": 23.215797424316406,
106
  "learning_rate": 8.759199037394887e-05,
107
- "loss": 39.7892,
108
  "step": 13
109
  },
110
  {
111
  "epoch": 0.004924917001956775,
112
- "grad_norm": 9.220294952392578,
113
  "learning_rate": 8.535533905932738e-05,
114
- "loss": 33.2681,
115
  "step": 14
116
  },
117
  {
118
  "epoch": 0.0052766967878108305,
119
- "grad_norm": 9.76252555847168,
120
  "learning_rate": 8.296729075500344e-05,
121
- "loss": 33.4629,
122
  "step": 15
123
  },
124
  {
125
  "epoch": 0.005628476573664886,
126
- "grad_norm": 8.49485969543457,
127
  "learning_rate": 8.043807145043604e-05,
128
- "loss": 33.9832,
129
  "step": 16
130
  },
131
  {
132
  "epoch": 0.005980256359518941,
133
- "grad_norm": 8.919995307922363,
134
  "learning_rate": 7.777851165098012e-05,
135
- "loss": 34.0772,
136
  "step": 17
137
  },
138
  {
139
  "epoch": 0.006332036145372997,
140
- "grad_norm": 10.096283912658691,
141
  "learning_rate": 7.500000000000001e-05,
142
- "loss": 32.312,
143
  "step": 18
144
  },
145
  {
146
  "epoch": 0.006683815931227052,
147
- "grad_norm": 9.981393814086914,
148
  "learning_rate": 7.211443451095007e-05,
149
- "loss": 33.6215,
150
  "step": 19
151
  },
152
  {
153
  "epoch": 0.0070355957170811076,
154
- "grad_norm": 9.682584762573242,
155
  "learning_rate": 6.91341716182545e-05,
156
- "loss": 34.3393,
157
  "step": 20
158
  },
159
  {
160
  "epoch": 0.007387375502935163,
161
- "grad_norm": 10.434945106506348,
162
  "learning_rate": 6.607197326515808e-05,
163
- "loss": 34.8918,
164
  "step": 21
165
  },
166
  {
167
  "epoch": 0.0077391552887892175,
168
- "grad_norm": 11.270380973815918,
169
  "learning_rate": 6.294095225512603e-05,
170
- "loss": 35.4063,
171
  "step": 22
172
  },
173
  {
174
  "epoch": 0.008090935074643273,
175
- "grad_norm": 14.67983341217041,
176
  "learning_rate": 5.9754516100806423e-05,
177
- "loss": 36.4115,
178
  "step": 23
179
  },
180
  {
181
  "epoch": 0.008442714860497328,
182
- "grad_norm": 14.8894681930542,
183
  "learning_rate": 5.6526309611002594e-05,
184
- "loss": 38.8561,
185
  "step": 24
186
  },
187
  {
188
  "epoch": 0.008794494646351384,
189
- "grad_norm": 24.04422378540039,
190
  "learning_rate": 5.327015646150716e-05,
191
- "loss": 45.1928,
192
  "step": 25
193
  },
194
  {
195
  "epoch": 0.008794494646351384,
196
- "eval_loss": 2.192323923110962,
197
- "eval_runtime": 826.0176,
198
- "eval_samples_per_second": 23.185,
199
- "eval_steps_per_second": 2.898,
200
  "step": 25
201
- },
202
- {
203
- "epoch": 0.00914627443220544,
204
- "grad_norm": 3.8151047229766846,
205
- "learning_rate": 5e-05,
206
- "loss": 32.5243,
207
- "step": 26
208
- },
209
- {
210
- "epoch": 0.009498054218059495,
211
- "grad_norm": 3.8212320804595947,
212
- "learning_rate": 4.6729843538492847e-05,
213
- "loss": 31.5323,
214
- "step": 27
215
- },
216
- {
217
- "epoch": 0.00984983400391355,
218
- "grad_norm": 4.514546871185303,
219
- "learning_rate": 4.347369038899744e-05,
220
- "loss": 32.7899,
221
- "step": 28
222
- },
223
- {
224
- "epoch": 0.010201613789767605,
225
- "grad_norm": 5.079270839691162,
226
- "learning_rate": 4.0245483899193595e-05,
227
- "loss": 33.4231,
228
- "step": 29
229
- },
230
- {
231
- "epoch": 0.010553393575621661,
232
- "grad_norm": 5.683069705963135,
233
- "learning_rate": 3.705904774487396e-05,
234
- "loss": 31.9174,
235
- "step": 30
236
- },
237
- {
238
- "epoch": 0.010905173361475716,
239
- "grad_norm": 7.779140949249268,
240
- "learning_rate": 3.392802673484193e-05,
241
- "loss": 32.5951,
242
- "step": 31
243
- },
244
- {
245
- "epoch": 0.011256953147329772,
246
- "grad_norm": 7.834434986114502,
247
- "learning_rate": 3.086582838174551e-05,
248
- "loss": 33.5486,
249
- "step": 32
250
- },
251
- {
252
- "epoch": 0.011608732933183827,
253
- "grad_norm": 8.553311347961426,
254
- "learning_rate": 2.7885565489049946e-05,
255
- "loss": 35.0256,
256
- "step": 33
257
- },
258
- {
259
- "epoch": 0.011960512719037883,
260
- "grad_norm": 10.053312301635742,
261
- "learning_rate": 2.500000000000001e-05,
262
- "loss": 35.1863,
263
- "step": 34
264
- },
265
- {
266
- "epoch": 0.012312292504891938,
267
- "grad_norm": 11.821266174316406,
268
- "learning_rate": 2.2221488349019903e-05,
269
- "loss": 36.9949,
270
- "step": 35
271
- },
272
- {
273
- "epoch": 0.012664072290745993,
274
- "grad_norm": 14.664130210876465,
275
- "learning_rate": 1.9561928549563968e-05,
276
- "loss": 38.2195,
277
- "step": 36
278
- },
279
- {
280
- "epoch": 0.013015852076600049,
281
- "grad_norm": 17.453157424926758,
282
- "learning_rate": 1.703270924499656e-05,
283
- "loss": 39.6056,
284
- "step": 37
285
- },
286
- {
287
- "epoch": 0.013367631862454104,
288
- "grad_norm": 20.678699493408203,
289
- "learning_rate": 1.4644660940672627e-05,
290
- "loss": 39.366,
291
- "step": 38
292
- },
293
- {
294
- "epoch": 0.01371941164830816,
295
- "grad_norm": 3.5132977962493896,
296
- "learning_rate": 1.2408009626051137e-05,
297
- "loss": 33.041,
298
- "step": 39
299
- },
300
- {
301
- "epoch": 0.014071191434162215,
302
- "grad_norm": 7.413121700286865,
303
- "learning_rate": 1.0332332985438248e-05,
304
- "loss": 33.2397,
305
- "step": 40
306
- },
307
- {
308
- "epoch": 0.01442297122001627,
309
- "grad_norm": 4.901597499847412,
310
- "learning_rate": 8.426519384872733e-06,
311
- "loss": 33.1532,
312
- "step": 41
313
- },
314
- {
315
- "epoch": 0.014774751005870326,
316
- "grad_norm": 5.69987678527832,
317
- "learning_rate": 6.698729810778065e-06,
318
- "loss": 31.7657,
319
- "step": 42
320
- },
321
- {
322
- "epoch": 0.015126530791724381,
323
- "grad_norm": 6.1713056564331055,
324
- "learning_rate": 5.156362923365588e-06,
325
- "loss": 32.1028,
326
- "step": 43
327
- },
328
- {
329
- "epoch": 0.015478310577578435,
330
- "grad_norm": 6.70172119140625,
331
- "learning_rate": 3.8060233744356633e-06,
332
- "loss": 34.2542,
333
- "step": 44
334
- },
335
- {
336
- "epoch": 0.01583009036343249,
337
- "grad_norm": 7.707215309143066,
338
- "learning_rate": 2.653493525244721e-06,
339
- "loss": 33.1567,
340
- "step": 45
341
- },
342
- {
343
- "epoch": 0.016181870149286546,
344
- "grad_norm": 8.802955627441406,
345
- "learning_rate": 1.70370868554659e-06,
346
- "loss": 34.1178,
347
- "step": 46
348
- },
349
- {
350
- "epoch": 0.0165336499351406,
351
- "grad_norm": 10.024290084838867,
352
- "learning_rate": 9.607359798384785e-07,
353
- "loss": 35.9152,
354
- "step": 47
355
- },
356
- {
357
- "epoch": 0.016885429720994657,
358
- "grad_norm": 11.615968704223633,
359
- "learning_rate": 4.277569313094809e-07,
360
- "loss": 37.0089,
361
- "step": 48
362
- },
363
- {
364
- "epoch": 0.017237209506848712,
365
- "grad_norm": 14.557236671447754,
366
- "learning_rate": 1.0705383806982606e-07,
367
- "loss": 38.5182,
368
- "step": 49
369
- },
370
- {
371
- "epoch": 0.017588989292702768,
372
- "grad_norm": 21.180112838745117,
373
- "learning_rate": 0.0,
374
- "loss": 45.6105,
375
- "step": 50
376
- },
377
- {
378
- "epoch": 0.017588989292702768,
379
- "eval_loss": 2.1860828399658203,
380
- "eval_runtime": 840.0113,
381
- "eval_samples_per_second": 22.799,
382
- "eval_steps_per_second": 2.85,
383
- "step": 50
384
  }
385
  ],
386
  "logging_steps": 1,
@@ -404,12 +221,12 @@
404
  "should_evaluate": false,
405
  "should_log": false,
406
  "should_save": true,
407
- "should_training_stop": true
408
  },
409
  "attributes": {}
410
  }
411
  },
412
- "total_flos": 5.658021338284032e+17,
413
  "train_batch_size": 2,
414
  "trial_name": null,
415
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.192530632019043,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-25",
4
+ "epoch": 0.008794494646351384,
5
  "eval_steps": 25,
6
+ "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.00035177978585405537,
13
+ "grad_norm": 5.538167476654053,
14
  "learning_rate": 5e-05,
15
+ "loss": 32.5311,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.00035177978585405537,
20
+ "eval_loss": 2.2996881008148193,
21
+ "eval_runtime": 1602.8053,
22
+ "eval_samples_per_second": 11.948,
23
+ "eval_steps_per_second": 1.494,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.0007035595717081107,
28
+ "grad_norm": 7.6789164543151855,
29
  "learning_rate": 0.0001,
30
  "loss": 33.1713,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.001055339357562166,
35
+ "grad_norm": 7.3845014572143555,
36
  "learning_rate": 9.989294616193017e-05,
37
+ "loss": 32.8712,
38
  "step": 3
39
  },
40
  {
41
  "epoch": 0.0014071191434162215,
42
+ "grad_norm": 6.956945419311523,
43
  "learning_rate": 9.957224306869053e-05,
44
+ "loss": 33.1673,
45
  "step": 4
46
  },
47
  {
48
  "epoch": 0.0017588989292702769,
49
+ "grad_norm": 8.048707008361816,
50
  "learning_rate": 9.903926402016153e-05,
51
+ "loss": 33.9079,
52
  "step": 5
53
  },
54
  {
55
  "epoch": 0.002110678715124332,
56
+ "grad_norm": 9.416152000427246,
57
  "learning_rate": 9.829629131445342e-05,
58
+ "loss": 33.522,
59
  "step": 6
60
  },
61
  {
62
  "epoch": 0.0024624585009783875,
63
+ "grad_norm": 9.693207740783691,
64
  "learning_rate": 9.73465064747553e-05,
65
+ "loss": 33.6303,
66
  "step": 7
67
  },
68
  {
69
  "epoch": 0.002814238286832443,
70
+ "grad_norm": 9.254134178161621,
71
  "learning_rate": 9.619397662556435e-05,
72
+ "loss": 34.7466,
73
  "step": 8
74
  },
75
  {
76
  "epoch": 0.0031660180726864984,
77
+ "grad_norm": 14.359371185302734,
78
  "learning_rate": 9.484363707663442e-05,
79
+ "loss": 36.0151,
80
  "step": 9
81
  },
82
  {
83
  "epoch": 0.0035177978585405538,
84
+ "grad_norm": 15.26530933380127,
85
  "learning_rate": 9.330127018922194e-05,
86
+ "loss": 36.3619,
87
  "step": 10
88
  },
89
  {
90
  "epoch": 0.0038695776443946088,
91
+ "grad_norm": 17.525087356567383,
92
  "learning_rate": 9.157348061512727e-05,
93
+ "loss": 38.7371,
94
  "step": 11
95
  },
96
  {
97
  "epoch": 0.004221357430248664,
98
+ "grad_norm": 22.53554916381836,
99
  "learning_rate": 8.966766701456177e-05,
100
+ "loss": 41.6351,
101
  "step": 12
102
  },
103
  {
104
  "epoch": 0.00457313721610272,
105
+ "grad_norm": 22.92333221435547,
106
  "learning_rate": 8.759199037394887e-05,
107
+ "loss": 39.7522,
108
  "step": 13
109
  },
110
  {
111
  "epoch": 0.004924917001956775,
112
+ "grad_norm": 9.440299034118652,
113
  "learning_rate": 8.535533905932738e-05,
114
+ "loss": 33.28,
115
  "step": 14
116
  },
117
  {
118
  "epoch": 0.0052766967878108305,
119
+ "grad_norm": 10.076234817504883,
120
  "learning_rate": 8.296729075500344e-05,
121
+ "loss": 33.4697,
122
  "step": 15
123
  },
124
  {
125
  "epoch": 0.005628476573664886,
126
+ "grad_norm": 8.652132034301758,
127
  "learning_rate": 8.043807145043604e-05,
128
+ "loss": 33.9816,
129
  "step": 16
130
  },
131
  {
132
  "epoch": 0.005980256359518941,
133
+ "grad_norm": 8.856562614440918,
134
  "learning_rate": 7.777851165098012e-05,
135
+ "loss": 34.08,
136
  "step": 17
137
  },
138
  {
139
  "epoch": 0.006332036145372997,
140
+ "grad_norm": 9.761141777038574,
141
  "learning_rate": 7.500000000000001e-05,
142
+ "loss": 32.3105,
143
  "step": 18
144
  },
145
  {
146
  "epoch": 0.006683815931227052,
147
+ "grad_norm": 9.883894920349121,
148
  "learning_rate": 7.211443451095007e-05,
149
+ "loss": 33.6084,
150
  "step": 19
151
  },
152
  {
153
  "epoch": 0.0070355957170811076,
154
+ "grad_norm": 9.651225090026855,
155
  "learning_rate": 6.91341716182545e-05,
156
+ "loss": 34.3374,
157
  "step": 20
158
  },
159
  {
160
  "epoch": 0.007387375502935163,
161
+ "grad_norm": 10.468379974365234,
162
  "learning_rate": 6.607197326515808e-05,
163
+ "loss": 34.8942,
164
  "step": 21
165
  },
166
  {
167
  "epoch": 0.0077391552887892175,
168
+ "grad_norm": 11.01174259185791,
169
  "learning_rate": 6.294095225512603e-05,
170
+ "loss": 35.4242,
171
  "step": 22
172
  },
173
  {
174
  "epoch": 0.008090935074643273,
175
+ "grad_norm": 14.812124252319336,
176
  "learning_rate": 5.9754516100806423e-05,
177
+ "loss": 36.4068,
178
  "step": 23
179
  },
180
  {
181
  "epoch": 0.008442714860497328,
182
+ "grad_norm": 14.898117065429688,
183
  "learning_rate": 5.6526309611002594e-05,
184
+ "loss": 38.8318,
185
  "step": 24
186
  },
187
  {
188
  "epoch": 0.008794494646351384,
189
+ "grad_norm": 24.162452697753906,
190
  "learning_rate": 5.327015646150716e-05,
191
+ "loss": 45.2215,
192
  "step": 25
193
  },
194
  {
195
  "epoch": 0.008794494646351384,
196
+ "eval_loss": 2.192530632019043,
197
+ "eval_runtime": 1599.3943,
198
+ "eval_samples_per_second": 11.974,
199
+ "eval_steps_per_second": 1.497,
200
  "step": 25
201
  }
202
  ],
203
  "logging_steps": 1,
 
221
  "should_evaluate": false,
222
  "should_log": false,
223
  "should_save": true,
224
+ "should_training_stop": false
225
  },
226
  "attributes": {}
227
  }
228
  },
229
+ "total_flos": 2.829010669142016e+17,
230
  "train_batch_size": 2,
231
  "trial_name": null,
232
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16f2b131c480d74238a837157ac676c7e56c9d6bee5aa1f992285c582f0217e2
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69bac34a33d8ba4e16c3a405dec0325e35527f02667dc56a89ad05189d58bea7
3
  size 6776