anoaky commited on
Commit
344bebb
·
verified ·
1 Parent(s): 0a68e4c

Training in progress, epoch 1, checkpoint

Browse files
checkpoint-555/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91b0eaf62f6feb29f8e6c91e27f4c943c7c5cb4f86aeb4b64c72f034a0e92bdd
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4925590b4fcc049337dd052850da563e4659200d9b37fc4bd1edc4cc27cbc07d
3
  size 437958648
checkpoint-555/trainer_state.json CHANGED
@@ -10,409 +10,409 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.018026137899954935,
13
- "grad_norm": 13.032843589782715,
14
- "learning_rate": 4.981949458483755e-05,
15
- "loss": 0.6633,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.03605227579990987,
20
- "grad_norm": 35.63669204711914,
21
- "learning_rate": 4.963898916967509e-05,
22
- "loss": 0.5651,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.054078413699864804,
27
- "grad_norm": 16.95796012878418,
28
- "learning_rate": 4.945848375451264e-05,
29
- "loss": 0.5359,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.07210455159981974,
34
- "grad_norm": 14.045697212219238,
35
- "learning_rate": 4.927797833935018e-05,
36
- "loss": 0.5421,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.09013068949977468,
41
- "grad_norm": 43.34794235229492,
42
- "learning_rate": 4.909747292418773e-05,
43
- "loss": 0.5222,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.10815682739972961,
48
- "grad_norm": 32.69561767578125,
49
- "learning_rate": 4.891696750902527e-05,
50
- "loss": 0.4589,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.12618296529968454,
55
- "grad_norm": 29.39051055908203,
56
- "learning_rate": 4.873646209386282e-05,
57
- "loss": 0.4858,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.14420910319963948,
62
- "grad_norm": 30.52906608581543,
63
- "learning_rate": 4.855595667870036e-05,
64
- "loss": 0.4418,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.16223524109959442,
69
- "grad_norm": 15.103797912597656,
70
- "learning_rate": 4.837545126353791e-05,
71
- "loss": 0.4713,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.18026137899954936,
76
- "grad_norm": 40.57420349121094,
77
- "learning_rate": 4.819494584837546e-05,
78
- "loss": 0.468,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.19828751689950427,
83
- "grad_norm": 37.67963409423828,
84
- "learning_rate": 4.8014440433213e-05,
85
- "loss": 0.492,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.21631365479945922,
90
- "grad_norm": 30.945823669433594,
91
- "learning_rate": 4.783393501805055e-05,
92
- "loss": 0.4633,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.23433979269941416,
97
- "grad_norm": 40.709590911865234,
98
- "learning_rate": 4.765342960288809e-05,
99
- "loss": 0.4392,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.25236593059936907,
104
- "grad_norm": 19.698150634765625,
105
- "learning_rate": 4.747292418772563e-05,
106
- "loss": 0.4707,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.270392068499324,
111
- "grad_norm": 21.289947509765625,
112
- "learning_rate": 4.7292418772563177e-05,
113
- "loss": 0.4576,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.28841820639927895,
118
- "grad_norm": 26.911935806274414,
119
- "learning_rate": 4.711191335740072e-05,
120
- "loss": 0.4583,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.3064443442992339,
125
- "grad_norm": 19.308853149414062,
126
- "learning_rate": 4.693140794223827e-05,
127
- "loss": 0.4937,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.32447048219918884,
132
- "grad_norm": 34.61503219604492,
133
- "learning_rate": 4.675090252707581e-05,
134
- "loss": 0.4836,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.3424966200991438,
139
- "grad_norm": 16.551424026489258,
140
- "learning_rate": 4.657039711191336e-05,
141
- "loss": 0.4919,
142
  "step": 190
143
  },
144
  {
145
  "epoch": 0.3605227579990987,
146
- "grad_norm": 16.011377334594727,
147
- "learning_rate": 4.63898916967509e-05,
148
- "loss": 0.4518,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.3785488958990536,
153
- "grad_norm": 22.71525764465332,
154
- "learning_rate": 4.620938628158845e-05,
155
- "loss": 0.4221,
156
  "step": 210
157
  },
158
  {
159
  "epoch": 0.39657503379900855,
160
- "grad_norm": 22.2515811920166,
161
- "learning_rate": 4.602888086642599e-05,
162
- "loss": 0.4286,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.4146011716989635,
167
- "grad_norm": 18.233482360839844,
168
- "learning_rate": 4.584837545126354e-05,
169
- "loss": 0.4442,
170
  "step": 230
171
  },
172
  {
173
  "epoch": 0.43262730959891843,
174
- "grad_norm": 33.190948486328125,
175
- "learning_rate": 4.566787003610109e-05,
176
- "loss": 0.4584,
177
  "step": 240
178
  },
179
  {
180
  "epoch": 0.45065344749887337,
181
- "grad_norm": 29.945205688476562,
182
- "learning_rate": 4.548736462093863e-05,
183
- "loss": 0.4339,
184
  "step": 250
185
  },
186
  {
187
  "epoch": 0.4686795853988283,
188
- "grad_norm": 17.004419326782227,
189
- "learning_rate": 4.530685920577618e-05,
190
- "loss": 0.4504,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.48670572329878325,
195
- "grad_norm": 23.36432647705078,
196
- "learning_rate": 4.5126353790613716e-05,
197
- "loss": 0.4405,
198
  "step": 270
199
  },
200
  {
201
  "epoch": 0.5047318611987381,
202
- "grad_norm": 29.156505584716797,
203
- "learning_rate": 4.494584837545127e-05,
204
- "loss": 0.4319,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.5227579990986931,
209
- "grad_norm": 16.330259323120117,
210
- "learning_rate": 4.4765342960288806e-05,
211
- "loss": 0.453,
212
  "step": 290
213
  },
214
  {
215
  "epoch": 0.540784136998648,
216
- "grad_norm": 21.66246223449707,
217
- "learning_rate": 4.458483754512636e-05,
218
- "loss": 0.416,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.558810274898603,
223
- "grad_norm": 22.177268981933594,
224
- "learning_rate": 4.44043321299639e-05,
225
- "loss": 0.4456,
226
  "step": 310
227
  },
228
  {
229
  "epoch": 0.5768364127985579,
230
- "grad_norm": 38.450069427490234,
231
- "learning_rate": 4.422382671480145e-05,
232
- "loss": 0.4216,
233
  "step": 320
234
  },
235
  {
236
  "epoch": 0.5948625506985128,
237
- "grad_norm": 28.213655471801758,
238
- "learning_rate": 4.404332129963899e-05,
239
- "loss": 0.4392,
240
  "step": 330
241
  },
242
  {
243
  "epoch": 0.6128886885984678,
244
- "grad_norm": 19.354459762573242,
245
- "learning_rate": 4.386281588447654e-05,
246
- "loss": 0.4303,
247
  "step": 340
248
  },
249
  {
250
  "epoch": 0.6309148264984227,
251
- "grad_norm": 20.212663650512695,
252
- "learning_rate": 4.368231046931408e-05,
253
- "loss": 0.4426,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 0.6489409643983777,
258
- "grad_norm": 34.96459197998047,
259
- "learning_rate": 4.350180505415163e-05,
260
- "loss": 0.4146,
261
  "step": 360
262
  },
263
  {
264
  "epoch": 0.6669671022983326,
265
- "grad_norm": 17.65025520324707,
266
- "learning_rate": 4.332129963898917e-05,
267
- "loss": 0.4506,
268
  "step": 370
269
  },
270
  {
271
  "epoch": 0.6849932401982876,
272
- "grad_norm": 18.796554565429688,
273
- "learning_rate": 4.314079422382672e-05,
274
- "loss": 0.3765,
275
  "step": 380
276
  },
277
  {
278
  "epoch": 0.7030193780982424,
279
- "grad_norm": 21.690357208251953,
280
- "learning_rate": 4.296028880866426e-05,
281
- "loss": 0.4158,
282
  "step": 390
283
  },
284
  {
285
  "epoch": 0.7210455159981974,
286
- "grad_norm": 39.8782844543457,
287
- "learning_rate": 4.277978339350181e-05,
288
- "loss": 0.4602,
289
  "step": 400
290
  },
291
  {
292
  "epoch": 0.7390716538981523,
293
- "grad_norm": 26.3798885345459,
294
- "learning_rate": 4.259927797833935e-05,
295
- "loss": 0.4512,
296
  "step": 410
297
  },
298
  {
299
  "epoch": 0.7570977917981072,
300
- "grad_norm": 18.073617935180664,
301
- "learning_rate": 4.24187725631769e-05,
302
- "loss": 0.4347,
303
  "step": 420
304
  },
305
  {
306
  "epoch": 0.7751239296980622,
307
- "grad_norm": 30.09633445739746,
308
- "learning_rate": 4.223826714801444e-05,
309
  "loss": 0.3858,
310
  "step": 430
311
  },
312
  {
313
  "epoch": 0.7931500675980171,
314
- "grad_norm": 17.02672576904297,
315
- "learning_rate": 4.205776173285199e-05,
316
- "loss": 0.4815,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 0.8111762054979721,
321
- "grad_norm": 19.149808883666992,
322
- "learning_rate": 4.187725631768953e-05,
323
- "loss": 0.4196,
324
  "step": 450
325
  },
326
  {
327
  "epoch": 0.829202343397927,
328
- "grad_norm": 28.140705108642578,
329
- "learning_rate": 4.169675090252708e-05,
330
- "loss": 0.4085,
331
  "step": 460
332
  },
333
  {
334
  "epoch": 0.847228481297882,
335
- "grad_norm": 21.819814682006836,
336
- "learning_rate": 4.151624548736462e-05,
337
- "loss": 0.3986,
338
  "step": 470
339
  },
340
  {
341
  "epoch": 0.8652546191978369,
342
- "grad_norm": 15.050581932067871,
343
- "learning_rate": 4.1335740072202167e-05,
344
- "loss": 0.4267,
345
  "step": 480
346
  },
347
  {
348
  "epoch": 0.8832807570977917,
349
- "grad_norm": 14.929478645324707,
350
- "learning_rate": 4.115523465703972e-05,
351
- "loss": 0.4315,
352
  "step": 490
353
  },
354
  {
355
  "epoch": 0.9013068949977467,
356
- "grad_norm": 20.087432861328125,
357
- "learning_rate": 4.0974729241877256e-05,
358
- "loss": 0.3886,
359
  "step": 500
360
  },
361
  {
362
  "epoch": 0.9193330328977016,
363
- "grad_norm": 32.96128463745117,
364
- "learning_rate": 4.079422382671481e-05,
365
- "loss": 0.4046,
366
  "step": 510
367
  },
368
  {
369
  "epoch": 0.9373591707976566,
370
- "grad_norm": 19.673940658569336,
371
- "learning_rate": 4.0613718411552346e-05,
372
- "loss": 0.4127,
373
  "step": 520
374
  },
375
  {
376
  "epoch": 0.9553853086976115,
377
- "grad_norm": 16.12790298461914,
378
- "learning_rate": 4.043321299638989e-05,
379
- "loss": 0.4227,
380
  "step": 530
381
  },
382
  {
383
  "epoch": 0.9734114465975665,
384
- "grad_norm": 20.4881649017334,
385
- "learning_rate": 4.0252707581227436e-05,
386
- "loss": 0.4223,
387
  "step": 540
388
  },
389
  {
390
  "epoch": 0.9914375844975214,
391
- "grad_norm": 17.96915054321289,
392
- "learning_rate": 4.007220216606498e-05,
393
- "loss": 0.395,
394
  "step": 550
395
  },
396
  {
397
  "epoch": 1.0,
398
- "eval_f1": 0.8363481591828048,
399
- "eval_fn": 322,
400
- "eval_fp": 447,
401
- "eval_loss": 0.372616171836853,
402
- "eval_precision": 0.8146766169154229,
403
- "eval_recall": 0.8592041976388282,
404
- "eval_runtime": 12.0589,
405
- "eval_samples_per_second": 387.513,
406
- "eval_steps_per_second": 48.512,
407
- "eval_tn": 1939,
408
- "eval_tp": 1965,
409
  "step": 555
410
  }
411
  ],
412
  "logging_steps": 10,
413
- "max_steps": 2770,
414
  "num_input_tokens_seen": 0,
415
- "num_train_epochs": 5,
416
  "save_steps": 500,
417
  "stateful_callbacks": {
418
  "TrainerControl": {
 
10
  "log_history": [
11
  {
12
  "epoch": 0.018026137899954935,
13
+ "grad_norm": 16.059541702270508,
14
+ "learning_rate": 4.9548736462093865e-05,
15
+ "loss": 0.6728,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.03605227579990987,
20
+ "grad_norm": 22.368194580078125,
21
+ "learning_rate": 4.909747292418773e-05,
22
+ "loss": 0.5752,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.054078413699864804,
27
+ "grad_norm": 21.088069915771484,
28
+ "learning_rate": 4.864620938628159e-05,
29
+ "loss": 0.5414,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.07210455159981974,
34
+ "grad_norm": 17.759164810180664,
35
+ "learning_rate": 4.819494584837546e-05,
36
+ "loss": 0.5483,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.09013068949977468,
41
+ "grad_norm": 35.45125961303711,
42
+ "learning_rate": 4.7743682310469314e-05,
43
+ "loss": 0.5284,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.10815682739972961,
48
+ "grad_norm": 34.69957733154297,
49
+ "learning_rate": 4.7292418772563177e-05,
50
+ "loss": 0.4792,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.12618296529968454,
55
+ "grad_norm": 31.315162658691406,
56
+ "learning_rate": 4.684115523465704e-05,
57
+ "loss": 0.505,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.14420910319963948,
62
+ "grad_norm": 28.01712417602539,
63
+ "learning_rate": 4.63898916967509e-05,
64
+ "loss": 0.4575,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.16223524109959442,
69
+ "grad_norm": 22.80365562438965,
70
+ "learning_rate": 4.5938628158844764e-05,
71
+ "loss": 0.4678,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.18026137899954936,
76
+ "grad_norm": 38.55846405029297,
77
+ "learning_rate": 4.548736462093863e-05,
78
+ "loss": 0.4724,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.19828751689950427,
83
+ "grad_norm": 29.91495704650879,
84
+ "learning_rate": 4.5036101083032495e-05,
85
+ "loss": 0.4891,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.21631365479945922,
90
+ "grad_norm": 26.530668258666992,
91
+ "learning_rate": 4.458483754512636e-05,
92
+ "loss": 0.4578,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.23433979269941416,
97
+ "grad_norm": 53.58538055419922,
98
+ "learning_rate": 4.413357400722022e-05,
99
+ "loss": 0.4735,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.25236593059936907,
104
+ "grad_norm": 22.026208877563477,
105
+ "learning_rate": 4.368231046931408e-05,
106
+ "loss": 0.4743,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.270392068499324,
111
+ "grad_norm": 28.057222366333008,
112
+ "learning_rate": 4.3231046931407945e-05,
113
+ "loss": 0.4559,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.28841820639927895,
118
+ "grad_norm": 22.690412521362305,
119
+ "learning_rate": 4.277978339350181e-05,
120
+ "loss": 0.4638,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.3064443442992339,
125
+ "grad_norm": 21.292953491210938,
126
+ "learning_rate": 4.232851985559567e-05,
127
+ "loss": 0.4877,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.32447048219918884,
132
+ "grad_norm": 36.56389236450195,
133
+ "learning_rate": 4.187725631768953e-05,
134
+ "loss": 0.4927,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.3424966200991438,
139
+ "grad_norm": 17.6762752532959,
140
+ "learning_rate": 4.1425992779783394e-05,
141
+ "loss": 0.491,
142
  "step": 190
143
  },
144
  {
145
  "epoch": 0.3605227579990987,
146
+ "grad_norm": 19.962385177612305,
147
+ "learning_rate": 4.0974729241877256e-05,
148
+ "loss": 0.4428,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.3785488958990536,
153
+ "grad_norm": 23.71368408203125,
154
+ "learning_rate": 4.052346570397112e-05,
155
+ "loss": 0.4318,
156
  "step": 210
157
  },
158
  {
159
  "epoch": 0.39657503379900855,
160
+ "grad_norm": 24.030736923217773,
161
+ "learning_rate": 4.007220216606498e-05,
162
+ "loss": 0.4532,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.4146011716989635,
167
+ "grad_norm": 23.85675811767578,
168
+ "learning_rate": 3.962093862815885e-05,
169
+ "loss": 0.4427,
170
  "step": 230
171
  },
172
  {
173
  "epoch": 0.43262730959891843,
174
+ "grad_norm": 32.091190338134766,
175
+ "learning_rate": 3.916967509025271e-05,
176
+ "loss": 0.4432,
177
  "step": 240
178
  },
179
  {
180
  "epoch": 0.45065344749887337,
181
+ "grad_norm": 32.51976776123047,
182
+ "learning_rate": 3.8718411552346575e-05,
183
+ "loss": 0.4289,
184
  "step": 250
185
  },
186
  {
187
  "epoch": 0.4686795853988283,
188
+ "grad_norm": 17.93523406982422,
189
+ "learning_rate": 3.826714801444044e-05,
190
+ "loss": 0.4485,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.48670572329878325,
195
+ "grad_norm": 34.29135513305664,
196
+ "learning_rate": 3.78158844765343e-05,
197
+ "loss": 0.4373,
198
  "step": 270
199
  },
200
  {
201
  "epoch": 0.5047318611987381,
202
+ "grad_norm": 21.879440307617188,
203
+ "learning_rate": 3.7364620938628155e-05,
204
+ "loss": 0.4367,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.5227579990986931,
209
+ "grad_norm": 16.587345123291016,
210
+ "learning_rate": 3.6913357400722025e-05,
211
+ "loss": 0.4476,
212
  "step": 290
213
  },
214
  {
215
  "epoch": 0.540784136998648,
216
+ "grad_norm": 21.923376083374023,
217
+ "learning_rate": 3.646209386281589e-05,
218
+ "loss": 0.4186,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.558810274898603,
223
+ "grad_norm": 25.472400665283203,
224
+ "learning_rate": 3.601083032490975e-05,
225
+ "loss": 0.4317,
226
  "step": 310
227
  },
228
  {
229
  "epoch": 0.5768364127985579,
230
+ "grad_norm": 36.07789611816406,
231
+ "learning_rate": 3.555956678700361e-05,
232
+ "loss": 0.4281,
233
  "step": 320
234
  },
235
  {
236
  "epoch": 0.5948625506985128,
237
+ "grad_norm": 32.51295852661133,
238
+ "learning_rate": 3.5108303249097474e-05,
239
+ "loss": 0.4391,
240
  "step": 330
241
  },
242
  {
243
  "epoch": 0.6128886885984678,
244
+ "grad_norm": 19.069826126098633,
245
+ "learning_rate": 3.4657039711191336e-05,
246
+ "loss": 0.4249,
247
  "step": 340
248
  },
249
  {
250
  "epoch": 0.6309148264984227,
251
+ "grad_norm": 22.865673065185547,
252
+ "learning_rate": 3.42057761732852e-05,
253
+ "loss": 0.4394,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 0.6489409643983777,
258
+ "grad_norm": 35.98612594604492,
259
+ "learning_rate": 3.375451263537907e-05,
260
+ "loss": 0.407,
261
  "step": 360
262
  },
263
  {
264
  "epoch": 0.6669671022983326,
265
+ "grad_norm": 19.89366340637207,
266
+ "learning_rate": 3.330324909747293e-05,
267
+ "loss": 0.434,
268
  "step": 370
269
  },
270
  {
271
  "epoch": 0.6849932401982876,
272
+ "grad_norm": 20.73752212524414,
273
+ "learning_rate": 3.2851985559566786e-05,
274
+ "loss": 0.3815,
275
  "step": 380
276
  },
277
  {
278
  "epoch": 0.7030193780982424,
279
+ "grad_norm": 20.23769187927246,
280
+ "learning_rate": 3.240072202166065e-05,
281
+ "loss": 0.4159,
282
  "step": 390
283
  },
284
  {
285
  "epoch": 0.7210455159981974,
286
+ "grad_norm": 40.59239196777344,
287
+ "learning_rate": 3.194945848375451e-05,
288
+ "loss": 0.4582,
289
  "step": 400
290
  },
291
  {
292
  "epoch": 0.7390716538981523,
293
+ "grad_norm": 22.728296279907227,
294
+ "learning_rate": 3.149819494584837e-05,
295
+ "loss": 0.4387,
296
  "step": 410
297
  },
298
  {
299
  "epoch": 0.7570977917981072,
300
+ "grad_norm": 20.43068504333496,
301
+ "learning_rate": 3.104693140794224e-05,
302
+ "loss": 0.4302,
303
  "step": 420
304
  },
305
  {
306
  "epoch": 0.7751239296980622,
307
+ "grad_norm": 30.140209197998047,
308
+ "learning_rate": 3.0595667870036104e-05,
309
  "loss": 0.3858,
310
  "step": 430
311
  },
312
  {
313
  "epoch": 0.7931500675980171,
314
+ "grad_norm": 15.338220596313477,
315
+ "learning_rate": 3.0144404332129967e-05,
316
+ "loss": 0.4634,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 0.8111762054979721,
321
+ "grad_norm": 24.517107009887695,
322
+ "learning_rate": 2.969314079422383e-05,
323
+ "loss": 0.4079,
324
  "step": 450
325
  },
326
  {
327
  "epoch": 0.829202343397927,
328
+ "grad_norm": 25.604442596435547,
329
+ "learning_rate": 2.924187725631769e-05,
330
+ "loss": 0.3903,
331
  "step": 460
332
  },
333
  {
334
  "epoch": 0.847228481297882,
335
+ "grad_norm": 26.43514060974121,
336
+ "learning_rate": 2.879061371841155e-05,
337
+ "loss": 0.4037,
338
  "step": 470
339
  },
340
  {
341
  "epoch": 0.8652546191978369,
342
+ "grad_norm": 18.30228042602539,
343
+ "learning_rate": 2.8339350180505413e-05,
344
+ "loss": 0.4374,
345
  "step": 480
346
  },
347
  {
348
  "epoch": 0.8832807570977917,
349
+ "grad_norm": 16.212318420410156,
350
+ "learning_rate": 2.7888086642599282e-05,
351
+ "loss": 0.4219,
352
  "step": 490
353
  },
354
  {
355
  "epoch": 0.9013068949977467,
356
+ "grad_norm": 23.251705169677734,
357
+ "learning_rate": 2.7436823104693144e-05,
358
+ "loss": 0.3784,
359
  "step": 500
360
  },
361
  {
362
  "epoch": 0.9193330328977016,
363
+ "grad_norm": 37.26618194580078,
364
+ "learning_rate": 2.6985559566787007e-05,
365
+ "loss": 0.4144,
366
  "step": 510
367
  },
368
  {
369
  "epoch": 0.9373591707976566,
370
+ "grad_norm": 22.583784103393555,
371
+ "learning_rate": 2.6534296028880866e-05,
372
+ "loss": 0.4123,
373
  "step": 520
374
  },
375
  {
376
  "epoch": 0.9553853086976115,
377
+ "grad_norm": 18.812397003173828,
378
+ "learning_rate": 2.6083032490974728e-05,
379
+ "loss": 0.422,
380
  "step": 530
381
  },
382
  {
383
  "epoch": 0.9734114465975665,
384
+ "grad_norm": 20.292518615722656,
385
+ "learning_rate": 2.563176895306859e-05,
386
+ "loss": 0.4346,
387
  "step": 540
388
  },
389
  {
390
  "epoch": 0.9914375844975214,
391
+ "grad_norm": 25.314495086669922,
392
+ "learning_rate": 2.518050541516246e-05,
393
+ "loss": 0.393,
394
  "step": 550
395
  },
396
  {
397
  "epoch": 1.0,
398
+ "eval_f1": 0.8425531914893617,
399
+ "eval_fn": 307,
400
+ "eval_fp": 433,
401
+ "eval_loss": 0.36401602625846863,
402
+ "eval_precision": 0.8205553253211769,
403
+ "eval_recall": 0.8657630083078268,
404
+ "eval_runtime": 13.0048,
405
+ "eval_samples_per_second": 359.329,
406
+ "eval_steps_per_second": 44.983,
407
+ "eval_tn": 1953,
408
+ "eval_tp": 1980,
409
  "step": 555
410
  }
411
  ],
412
  "logging_steps": 10,
413
+ "max_steps": 1108,
414
  "num_input_tokens_seen": 0,
415
+ "num_train_epochs": 2,
416
  "save_steps": 500,
417
  "stateful_callbacks": {
418
  "TrainerControl": {
checkpoint-555/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e924750e5e12e6bb9372df333907b4d7f42d9dd2da839a2407f8f861ab40940
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b4fc31351a02f492c40017a3c0ee23cf1c98a7172c96374c9e2c9f12affd1d3
3
  size 5432