mariusjabami commited on
Commit
57ad5ea
·
verified ·
1 Parent(s): 0975aaf

Delete trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -1826
trainer_state.json DELETED
@@ -1,1826 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 1.9975031210986267,
6
- "eval_steps": 500,
7
- "global_step": 12800,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.007802746566791511,
14
- "grad_norm": 0.09780355542898178,
15
- "learning_rate": 4.9923533083645446e-05,
16
- "loss": 2.1517,
17
- "step": 50
18
- },
19
- {
20
- "epoch": 0.015605493133583021,
21
- "grad_norm": 0.1110800951719284,
22
- "learning_rate": 4.984550561797753e-05,
23
- "loss": 2.0733,
24
- "step": 100
25
- },
26
- {
27
- "epoch": 0.023408239700374533,
28
- "grad_norm": 0.11351309716701508,
29
- "learning_rate": 4.9767478152309616e-05,
30
- "loss": 1.9999,
31
- "step": 150
32
- },
33
- {
34
- "epoch": 0.031210986267166042,
35
- "grad_norm": 0.12184558063745499,
36
- "learning_rate": 4.96894506866417e-05,
37
- "loss": 1.911,
38
- "step": 200
39
- },
40
- {
41
- "epoch": 0.03901373283395755,
42
- "grad_norm": 0.12001396715641022,
43
- "learning_rate": 4.9611423220973786e-05,
44
- "loss": 1.8957,
45
- "step": 250
46
- },
47
- {
48
- "epoch": 0.04681647940074907,
49
- "grad_norm": 0.13359545171260834,
50
- "learning_rate": 4.9533395755305875e-05,
51
- "loss": 1.881,
52
- "step": 300
53
- },
54
- {
55
- "epoch": 0.054619225967540576,
56
- "grad_norm": 0.177729532122612,
57
- "learning_rate": 4.9455368289637956e-05,
58
- "loss": 1.8426,
59
- "step": 350
60
- },
61
- {
62
- "epoch": 0.062421972534332085,
63
- "grad_norm": 0.15538156032562256,
64
- "learning_rate": 4.9377340823970044e-05,
65
- "loss": 1.8528,
66
- "step": 400
67
- },
68
- {
69
- "epoch": 0.0702247191011236,
70
- "grad_norm": 0.1490369588136673,
71
- "learning_rate": 4.9299313358302126e-05,
72
- "loss": 1.8604,
73
- "step": 450
74
- },
75
- {
76
- "epoch": 0.0780274656679151,
77
- "grad_norm": 0.15821540355682373,
78
- "learning_rate": 4.9221285892634214e-05,
79
- "loss": 1.8454,
80
- "step": 500
81
- },
82
- {
83
- "epoch": 0.08583021223470662,
84
- "grad_norm": 0.15961939096450806,
85
- "learning_rate": 4.9143258426966296e-05,
86
- "loss": 1.8235,
87
- "step": 550
88
- },
89
- {
90
- "epoch": 0.09363295880149813,
91
- "grad_norm": 0.18454231321811676,
92
- "learning_rate": 4.906523096129838e-05,
93
- "loss": 1.8245,
94
- "step": 600
95
- },
96
- {
97
- "epoch": 0.10143570536828964,
98
- "grad_norm": 0.19588346779346466,
99
- "learning_rate": 4.8987203495630466e-05,
100
- "loss": 1.8332,
101
- "step": 650
102
- },
103
- {
104
- "epoch": 0.10923845193508115,
105
- "grad_norm": 0.20523308217525482,
106
- "learning_rate": 4.890917602996255e-05,
107
- "loss": 1.8176,
108
- "step": 700
109
- },
110
- {
111
- "epoch": 0.11704119850187265,
112
- "grad_norm": 0.2048981934785843,
113
- "learning_rate": 4.883114856429463e-05,
114
- "loss": 1.7751,
115
- "step": 750
116
- },
117
- {
118
- "epoch": 0.12484394506866417,
119
- "grad_norm": 0.19311609864234924,
120
- "learning_rate": 4.875312109862672e-05,
121
- "loss": 1.8034,
122
- "step": 800
123
- },
124
- {
125
- "epoch": 0.13264669163545567,
126
- "grad_norm": 0.21123754978179932,
127
- "learning_rate": 4.86750936329588e-05,
128
- "loss": 1.7859,
129
- "step": 850
130
- },
131
- {
132
- "epoch": 0.1404494382022472,
133
- "grad_norm": 0.270113080739975,
134
- "learning_rate": 4.859706616729089e-05,
135
- "loss": 1.8121,
136
- "step": 900
137
- },
138
- {
139
- "epoch": 0.1482521847690387,
140
- "grad_norm": 0.23597899079322815,
141
- "learning_rate": 4.8519038701622975e-05,
142
- "loss": 1.8213,
143
- "step": 950
144
- },
145
- {
146
- "epoch": 0.1560549313358302,
147
- "grad_norm": 0.2787221372127533,
148
- "learning_rate": 4.844101123595506e-05,
149
- "loss": 1.8048,
150
- "step": 1000
151
- },
152
- {
153
- "epoch": 0.16385767790262173,
154
- "grad_norm": 0.2505972981452942,
155
- "learning_rate": 4.8362983770287145e-05,
156
- "loss": 1.7799,
157
- "step": 1050
158
- },
159
- {
160
- "epoch": 0.17166042446941324,
161
- "grad_norm": 0.2511955797672272,
162
- "learning_rate": 4.828495630461923e-05,
163
- "loss": 1.7758,
164
- "step": 1100
165
- },
166
- {
167
- "epoch": 0.17946317103620474,
168
- "grad_norm": 0.2740708291530609,
169
- "learning_rate": 4.8206928838951315e-05,
170
- "loss": 1.7817,
171
- "step": 1150
172
- },
173
- {
174
- "epoch": 0.18726591760299627,
175
- "grad_norm": 0.30172184109687805,
176
- "learning_rate": 4.81289013732834e-05,
177
- "loss": 1.7665,
178
- "step": 1200
179
- },
180
- {
181
- "epoch": 0.19506866416978777,
182
- "grad_norm": 0.2734954059123993,
183
- "learning_rate": 4.8050873907615485e-05,
184
- "loss": 1.7673,
185
- "step": 1250
186
- },
187
- {
188
- "epoch": 0.20287141073657927,
189
- "grad_norm": 0.2837677597999573,
190
- "learning_rate": 4.797284644194757e-05,
191
- "loss": 1.7539,
192
- "step": 1300
193
- },
194
- {
195
- "epoch": 0.21067415730337077,
196
- "grad_norm": 0.287616491317749,
197
- "learning_rate": 4.7894818976279655e-05,
198
- "loss": 1.7845,
199
- "step": 1350
200
- },
201
- {
202
- "epoch": 0.2184769038701623,
203
- "grad_norm": 0.35649779438972473,
204
- "learning_rate": 4.7816791510611737e-05,
205
- "loss": 1.7629,
206
- "step": 1400
207
- },
208
- {
209
- "epoch": 0.2262796504369538,
210
- "grad_norm": 0.3466143012046814,
211
- "learning_rate": 4.7738764044943825e-05,
212
- "loss": 1.7637,
213
- "step": 1450
214
- },
215
- {
216
- "epoch": 0.2340823970037453,
217
- "grad_norm": 0.29961591958999634,
218
- "learning_rate": 4.7660736579275906e-05,
219
- "loss": 1.7775,
220
- "step": 1500
221
- },
222
- {
223
- "epoch": 0.24188514357053684,
224
- "grad_norm": 0.3261962831020355,
225
- "learning_rate": 4.7582709113607995e-05,
226
- "loss": 1.7651,
227
- "step": 1550
228
- },
229
- {
230
- "epoch": 0.24968789013732834,
231
- "grad_norm": 0.31850898265838623,
232
- "learning_rate": 4.7504681647940076e-05,
233
- "loss": 1.7601,
234
- "step": 1600
235
- },
236
- {
237
- "epoch": 0.25749063670411987,
238
- "grad_norm": 0.3334502577781677,
239
- "learning_rate": 4.7426654182272165e-05,
240
- "loss": 1.7783,
241
- "step": 1650
242
- },
243
- {
244
- "epoch": 0.26529338327091134,
245
- "grad_norm": 0.3199051320552826,
246
- "learning_rate": 4.7348626716604246e-05,
247
- "loss": 1.7492,
248
- "step": 1700
249
- },
250
- {
251
- "epoch": 0.2730961298377029,
252
- "grad_norm": 0.3381502628326416,
253
- "learning_rate": 4.7270599250936335e-05,
254
- "loss": 1.7592,
255
- "step": 1750
256
- },
257
- {
258
- "epoch": 0.2808988764044944,
259
- "grad_norm": 0.33800897002220154,
260
- "learning_rate": 4.7192571785268416e-05,
261
- "loss": 1.7403,
262
- "step": 1800
263
- },
264
- {
265
- "epoch": 0.2887016229712859,
266
- "grad_norm": 0.3942790925502777,
267
- "learning_rate": 4.71145443196005e-05,
268
- "loss": 1.7955,
269
- "step": 1850
270
- },
271
- {
272
- "epoch": 0.2965043695380774,
273
- "grad_norm": 0.3514516353607178,
274
- "learning_rate": 4.7036516853932586e-05,
275
- "loss": 1.7453,
276
- "step": 1900
277
- },
278
- {
279
- "epoch": 0.30430711610486894,
280
- "grad_norm": 0.3649444878101349,
281
- "learning_rate": 4.695848938826467e-05,
282
- "loss": 1.7584,
283
- "step": 1950
284
- },
285
- {
286
- "epoch": 0.3121098626716604,
287
- "grad_norm": 0.33589330315589905,
288
- "learning_rate": 4.6880461922596756e-05,
289
- "loss": 1.7369,
290
- "step": 2000
291
- },
292
- {
293
- "epoch": 0.31991260923845194,
294
- "grad_norm": 0.3481082320213318,
295
- "learning_rate": 4.680243445692884e-05,
296
- "loss": 1.7281,
297
- "step": 2050
298
- },
299
- {
300
- "epoch": 0.32771535580524347,
301
- "grad_norm": 0.34206053614616394,
302
- "learning_rate": 4.6724406991260926e-05,
303
- "loss": 1.7266,
304
- "step": 2100
305
- },
306
- {
307
- "epoch": 0.33551810237203494,
308
- "grad_norm": 0.3519572615623474,
309
- "learning_rate": 4.664637952559301e-05,
310
- "loss": 1.7365,
311
- "step": 2150
312
- },
313
- {
314
- "epoch": 0.3433208489388265,
315
- "grad_norm": 0.35082659125328064,
316
- "learning_rate": 4.6568352059925096e-05,
317
- "loss": 1.7411,
318
- "step": 2200
319
- },
320
- {
321
- "epoch": 0.351123595505618,
322
- "grad_norm": 0.3652777373790741,
323
- "learning_rate": 4.649032459425718e-05,
324
- "loss": 1.7466,
325
- "step": 2250
326
- },
327
- {
328
- "epoch": 0.3589263420724095,
329
- "grad_norm": 0.39173486828804016,
330
- "learning_rate": 4.6412297128589266e-05,
331
- "loss": 1.7443,
332
- "step": 2300
333
- },
334
- {
335
- "epoch": 0.366729088639201,
336
- "grad_norm": 0.47386667132377625,
337
- "learning_rate": 4.633426966292135e-05,
338
- "loss": 1.7276,
339
- "step": 2350
340
- },
341
- {
342
- "epoch": 0.37453183520599254,
343
- "grad_norm": 0.3906112015247345,
344
- "learning_rate": 4.6256242197253436e-05,
345
- "loss": 1.7194,
346
- "step": 2400
347
- },
348
- {
349
- "epoch": 0.382334581772784,
350
- "grad_norm": 0.4129493832588196,
351
- "learning_rate": 4.6178214731585524e-05,
352
- "loss": 1.7203,
353
- "step": 2450
354
- },
355
- {
356
- "epoch": 0.39013732833957554,
357
- "grad_norm": 0.4499260485172272,
358
- "learning_rate": 4.6100187265917605e-05,
359
- "loss": 1.7268,
360
- "step": 2500
361
- },
362
- {
363
- "epoch": 0.397940074906367,
364
- "grad_norm": 0.4021989703178406,
365
- "learning_rate": 4.6022159800249694e-05,
366
- "loss": 1.7532,
367
- "step": 2550
368
- },
369
- {
370
- "epoch": 0.40574282147315854,
371
- "grad_norm": 0.3842841386795044,
372
- "learning_rate": 4.5944132334581775e-05,
373
- "loss": 1.7195,
374
- "step": 2600
375
- },
376
- {
377
- "epoch": 0.4135455680399501,
378
- "grad_norm": 0.3462275266647339,
379
- "learning_rate": 4.5866104868913864e-05,
380
- "loss": 1.7269,
381
- "step": 2650
382
- },
383
- {
384
- "epoch": 0.42134831460674155,
385
- "grad_norm": 0.39410606026649475,
386
- "learning_rate": 4.5788077403245945e-05,
387
- "loss": 1.7192,
388
- "step": 2700
389
- },
390
- {
391
- "epoch": 0.4291510611735331,
392
- "grad_norm": 0.4126410484313965,
393
- "learning_rate": 4.5710049937578034e-05,
394
- "loss": 1.7421,
395
- "step": 2750
396
- },
397
- {
398
- "epoch": 0.4369538077403246,
399
- "grad_norm": 0.36984720826148987,
400
- "learning_rate": 4.5632022471910115e-05,
401
- "loss": 1.7101,
402
- "step": 2800
403
- },
404
- {
405
- "epoch": 0.4447565543071161,
406
- "grad_norm": 0.3527930974960327,
407
- "learning_rate": 4.5553995006242203e-05,
408
- "loss": 1.7276,
409
- "step": 2850
410
- },
411
- {
412
- "epoch": 0.4525593008739076,
413
- "grad_norm": 0.4303431510925293,
414
- "learning_rate": 4.5475967540574285e-05,
415
- "loss": 1.7292,
416
- "step": 2900
417
- },
418
- {
419
- "epoch": 0.46036204744069914,
420
- "grad_norm": 0.43609175086021423,
421
- "learning_rate": 4.5397940074906367e-05,
422
- "loss": 1.7167,
423
- "step": 2950
424
- },
425
- {
426
- "epoch": 0.4681647940074906,
427
- "grad_norm": 0.43081963062286377,
428
- "learning_rate": 4.5319912609238455e-05,
429
- "loss": 1.7214,
430
- "step": 3000
431
- },
432
- {
433
- "epoch": 0.47596754057428214,
434
- "grad_norm": 0.4468071162700653,
435
- "learning_rate": 4.5241885143570536e-05,
436
- "loss": 1.7375,
437
- "step": 3050
438
- },
439
- {
440
- "epoch": 0.4837702871410737,
441
- "grad_norm": 0.4700297713279724,
442
- "learning_rate": 4.516385767790262e-05,
443
- "loss": 1.7245,
444
- "step": 3100
445
- },
446
- {
447
- "epoch": 0.49157303370786515,
448
- "grad_norm": 0.4510380029678345,
449
- "learning_rate": 4.5085830212234706e-05,
450
- "loss": 1.7303,
451
- "step": 3150
452
- },
453
- {
454
- "epoch": 0.4993757802746567,
455
- "grad_norm": 0.4726191461086273,
456
- "learning_rate": 4.5007802746566795e-05,
457
- "loss": 1.7146,
458
- "step": 3200
459
- },
460
- {
461
- "epoch": 0.5071785268414482,
462
- "grad_norm": 0.4493762254714966,
463
- "learning_rate": 4.4929775280898876e-05,
464
- "loss": 1.7214,
465
- "step": 3250
466
- },
467
- {
468
- "epoch": 0.5149812734082397,
469
- "grad_norm": 0.425413578748703,
470
- "learning_rate": 4.4851747815230965e-05,
471
- "loss": 1.6912,
472
- "step": 3300
473
- },
474
- {
475
- "epoch": 0.5227840199750312,
476
- "grad_norm": 0.49669891595840454,
477
- "learning_rate": 4.4773720349563046e-05,
478
- "loss": 1.7082,
479
- "step": 3350
480
- },
481
- {
482
- "epoch": 0.5305867665418227,
483
- "grad_norm": 0.5002058148384094,
484
- "learning_rate": 4.4695692883895134e-05,
485
- "loss": 1.7119,
486
- "step": 3400
487
- },
488
- {
489
- "epoch": 0.5383895131086143,
490
- "grad_norm": 0.5210412740707397,
491
- "learning_rate": 4.4617665418227216e-05,
492
- "loss": 1.7139,
493
- "step": 3450
494
- },
495
- {
496
- "epoch": 0.5461922596754057,
497
- "grad_norm": 0.42913952469825745,
498
- "learning_rate": 4.4539637952559304e-05,
499
- "loss": 1.7083,
500
- "step": 3500
501
- },
502
- {
503
- "epoch": 0.5539950062421972,
504
- "grad_norm": 0.45524224638938904,
505
- "learning_rate": 4.4461610486891386e-05,
506
- "loss": 1.715,
507
- "step": 3550
508
- },
509
- {
510
- "epoch": 0.5617977528089888,
511
- "grad_norm": 0.47845831513404846,
512
- "learning_rate": 4.4383583021223474e-05,
513
- "loss": 1.7194,
514
- "step": 3600
515
- },
516
- {
517
- "epoch": 0.5696004993757803,
518
- "grad_norm": 0.4836772680282593,
519
- "learning_rate": 4.4305555555555556e-05,
520
- "loss": 1.7009,
521
- "step": 3650
522
- },
523
- {
524
- "epoch": 0.5774032459425718,
525
- "grad_norm": 0.4728386700153351,
526
- "learning_rate": 4.4227528089887644e-05,
527
- "loss": 1.7128,
528
- "step": 3700
529
- },
530
- {
531
- "epoch": 0.5852059925093633,
532
- "grad_norm": 0.4861275255680084,
533
- "learning_rate": 4.4149500624219726e-05,
534
- "loss": 1.7089,
535
- "step": 3750
536
- },
537
- {
538
- "epoch": 0.5930087390761548,
539
- "grad_norm": 0.5425216555595398,
540
- "learning_rate": 4.4071473158551814e-05,
541
- "loss": 1.7238,
542
- "step": 3800
543
- },
544
- {
545
- "epoch": 0.6008114856429463,
546
- "grad_norm": 0.4091911315917969,
547
- "learning_rate": 4.3993445692883896e-05,
548
- "loss": 1.683,
549
- "step": 3850
550
- },
551
- {
552
- "epoch": 0.6086142322097379,
553
- "grad_norm": 0.38465380668640137,
554
- "learning_rate": 4.3915418227215984e-05,
555
- "loss": 1.6991,
556
- "step": 3900
557
- },
558
- {
559
- "epoch": 0.6164169787765293,
560
- "grad_norm": 0.4543341398239136,
561
- "learning_rate": 4.383739076154807e-05,
562
- "loss": 1.7105,
563
- "step": 3950
564
- },
565
- {
566
- "epoch": 0.6242197253433208,
567
- "grad_norm": 0.46507373452186584,
568
- "learning_rate": 4.3759363295880154e-05,
569
- "loss": 1.6935,
570
- "step": 4000
571
- },
572
- {
573
- "epoch": 0.6320224719101124,
574
- "grad_norm": 0.4509834349155426,
575
- "learning_rate": 4.368133583021224e-05,
576
- "loss": 1.6869,
577
- "step": 4050
578
- },
579
- {
580
- "epoch": 0.6398252184769039,
581
- "grad_norm": 0.5607530474662781,
582
- "learning_rate": 4.3603308364544324e-05,
583
- "loss": 1.7125,
584
- "step": 4100
585
- },
586
- {
587
- "epoch": 0.6476279650436954,
588
- "grad_norm": 0.4719931483268738,
589
- "learning_rate": 4.3525280898876405e-05,
590
- "loss": 1.677,
591
- "step": 4150
592
- },
593
- {
594
- "epoch": 0.6554307116104869,
595
- "grad_norm": 0.4882090091705322,
596
- "learning_rate": 4.344725343320849e-05,
597
- "loss": 1.6981,
598
- "step": 4200
599
- },
600
- {
601
- "epoch": 0.6632334581772784,
602
- "grad_norm": 0.5204262733459473,
603
- "learning_rate": 4.3369225967540575e-05,
604
- "loss": 1.6905,
605
- "step": 4250
606
- },
607
- {
608
- "epoch": 0.6710362047440699,
609
- "grad_norm": 0.5416198372840881,
610
- "learning_rate": 4.329119850187266e-05,
611
- "loss": 1.6874,
612
- "step": 4300
613
- },
614
- {
615
- "epoch": 0.6788389513108615,
616
- "grad_norm": 0.484465628862381,
617
- "learning_rate": 4.3213171036204745e-05,
618
- "loss": 1.6967,
619
- "step": 4350
620
- },
621
- {
622
- "epoch": 0.686641697877653,
623
- "grad_norm": 0.4895637333393097,
624
- "learning_rate": 4.313514357053683e-05,
625
- "loss": 1.7088,
626
- "step": 4400
627
- },
628
- {
629
- "epoch": 0.6944444444444444,
630
- "grad_norm": 0.5524686574935913,
631
- "learning_rate": 4.3057116104868915e-05,
632
- "loss": 1.6826,
633
- "step": 4450
634
- },
635
- {
636
- "epoch": 0.702247191011236,
637
- "grad_norm": 0.5131920576095581,
638
- "learning_rate": 4.2979088639200997e-05,
639
- "loss": 1.6932,
640
- "step": 4500
641
- },
642
- {
643
- "epoch": 0.7100499375780275,
644
- "grad_norm": 0.5526320934295654,
645
- "learning_rate": 4.2901061173533085e-05,
646
- "loss": 1.7017,
647
- "step": 4550
648
- },
649
- {
650
- "epoch": 0.717852684144819,
651
- "grad_norm": 0.5150460600852966,
652
- "learning_rate": 4.282303370786517e-05,
653
- "loss": 1.6903,
654
- "step": 4600
655
- },
656
- {
657
- "epoch": 0.7256554307116105,
658
- "grad_norm": 0.5039018988609314,
659
- "learning_rate": 4.2745006242197255e-05,
660
- "loss": 1.7134,
661
- "step": 4650
662
- },
663
- {
664
- "epoch": 0.733458177278402,
665
- "grad_norm": 0.48572325706481934,
666
- "learning_rate": 4.266697877652934e-05,
667
- "loss": 1.6976,
668
- "step": 4700
669
- },
670
- {
671
- "epoch": 0.7412609238451935,
672
- "grad_norm": 0.5142014026641846,
673
- "learning_rate": 4.2588951310861425e-05,
674
- "loss": 1.6834,
675
- "step": 4750
676
- },
677
- {
678
- "epoch": 0.7490636704119851,
679
- "grad_norm": 0.4606572985649109,
680
- "learning_rate": 4.251092384519351e-05,
681
- "loss": 1.6793,
682
- "step": 4800
683
- },
684
- {
685
- "epoch": 0.7568664169787765,
686
- "grad_norm": 0.46960020065307617,
687
- "learning_rate": 4.2432896379525595e-05,
688
- "loss": 1.6828,
689
- "step": 4850
690
- },
691
- {
692
- "epoch": 0.764669163545568,
693
- "grad_norm": 0.4922361671924591,
694
- "learning_rate": 4.235486891385768e-05,
695
- "loss": 1.7022,
696
- "step": 4900
697
- },
698
- {
699
- "epoch": 0.7724719101123596,
700
- "grad_norm": 0.5289677381515503,
701
- "learning_rate": 4.2276841448189764e-05,
702
- "loss": 1.6903,
703
- "step": 4950
704
- },
705
- {
706
- "epoch": 0.7802746566791511,
707
- "grad_norm": 0.5616611838340759,
708
- "learning_rate": 4.219881398252185e-05,
709
- "loss": 1.6915,
710
- "step": 5000
711
- },
712
- {
713
- "epoch": 0.7880774032459426,
714
- "grad_norm": 0.4942910075187683,
715
- "learning_rate": 4.2120786516853934e-05,
716
- "loss": 1.6813,
717
- "step": 5050
718
- },
719
- {
720
- "epoch": 0.795880149812734,
721
- "grad_norm": 0.5219690203666687,
722
- "learning_rate": 4.204275905118602e-05,
723
- "loss": 1.6731,
724
- "step": 5100
725
- },
726
- {
727
- "epoch": 0.8036828963795256,
728
- "grad_norm": 0.4913477897644043,
729
- "learning_rate": 4.1964731585518104e-05,
730
- "loss": 1.7001,
731
- "step": 5150
732
- },
733
- {
734
- "epoch": 0.8114856429463171,
735
- "grad_norm": 0.49623045325279236,
736
- "learning_rate": 4.188670411985019e-05,
737
- "loss": 1.686,
738
- "step": 5200
739
- },
740
- {
741
- "epoch": 0.8192883895131086,
742
- "grad_norm": 0.5660040974617004,
743
- "learning_rate": 4.1808676654182274e-05,
744
- "loss": 1.6794,
745
- "step": 5250
746
- },
747
- {
748
- "epoch": 0.8270911360799001,
749
- "grad_norm": 0.5747349262237549,
750
- "learning_rate": 4.173064918851436e-05,
751
- "loss": 1.6847,
752
- "step": 5300
753
- },
754
- {
755
- "epoch": 0.8348938826466916,
756
- "grad_norm": 0.5206372737884521,
757
- "learning_rate": 4.1652621722846444e-05,
758
- "loss": 1.6719,
759
- "step": 5350
760
- },
761
- {
762
- "epoch": 0.8426966292134831,
763
- "grad_norm": 0.5366120934486389,
764
- "learning_rate": 4.1574594257178526e-05,
765
- "loss": 1.6696,
766
- "step": 5400
767
- },
768
- {
769
- "epoch": 0.8504993757802747,
770
- "grad_norm": 0.6354568600654602,
771
- "learning_rate": 4.1496566791510614e-05,
772
- "loss": 1.6657,
773
- "step": 5450
774
- },
775
- {
776
- "epoch": 0.8583021223470662,
777
- "grad_norm": 0.5194640159606934,
778
- "learning_rate": 4.1418539325842695e-05,
779
- "loss": 1.6731,
780
- "step": 5500
781
- },
782
- {
783
- "epoch": 0.8661048689138576,
784
- "grad_norm": 0.43448275327682495,
785
- "learning_rate": 4.1340511860174784e-05,
786
- "loss": 1.6639,
787
- "step": 5550
788
- },
789
- {
790
- "epoch": 0.8739076154806492,
791
- "grad_norm": 0.5286650657653809,
792
- "learning_rate": 4.1262484394506865e-05,
793
- "loss": 1.677,
794
- "step": 5600
795
- },
796
- {
797
- "epoch": 0.8817103620474407,
798
- "grad_norm": 0.49659380316734314,
799
- "learning_rate": 4.1184456928838954e-05,
800
- "loss": 1.6758,
801
- "step": 5650
802
- },
803
- {
804
- "epoch": 0.8895131086142322,
805
- "grad_norm": 0.5224044322967529,
806
- "learning_rate": 4.1106429463171035e-05,
807
- "loss": 1.6528,
808
- "step": 5700
809
- },
810
- {
811
- "epoch": 0.8973158551810237,
812
- "grad_norm": 0.510977566242218,
813
- "learning_rate": 4.1028401997503124e-05,
814
- "loss": 1.6869,
815
- "step": 5750
816
- },
817
- {
818
- "epoch": 0.9051186017478152,
819
- "grad_norm": 0.5862101912498474,
820
- "learning_rate": 4.0950374531835205e-05,
821
- "loss": 1.68,
822
- "step": 5800
823
- },
824
- {
825
- "epoch": 0.9129213483146067,
826
- "grad_norm": 0.5646480321884155,
827
- "learning_rate": 4.0872347066167293e-05,
828
- "loss": 1.6749,
829
- "step": 5850
830
- },
831
- {
832
- "epoch": 0.9207240948813983,
833
- "grad_norm": 0.5872883200645447,
834
- "learning_rate": 4.0794319600499375e-05,
835
- "loss": 1.6661,
836
- "step": 5900
837
- },
838
- {
839
- "epoch": 0.9285268414481898,
840
- "grad_norm": 0.5308676958084106,
841
- "learning_rate": 4.0716292134831463e-05,
842
- "loss": 1.6747,
843
- "step": 5950
844
- },
845
- {
846
- "epoch": 0.9363295880149812,
847
- "grad_norm": 0.5872898101806641,
848
- "learning_rate": 4.0638264669163545e-05,
849
- "loss": 1.6719,
850
- "step": 6000
851
- },
852
- {
853
- "epoch": 0.9441323345817728,
854
- "grad_norm": 0.6066872477531433,
855
- "learning_rate": 4.056023720349563e-05,
856
- "loss": 1.6746,
857
- "step": 6050
858
- },
859
- {
860
- "epoch": 0.9519350811485643,
861
- "grad_norm": 0.5329908132553101,
862
- "learning_rate": 4.048220973782772e-05,
863
- "loss": 1.6789,
864
- "step": 6100
865
- },
866
- {
867
- "epoch": 0.9597378277153558,
868
- "grad_norm": 0.4528316855430603,
869
- "learning_rate": 4.04041822721598e-05,
870
- "loss": 1.6767,
871
- "step": 6150
872
- },
873
- {
874
- "epoch": 0.9675405742821473,
875
- "grad_norm": 0.5394971966743469,
876
- "learning_rate": 4.032615480649189e-05,
877
- "loss": 1.6797,
878
- "step": 6200
879
- },
880
- {
881
- "epoch": 0.9753433208489388,
882
- "grad_norm": 0.6735771298408508,
883
- "learning_rate": 4.024812734082397e-05,
884
- "loss": 1.6842,
885
- "step": 6250
886
- },
887
- {
888
- "epoch": 0.9831460674157303,
889
- "grad_norm": 0.48064225912094116,
890
- "learning_rate": 4.017009987515606e-05,
891
- "loss": 1.6767,
892
- "step": 6300
893
- },
894
- {
895
- "epoch": 0.9909488139825219,
896
- "grad_norm": 0.49285510182380676,
897
- "learning_rate": 4.009207240948814e-05,
898
- "loss": 1.666,
899
- "step": 6350
900
- },
901
- {
902
- "epoch": 0.9987515605493134,
903
- "grad_norm": 0.5762596726417542,
904
- "learning_rate": 4.001404494382023e-05,
905
- "loss": 1.6766,
906
- "step": 6400
907
- },
908
- {
909
- "epoch": 1.006554307116105,
910
- "grad_norm": 0.4971337616443634,
911
- "learning_rate": 3.993601747815231e-05,
912
- "loss": 1.6727,
913
- "step": 6450
914
- },
915
- {
916
- "epoch": 1.0143570536828963,
917
- "grad_norm": 0.5485156178474426,
918
- "learning_rate": 3.9857990012484394e-05,
919
- "loss": 1.6842,
920
- "step": 6500
921
- },
922
- {
923
- "epoch": 1.0221598002496879,
924
- "grad_norm": 0.4900757968425751,
925
- "learning_rate": 3.977996254681648e-05,
926
- "loss": 1.6661,
927
- "step": 6550
928
- },
929
- {
930
- "epoch": 1.0299625468164795,
931
- "grad_norm": 0.4844594895839691,
932
- "learning_rate": 3.9701935081148564e-05,
933
- "loss": 1.6601,
934
- "step": 6600
935
- },
936
- {
937
- "epoch": 1.0377652933832708,
938
- "grad_norm": 0.6316475868225098,
939
- "learning_rate": 3.9623907615480646e-05,
940
- "loss": 1.6481,
941
- "step": 6650
942
- },
943
- {
944
- "epoch": 1.0455680399500624,
945
- "grad_norm": 0.6341625452041626,
946
- "learning_rate": 3.9545880149812734e-05,
947
- "loss": 1.6656,
948
- "step": 6700
949
- },
950
- {
951
- "epoch": 1.053370786516854,
952
- "grad_norm": 0.6208726763725281,
953
- "learning_rate": 3.9467852684144816e-05,
954
- "loss": 1.6379,
955
- "step": 6750
956
- },
957
- {
958
- "epoch": 1.0611735330836454,
959
- "grad_norm": 0.605767011642456,
960
- "learning_rate": 3.9389825218476904e-05,
961
- "loss": 1.6537,
962
- "step": 6800
963
- },
964
- {
965
- "epoch": 1.068976279650437,
966
- "grad_norm": 0.5422509908676147,
967
- "learning_rate": 3.931179775280899e-05,
968
- "loss": 1.6689,
969
- "step": 6850
970
- },
971
- {
972
- "epoch": 1.0767790262172285,
973
- "grad_norm": 0.6556758880615234,
974
- "learning_rate": 3.9233770287141074e-05,
975
- "loss": 1.6619,
976
- "step": 6900
977
- },
978
- {
979
- "epoch": 1.08458177278402,
980
- "grad_norm": 0.5463916659355164,
981
- "learning_rate": 3.915574282147316e-05,
982
- "loss": 1.6584,
983
- "step": 6950
984
- },
985
- {
986
- "epoch": 1.0923845193508115,
987
- "grad_norm": 0.660961925983429,
988
- "learning_rate": 3.9077715355805244e-05,
989
- "loss": 1.6813,
990
- "step": 7000
991
- },
992
- {
993
- "epoch": 1.100187265917603,
994
- "grad_norm": 0.5504911541938782,
995
- "learning_rate": 3.899968789013733e-05,
996
- "loss": 1.6482,
997
- "step": 7050
998
- },
999
- {
1000
- "epoch": 1.1079900124843944,
1001
- "grad_norm": 0.5077877044677734,
1002
- "learning_rate": 3.8921660424469414e-05,
1003
- "loss": 1.6508,
1004
- "step": 7100
1005
- },
1006
- {
1007
- "epoch": 1.115792759051186,
1008
- "grad_norm": 0.5340275764465332,
1009
- "learning_rate": 3.88436329588015e-05,
1010
- "loss": 1.6685,
1011
- "step": 7150
1012
- },
1013
- {
1014
- "epoch": 1.1235955056179776,
1015
- "grad_norm": 0.615564227104187,
1016
- "learning_rate": 3.8765605493133584e-05,
1017
- "loss": 1.6617,
1018
- "step": 7200
1019
- },
1020
- {
1021
- "epoch": 1.131398252184769,
1022
- "grad_norm": 0.600592315196991,
1023
- "learning_rate": 3.868757802746567e-05,
1024
- "loss": 1.6478,
1025
- "step": 7250
1026
- },
1027
- {
1028
- "epoch": 1.1392009987515606,
1029
- "grad_norm": 0.606829047203064,
1030
- "learning_rate": 3.8609550561797754e-05,
1031
- "loss": 1.6627,
1032
- "step": 7300
1033
- },
1034
- {
1035
- "epoch": 1.1470037453183521,
1036
- "grad_norm": 0.5715992450714111,
1037
- "learning_rate": 3.853152309612984e-05,
1038
- "loss": 1.6724,
1039
- "step": 7350
1040
- },
1041
- {
1042
- "epoch": 1.1548064918851435,
1043
- "grad_norm": 0.5475966334342957,
1044
- "learning_rate": 3.8453495630461923e-05,
1045
- "loss": 1.6762,
1046
- "step": 7400
1047
- },
1048
- {
1049
- "epoch": 1.162609238451935,
1050
- "grad_norm": 0.5486684441566467,
1051
- "learning_rate": 3.837546816479401e-05,
1052
- "loss": 1.6418,
1053
- "step": 7450
1054
- },
1055
- {
1056
- "epoch": 1.1704119850187267,
1057
- "grad_norm": 0.5656526684761047,
1058
- "learning_rate": 3.829744069912609e-05,
1059
- "loss": 1.6623,
1060
- "step": 7500
1061
- },
1062
- {
1063
- "epoch": 1.178214731585518,
1064
- "grad_norm": 0.471967875957489,
1065
- "learning_rate": 3.821941323345818e-05,
1066
- "loss": 1.6339,
1067
- "step": 7550
1068
- },
1069
- {
1070
- "epoch": 1.1860174781523096,
1071
- "grad_norm": 0.5814192891120911,
1072
- "learning_rate": 3.814138576779026e-05,
1073
- "loss": 1.6626,
1074
- "step": 7600
1075
- },
1076
- {
1077
- "epoch": 1.1938202247191012,
1078
- "grad_norm": 0.5809513926506042,
1079
- "learning_rate": 3.806335830212235e-05,
1080
- "loss": 1.6521,
1081
- "step": 7650
1082
- },
1083
- {
1084
- "epoch": 1.2016229712858926,
1085
- "grad_norm": 0.564431369304657,
1086
- "learning_rate": 3.798533083645443e-05,
1087
- "loss": 1.633,
1088
- "step": 7700
1089
- },
1090
- {
1091
- "epoch": 1.2094257178526842,
1092
- "grad_norm": 0.5864349007606506,
1093
- "learning_rate": 3.7907303370786515e-05,
1094
- "loss": 1.6724,
1095
- "step": 7750
1096
- },
1097
- {
1098
- "epoch": 1.2172284644194757,
1099
- "grad_norm": 0.5883368849754333,
1100
- "learning_rate": 3.78292759051186e-05,
1101
- "loss": 1.6661,
1102
- "step": 7800
1103
- },
1104
- {
1105
- "epoch": 1.225031210986267,
1106
- "grad_norm": 0.5318378806114197,
1107
- "learning_rate": 3.7751248439450685e-05,
1108
- "loss": 1.6703,
1109
- "step": 7850
1110
- },
1111
- {
1112
- "epoch": 1.2328339575530587,
1113
- "grad_norm": 0.6735764741897583,
1114
- "learning_rate": 3.767322097378277e-05,
1115
- "loss": 1.6558,
1116
- "step": 7900
1117
- },
1118
- {
1119
- "epoch": 1.2406367041198503,
1120
- "grad_norm": 0.5900487303733826,
1121
- "learning_rate": 3.7595193508114855e-05,
1122
- "loss": 1.6472,
1123
- "step": 7950
1124
- },
1125
- {
1126
- "epoch": 1.2484394506866416,
1127
- "grad_norm": 0.4971151649951935,
1128
- "learning_rate": 3.751716604244694e-05,
1129
- "loss": 1.6357,
1130
- "step": 8000
1131
- },
1132
- {
1133
- "epoch": 1.2562421972534332,
1134
- "grad_norm": 0.6045508980751038,
1135
- "learning_rate": 3.7439138576779024e-05,
1136
- "loss": 1.6556,
1137
- "step": 8050
1138
- },
1139
- {
1140
- "epoch": 1.2640449438202248,
1141
- "grad_norm": 0.5860553979873657,
1142
- "learning_rate": 3.736111111111111e-05,
1143
- "loss": 1.6659,
1144
- "step": 8100
1145
- },
1146
- {
1147
- "epoch": 1.2718476903870162,
1148
- "grad_norm": 0.5339462161064148,
1149
- "learning_rate": 3.7283083645443194e-05,
1150
- "loss": 1.6707,
1151
- "step": 8150
1152
- },
1153
- {
1154
- "epoch": 1.2796504369538078,
1155
- "grad_norm": 0.5763932466506958,
1156
- "learning_rate": 3.720505617977528e-05,
1157
- "loss": 1.6694,
1158
- "step": 8200
1159
- },
1160
- {
1161
- "epoch": 1.2874531835205993,
1162
- "grad_norm": 0.5927013754844666,
1163
- "learning_rate": 3.712702871410737e-05,
1164
- "loss": 1.6258,
1165
- "step": 8250
1166
- },
1167
- {
1168
- "epoch": 1.2952559300873907,
1169
- "grad_norm": 0.6203845739364624,
1170
- "learning_rate": 3.704900124843945e-05,
1171
- "loss": 1.6459,
1172
- "step": 8300
1173
- },
1174
- {
1175
- "epoch": 1.3030586766541823,
1176
- "grad_norm": 0.5438473224639893,
1177
- "learning_rate": 3.697097378277154e-05,
1178
- "loss": 1.6352,
1179
- "step": 8350
1180
- },
1181
- {
1182
- "epoch": 1.3108614232209739,
1183
- "grad_norm": 0.6493474245071411,
1184
- "learning_rate": 3.689294631710362e-05,
1185
- "loss": 1.6643,
1186
- "step": 8400
1187
- },
1188
- {
1189
- "epoch": 1.3186641697877652,
1190
- "grad_norm": 0.5788607597351074,
1191
- "learning_rate": 3.681491885143571e-05,
1192
- "loss": 1.6753,
1193
- "step": 8450
1194
- },
1195
- {
1196
- "epoch": 1.3264669163545568,
1197
- "grad_norm": 0.5591830015182495,
1198
- "learning_rate": 3.673689138576779e-05,
1199
- "loss": 1.6316,
1200
- "step": 8500
1201
- },
1202
- {
1203
- "epoch": 1.3342696629213484,
1204
- "grad_norm": 0.4842735230922699,
1205
- "learning_rate": 3.665886392009988e-05,
1206
- "loss": 1.6699,
1207
- "step": 8550
1208
- },
1209
- {
1210
- "epoch": 1.3420724094881398,
1211
- "grad_norm": 0.6692916750907898,
1212
- "learning_rate": 3.658083645443196e-05,
1213
- "loss": 1.6249,
1214
- "step": 8600
1215
- },
1216
- {
1217
- "epoch": 1.3498751560549314,
1218
- "grad_norm": 0.5876237154006958,
1219
- "learning_rate": 3.650280898876405e-05,
1220
- "loss": 1.6576,
1221
- "step": 8650
1222
- },
1223
- {
1224
- "epoch": 1.357677902621723,
1225
- "grad_norm": 0.6215786933898926,
1226
- "learning_rate": 3.642478152309613e-05,
1227
- "loss": 1.6332,
1228
- "step": 8700
1229
- },
1230
- {
1231
- "epoch": 1.3654806491885143,
1232
- "grad_norm": 0.54453444480896,
1233
- "learning_rate": 3.634675405742822e-05,
1234
- "loss": 1.6456,
1235
- "step": 8750
1236
- },
1237
- {
1238
- "epoch": 1.373283395755306,
1239
- "grad_norm": 0.6348562836647034,
1240
- "learning_rate": 3.62687265917603e-05,
1241
- "loss": 1.6476,
1242
- "step": 8800
1243
- },
1244
- {
1245
- "epoch": 1.3810861423220975,
1246
- "grad_norm": 0.6236295700073242,
1247
- "learning_rate": 3.6190699126092384e-05,
1248
- "loss": 1.6676,
1249
- "step": 8850
1250
- },
1251
- {
1252
- "epoch": 1.3888888888888888,
1253
- "grad_norm": 0.5054611563682556,
1254
- "learning_rate": 3.611267166042447e-05,
1255
- "loss": 1.6506,
1256
- "step": 8900
1257
- },
1258
- {
1259
- "epoch": 1.3966916354556804,
1260
- "grad_norm": 0.582488477230072,
1261
- "learning_rate": 3.6034644194756553e-05,
1262
- "loss": 1.6413,
1263
- "step": 8950
1264
- },
1265
- {
1266
- "epoch": 1.404494382022472,
1267
- "grad_norm": 0.6589245796203613,
1268
- "learning_rate": 3.595661672908864e-05,
1269
- "loss": 1.6824,
1270
- "step": 9000
1271
- },
1272
- {
1273
- "epoch": 1.4122971285892634,
1274
- "grad_norm": 0.5584797859191895,
1275
- "learning_rate": 3.587858926342072e-05,
1276
- "loss": 1.6685,
1277
- "step": 9050
1278
- },
1279
- {
1280
- "epoch": 1.420099875156055,
1281
- "grad_norm": 0.6652534604072571,
1282
- "learning_rate": 3.580056179775281e-05,
1283
- "loss": 1.6362,
1284
- "step": 9100
1285
- },
1286
- {
1287
- "epoch": 1.4279026217228465,
1288
- "grad_norm": 0.6449257731437683,
1289
- "learning_rate": 3.572253433208489e-05,
1290
- "loss": 1.6543,
1291
- "step": 9150
1292
- },
1293
- {
1294
- "epoch": 1.435705368289638,
1295
- "grad_norm": 0.6358399391174316,
1296
- "learning_rate": 3.564450686641698e-05,
1297
- "loss": 1.6464,
1298
- "step": 9200
1299
- },
1300
- {
1301
- "epoch": 1.4435081148564295,
1302
- "grad_norm": 0.6031101942062378,
1303
- "learning_rate": 3.556647940074906e-05,
1304
- "loss": 1.6435,
1305
- "step": 9250
1306
- },
1307
- {
1308
- "epoch": 1.451310861423221,
1309
- "grad_norm": 0.5363774299621582,
1310
- "learning_rate": 3.548845193508115e-05,
1311
- "loss": 1.6728,
1312
- "step": 9300
1313
- },
1314
- {
1315
- "epoch": 1.4591136079900124,
1316
- "grad_norm": 0.6634340286254883,
1317
- "learning_rate": 3.541042446941323e-05,
1318
- "loss": 1.6638,
1319
- "step": 9350
1320
- },
1321
- {
1322
- "epoch": 1.466916354556804,
1323
- "grad_norm": 0.6200147867202759,
1324
- "learning_rate": 3.533239700374532e-05,
1325
- "loss": 1.6537,
1326
- "step": 9400
1327
- },
1328
- {
1329
- "epoch": 1.4747191011235956,
1330
- "grad_norm": 0.5800793766975403,
1331
- "learning_rate": 3.52543695380774e-05,
1332
- "loss": 1.6549,
1333
- "step": 9450
1334
- },
1335
- {
1336
- "epoch": 1.482521847690387,
1337
- "grad_norm": 0.5839795470237732,
1338
- "learning_rate": 3.517634207240949e-05,
1339
- "loss": 1.6571,
1340
- "step": 9500
1341
- },
1342
- {
1343
- "epoch": 1.4903245942571786,
1344
- "grad_norm": 0.5768577456474304,
1345
- "learning_rate": 3.509831460674157e-05,
1346
- "loss": 1.679,
1347
- "step": 9550
1348
- },
1349
- {
1350
- "epoch": 1.4981273408239701,
1351
- "grad_norm": 0.5268595218658447,
1352
- "learning_rate": 3.502028714107366e-05,
1353
- "loss": 1.6461,
1354
- "step": 9600
1355
- },
1356
- {
1357
- "epoch": 1.5059300873907615,
1358
- "grad_norm": 0.6356619000434875,
1359
- "learning_rate": 3.494225967540574e-05,
1360
- "loss": 1.6312,
1361
- "step": 9650
1362
- },
1363
- {
1364
- "epoch": 1.513732833957553,
1365
- "grad_norm": 0.5722295641899109,
1366
- "learning_rate": 3.486423220973783e-05,
1367
- "loss": 1.6418,
1368
- "step": 9700
1369
- },
1370
- {
1371
- "epoch": 1.5215355805243447,
1372
- "grad_norm": 0.5974990129470825,
1373
- "learning_rate": 3.478620474406992e-05,
1374
- "loss": 1.6526,
1375
- "step": 9750
1376
- },
1377
- {
1378
- "epoch": 1.529338327091136,
1379
- "grad_norm": 0.6584164500236511,
1380
- "learning_rate": 3.4708177278402e-05,
1381
- "loss": 1.6749,
1382
- "step": 9800
1383
- },
1384
- {
1385
- "epoch": 1.5371410736579276,
1386
- "grad_norm": 0.6195454001426697,
1387
- "learning_rate": 3.463014981273409e-05,
1388
- "loss": 1.6406,
1389
- "step": 9850
1390
- },
1391
- {
1392
- "epoch": 1.5449438202247192,
1393
- "grad_norm": 0.5923195481300354,
1394
- "learning_rate": 3.455212234706617e-05,
1395
- "loss": 1.6471,
1396
- "step": 9900
1397
- },
1398
- {
1399
- "epoch": 1.5527465667915106,
1400
- "grad_norm": 0.59232097864151,
1401
- "learning_rate": 3.447409488139825e-05,
1402
- "loss": 1.6595,
1403
- "step": 9950
1404
- },
1405
- {
1406
- "epoch": 1.5605493133583022,
1407
- "grad_norm": 0.5838867425918579,
1408
- "learning_rate": 3.439606741573034e-05,
1409
- "loss": 1.6449,
1410
- "step": 10000
1411
- },
1412
- {
1413
- "epoch": 1.5683520599250937,
1414
- "grad_norm": 0.6070720553398132,
1415
- "learning_rate": 3.431803995006242e-05,
1416
- "loss": 1.6376,
1417
- "step": 10050
1418
- },
1419
- {
1420
- "epoch": 1.576154806491885,
1421
- "grad_norm": 0.5864161849021912,
1422
- "learning_rate": 3.4240012484394504e-05,
1423
- "loss": 1.6555,
1424
- "step": 10100
1425
- },
1426
- {
1427
- "epoch": 1.5839575530586767,
1428
- "grad_norm": 0.6388084292411804,
1429
- "learning_rate": 3.416198501872659e-05,
1430
- "loss": 1.6522,
1431
- "step": 10150
1432
- },
1433
- {
1434
- "epoch": 1.5917602996254683,
1435
- "grad_norm": 0.5700277090072632,
1436
- "learning_rate": 3.4083957553058674e-05,
1437
- "loss": 1.6347,
1438
- "step": 10200
1439
- },
1440
- {
1441
- "epoch": 1.5995630461922596,
1442
- "grad_norm": 0.6094324588775635,
1443
- "learning_rate": 3.400593008739076e-05,
1444
- "loss": 1.6439,
1445
- "step": 10250
1446
- },
1447
- {
1448
- "epoch": 1.6073657927590512,
1449
- "grad_norm": 0.6227761507034302,
1450
- "learning_rate": 3.3927902621722844e-05,
1451
- "loss": 1.6567,
1452
- "step": 10300
1453
- },
1454
- {
1455
- "epoch": 1.6151685393258428,
1456
- "grad_norm": 0.6303547024726868,
1457
- "learning_rate": 3.384987515605493e-05,
1458
- "loss": 1.6221,
1459
- "step": 10350
1460
- },
1461
- {
1462
- "epoch": 1.6229712858926342,
1463
- "grad_norm": 0.6025314927101135,
1464
- "learning_rate": 3.3771847690387014e-05,
1465
- "loss": 1.6371,
1466
- "step": 10400
1467
- },
1468
- {
1469
- "epoch": 1.6307740324594258,
1470
- "grad_norm": 0.683813214302063,
1471
- "learning_rate": 3.36938202247191e-05,
1472
- "loss": 1.6364,
1473
- "step": 10450
1474
- },
1475
- {
1476
- "epoch": 1.6385767790262173,
1477
- "grad_norm": 0.763845682144165,
1478
- "learning_rate": 3.361579275905119e-05,
1479
- "loss": 1.6274,
1480
- "step": 10500
1481
- },
1482
- {
1483
- "epoch": 1.6463795255930087,
1484
- "grad_norm": 0.6550936698913574,
1485
- "learning_rate": 3.353776529338327e-05,
1486
- "loss": 1.6433,
1487
- "step": 10550
1488
- },
1489
- {
1490
- "epoch": 1.6541822721598003,
1491
- "grad_norm": 0.6213018894195557,
1492
- "learning_rate": 3.345973782771536e-05,
1493
- "loss": 1.6405,
1494
- "step": 10600
1495
- },
1496
- {
1497
- "epoch": 1.6619850187265919,
1498
- "grad_norm": 0.6199821829795837,
1499
- "learning_rate": 3.338171036204744e-05,
1500
- "loss": 1.6221,
1501
- "step": 10650
1502
- },
1503
- {
1504
- "epoch": 1.6697877652933832,
1505
- "grad_norm": 0.6940792798995972,
1506
- "learning_rate": 3.330368289637953e-05,
1507
- "loss": 1.6064,
1508
- "step": 10700
1509
- },
1510
- {
1511
- "epoch": 1.6775905118601748,
1512
- "grad_norm": 0.5895411968231201,
1513
- "learning_rate": 3.322565543071161e-05,
1514
- "loss": 1.6246,
1515
- "step": 10750
1516
- },
1517
- {
1518
- "epoch": 1.6853932584269664,
1519
- "grad_norm": 0.584697425365448,
1520
- "learning_rate": 3.31476279650437e-05,
1521
- "loss": 1.6164,
1522
- "step": 10800
1523
- },
1524
- {
1525
- "epoch": 1.6931960049937578,
1526
- "grad_norm": 0.5995935201644897,
1527
- "learning_rate": 3.306960049937578e-05,
1528
- "loss": 1.6303,
1529
- "step": 10850
1530
- },
1531
- {
1532
- "epoch": 1.7009987515605494,
1533
- "grad_norm": 0.6184208989143372,
1534
- "learning_rate": 3.299157303370787e-05,
1535
- "loss": 1.6226,
1536
- "step": 10900
1537
- },
1538
- {
1539
- "epoch": 1.708801498127341,
1540
- "grad_norm": 0.6035963892936707,
1541
- "learning_rate": 3.291354556803995e-05,
1542
- "loss": 1.6519,
1543
- "step": 10950
1544
- },
1545
- {
1546
- "epoch": 1.7166042446941323,
1547
- "grad_norm": 0.6514495611190796,
1548
- "learning_rate": 3.283551810237204e-05,
1549
- "loss": 1.6311,
1550
- "step": 11000
1551
- },
1552
- {
1553
- "epoch": 1.724406991260924,
1554
- "grad_norm": 0.681898832321167,
1555
- "learning_rate": 3.275749063670412e-05,
1556
- "loss": 1.6493,
1557
- "step": 11050
1558
- },
1559
- {
1560
- "epoch": 1.7322097378277155,
1561
- "grad_norm": 0.641394317150116,
1562
- "learning_rate": 3.267946317103621e-05,
1563
- "loss": 1.6562,
1564
- "step": 11100
1565
- },
1566
- {
1567
- "epoch": 1.7400124843945068,
1568
- "grad_norm": 0.6565654277801514,
1569
- "learning_rate": 3.260143570536829e-05,
1570
- "loss": 1.6193,
1571
- "step": 11150
1572
- },
1573
- {
1574
- "epoch": 1.7478152309612984,
1575
- "grad_norm": 0.6501032114028931,
1576
- "learning_rate": 3.252340823970037e-05,
1577
- "loss": 1.6294,
1578
- "step": 11200
1579
- },
1580
- {
1581
- "epoch": 1.75561797752809,
1582
- "grad_norm": 0.5766665935516357,
1583
- "learning_rate": 3.244538077403246e-05,
1584
- "loss": 1.6236,
1585
- "step": 11250
1586
- },
1587
- {
1588
- "epoch": 1.7634207240948814,
1589
- "grad_norm": 0.6376942992210388,
1590
- "learning_rate": 3.236735330836454e-05,
1591
- "loss": 1.6592,
1592
- "step": 11300
1593
- },
1594
- {
1595
- "epoch": 1.771223470661673,
1596
- "grad_norm": 0.6972282528877258,
1597
- "learning_rate": 3.228932584269663e-05,
1598
- "loss": 1.6173,
1599
- "step": 11350
1600
- },
1601
- {
1602
- "epoch": 1.7790262172284645,
1603
- "grad_norm": 0.6171312928199768,
1604
- "learning_rate": 3.221129837702871e-05,
1605
- "loss": 1.6414,
1606
- "step": 11400
1607
- },
1608
- {
1609
- "epoch": 1.786828963795256,
1610
- "grad_norm": 0.715184211730957,
1611
- "learning_rate": 3.21332709113608e-05,
1612
- "loss": 1.637,
1613
- "step": 11450
1614
- },
1615
- {
1616
- "epoch": 1.7946317103620475,
1617
- "grad_norm": 0.6628192663192749,
1618
- "learning_rate": 3.205524344569288e-05,
1619
- "loss": 1.6102,
1620
- "step": 11500
1621
- },
1622
- {
1623
- "epoch": 1.802434456928839,
1624
- "grad_norm": 0.6058287024497986,
1625
- "learning_rate": 3.197721598002497e-05,
1626
- "loss": 1.6512,
1627
- "step": 11550
1628
- },
1629
- {
1630
- "epoch": 1.8102372034956304,
1631
- "grad_norm": 0.6275887489318848,
1632
- "learning_rate": 3.189918851435705e-05,
1633
- "loss": 1.6435,
1634
- "step": 11600
1635
- },
1636
- {
1637
- "epoch": 1.818039950062422,
1638
- "grad_norm": 0.7389242053031921,
1639
- "learning_rate": 3.182116104868914e-05,
1640
- "loss": 1.6234,
1641
- "step": 11650
1642
- },
1643
- {
1644
- "epoch": 1.8258426966292136,
1645
- "grad_norm": 0.649131715297699,
1646
- "learning_rate": 3.174313358302122e-05,
1647
- "loss": 1.6332,
1648
- "step": 11700
1649
- },
1650
- {
1651
- "epoch": 1.833645443196005,
1652
- "grad_norm": 0.5898476839065552,
1653
- "learning_rate": 3.166510611735331e-05,
1654
- "loss": 1.6299,
1655
- "step": 11750
1656
- },
1657
- {
1658
- "epoch": 1.8414481897627963,
1659
- "grad_norm": 0.617365837097168,
1660
- "learning_rate": 3.158707865168539e-05,
1661
- "loss": 1.608,
1662
- "step": 11800
1663
- },
1664
- {
1665
- "epoch": 1.8492509363295881,
1666
- "grad_norm": 0.6347021460533142,
1667
- "learning_rate": 3.150905118601748e-05,
1668
- "loss": 1.6168,
1669
- "step": 11850
1670
- },
1671
- {
1672
- "epoch": 1.8570536828963795,
1673
- "grad_norm": 0.6479565501213074,
1674
- "learning_rate": 3.143102372034957e-05,
1675
- "loss": 1.6089,
1676
- "step": 11900
1677
- },
1678
- {
1679
- "epoch": 1.8648564294631709,
1680
- "grad_norm": 0.6168213486671448,
1681
- "learning_rate": 3.135299625468165e-05,
1682
- "loss": 1.63,
1683
- "step": 11950
1684
- },
1685
- {
1686
- "epoch": 1.8726591760299627,
1687
- "grad_norm": 0.5773766040802002,
1688
- "learning_rate": 3.127496878901374e-05,
1689
- "loss": 1.6183,
1690
- "step": 12000
1691
- },
1692
- {
1693
- "epoch": 1.880461922596754,
1694
- "grad_norm": 0.5600804686546326,
1695
- "learning_rate": 3.119694132334582e-05,
1696
- "loss": 1.6393,
1697
- "step": 12050
1698
- },
1699
- {
1700
- "epoch": 1.8882646691635454,
1701
- "grad_norm": 0.623058557510376,
1702
- "learning_rate": 3.111891385767791e-05,
1703
- "loss": 1.6112,
1704
- "step": 12100
1705
- },
1706
- {
1707
- "epoch": 1.8960674157303372,
1708
- "grad_norm": 0.5952323079109192,
1709
- "learning_rate": 3.104088639200999e-05,
1710
- "loss": 1.6319,
1711
- "step": 12150
1712
- },
1713
- {
1714
- "epoch": 1.9038701622971286,
1715
- "grad_norm": 0.7055174112319946,
1716
- "learning_rate": 3.096285892634208e-05,
1717
- "loss": 1.6275,
1718
- "step": 12200
1719
- },
1720
- {
1721
- "epoch": 1.91167290886392,
1722
- "grad_norm": 0.5625096559524536,
1723
- "learning_rate": 3.088483146067416e-05,
1724
- "loss": 1.6345,
1725
- "step": 12250
1726
- },
1727
- {
1728
- "epoch": 1.9194756554307117,
1729
- "grad_norm": 0.5937293767929077,
1730
- "learning_rate": 3.080680399500624e-05,
1731
- "loss": 1.6197,
1732
- "step": 12300
1733
- },
1734
- {
1735
- "epoch": 1.927278401997503,
1736
- "grad_norm": 0.6606655120849609,
1737
- "learning_rate": 3.072877652933833e-05,
1738
- "loss": 1.6201,
1739
- "step": 12350
1740
- },
1741
- {
1742
- "epoch": 1.9350811485642945,
1743
- "grad_norm": 0.6392807960510254,
1744
- "learning_rate": 3.065074906367041e-05,
1745
- "loss": 1.6309,
1746
- "step": 12400
1747
- },
1748
- {
1749
- "epoch": 1.9428838951310863,
1750
- "grad_norm": 0.7471784353256226,
1751
- "learning_rate": 3.057272159800249e-05,
1752
- "loss": 1.6131,
1753
- "step": 12450
1754
- },
1755
- {
1756
- "epoch": 1.9506866416978776,
1757
- "grad_norm": 0.6735255718231201,
1758
- "learning_rate": 3.0494694132334585e-05,
1759
- "loss": 1.6514,
1760
- "step": 12500
1761
- },
1762
- {
1763
- "epoch": 1.958489388264669,
1764
- "grad_norm": 0.6417968273162842,
1765
- "learning_rate": 3.0416666666666666e-05,
1766
- "loss": 1.6245,
1767
- "step": 12550
1768
- },
1769
- {
1770
- "epoch": 1.9662921348314608,
1771
- "grad_norm": 0.6633313894271851,
1772
- "learning_rate": 3.0338639200998755e-05,
1773
- "loss": 1.6267,
1774
- "step": 12600
1775
- },
1776
- {
1777
- "epoch": 1.9740948813982522,
1778
- "grad_norm": 0.6218631267547607,
1779
- "learning_rate": 3.026061173533084e-05,
1780
- "loss": 1.6472,
1781
- "step": 12650
1782
- },
1783
- {
1784
- "epoch": 1.9818976279650435,
1785
- "grad_norm": 0.594956636428833,
1786
- "learning_rate": 3.018258426966292e-05,
1787
- "loss": 1.6298,
1788
- "step": 12700
1789
- },
1790
- {
1791
- "epoch": 1.9897003745318353,
1792
- "grad_norm": 0.5888795852661133,
1793
- "learning_rate": 3.010455680399501e-05,
1794
- "loss": 1.626,
1795
- "step": 12750
1796
- },
1797
- {
1798
- "epoch": 1.9975031210986267,
1799
- "grad_norm": 0.6327818632125854,
1800
- "learning_rate": 3.002652933832709e-05,
1801
- "loss": 1.625,
1802
- "step": 12800
1803
- }
1804
- ],
1805
- "logging_steps": 50,
1806
- "max_steps": 32040,
1807
- "num_input_tokens_seen": 0,
1808
- "num_train_epochs": 5,
1809
- "save_steps": 100,
1810
- "stateful_callbacks": {
1811
- "TrainerControl": {
1812
- "args": {
1813
- "should_epoch_stop": false,
1814
- "should_evaluate": false,
1815
- "should_log": false,
1816
- "should_save": true,
1817
- "should_training_stop": false
1818
- },
1819
- "attributes": {}
1820
- }
1821
- },
1822
- "total_flos": 1.7153416265151283e+17,
1823
- "train_batch_size": 10,
1824
- "trial_name": null,
1825
- "trial_params": null
1826
- }