romainnn commited on
Commit
ade1abd
·
verified ·
1 Parent(s): 9aebc73

Training in progress, step 3800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ee66a361296b6ddade4373f90ebf70dfec210f4c2cc8a673761b669a0a19a29
3
  size 35237104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b097fe16dbacfd6384828bee8b0af2e19ba6892664dceddd1f0d546f9c31571
3
  size 35237104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69ade3d8143fa53df3c44e13e0e86f751c9d91f48e8ba8422b235fd7a4ef95c
3
  size 18810356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e666d9eea54720024abc1ab42e7b827ea7a1bb020bb9aef9673ec0cc15120d8a
3
  size 18810356
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525e871b8bfa9d0c55029d3a5724dab788324d84396021519791e25b39fc6797
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:957a4cfc2b6817a96aeac2e1fcff9ce55867670984c61b5a97af9cd4dda67c60
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:110477147f82823f2afe8f6b04f642e31b0df79e35f16b64a881cf01711c33d2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1dd23ba82b3ca1f0ee26353d9cc3ddfc63459acf74051174c6a70a84f210c52
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.8474695682525635,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-3600",
4
- "epoch": 1.0297422339722406,
5
  "eval_steps": 100,
6
- "global_step": 3700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -26211,6 +26211,714 @@
26211
  "eval_samples_per_second": 59.226,
26212
  "eval_steps_per_second": 14.807,
26213
  "step": 3700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26214
  }
26215
  ],
26216
  "logging_steps": 1,
@@ -26225,7 +26933,7 @@
26225
  "early_stopping_threshold": 0.0
26226
  },
26227
  "attributes": {
26228
- "early_stopping_patience_counter": 1
26229
  }
26230
  },
26231
  "TrainerControl": {
@@ -26234,12 +26942,12 @@
26234
  "should_evaluate": false,
26235
  "should_log": false,
26236
  "should_save": true,
26237
- "should_training_stop": false
26238
  },
26239
  "attributes": {}
26240
  }
26241
  },
26242
- "total_flos": 2.66753091305472e+17,
26243
  "train_batch_size": 4,
26244
  "trial_name": null,
26245
  "trial_params": null
 
1
  {
2
  "best_metric": 2.8474695682525635,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-3600",
4
+ "epoch": 1.0575712248234599,
5
  "eval_steps": 100,
6
+ "global_step": 3800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
26211
  "eval_samples_per_second": 59.226,
26212
  "eval_steps_per_second": 14.807,
26213
  "step": 3700
26214
+ },
26215
+ {
26216
+ "epoch": 1.0300205238807527,
26217
+ "grad_norm": 2.34928560256958,
26218
+ "learning_rate": 9.551476636488089e-05,
26219
+ "loss": 2.4443,
26220
+ "step": 3701
26221
+ },
26222
+ {
26223
+ "epoch": 1.030298813789265,
26224
+ "grad_norm": 2.5956695079803467,
26225
+ "learning_rate": 9.547103778606931e-05,
26226
+ "loss": 2.798,
26227
+ "step": 3702
26228
+ },
26229
+ {
26230
+ "epoch": 1.030577103697777,
26231
+ "grad_norm": 2.6470494270324707,
26232
+ "learning_rate": 9.542731007504356e-05,
26233
+ "loss": 2.7361,
26234
+ "step": 3703
26235
+ },
26236
+ {
26237
+ "epoch": 1.0308553936062894,
26238
+ "grad_norm": 2.35915207862854,
26239
+ "learning_rate": 9.538358324018215e-05,
26240
+ "loss": 2.4667,
26241
+ "step": 3704
26242
+ },
26243
+ {
26244
+ "epoch": 1.0311336835148015,
26245
+ "grad_norm": 2.26962947845459,
26246
+ "learning_rate": 9.533985728986359e-05,
26247
+ "loss": 2.5105,
26248
+ "step": 3705
26249
+ },
26250
+ {
26251
+ "epoch": 1.0314119734233138,
26252
+ "grad_norm": 2.273303747177124,
26253
+ "learning_rate": 9.529613223246607e-05,
26254
+ "loss": 2.7761,
26255
+ "step": 3706
26256
+ },
26257
+ {
26258
+ "epoch": 1.031690263331826,
26259
+ "grad_norm": 2.518406867980957,
26260
+ "learning_rate": 9.525240807636766e-05,
26261
+ "loss": 2.6669,
26262
+ "step": 3707
26263
+ },
26264
+ {
26265
+ "epoch": 1.0319685532403382,
26266
+ "grad_norm": 2.45633602142334,
26267
+ "learning_rate": 9.52086848299463e-05,
26268
+ "loss": 2.5069,
26269
+ "step": 3708
26270
+ },
26271
+ {
26272
+ "epoch": 1.0322468431488503,
26273
+ "grad_norm": 3.081700086593628,
26274
+ "learning_rate": 9.51649625015797e-05,
26275
+ "loss": 2.6667,
26276
+ "step": 3709
26277
+ },
26278
+ {
26279
+ "epoch": 1.0325251330573626,
26280
+ "grad_norm": 2.3902275562286377,
26281
+ "learning_rate": 9.512124109964539e-05,
26282
+ "loss": 2.5777,
26283
+ "step": 3710
26284
+ },
26285
+ {
26286
+ "epoch": 1.0328034229658747,
26287
+ "grad_norm": 2.087824583053589,
26288
+ "learning_rate": 9.507752063252078e-05,
26289
+ "loss": 2.4125,
26290
+ "step": 3711
26291
+ },
26292
+ {
26293
+ "epoch": 1.033081712874387,
26294
+ "grad_norm": 2.584324836730957,
26295
+ "learning_rate": 9.503380110858304e-05,
26296
+ "loss": 2.596,
26297
+ "step": 3712
26298
+ },
26299
+ {
26300
+ "epoch": 1.033360002782899,
26301
+ "grad_norm": 2.4409570693969727,
26302
+ "learning_rate": 9.499008253620919e-05,
26303
+ "loss": 2.5021,
26304
+ "step": 3713
26305
+ },
26306
+ {
26307
+ "epoch": 1.0336382926914114,
26308
+ "grad_norm": 2.357409954071045,
26309
+ "learning_rate": 9.494636492377607e-05,
26310
+ "loss": 2.4717,
26311
+ "step": 3714
26312
+ },
26313
+ {
26314
+ "epoch": 1.0339165825999235,
26315
+ "grad_norm": 2.637960195541382,
26316
+ "learning_rate": 9.490264827966033e-05,
26317
+ "loss": 2.7572,
26318
+ "step": 3715
26319
+ },
26320
+ {
26321
+ "epoch": 1.0341948725084356,
26322
+ "grad_norm": 2.5555248260498047,
26323
+ "learning_rate": 9.485893261223842e-05,
26324
+ "loss": 2.6943,
26325
+ "step": 3716
26326
+ },
26327
+ {
26328
+ "epoch": 1.0344731624169479,
26329
+ "grad_norm": 2.3676626682281494,
26330
+ "learning_rate": 9.481521792988664e-05,
26331
+ "loss": 2.7945,
26332
+ "step": 3717
26333
+ },
26334
+ {
26335
+ "epoch": 1.03475145232546,
26336
+ "grad_norm": 2.2306549549102783,
26337
+ "learning_rate": 9.477150424098105e-05,
26338
+ "loss": 2.3112,
26339
+ "step": 3718
26340
+ },
26341
+ {
26342
+ "epoch": 1.0350297422339723,
26343
+ "grad_norm": 2.835977792739868,
26344
+ "learning_rate": 9.47277915538975e-05,
26345
+ "loss": 2.8379,
26346
+ "step": 3719
26347
+ },
26348
+ {
26349
+ "epoch": 1.0353080321424843,
26350
+ "grad_norm": 2.4739816188812256,
26351
+ "learning_rate": 9.468407987701179e-05,
26352
+ "loss": 2.6375,
26353
+ "step": 3720
26354
+ },
26355
+ {
26356
+ "epoch": 1.0355863220509967,
26357
+ "grad_norm": 2.7540197372436523,
26358
+ "learning_rate": 9.464036921869941e-05,
26359
+ "loss": 2.416,
26360
+ "step": 3721
26361
+ },
26362
+ {
26363
+ "epoch": 1.0358646119595087,
26364
+ "grad_norm": 2.581390142440796,
26365
+ "learning_rate": 9.459665958733565e-05,
26366
+ "loss": 2.3666,
26367
+ "step": 3722
26368
+ },
26369
+ {
26370
+ "epoch": 1.036142901868021,
26371
+ "grad_norm": 2.3293237686157227,
26372
+ "learning_rate": 9.455295099129563e-05,
26373
+ "loss": 2.5112,
26374
+ "step": 3723
26375
+ },
26376
+ {
26377
+ "epoch": 1.0364211917765331,
26378
+ "grad_norm": 2.709442377090454,
26379
+ "learning_rate": 9.450924343895428e-05,
26380
+ "loss": 3.0548,
26381
+ "step": 3724
26382
+ },
26383
+ {
26384
+ "epoch": 1.0366994816850454,
26385
+ "grad_norm": 2.5723798274993896,
26386
+ "learning_rate": 9.446553693868633e-05,
26387
+ "loss": 2.7705,
26388
+ "step": 3725
26389
+ },
26390
+ {
26391
+ "epoch": 1.0369777715935575,
26392
+ "grad_norm": 2.4315478801727295,
26393
+ "learning_rate": 9.442183149886627e-05,
26394
+ "loss": 2.7446,
26395
+ "step": 3726
26396
+ },
26397
+ {
26398
+ "epoch": 1.0372560615020698,
26399
+ "grad_norm": 2.1737117767333984,
26400
+ "learning_rate": 9.437812712786844e-05,
26401
+ "loss": 2.534,
26402
+ "step": 3727
26403
+ },
26404
+ {
26405
+ "epoch": 1.037534351410582,
26406
+ "grad_norm": 2.1455442905426025,
26407
+ "learning_rate": 9.433442383406696e-05,
26408
+ "loss": 2.3731,
26409
+ "step": 3728
26410
+ },
26411
+ {
26412
+ "epoch": 1.0378126413190942,
26413
+ "grad_norm": 2.4659476280212402,
26414
+ "learning_rate": 9.429072162583567e-05,
26415
+ "loss": 2.3374,
26416
+ "step": 3729
26417
+ },
26418
+ {
26419
+ "epoch": 1.0380909312276063,
26420
+ "grad_norm": 2.5009284019470215,
26421
+ "learning_rate": 9.424702051154836e-05,
26422
+ "loss": 2.6745,
26423
+ "step": 3730
26424
+ },
26425
+ {
26426
+ "epoch": 1.0383692211361186,
26427
+ "grad_norm": 2.334843158721924,
26428
+ "learning_rate": 9.420332049957846e-05,
26429
+ "loss": 2.3093,
26430
+ "step": 3731
26431
+ },
26432
+ {
26433
+ "epoch": 1.0386475110446307,
26434
+ "grad_norm": 2.483473539352417,
26435
+ "learning_rate": 9.415962159829926e-05,
26436
+ "loss": 2.4984,
26437
+ "step": 3732
26438
+ },
26439
+ {
26440
+ "epoch": 1.038925800953143,
26441
+ "grad_norm": 2.4551875591278076,
26442
+ "learning_rate": 9.411592381608381e-05,
26443
+ "loss": 2.423,
26444
+ "step": 3733
26445
+ },
26446
+ {
26447
+ "epoch": 1.039204090861655,
26448
+ "grad_norm": 2.400089740753174,
26449
+ "learning_rate": 9.407222716130499e-05,
26450
+ "loss": 2.0407,
26451
+ "step": 3734
26452
+ },
26453
+ {
26454
+ "epoch": 1.0394823807701674,
26455
+ "grad_norm": 2.617560863494873,
26456
+ "learning_rate": 9.402853164233538e-05,
26457
+ "loss": 2.8515,
26458
+ "step": 3735
26459
+ },
26460
+ {
26461
+ "epoch": 1.0397606706786795,
26462
+ "grad_norm": 2.454972505569458,
26463
+ "learning_rate": 9.398483726754746e-05,
26464
+ "loss": 2.3012,
26465
+ "step": 3736
26466
+ },
26467
+ {
26468
+ "epoch": 1.0400389605871918,
26469
+ "grad_norm": 2.8407280445098877,
26470
+ "learning_rate": 9.394114404531338e-05,
26471
+ "loss": 2.7096,
26472
+ "step": 3737
26473
+ },
26474
+ {
26475
+ "epoch": 1.040317250495704,
26476
+ "grad_norm": 2.6554818153381348,
26477
+ "learning_rate": 9.389745198400513e-05,
26478
+ "loss": 2.7251,
26479
+ "step": 3738
26480
+ },
26481
+ {
26482
+ "epoch": 1.0405955404042162,
26483
+ "grad_norm": 2.69405460357666,
26484
+ "learning_rate": 9.385376109199448e-05,
26485
+ "loss": 2.3692,
26486
+ "step": 3739
26487
+ },
26488
+ {
26489
+ "epoch": 1.0408738303127283,
26490
+ "grad_norm": 2.626368761062622,
26491
+ "learning_rate": 9.381007137765292e-05,
26492
+ "loss": 2.6053,
26493
+ "step": 3740
26494
+ },
26495
+ {
26496
+ "epoch": 1.0411521202212404,
26497
+ "grad_norm": 2.5650789737701416,
26498
+ "learning_rate": 9.376638284935176e-05,
26499
+ "loss": 2.7401,
26500
+ "step": 3741
26501
+ },
26502
+ {
26503
+ "epoch": 1.0414304101297527,
26504
+ "grad_norm": 2.240908622741699,
26505
+ "learning_rate": 9.372269551546211e-05,
26506
+ "loss": 2.4553,
26507
+ "step": 3742
26508
+ },
26509
+ {
26510
+ "epoch": 1.0417087000382648,
26511
+ "grad_norm": 2.357799530029297,
26512
+ "learning_rate": 9.367900938435479e-05,
26513
+ "loss": 2.818,
26514
+ "step": 3743
26515
+ },
26516
+ {
26517
+ "epoch": 1.041986989946777,
26518
+ "grad_norm": 2.7151036262512207,
26519
+ "learning_rate": 9.36353244644004e-05,
26520
+ "loss": 2.7159,
26521
+ "step": 3744
26522
+ },
26523
+ {
26524
+ "epoch": 1.0422652798552892,
26525
+ "grad_norm": 2.4492530822753906,
26526
+ "learning_rate": 9.359164076396937e-05,
26527
+ "loss": 2.4194,
26528
+ "step": 3745
26529
+ },
26530
+ {
26531
+ "epoch": 1.0425435697638015,
26532
+ "grad_norm": 2.7243385314941406,
26533
+ "learning_rate": 9.354795829143182e-05,
26534
+ "loss": 2.739,
26535
+ "step": 3746
26536
+ },
26537
+ {
26538
+ "epoch": 1.0428218596723136,
26539
+ "grad_norm": 2.5599193572998047,
26540
+ "learning_rate": 9.350427705515766e-05,
26541
+ "loss": 2.6885,
26542
+ "step": 3747
26543
+ },
26544
+ {
26545
+ "epoch": 1.0431001495808259,
26546
+ "grad_norm": 2.4527359008789062,
26547
+ "learning_rate": 9.346059706351659e-05,
26548
+ "loss": 2.5548,
26549
+ "step": 3748
26550
+ },
26551
+ {
26552
+ "epoch": 1.043378439489338,
26553
+ "grad_norm": 2.3975369930267334,
26554
+ "learning_rate": 9.341691832487804e-05,
26555
+ "loss": 2.4877,
26556
+ "step": 3749
26557
+ },
26558
+ {
26559
+ "epoch": 1.0436567293978503,
26560
+ "grad_norm": 2.267547369003296,
26561
+ "learning_rate": 9.337324084761118e-05,
26562
+ "loss": 2.2493,
26563
+ "step": 3750
26564
+ },
26565
+ {
26566
+ "epoch": 1.0439350193063623,
26567
+ "grad_norm": 2.2563583850860596,
26568
+ "learning_rate": 9.332956464008503e-05,
26569
+ "loss": 2.4411,
26570
+ "step": 3751
26571
+ },
26572
+ {
26573
+ "epoch": 1.0442133092148747,
26574
+ "grad_norm": 2.401918649673462,
26575
+ "learning_rate": 9.328588971066827e-05,
26576
+ "loss": 2.8112,
26577
+ "step": 3752
26578
+ },
26579
+ {
26580
+ "epoch": 1.0444915991233867,
26581
+ "grad_norm": 2.5725367069244385,
26582
+ "learning_rate": 9.324221606772935e-05,
26583
+ "loss": 2.5735,
26584
+ "step": 3753
26585
+ },
26586
+ {
26587
+ "epoch": 1.044769889031899,
26588
+ "grad_norm": 2.5067310333251953,
26589
+ "learning_rate": 9.319854371963653e-05,
26590
+ "loss": 2.4901,
26591
+ "step": 3754
26592
+ },
26593
+ {
26594
+ "epoch": 1.0450481789404111,
26595
+ "grad_norm": 2.332139730453491,
26596
+ "learning_rate": 9.315487267475777e-05,
26597
+ "loss": 2.3954,
26598
+ "step": 3755
26599
+ },
26600
+ {
26601
+ "epoch": 1.0453264688489234,
26602
+ "grad_norm": 2.5820467472076416,
26603
+ "learning_rate": 9.311120294146078e-05,
26604
+ "loss": 2.685,
26605
+ "step": 3756
26606
+ },
26607
+ {
26608
+ "epoch": 1.0456047587574355,
26609
+ "grad_norm": 2.7918787002563477,
26610
+ "learning_rate": 9.306753452811308e-05,
26611
+ "loss": 2.8996,
26612
+ "step": 3757
26613
+ },
26614
+ {
26615
+ "epoch": 1.0458830486659478,
26616
+ "grad_norm": 2.6018383502960205,
26617
+ "learning_rate": 9.302386744308185e-05,
26618
+ "loss": 2.6813,
26619
+ "step": 3758
26620
+ },
26621
+ {
26622
+ "epoch": 1.04616133857446,
26623
+ "grad_norm": 2.2214136123657227,
26624
+ "learning_rate": 9.298020169473402e-05,
26625
+ "loss": 2.5446,
26626
+ "step": 3759
26627
+ },
26628
+ {
26629
+ "epoch": 1.0464396284829722,
26630
+ "grad_norm": 2.195864677429199,
26631
+ "learning_rate": 9.293653729143636e-05,
26632
+ "loss": 2.3378,
26633
+ "step": 3760
26634
+ },
26635
+ {
26636
+ "epoch": 1.0467179183914843,
26637
+ "grad_norm": 2.4155166149139404,
26638
+ "learning_rate": 9.289287424155536e-05,
26639
+ "loss": 2.5393,
26640
+ "step": 3761
26641
+ },
26642
+ {
26643
+ "epoch": 1.0469962082999966,
26644
+ "grad_norm": 2.280897617340088,
26645
+ "learning_rate": 9.284921255345715e-05,
26646
+ "loss": 2.472,
26647
+ "step": 3762
26648
+ },
26649
+ {
26650
+ "epoch": 1.0472744982085087,
26651
+ "grad_norm": 2.2828261852264404,
26652
+ "learning_rate": 9.280555223550767e-05,
26653
+ "loss": 2.4396,
26654
+ "step": 3763
26655
+ },
26656
+ {
26657
+ "epoch": 1.047552788117021,
26658
+ "grad_norm": 2.5520870685577393,
26659
+ "learning_rate": 9.27618932960726e-05,
26660
+ "loss": 2.5667,
26661
+ "step": 3764
26662
+ },
26663
+ {
26664
+ "epoch": 1.047831078025533,
26665
+ "grad_norm": 2.394723892211914,
26666
+ "learning_rate": 9.271823574351736e-05,
26667
+ "loss": 2.5952,
26668
+ "step": 3765
26669
+ },
26670
+ {
26671
+ "epoch": 1.0481093679340452,
26672
+ "grad_norm": 2.639838695526123,
26673
+ "learning_rate": 9.267457958620703e-05,
26674
+ "loss": 2.7062,
26675
+ "step": 3766
26676
+ },
26677
+ {
26678
+ "epoch": 1.0483876578425575,
26679
+ "grad_norm": 2.5440590381622314,
26680
+ "learning_rate": 9.263092483250657e-05,
26681
+ "loss": 2.5498,
26682
+ "step": 3767
26683
+ },
26684
+ {
26685
+ "epoch": 1.0486659477510696,
26686
+ "grad_norm": 2.683584213256836,
26687
+ "learning_rate": 9.258727149078055e-05,
26688
+ "loss": 2.6906,
26689
+ "step": 3768
26690
+ },
26691
+ {
26692
+ "epoch": 1.048944237659582,
26693
+ "grad_norm": 2.7254245281219482,
26694
+ "learning_rate": 9.254361956939327e-05,
26695
+ "loss": 2.6168,
26696
+ "step": 3769
26697
+ },
26698
+ {
26699
+ "epoch": 1.049222527568094,
26700
+ "grad_norm": 2.4922702312469482,
26701
+ "learning_rate": 9.249996907670881e-05,
26702
+ "loss": 2.7162,
26703
+ "step": 3770
26704
+ },
26705
+ {
26706
+ "epoch": 1.0495008174766063,
26707
+ "grad_norm": 2.6056606769561768,
26708
+ "learning_rate": 9.245632002109099e-05,
26709
+ "loss": 2.4848,
26710
+ "step": 3771
26711
+ },
26712
+ {
26713
+ "epoch": 1.0497791073851184,
26714
+ "grad_norm": 2.89192271232605,
26715
+ "learning_rate": 9.241267241090327e-05,
26716
+ "loss": 2.6764,
26717
+ "step": 3772
26718
+ },
26719
+ {
26720
+ "epoch": 1.0500573972936307,
26721
+ "grad_norm": 2.1888632774353027,
26722
+ "learning_rate": 9.236902625450893e-05,
26723
+ "loss": 2.3813,
26724
+ "step": 3773
26725
+ },
26726
+ {
26727
+ "epoch": 1.0503356872021428,
26728
+ "grad_norm": 2.4971957206726074,
26729
+ "learning_rate": 9.232538156027091e-05,
26730
+ "loss": 2.3947,
26731
+ "step": 3774
26732
+ },
26733
+ {
26734
+ "epoch": 1.050613977110655,
26735
+ "grad_norm": 2.0770082473754883,
26736
+ "learning_rate": 9.228173833655186e-05,
26737
+ "loss": 2.0168,
26738
+ "step": 3775
26739
+ },
26740
+ {
26741
+ "epoch": 1.0508922670191672,
26742
+ "grad_norm": 2.418344497680664,
26743
+ "learning_rate": 9.223809659171423e-05,
26744
+ "loss": 2.7327,
26745
+ "step": 3776
26746
+ },
26747
+ {
26748
+ "epoch": 1.0511705569276795,
26749
+ "grad_norm": 2.1940460205078125,
26750
+ "learning_rate": 9.21944563341201e-05,
26751
+ "loss": 2.3535,
26752
+ "step": 3777
26753
+ },
26754
+ {
26755
+ "epoch": 1.0514488468361916,
26756
+ "grad_norm": 2.1863420009613037,
26757
+ "learning_rate": 9.215081757213127e-05,
26758
+ "loss": 2.357,
26759
+ "step": 3778
26760
+ },
26761
+ {
26762
+ "epoch": 1.0517271367447039,
26763
+ "grad_norm": 2.4358723163604736,
26764
+ "learning_rate": 9.210718031410934e-05,
26765
+ "loss": 2.5809,
26766
+ "step": 3779
26767
+ },
26768
+ {
26769
+ "epoch": 1.052005426653216,
26770
+ "grad_norm": 2.3010590076446533,
26771
+ "learning_rate": 9.206354456841551e-05,
26772
+ "loss": 2.4569,
26773
+ "step": 3780
26774
+ },
26775
+ {
26776
+ "epoch": 1.0522837165617283,
26777
+ "grad_norm": 2.511343002319336,
26778
+ "learning_rate": 9.201991034341075e-05,
26779
+ "loss": 2.6625,
26780
+ "step": 3781
26781
+ },
26782
+ {
26783
+ "epoch": 1.0525620064702403,
26784
+ "grad_norm": 2.421273708343506,
26785
+ "learning_rate": 9.197627764745577e-05,
26786
+ "loss": 2.472,
26787
+ "step": 3782
26788
+ },
26789
+ {
26790
+ "epoch": 1.0528402963787526,
26791
+ "grad_norm": 2.5087594985961914,
26792
+ "learning_rate": 9.193264648891091e-05,
26793
+ "loss": 2.462,
26794
+ "step": 3783
26795
+ },
26796
+ {
26797
+ "epoch": 1.0531185862872647,
26798
+ "grad_norm": 2.3734774589538574,
26799
+ "learning_rate": 9.188901687613624e-05,
26800
+ "loss": 2.6545,
26801
+ "step": 3784
26802
+ },
26803
+ {
26804
+ "epoch": 1.053396876195777,
26805
+ "grad_norm": 2.4143643379211426,
26806
+ "learning_rate": 9.18453888174916e-05,
26807
+ "loss": 2.4521,
26808
+ "step": 3785
26809
+ },
26810
+ {
26811
+ "epoch": 1.0536751661042891,
26812
+ "grad_norm": 2.7103075981140137,
26813
+ "learning_rate": 9.180176232133647e-05,
26814
+ "loss": 2.9208,
26815
+ "step": 3786
26816
+ },
26817
+ {
26818
+ "epoch": 1.0539534560128014,
26819
+ "grad_norm": 2.6463193893432617,
26820
+ "learning_rate": 9.175813739602996e-05,
26821
+ "loss": 2.7337,
26822
+ "step": 3787
26823
+ },
26824
+ {
26825
+ "epoch": 1.0542317459213135,
26826
+ "grad_norm": 2.1172423362731934,
26827
+ "learning_rate": 9.171451404993105e-05,
26828
+ "loss": 2.3709,
26829
+ "step": 3788
26830
+ },
26831
+ {
26832
+ "epoch": 1.0545100358298258,
26833
+ "grad_norm": 2.3747429847717285,
26834
+ "learning_rate": 9.167089229139829e-05,
26835
+ "loss": 2.469,
26836
+ "step": 3789
26837
+ },
26838
+ {
26839
+ "epoch": 1.054788325738338,
26840
+ "grad_norm": 2.7493515014648438,
26841
+ "learning_rate": 9.162727212878996e-05,
26842
+ "loss": 2.7483,
26843
+ "step": 3790
26844
+ },
26845
+ {
26846
+ "epoch": 1.05506661564685,
26847
+ "grad_norm": 2.1751036643981934,
26848
+ "learning_rate": 9.158365357046405e-05,
26849
+ "loss": 2.3784,
26850
+ "step": 3791
26851
+ },
26852
+ {
26853
+ "epoch": 1.0553449055553623,
26854
+ "grad_norm": 2.5898613929748535,
26855
+ "learning_rate": 9.154003662477821e-05,
26856
+ "loss": 2.4678,
26857
+ "step": 3792
26858
+ },
26859
+ {
26860
+ "epoch": 1.0556231954638744,
26861
+ "grad_norm": 2.432840347290039,
26862
+ "learning_rate": 9.14964213000898e-05,
26863
+ "loss": 2.2535,
26864
+ "step": 3793
26865
+ },
26866
+ {
26867
+ "epoch": 1.0559014853723867,
26868
+ "grad_norm": 2.558281183242798,
26869
+ "learning_rate": 9.14528076047559e-05,
26870
+ "loss": 2.8376,
26871
+ "step": 3794
26872
+ },
26873
+ {
26874
+ "epoch": 1.0561797752808988,
26875
+ "grad_norm": 2.4376842975616455,
26876
+ "learning_rate": 9.140919554713323e-05,
26877
+ "loss": 2.5847,
26878
+ "step": 3795
26879
+ },
26880
+ {
26881
+ "epoch": 1.056458065189411,
26882
+ "grad_norm": 2.5773372650146484,
26883
+ "learning_rate": 9.136558513557818e-05,
26884
+ "loss": 2.3371,
26885
+ "step": 3796
26886
+ },
26887
+ {
26888
+ "epoch": 1.0567363550979232,
26889
+ "grad_norm": 2.790679454803467,
26890
+ "learning_rate": 9.132197637844691e-05,
26891
+ "loss": 2.5507,
26892
+ "step": 3797
26893
+ },
26894
+ {
26895
+ "epoch": 1.0570146450064355,
26896
+ "grad_norm": 2.5002150535583496,
26897
+ "learning_rate": 9.12783692840952e-05,
26898
+ "loss": 2.6795,
26899
+ "step": 3798
26900
+ },
26901
+ {
26902
+ "epoch": 1.0572929349149476,
26903
+ "grad_norm": 2.6883461475372314,
26904
+ "learning_rate": 9.123476386087844e-05,
26905
+ "loss": 2.6772,
26906
+ "step": 3799
26907
+ },
26908
+ {
26909
+ "epoch": 1.0575712248234599,
26910
+ "grad_norm": 2.8790204524993896,
26911
+ "learning_rate": 9.119116011715189e-05,
26912
+ "loss": 2.5414,
26913
+ "step": 3800
26914
+ },
26915
+ {
26916
+ "epoch": 1.0575712248234599,
26917
+ "eval_loss": 2.8708136081695557,
26918
+ "eval_runtime": 84.6585,
26919
+ "eval_samples_per_second": 59.061,
26920
+ "eval_steps_per_second": 14.765,
26921
+ "step": 3800
26922
  }
26923
  ],
26924
  "logging_steps": 1,
 
26933
  "early_stopping_threshold": 0.0
26934
  },
26935
  "attributes": {
26936
+ "early_stopping_patience_counter": 2
26937
  }
26938
  },
26939
  "TrainerControl": {
 
26942
  "should_evaluate": false,
26943
  "should_log": false,
26944
  "should_save": true,
26945
+ "should_training_stop": true
26946
  },
26947
  "attributes": {}
26948
  }
26949
  },
26950
+ "total_flos": 2.73962634313728e+17,
26951
  "train_batch_size": 4,
26952
  "trial_name": null,
26953
  "trial_params": null