diaenra commited on
Commit
81afde0
·
verified ·
1 Parent(s): 1fa9105

Training in progress, step 2821, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:707c53915db9354531b39384b83e5b2175a8f2487b5ea87e42d91121aa48743d
3
  size 677271474
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e71acb3aaa1cbdb9a2608831f36ed6bd2ad90dc878ebc4f574cf1abe8b87ef8a
3
  size 677271474
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f9746318154a835528e2eaf78be0ec4fb45e9d1010ac9b9a21dccba38aa1608
3
  size 1354738888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22b9676249e1f23320872babd4fdfdfb7911603e377235deeb7766fd71f1fb3d
3
  size 1354738888
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6263c0f6343e3ef0ca1ac0bc2952e57da5a20203324caf223be31814881238b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed5a074d47e09b0dc453a6efb17fbfeca03488f605d01a314d755493b7d1cbd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93b32f82e7a32d3b56fe4d9485192d8c3429a7f4e8c6baa22aa69b8f99b9cdaf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd6b5f2dd07fcbe6072acae46c112970452c1ec1fd434ff82fc7fc43f301d19
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9316913263045982,
5
  "eval_steps": 500,
6
- "global_step": 2629,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -18410,6 +18410,1350 @@
18410
  "learning_rate": 1.22350548971622e-06,
18411
  "loss": 8.0504,
18412
  "step": 2629
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18413
  }
18414
  ],
18415
  "logging_steps": 1,
@@ -18424,12 +19768,12 @@
18424
  "should_evaluate": false,
18425
  "should_log": false,
18426
  "should_save": true,
18427
- "should_training_stop": false
18428
  },
18429
  "attributes": {}
18430
  }
18431
  },
18432
- "total_flos": 9.600165307779318e+17,
18433
  "train_batch_size": 4,
18434
  "trial_name": null,
18435
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9997342074953486,
5
  "eval_steps": 500,
6
+ "global_step": 2821,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
18410
  "learning_rate": 1.22350548971622e-06,
18411
  "loss": 8.0504,
18412
  "step": 2629
18413
+ },
18414
+ {
18415
+ "epoch": 0.9320457163108,
18416
+ "grad_norm": 10.457837104797363,
18417
+ "learning_rate": 1.2108453883011905e-06,
18418
+ "loss": 7.9261,
18419
+ "step": 2630
18420
+ },
18421
+ {
18422
+ "epoch": 0.9324001063170019,
18423
+ "grad_norm": 10.277976989746094,
18424
+ "learning_rate": 1.1982503246958044e-06,
18425
+ "loss": 7.3617,
18426
+ "step": 2631
18427
+ },
18428
+ {
18429
+ "epoch": 0.9327544963232037,
18430
+ "grad_norm": 10.135127067565918,
18431
+ "learning_rate": 1.1857203156897646e-06,
18432
+ "loss": 6.6617,
18433
+ "step": 2632
18434
+ },
18435
+ {
18436
+ "epoch": 0.9331088863294055,
18437
+ "grad_norm": 9.504755973815918,
18438
+ "learning_rate": 1.1732553779860544e-06,
18439
+ "loss": 6.8578,
18440
+ "step": 2633
18441
+ },
18442
+ {
18443
+ "epoch": 0.9334632763356073,
18444
+ "grad_norm": 9.920090675354004,
18445
+ "learning_rate": 1.1608555282009048e-06,
18446
+ "loss": 6.782,
18447
+ "step": 2634
18448
+ },
18449
+ {
18450
+ "epoch": 0.9338176663418092,
18451
+ "grad_norm": 10.654485702514648,
18452
+ "learning_rate": 1.148520782863799e-06,
18453
+ "loss": 7.0663,
18454
+ "step": 2635
18455
+ },
18456
+ {
18457
+ "epoch": 0.934172056348011,
18458
+ "grad_norm": 11.514944076538086,
18459
+ "learning_rate": 1.1362511584174173e-06,
18460
+ "loss": 7.9396,
18461
+ "step": 2636
18462
+ },
18463
+ {
18464
+ "epoch": 0.9345264463542128,
18465
+ "grad_norm": 9.572590827941895,
18466
+ "learning_rate": 1.1240466712176379e-06,
18467
+ "loss": 7.6735,
18468
+ "step": 2637
18469
+ },
18470
+ {
18471
+ "epoch": 0.9348808363604146,
18472
+ "grad_norm": 10.75777816772461,
18473
+ "learning_rate": 1.1119073375335132e-06,
18474
+ "loss": 6.4615,
18475
+ "step": 2638
18476
+ },
18477
+ {
18478
+ "epoch": 0.9352352263666165,
18479
+ "grad_norm": 10.495604515075684,
18480
+ "learning_rate": 1.099833173547249e-06,
18481
+ "loss": 6.7071,
18482
+ "step": 2639
18483
+ },
18484
+ {
18485
+ "epoch": 0.9355896163728182,
18486
+ "grad_norm": 10.387083053588867,
18487
+ "learning_rate": 1.0878241953541478e-06,
18488
+ "loss": 6.2487,
18489
+ "step": 2640
18490
+ },
18491
+ {
18492
+ "epoch": 0.9359440063790201,
18493
+ "grad_norm": 11.0549955368042,
18494
+ "learning_rate": 1.0758804189626492e-06,
18495
+ "loss": 7.5193,
18496
+ "step": 2641
18497
+ },
18498
+ {
18499
+ "epoch": 0.9362983963852219,
18500
+ "grad_norm": 9.914316177368164,
18501
+ "learning_rate": 1.0640018602942614e-06,
18502
+ "loss": 7.0707,
18503
+ "step": 2642
18504
+ },
18505
+ {
18506
+ "epoch": 0.9366527863914238,
18507
+ "grad_norm": 9.522320747375488,
18508
+ "learning_rate": 1.0521885351835625e-06,
18509
+ "loss": 6.3052,
18510
+ "step": 2643
18511
+ },
18512
+ {
18513
+ "epoch": 0.9370071763976255,
18514
+ "grad_norm": 10.41584300994873,
18515
+ "learning_rate": 1.0404404593781559e-06,
18516
+ "loss": 8.4116,
18517
+ "step": 2644
18518
+ },
18519
+ {
18520
+ "epoch": 0.9373615664038274,
18521
+ "grad_norm": 10.059273719787598,
18522
+ "learning_rate": 1.0287576485386864e-06,
18523
+ "loss": 6.749,
18524
+ "step": 2645
18525
+ },
18526
+ {
18527
+ "epoch": 0.9377159564100292,
18528
+ "grad_norm": 11.211931228637695,
18529
+ "learning_rate": 1.0171401182387852e-06,
18530
+ "loss": 7.9731,
18531
+ "step": 2646
18532
+ },
18533
+ {
18534
+ "epoch": 0.9380703464162311,
18535
+ "grad_norm": 10.676755905151367,
18536
+ "learning_rate": 1.005587883965059e-06,
18537
+ "loss": 7.7199,
18538
+ "step": 2647
18539
+ },
18540
+ {
18541
+ "epoch": 0.9384247364224328,
18542
+ "grad_norm": 10.626141548156738,
18543
+ "learning_rate": 9.941009611170838e-07,
18544
+ "loss": 7.326,
18545
+ "step": 2648
18546
+ },
18547
+ {
18548
+ "epoch": 0.9387791264286347,
18549
+ "grad_norm": 11.591911315917969,
18550
+ "learning_rate": 9.826793650073606e-07,
18551
+ "loss": 7.0318,
18552
+ "step": 2649
18553
+ },
18554
+ {
18555
+ "epoch": 0.9391335164348366,
18556
+ "grad_norm": 11.885016441345215,
18557
+ "learning_rate": 9.713231108613274e-07,
18558
+ "loss": 6.9919,
18559
+ "step": 2650
18560
+ },
18561
+ {
18562
+ "epoch": 0.9394879064410384,
18563
+ "grad_norm": 6.760797500610352,
18564
+ "learning_rate": 9.600322138172968e-07,
18565
+ "loss": 9.4879,
18566
+ "step": 2651
18567
+ },
18568
+ {
18569
+ "epoch": 0.9398422964472402,
18570
+ "grad_norm": 7.657442569732666,
18571
+ "learning_rate": 9.488066889264624e-07,
18572
+ "loss": 9.0172,
18573
+ "step": 2652
18574
+ },
18575
+ {
18576
+ "epoch": 0.940196686453442,
18577
+ "grad_norm": 8.542561531066895,
18578
+ "learning_rate": 9.376465511528876e-07,
18579
+ "loss": 9.5194,
18580
+ "step": 2653
18581
+ },
18582
+ {
18583
+ "epoch": 0.9405510764596439,
18584
+ "grad_norm": 8.692965507507324,
18585
+ "learning_rate": 9.265518153734498e-07,
18586
+ "loss": 8.0484,
18587
+ "step": 2654
18588
+ },
18589
+ {
18590
+ "epoch": 0.9409054664658456,
18591
+ "grad_norm": 9.451211929321289,
18592
+ "learning_rate": 9.155224963778685e-07,
18593
+ "loss": 8.7355,
18594
+ "step": 2655
18595
+ },
18596
+ {
18597
+ "epoch": 0.9412598564720475,
18598
+ "grad_norm": 9.57262134552002,
18599
+ "learning_rate": 9.045586088686497e-07,
18600
+ "loss": 7.5628,
18601
+ "step": 2656
18602
+ },
18603
+ {
18604
+ "epoch": 0.9416142464782493,
18605
+ "grad_norm": 10.283690452575684,
18606
+ "learning_rate": 8.936601674610634e-07,
18607
+ "loss": 7.0982,
18608
+ "step": 2657
18609
+ },
18610
+ {
18611
+ "epoch": 0.9419686364844512,
18612
+ "grad_norm": 10.907401084899902,
18613
+ "learning_rate": 8.82827186683155e-07,
18614
+ "loss": 8.2248,
18615
+ "step": 2658
18616
+ },
18617
+ {
18618
+ "epoch": 0.9423230264906529,
18619
+ "grad_norm": 12.737578392028809,
18620
+ "learning_rate": 8.720596809757064e-07,
18621
+ "loss": 9.0023,
18622
+ "step": 2659
18623
+ },
18624
+ {
18625
+ "epoch": 0.9426774164968548,
18626
+ "grad_norm": 13.0936279296875,
18627
+ "learning_rate": 8.613576646922083e-07,
18628
+ "loss": 7.4829,
18629
+ "step": 2660
18630
+ },
18631
+ {
18632
+ "epoch": 0.9430318065030566,
18633
+ "grad_norm": 10.905577659606934,
18634
+ "learning_rate": 8.5072115209886e-07,
18635
+ "loss": 7.4625,
18636
+ "step": 2661
18637
+ },
18638
+ {
18639
+ "epoch": 0.9433861965092585,
18640
+ "grad_norm": 8.523266792297363,
18641
+ "learning_rate": 8.401501573745363e-07,
18642
+ "loss": 7.4627,
18643
+ "step": 2662
18644
+ },
18645
+ {
18646
+ "epoch": 0.9437405865154602,
18647
+ "grad_norm": 9.642807960510254,
18648
+ "learning_rate": 8.296446946107817e-07,
18649
+ "loss": 8.6972,
18650
+ "step": 2663
18651
+ },
18652
+ {
18653
+ "epoch": 0.9440949765216621,
18654
+ "grad_norm": 9.578781127929688,
18655
+ "learning_rate": 8.192047778117828e-07,
18656
+ "loss": 7.3822,
18657
+ "step": 2664
18658
+ },
18659
+ {
18660
+ "epoch": 0.9444493665278639,
18661
+ "grad_norm": 9.447463989257812,
18662
+ "learning_rate": 8.088304208943409e-07,
18663
+ "loss": 8.0613,
18664
+ "step": 2665
18665
+ },
18666
+ {
18667
+ "epoch": 0.9448037565340658,
18668
+ "grad_norm": 8.89419937133789,
18669
+ "learning_rate": 7.985216376878823e-07,
18670
+ "loss": 7.4979,
18671
+ "step": 2666
18672
+ },
18673
+ {
18674
+ "epoch": 0.9451581465402675,
18675
+ "grad_norm": 8.579427719116211,
18676
+ "learning_rate": 7.882784419343925e-07,
18677
+ "loss": 8.4803,
18678
+ "step": 2667
18679
+ },
18680
+ {
18681
+ "epoch": 0.9455125365464694,
18682
+ "grad_norm": 9.972301483154297,
18683
+ "learning_rate": 7.781008472884543e-07,
18684
+ "loss": 8.2678,
18685
+ "step": 2668
18686
+ },
18687
+ {
18688
+ "epoch": 0.9458669265526712,
18689
+ "grad_norm": 9.519344329833984,
18690
+ "learning_rate": 7.679888673171875e-07,
18691
+ "loss": 8.1661,
18692
+ "step": 2669
18693
+ },
18694
+ {
18695
+ "epoch": 0.9462213165588731,
18696
+ "grad_norm": 10.169025421142578,
18697
+ "learning_rate": 7.579425155002484e-07,
18698
+ "loss": 7.7365,
18699
+ "step": 2670
18700
+ },
18701
+ {
18702
+ "epoch": 0.9465757065650748,
18703
+ "grad_norm": 9.821728706359863,
18704
+ "learning_rate": 7.479618052298132e-07,
18705
+ "loss": 7.0132,
18706
+ "step": 2671
18707
+ },
18708
+ {
18709
+ "epoch": 0.9469300965712767,
18710
+ "grad_norm": 10.060563087463379,
18711
+ "learning_rate": 7.380467498105448e-07,
18712
+ "loss": 8.3833,
18713
+ "step": 2672
18714
+ },
18715
+ {
18716
+ "epoch": 0.9472844865774785,
18717
+ "grad_norm": 10.54714584350586,
18718
+ "learning_rate": 7.281973624595928e-07,
18719
+ "loss": 8.1672,
18720
+ "step": 2673
18721
+ },
18722
+ {
18723
+ "epoch": 0.9476388765836803,
18724
+ "grad_norm": 9.718517303466797,
18725
+ "learning_rate": 7.184136563065714e-07,
18726
+ "loss": 8.3017,
18727
+ "step": 2674
18728
+ },
18729
+ {
18730
+ "epoch": 0.9479932665898821,
18731
+ "grad_norm": 9.251363754272461,
18732
+ "learning_rate": 7.086956443935255e-07,
18733
+ "loss": 6.7159,
18734
+ "step": 2675
18735
+ },
18736
+ {
18737
+ "epoch": 0.948347656596084,
18738
+ "grad_norm": 10.097101211547852,
18739
+ "learning_rate": 6.990433396749429e-07,
18740
+ "loss": 7.9204,
18741
+ "step": 2676
18742
+ },
18743
+ {
18744
+ "epoch": 0.9487020466022859,
18745
+ "grad_norm": 9.57497501373291,
18746
+ "learning_rate": 6.894567550177145e-07,
18747
+ "loss": 8.0954,
18748
+ "step": 2677
18749
+ },
18750
+ {
18751
+ "epoch": 0.9490564366084876,
18752
+ "grad_norm": 9.721705436706543,
18753
+ "learning_rate": 6.799359032011343e-07,
18754
+ "loss": 7.5147,
18755
+ "step": 2678
18756
+ },
18757
+ {
18758
+ "epoch": 0.9494108266146895,
18759
+ "grad_norm": 9.056941986083984,
18760
+ "learning_rate": 6.704807969168447e-07,
18761
+ "loss": 7.4122,
18762
+ "step": 2679
18763
+ },
18764
+ {
18765
+ "epoch": 0.9497652166208913,
18766
+ "grad_norm": 9.660868644714355,
18767
+ "learning_rate": 6.610914487688691e-07,
18768
+ "loss": 8.4362,
18769
+ "step": 2680
18770
+ },
18771
+ {
18772
+ "epoch": 0.9501196066270932,
18773
+ "grad_norm": 8.478535652160645,
18774
+ "learning_rate": 6.517678712735786e-07,
18775
+ "loss": 6.5174,
18776
+ "step": 2681
18777
+ },
18778
+ {
18779
+ "epoch": 0.9504739966332949,
18780
+ "grad_norm": 10.852248191833496,
18781
+ "learning_rate": 6.425100768596481e-07,
18782
+ "loss": 7.8922,
18783
+ "step": 2682
18784
+ },
18785
+ {
18786
+ "epoch": 0.9508283866394968,
18787
+ "grad_norm": 10.073144912719727,
18788
+ "learning_rate": 6.333180778680725e-07,
18789
+ "loss": 7.5269,
18790
+ "step": 2683
18791
+ },
18792
+ {
18793
+ "epoch": 0.9511827766456986,
18794
+ "grad_norm": 8.856159210205078,
18795
+ "learning_rate": 6.241918865521446e-07,
18796
+ "loss": 7.8673,
18797
+ "step": 2684
18798
+ },
18799
+ {
18800
+ "epoch": 0.9515371666519005,
18801
+ "grad_norm": 10.096376419067383,
18802
+ "learning_rate": 6.151315150774162e-07,
18803
+ "loss": 7.0726,
18804
+ "step": 2685
18805
+ },
18806
+ {
18807
+ "epoch": 0.9518915566581022,
18808
+ "grad_norm": 10.148407936096191,
18809
+ "learning_rate": 6.06136975521715e-07,
18810
+ "loss": 7.3849,
18811
+ "step": 2686
18812
+ },
18813
+ {
18814
+ "epoch": 0.9522459466643041,
18815
+ "grad_norm": 10.342374801635742,
18816
+ "learning_rate": 5.972082798751056e-07,
18817
+ "loss": 7.6771,
18818
+ "step": 2687
18819
+ },
18820
+ {
18821
+ "epoch": 0.9526003366705059,
18822
+ "grad_norm": 10.118182182312012,
18823
+ "learning_rate": 5.883454400398891e-07,
18824
+ "loss": 8.3771,
18825
+ "step": 2688
18826
+ },
18827
+ {
18828
+ "epoch": 0.9529547266767077,
18829
+ "grad_norm": 9.883116722106934,
18830
+ "learning_rate": 5.795484678305541e-07,
18831
+ "loss": 7.5366,
18832
+ "step": 2689
18833
+ },
18834
+ {
18835
+ "epoch": 0.9533091166829095,
18836
+ "grad_norm": 10.128074645996094,
18837
+ "learning_rate": 5.708173749738143e-07,
18838
+ "loss": 7.0381,
18839
+ "step": 2690
18840
+ },
18841
+ {
18842
+ "epoch": 0.9536635066891114,
18843
+ "grad_norm": 10.316349983215332,
18844
+ "learning_rate": 5.621521731085433e-07,
18845
+ "loss": 7.694,
18846
+ "step": 2691
18847
+ },
18848
+ {
18849
+ "epoch": 0.9540178966953132,
18850
+ "grad_norm": 10.563957214355469,
18851
+ "learning_rate": 5.535528737857898e-07,
18852
+ "loss": 7.7825,
18853
+ "step": 2692
18854
+ },
18855
+ {
18856
+ "epoch": 0.954372286701515,
18857
+ "grad_norm": 10.890534400939941,
18858
+ "learning_rate": 5.450194884687509e-07,
18859
+ "loss": 6.7118,
18860
+ "step": 2693
18861
+ },
18862
+ {
18863
+ "epoch": 0.9547266767077168,
18864
+ "grad_norm": 9.884705543518066,
18865
+ "learning_rate": 5.365520285327441e-07,
18866
+ "loss": 5.9642,
18867
+ "step": 2694
18868
+ },
18869
+ {
18870
+ "epoch": 0.9550810667139187,
18871
+ "grad_norm": 10.200454711914062,
18872
+ "learning_rate": 5.281505052652347e-07,
18873
+ "loss": 7.461,
18874
+ "step": 2695
18875
+ },
18876
+ {
18877
+ "epoch": 0.9554354567201205,
18878
+ "grad_norm": 10.992594718933105,
18879
+ "learning_rate": 5.19814929865764e-07,
18880
+ "loss": 7.0572,
18881
+ "step": 2696
18882
+ },
18883
+ {
18884
+ "epoch": 0.9557898467263223,
18885
+ "grad_norm": 11.103619575500488,
18886
+ "learning_rate": 5.115453134459769e-07,
18887
+ "loss": 6.8733,
18888
+ "step": 2697
18889
+ },
18890
+ {
18891
+ "epoch": 0.9561442367325241,
18892
+ "grad_norm": 11.309475898742676,
18893
+ "learning_rate": 5.033416670295832e-07,
18894
+ "loss": 8.2346,
18895
+ "step": 2698
18896
+ },
18897
+ {
18898
+ "epoch": 0.956498626738726,
18899
+ "grad_norm": 11.035356521606445,
18900
+ "learning_rate": 4.952040015523629e-07,
18901
+ "loss": 6.114,
18902
+ "step": 2699
18903
+ },
18904
+ {
18905
+ "epoch": 0.9568530167449278,
18906
+ "grad_norm": 12.30701732635498,
18907
+ "learning_rate": 4.871323278621331e-07,
18908
+ "loss": 7.3802,
18909
+ "step": 2700
18910
+ },
18911
+ {
18912
+ "epoch": 0.9572074067511296,
18913
+ "grad_norm": 6.6567301750183105,
18914
+ "learning_rate": 4.791266567187424e-07,
18915
+ "loss": 8.8847,
18916
+ "step": 2701
18917
+ },
18918
+ {
18919
+ "epoch": 0.9575617967573314,
18920
+ "grad_norm": 7.835629940032959,
18921
+ "learning_rate": 4.711869987940598e-07,
18922
+ "loss": 8.6423,
18923
+ "step": 2702
18924
+ },
18925
+ {
18926
+ "epoch": 0.9579161867635333,
18927
+ "grad_norm": 9.732732772827148,
18928
+ "learning_rate": 4.633133646719523e-07,
18929
+ "loss": 10.0649,
18930
+ "step": 2703
18931
+ },
18932
+ {
18933
+ "epoch": 0.958270576769735,
18934
+ "grad_norm": 9.80262279510498,
18935
+ "learning_rate": 4.5550576484827414e-07,
18936
+ "loss": 9.8086,
18937
+ "step": 2704
18938
+ },
18939
+ {
18940
+ "epoch": 0.9586249667759369,
18941
+ "grad_norm": 9.661641120910645,
18942
+ "learning_rate": 4.477642097308499e-07,
18943
+ "loss": 8.6068,
18944
+ "step": 2705
18945
+ },
18946
+ {
18947
+ "epoch": 0.9589793567821387,
18948
+ "grad_norm": 9.993663787841797,
18949
+ "learning_rate": 4.400887096394801e-07,
18950
+ "loss": 7.7553,
18951
+ "step": 2706
18952
+ },
18953
+ {
18954
+ "epoch": 0.9593337467883406,
18955
+ "grad_norm": 11.445382118225098,
18956
+ "learning_rate": 4.324792748058915e-07,
18957
+ "loss": 8.1653,
18958
+ "step": 2707
18959
+ },
18960
+ {
18961
+ "epoch": 0.9596881367945423,
18962
+ "grad_norm": 11.129366874694824,
18963
+ "learning_rate": 4.249359153737531e-07,
18964
+ "loss": 6.551,
18965
+ "step": 2708
18966
+ },
18967
+ {
18968
+ "epoch": 0.9600425268007442,
18969
+ "grad_norm": 11.85264778137207,
18970
+ "learning_rate": 4.1745864139865476e-07,
18971
+ "loss": 7.6299,
18972
+ "step": 2709
18973
+ },
18974
+ {
18975
+ "epoch": 0.960396916806946,
18976
+ "grad_norm": 12.840014457702637,
18977
+ "learning_rate": 4.100474628480844e-07,
18978
+ "loss": 8.1851,
18979
+ "step": 2710
18980
+ },
18981
+ {
18982
+ "epoch": 0.9607513068131479,
18983
+ "grad_norm": 10.203207015991211,
18984
+ "learning_rate": 4.0270238960142813e-07,
18985
+ "loss": 7.9981,
18986
+ "step": 2711
18987
+ },
18988
+ {
18989
+ "epoch": 0.9611056968193497,
18990
+ "grad_norm": 9.188422203063965,
18991
+ "learning_rate": 3.954234314499539e-07,
18992
+ "loss": 7.931,
18993
+ "step": 2712
18994
+ },
18995
+ {
18996
+ "epoch": 0.9614600868255515,
18997
+ "grad_norm": 9.173449516296387,
18998
+ "learning_rate": 3.8821059809678315e-07,
18999
+ "loss": 8.5163,
19000
+ "step": 2713
19001
+ },
19002
+ {
19003
+ "epoch": 0.9618144768317534,
19004
+ "grad_norm": 9.872574806213379,
19005
+ "learning_rate": 3.8106389915690264e-07,
19006
+ "loss": 7.8934,
19007
+ "step": 2714
19008
+ },
19009
+ {
19010
+ "epoch": 0.9621688668379552,
19011
+ "grad_norm": 8.945125579833984,
19012
+ "learning_rate": 3.7398334415714163e-07,
19013
+ "loss": 8.1885,
19014
+ "step": 2715
19015
+ },
19016
+ {
19017
+ "epoch": 0.962523256844157,
19018
+ "grad_norm": 10.056449890136719,
19019
+ "learning_rate": 3.6696894253614447e-07,
19020
+ "loss": 7.7838,
19021
+ "step": 2716
19022
+ },
19023
+ {
19024
+ "epoch": 0.9628776468503588,
19025
+ "grad_norm": 9.586847305297852,
19026
+ "learning_rate": 3.600207036443759e-07,
19027
+ "loss": 7.3825,
19028
+ "step": 2717
19029
+ },
19030
+ {
19031
+ "epoch": 0.9632320368565607,
19032
+ "grad_norm": 9.794684410095215,
19033
+ "learning_rate": 3.5313863674410476e-07,
19034
+ "loss": 8.0346,
19035
+ "step": 2718
19036
+ },
19037
+ {
19038
+ "epoch": 0.9635864268627625,
19039
+ "grad_norm": 9.196282386779785,
19040
+ "learning_rate": 3.463227510093925e-07,
19041
+ "loss": 6.8752,
19042
+ "step": 2719
19043
+ },
19044
+ {
19045
+ "epoch": 0.9639408168689643,
19046
+ "grad_norm": 9.417471885681152,
19047
+ "learning_rate": 3.3957305552607123e-07,
19048
+ "loss": 7.9057,
19049
+ "step": 2720
19050
+ },
19051
+ {
19052
+ "epoch": 0.9642952068751661,
19053
+ "grad_norm": 9.390932083129883,
19054
+ "learning_rate": 3.328895592917491e-07,
19055
+ "loss": 8.1303,
19056
+ "step": 2721
19057
+ },
19058
+ {
19059
+ "epoch": 0.964649596881368,
19060
+ "grad_norm": 9.340128898620605,
19061
+ "learning_rate": 3.262722712157773e-07,
19062
+ "loss": 7.6802,
19063
+ "step": 2722
19064
+ },
19065
+ {
19066
+ "epoch": 0.9650039868875697,
19067
+ "grad_norm": 10.40151309967041,
19068
+ "learning_rate": 3.197212001192551e-07,
19069
+ "loss": 7.4673,
19070
+ "step": 2723
19071
+ },
19072
+ {
19073
+ "epoch": 0.9653583768937716,
19074
+ "grad_norm": 9.127900123596191,
19075
+ "learning_rate": 3.1323635473501366e-07,
19076
+ "loss": 7.2802,
19077
+ "step": 2724
19078
+ },
19079
+ {
19080
+ "epoch": 0.9657127668999734,
19081
+ "grad_norm": 9.510401725769043,
19082
+ "learning_rate": 3.0681774370759365e-07,
19083
+ "loss": 8.0888,
19084
+ "step": 2725
19085
+ },
19086
+ {
19087
+ "epoch": 0.9660671569061753,
19088
+ "grad_norm": 10.736952781677246,
19089
+ "learning_rate": 3.0046537559325626e-07,
19090
+ "loss": 8.2639,
19091
+ "step": 2726
19092
+ },
19093
+ {
19094
+ "epoch": 0.966421546912377,
19095
+ "grad_norm": 9.19753646850586,
19096
+ "learning_rate": 2.9417925885994455e-07,
19097
+ "loss": 8.6266,
19098
+ "step": 2727
19099
+ },
19100
+ {
19101
+ "epoch": 0.9667759369185789,
19102
+ "grad_norm": 10.589143753051758,
19103
+ "learning_rate": 2.879594018873e-07,
19104
+ "loss": 7.7698,
19105
+ "step": 2728
19106
+ },
19107
+ {
19108
+ "epoch": 0.9671303269247807,
19109
+ "grad_norm": 10.227514266967773,
19110
+ "learning_rate": 2.81805812966629e-07,
19111
+ "loss": 7.5404,
19112
+ "step": 2729
19113
+ },
19114
+ {
19115
+ "epoch": 0.9674847169309826,
19116
+ "grad_norm": 9.569182395935059,
19117
+ "learning_rate": 2.757185003008922e-07,
19118
+ "loss": 6.8167,
19119
+ "step": 2730
19120
+ },
19121
+ {
19122
+ "epoch": 0.9678391069371843,
19123
+ "grad_norm": 10.21536922454834,
19124
+ "learning_rate": 2.6969747200472075e-07,
19125
+ "loss": 7.0856,
19126
+ "step": 2731
19127
+ },
19128
+ {
19129
+ "epoch": 0.9681934969433862,
19130
+ "grad_norm": 10.365554809570312,
19131
+ "learning_rate": 2.637427361043665e-07,
19132
+ "loss": 7.2888,
19133
+ "step": 2732
19134
+ },
19135
+ {
19136
+ "epoch": 0.968547886949588,
19137
+ "grad_norm": 9.141191482543945,
19138
+ "learning_rate": 2.5785430053772984e-07,
19139
+ "loss": 6.754,
19140
+ "step": 2733
19141
+ },
19142
+ {
19143
+ "epoch": 0.9689022769557899,
19144
+ "grad_norm": 9.845773696899414,
19145
+ "learning_rate": 2.5203217315431517e-07,
19146
+ "loss": 7.6066,
19147
+ "step": 2734
19148
+ },
19149
+ {
19150
+ "epoch": 0.9692566669619916,
19151
+ "grad_norm": 10.204642295837402,
19152
+ "learning_rate": 2.4627636171523635e-07,
19153
+ "loss": 6.6599,
19154
+ "step": 2735
19155
+ },
19156
+ {
19157
+ "epoch": 0.9696110569681935,
19158
+ "grad_norm": 9.55280876159668,
19159
+ "learning_rate": 2.4058687389322266e-07,
19160
+ "loss": 7.4796,
19161
+ "step": 2736
19162
+ },
19163
+ {
19164
+ "epoch": 0.9699654469743954,
19165
+ "grad_norm": 10.470826148986816,
19166
+ "learning_rate": 2.3496371727256829e-07,
19167
+ "loss": 8.3157,
19168
+ "step": 2737
19169
+ },
19170
+ {
19171
+ "epoch": 0.9703198369805971,
19172
+ "grad_norm": 9.35001277923584,
19173
+ "learning_rate": 2.294068993491605e-07,
19174
+ "loss": 6.3077,
19175
+ "step": 2738
19176
+ },
19177
+ {
19178
+ "epoch": 0.970674226986799,
19179
+ "grad_norm": 10.80824089050293,
19180
+ "learning_rate": 2.2391642753044617e-07,
19181
+ "loss": 8.9389,
19182
+ "step": 2739
19183
+ },
19184
+ {
19185
+ "epoch": 0.9710286169930008,
19186
+ "grad_norm": 10.638192176818848,
19187
+ "learning_rate": 2.184923091354374e-07,
19188
+ "loss": 7.5248,
19189
+ "step": 2740
19190
+ },
19191
+ {
19192
+ "epoch": 0.9713830069992027,
19193
+ "grad_norm": 10.541278839111328,
19194
+ "learning_rate": 2.1313455139469474e-07,
19195
+ "loss": 7.765,
19196
+ "step": 2741
19197
+ },
19198
+ {
19199
+ "epoch": 0.9717373970054044,
19200
+ "grad_norm": 10.201549530029297,
19201
+ "learning_rate": 2.0784316145031624e-07,
19202
+ "loss": 7.2592,
19203
+ "step": 2742
19204
+ },
19205
+ {
19206
+ "epoch": 0.9720917870116063,
19207
+ "grad_norm": 9.781977653503418,
19208
+ "learning_rate": 2.0261814635591514e-07,
19209
+ "loss": 7.7688,
19210
+ "step": 2743
19211
+ },
19212
+ {
19213
+ "epoch": 0.9724461770178081,
19214
+ "grad_norm": 10.238073348999023,
19215
+ "learning_rate": 1.974595130766421e-07,
19216
+ "loss": 6.4656,
19217
+ "step": 2744
19218
+ },
19219
+ {
19220
+ "epoch": 0.97280056702401,
19221
+ "grad_norm": 10.194886207580566,
19222
+ "learning_rate": 1.9236726848915754e-07,
19223
+ "loss": 7.2333,
19224
+ "step": 2745
19225
+ },
19226
+ {
19227
+ "epoch": 0.9731549570302117,
19228
+ "grad_norm": 10.862103462219238,
19229
+ "learning_rate": 1.873414193816092e-07,
19230
+ "loss": 6.7863,
19231
+ "step": 2746
19232
+ },
19233
+ {
19234
+ "epoch": 0.9735093470364136,
19235
+ "grad_norm": 10.344438552856445,
19236
+ "learning_rate": 1.8238197245366018e-07,
19237
+ "loss": 7.2087,
19238
+ "step": 2747
19239
+ },
19240
+ {
19241
+ "epoch": 0.9738637370426154,
19242
+ "grad_norm": 10.415502548217773,
19243
+ "learning_rate": 1.7748893431642767e-07,
19244
+ "loss": 6.8113,
19245
+ "step": 2748
19246
+ },
19247
+ {
19248
+ "epoch": 0.9742181270488173,
19249
+ "grad_norm": 11.136659622192383,
19250
+ "learning_rate": 1.72662311492533e-07,
19251
+ "loss": 6.8009,
19252
+ "step": 2749
19253
+ },
19254
+ {
19255
+ "epoch": 0.974572517055019,
19256
+ "grad_norm": 11.401845932006836,
19257
+ "learning_rate": 1.67902110416035e-07,
19258
+ "loss": 5.7257,
19259
+ "step": 2750
19260
+ },
19261
+ {
19262
+ "epoch": 0.9749269070612209,
19263
+ "grad_norm": 6.9431891441345215,
19264
+ "learning_rate": 1.632083374324689e-07,
19265
+ "loss": 9.8697,
19266
+ "step": 2751
19267
+ },
19268
+ {
19269
+ "epoch": 0.9752812970674227,
19270
+ "grad_norm": 7.812088966369629,
19271
+ "learning_rate": 1.5858099879881848e-07,
19272
+ "loss": 9.035,
19273
+ "step": 2752
19274
+ },
19275
+ {
19276
+ "epoch": 0.9756356870736245,
19277
+ "grad_norm": 8.463058471679688,
19278
+ "learning_rate": 1.540201006834996e-07,
19279
+ "loss": 9.1594,
19280
+ "step": 2753
19281
+ },
19282
+ {
19283
+ "epoch": 0.9759900770798263,
19284
+ "grad_norm": 9.573930740356445,
19285
+ "learning_rate": 1.4952564916636546e-07,
19286
+ "loss": 9.2905,
19287
+ "step": 2754
19288
+ },
19289
+ {
19290
+ "epoch": 0.9763444670860282,
19291
+ "grad_norm": 9.488774299621582,
19292
+ "learning_rate": 1.4509765023868472e-07,
19293
+ "loss": 8.1606,
19294
+ "step": 2755
19295
+ },
19296
+ {
19297
+ "epoch": 0.97669885709223,
19298
+ "grad_norm": 11.166428565979004,
19299
+ "learning_rate": 1.4073610980316344e-07,
19300
+ "loss": 8.8016,
19301
+ "step": 2756
19302
+ },
19303
+ {
19304
+ "epoch": 0.9770532470984318,
19305
+ "grad_norm": 11.221932411193848,
19306
+ "learning_rate": 1.364410336738897e-07,
19307
+ "loss": 8.18,
19308
+ "step": 2757
19309
+ },
19310
+ {
19311
+ "epoch": 0.9774076371046336,
19312
+ "grad_norm": 11.63525676727295,
19313
+ "learning_rate": 1.32212427576367e-07,
19314
+ "loss": 7.9339,
19315
+ "step": 2758
19316
+ },
19317
+ {
19318
+ "epoch": 0.9777620271108355,
19319
+ "grad_norm": 9.624247550964355,
19320
+ "learning_rate": 1.2805029714749173e-07,
19321
+ "loss": 6.4656,
19322
+ "step": 2759
19323
+ },
19324
+ {
19325
+ "epoch": 0.9781164171170373,
19326
+ "grad_norm": 12.943864822387695,
19327
+ "learning_rate": 1.239546479355369e-07,
19328
+ "loss": 7.5002,
19329
+ "step": 2760
19330
+ },
19331
+ {
19332
+ "epoch": 0.9784708071232391,
19333
+ "grad_norm": 10.584692001342773,
19334
+ "learning_rate": 1.1992548540016856e-07,
19335
+ "loss": 8.1984,
19336
+ "step": 2761
19337
+ },
19338
+ {
19339
+ "epoch": 0.9788251971294409,
19340
+ "grad_norm": 10.64038372039795,
19341
+ "learning_rate": 1.1596281491241257e-07,
19342
+ "loss": 9.0096,
19343
+ "step": 2762
19344
+ },
19345
+ {
19346
+ "epoch": 0.9791795871356428,
19347
+ "grad_norm": 10.040376663208008,
19348
+ "learning_rate": 1.1206664175465453e-07,
19349
+ "loss": 7.4336,
19350
+ "step": 2763
19351
+ },
19352
+ {
19353
+ "epoch": 0.9795339771418446,
19354
+ "grad_norm": 9.4454345703125,
19355
+ "learning_rate": 1.0823697112064546e-07,
19356
+ "loss": 7.6408,
19357
+ "step": 2764
19358
+ },
19359
+ {
19360
+ "epoch": 0.9798883671480464,
19361
+ "grad_norm": 8.535350799560547,
19362
+ "learning_rate": 1.0447380811548502e-07,
19363
+ "loss": 6.9056,
19364
+ "step": 2765
19365
+ },
19366
+ {
19367
+ "epoch": 0.9802427571542482,
19368
+ "grad_norm": 9.074763298034668,
19369
+ "learning_rate": 1.0077715775561047e-07,
19370
+ "loss": 9.1287,
19371
+ "step": 2766
19372
+ },
19373
+ {
19374
+ "epoch": 0.9805971471604501,
19375
+ "grad_norm": 10.088266372680664,
19376
+ "learning_rate": 9.714702496880224e-08,
19377
+ "loss": 8.1048,
19378
+ "step": 2767
19379
+ },
19380
+ {
19381
+ "epoch": 0.980951537166652,
19382
+ "grad_norm": 9.633566856384277,
19383
+ "learning_rate": 9.35834145941561e-08,
19384
+ "loss": 7.726,
19385
+ "step": 2768
19386
+ },
19387
+ {
19388
+ "epoch": 0.9813059271728537,
19389
+ "grad_norm": 9.597146034240723,
19390
+ "learning_rate": 9.008633138211098e-08,
19391
+ "loss": 7.7658,
19392
+ "step": 2769
19393
+ },
19394
+ {
19395
+ "epoch": 0.9816603171790556,
19396
+ "grad_norm": 8.681363105773926,
19397
+ "learning_rate": 8.66557799944101e-08,
19398
+ "loss": 8.0223,
19399
+ "step": 2770
19400
+ },
19401
+ {
19402
+ "epoch": 0.9820147071852574,
19403
+ "grad_norm": 9.311779022216797,
19404
+ "learning_rate": 8.329176500411206e-08,
19405
+ "loss": 8.4576,
19406
+ "step": 2771
19407
+ },
19408
+ {
19409
+ "epoch": 0.9823690971914592,
19410
+ "grad_norm": 9.054991722106934,
19411
+ "learning_rate": 7.99942908955742e-08,
19412
+ "loss": 8.237,
19413
+ "step": 2772
19414
+ },
19415
+ {
19416
+ "epoch": 0.982723487197661,
19417
+ "grad_norm": 9.217580795288086,
19418
+ "learning_rate": 7.676336206445256e-08,
19419
+ "loss": 8.411,
19420
+ "step": 2773
19421
+ },
19422
+ {
19423
+ "epoch": 0.9830778772038629,
19424
+ "grad_norm": 8.914115905761719,
19425
+ "learning_rate": 7.35989828177075e-08,
19426
+ "loss": 8.1964,
19427
+ "step": 2774
19428
+ },
19429
+ {
19430
+ "epoch": 0.9834322672100647,
19431
+ "grad_norm": 10.451409339904785,
19432
+ "learning_rate": 7.050115737356477e-08,
19433
+ "loss": 7.1974,
19434
+ "step": 2775
19435
+ },
19436
+ {
19437
+ "epoch": 0.9837866572162665,
19438
+ "grad_norm": 8.894097328186035,
19439
+ "learning_rate": 6.746988986156e-08,
19440
+ "loss": 7.4948,
19441
+ "step": 2776
19442
+ },
19443
+ {
19444
+ "epoch": 0.9841410472224683,
19445
+ "grad_norm": 8.992011070251465,
19446
+ "learning_rate": 6.450518432247754e-08,
19447
+ "loss": 7.7269,
19448
+ "step": 2777
19449
+ },
19450
+ {
19451
+ "epoch": 0.9844954372286702,
19452
+ "grad_norm": 9.361895561218262,
19453
+ "learning_rate": 6.160704470838385e-08,
19454
+ "loss": 8.4291,
19455
+ "step": 2778
19456
+ },
19457
+ {
19458
+ "epoch": 0.984849827234872,
19459
+ "grad_norm": 10.004155158996582,
19460
+ "learning_rate": 5.8775474882616365e-08,
19461
+ "loss": 7.9027,
19462
+ "step": 2779
19463
+ },
19464
+ {
19465
+ "epoch": 0.9852042172410738,
19466
+ "grad_norm": 9.721527099609375,
19467
+ "learning_rate": 5.601047861976127e-08,
19468
+ "loss": 8.0284,
19469
+ "step": 2780
19470
+ },
19471
+ {
19472
+ "epoch": 0.9855586072472756,
19473
+ "grad_norm": 11.492663383483887,
19474
+ "learning_rate": 5.3312059605670204e-08,
19475
+ "loss": 8.6191,
19476
+ "step": 2781
19477
+ },
19478
+ {
19479
+ "epoch": 0.9859129972534775,
19480
+ "grad_norm": 9.143500328063965,
19481
+ "learning_rate": 5.06802214374269e-08,
19482
+ "loss": 7.0557,
19483
+ "step": 2782
19484
+ },
19485
+ {
19486
+ "epoch": 0.9862673872596793,
19487
+ "grad_norm": 9.80386734008789,
19488
+ "learning_rate": 4.8114967623380525e-08,
19489
+ "loss": 7.0624,
19490
+ "step": 2783
19491
+ },
19492
+ {
19493
+ "epoch": 0.9866217772658811,
19494
+ "grad_norm": 9.714198112487793,
19495
+ "learning_rate": 4.561630158311792e-08,
19496
+ "loss": 8.0296,
19497
+ "step": 2784
19498
+ },
19499
+ {
19500
+ "epoch": 0.9869761672720829,
19501
+ "grad_norm": 9.890578269958496,
19502
+ "learning_rate": 4.318422664744137e-08,
19503
+ "loss": 7.2464,
19504
+ "step": 2785
19505
+ },
19506
+ {
19507
+ "epoch": 0.9873305572782848,
19508
+ "grad_norm": 10.45261001586914,
19509
+ "learning_rate": 4.081874605841307e-08,
19510
+ "loss": 7.5394,
19511
+ "step": 2786
19512
+ },
19513
+ {
19514
+ "epoch": 0.9876849472844865,
19515
+ "grad_norm": 10.546357154846191,
19516
+ "learning_rate": 3.85198629693051e-08,
19517
+ "loss": 8.9054,
19518
+ "step": 2787
19519
+ },
19520
+ {
19521
+ "epoch": 0.9880393372906884,
19522
+ "grad_norm": 10.089964866638184,
19523
+ "learning_rate": 3.628758044461611e-08,
19524
+ "loss": 7.3345,
19525
+ "step": 2788
19526
+ },
19527
+ {
19528
+ "epoch": 0.9883937272968902,
19529
+ "grad_norm": 10.31136417388916,
19530
+ "learning_rate": 3.412190146006578e-08,
19531
+ "loss": 8.3785,
19532
+ "step": 2789
19533
+ },
19534
+ {
19535
+ "epoch": 0.9887481173030921,
19536
+ "grad_norm": 10.49755859375,
19537
+ "learning_rate": 3.202282890258368e-08,
19538
+ "loss": 6.5573,
19539
+ "step": 2790
19540
+ },
19541
+ {
19542
+ "epoch": 0.9891025073092938,
19543
+ "grad_norm": 10.216747283935547,
19544
+ "learning_rate": 2.9990365570314873e-08,
19545
+ "loss": 7.7684,
19546
+ "step": 2791
19547
+ },
19548
+ {
19549
+ "epoch": 0.9894568973154957,
19550
+ "grad_norm": 10.758793830871582,
19551
+ "learning_rate": 2.8024514172608763e-08,
19552
+ "loss": 7.4823,
19553
+ "step": 2792
19554
+ },
19555
+ {
19556
+ "epoch": 0.9898112873216975,
19557
+ "grad_norm": 11.304996490478516,
19558
+ "learning_rate": 2.612527733002468e-08,
19559
+ "loss": 8.3669,
19560
+ "step": 2793
19561
+ },
19562
+ {
19563
+ "epoch": 0.9901656773278994,
19564
+ "grad_norm": 9.883719444274902,
19565
+ "learning_rate": 2.4292657574320755e-08,
19566
+ "loss": 6.5523,
19567
+ "step": 2794
19568
+ },
19569
+ {
19570
+ "epoch": 0.9905200673341011,
19571
+ "grad_norm": 11.035935401916504,
19572
+ "learning_rate": 2.2526657348442835e-08,
19573
+ "loss": 7.3817,
19574
+ "step": 2795
19575
+ },
19576
+ {
19577
+ "epoch": 0.990874457340303,
19578
+ "grad_norm": 10.151780128479004,
19579
+ "learning_rate": 2.0827279006535582e-08,
19580
+ "loss": 6.5106,
19581
+ "step": 2796
19582
+ },
19583
+ {
19584
+ "epoch": 0.9912288473465048,
19585
+ "grad_norm": 10.024761199951172,
19586
+ "learning_rate": 1.919452481394246e-08,
19587
+ "loss": 6.3903,
19588
+ "step": 2797
19589
+ },
19590
+ {
19591
+ "epoch": 0.9915832373527067,
19592
+ "grad_norm": 11.098858833312988,
19593
+ "learning_rate": 1.7628396947183547e-08,
19594
+ "loss": 7.1063,
19595
+ "step": 2798
19596
+ },
19597
+ {
19598
+ "epoch": 0.9919376273589084,
19599
+ "grad_norm": 11.342761039733887,
19600
+ "learning_rate": 1.612889749396107e-08,
19601
+ "loss": 7.025,
19602
+ "step": 2799
19603
+ },
19604
+ {
19605
+ "epoch": 0.9922920173651103,
19606
+ "grad_norm": 12.230018615722656,
19607
+ "learning_rate": 1.469602845317608e-08,
19608
+ "loss": 7.5984,
19609
+ "step": 2800
19610
+ },
19611
+ {
19612
+ "epoch": 0.9926464073713122,
19613
+ "grad_norm": 7.639358043670654,
19614
+ "learning_rate": 1.3329791734895124e-08,
19615
+ "loss": 8.759,
19616
+ "step": 2801
19617
+ },
19618
+ {
19619
+ "epoch": 0.9930007973775139,
19620
+ "grad_norm": 8.9176607131958,
19621
+ "learning_rate": 1.2030189160355809e-08,
19622
+ "loss": 8.2035,
19623
+ "step": 2802
19624
+ },
19625
+ {
19626
+ "epoch": 0.9933551873837158,
19627
+ "grad_norm": 10.628494262695312,
19628
+ "learning_rate": 1.0797222461988998e-08,
19629
+ "loss": 8.0889,
19630
+ "step": 2803
19631
+ },
19632
+ {
19633
+ "epoch": 0.9937095773899176,
19634
+ "grad_norm": 12.253202438354492,
19635
+ "learning_rate": 9.63089328337996e-09,
19636
+ "loss": 7.7258,
19637
+ "step": 2804
19638
+ },
19639
+ {
19640
+ "epoch": 0.9940639673961195,
19641
+ "grad_norm": 12.697697639465332,
19642
+ "learning_rate": 8.53120317929057e-09,
19643
+ "loss": 8.0472,
19644
+ "step": 2805
19645
+ },
19646
+ {
19647
+ "epoch": 0.9944183574023212,
19648
+ "grad_norm": 9.967103958129883,
19649
+ "learning_rate": 7.498153615653758e-09,
19650
+ "loss": 8.0163,
19651
+ "step": 2806
19652
+ },
19653
+ {
19654
+ "epoch": 0.9947727474085231,
19655
+ "grad_norm": 10.074397087097168,
19656
+ "learning_rate": 6.531745969562408e-09,
19657
+ "loss": 7.7091,
19658
+ "step": 2807
19659
+ },
19660
+ {
19661
+ "epoch": 0.9951271374147249,
19662
+ "grad_norm": 10.248494148254395,
19663
+ "learning_rate": 5.631981529269359e-09,
19664
+ "loss": 7.9076,
19665
+ "step": 2808
19666
+ },
19667
+ {
19668
+ "epoch": 0.9954815274209268,
19669
+ "grad_norm": 9.425408363342285,
19670
+ "learning_rate": 4.798861494204054e-09,
19671
+ "loss": 8.3727,
19672
+ "step": 2809
19673
+ },
19674
+ {
19675
+ "epoch": 0.9958359174271285,
19676
+ "grad_norm": 9.309487342834473,
19677
+ "learning_rate": 4.032386974939239e-09,
19678
+ "loss": 7.7269,
19679
+ "step": 2810
19680
+ },
19681
+ {
19682
+ "epoch": 0.9961903074333304,
19683
+ "grad_norm": 10.345414161682129,
19684
+ "learning_rate": 3.332558993218715e-09,
19685
+ "loss": 7.8104,
19686
+ "step": 2811
19687
+ },
19688
+ {
19689
+ "epoch": 0.9965446974395322,
19690
+ "grad_norm": 9.41676139831543,
19691
+ "learning_rate": 2.699378481940684e-09,
19692
+ "loss": 7.3879,
19693
+ "step": 2812
19694
+ },
19695
+ {
19696
+ "epoch": 0.9968990874457341,
19697
+ "grad_norm": 9.750468254089355,
19698
+ "learning_rate": 2.1328462851577525e-09,
19699
+ "loss": 7.4963,
19700
+ "step": 2813
19701
+ },
19702
+ {
19703
+ "epoch": 0.9972534774519358,
19704
+ "grad_norm": 9.87458324432373,
19705
+ "learning_rate": 1.6329631580769278e-09,
19706
+ "loss": 7.548,
19707
+ "step": 2814
19708
+ },
19709
+ {
19710
+ "epoch": 0.9976078674581377,
19711
+ "grad_norm": 10.732804298400879,
19712
+ "learning_rate": 1.1997297670651718e-09,
19713
+ "loss": 7.1676,
19714
+ "step": 2815
19715
+ },
19716
+ {
19717
+ "epoch": 0.9979622574643395,
19718
+ "grad_norm": 9.683318138122559,
19719
+ "learning_rate": 8.331466896382978e-10,
19720
+ "loss": 7.6941,
19721
+ "step": 2816
19722
+ },
19723
+ {
19724
+ "epoch": 0.9983166474705414,
19725
+ "grad_norm": 9.885022163391113,
19726
+ "learning_rate": 5.332144144665208e-10,
19727
+ "loss": 8.3338,
19728
+ "step": 2817
19729
+ },
19730
+ {
19731
+ "epoch": 0.9986710374767431,
19732
+ "grad_norm": 11.414604187011719,
19733
+ "learning_rate": 2.999333413689076e-10,
19734
+ "loss": 8.1915,
19735
+ "step": 2818
19736
+ },
19737
+ {
19738
+ "epoch": 0.999025427482945,
19739
+ "grad_norm": 10.389874458312988,
19740
+ "learning_rate": 1.333037813133764e-10,
19741
+ "loss": 6.8466,
19742
+ "step": 2819
19743
+ },
19744
+ {
19745
+ "epoch": 0.9993798174891468,
19746
+ "grad_norm": 9.702605247497559,
19747
+ "learning_rate": 3.33259564333499e-11,
19748
+ "loss": 6.4676,
19749
+ "step": 2820
19750
+ },
19751
+ {
19752
+ "epoch": 0.9997342074953486,
19753
+ "grad_norm": 10.419767379760742,
19754
+ "learning_rate": 0.0,
19755
+ "loss": 7.04,
19756
+ "step": 2821
19757
  }
19758
  ],
19759
  "logging_steps": 1,
 
19768
  "should_evaluate": false,
19769
  "should_log": false,
19770
  "should_save": true,
19771
+ "should_training_stop": true
19772
  },
19773
  "attributes": {}
19774
  }
19775
  },
19776
+ "total_flos": 1.0293193213649388e+18,
19777
  "train_batch_size": 4,
19778
  "trial_name": null,
19779
  "trial_params": null