error577 committed
Commit 7d19dbc · verified · 1 Parent(s): 048c42a

Training in progress, step 800, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e2d3e565906909995f0316ab1b42c5b8582f202c5b46bdb3debada08aad9f23
+oid sha256:2316cbf7807803c10504f0552a3df41c6728ec9769dcfef615a415ea7c583af0
 size 323014168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65d3780db8a939784f55f6ed16bff2c90230ce0f04e95f25dd2d77d968c0cf27
+oid sha256:833cdbd9a1b69a3478278dfc0e64ccecf27f27d112d125d68306b87027ce427a
 size 165484738
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:324895416f240bdb5ec1dbc24af23e6a273244e76245fac846a838380db560e5
+oid sha256:e7b315b29c598de44532516d3ac125be85a14a2f551aba8e3d144136258f1dba
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:512b8e162e341e64fd4843838fdd946779531701440bd44036ec363f546a8e5f
+oid sha256:d2c17f8131c6c5ea7f0a3b5c0825855397d8dc22a4373cf0983d7dd31b7657c7
 size 1064
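
Note: the four files above are Git LFS pointer files, so only the sha256 oid changes between checkpoints while the recorded byte size stays the same. As a minimal sketch (not part of this repo), one might confirm that a locally pulled artifact matches the oid in its pointer; the file path below is an assumption for illustration.

# Minimal sketch: compare a downloaded file's sha256 against the oid
# recorded in its Git LFS pointer. Path and expected value are examples
# taken from the adapter_model.safetensors pointer above.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "2316cbf7807803c10504f0552a3df41c6728ec9769dcfef615a415ea7c583af0"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")  # assumed local path
print("match" if actual == expected else f"mismatch: {actual}")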
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.3638736605644226,
   "best_model_checkpoint": "miner_id_24/checkpoint-650",
-  "epoch": 1.273074474856779,
+  "epoch": 1.3579461065138978,
   "eval_steps": 50,
-  "global_step": 750,
+  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5385,6 +5385,364 @@
       "eval_samples_per_second": 2.928,
       "eval_steps_per_second": 2.928,
       "step": 750
+    },
+    {
+      "epoch": 1.2747719074899215,
+      "grad_norm": 0.2629989981651306,
+      "learning_rate": 4.4440055569454936e-05,
+      "loss": 0.5687,
+      "step": 751
+    },
+    {
+      "epoch": 1.2764693401230638,
+      "grad_norm": 0.29386061429977417,
+      "learning_rate": 4.410240728232653e-05,
+      "loss": 0.4559,
+      "step": 752
+    },
+    {
+      "epoch": 1.2781667727562063,
+      "grad_norm": 0.32051095366477966,
+      "learning_rate": 4.376582538278114e-05,
+      "loss": 0.7155,
+      "step": 753
+    },
+    {
+      "epoch": 1.2798642053893485,
+      "grad_norm": 0.2592981159687042,
+      "learning_rate": 4.3430313260194697e-05,
+      "loss": 0.4972,
+      "step": 754
+    },
+    {
+      "epoch": 1.281561638022491,
+      "grad_norm": 0.23039335012435913,
+      "learning_rate": 4.309587429317061e-05,
+      "loss": 0.4049,
+      "step": 755
+    },
+    {
+      "epoch": 1.2832590706556333,
+      "grad_norm": 0.2293672114610672,
+      "learning_rate": 4.2762511849505476e-05,
+      "loss": 0.4085,
+      "step": 756
+    },
+    {
+      "epoch": 1.2849565032887758,
+      "grad_norm": 0.2180144041776657,
+      "learning_rate": 4.2430229286155484e-05,
+      "loss": 0.3829,
+      "step": 757
+    },
+    {
+      "epoch": 1.286653935921918,
+      "grad_norm": 0.22328434884548187,
+      "learning_rate": 4.209902994920235e-05,
+      "loss": 0.381,
+      "step": 758
+    },
+    {
+      "epoch": 1.2883513685550605,
+      "grad_norm": 0.19186857342720032,
+      "learning_rate": 4.176891717381967e-05,
+      "loss": 0.2676,
+      "step": 759
+    },
+    {
+      "epoch": 1.290048801188203,
+      "grad_norm": 0.19644580781459808,
+      "learning_rate": 4.143989428423947e-05,
+      "loss": 0.2604,
+      "step": 760
+    },
+    {
+      "epoch": 1.2917462338213452,
+      "grad_norm": 0.1470167487859726,
+      "learning_rate": 4.111196459371862e-05,
+      "loss": 0.1697,
+      "step": 761
+    },
+    {
+      "epoch": 1.2934436664544875,
+      "grad_norm": 0.10344719886779785,
+      "learning_rate": 4.0785131404505376e-05,
+      "loss": 0.0742,
+      "step": 762
+    },
+    {
+      "epoch": 1.29514109908763,
+      "grad_norm": 0.08508791774511337,
+      "learning_rate": 4.045939800780639e-05,
+      "loss": 0.0492,
+      "step": 763
+    },
+    {
+      "epoch": 1.2968385317207725,
+      "grad_norm": 0.08410584181547165,
+      "learning_rate": 4.0134767683753385e-05,
+      "loss": 0.049,
+      "step": 764
+    },
+    {
+      "epoch": 1.2985359643539147,
+      "grad_norm": 0.001906770863570273,
+      "learning_rate": 3.981124370137001e-05,
+      "loss": 0.0001,
+      "step": 765
+    },
+    {
+      "epoch": 1.300233396987057,
+      "grad_norm": 0.14595600962638855,
+      "learning_rate": 3.948882931853924e-05,
+      "loss": 0.119,
+      "step": 766
+    },
+    {
+      "epoch": 1.3019308296201995,
+      "grad_norm": 0.00104467140045017,
+      "learning_rate": 3.916752778197039e-05,
+      "loss": 0.0001,
+      "step": 767
+    },
+    {
+      "epoch": 1.303628262253342,
+      "grad_norm": 0.00029723646002821624,
+      "learning_rate": 3.8847342327166244e-05,
+      "loss": 0.0,
+      "step": 768
+    },
+    {
+      "epoch": 1.3053256948864842,
+      "grad_norm": 0.07592643052339554,
+      "learning_rate": 3.852827617839084e-05,
+      "loss": 0.026,
+      "step": 769
+    },
+    {
+      "epoch": 1.3070231275196265,
+      "grad_norm": 0.00041246655746363103,
+      "learning_rate": 3.8210332548636796e-05,
+      "loss": 0.0,
+      "step": 770
+    },
+    {
+      "epoch": 1.308720560152769,
+      "grad_norm": 0.002659448655322194,
+      "learning_rate": 3.7893514639592895e-05,
+      "loss": 0.0001,
+      "step": 771
+    },
+    {
+      "epoch": 1.3104179927859114,
+      "grad_norm": 0.00021023498266004026,
+      "learning_rate": 3.757782564161191e-05,
+      "loss": 0.0,
+      "step": 772
+    },
+    {
+      "epoch": 1.3121154254190537,
+      "grad_norm": 0.0010800276650115848,
+      "learning_rate": 3.7263268733678606e-05,
+      "loss": 0.0,
+      "step": 773
+    },
+    {
+      "epoch": 1.313812858052196,
+      "grad_norm": 0.0006105787470005453,
+      "learning_rate": 3.694984708337756e-05,
+      "loss": 0.0,
+      "step": 774
+    },
+    {
+      "epoch": 1.3155102906853384,
+      "grad_norm": 0.0007423445931635797,
+      "learning_rate": 3.663756384686127e-05,
+      "loss": 0.0,
+      "step": 775
+    },
+    {
+      "epoch": 1.317207723318481,
+      "grad_norm": 0.0007892303401604295,
+      "learning_rate": 3.632642216881847e-05,
+      "loss": 0.0,
+      "step": 776
+    },
+    {
+      "epoch": 1.3189051559516232,
+      "grad_norm": 0.0015005484456196427,
+      "learning_rate": 3.601642518244247e-05,
+      "loss": 0.0,
+      "step": 777
+    },
+    {
+      "epoch": 1.3206025885847654,
+      "grad_norm": 0.0008817919879220426,
+      "learning_rate": 3.570757600939939e-05,
+      "loss": 0.0,
+      "step": 778
+    },
+    {
+      "epoch": 1.322300021217908,
+      "grad_norm": 0.0003715140337590128,
+      "learning_rate": 3.5399877759797e-05,
+      "loss": 0.0,
+      "step": 779
+    },
+    {
+      "epoch": 1.3239974538510504,
+      "grad_norm": 0.001832145731896162,
+      "learning_rate": 3.509333353215331e-05,
+      "loss": 0.0001,
+      "step": 780
+    },
+    {
+      "epoch": 1.3256948864841926,
+      "grad_norm": 0.0006237781490199268,
+      "learning_rate": 3.47879464133652e-05,
+      "loss": 0.0,
+      "step": 781
+    },
+    {
+      "epoch": 1.327392319117335,
+      "grad_norm": 0.0015418545808643103,
+      "learning_rate": 3.448371947867763e-05,
+      "loss": 0.0001,
+      "step": 782
+    },
+    {
+      "epoch": 1.3290897517504774,
+      "grad_norm": 0.0035702355671674013,
+      "learning_rate": 3.4180655791652476e-05,
+      "loss": 0.0001,
+      "step": 783
+    },
+    {
+      "epoch": 1.3307871843836199,
+      "grad_norm": 0.0013561249943450093,
+      "learning_rate": 3.3878758404137624e-05,
+      "loss": 0.0001,
+      "step": 784
+    },
+    {
+      "epoch": 1.3324846170167621,
+      "grad_norm": 0.001489428337663412,
+      "learning_rate": 3.3578030356236455e-05,
+      "loss": 0.0001,
+      "step": 785
+    },
+    {
+      "epoch": 1.3341820496499044,
+      "grad_norm": 0.0011818065540865064,
+      "learning_rate": 3.3278474676277114e-05,
+      "loss": 0.0,
+      "step": 786
+    },
+    {
+      "epoch": 1.3358794822830469,
+      "grad_norm": 0.0076867276802659035,
+      "learning_rate": 3.298009438078194e-05,
+      "loss": 0.0003,
+      "step": 787
+    },
+    {
+      "epoch": 1.3375769149161894,
+      "grad_norm": 0.002071813913062215,
+      "learning_rate": 3.268289247443713e-05,
+      "loss": 0.0001,
+      "step": 788
+    },
+    {
+      "epoch": 1.3392743475493316,
+      "grad_norm": 0.002192431129515171,
+      "learning_rate": 3.238687195006264e-05,
+      "loss": 0.0001,
+      "step": 789
+    },
+    {
+      "epoch": 1.3409717801824739,
+      "grad_norm": 0.24951210618019104,
+      "learning_rate": 3.209203578858191e-05,
+      "loss": 0.6115,
+      "step": 790
+    },
+    {
+      "epoch": 1.3426692128156164,
+      "grad_norm": 0.29264548420906067,
+      "learning_rate": 3.1798386958991714e-05,
+      "loss": 0.71,
+      "step": 791
+    },
+    {
+      "epoch": 1.3443666454487588,
+      "grad_norm": 0.2815876603126526,
+      "learning_rate": 3.1505928418332574e-05,
+      "loss": 0.6586,
+      "step": 792
+    },
+    {
+      "epoch": 1.346064078081901,
+      "grad_norm": 0.25446027517318726,
+      "learning_rate": 3.121466311165875e-05,
+      "loss": 0.5623,
+      "step": 793
+    },
+    {
+      "epoch": 1.3477615107150436,
+      "grad_norm": 0.2643551230430603,
+      "learning_rate": 3.092459397200861e-05,
+      "loss": 0.6298,
+      "step": 794
+    },
+    {
+      "epoch": 1.3494589433481858,
+      "grad_norm": 0.24555166065692902,
+      "learning_rate": 3.0635723920375164e-05,
+      "loss": 0.5237,
+      "step": 795
+    },
+    {
+      "epoch": 1.3511563759813283,
+      "grad_norm": 0.2268795222043991,
+      "learning_rate": 3.0348055865676707e-05,
+      "loss": 0.3814,
+      "step": 796
+    },
+    {
+      "epoch": 1.3528538086144706,
+      "grad_norm": 0.25621020793914795,
+      "learning_rate": 3.0061592704727257e-05,
+      "loss": 0.5183,
+      "step": 797
+    },
+    {
+      "epoch": 1.354551241247613,
+      "grad_norm": 0.2662016451358795,
+      "learning_rate": 2.9776337322207687e-05,
+      "loss": 0.5679,
+      "step": 798
+    },
+    {
+      "epoch": 1.3562486738807553,
+      "grad_norm": 0.20577247440814972,
+      "learning_rate": 2.9492292590636613e-05,
+      "loss": 0.3231,
+      "step": 799
+    },
+    {
+      "epoch": 1.3579461065138978,
+      "grad_norm": 0.23013556003570557,
+      "learning_rate": 2.9209461370341204e-05,
+      "loss": 0.4158,
+      "step": 800
+    },
+    {
+      "epoch": 1.3579461065138978,
+      "eval_loss": 0.36441436409950256,
+      "eval_runtime": 65.9028,
+      "eval_samples_per_second": 2.929,
+      "eval_steps_per_second": 2.929,
+      "step": 800
     }
   ],
   "logging_steps": 1,
@@ -5399,7 +5757,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
+        "early_stopping_patience_counter": 3
      }
     },
     "TrainerControl": {
@@ -5408,12 +5766,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.861295086094254e+17,
+  "total_flos": 5.185996556504924e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null