arcwarden46 commited on
Commit
4f6be62
·
verified ·
1 Parent(s): 7c6ba04

Training in progress, step 1050, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c3b97767ca7c9178bec65b2993d9d3cccf9c8cde6eeb4a72ac6a0b6a88d78e6
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33b2440d2694321ee888d51ec5666ec9e866384a438fe8a89aab4937822170b1
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1afbb720527c8a5867883fc72092a9d32aee54c6814a16dc9eec65ce3811750
3
  size 168149074
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d50e471a33e6d700094dc5bd8ce2bca4a2efdc1b1d33b74f03e583750f796133
3
  size 168149074
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aab1ea4bef8988b197925fbee11876428674fba7799ca0e8ae2027afbecdc488
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ba78f70e8450cf0ec82402ec5f695006e3cd4c321e54ba57e606421f97e4957
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b5cfc34d80ad44f8253d7cd926a081fa016d75ac6bb00b185ad09b5e1b4725b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eec984887369339384df18a86847622608ab8c12ab961dc4703f600d8c64c21c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.636073112487793,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-900",
4
- "epoch": 1.9533369506239826,
5
  "eval_steps": 150,
6
- "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -693,6 +693,119 @@
693
  "eval_samples_per_second": 14.415,
694
  "eval_steps_per_second": 1.818,
695
  "step": 900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696
  }
697
  ],
698
  "logging_steps": 10,
@@ -707,7 +820,7 @@
707
  "early_stopping_threshold": 0.0
708
  },
709
  "attributes": {
710
- "early_stopping_patience_counter": 0
711
  }
712
  },
713
  "TrainerControl": {
@@ -721,7 +834,7 @@
721
  "attributes": {}
722
  }
723
  },
724
- "total_flos": 1.2619222458826752e+18,
725
  "train_batch_size": 8,
726
  "trial_name": null,
727
  "trial_params": null
 
1
  {
2
  "best_metric": 0.636073112487793,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-900",
4
+ "epoch": 2.278893109061313,
5
  "eval_steps": 150,
6
+ "global_step": 1050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
693
  "eval_samples_per_second": 14.415,
694
  "eval_steps_per_second": 1.818,
695
  "step": 900
696
+ },
697
+ {
698
+ "epoch": 1.9750406945198047,
699
+ "grad_norm": 33.91230773925781,
700
+ "learning_rate": 1.067489489247974e-05,
701
+ "loss": 1.9067,
702
+ "step": 910
703
+ },
704
+ {
705
+ "epoch": 1.9967444384156265,
706
+ "grad_norm": 28.854825973510742,
707
+ "learning_rate": 1.036474508437579e-05,
708
+ "loss": 2.3281,
709
+ "step": 920
710
+ },
711
+ {
712
+ "epoch": 2.018448182311449,
713
+ "grad_norm": 18.260103225708008,
714
+ "learning_rate": 1.0056771083298894e-05,
715
+ "loss": 2.1337,
716
+ "step": 930
717
+ },
718
+ {
719
+ "epoch": 2.0401519262072707,
720
+ "grad_norm": 19.979646682739258,
721
+ "learning_rate": 9.751117453465674e-06,
722
+ "loss": 1.6345,
723
+ "step": 940
724
+ },
725
+ {
726
+ "epoch": 2.0618556701030926,
727
+ "grad_norm": 22.703859329223633,
728
+ "learning_rate": 9.447927669901284e-06,
729
+ "loss": 1.634,
730
+ "step": 950
731
+ },
732
+ {
733
+ "epoch": 2.083559413998915,
734
+ "grad_norm": 21.72873878479004,
735
+ "learning_rate": 9.147344051091682e-06,
736
+ "loss": 1.5881,
737
+ "step": 960
738
+ },
739
+ {
740
+ "epoch": 2.1052631578947367,
741
+ "grad_norm": 31.957630157470703,
742
+ "learning_rate": 8.849507692178758e-06,
743
+ "loss": 1.3856,
744
+ "step": 970
745
+ },
746
+ {
747
+ "epoch": 2.126966901790559,
748
+ "grad_norm": 21.75389862060547,
749
+ "learning_rate": 8.554558398729726e-06,
750
+ "loss": 1.9382,
751
+ "step": 980
752
+ },
753
+ {
754
+ "epoch": 2.148670645686381,
755
+ "grad_norm": 23.939027786254883,
756
+ "learning_rate": 8.262634621111819e-06,
757
+ "loss": 1.8201,
758
+ "step": 990
759
+ },
760
+ {
761
+ "epoch": 2.1703743895822027,
762
+ "grad_norm": 21.948673248291016,
763
+ "learning_rate": 7.97387338950315e-06,
764
+ "loss": 1.5186,
765
+ "step": 1000
766
+ },
767
+ {
768
+ "epoch": 2.192078133478025,
769
+ "grad_norm": 26.39198112487793,
770
+ "learning_rate": 7.688410249570214e-06,
771
+ "loss": 1.4693,
772
+ "step": 1010
773
+ },
774
+ {
775
+ "epoch": 2.213781877373847,
776
+ "grad_norm": 43.11570358276367,
777
+ "learning_rate": 7.4063791988421905e-06,
778
+ "loss": 1.3836,
779
+ "step": 1020
780
+ },
781
+ {
782
+ "epoch": 2.235485621269669,
783
+ "grad_norm": 21.80547523498535,
784
+ "learning_rate": 7.127912623811993e-06,
785
+ "loss": 1.9962,
786
+ "step": 1030
787
+ },
788
+ {
789
+ "epoch": 2.257189365165491,
790
+ "grad_norm": 20.71767234802246,
791
+ "learning_rate": 6.853141237793507e-06,
792
+ "loss": 1.6606,
793
+ "step": 1040
794
+ },
795
+ {
796
+ "epoch": 2.278893109061313,
797
+ "grad_norm": 21.81035614013672,
798
+ "learning_rate": 6.582194019564266e-06,
799
+ "loss": 1.4825,
800
+ "step": 1050
801
+ },
802
+ {
803
+ "epoch": 2.278893109061313,
804
+ "eval_loss": 0.6611286997795105,
805
+ "eval_runtime": 53.8731,
806
+ "eval_samples_per_second": 14.423,
807
+ "eval_steps_per_second": 1.819,
808
+ "step": 1050
809
  }
810
  ],
811
  "logging_steps": 10,
 
820
  "early_stopping_threshold": 0.0
821
  },
822
  "attributes": {
823
+ "early_stopping_patience_counter": 1
824
  }
825
  },
826
  "TrainerControl": {
 
834
  "attributes": {}
835
  }
836
  },
837
+ "total_flos": 1.4722426201964544e+18,
838
  "train_batch_size": 8,
839
  "trial_name": null,
840
  "trial_params": null