arcwarden46 commited on
Commit
6f58a29
·
verified ·
1 Parent(s): 11f39f6

Training in progress, step 1200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33b2440d2694321ee888d51ec5666ec9e866384a438fe8a89aab4937822170b1
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01965d98a92c0f64f8a0d029a10632f08d0287219af676b8048158b0661d0a62
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d50e471a33e6d700094dc5bd8ce2bca4a2efdc1b1d33b74f03e583750f796133
3
  size 168149074
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61d5a8a0dcbd9606342ddfa403836caac031ee714e121062f218d4eebfd185e1
3
  size 168149074
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ba78f70e8450cf0ec82402ec5f695006e3cd4c321e54ba57e606421f97e4957
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8e13ad7428c5509076c15c21d35ce0b2bffea5947cd099daf2af0afb123a71d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eec984887369339384df18a86847622608ab8c12ab961dc4703f600d8c64c21c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beb0b0f4cb227409c25efa2d36f03edfc3a0032e3296f1707945c3a0c5611cc5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.636073112487793,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-900",
4
- "epoch": 2.278893109061313,
5
  "eval_steps": 150,
6
- "global_step": 1050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -806,6 +806,119 @@
806
  "eval_samples_per_second": 14.423,
807
  "eval_steps_per_second": 1.819,
808
  "step": 1050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  }
810
  ],
811
  "logging_steps": 10,
@@ -820,7 +933,7 @@
820
  "early_stopping_threshold": 0.0
821
  },
822
  "attributes": {
823
- "early_stopping_patience_counter": 1
824
  }
825
  },
826
  "TrainerControl": {
@@ -834,7 +947,7 @@
834
  "attributes": {}
835
  }
836
  },
837
- "total_flos": 1.4722426201964544e+18,
838
  "train_batch_size": 8,
839
  "trial_name": null,
840
  "trial_params": null
 
1
  {
2
  "best_metric": 0.636073112487793,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-900",
4
+ "epoch": 2.6044492674986435,
5
  "eval_steps": 150,
6
+ "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
806
  "eval_samples_per_second": 14.423,
807
  "eval_steps_per_second": 1.819,
808
  "step": 1050
809
+ },
810
+ {
811
+ "epoch": 2.3005968529571352,
812
+ "grad_norm": 31.06637191772461,
813
+ "learning_rate": 6.315198152822273e-06,
814
+ "loss": 1.4709,
815
+ "step": 1060
816
+ },
817
+ {
818
+ "epoch": 2.322300596852957,
819
+ "grad_norm": 41.439178466796875,
820
+ "learning_rate": 6.052278966485492e-06,
821
+ "loss": 1.5022,
822
+ "step": 1070
823
+ },
824
+ {
825
+ "epoch": 2.3440043407487794,
826
+ "grad_norm": 23.359983444213867,
827
+ "learning_rate": 5.793559875861938e-06,
828
+ "loss": 1.9443,
829
+ "step": 1080
830
+ },
831
+ {
832
+ "epoch": 2.3657080846446013,
833
+ "grad_norm": 20.295778274536133,
834
+ "learning_rate": 5.539162324718075e-06,
835
+ "loss": 1.739,
836
+ "step": 1090
837
+ },
838
+ {
839
+ "epoch": 2.387411828540423,
840
+ "grad_norm": 25.940486907958984,
841
+ "learning_rate": 5.289205728272587e-06,
842
+ "loss": 1.5355,
843
+ "step": 1100
844
+ },
845
+ {
846
+ "epoch": 2.4091155724362454,
847
+ "grad_norm": 28.764272689819336,
848
+ "learning_rate": 5.043807417142436e-06,
849
+ "loss": 1.4645,
850
+ "step": 1110
851
+ },
852
+ {
853
+ "epoch": 2.4308193163320673,
854
+ "grad_norm": 36.14052963256836,
855
+ "learning_rate": 4.8030825822673816e-06,
856
+ "loss": 1.3555,
857
+ "step": 1120
858
+ },
859
+ {
860
+ "epoch": 2.452523060227889,
861
+ "grad_norm": 20.15415382385254,
862
+ "learning_rate": 4.567144220838923e-06,
863
+ "loss": 1.9016,
864
+ "step": 1130
865
+ },
866
+ {
867
+ "epoch": 2.4742268041237114,
868
+ "grad_norm": 22.350454330444336,
869
+ "learning_rate": 4.336103083258942e-06,
870
+ "loss": 1.7998,
871
+ "step": 1140
872
+ },
873
+ {
874
+ "epoch": 2.4959305480195333,
875
+ "grad_norm": 22.9199161529541,
876
+ "learning_rate": 4.110067621153041e-06,
877
+ "loss": 1.5755,
878
+ "step": 1150
879
+ },
880
+ {
881
+ "epoch": 2.517634291915355,
882
+ "grad_norm": 27.70863151550293,
883
+ "learning_rate": 3.889143936462915e-06,
884
+ "loss": 1.3297,
885
+ "step": 1160
886
+ },
887
+ {
888
+ "epoch": 2.5393380358111775,
889
+ "grad_norm": 41.40476608276367,
890
+ "learning_rate": 3.673435731641692e-06,
891
+ "loss": 1.3004,
892
+ "step": 1170
893
+ },
894
+ {
895
+ "epoch": 2.5610417797069993,
896
+ "grad_norm": 21.756010055541992,
897
+ "learning_rate": 3.4630442609755666e-06,
898
+ "loss": 2.0685,
899
+ "step": 1180
900
+ },
901
+ {
902
+ "epoch": 2.5827455236028216,
903
+ "grad_norm": 29.703012466430664,
904
+ "learning_rate": 3.2580682830546667e-06,
905
+ "loss": 1.8172,
906
+ "step": 1190
907
+ },
908
+ {
909
+ "epoch": 2.6044492674986435,
910
+ "grad_norm": 21.751575469970703,
911
+ "learning_rate": 3.0586040144153436e-06,
912
+ "loss": 1.5008,
913
+ "step": 1200
914
+ },
915
+ {
916
+ "epoch": 2.6044492674986435,
917
+ "eval_loss": 0.6650447845458984,
918
+ "eval_runtime": 53.8186,
919
+ "eval_samples_per_second": 14.437,
920
+ "eval_steps_per_second": 1.821,
921
+ "step": 1200
922
  }
923
  ],
924
  "logging_steps": 10,
 
933
  "early_stopping_threshold": 0.0
934
  },
935
  "attributes": {
936
+ "early_stopping_patience_counter": 2
937
  }
938
  },
939
  "TrainerControl": {
 
947
  "attributes": {}
948
  }
949
  },
950
+ "total_flos": 1.6825629945102336e+18,
951
  "train_batch_size": 8,
952
  "trial_name": null,
953
  "trial_params": null