arcwarden46 commited on
Commit
6e8eea5
·
verified ·
1 Parent(s): 552ba07

Training in progress, step 1350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01965d98a92c0f64f8a0d029a10632f08d0287219af676b8048158b0661d0a62
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f792ca2f39253114b59eb717807e2ff96b18c41da2f5d430b8418f21938975d
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61d5a8a0dcbd9606342ddfa403836caac031ee714e121062f218d4eebfd185e1
3
  size 168149074
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bb6d2fc8b2efd6822aad78c3e4fd174d2075ff491b7a302ce16e6e2b9a7176c
3
  size 168149074
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8e13ad7428c5509076c15c21d35ce0b2bffea5947cd099daf2af0afb123a71d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86519396222f045b85ec9776e7db27686ba4130a0d8956349c7b70ae36555704
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:beb0b0f4cb227409c25efa2d36f03edfc3a0032e3296f1707945c3a0c5611cc5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a866955cff9370cd3957339d6bf23f5ca8494fc491b0c5ef9330a9273b5d4460
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.636073112487793,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-900",
4
- "epoch": 2.6044492674986435,
5
  "eval_steps": 150,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -919,6 +919,119 @@
919
  "eval_samples_per_second": 14.437,
920
  "eval_steps_per_second": 1.821,
921
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
922
  }
923
  ],
924
  "logging_steps": 10,
@@ -933,7 +1046,7 @@
933
  "early_stopping_threshold": 0.0
934
  },
935
  "attributes": {
936
- "early_stopping_patience_counter": 2
937
  }
938
  },
939
  "TrainerControl": {
@@ -942,12 +1055,12 @@
942
  "should_evaluate": false,
943
  "should_log": false,
944
  "should_save": true,
945
- "should_training_stop": false
946
  },
947
  "attributes": {}
948
  }
949
  },
950
- "total_flos": 1.6825629945102336e+18,
951
  "train_batch_size": 8,
952
  "trial_name": null,
953
  "trial_params": null
 
1
  {
2
  "best_metric": 0.636073112487793,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-900",
4
+ "epoch": 2.930005425935974,
5
  "eval_steps": 150,
6
+ "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
919
  "eval_samples_per_second": 14.437,
920
  "eval_steps_per_second": 1.821,
921
  "step": 1200
922
+ },
923
+ {
924
+ "epoch": 2.6261530113944653,
925
+ "grad_norm": 23.57729721069336,
926
+ "learning_rate": 2.86474508437579e-06,
927
+ "loss": 1.3837,
928
+ "step": 1210
929
+ },
930
+ {
931
+ "epoch": 2.6478567552902876,
932
+ "grad_norm": 35.865020751953125,
933
+ "learning_rate": 2.67658249108603e-06,
934
+ "loss": 1.562,
935
+ "step": 1220
936
+ },
937
+ {
938
+ "epoch": 2.6695604991861095,
939
+ "grad_norm": 19.588651657104492,
940
+ "learning_rate": 2.4942045588130504e-06,
941
+ "loss": 1.7947,
942
+ "step": 1230
943
+ },
944
+ {
945
+ "epoch": 2.691264243081932,
946
+ "grad_norm": 25.088882446289062,
947
+ "learning_rate": 2.317696896481024e-06,
948
+ "loss": 1.7935,
949
+ "step": 1240
950
+ },
951
+ {
952
+ "epoch": 2.7129679869777537,
953
+ "grad_norm": 31.45013427734375,
954
+ "learning_rate": 2.1471423574861643e-06,
955
+ "loss": 1.8259,
956
+ "step": 1250
957
+ },
958
+ {
959
+ "epoch": 2.7346717308735755,
960
+ "grad_norm": 27.531116485595703,
961
+ "learning_rate": 1.982621000804979e-06,
962
+ "loss": 1.4111,
963
+ "step": 1260
964
+ },
965
+ {
966
+ "epoch": 2.756375474769398,
967
+ "grad_norm": 42.346099853515625,
968
+ "learning_rate": 1.8242100534143065e-06,
969
+ "loss": 1.4248,
970
+ "step": 1270
971
+ },
972
+ {
973
+ "epoch": 2.7780792186652197,
974
+ "grad_norm": 25.887807846069336,
975
+ "learning_rate": 1.6719838740406313e-06,
976
+ "loss": 1.9046,
977
+ "step": 1280
978
+ },
979
+ {
980
+ "epoch": 2.799782962561042,
981
+ "grad_norm": 22.550439834594727,
982
+ "learning_rate": 1.5260139182558363e-06,
983
+ "loss": 1.7181,
984
+ "step": 1290
985
+ },
986
+ {
987
+ "epoch": 2.821486706456864,
988
+ "grad_norm": 24.849620819091797,
989
+ "learning_rate": 1.3863687049356465e-06,
990
+ "loss": 1.5168,
991
+ "step": 1300
992
+ },
993
+ {
994
+ "epoch": 2.8431904503526857,
995
+ "grad_norm": 25.409860610961914,
996
+ "learning_rate": 1.25311378409661e-06,
997
+ "loss": 1.4321,
998
+ "step": 1310
999
+ },
1000
+ {
1001
+ "epoch": 2.864894194248508,
1002
+ "grad_norm": 36.389671325683594,
1003
+ "learning_rate": 1.1263117061266677e-06,
1004
+ "loss": 1.3841,
1005
+ "step": 1320
1006
+ },
1007
+ {
1008
+ "epoch": 2.88659793814433,
1009
+ "grad_norm": 24.459070205688477,
1010
+ "learning_rate": 1.006021992423738e-06,
1011
+ "loss": 1.9626,
1012
+ "step": 1330
1013
+ },
1014
+ {
1015
+ "epoch": 2.908301682040152,
1016
+ "grad_norm": 20.42031478881836,
1017
+ "learning_rate": 8.923011074561405e-07,
1018
+ "loss": 1.6266,
1019
+ "step": 1340
1020
+ },
1021
+ {
1022
+ "epoch": 2.930005425935974,
1023
+ "grad_norm": 25.92626190185547,
1024
+ "learning_rate": 7.852024322579649e-07,
1025
+ "loss": 1.6606,
1026
+ "step": 1350
1027
+ },
1028
+ {
1029
+ "epoch": 2.930005425935974,
1030
+ "eval_loss": 0.6615604758262634,
1031
+ "eval_runtime": 53.8295,
1032
+ "eval_samples_per_second": 14.434,
1033
+ "eval_steps_per_second": 1.821,
1034
+ "step": 1350
1035
  }
1036
  ],
1037
  "logging_steps": 10,
 
1046
  "early_stopping_threshold": 0.0
1047
  },
1048
  "attributes": {
1049
+ "early_stopping_patience_counter": 3
1050
  }
1051
  },
1052
  "TrainerControl": {
 
1055
  "should_evaluate": false,
1056
  "should_log": false,
1057
  "should_save": true,
1058
+ "should_training_stop": true
1059
  },
1060
  "attributes": {}
1061
  }
1062
  },
1063
+ "total_flos": 1.8928833688240128e+18,
1064
  "train_batch_size": 8,
1065
  "trial_name": null,
1066
  "trial_params": null