eddysang commited on
Commit
e646bbb
·
verified ·
1 Parent(s): e33d353

Training in progress, step 153, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d357909dba959fc25a9df85f0556aa115af78e55ff31a0347e9d6dc9086669be
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c079d645762a6059f2de7010f66b05a0ffbfa9f07f4149749e599a52925580e
3
  size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c985768cd88156b279865b4af398ed65c42e2eb474c28476bc122fd2d648fd1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afb210574debeca01beef086494f1b0da6d8ee8853b3831ec7094b554157981c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aeb1fbb5e964bbc83fa43b049054867ad1faca8f43271132d241ae074069d5d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e76ff8614026ec7c5c2d9793615ca4e2f707e550ce0b5a4376af475431afe3f1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.027135722257901597,
5
  "eval_steps": 50,
6
- "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -983,6 +983,133 @@
983
  "learning_rate": 4.212216399081918e-05,
984
  "loss": 0.0,
985
  "step": 136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
986
  }
987
  ],
988
  "logging_steps": 1,
@@ -1002,7 +1129,7 @@
1002
  "attributes": {}
1003
  }
1004
  },
1005
- "total_flos": 8.116593719550935e+17,
1006
  "train_batch_size": 2,
1007
  "trial_name": null,
1008
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.030527687540139296,
5
  "eval_steps": 50,
6
+ "global_step": 153,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
983
  "learning_rate": 4.212216399081918e-05,
984
  "loss": 0.0,
985
  "step": 136
986
+ },
987
+ {
988
+ "epoch": 0.02733524962744499,
989
+ "grad_norm": NaN,
990
+ "learning_rate": 4.095071251953399e-05,
991
+ "loss": 0.0,
992
+ "step": 137
993
+ },
994
+ {
995
+ "epoch": 0.027534776996988385,
996
+ "grad_norm": NaN,
997
+ "learning_rate": 3.978963279105821e-05,
998
+ "loss": 0.0,
999
+ "step": 138
1000
+ },
1001
+ {
1002
+ "epoch": 0.02773430436653178,
1003
+ "grad_norm": NaN,
1004
+ "learning_rate": 3.863927848152472e-05,
1005
+ "loss": 0.0,
1006
+ "step": 139
1007
+ },
1008
+ {
1009
+ "epoch": 0.027933831736075172,
1010
+ "grad_norm": NaN,
1011
+ "learning_rate": 3.750000000000001e-05,
1012
+ "loss": 0.0,
1013
+ "step": 140
1014
+ },
1015
+ {
1016
+ "epoch": 0.028133359105618566,
1017
+ "grad_norm": NaN,
1018
+ "learning_rate": 3.637214438174593e-05,
1019
+ "loss": 0.0,
1020
+ "step": 141
1021
+ },
1022
+ {
1023
+ "epoch": 0.02833288647516196,
1024
+ "grad_norm": NaN,
1025
+ "learning_rate": 3.525605518250964e-05,
1026
+ "loss": 0.0,
1027
+ "step": 142
1028
+ },
1029
+ {
1030
+ "epoch": 0.028532413844705354,
1031
+ "grad_norm": NaN,
1032
+ "learning_rate": 3.415207237387297e-05,
1033
+ "loss": 0.0,
1034
+ "step": 143
1035
+ },
1036
+ {
1037
+ "epoch": 0.028731941214248748,
1038
+ "grad_norm": NaN,
1039
+ "learning_rate": 3.3060532239693994e-05,
1040
+ "loss": 0.0,
1041
+ "step": 144
1042
+ },
1043
+ {
1044
+ "epoch": 0.02893146858379214,
1045
+ "grad_norm": NaN,
1046
+ "learning_rate": 3.198176727367156e-05,
1047
+ "loss": 0.0,
1048
+ "step": 145
1049
+ },
1050
+ {
1051
+ "epoch": 0.029130995953335535,
1052
+ "grad_norm": NaN,
1053
+ "learning_rate": 3.091610607806452e-05,
1054
+ "loss": 0.0,
1055
+ "step": 146
1056
+ },
1057
+ {
1058
+ "epoch": 0.02933052332287893,
1059
+ "grad_norm": NaN,
1060
+ "learning_rate": 2.986387326359637e-05,
1061
+ "loss": 0.0,
1062
+ "step": 147
1063
+ },
1064
+ {
1065
+ "epoch": 0.029530050692422323,
1066
+ "grad_norm": NaN,
1067
+ "learning_rate": 2.8825389350575624e-05,
1068
+ "loss": 0.0,
1069
+ "step": 148
1070
+ },
1071
+ {
1072
+ "epoch": 0.02972957806196572,
1073
+ "grad_norm": NaN,
1074
+ "learning_rate": 2.78009706712622e-05,
1075
+ "loss": 0.0,
1076
+ "step": 149
1077
+ },
1078
+ {
1079
+ "epoch": 0.029929105431509114,
1080
+ "grad_norm": NaN,
1081
+ "learning_rate": 2.6790929273509545e-05,
1082
+ "loss": 0.0,
1083
+ "step": 150
1084
+ },
1085
+ {
1086
+ "epoch": 0.029929105431509114,
1087
+ "eval_loss": NaN,
1088
+ "eval_runtime": 3233.9846,
1089
+ "eval_samples_per_second": 5.22,
1090
+ "eval_steps_per_second": 2.61,
1091
+ "step": 150
1092
+ },
1093
+ {
1094
+ "epoch": 0.030128632801052508,
1095
+ "grad_norm": NaN,
1096
+ "learning_rate": 2.579557282571196e-05,
1097
+ "loss": 0.0,
1098
+ "step": 151
1099
+ },
1100
+ {
1101
+ "epoch": 0.030328160170595902,
1102
+ "grad_norm": NaN,
1103
+ "learning_rate": 2.4815204523085654e-05,
1104
+ "loss": 0.0,
1105
+ "step": 152
1106
+ },
1107
+ {
1108
+ "epoch": 0.030527687540139296,
1109
+ "grad_norm": NaN,
1110
+ "learning_rate": 2.385012299531262e-05,
1111
+ "loss": 0.0,
1112
+ "step": 153
1113
  }
1114
  ],
1115
  "logging_steps": 1,
 
1129
  "attributes": {}
1130
  }
1131
  },
1132
+ "total_flos": 9.131167934494802e+17,
1133
  "train_batch_size": 2,
1134
  "trial_name": null,
1135
  "trial_params": null