eddysang commited on
Commit
ac6a018
·
verified ·
1 Parent(s): cc450b9

Training in progress, step 34, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b5db45574d4a8c70773208b47b767b809d492505b83768f43814580c1122cb2
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38bb48413e7fec321d31d234a33b486eafc285ce7ed9fd00b27909a34898662e
3
  size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3a35ffc5a2035c2bb835d48f31dca7f4b4744ac86d0daf6a89e43471f43f751
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57ad4d886bbce296ef82d35e7b35217a219d13c344b8f35284b8759501aa1ff1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0abe1a027b3fea2bf654a1c387b6eb2241fa486bab4a282d3a0e829c4308c91
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e1983b20d7ce0214623b79adb071ed1f5c168cabcab4cc0ff2c0c61c63ddce9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0033919652822376996,
5
  "eval_steps": 50,
6
- "global_step": 17,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -134,6 +134,125 @@
134
  "learning_rate": 0.00012749999999999998,
135
  "loss": 0.0,
136
  "step": 17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  }
138
  ],
139
  "logging_steps": 1,
@@ -153,7 +272,7 @@
153
  "attributes": {}
154
  }
155
  },
156
- "total_flos": 1.0145742149438669e+17,
157
  "train_batch_size": 2,
158
  "trial_name": null,
159
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.006783930564475399,
5
  "eval_steps": 50,
6
+ "global_step": 34,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
134
  "learning_rate": 0.00012749999999999998,
135
  "loss": 0.0,
136
  "step": 17
137
+ },
138
+ {
139
+ "epoch": 0.0035914926517810935,
140
+ "grad_norm": NaN,
141
+ "learning_rate": 0.000135,
142
+ "loss": 0.0,
143
+ "step": 18
144
+ },
145
+ {
146
+ "epoch": 0.0037910200213244877,
147
+ "grad_norm": NaN,
148
+ "learning_rate": 0.0001425,
149
+ "loss": 0.0,
150
+ "step": 19
151
+ },
152
+ {
153
+ "epoch": 0.003990547390867882,
154
+ "grad_norm": NaN,
155
+ "learning_rate": 0.00015,
156
+ "loss": 0.0,
157
+ "step": 20
158
+ },
159
+ {
160
+ "epoch": 0.004190074760411276,
161
+ "grad_norm": NaN,
162
+ "learning_rate": 0.00014998857713672935,
163
+ "loss": 0.0,
164
+ "step": 21
165
+ },
166
+ {
167
+ "epoch": 0.00438960212995467,
168
+ "grad_norm": NaN,
169
+ "learning_rate": 0.00014995431202643217,
170
+ "loss": 0.0,
171
+ "step": 22
172
+ },
173
+ {
174
+ "epoch": 0.004589129499498064,
175
+ "grad_norm": NaN,
176
+ "learning_rate": 0.000149897215106593,
177
+ "loss": 0.0,
178
+ "step": 23
179
+ },
180
+ {
181
+ "epoch": 0.004788656869041458,
182
+ "grad_norm": NaN,
183
+ "learning_rate": 0.0001498173037694868,
184
+ "loss": 0.0,
185
+ "step": 24
186
+ },
187
+ {
188
+ "epoch": 0.004988184238584852,
189
+ "grad_norm": NaN,
190
+ "learning_rate": 0.0001497146023568809,
191
+ "loss": 0.0,
192
+ "step": 25
193
+ },
194
+ {
195
+ "epoch": 0.005187711608128246,
196
+ "grad_norm": NaN,
197
+ "learning_rate": 0.00014958914215262048,
198
+ "loss": 0.0,
199
+ "step": 26
200
+ },
201
+ {
202
+ "epoch": 0.005387238977671641,
203
+ "grad_norm": NaN,
204
+ "learning_rate": 0.00014944096137309914,
205
+ "loss": 0.0,
206
+ "step": 27
207
+ },
208
+ {
209
+ "epoch": 0.0055867663472150345,
210
+ "grad_norm": NaN,
211
+ "learning_rate": 0.00014927010515561776,
212
+ "loss": 0.0,
213
+ "step": 28
214
+ },
215
+ {
216
+ "epoch": 0.005786293716758428,
217
+ "grad_norm": NaN,
218
+ "learning_rate": 0.00014907662554463532,
219
+ "loss": 0.0,
220
+ "step": 29
221
+ },
222
+ {
223
+ "epoch": 0.005985821086301822,
224
+ "grad_norm": NaN,
225
+ "learning_rate": 0.0001488605814759156,
226
+ "loss": 0.0,
227
+ "step": 30
228
+ },
229
+ {
230
+ "epoch": 0.006185348455845217,
231
+ "grad_norm": NaN,
232
+ "learning_rate": 0.00014862203875857477,
233
+ "loss": 0.0,
234
+ "step": 31
235
+ },
236
+ {
237
+ "epoch": 0.006384875825388611,
238
+ "grad_norm": NaN,
239
+ "learning_rate": 0.0001483610700550354,
240
+ "loss": 0.0,
241
+ "step": 32
242
+ },
243
+ {
244
+ "epoch": 0.0065844031949320045,
245
+ "grad_norm": NaN,
246
+ "learning_rate": 0.00014807775485889264,
247
+ "loss": 0.0,
248
+ "step": 33
249
+ },
250
+ {
251
+ "epoch": 0.006783930564475399,
252
+ "grad_norm": NaN,
253
+ "learning_rate": 0.0001477721794706997,
254
+ "loss": 0.0,
255
+ "step": 34
256
  }
257
  ],
258
  "logging_steps": 1,
 
272
  "attributes": {}
273
  }
274
  },
275
+ "total_flos": 2.0291484298877338e+17,
276
  "train_batch_size": 2,
277
  "trial_name": null,
278
  "trial_params": null