eddysang commited on
Commit
b89eb8e
·
verified ·
1 Parent(s): ac6a018

Training in progress, step 51, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38bb48413e7fec321d31d234a33b486eafc285ce7ed9fd00b27909a34898662e
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a27096981255da323876810b71d4d0fa75aff2a623be29237e8cc96f237ee80
3
  size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57ad4d886bbce296ef82d35e7b35217a219d13c344b8f35284b8759501aa1ff1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dcfd7c73f8b512c49d9750d648bdf1702116da4421461a4a4bd0b6a13ab5b60
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e1983b20d7ce0214623b79adb071ed1f5c168cabcab4cc0ff2c0c61c63ddce9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86702c0a3caad6c51746e54805a7289de03dff9cc5abc148a58966cf1f4d339
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.006783930564475399,
5
  "eval_steps": 50,
6
- "global_step": 34,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -253,6 +253,133 @@
253
  "learning_rate": 0.0001477721794706997,
254
  "loss": 0.0,
255
  "step": 34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  }
257
  ],
258
  "logging_steps": 1,
@@ -272,7 +399,7 @@
272
  "attributes": {}
273
  }
274
  },
275
- "total_flos": 2.0291484298877338e+17,
276
  "train_batch_size": 2,
277
  "trial_name": null,
278
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.010175895846713098,
5
  "eval_steps": 50,
6
+ "global_step": 51,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
253
  "learning_rate": 0.0001477721794706997,
254
  "loss": 0.0,
255
  "step": 34
256
+ },
257
+ {
258
+ "epoch": 0.006983457934018793,
259
+ "grad_norm": NaN,
260
+ "learning_rate": 0.0001474444369716801,
261
+ "loss": 0.0,
262
+ "step": 35
263
+ },
264
+ {
265
+ "epoch": 0.007182985303562187,
266
+ "grad_norm": NaN,
267
+ "learning_rate": 0.0001470946271953739,
268
+ "loss": 0.0,
269
+ "step": 36
270
+ },
271
+ {
272
+ "epoch": 0.007382512673105581,
273
+ "grad_norm": NaN,
274
+ "learning_rate": 0.00014672285669722765,
275
+ "loss": 0.0,
276
+ "step": 37
277
+ },
278
+ {
279
+ "epoch": 0.0075820400426489755,
280
+ "grad_norm": NaN,
281
+ "learning_rate": 0.00014632923872213652,
282
+ "loss": 0.0,
283
+ "step": 38
284
+ },
285
+ {
286
+ "epoch": 0.007781567412192369,
287
+ "grad_norm": NaN,
288
+ "learning_rate": 0.00014591389316994876,
289
+ "loss": 0.0,
290
+ "step": 39
291
+ },
292
+ {
293
+ "epoch": 0.007981094781735764,
294
+ "grad_norm": NaN,
295
+ "learning_rate": 0.0001454769465589431,
296
+ "loss": 0.0,
297
+ "step": 40
298
+ },
299
+ {
300
+ "epoch": 0.008180622151279158,
301
+ "grad_norm": NaN,
302
+ "learning_rate": 0.00014501853198729012,
303
+ "loss": 0.0,
304
+ "step": 41
305
+ },
306
+ {
307
+ "epoch": 0.008380149520822552,
308
+ "grad_norm": NaN,
309
+ "learning_rate": 0.00014453878909250904,
310
+ "loss": 0.0,
311
+ "step": 42
312
+ },
313
+ {
314
+ "epoch": 0.008579676890365946,
315
+ "grad_norm": NaN,
316
+ "learning_rate": 0.00014403786400893302,
317
+ "loss": 0.0,
318
+ "step": 43
319
+ },
320
+ {
321
+ "epoch": 0.00877920425990934,
322
+ "grad_norm": NaN,
323
+ "learning_rate": 0.00014351590932319504,
324
+ "loss": 0.0,
325
+ "step": 44
326
+ },
327
+ {
328
+ "epoch": 0.008978731629452733,
329
+ "grad_norm": NaN,
330
+ "learning_rate": 0.00014297308402774875,
331
+ "loss": 0.0,
332
+ "step": 45
333
+ },
334
+ {
335
+ "epoch": 0.009178258998996129,
336
+ "grad_norm": NaN,
337
+ "learning_rate": 0.0001424095534724375,
338
+ "loss": 0.0,
339
+ "step": 46
340
+ },
341
+ {
342
+ "epoch": 0.009377786368539523,
343
+ "grad_norm": NaN,
344
+ "learning_rate": 0.00014182548931412757,
345
+ "loss": 0.0,
346
+ "step": 47
347
+ },
348
+ {
349
+ "epoch": 0.009577313738082916,
350
+ "grad_norm": NaN,
351
+ "learning_rate": 0.0001412210694644195,
352
+ "loss": 0.0,
353
+ "step": 48
354
+ },
355
+ {
356
+ "epoch": 0.00977684110762631,
357
+ "grad_norm": NaN,
358
+ "learning_rate": 0.00014059647803545467,
359
+ "loss": 0.0,
360
+ "step": 49
361
+ },
362
+ {
363
+ "epoch": 0.009976368477169704,
364
+ "grad_norm": NaN,
365
+ "learning_rate": 0.0001399519052838329,
366
+ "loss": 0.0,
367
+ "step": 50
368
+ },
369
+ {
370
+ "epoch": 0.009976368477169704,
371
+ "eval_loss": NaN,
372
+ "eval_runtime": 1847.8631,
373
+ "eval_samples_per_second": 9.136,
374
+ "eval_steps_per_second": 4.568,
375
+ "step": 50
376
+ },
377
+ {
378
+ "epoch": 0.010175895846713098,
379
+ "grad_norm": NaN,
380
+ "learning_rate": 0.00013928754755265842,
381
+ "loss": 0.0,
382
+ "step": 51
383
  }
384
  ],
385
  "logging_steps": 1,
 
399
  "attributes": {}
400
  }
401
  },
402
+ "total_flos": 3.0437226448316006e+17,
403
  "train_batch_size": 2,
404
  "trial_name": null,
405
  "trial_params": null