arcwarden46 committed on
Commit
6b0fd27
·
verified ·
1 Parent(s): 583f119

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75f6fa361f5e714231d81472f109f634b7cf3d218542ad522c3320bdc72d1ede
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7926a9c2c08baeaa6ae6d2730cd4924e30ae0f15097852f045b6899d1bdeae
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d73fd2b1e5f4b9857ee12592a0208a4de24c8cdbb115962e97f1e6cbf34004a3
3
  size 168149074
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7782e4a5863f4e4ba79cea04cf2544ba29a3b746778ea7896e0d3aeb9c3dc14
3
  size 168149074
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d8026d2fdbb840b3bb65046bc60ca3592595116a718952ff3aaaf00eaf7c240
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57e86c80d7f7ee9366d63709cfa052333d869770c3074c030ef6768d4d86d72b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa8e8a5e2a44ead074ab81a212600c52a27ef90bdfd18224d7df20a5dd9fd4f1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01e3c901f96fab114520bfb821ed8ef01b62c0db35b4ce9bb2ae527a57de6b3a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.6706861853599548,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-450",
4
- "epoch": 0.9766684753119913,
5
  "eval_steps": 150,
6
- "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -354,6 +354,119 @@
354
  "eval_samples_per_second": 14.446,
355
  "eval_steps_per_second": 1.822,
356
  "step": 450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  }
358
  ],
359
  "logging_steps": 10,
@@ -382,7 +495,7 @@
382
  "attributes": {}
383
  }
384
  },
385
- "total_flos": 6.309611229413376e+17,
386
  "train_batch_size": 8,
387
  "trial_name": null,
388
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6503757834434509,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-600",
4
+ "epoch": 1.3022246337493217,
5
  "eval_steps": 150,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
354
  "eval_samples_per_second": 14.446,
355
  "eval_steps_per_second": 1.822,
356
  "step": 450
357
+ },
358
+ {
359
+ "epoch": 0.9983722192078134,
360
+ "grad_norm": 25.043182373046875,
361
+ "learning_rate": 2.446083767528193e-05,
362
+ "loss": 2.7357,
363
+ "step": 460
364
+ },
365
+ {
366
+ "epoch": 1.0200759631036354,
367
+ "grad_norm": 15.549626350402832,
368
+ "learning_rate": 2.4206440124138064e-05,
369
+ "loss": 2.7512,
370
+ "step": 470
371
+ },
372
+ {
373
+ "epoch": 1.0417797069994574,
374
+ "grad_norm": 18.95859146118164,
375
+ "learning_rate": 2.3947721033514517e-05,
376
+ "loss": 2.3525,
377
+ "step": 480
378
+ },
379
+ {
380
+ "epoch": 1.0634834508952795,
381
+ "grad_norm": 18.817302703857422,
382
+ "learning_rate": 2.3684801847177732e-05,
383
+ "loss": 2.2039,
384
+ "step": 490
385
+ },
386
+ {
387
+ "epoch": 1.0851871947911014,
388
+ "grad_norm": 23.182357788085938,
389
+ "learning_rate": 2.341780598043574e-05,
390
+ "loss": 2.0556,
391
+ "step": 500
392
+ },
393
+ {
394
+ "epoch": 1.1068909386869235,
395
+ "grad_norm": 34.256813049316406,
396
+ "learning_rate": 2.3146858762206493e-05,
397
+ "loss": 2.2184,
398
+ "step": 510
399
+ },
400
+ {
401
+ "epoch": 1.1285946825827455,
402
+ "grad_norm": 18.3436336517334,
403
+ "learning_rate": 2.287208737618801e-05,
404
+ "loss": 2.5032,
405
+ "step": 520
406
+ },
407
+ {
408
+ "epoch": 1.1502984264785676,
409
+ "grad_norm": 18.193883895874023,
410
+ "learning_rate": 2.259362080115781e-05,
411
+ "loss": 2.1954,
412
+ "step": 530
413
+ },
414
+ {
415
+ "epoch": 1.1720021703743897,
416
+ "grad_norm": 22.53719711303711,
417
+ "learning_rate": 2.231158975042979e-05,
418
+ "loss": 2.2031,
419
+ "step": 540
420
+ },
421
+ {
422
+ "epoch": 1.1937059142702116,
423
+ "grad_norm": 21.267290115356445,
424
+ "learning_rate": 2.2026126610496852e-05,
425
+ "loss": 1.8531,
426
+ "step": 550
427
+ },
428
+ {
429
+ "epoch": 1.2154096581660336,
430
+ "grad_norm": 29.71878433227539,
431
+ "learning_rate": 2.173736537888819e-05,
432
+ "loss": 1.9597,
433
+ "step": 560
434
+ },
435
+ {
436
+ "epoch": 1.2371134020618557,
437
+ "grad_norm": 18.650861740112305,
438
+ "learning_rate": 2.1445441601270276e-05,
439
+ "loss": 2.6653,
440
+ "step": 570
441
+ },
442
+ {
443
+ "epoch": 1.2588171459576776,
444
+ "grad_norm": 22.564220428466797,
445
+ "learning_rate": 2.115049230782124e-05,
446
+ "loss": 2.34,
447
+ "step": 580
448
+ },
449
+ {
450
+ "epoch": 1.2805208898534997,
451
+ "grad_norm": 22.589075088500977,
452
+ "learning_rate": 2.085265594890832e-05,
453
+ "loss": 2.181,
454
+ "step": 590
455
+ },
456
+ {
457
+ "epoch": 1.3022246337493217,
458
+ "grad_norm": 22.656047821044922,
459
+ "learning_rate": 2.055207233009872e-05,
460
+ "loss": 1.9121,
461
+ "step": 600
462
+ },
463
+ {
464
+ "epoch": 1.3022246337493217,
465
+ "eval_loss": 0.6503757834434509,
466
+ "eval_runtime": 53.7768,
467
+ "eval_samples_per_second": 14.449,
468
+ "eval_steps_per_second": 1.822,
469
+ "step": 600
470
  }
471
  ],
472
  "logging_steps": 10,
 
495
  "attributes": {}
496
  }
497
  },
498
+ "total_flos": 8.412814972551168e+17,
499
  "train_batch_size": 8,
500
  "trial_name": null,
501
  "trial_params": null