Commit 0ea3e8d (verified), committed by arcwarden46
Parent: cca7fb7

Training in progress, step 750, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a7926a9c2c08baeaa6ae6d2730cd4924e30ae0f15097852f045b6899d1bdeae
+oid sha256:f804a30ae13bfe1577a0031410a13b04b4ddce907fdddc2611f38187d0a2dac2
 size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7782e4a5863f4e4ba79cea04cf2544ba29a3b746778ea7896e0d3aeb9c3dc14
+oid sha256:f6c237037ecab40c8f5c8b075480d122677269b22fb8024271553bfa93d581ce
 size 168149074
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57e86c80d7f7ee9366d63709cfa052333d869770c3074c030ef6768d4d86d72b
+oid sha256:1e72cd091c77f00cfbb9582fc9d0b27f44bfa0dfb0d0851f9131d63afb61f68b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01e3c901f96fab114520bfb821ed8ef01b62c0db35b4ce9bb2ae527a57de6b3a
+oid sha256:964d544513334060b6db9e9107b617b1f518ba72468ffe587ba1e9068c8c5a78
 size 1064
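
Note: the four files above are stored through Git LFS, so each diff only swaps the pointer's `oid sha256:` digest while the reported `size` field stays the same; the tensors themselves live in LFS storage. As a small illustrative sketch (not part of this repository; the local path is assumed to mirror the repo layout), a downloaded blob can be checked against its pointer using only the Python standard library:

    import hashlib

    def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
        """Stream a local file and return its sha256 hex digest,
        which should match the `oid sha256:` field of its LFS pointer."""
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
        return h.hexdigest()

    # Expected digest for the new adapter weights in this commit.
    expected = "f804a30ae13bfe1577a0031410a13b04b4ddce907fdddc2611f38187d0a2dac2"
    actual = lfs_sha256("last-checkpoint/adapter_model.safetensors")
    print(actual == expected)
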
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6503757834434509,
-  "best_model_checkpoint": "miner_id_24/checkpoint-600",
-  "epoch": 1.3022246337493217,
+  "best_metric": 0.647061824798584,
+  "best_model_checkpoint": "miner_id_24/checkpoint-750",
+  "epoch": 1.6277807921866523,
   "eval_steps": 150,
-  "global_step": 600,
+  "global_step": 750,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -467,6 +467,119 @@
       "eval_samples_per_second": 14.449,
       "eval_steps_per_second": 1.822,
       "step": 600
+    },
+    {
+      "epoch": 1.3239283776451438,
+      "grad_norm": 33.046363830566406,
+      "learning_rate": 2.0248882546534327e-05,
+      "loss": 1.9914,
+      "step": 610
+    },
+    {
+      "epoch": 1.345632121540966,
+      "grad_norm": 21.024412155151367,
+      "learning_rate": 1.9943228916701108e-05,
+      "loss": 2.5415,
+      "step": 620
+    },
+    {
+      "epoch": 1.3673358654367878,
+      "grad_norm": 20.984058380126953,
+      "learning_rate": 1.963525491562421e-05,
+      "loss": 2.329,
+      "step": 630
+    },
+    {
+      "epoch": 1.3890396093326098,
+      "grad_norm": 23.811756134033203,
+      "learning_rate": 1.9325105107520264e-05,
+      "loss": 2.236,
+      "step": 640
+    },
+    {
+      "epoch": 1.410743353228432,
+      "grad_norm": 24.431550979614258,
+      "learning_rate": 1.9012925077938318e-05,
+      "loss": 2.0522,
+      "step": 650
+    },
+    {
+      "epoch": 1.432447097124254,
+      "grad_norm": 27.39398193359375,
+      "learning_rate": 1.8698861365421433e-05,
+      "loss": 1.8751,
+      "step": 660
+    },
+    {
+      "epoch": 1.454150841020076,
+      "grad_norm": 18.39029312133789,
+      "learning_rate": 1.8383061392720914e-05,
+      "loss": 2.6245,
+      "step": 670
+    },
+    {
+      "epoch": 1.475854584915898,
+      "grad_norm": 19.114816665649414,
+      "learning_rate": 1.8065673397595475e-05,
+      "loss": 2.1778,
+      "step": 680
+    },
+    {
+      "epoch": 1.49755832881172,
+      "grad_norm": 20.83147621154785,
+      "learning_rate": 1.7746846363227843e-05,
+      "loss": 1.9417,
+      "step": 690
+    },
+    {
+      "epoch": 1.519262072707542,
+      "grad_norm": 26.67376708984375,
+      "learning_rate": 1.7426729948291474e-05,
+      "loss": 1.9912,
+      "step": 700
+    },
+    {
+      "epoch": 1.540965816603364,
+      "grad_norm": 29.251083374023438,
+      "learning_rate": 1.7105474416700165e-05,
+      "loss": 2.0298,
+      "step": 710
+    },
+    {
+      "epoch": 1.5626695604991863,
+      "grad_norm": 17.292633056640625,
+      "learning_rate": 1.6783230567073597e-05,
+      "loss": 2.4696,
+      "step": 720
+    },
+    {
+      "epoch": 1.5843733043950081,
+      "grad_norm": 20.139678955078125,
+      "learning_rate": 1.646014966195185e-05,
+      "loss": 2.2227,
+      "step": 730
+    },
+    {
+      "epoch": 1.6060770482908302,
+      "grad_norm": 20.934541702270508,
+      "learning_rate": 1.613638335679216e-05,
+      "loss": 1.9018,
+      "step": 740
+    },
+    {
+      "epoch": 1.6277807921866523,
+      "grad_norm": 24.31667709350586,
+      "learning_rate": 1.5812083628781265e-05,
+      "loss": 2.1797,
+      "step": 750
+    },
+    {
+      "epoch": 1.6277807921866523,
+      "eval_loss": 0.647061824798584,
+      "eval_runtime": 53.7706,
+      "eval_samples_per_second": 14.45,
+      "eval_steps_per_second": 1.823,
+      "step": 750
     }
   ],
   "logging_steps": 10,
@@ -495,7 +608,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.412814972551168e+17,
+  "total_flos": 1.051601871568896e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null