jflotz commited on
Commit
503ce77
·
1 Parent(s): bdb2235

Training in progress, step 720000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:640777d4df17094532677050d169633ab25c6e5307fe7b26b1ef2480f2fa436c
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a4dbe9eca38a1792482b27bee5680ed76ff9b4fd9c693743e32db4ef9e8647
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:589beff1989fc9c0ae009d9c4e89cc81dea78aca3df050a942a083e247b0b3bb
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c1af069a55aa141dcc761c0323c960402d5bdd923f0a54d0011b64f759ce573
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d73966766ab27f8d8b82049597603e5e27df9de3bfb4bc68f052966836974a7
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b66387fbbba596b80573df493f29c352b88c96c68303d4022fc8f1e14c19ee9e
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d01455789648693f3c874de11c7ed6063a3725328c7accf41053ba2de2ef602
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd40898eaf2bb8f0e6b4793242d3113eb1c145ed61a59dba0b51391b3bf76148
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dea50129c89a3bcb4d508277085bd760cf99e8e7596db26b1b0416ea6e662c44
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33131db4d0dac1ab66ab66d875733bb83b9d53bdcdbb76cdb67e79eb395bffe
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4ef7983f20d7466e22a66c14b4eb278bac441cabcddf12c2ab45c6a7ad45ccb
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b4ac527158b6aa910da1978f5ab170d69172ed0e22a278e864ebd1fa5d08f7e
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7030fabf7b722e2752b51d9deb3cc7dac4745661c75fe5c48a022d295a9ffb0d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c41eb120388058e9e86dafd9920ba056d2f7de5234fa0df636e9377bd5cd58c
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db8fd80ed1622a37fb8836b353462fe160453f93a1dbe072cd8bd1e37f25f658
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a27da1b3b783fed75c82831476e53d38ef818a90b5db4dbe8bd2147c26f1d00
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9333397ba4ec25f889f96bb6104d7c6cf0bc5fd5c8036f75df959bdd43d52a66
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc1301019ee94a4e73634cfb2e922704c945921ceecc63c3c74ee79aae0fb318
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58730a986e4ecbe1e49387372f3829be2d2d88e8ab521897bf3b764a726b44be
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f15d3ae110e1b804d6f357797df2bd20ffa7af4cc5c48ef5f3805abb21eebf52
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:335855e99fe5c1eafe16f664b8e35342405853ed0ef1faa2892c2126df9feea5
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2712ef8beaf45f8ea70ea16f7da83a9783b51ec4e90a9cb114e7f3a2c3044b7
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4193244015848456,
5
- "global_step": 710000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8526,11 +8526,131 @@
8526
  "learning_rate": 3.979699361219395e-05,
8527
  "loss": 0.2956,
8528
  "step": 710000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8529
  }
8530
  ],
8531
  "max_steps": 1000000,
8532
  "num_train_epochs": 2,
8533
- "total_flos": 4.800106373011031e+22,
8534
  "trial_name": null,
8535
  "trial_params": null
8536
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4393148861142095,
5
+ "global_step": 720000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8526
  "learning_rate": 3.979699361219395e-05,
8527
  "loss": 0.2956,
8528
  "step": 710000
8529
+ },
8530
+ {
8531
+ "epoch": 1.42,
8532
+ "learning_rate": 3.9702298612493816e-05,
8533
+ "loss": 0.2958,
8534
+ "step": 710500
8535
+ },
8536
+ {
8537
+ "epoch": 1.42,
8538
+ "learning_rate": 3.960771378517049e-05,
8539
+ "loss": 0.296,
8540
+ "step": 711000
8541
+ },
8542
+ {
8543
+ "epoch": 1.42,
8544
+ "learning_rate": 3.951323938881533e-05,
8545
+ "loss": 0.2953,
8546
+ "step": 711500
8547
+ },
8548
+ {
8549
+ "epoch": 1.42,
8550
+ "learning_rate": 3.941887568171766e-05,
8551
+ "loss": 0.2956,
8552
+ "step": 712000
8553
+ },
8554
+ {
8555
+ "epoch": 1.42,
8556
+ "learning_rate": 3.9324622921864323e-05,
8557
+ "loss": 0.2963,
8558
+ "step": 712500
8559
+ },
8560
+ {
8561
+ "epoch": 1.43,
8562
+ "learning_rate": 3.923048136693873e-05,
8563
+ "loss": 0.2951,
8564
+ "step": 713000
8565
+ },
8566
+ {
8567
+ "epoch": 1.43,
8568
+ "learning_rate": 3.913645127432028e-05,
8569
+ "loss": 0.2957,
8570
+ "step": 713500
8571
+ },
8572
+ {
8573
+ "epoch": 1.43,
8574
+ "learning_rate": 3.904253290108369e-05,
8575
+ "loss": 0.2953,
8576
+ "step": 714000
8577
+ },
8578
+ {
8579
+ "epoch": 1.43,
8580
+ "learning_rate": 3.8948726503998176e-05,
8581
+ "loss": 0.2954,
8582
+ "step": 714500
8583
+ },
8584
+ {
8585
+ "epoch": 1.43,
8586
+ "learning_rate": 3.885503233952689e-05,
8587
+ "loss": 0.2958,
8588
+ "step": 715000
8589
+ },
8590
+ {
8591
+ "epoch": 1.43,
8592
+ "learning_rate": 3.876145066382606e-05,
8593
+ "loss": 0.2948,
8594
+ "step": 715500
8595
+ },
8596
+ {
8597
+ "epoch": 1.43,
8598
+ "learning_rate": 3.86679817327444e-05,
8599
+ "loss": 0.2953,
8600
+ "step": 716000
8601
+ },
8602
+ {
8603
+ "epoch": 1.43,
8604
+ "learning_rate": 3.857462580182245e-05,
8605
+ "loss": 0.2952,
8606
+ "step": 716500
8607
+ },
8608
+ {
8609
+ "epoch": 1.43,
8610
+ "learning_rate": 3.848138312629171e-05,
8611
+ "loss": 0.2953,
8612
+ "step": 717000
8613
+ },
8614
+ {
8615
+ "epoch": 1.43,
8616
+ "learning_rate": 3.838825396107415e-05,
8617
+ "loss": 0.2962,
8618
+ "step": 717500
8619
+ },
8620
+ {
8621
+ "epoch": 1.44,
8622
+ "learning_rate": 3.8295238560781317e-05,
8623
+ "loss": 0.2957,
8624
+ "step": 718000
8625
+ },
8626
+ {
8627
+ "epoch": 1.44,
8628
+ "learning_rate": 3.820233717971374e-05,
8629
+ "loss": 0.2955,
8630
+ "step": 718500
8631
+ },
8632
+ {
8633
+ "epoch": 1.44,
8634
+ "learning_rate": 3.810955007186029e-05,
8635
+ "loss": 0.2953,
8636
+ "step": 719000
8637
+ },
8638
+ {
8639
+ "epoch": 1.44,
8640
+ "learning_rate": 3.801687749089737e-05,
8641
+ "loss": 0.295,
8642
+ "step": 719500
8643
+ },
8644
+ {
8645
+ "epoch": 1.44,
8646
+ "learning_rate": 3.792431969018824e-05,
8647
+ "loss": 0.2951,
8648
+ "step": 720000
8649
  }
8650
  ],
8651
  "max_steps": 1000000,
8652
  "num_train_epochs": 2,
8653
+ "total_flos": 4.86771334519035e+22,
8654
  "trial_name": null,
8655
  "trial_params": null
8656
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:589beff1989fc9c0ae009d9c4e89cc81dea78aca3df050a942a083e247b0b3bb
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c1af069a55aa141dcc761c0323c960402d5bdd923f0a54d0011b64f759ce573
3
  size 449450757