jflotz commited on
Commit
56bfcbf
·
1 Parent(s): 1ac6581

Training in progress, step 1000000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0c84155e298a5a6b7ee070a8179e83bcd232eb2e5869a2fab62635fe434a15b
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ad35154e8068fb3a11d53f7dbc996f2907fbf771e6b088b60198e7662bd085
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6119a41cc84dfc82b1ad8e160d2de4f6723c4c70df2567e7b05d5135b2ad413c
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aa0642d46fe211727fefc5ac6e0bc28efa8511d1f0d9e5eee1987fa821600bc
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d92331d191f9b54e8682c7179f1d46d8e4298cbee5bf860f80c01313d919cbf
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffef422be2539ff2f38d9ef02e7d220a1b72474f08bb4e90499c7aaeff187041
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b3f3f7f2089f141dbb6bfd17083ac633b398668fbdd29b4239ed78cd16828ef
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:463b459e546b940eb314ddfec0443ea23a3be76cf5287309632d30e7fc449578
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a1e11f0bc52abbbd19f8d4cb2af2cb9140b19cf7c5678a2b90ba0e9593a2025
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dd41b22e2b316b6844ef149b2ed9245b6df6b6b1ef3eaa82e48507bd8f6361f
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bffc19433b361587fa636d9fe7f366ad621d886a3e88e7b929e925c972115064
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b5227791b50d3a4ce6a4303e20bb6f7a630db623b90933290f4b8be2d663b28
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:baf40a9092f2a61c65fa8d4b01ad107ecd14742f4e2c480971b7e313c2ab669b
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8b3d25a9338c36376e5ff28e0294df3d7c9ae55a2257f11d59bab9a6b7bff56
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35d8ccac7b1580c03e5bd6e78ceed5d6b1e846215ceb31ec6a8e03829af4c92e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:850d36f688ac21bc9b8eca3e25be925153622f77bb798cc74ba77c97e9987016
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a564015df3012aed298103a98abad0123fee1b00b18b24191670937f37a6281
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bae6388e81de2bc43d53517beccd69cbe0291d07b9778d77d34130d2c4dd9ade
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3839c7b56c36b9afc190821fb76a047c055639f3ce0f32d8cdce397cba6fa5f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2368b26770136755d6ac3415a40478eaa9855ef850310208946463b5a771ae00
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8600853d84ba2918d905e70ba99b5bea80c1839bef1da439315875bb3ad462d
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3af8ce8ebbd21d4c5d2eeb030d14d5a15759925617012e21e6e139dfa9889ef
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.5181718895735843,
5
- "global_step": 990000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11886,11 +11886,139 @@
11886
  "learning_rate": 1.0038271986135177e-05,
11887
  "loss": 0.2827,
11888
  "step": 990000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11889
  }
11890
  ],
11891
  "max_steps": 1000000,
11892
  "num_train_epochs": 2,
11893
- "total_flos": 6.693099686599179e+22,
11894
  "trial_name": null,
11895
  "trial_params": null
11896
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.538101577634107,
5
+ "global_step": 1000000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11886
  "learning_rate": 1.0038271986135177e-05,
11887
  "loss": 0.2827,
11888
  "step": 990000
11889
+ },
11890
+ {
11891
+ "epoch": 1.52,
11892
+ "learning_rate": 1.003454077439879e-05,
11893
+ "loss": 0.2826,
11894
+ "step": 990500
11895
+ },
11896
+ {
11897
+ "epoch": 1.52,
11898
+ "learning_rate": 1.0031000845556304e-05,
11899
+ "loss": 0.2828,
11900
+ "step": 991000
11901
+ },
11902
+ {
11903
+ "epoch": 1.52,
11904
+ "learning_rate": 1.0027652209285743e-05,
11905
+ "loss": 0.2829,
11906
+ "step": 991500
11907
+ },
11908
+ {
11909
+ "epoch": 1.52,
11910
+ "learning_rate": 1.0024494874742152e-05,
11911
+ "loss": 0.2832,
11912
+ "step": 992000
11913
+ },
11914
+ {
11915
+ "epoch": 1.52,
11916
+ "learning_rate": 1.0021528850557572e-05,
11917
+ "loss": 0.283,
11918
+ "step": 992500
11919
+ },
11920
+ {
11921
+ "epoch": 1.52,
11922
+ "learning_rate": 1.0018754144840986e-05,
11923
+ "loss": 0.2821,
11924
+ "step": 993000
11925
+ },
11926
+ {
11927
+ "epoch": 1.53,
11928
+ "learning_rate": 1.0016170765178345e-05,
11929
+ "loss": 0.2822,
11930
+ "step": 993500
11931
+ },
11932
+ {
11933
+ "epoch": 1.53,
11934
+ "learning_rate": 1.0013778718632507e-05,
11935
+ "loss": 0.2826,
11936
+ "step": 994000
11937
+ },
11938
+ {
11939
+ "epoch": 1.53,
11940
+ "learning_rate": 1.0011578011743233e-05,
11941
+ "loss": 0.2828,
11942
+ "step": 994500
11943
+ },
11944
+ {
11945
+ "epoch": 1.53,
11946
+ "learning_rate": 1.000956865052717e-05,
11947
+ "loss": 0.2827,
11948
+ "step": 995000
11949
+ },
11950
+ {
11951
+ "epoch": 1.53,
11952
+ "learning_rate": 1.0007750640477843e-05,
11953
+ "loss": 0.2829,
11954
+ "step": 995500
11955
+ },
11956
+ {
11957
+ "epoch": 1.53,
11958
+ "learning_rate": 1.0006123986565623e-05,
11959
+ "loss": 0.2829,
11960
+ "step": 996000
11961
+ },
11962
+ {
11963
+ "epoch": 1.53,
11964
+ "learning_rate": 1.0004688693237708e-05,
11965
+ "loss": 0.2832,
11966
+ "step": 996500
11967
+ },
11968
+ {
11969
+ "epoch": 1.53,
11970
+ "learning_rate": 1.0003444764418138e-05,
11971
+ "loss": 0.2835,
11972
+ "step": 997000
11973
+ },
11974
+ {
11975
+ "epoch": 1.53,
11976
+ "learning_rate": 1.0002392203507781e-05,
11977
+ "loss": 0.2832,
11978
+ "step": 997500
11979
+ },
11980
+ {
11981
+ "epoch": 1.53,
11982
+ "learning_rate": 1.000153101338428e-05,
11983
+ "loss": 0.2826,
11984
+ "step": 998000
11985
+ },
11986
+ {
11987
+ "epoch": 1.54,
11988
+ "learning_rate": 1.00008611964021e-05,
11989
+ "loss": 0.283,
11990
+ "step": 998500
11991
+ },
11992
+ {
11993
+ "epoch": 1.54,
11994
+ "learning_rate": 1.00003827543925e-05,
11995
+ "loss": 0.2832,
11996
+ "step": 999000
11997
+ },
11998
+ {
11999
+ "epoch": 1.54,
12000
+ "learning_rate": 1.0000095688663532e-05,
12001
+ "loss": 0.2827,
12002
+ "step": 999500
12003
+ },
12004
+ {
12005
+ "epoch": 1.54,
12006
+ "learning_rate": 1e-05,
12007
+ "loss": 0.2828,
12008
+ "step": 1000000
12009
+ },
12010
+ {
12011
+ "epoch": 1.54,
12012
+ "eval_loss": 0.3599591553211212,
12013
+ "eval_runtime": 50.3004,
12014
+ "eval_samples_per_second": 2519.919,
12015
+ "eval_steps_per_second": 9.861,
12016
+ "step": 1000000
12017
  }
12018
  ],
12019
  "max_steps": 1000000,
12020
  "num_train_epochs": 2,
12021
+ "total_flos": 6.7607019944302705e+22,
12022
  "trial_name": null,
12023
  "trial_params": null
12024
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6119a41cc84dfc82b1ad8e160d2de4f6723c4c70df2567e7b05d5135b2ad413c
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aa0642d46fe211727fefc5ac6e0bc28efa8511d1f0d9e5eee1987fa821600bc
3
  size 449450757