Training in progress, step 1000000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +131 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4ad35154e8068fb3a11d53f7dbc996f2907fbf771e6b088b60198e7662bd085
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6aa0642d46fe211727fefc5ac6e0bc28efa8511d1f0d9e5eee1987fa821600bc
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ffef422be2539ff2f38d9ef02e7d220a1b72474f08bb4e90499c7aaeff187041
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:463b459e546b940eb314ddfec0443ea23a3be76cf5287309632d30e7fc449578
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1dd41b22e2b316b6844ef149b2ed9245b6df6b6b1ef3eaa82e48507bd8f6361f
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b5227791b50d3a4ce6a4303e20bb6f7a630db623b90933290f4b8be2d663b28
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8b3d25a9338c36376e5ff28e0294df3d7c9ae55a2257f11d59bab9a6b7bff56
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:850d36f688ac21bc9b8eca3e25be925153622f77bb798cc74ba77c97e9987016
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bae6388e81de2bc43d53517beccd69cbe0291d07b9778d77d34130d2c4dd9ade
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2368b26770136755d6ac3415a40478eaa9855ef850310208946463b5a771ae00
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3af8ce8ebbd21d4c5d2eeb030d14d5a15759925617012e21e6e139dfa9889ef
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -11886,11 +11886,139 @@
|
|
| 11886 |
"learning_rate": 1.0038271986135177e-05,
|
| 11887 |
"loss": 0.2827,
|
| 11888 |
"step": 990000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11889 |
}
|
| 11890 |
],
|
| 11891 |
"max_steps": 1000000,
|
| 11892 |
"num_train_epochs": 2,
|
| 11893 |
-
"total_flos": 6.
|
| 11894 |
"trial_name": null,
|
| 11895 |
"trial_params": null
|
| 11896 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.538101577634107,
|
| 5 |
+
"global_step": 1000000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 11886 |
"learning_rate": 1.0038271986135177e-05,
|
| 11887 |
"loss": 0.2827,
|
| 11888 |
"step": 990000
|
| 11889 |
+
},
|
| 11890 |
+
{
|
| 11891 |
+
"epoch": 1.52,
|
| 11892 |
+
"learning_rate": 1.003454077439879e-05,
|
| 11893 |
+
"loss": 0.2826,
|
| 11894 |
+
"step": 990500
|
| 11895 |
+
},
|
| 11896 |
+
{
|
| 11897 |
+
"epoch": 1.52,
|
| 11898 |
+
"learning_rate": 1.0031000845556304e-05,
|
| 11899 |
+
"loss": 0.2828,
|
| 11900 |
+
"step": 991000
|
| 11901 |
+
},
|
| 11902 |
+
{
|
| 11903 |
+
"epoch": 1.52,
|
| 11904 |
+
"learning_rate": 1.0027652209285743e-05,
|
| 11905 |
+
"loss": 0.2829,
|
| 11906 |
+
"step": 991500
|
| 11907 |
+
},
|
| 11908 |
+
{
|
| 11909 |
+
"epoch": 1.52,
|
| 11910 |
+
"learning_rate": 1.0024494874742152e-05,
|
| 11911 |
+
"loss": 0.2832,
|
| 11912 |
+
"step": 992000
|
| 11913 |
+
},
|
| 11914 |
+
{
|
| 11915 |
+
"epoch": 1.52,
|
| 11916 |
+
"learning_rate": 1.0021528850557572e-05,
|
| 11917 |
+
"loss": 0.283,
|
| 11918 |
+
"step": 992500
|
| 11919 |
+
},
|
| 11920 |
+
{
|
| 11921 |
+
"epoch": 1.52,
|
| 11922 |
+
"learning_rate": 1.0018754144840986e-05,
|
| 11923 |
+
"loss": 0.2821,
|
| 11924 |
+
"step": 993000
|
| 11925 |
+
},
|
| 11926 |
+
{
|
| 11927 |
+
"epoch": 1.53,
|
| 11928 |
+
"learning_rate": 1.0016170765178345e-05,
|
| 11929 |
+
"loss": 0.2822,
|
| 11930 |
+
"step": 993500
|
| 11931 |
+
},
|
| 11932 |
+
{
|
| 11933 |
+
"epoch": 1.53,
|
| 11934 |
+
"learning_rate": 1.0013778718632507e-05,
|
| 11935 |
+
"loss": 0.2826,
|
| 11936 |
+
"step": 994000
|
| 11937 |
+
},
|
| 11938 |
+
{
|
| 11939 |
+
"epoch": 1.53,
|
| 11940 |
+
"learning_rate": 1.0011578011743233e-05,
|
| 11941 |
+
"loss": 0.2828,
|
| 11942 |
+
"step": 994500
|
| 11943 |
+
},
|
| 11944 |
+
{
|
| 11945 |
+
"epoch": 1.53,
|
| 11946 |
+
"learning_rate": 1.000956865052717e-05,
|
| 11947 |
+
"loss": 0.2827,
|
| 11948 |
+
"step": 995000
|
| 11949 |
+
},
|
| 11950 |
+
{
|
| 11951 |
+
"epoch": 1.53,
|
| 11952 |
+
"learning_rate": 1.0007750640477843e-05,
|
| 11953 |
+
"loss": 0.2829,
|
| 11954 |
+
"step": 995500
|
| 11955 |
+
},
|
| 11956 |
+
{
|
| 11957 |
+
"epoch": 1.53,
|
| 11958 |
+
"learning_rate": 1.0006123986565623e-05,
|
| 11959 |
+
"loss": 0.2829,
|
| 11960 |
+
"step": 996000
|
| 11961 |
+
},
|
| 11962 |
+
{
|
| 11963 |
+
"epoch": 1.53,
|
| 11964 |
+
"learning_rate": 1.0004688693237708e-05,
|
| 11965 |
+
"loss": 0.2832,
|
| 11966 |
+
"step": 996500
|
| 11967 |
+
},
|
| 11968 |
+
{
|
| 11969 |
+
"epoch": 1.53,
|
| 11970 |
+
"learning_rate": 1.0003444764418138e-05,
|
| 11971 |
+
"loss": 0.2835,
|
| 11972 |
+
"step": 997000
|
| 11973 |
+
},
|
| 11974 |
+
{
|
| 11975 |
+
"epoch": 1.53,
|
| 11976 |
+
"learning_rate": 1.0002392203507781e-05,
|
| 11977 |
+
"loss": 0.2832,
|
| 11978 |
+
"step": 997500
|
| 11979 |
+
},
|
| 11980 |
+
{
|
| 11981 |
+
"epoch": 1.53,
|
| 11982 |
+
"learning_rate": 1.000153101338428e-05,
|
| 11983 |
+
"loss": 0.2826,
|
| 11984 |
+
"step": 998000
|
| 11985 |
+
},
|
| 11986 |
+
{
|
| 11987 |
+
"epoch": 1.54,
|
| 11988 |
+
"learning_rate": 1.00008611964021e-05,
|
| 11989 |
+
"loss": 0.283,
|
| 11990 |
+
"step": 998500
|
| 11991 |
+
},
|
| 11992 |
+
{
|
| 11993 |
+
"epoch": 1.54,
|
| 11994 |
+
"learning_rate": 1.00003827543925e-05,
|
| 11995 |
+
"loss": 0.2832,
|
| 11996 |
+
"step": 999000
|
| 11997 |
+
},
|
| 11998 |
+
{
|
| 11999 |
+
"epoch": 1.54,
|
| 12000 |
+
"learning_rate": 1.0000095688663532e-05,
|
| 12001 |
+
"loss": 0.2827,
|
| 12002 |
+
"step": 999500
|
| 12003 |
+
},
|
| 12004 |
+
{
|
| 12005 |
+
"epoch": 1.54,
|
| 12006 |
+
"learning_rate": 1e-05,
|
| 12007 |
+
"loss": 0.2828,
|
| 12008 |
+
"step": 1000000
|
| 12009 |
+
},
|
| 12010 |
+
{
|
| 12011 |
+
"epoch": 1.54,
|
| 12012 |
+
"eval_loss": 0.3599591553211212,
|
| 12013 |
+
"eval_runtime": 50.3004,
|
| 12014 |
+
"eval_samples_per_second": 2519.919,
|
| 12015 |
+
"eval_steps_per_second": 9.861,
|
| 12016 |
+
"step": 1000000
|
| 12017 |
}
|
| 12018 |
],
|
| 12019 |
"max_steps": 1000000,
|
| 12020 |
"num_train_epochs": 2,
|
| 12021 |
+
"total_flos": 6.7607019944302705e+22,
|
| 12022 |
"trial_name": null,
|
| 12023 |
"trial_params": null
|
| 12024 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6aa0642d46fe211727fefc5ac6e0bc28efa8511d1f0d9e5eee1987fa821600bc
|
| 3 |
size 449450757
|