Training in progress, step 1000000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +131 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893441093
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4ad35154e8068fb3a11d53f7dbc996f2907fbf771e6b088b60198e7662bd085
|
3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449450757
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6aa0642d46fe211727fefc5ac6e0bc28efa8511d1f0d9e5eee1987fa821600bc
|
3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffef422be2539ff2f38d9ef02e7d220a1b72474f08bb4e90499c7aaeff187041
|
3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:463b459e546b940eb314ddfec0443ea23a3be76cf5287309632d30e7fc449578
|
3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dd41b22e2b316b6844ef149b2ed9245b6df6b6b1ef3eaa82e48507bd8f6361f
|
3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b5227791b50d3a4ce6a4303e20bb6f7a630db623b90933290f4b8be2d663b28
|
3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8b3d25a9338c36376e5ff28e0294df3d7c9ae55a2257f11d59bab9a6b7bff56
|
3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:850d36f688ac21bc9b8eca3e25be925153622f77bb798cc74ba77c97e9987016
|
3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bae6388e81de2bc43d53517beccd69cbe0291d07b9778d77d34130d2c4dd9ade
|
3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2368b26770136755d6ac3415a40478eaa9855ef850310208946463b5a771ae00
|
3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3af8ce8ebbd21d4c5d2eeb030d14d5a15759925617012e21e6e139dfa9889ef
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -11886,11 +11886,139 @@
|
|
11886 |
"learning_rate": 1.0038271986135177e-05,
|
11887 |
"loss": 0.2827,
|
11888 |
"step": 990000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11889 |
}
|
11890 |
],
|
11891 |
"max_steps": 1000000,
|
11892 |
"num_train_epochs": 2,
|
11893 |
-
"total_flos": 6.
|
11894 |
"trial_name": null,
|
11895 |
"trial_params": null
|
11896 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.538101577634107,
|
5 |
+
"global_step": 1000000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
11886 |
"learning_rate": 1.0038271986135177e-05,
|
11887 |
"loss": 0.2827,
|
11888 |
"step": 990000
|
11889 |
+
},
|
11890 |
+
{
|
11891 |
+
"epoch": 1.52,
|
11892 |
+
"learning_rate": 1.003454077439879e-05,
|
11893 |
+
"loss": 0.2826,
|
11894 |
+
"step": 990500
|
11895 |
+
},
|
11896 |
+
{
|
11897 |
+
"epoch": 1.52,
|
11898 |
+
"learning_rate": 1.0031000845556304e-05,
|
11899 |
+
"loss": 0.2828,
|
11900 |
+
"step": 991000
|
11901 |
+
},
|
11902 |
+
{
|
11903 |
+
"epoch": 1.52,
|
11904 |
+
"learning_rate": 1.0027652209285743e-05,
|
11905 |
+
"loss": 0.2829,
|
11906 |
+
"step": 991500
|
11907 |
+
},
|
11908 |
+
{
|
11909 |
+
"epoch": 1.52,
|
11910 |
+
"learning_rate": 1.0024494874742152e-05,
|
11911 |
+
"loss": 0.2832,
|
11912 |
+
"step": 992000
|
11913 |
+
},
|
11914 |
+
{
|
11915 |
+
"epoch": 1.52,
|
11916 |
+
"learning_rate": 1.0021528850557572e-05,
|
11917 |
+
"loss": 0.283,
|
11918 |
+
"step": 992500
|
11919 |
+
},
|
11920 |
+
{
|
11921 |
+
"epoch": 1.52,
|
11922 |
+
"learning_rate": 1.0018754144840986e-05,
|
11923 |
+
"loss": 0.2821,
|
11924 |
+
"step": 993000
|
11925 |
+
},
|
11926 |
+
{
|
11927 |
+
"epoch": 1.53,
|
11928 |
+
"learning_rate": 1.0016170765178345e-05,
|
11929 |
+
"loss": 0.2822,
|
11930 |
+
"step": 993500
|
11931 |
+
},
|
11932 |
+
{
|
11933 |
+
"epoch": 1.53,
|
11934 |
+
"learning_rate": 1.0013778718632507e-05,
|
11935 |
+
"loss": 0.2826,
|
11936 |
+
"step": 994000
|
11937 |
+
},
|
11938 |
+
{
|
11939 |
+
"epoch": 1.53,
|
11940 |
+
"learning_rate": 1.0011578011743233e-05,
|
11941 |
+
"loss": 0.2828,
|
11942 |
+
"step": 994500
|
11943 |
+
},
|
11944 |
+
{
|
11945 |
+
"epoch": 1.53,
|
11946 |
+
"learning_rate": 1.000956865052717e-05,
|
11947 |
+
"loss": 0.2827,
|
11948 |
+
"step": 995000
|
11949 |
+
},
|
11950 |
+
{
|
11951 |
+
"epoch": 1.53,
|
11952 |
+
"learning_rate": 1.0007750640477843e-05,
|
11953 |
+
"loss": 0.2829,
|
11954 |
+
"step": 995500
|
11955 |
+
},
|
11956 |
+
{
|
11957 |
+
"epoch": 1.53,
|
11958 |
+
"learning_rate": 1.0006123986565623e-05,
|
11959 |
+
"loss": 0.2829,
|
11960 |
+
"step": 996000
|
11961 |
+
},
|
11962 |
+
{
|
11963 |
+
"epoch": 1.53,
|
11964 |
+
"learning_rate": 1.0004688693237708e-05,
|
11965 |
+
"loss": 0.2832,
|
11966 |
+
"step": 996500
|
11967 |
+
},
|
11968 |
+
{
|
11969 |
+
"epoch": 1.53,
|
11970 |
+
"learning_rate": 1.0003444764418138e-05,
|
11971 |
+
"loss": 0.2835,
|
11972 |
+
"step": 997000
|
11973 |
+
},
|
11974 |
+
{
|
11975 |
+
"epoch": 1.53,
|
11976 |
+
"learning_rate": 1.0002392203507781e-05,
|
11977 |
+
"loss": 0.2832,
|
11978 |
+
"step": 997500
|
11979 |
+
},
|
11980 |
+
{
|
11981 |
+
"epoch": 1.53,
|
11982 |
+
"learning_rate": 1.000153101338428e-05,
|
11983 |
+
"loss": 0.2826,
|
11984 |
+
"step": 998000
|
11985 |
+
},
|
11986 |
+
{
|
11987 |
+
"epoch": 1.54,
|
11988 |
+
"learning_rate": 1.00008611964021e-05,
|
11989 |
+
"loss": 0.283,
|
11990 |
+
"step": 998500
|
11991 |
+
},
|
11992 |
+
{
|
11993 |
+
"epoch": 1.54,
|
11994 |
+
"learning_rate": 1.00003827543925e-05,
|
11995 |
+
"loss": 0.2832,
|
11996 |
+
"step": 999000
|
11997 |
+
},
|
11998 |
+
{
|
11999 |
+
"epoch": 1.54,
|
12000 |
+
"learning_rate": 1.0000095688663532e-05,
|
12001 |
+
"loss": 0.2827,
|
12002 |
+
"step": 999500
|
12003 |
+
},
|
12004 |
+
{
|
12005 |
+
"epoch": 1.54,
|
12006 |
+
"learning_rate": 1e-05,
|
12007 |
+
"loss": 0.2828,
|
12008 |
+
"step": 1000000
|
12009 |
+
},
|
12010 |
+
{
|
12011 |
+
"epoch": 1.54,
|
12012 |
+
"eval_loss": 0.3599591553211212,
|
12013 |
+
"eval_runtime": 50.3004,
|
12014 |
+
"eval_samples_per_second": 2519.919,
|
12015 |
+
"eval_steps_per_second": 9.861,
|
12016 |
+
"step": 1000000
|
12017 |
}
|
12018 |
],
|
12019 |
"max_steps": 1000000,
|
12020 |
"num_train_epochs": 2,
|
12021 |
+
"total_flos": 6.7607019944302705e+22,
|
12022 |
"trial_name": null,
|
12023 |
"trial_params": null
|
12024 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449450757
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6aa0642d46fe211727fefc5ac6e0bc28efa8511d1f0d9e5eee1987fa821600bc
|
3 |
size 449450757
|