Training in progress, step 650000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa92f0e8b9e69e1553d8b13a15bb13ce9949137fccf0723c1cf598ce83f198b0
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b48d3a0bd417c9af2fc7e229c4f39167675dca2415013cbeac1e6dc95824f669
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e055e902e7363a164e2d5682ba553c77cec859581fb13cd45150bf96f1a362c
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:436765cf8b9dcd2a96469489c52342b1fc2a8edf0ab7af7b53c1cbd1ff9932a6
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ee8f18ff63c361ce90d137b232b2607444382342857d71c811d9abe82e89eeb
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:106d03af9d874407e7a0086ddb94edb099a500fa25e66c11a4dedce8d45fc7e2
|
| 3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b544465929a51046e9a52e629bd463b9098d69ff8cc60ad2e18003214dae8858
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -4742,11 +4742,85 @@
|
|
| 4742 |
"eval_samples_per_second": 993.184,
|
| 4743 |
"eval_steps_per_second": 15.891,
|
| 4744 |
"step": 640000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4745 |
}
|
| 4746 |
],
|
| 4747 |
"max_steps": 1000000,
|
| 4748 |
"num_train_epochs": 16,
|
| 4749 |
-
"total_flos": 4.
|
| 4750 |
"trial_name": null,
|
| 4751 |
"trial_params": null
|
| 4752 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.925634095316628,
|
| 5 |
+
"global_step": 650000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 4742 |
"eval_samples_per_second": 993.184,
|
| 4743 |
"eval_steps_per_second": 15.891,
|
| 4744 |
"step": 640000
|
| 4745 |
+
},
|
| 4746 |
+
{
|
| 4747 |
+
"epoch": 9.79,
|
| 4748 |
+
"learning_rate": 5.3801387994131576e-05,
|
| 4749 |
+
"loss": 0.2501,
|
| 4750 |
+
"step": 641000
|
| 4751 |
+
},
|
| 4752 |
+
{
|
| 4753 |
+
"epoch": 9.8,
|
| 4754 |
+
"learning_rate": 5.358686991636209e-05,
|
| 4755 |
+
"loss": 0.2503,
|
| 4756 |
+
"step": 642000
|
| 4757 |
+
},
|
| 4758 |
+
{
|
| 4759 |
+
"epoch": 9.82,
|
| 4760 |
+
"learning_rate": 5.3372640688351476e-05,
|
| 4761 |
+
"loss": 0.2505,
|
| 4762 |
+
"step": 643000
|
| 4763 |
+
},
|
| 4764 |
+
{
|
| 4765 |
+
"epoch": 9.83,
|
| 4766 |
+
"learning_rate": 5.315870265287618e-05,
|
| 4767 |
+
"loss": 0.2502,
|
| 4768 |
+
"step": 644000
|
| 4769 |
+
},
|
| 4770 |
+
{
|
| 4771 |
+
"epoch": 9.85,
|
| 4772 |
+
"learning_rate": 5.294505814952835e-05,
|
| 4773 |
+
"loss": 0.2501,
|
| 4774 |
+
"step": 645000
|
| 4775 |
+
},
|
| 4776 |
+
{
|
| 4777 |
+
"epoch": 9.85,
|
| 4778 |
+
"eval_runtime": 1.0688,
|
| 4779 |
+
"eval_samples_per_second": 935.652,
|
| 4780 |
+
"eval_steps_per_second": 14.97,
|
| 4781 |
+
"step": 645000
|
| 4782 |
+
},
|
| 4783 |
+
{
|
| 4784 |
+
"epoch": 9.86,
|
| 4785 |
+
"learning_rate": 5.2731709514689995e-05,
|
| 4786 |
+
"loss": 0.2502,
|
| 4787 |
+
"step": 646000
|
| 4788 |
+
},
|
| 4789 |
+
{
|
| 4790 |
+
"epoch": 9.88,
|
| 4791 |
+
"learning_rate": 5.25186590815076e-05,
|
| 4792 |
+
"loss": 0.2501,
|
| 4793 |
+
"step": 647000
|
| 4794 |
+
},
|
| 4795 |
+
{
|
| 4796 |
+
"epoch": 9.9,
|
| 4797 |
+
"learning_rate": 5.2305909179866635e-05,
|
| 4798 |
+
"loss": 0.2495,
|
| 4799 |
+
"step": 648000
|
| 4800 |
+
},
|
| 4801 |
+
{
|
| 4802 |
+
"epoch": 9.91,
|
| 4803 |
+
"learning_rate": 5.209346213636584e-05,
|
| 4804 |
+
"loss": 0.2498,
|
| 4805 |
+
"step": 649000
|
| 4806 |
+
},
|
| 4807 |
+
{
|
| 4808 |
+
"epoch": 9.93,
|
| 4809 |
+
"learning_rate": 5.188132027429215e-05,
|
| 4810 |
+
"loss": 0.2495,
|
| 4811 |
+
"step": 650000
|
| 4812 |
+
},
|
| 4813 |
+
{
|
| 4814 |
+
"epoch": 9.93,
|
| 4815 |
+
"eval_runtime": 1.0361,
|
| 4816 |
+
"eval_samples_per_second": 965.164,
|
| 4817 |
+
"eval_steps_per_second": 15.443,
|
| 4818 |
+
"step": 650000
|
| 4819 |
}
|
| 4820 |
],
|
| 4821 |
"max_steps": 1000000,
|
| 4822 |
"num_train_epochs": 16,
|
| 4823 |
+
"total_flos": 4.556511053359226e+22,
|
| 4824 |
"trial_name": null,
|
| 4825 |
"trial_params": null
|
| 4826 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b48d3a0bd417c9af2fc7e229c4f39167675dca2415013cbeac1e6dc95824f669
|
| 3 |
size 449471589
|