Training in progress, step 720000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893441093
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66a4dbe9eca38a1792482b27bee5680ed76ff9b4fd9c693743e32db4ef9e8647
|
3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449450757
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c1af069a55aa141dcc761c0323c960402d5bdd923f0a54d0011b64f759ce573
|
3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b66387fbbba596b80573df493f29c352b88c96c68303d4022fc8f1e14c19ee9e
|
3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd40898eaf2bb8f0e6b4793242d3113eb1c145ed61a59dba0b51391b3bf76148
|
3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d33131db4d0dac1ab66ab66d875733bb83b9d53bdcdbb76cdb67e79eb395bffe
|
3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b4ac527158b6aa910da1978f5ab170d69172ed0e22a278e864ebd1fa5d08f7e
|
3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c41eb120388058e9e86dafd9920ba056d2f7de5234fa0df636e9377bd5cd58c
|
3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a27da1b3b783fed75c82831476e53d38ef818a90b5db4dbe8bd2147c26f1d00
|
3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc1301019ee94a4e73634cfb2e922704c945921ceecc63c3c74ee79aae0fb318
|
3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14583
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f15d3ae110e1b804d6f357797df2bd20ffa7af4cc5c48ef5f3805abb21eebf52
|
3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2712ef8beaf45f8ea70ea16f7da83a9783b51ec4e90a9cb114e7f3a2c3044b7
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -8526,11 +8526,131 @@
|
|
8526 |
"learning_rate": 3.979699361219395e-05,
|
8527 |
"loss": 0.2956,
|
8528 |
"step": 710000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8529 |
}
|
8530 |
],
|
8531 |
"max_steps": 1000000,
|
8532 |
"num_train_epochs": 2,
|
8533 |
-
"total_flos": 4.
|
8534 |
"trial_name": null,
|
8535 |
"trial_params": null
|
8536 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.4393148861142095,
|
5 |
+
"global_step": 720000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
8526 |
"learning_rate": 3.979699361219395e-05,
|
8527 |
"loss": 0.2956,
|
8528 |
"step": 710000
|
8529 |
+
},
|
8530 |
+
{
|
8531 |
+
"epoch": 1.42,
|
8532 |
+
"learning_rate": 3.9702298612493816e-05,
|
8533 |
+
"loss": 0.2958,
|
8534 |
+
"step": 710500
|
8535 |
+
},
|
8536 |
+
{
|
8537 |
+
"epoch": 1.42,
|
8538 |
+
"learning_rate": 3.960771378517049e-05,
|
8539 |
+
"loss": 0.296,
|
8540 |
+
"step": 711000
|
8541 |
+
},
|
8542 |
+
{
|
8543 |
+
"epoch": 1.42,
|
8544 |
+
"learning_rate": 3.951323938881533e-05,
|
8545 |
+
"loss": 0.2953,
|
8546 |
+
"step": 711500
|
8547 |
+
},
|
8548 |
+
{
|
8549 |
+
"epoch": 1.42,
|
8550 |
+
"learning_rate": 3.941887568171766e-05,
|
8551 |
+
"loss": 0.2956,
|
8552 |
+
"step": 712000
|
8553 |
+
},
|
8554 |
+
{
|
8555 |
+
"epoch": 1.42,
|
8556 |
+
"learning_rate": 3.9324622921864323e-05,
|
8557 |
+
"loss": 0.2963,
|
8558 |
+
"step": 712500
|
8559 |
+
},
|
8560 |
+
{
|
8561 |
+
"epoch": 1.43,
|
8562 |
+
"learning_rate": 3.923048136693873e-05,
|
8563 |
+
"loss": 0.2951,
|
8564 |
+
"step": 713000
|
8565 |
+
},
|
8566 |
+
{
|
8567 |
+
"epoch": 1.43,
|
8568 |
+
"learning_rate": 3.913645127432028e-05,
|
8569 |
+
"loss": 0.2957,
|
8570 |
+
"step": 713500
|
8571 |
+
},
|
8572 |
+
{
|
8573 |
+
"epoch": 1.43,
|
8574 |
+
"learning_rate": 3.904253290108369e-05,
|
8575 |
+
"loss": 0.2953,
|
8576 |
+
"step": 714000
|
8577 |
+
},
|
8578 |
+
{
|
8579 |
+
"epoch": 1.43,
|
8580 |
+
"learning_rate": 3.8948726503998176e-05,
|
8581 |
+
"loss": 0.2954,
|
8582 |
+
"step": 714500
|
8583 |
+
},
|
8584 |
+
{
|
8585 |
+
"epoch": 1.43,
|
8586 |
+
"learning_rate": 3.885503233952689e-05,
|
8587 |
+
"loss": 0.2958,
|
8588 |
+
"step": 715000
|
8589 |
+
},
|
8590 |
+
{
|
8591 |
+
"epoch": 1.43,
|
8592 |
+
"learning_rate": 3.876145066382606e-05,
|
8593 |
+
"loss": 0.2948,
|
8594 |
+
"step": 715500
|
8595 |
+
},
|
8596 |
+
{
|
8597 |
+
"epoch": 1.43,
|
8598 |
+
"learning_rate": 3.86679817327444e-05,
|
8599 |
+
"loss": 0.2953,
|
8600 |
+
"step": 716000
|
8601 |
+
},
|
8602 |
+
{
|
8603 |
+
"epoch": 1.43,
|
8604 |
+
"learning_rate": 3.857462580182245e-05,
|
8605 |
+
"loss": 0.2952,
|
8606 |
+
"step": 716500
|
8607 |
+
},
|
8608 |
+
{
|
8609 |
+
"epoch": 1.43,
|
8610 |
+
"learning_rate": 3.848138312629171e-05,
|
8611 |
+
"loss": 0.2953,
|
8612 |
+
"step": 717000
|
8613 |
+
},
|
8614 |
+
{
|
8615 |
+
"epoch": 1.43,
|
8616 |
+
"learning_rate": 3.838825396107415e-05,
|
8617 |
+
"loss": 0.2962,
|
8618 |
+
"step": 717500
|
8619 |
+
},
|
8620 |
+
{
|
8621 |
+
"epoch": 1.44,
|
8622 |
+
"learning_rate": 3.8295238560781317e-05,
|
8623 |
+
"loss": 0.2957,
|
8624 |
+
"step": 718000
|
8625 |
+
},
|
8626 |
+
{
|
8627 |
+
"epoch": 1.44,
|
8628 |
+
"learning_rate": 3.820233717971374e-05,
|
8629 |
+
"loss": 0.2955,
|
8630 |
+
"step": 718500
|
8631 |
+
},
|
8632 |
+
{
|
8633 |
+
"epoch": 1.44,
|
8634 |
+
"learning_rate": 3.810955007186029e-05,
|
8635 |
+
"loss": 0.2953,
|
8636 |
+
"step": 719000
|
8637 |
+
},
|
8638 |
+
{
|
8639 |
+
"epoch": 1.44,
|
8640 |
+
"learning_rate": 3.801687749089737e-05,
|
8641 |
+
"loss": 0.295,
|
8642 |
+
"step": 719500
|
8643 |
+
},
|
8644 |
+
{
|
8645 |
+
"epoch": 1.44,
|
8646 |
+
"learning_rate": 3.792431969018824e-05,
|
8647 |
+
"loss": 0.2951,
|
8648 |
+
"step": 720000
|
8649 |
}
|
8650 |
],
|
8651 |
"max_steps": 1000000,
|
8652 |
"num_train_epochs": 2,
|
8653 |
+
"total_flos": 4.86771334519035e+22,
|
8654 |
"trial_name": null,
|
8655 |
"trial_params": null
|
8656 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449450757
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c1af069a55aa141dcc761c0323c960402d5bdd923f0a54d0011b64f759ce573
|
3 |
size 449450757
|