Training in progress, step 1050, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83945296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33b2440d2694321ee888d51ec5666ec9e866384a438fe8a89aab4937822170b1
|
3 |
size 83945296
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168149074
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d50e471a33e6d700094dc5bd8ce2bca4a2efdc1b1d33b74f03e583750f796133
|
3 |
size 168149074
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ba78f70e8450cf0ec82402ec5f695006e3cd4c321e54ba57e606421f97e4957
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eec984887369339384df18a86847622608ab8c12ab961dc4703f600d8c64c21c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.636073112487793,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-900",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -693,6 +693,119 @@
|
|
693 |
"eval_samples_per_second": 14.415,
|
694 |
"eval_steps_per_second": 1.818,
|
695 |
"step": 900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
696 |
}
|
697 |
],
|
698 |
"logging_steps": 10,
|
@@ -707,7 +820,7 @@
|
|
707 |
"early_stopping_threshold": 0.0
|
708 |
},
|
709 |
"attributes": {
|
710 |
-
"early_stopping_patience_counter":
|
711 |
}
|
712 |
},
|
713 |
"TrainerControl": {
|
@@ -721,7 +834,7 @@
|
|
721 |
"attributes": {}
|
722 |
}
|
723 |
},
|
724 |
-
"total_flos": 1.
|
725 |
"train_batch_size": 8,
|
726 |
"trial_name": null,
|
727 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.636073112487793,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-900",
|
4 |
+
"epoch": 2.278893109061313,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 1050,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
693 |
"eval_samples_per_second": 14.415,
|
694 |
"eval_steps_per_second": 1.818,
|
695 |
"step": 900
|
696 |
+
},
|
697 |
+
{
|
698 |
+
"epoch": 1.9750406945198047,
|
699 |
+
"grad_norm": 33.91230773925781,
|
700 |
+
"learning_rate": 1.067489489247974e-05,
|
701 |
+
"loss": 1.9067,
|
702 |
+
"step": 910
|
703 |
+
},
|
704 |
+
{
|
705 |
+
"epoch": 1.9967444384156265,
|
706 |
+
"grad_norm": 28.854825973510742,
|
707 |
+
"learning_rate": 1.036474508437579e-05,
|
708 |
+
"loss": 2.3281,
|
709 |
+
"step": 920
|
710 |
+
},
|
711 |
+
{
|
712 |
+
"epoch": 2.018448182311449,
|
713 |
+
"grad_norm": 18.260103225708008,
|
714 |
+
"learning_rate": 1.0056771083298894e-05,
|
715 |
+
"loss": 2.1337,
|
716 |
+
"step": 930
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 2.0401519262072707,
|
720 |
+
"grad_norm": 19.979646682739258,
|
721 |
+
"learning_rate": 9.751117453465674e-06,
|
722 |
+
"loss": 1.6345,
|
723 |
+
"step": 940
|
724 |
+
},
|
725 |
+
{
|
726 |
+
"epoch": 2.0618556701030926,
|
727 |
+
"grad_norm": 22.703859329223633,
|
728 |
+
"learning_rate": 9.447927669901284e-06,
|
729 |
+
"loss": 1.634,
|
730 |
+
"step": 950
|
731 |
+
},
|
732 |
+
{
|
733 |
+
"epoch": 2.083559413998915,
|
734 |
+
"grad_norm": 21.72873878479004,
|
735 |
+
"learning_rate": 9.147344051091682e-06,
|
736 |
+
"loss": 1.5881,
|
737 |
+
"step": 960
|
738 |
+
},
|
739 |
+
{
|
740 |
+
"epoch": 2.1052631578947367,
|
741 |
+
"grad_norm": 31.957630157470703,
|
742 |
+
"learning_rate": 8.849507692178758e-06,
|
743 |
+
"loss": 1.3856,
|
744 |
+
"step": 970
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"epoch": 2.126966901790559,
|
748 |
+
"grad_norm": 21.75389862060547,
|
749 |
+
"learning_rate": 8.554558398729726e-06,
|
750 |
+
"loss": 1.9382,
|
751 |
+
"step": 980
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"epoch": 2.148670645686381,
|
755 |
+
"grad_norm": 23.939027786254883,
|
756 |
+
"learning_rate": 8.262634621111819e-06,
|
757 |
+
"loss": 1.8201,
|
758 |
+
"step": 990
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 2.1703743895822027,
|
762 |
+
"grad_norm": 21.948673248291016,
|
763 |
+
"learning_rate": 7.97387338950315e-06,
|
764 |
+
"loss": 1.5186,
|
765 |
+
"step": 1000
|
766 |
+
},
|
767 |
+
{
|
768 |
+
"epoch": 2.192078133478025,
|
769 |
+
"grad_norm": 26.39198112487793,
|
770 |
+
"learning_rate": 7.688410249570214e-06,
|
771 |
+
"loss": 1.4693,
|
772 |
+
"step": 1010
|
773 |
+
},
|
774 |
+
{
|
775 |
+
"epoch": 2.213781877373847,
|
776 |
+
"grad_norm": 43.11570358276367,
|
777 |
+
"learning_rate": 7.4063791988421905e-06,
|
778 |
+
"loss": 1.3836,
|
779 |
+
"step": 1020
|
780 |
+
},
|
781 |
+
{
|
782 |
+
"epoch": 2.235485621269669,
|
783 |
+
"grad_norm": 21.80547523498535,
|
784 |
+
"learning_rate": 7.127912623811993e-06,
|
785 |
+
"loss": 1.9962,
|
786 |
+
"step": 1030
|
787 |
+
},
|
788 |
+
{
|
789 |
+
"epoch": 2.257189365165491,
|
790 |
+
"grad_norm": 20.71767234802246,
|
791 |
+
"learning_rate": 6.853141237793507e-06,
|
792 |
+
"loss": 1.6606,
|
793 |
+
"step": 1040
|
794 |
+
},
|
795 |
+
{
|
796 |
+
"epoch": 2.278893109061313,
|
797 |
+
"grad_norm": 21.81035614013672,
|
798 |
+
"learning_rate": 6.582194019564266e-06,
|
799 |
+
"loss": 1.4825,
|
800 |
+
"step": 1050
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 2.278893109061313,
|
804 |
+
"eval_loss": 0.6611286997795105,
|
805 |
+
"eval_runtime": 53.8731,
|
806 |
+
"eval_samples_per_second": 14.423,
|
807 |
+
"eval_steps_per_second": 1.819,
|
808 |
+
"step": 1050
|
809 |
}
|
810 |
],
|
811 |
"logging_steps": 10,
|
|
|
820 |
"early_stopping_threshold": 0.0
|
821 |
},
|
822 |
"attributes": {
|
823 |
+
"early_stopping_patience_counter": 1
|
824 |
}
|
825 |
},
|
826 |
"TrainerControl": {
|
|
|
834 |
"attributes": {}
|
835 |
}
|
836 |
},
|
837 |
+
"total_flos": 1.4722426201964544e+18,
|
838 |
"train_batch_size": 8,
|
839 |
"trial_name": null,
|
840 |
"trial_params": null
|