Training in progress, step 55305, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:17f126e47a46daca975d4610fafaec402d025df406bc1ef1091fb3bb633e95a0
 size 306619286
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cb3ad1610c33c264ac4cc15bacd96d9f3fa7c6e6119230a90ef857002b3788af
 size 919972410
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c4482438756e43b359aec133e7b920aff9ca62a0599618ff5293c529cfac8e76
 size 1000
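The three binary files above are tracked with Git LFS, so this commit only rewrites their pointer files: a spec version line, the sha256 of the stored blob ("oid"), and the blob size in bytes. As a minimal sketch (not part of this commit; the local file names are assumptions), a downloaded checkpoint file can be checked against its pointer like this:

```python
import hashlib
import re
from pathlib import Path

def parse_lfs_pointer(text: str) -> tuple[str, int]:
    """Extract the sha256 oid and byte size from a Git LFS pointer file."""
    fields = dict(re.findall(r"^(\S+) (.+)$", text, flags=re.M))
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])

def sha256_of(path: Path) -> str:
    """Stream the file so large blobs (e.g. the 920 MB optimizer.pt) fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local paths: the pointer text as committed, and the downloaded blob.
oid, size = parse_lfs_pointer(Path("model.safetensors.pointer").read_text())
blob = Path("model.safetensors")
ok = blob.stat().st_size == size and sha256_of(blob) == oid
print("pointer matches blob:", ok)
```

If either the size or the digest differs, the local copy is not the blob this commit points at.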
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9999977398116359,
   "eval_steps": 5000,
-  "global_step": 
+  "global_step": 55305,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -38595,6 +38595,216 @@
       "eval_samples_per_second": 3138.58,
       "eval_steps_per_second": 49.042,
       "step": 55000
+    },
+    {
+      "epoch": 0.994663695272364,
+      "grad_norm": 40.125,
+      "learning_rate": 1.5541611456888346e-06,
+      "loss": 17.4001,
+      "step": 55010
+    },
+    {
+      "epoch": 0.9948445103414919,
+      "grad_norm": 40.21875,
+      "learning_rate": 1.5544436690747077e-06,
+      "loss": 17.6567,
+      "step": 55020
+    },
+    {
+      "epoch": 0.9950253254106197,
+      "grad_norm": 38.875,
+      "learning_rate": 1.554726192460581e-06,
+      "loss": 17.646,
+      "step": 55030
+    },
+    {
+      "epoch": 0.9952061404797475,
+      "grad_norm": 42.15625,
+      "learning_rate": 1.5550087158464543e-06,
+      "loss": 17.3117,
+      "step": 55040
+    },
+    {
+      "epoch": 0.9953869555488755,
+      "grad_norm": 35.9375,
+      "learning_rate": 1.5552912392323276e-06,
+      "loss": 17.9208,
+      "step": 55050
+    },
+    {
+      "epoch": 0.9955677706180033,
+      "grad_norm": 37.78125,
+      "learning_rate": 1.5555737626182007e-06,
+      "loss": 17.497,
+      "step": 55060
+    },
+    {
+      "epoch": 0.9957485856871312,
+      "grad_norm": 36.46875,
+      "learning_rate": 1.555856286004074e-06,
+      "loss": 17.4683,
+      "step": 55070
+    },
+    {
+      "epoch": 0.995929400756259,
+      "grad_norm": 38.5625,
+      "learning_rate": 1.5561388093899474e-06,
+      "loss": 17.5277,
+      "step": 55080
+    },
+    {
+      "epoch": 0.9961102158253868,
+      "grad_norm": 37.9375,
+      "learning_rate": 1.5564213327758207e-06,
+      "loss": 17.5301,
+      "step": 55090
+    },
+    {
+      "epoch": 0.9962910308945148,
+      "grad_norm": 39.3125,
+      "learning_rate": 1.556703856161694e-06,
+      "loss": 17.7091,
+      "step": 55100
+    },
+    {
+      "epoch": 0.9964718459636426,
+      "grad_norm": 42.59375,
+      "learning_rate": 1.5569863795475671e-06,
+      "loss": 17.6605,
+      "step": 55110
+    },
+    {
+      "epoch": 0.9966526610327705,
+      "grad_norm": 40.71875,
+      "learning_rate": 1.5572689029334404e-06,
+      "loss": 17.718,
+      "step": 55120
+    },
+    {
+      "epoch": 0.9968334761018983,
+      "grad_norm": 39.21875,
+      "learning_rate": 1.5575514263193137e-06,
+      "loss": 17.2612,
+      "step": 55130
+    },
+    {
+      "epoch": 0.9970142911710262,
+      "grad_norm": 38.0625,
+      "learning_rate": 1.557833949705187e-06,
+      "loss": 17.609,
+      "step": 55140
+    },
+    {
+      "epoch": 0.9971951062401541,
+      "grad_norm": 39.375,
+      "learning_rate": 1.5581164730910604e-06,
+      "loss": 18.0846,
+      "step": 55150
+    },
+    {
+      "epoch": 0.9973759213092819,
+      "grad_norm": 36.09375,
+      "learning_rate": 1.5583989964769337e-06,
+      "loss": 17.3367,
+      "step": 55160
+    },
+    {
+      "epoch": 0.9975567363784098,
+      "grad_norm": 40.6875,
+      "learning_rate": 1.5586815198628066e-06,
+      "loss": 17.6999,
+      "step": 55170
+    },
+    {
+      "epoch": 0.9977375514475376,
+      "grad_norm": 38.96875,
+      "learning_rate": 1.55896404324868e-06,
+      "loss": 17.5,
+      "step": 55180
+    },
+    {
+      "epoch": 0.9979183665166655,
+      "grad_norm": 39.375,
+      "learning_rate": 1.5592465666345532e-06,
+      "loss": 17.5552,
+      "step": 55190
+    },
+    {
+      "epoch": 0.9980991815857934,
+      "grad_norm": 41.75,
+      "learning_rate": 1.5595290900204265e-06,
+      "loss": 17.587,
+      "step": 55200
+    },
+    {
+      "epoch": 0.9982799966549212,
+      "grad_norm": 38.71875,
+      "learning_rate": 1.5598116134062999e-06,
+      "loss": 17.1768,
+      "step": 55210
+    },
+    {
+      "epoch": 0.9984608117240491,
+      "grad_norm": 38.4375,
+      "learning_rate": 1.5600941367921732e-06,
+      "loss": 17.9381,
+      "step": 55220
+    },
+    {
+      "epoch": 0.9986416267931769,
+      "grad_norm": 36.65625,
+      "learning_rate": 1.5603766601780463e-06,
+      "loss": 17.5928,
+      "step": 55230
+    },
+    {
+      "epoch": 0.9988224418623048,
+      "grad_norm": 40.8125,
+      "learning_rate": 1.5606591835639196e-06,
+      "loss": 17.5531,
+      "step": 55240
+    },
+    {
+      "epoch": 0.9990032569314327,
+      "grad_norm": 37.8125,
+      "learning_rate": 1.560941706949793e-06,
+      "loss": 17.3558,
+      "step": 55250
+    },
+    {
+      "epoch": 0.9991840720005605,
+      "grad_norm": 38.90625,
+      "learning_rate": 1.5612242303356662e-06,
+      "loss": 18.1386,
+      "step": 55260
+    },
+    {
+      "epoch": 0.9993648870696884,
+      "grad_norm": 38.28125,
+      "learning_rate": 1.5615067537215395e-06,
+      "loss": 17.7706,
+      "step": 55270
+    },
+    {
+      "epoch": 0.9995457021388162,
+      "grad_norm": 41.40625,
+      "learning_rate": 1.5617892771074129e-06,
+      "loss": 17.5942,
+      "step": 55280
+    },
+    {
+      "epoch": 0.9997265172079441,
+      "grad_norm": 39.09375,
+      "learning_rate": 1.5620718004932858e-06,
+      "loss": 17.6764,
+      "step": 55290
+    },
+    {
+      "epoch": 0.999907332277072,
+      "grad_norm": 38.65625,
+      "learning_rate": 1.562354323879159e-06,
+      "loss": 17.3797,
+      "step": 55300
     }
   ],
   "logging_steps": 10,
@@ -38609,12 +38819,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.
+  "total_flos": 9.653613618876383e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
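The trainer_state.json changes mark the end of the run's first epoch: global_step reaches 55305 at epoch ≈ 0.9999977, thirty new log_history entries cover steps 55010-55300 (loss around 17.2-18.1, grad_norm around 36-43), and should_training_stop flips to true. A minimal sketch of inspecting such a state file after downloading the checkpoint (the local path is an assumption; key names are as shown in the diff above):

```python
import json
from pathlib import Path

# Assumes the checkpoint directory from this commit has been downloaded locally.
state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print("global_step:", state["global_step"])        # 55305
print("epoch:", state["epoch"])                    # ~0.9999977
print("train_batch_size:", state["train_batch_size"])

# Last few logged training entries: one dict per logging step (every 10 steps here).
for entry in state["log_history"][-3:]:
    print(entry.get("step"), "loss:", entry.get("loss"), "grad_norm:", entry.get("grad_norm"))

# The stop flag sits under stateful_callbacks -> TrainerControl -> args in the
# standard transformers trainer_state.json layout; those outer keys are assumed,
# since they fall just above the hunk shown in this diff.
control_args = state.get("stateful_callbacks", {}).get("TrainerControl", {}).get("args", {})
print("should_training_stop:", control_args.get("should_training_stop"))
```

Resuming from this directory with transformers' Trainer.train(resume_from_checkpoint="last-checkpoint") would restore these counters along with the optimizer and scheduler saved above, though with should_training_stop already true the configured run is effectively complete.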