Training in progress, step 130000, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step130000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step130000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
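
The files above are Git LFS pointers for a PEFT adapter plus DeepSpeed ZeRO optimizer shards, together with the Trainer bookkeeping in trainer_state.json. As a minimal sketch (not taken from this repo), resuming from such a checkpoint with the Hugging Face Trainer could look like the following; `output`, `ds_config.json`, `model`, and `train_dataset` are placeholder assumptions, while the batch size, logging, and eval settings mirror trainer_state.json:

```python
# Hedged sketch: resume training from a checkpoint laid out like this one.
# "output", "ds_config.json", `model`, and `train_dataset` are placeholders,
# not taken from this commit; batch size / logging / eval steps mirror
# trainer_state.json ("train_batch_size": 2, "logging_steps": 50,
# "eval_steps": 1000).
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="output",             # directory that holds last-checkpoint/
    per_device_train_batch_size=2,
    logging_steps=50,
    eval_steps=1000,
    deepspeed="ds_config.json",      # assumed ZeRO config; the
)                                    # bf16_zero_*_optim_states.pt shards
                                     # imply DeepSpeed was used

# `model` and `train_dataset` are assumed to be defined as in the original run.
trainer = Trainer(model=model, args=args, train_dataset=train_dataset)

# Restores model/optimizer/scheduler and RNG state, then continues from step 130000.
trainer.train(resume_from_checkpoint="output/last-checkpoint")
```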
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d3181ba5c18e42ea6a2e47ced69a1726cab9aedbdb76f8d38d78b6f84e74b0eb
 size 42002584
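
Each body shown in these diffs is a Git LFS pointer (version, oid, size fields, one `key value` pair per line), not the binary payload itself. A minimal sketch for reading such a pointer, assuming the file is still in pointer form locally:

```python
# Minimal sketch: parse a Git LFS pointer file like the ones shown in this
# diff. The path below is a placeholder.
def parse_lfs_pointer(path):
    """Return the pointer's key/value fields, e.g. version, oid, and size."""
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer("last-checkpoint/adapter_model.safetensors")
print(ptr["oid"])   # e.g. sha256:d3181ba5c18e42ea6a2e47ced69a1726cab9aedbdb76f8d38d78b6f84e74b0eb
print(ptr["size"])  # e.g. 42002584 (bytes)
```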
last-checkpoint/global_step130000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f207ff949e4cab55b34cb9dc8ab7a680dec693cded77cd6c6fa0c3f5916bc33a
+size 251710672
last-checkpoint/global_step130000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cbfe59546c9af41b4e786372538a8ddbc00c63ed88cc4bea9a66ac90a180aa1
+size 153747385
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-
+global_step130000
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9a6411ad924de80114eb71f3836dc652ae4d7de32682ad3c82b390b42d21c9d5
 size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.
+  "epoch": 3.881408055414564,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 130000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -16814,6 +16814,1406 @@
       "learning_rate": 0.0001657158040903091,
       "loss": 1.2539,
       "step": 120000
+    },
+    {
+      "epoch": 3.584331054250142,
+      "grad_norm": 6.447416305541992,
+      "learning_rate": 0.00016570151829439027,
+      "loss": 1.2407,
+      "step": 120050
+    },
+    {
+      "epoch": 3.5858239035022246,
+      "grad_norm": 4.890771389007568,
+      "learning_rate": 0.00016568723249847143,
+      "loss": 1.282,
+      "step": 120100
+    },
+    {
+      "epoch": 3.587316752754307,
+      "grad_norm": 3.6665565967559814,
+      "learning_rate": 0.0001656729467025526,
+      "loss": 1.269,
+      "step": 120150
+    },
+    {
+      "epoch": 3.5888096020063895,
+      "grad_norm": 3.8311381340026855,
+      "learning_rate": 0.00016565866090663376,
+      "loss": 1.2546,
+      "step": 120200
+    },
+    {
+      "epoch": 3.590302451258472,
+      "grad_norm": 3.9717676639556885,
+      "learning_rate": 0.00016564437511071493,
+      "loss": 1.2402,
+      "step": 120250
+    },
+    {
+      "epoch": 3.5917953005105545,
+      "grad_norm": 5.8756585121154785,
+      "learning_rate": 0.0001656300893147961,
+      "loss": 1.2934,
+      "step": 120300
+    },
+    {
+      "epoch": 3.593288149762637,
+      "grad_norm": 4.234692573547363,
+      "learning_rate": 0.00016561580351887725,
+      "loss": 1.2773,
+      "step": 120350
+    },
+    {
+      "epoch": 3.5947809990147195,
+      "grad_norm": 5.237219333648682,
+      "learning_rate": 0.00016560151772295842,
+      "loss": 1.2638,
+      "step": 120400
+    },
+    {
+      "epoch": 3.5962738482668017,
+      "grad_norm": 3.977701187133789,
+      "learning_rate": 0.0001655872319270396,
+      "loss": 1.2452,
+      "step": 120450
+    },
+    {
+      "epoch": 3.5977666975188844,
+      "grad_norm": 4.238033294677734,
+      "learning_rate": 0.00016557294613112075,
+      "loss": 1.2813,
+      "step": 120500
+    },
+    {
+      "epoch": 3.599259546770967,
+      "grad_norm": 5.174859523773193,
+      "learning_rate": 0.00016555866033520194,
+      "loss": 1.2477,
+      "step": 120550
+    },
+    {
+      "epoch": 3.60075239602305,
+      "grad_norm": 4.832043647766113,
+      "learning_rate": 0.00016554437453928308,
+      "loss": 1.2081,
+      "step": 120600
+    },
+    {
+      "epoch": 3.602245245275132,
+      "grad_norm": 5.991222858428955,
+      "learning_rate": 0.00016553008874336427,
+      "loss": 1.2275,
+      "step": 120650
+    },
+    {
+      "epoch": 3.6037380945272144,
+      "grad_norm": 5.4362101554870605,
+      "learning_rate": 0.0001655158029474454,
+      "loss": 1.262,
+      "step": 120700
+    },
+    {
+      "epoch": 3.605230943779297,
+      "grad_norm": 5.659782409667969,
+      "learning_rate": 0.0001655015171515266,
+      "loss": 1.3115,
+      "step": 120750
+    },
+    {
+      "epoch": 3.60672379303138,
+      "grad_norm": 4.6319146156311035,
+      "learning_rate": 0.00016548723135560776,
+      "loss": 1.3261,
+      "step": 120800
+    },
+    {
+      "epoch": 3.608216642283462,
+      "grad_norm": 4.129281520843506,
+      "learning_rate": 0.00016547294555968893,
+      "loss": 1.3001,
+      "step": 120850
+    },
+    {
+      "epoch": 3.6097094915355448,
+      "grad_norm": 4.629218101501465,
+      "learning_rate": 0.0001654586597637701,
+      "loss": 1.2856,
+      "step": 120900
+    },
+    {
+      "epoch": 3.611202340787627,
+      "grad_norm": 4.741670608520508,
+      "learning_rate": 0.00016544437396785126,
+      "loss": 1.2671,
+      "step": 120950
+    },
+    {
+      "epoch": 3.6126951900397097,
+      "grad_norm": 3.8376450538635254,
+      "learning_rate": 0.00016543008817193242,
+      "loss": 1.1981,
+      "step": 121000
+    },
+    {
+      "epoch": 3.6141880392917924,
+      "grad_norm": 5.130064010620117,
+      "learning_rate": 0.0001654158023760136,
+      "loss": 1.2174,
+      "step": 121050
+    },
+    {
+      "epoch": 3.6156808885438747,
+      "grad_norm": 4.116199970245361,
+      "learning_rate": 0.00016540151658009475,
+      "loss": 1.2786,
+      "step": 121100
+    },
+    {
+      "epoch": 3.6171737377959574,
+      "grad_norm": 3.8612489700317383,
+      "learning_rate": 0.00016538723078417592,
+      "loss": 1.2788,
+      "step": 121150
+    },
+    {
+      "epoch": 3.6186665870480397,
+      "grad_norm": 4.064988136291504,
+      "learning_rate": 0.00016537294498825708,
+      "loss": 1.2465,
+      "step": 121200
+    },
+    {
+      "epoch": 3.6201594363001224,
+      "grad_norm": 4.097548007965088,
+      "learning_rate": 0.00016535865919233825,
+      "loss": 1.2659,
+      "step": 121250
+    },
+    {
+      "epoch": 3.621652285552205,
+      "grad_norm": 4.937625885009766,
+      "learning_rate": 0.0001653443733964194,
+      "loss": 1.2071,
+      "step": 121300
+    },
+    {
+      "epoch": 3.6231451348042873,
+      "grad_norm": 4.893852710723877,
+      "learning_rate": 0.00016533008760050057,
+      "loss": 1.2962,
+      "step": 121350
+    },
+    {
+      "epoch": 3.62463798405637,
+      "grad_norm": 5.306116580963135,
+      "learning_rate": 0.00016531580180458174,
+      "loss": 1.27,
+      "step": 121400
+    },
+    {
+      "epoch": 3.6261308333084523,
+      "grad_norm": 4.8264360427856445,
+      "learning_rate": 0.0001653015160086629,
+      "loss": 1.2717,
+      "step": 121450
+    },
+    {
+      "epoch": 3.627623682560535,
+      "grad_norm": 4.300417423248291,
+      "learning_rate": 0.00016528723021274407,
+      "loss": 1.3015,
+      "step": 121500
+    },
+    {
+      "epoch": 3.6291165318126177,
+      "grad_norm": 4.886301517486572,
+      "learning_rate": 0.00016527294441682523,
+      "loss": 1.2295,
+      "step": 121550
+    },
+    {
+      "epoch": 3.6306093810647,
+      "grad_norm": 5.038539409637451,
+      "learning_rate": 0.00016525865862090643,
+      "loss": 1.2542,
+      "step": 121600
+    },
+    {
+      "epoch": 3.6321022303167827,
+      "grad_norm": 4.360227584838867,
+      "learning_rate": 0.00016524437282498756,
+      "loss": 1.2712,
+      "step": 121650
+    },
+    {
+      "epoch": 3.633595079568865,
+      "grad_norm": 4.984081745147705,
+      "learning_rate": 0.00016523008702906875,
+      "loss": 1.2949,
+      "step": 121700
+    },
+    {
+      "epoch": 3.6350879288209477,
+      "grad_norm": 5.2939229011535645,
+      "learning_rate": 0.0001652158012331499,
+      "loss": 1.2393,
+      "step": 121750
+    },
+    {
+      "epoch": 3.6365807780730304,
+      "grad_norm": 5.242954730987549,
+      "learning_rate": 0.00016520151543723108,
+      "loss": 1.1846,
+      "step": 121800
+    },
+    {
+      "epoch": 3.6380736273251126,
+      "grad_norm": 4.06920862197876,
+      "learning_rate": 0.00016518722964131225,
+      "loss": 1.2599,
+      "step": 121850
+    },
+    {
+      "epoch": 3.6395664765771953,
+      "grad_norm": 5.329120635986328,
+      "learning_rate": 0.0001651729438453934,
+      "loss": 1.2407,
+      "step": 121900
+    },
+    {
+      "epoch": 3.6410593258292776,
+      "grad_norm": 5.278786659240723,
+      "learning_rate": 0.00016515865804947458,
+      "loss": 1.2645,
+      "step": 121950
+    },
+    {
+      "epoch": 3.6425521750813603,
+      "grad_norm": 3.5512640476226807,
+      "learning_rate": 0.00016514437225355574,
+      "loss": 1.2492,
+      "step": 122000
+    },
+    {
+      "epoch": 3.644045024333443,
+      "grad_norm": 3.9851057529449463,
+      "learning_rate": 0.0001651300864576369,
+      "loss": 1.27,
+      "step": 122050
+    },
+    {
+      "epoch": 3.6455378735855253,
+      "grad_norm": 2.720550298690796,
+      "learning_rate": 0.00016511580066171807,
+      "loss": 1.2387,
+      "step": 122100
+    },
+    {
+      "epoch": 3.647030722837608,
+      "grad_norm": 4.045743465423584,
+      "learning_rate": 0.00016510151486579924,
+      "loss": 1.2768,
+      "step": 122150
+    },
+    {
+      "epoch": 3.6485235720896902,
+      "grad_norm": 4.347272872924805,
+      "learning_rate": 0.0001650872290698804,
+      "loss": 1.2114,
+      "step": 122200
+    },
+    {
+      "epoch": 3.650016421341773,
+      "grad_norm": 4.8486151695251465,
+      "learning_rate": 0.00016507294327396157,
+      "loss": 1.2761,
+      "step": 122250
+    },
+    {
+      "epoch": 3.6515092705938557,
+      "grad_norm": 4.846822261810303,
+      "learning_rate": 0.00016505865747804276,
+      "loss": 1.2563,
+      "step": 122300
+    },
+    {
+      "epoch": 3.653002119845938,
+      "grad_norm": 5.770145893096924,
+      "learning_rate": 0.0001650443716821239,
+      "loss": 1.2244,
+      "step": 122350
+    },
+    {
+      "epoch": 3.6544949690980206,
+      "grad_norm": 4.940767765045166,
+      "learning_rate": 0.0001650300858862051,
+      "loss": 1.2658,
+      "step": 122400
+    },
+    {
+      "epoch": 3.655987818350103,
+      "grad_norm": 4.317561626434326,
+      "learning_rate": 0.00016501580009028622,
+      "loss": 1.2454,
+      "step": 122450
+    },
+    {
+      "epoch": 3.6574806676021856,
+      "grad_norm": 4.162029266357422,
+      "learning_rate": 0.00016500151429436742,
+      "loss": 1.2757,
+      "step": 122500
+    },
+    {
+      "epoch": 3.6589735168542683,
+      "grad_norm": 3.620096445083618,
+      "learning_rate": 0.00016498722849844855,
+      "loss": 1.2615,
+      "step": 122550
+    },
+    {
+      "epoch": 3.6604663661063506,
+      "grad_norm": 4.369839191436768,
+      "learning_rate": 0.00016497294270252975,
+      "loss": 1.2435,
+      "step": 122600
+    },
+    {
+      "epoch": 3.661959215358433,
+      "grad_norm": 5.848557472229004,
+      "learning_rate": 0.0001649586569066109,
+      "loss": 1.2422,
+      "step": 122650
+    },
+    {
+      "epoch": 3.6634520646105155,
+      "grad_norm": 4.413859844207764,
+      "learning_rate": 0.00016494437111069207,
+      "loss": 1.2505,
+      "step": 122700
+    },
+    {
+      "epoch": 3.6649449138625982,
+      "grad_norm": 4.773179531097412,
+      "learning_rate": 0.00016493008531477324,
+      "loss": 1.2856,
+      "step": 122750
+    },
+    {
+      "epoch": 3.666437763114681,
+      "grad_norm": 6.004321098327637,
+      "learning_rate": 0.0001649157995188544,
+      "loss": 1.2183,
+      "step": 122800
+    },
+    {
+      "epoch": 3.667930612366763,
+      "grad_norm": 4.601461410522461,
+      "learning_rate": 0.00016490151372293557,
+      "loss": 1.2328,
+      "step": 122850
+    },
+    {
+      "epoch": 3.6694234616188455,
+      "grad_norm": 4.456672668457031,
+      "learning_rate": 0.00016488722792701673,
+      "loss": 1.2269,
+      "step": 122900
+    },
+    {
+      "epoch": 3.670916310870928,
+      "grad_norm": 4.381960391998291,
+      "learning_rate": 0.0001648729421310979,
+      "loss": 1.2971,
+      "step": 122950
+    },
+    {
+      "epoch": 3.672409160123011,
+      "grad_norm": 4.233649730682373,
+      "learning_rate": 0.00016485865633517906,
+      "loss": 1.2597,
+      "step": 123000
+    },
+    {
+      "epoch": 3.673902009375093,
+      "grad_norm": 3.898301601409912,
+      "learning_rate": 0.00016484437053926023,
+      "loss": 1.2237,
+      "step": 123050
+    },
+    {
+      "epoch": 3.675394858627176,
+      "grad_norm": 4.5442585945129395,
+      "learning_rate": 0.00016483008474334142,
+      "loss": 1.2503,
+      "step": 123100
+    },
+    {
+      "epoch": 3.676887707879258,
+      "grad_norm": 6.81040620803833,
+      "learning_rate": 0.00016481579894742256,
+      "loss": 1.2685,
+      "step": 123150
+    },
+    {
+      "epoch": 3.678380557131341,
+      "grad_norm": 3.5733816623687744,
+      "learning_rate": 0.00016480151315150375,
+      "loss": 1.2621,
+      "step": 123200
+    },
+    {
+      "epoch": 3.6798734063834235,
+      "grad_norm": 5.7314252853393555,
+      "learning_rate": 0.00016478722735558489,
+      "loss": 1.2356,
+      "step": 123250
+    },
+    {
+      "epoch": 3.681366255635506,
+      "grad_norm": 3.6596696376800537,
+      "learning_rate": 0.00016477294155966608,
+      "loss": 1.2131,
+      "step": 123300
+    },
+    {
+      "epoch": 3.6828591048875885,
+      "grad_norm": 4.004091739654541,
+      "learning_rate": 0.00016475865576374722,
+      "loss": 1.2796,
+      "step": 123350
+    },
+    {
+      "epoch": 3.6843519541396708,
+      "grad_norm": 4.247511863708496,
+      "learning_rate": 0.0001647443699678284,
+      "loss": 1.2252,
+      "step": 123400
+    },
+    {
+      "epoch": 3.6858448033917535,
+      "grad_norm": 4.852141380310059,
+      "learning_rate": 0.00016473008417190957,
+      "loss": 1.2485,
+      "step": 123450
+    },
+    {
+      "epoch": 3.687337652643836,
+      "grad_norm": 4.641091346740723,
+      "learning_rate": 0.00016471579837599074,
+      "loss": 1.2414,
+      "step": 123500
+    },
+    {
+      "epoch": 3.6888305018959184,
+      "grad_norm": 3.5827832221984863,
+      "learning_rate": 0.0001647015125800719,
+      "loss": 1.2304,
+      "step": 123550
+    },
+    {
+      "epoch": 3.690323351148001,
+      "grad_norm": 3.877167224884033,
+      "learning_rate": 0.00016468722678415307,
+      "loss": 1.2609,
+      "step": 123600
+    },
+    {
+      "epoch": 3.6918162004000834,
+      "grad_norm": 4.917414665222168,
+      "learning_rate": 0.00016467294098823423,
+      "loss": 1.3357,
+      "step": 123650
+    },
+    {
+      "epoch": 3.693309049652166,
+      "grad_norm": 5.0784406661987305,
+      "learning_rate": 0.0001646586551923154,
+      "loss": 1.2776,
+      "step": 123700
+    },
+    {
+      "epoch": 3.694801898904249,
+      "grad_norm": 5.322165489196777,
+      "learning_rate": 0.00016464436939639656,
+      "loss": 1.265,
+      "step": 123750
+    },
+    {
+      "epoch": 3.696294748156331,
+      "grad_norm": 4.516918182373047,
+      "learning_rate": 0.00016463008360047772,
+      "loss": 1.2559,
+      "step": 123800
+    },
+    {
+      "epoch": 3.697787597408414,
+      "grad_norm": 4.489822864532471,
+      "learning_rate": 0.0001646157978045589,
+      "loss": 1.2206,
+      "step": 123850
+    },
+    {
+      "epoch": 3.699280446660496,
+      "grad_norm": 5.039775848388672,
+      "learning_rate": 0.00016460151200864005,
+      "loss": 1.2285,
+      "step": 123900
+    },
+    {
+      "epoch": 3.7007732959125788,
+      "grad_norm": 3.899019241333008,
+      "learning_rate": 0.00016458722621272122,
+      "loss": 1.2484,
+      "step": 123950
+    },
+    {
+      "epoch": 3.7022661451646615,
+      "grad_norm": 5.370189189910889,
+      "learning_rate": 0.00016457294041680238,
+      "loss": 1.2901,
+      "step": 124000
+    },
+    {
+      "epoch": 3.7037589944167437,
+      "grad_norm": 4.206950664520264,
+      "learning_rate": 0.00016455865462088355,
+      "loss": 1.2384,
+      "step": 124050
+    },
+    {
+      "epoch": 3.7052518436688264,
+      "grad_norm": 6.357316970825195,
+      "learning_rate": 0.0001645443688249647,
+      "loss": 1.2222,
+      "step": 124100
+    },
+    {
+      "epoch": 3.7067446929209087,
+      "grad_norm": 5.480586051940918,
+      "learning_rate": 0.00016453008302904588,
+      "loss": 1.2402,
+      "step": 124150
+    },
+    {
+      "epoch": 3.7082375421729914,
+      "grad_norm": 4.471677303314209,
+      "learning_rate": 0.00016451579723312704,
+      "loss": 1.2962,
+      "step": 124200
+    },
+    {
+      "epoch": 3.709730391425074,
+      "grad_norm": 4.873519420623779,
+      "learning_rate": 0.00016450151143720823,
+      "loss": 1.2438,
+      "step": 124250
+    },
+    {
+      "epoch": 3.7112232406771564,
+      "grad_norm": 4.237666130065918,
+      "learning_rate": 0.00016448722564128937,
+      "loss": 1.3002,
+      "step": 124300
+    },
+    {
+      "epoch": 3.712716089929239,
+      "grad_norm": 5.313760280609131,
+      "learning_rate": 0.00016447293984537056,
+      "loss": 1.217,
+      "step": 124350
+    },
+    {
+      "epoch": 3.7142089391813213,
+      "grad_norm": 6.441144943237305,
+      "learning_rate": 0.0001644586540494517,
+      "loss": 1.3176,
+      "step": 124400
+    },
+    {
+      "epoch": 3.715701788433404,
+      "grad_norm": 5.12811803817749,
+      "learning_rate": 0.0001644443682535329,
+      "loss": 1.2937,
+      "step": 124450
+    },
+    {
+      "epoch": 3.7171946376854867,
+      "grad_norm": 4.984979152679443,
+      "learning_rate": 0.00016443008245761406,
+      "loss": 1.1999,
+      "step": 124500
+    },
+    {
+      "epoch": 3.718687486937569,
+      "grad_norm": 4.063045501708984,
+      "learning_rate": 0.00016441579666169522,
+      "loss": 1.235,
+      "step": 124550
+    },
+    {
+      "epoch": 3.7201803361896517,
+      "grad_norm": 3.6467928886413574,
+      "learning_rate": 0.00016440151086577639,
+      "loss": 1.1857,
+      "step": 124600
+    },
+    {
+      "epoch": 3.721673185441734,
+      "grad_norm": 4.508729934692383,
+      "learning_rate": 0.00016438722506985755,
+      "loss": 1.3138,
+      "step": 124650
+    },
+    {
+      "epoch": 3.7231660346938167,
+      "grad_norm": 5.27504825592041,
+      "learning_rate": 0.00016437293927393872,
+      "loss": 1.2026,
+      "step": 124700
+    },
+    {
+      "epoch": 3.7246588839458994,
+      "grad_norm": 4.412220478057861,
+      "learning_rate": 0.00016435865347801988,
+      "loss": 1.2699,
+      "step": 124750
+    },
+    {
+      "epoch": 3.7261517331979817,
+      "grad_norm": 5.649445533752441,
+      "learning_rate": 0.00016434436768210104,
+      "loss": 1.2803,
+      "step": 124800
+    },
+    {
+      "epoch": 3.7276445824500644,
+      "grad_norm": 3.868227005004883,
+      "learning_rate": 0.0001643300818861822,
+      "loss": 1.2614,
+      "step": 124850
+    },
+    {
+      "epoch": 3.7291374317021466,
+      "grad_norm": 4.377996444702148,
+      "learning_rate": 0.00016431579609026337,
+      "loss": 1.2548,
+      "step": 124900
+    },
+    {
+      "epoch": 3.7306302809542293,
+      "grad_norm": 5.023475646972656,
+      "learning_rate": 0.00016430151029434454,
+      "loss": 1.2775,
+      "step": 124950
+    },
+    {
+      "epoch": 3.732123130206312,
+      "grad_norm": 8.013720512390137,
+      "learning_rate": 0.0001642872244984257,
+      "loss": 1.2716,
+      "step": 125000
+    },
+    {
+      "epoch": 3.7336159794583943,
+      "grad_norm": 4.947465419769287,
+      "learning_rate": 0.0001642729387025069,
+      "loss": 1.2681,
+      "step": 125050
+    },
+    {
+      "epoch": 3.7351088287104766,
+      "grad_norm": 3.9547464847564697,
+      "learning_rate": 0.00016425865290658803,
+      "loss": 1.244,
+      "step": 125100
+    },
+    {
+      "epoch": 3.7366016779625593,
+      "grad_norm": 5.331160545349121,
+      "learning_rate": 0.00016424436711066922,
+      "loss": 1.3086,
+      "step": 125150
+    },
+    {
+      "epoch": 3.738094527214642,
+      "grad_norm": 3.980027914047241,
+      "learning_rate": 0.00016423008131475036,
+      "loss": 1.2461,
+      "step": 125200
+    },
+    {
+      "epoch": 3.7395873764667242,
+      "grad_norm": 3.4833130836486816,
+      "learning_rate": 0.00016421579551883155,
+      "loss": 1.2107,
+      "step": 125250
+    },
+    {
+      "epoch": 3.741080225718807,
+      "grad_norm": 4.746307373046875,
+      "learning_rate": 0.00016420150972291272,
+      "loss": 1.2875,
+      "step": 125300
+    },
+    {
+      "epoch": 3.742573074970889,
+      "grad_norm": 3.9247684478759766,
+      "learning_rate": 0.00016418722392699388,
+      "loss": 1.2185,
+      "step": 125350
+    },
+    {
+      "epoch": 3.744065924222972,
+      "grad_norm": 5.135631084442139,
+      "learning_rate": 0.00016417293813107505,
+      "loss": 1.2512,
+      "step": 125400
+    },
+    {
+      "epoch": 3.7455587734750546,
+      "grad_norm": 4.875301361083984,
+      "learning_rate": 0.0001641586523351562,
+      "loss": 1.2794,
+      "step": 125450
+    },
+    {
+      "epoch": 3.747051622727137,
+      "grad_norm": 5.335410118103027,
+      "learning_rate": 0.00016414436653923738,
+      "loss": 1.2617,
+      "step": 125500
+    },
+    {
+      "epoch": 3.7485444719792196,
+      "grad_norm": 4.500457286834717,
+      "learning_rate": 0.00016413008074331854,
+      "loss": 1.2482,
+      "step": 125550
+    },
+    {
+      "epoch": 3.750037321231302,
+      "grad_norm": 4.4383087158203125,
+      "learning_rate": 0.0001641157949473997,
+      "loss": 1.2534,
+      "step": 125600
+    },
+    {
+      "epoch": 3.7515301704833846,
+      "grad_norm": 4.272806644439697,
+      "learning_rate": 0.00016410150915148087,
+      "loss": 1.2564,
+      "step": 125650
+    },
+    {
+      "epoch": 3.7530230197354673,
+      "grad_norm": 6.498635292053223,
+      "learning_rate": 0.00016408722335556204,
+      "loss": 1.2422,
+      "step": 125700
+    },
+    {
+      "epoch": 3.7545158689875495,
+      "grad_norm": 4.720774173736572,
+      "learning_rate": 0.00016407293755964323,
+      "loss": 1.2543,
+      "step": 125750
+    },
+    {
+      "epoch": 3.7560087182396322,
+      "grad_norm": 4.762580871582031,
+      "learning_rate": 0.00016405865176372436,
+      "loss": 1.2453,
+      "step": 125800
+    },
+    {
+      "epoch": 3.7575015674917145,
+      "grad_norm": 3.98309326171875,
+      "learning_rate": 0.00016404436596780556,
+      "loss": 1.2295,
+      "step": 125850
+    },
+    {
+      "epoch": 3.758994416743797,
+      "grad_norm": 5.4489336013793945,
+      "learning_rate": 0.0001640300801718867,
+      "loss": 1.2466,
+      "step": 125900
+    },
+    {
+      "epoch": 3.76048726599588,
+      "grad_norm": 5.064475059509277,
+      "learning_rate": 0.00016401579437596789,
+      "loss": 1.2451,
+      "step": 125950
+    },
+    {
+      "epoch": 3.761980115247962,
+      "grad_norm": 4.22551155090332,
+      "learning_rate": 0.00016400150858004902,
+      "loss": 1.2346,
+      "step": 126000
+    },
+    {
+      "epoch": 3.763472964500045,
+      "grad_norm": 4.364709854125977,
+      "learning_rate": 0.00016398722278413021,
+      "loss": 1.2894,
+      "step": 126050
+    },
+    {
+      "epoch": 3.764965813752127,
+      "grad_norm": 4.05735969543457,
+      "learning_rate": 0.00016397293698821138,
+      "loss": 1.1943,
+      "step": 126100
+    },
+    {
+      "epoch": 3.76645866300421,
+      "grad_norm": 4.884429931640625,
+      "learning_rate": 0.00016395865119229254,
+      "loss": 1.2587,
+      "step": 126150
+    },
+    {
+      "epoch": 3.7679515122562925,
+      "grad_norm": 4.451826572418213,
+      "learning_rate": 0.0001639443653963737,
+      "loss": 1.2856,
+      "step": 126200
+    },
+    {
+      "epoch": 3.769444361508375,
+      "grad_norm": 5.6031646728515625,
+      "learning_rate": 0.00016393007960045487,
+      "loss": 1.2705,
+      "step": 126250
+    },
+    {
+      "epoch": 3.7709372107604575,
+      "grad_norm": 4.248857498168945,
+      "learning_rate": 0.00016391579380453604,
+      "loss": 1.3043,
+      "step": 126300
+    },
+    {
+      "epoch": 3.7724300600125398,
+      "grad_norm": 4.71324348449707,
+      "learning_rate": 0.0001639015080086172,
+      "loss": 1.2658,
+      "step": 126350
+    },
+    {
+      "epoch": 3.7739229092646225,
+      "grad_norm": 5.27174186706543,
+      "learning_rate": 0.00016388722221269837,
+      "loss": 1.2325,
+      "step": 126400
+    },
+    {
+      "epoch": 3.775415758516705,
+      "grad_norm": 4.696889877319336,
+      "learning_rate": 0.00016387293641677953,
+      "loss": 1.2819,
+      "step": 126450
+    },
+    {
+      "epoch": 3.7769086077687875,
+      "grad_norm": 5.210821151733398,
+      "learning_rate": 0.0001638586506208607,
+      "loss": 1.2733,
+      "step": 126500
+    },
+    {
+      "epoch": 3.77840145702087,
+      "grad_norm": 4.370712757110596,
+      "learning_rate": 0.00016384436482494186,
+      "loss": 1.2121,
+      "step": 126550
+    },
+    {
+      "epoch": 3.7798943062729524,
+      "grad_norm": 3.9094676971435547,
+      "learning_rate": 0.00016383007902902303,
+      "loss": 1.2744,
+      "step": 126600
+    },
+    {
+      "epoch": 3.781387155525035,
+      "grad_norm": 4.375615119934082,
+      "learning_rate": 0.0001638157932331042,
+      "loss": 1.2808,
+      "step": 126650
+    },
+    {
+      "epoch": 3.782880004777118,
+      "grad_norm": 4.995912075042725,
+      "learning_rate": 0.00016380150743718536,
+      "loss": 1.3116,
+      "step": 126700
+    },
+    {
+      "epoch": 3.7843728540292,
+      "grad_norm": 3.9363934993743896,
+      "learning_rate": 0.00016378722164126652,
+      "loss": 1.2554,
+      "step": 126750
+    },
+    {
+      "epoch": 3.785865703281283,
+      "grad_norm": 3.885279893875122,
+      "learning_rate": 0.00016377293584534768,
+      "loss": 1.2099,
+      "step": 126800
+    },
+    {
+      "epoch": 3.787358552533365,
+      "grad_norm": 4.745761871337891,
+      "learning_rate": 0.00016375865004942885,
+      "loss": 1.2336,
+      "step": 126850
+    },
+    {
+      "epoch": 3.7888514017854478,
+      "grad_norm": 5.675398826599121,
+      "learning_rate": 0.00016374436425351004,
+      "loss": 1.2736,
+      "step": 126900
+    },
+    {
+      "epoch": 3.7903442510375305,
+      "grad_norm": 5.933438301086426,
+      "learning_rate": 0.00016373007845759118,
+      "loss": 1.2245,
+      "step": 126950
+    },
+    {
+      "epoch": 3.7918371002896127,
+      "grad_norm": 4.410390853881836,
+      "learning_rate": 0.00016371579266167237,
+      "loss": 1.2786,
+      "step": 127000
+    },
+    {
+      "epoch": 3.7933299495416954,
+      "grad_norm": 3.837341785430908,
+      "learning_rate": 0.0001637015068657535,
+      "loss": 1.2154,
+      "step": 127050
+    },
+    {
+      "epoch": 3.7948227987937777,
+      "grad_norm": 4.170938014984131,
+      "learning_rate": 0.0001636872210698347,
+      "loss": 1.1933,
+      "step": 127100
+    },
+    {
+      "epoch": 3.7963156480458604,
+      "grad_norm": 5.0940985679626465,
+      "learning_rate": 0.00016367293527391584,
+      "loss": 1.2399,
+      "step": 127150
+    },
+    {
+      "epoch": 3.797808497297943,
+      "grad_norm": 3.5157582759857178,
+      "learning_rate": 0.00016365864947799703,
+      "loss": 1.2217,
+      "step": 127200
+    },
+    {
+      "epoch": 3.7993013465500254,
+      "grad_norm": 4.230498790740967,
+      "learning_rate": 0.0001636443636820782,
+      "loss": 1.2665,
+      "step": 127250
+    },
+    {
+      "epoch": 3.8007941958021076,
+      "grad_norm": 4.24791955947876,
+      "learning_rate": 0.00016363007788615936,
+      "loss": 1.28,
+      "step": 127300
+    },
+    {
+      "epoch": 3.8022870450541904,
+      "grad_norm": 4.877384185791016,
+      "learning_rate": 0.00016361579209024052,
+      "loss": 1.3076,
+      "step": 127350
+    },
+    {
+      "epoch": 3.803779894306273,
+      "grad_norm": 6.252450942993164,
+      "learning_rate": 0.0001636015062943217,
+      "loss": 1.2648,
+      "step": 127400
+    },
+    {
+      "epoch": 3.8052727435583553,
+      "grad_norm": 4.372878551483154,
+      "learning_rate": 0.00016358722049840285,
+      "loss": 1.2749,
+      "step": 127450
+    },
+    {
+      "epoch": 3.806765592810438,
+      "grad_norm": 5.042996883392334,
+      "learning_rate": 0.00016357293470248402,
+      "loss": 1.2379,
+      "step": 127500
+    },
+    {
+      "epoch": 3.8082584420625203,
+      "grad_norm": 4.833359241485596,
+      "learning_rate": 0.00016355864890656518,
+      "loss": 1.2456,
+      "step": 127550
+    },
+    {
+      "epoch": 3.809751291314603,
+      "grad_norm": 3.7556586265563965,
+      "learning_rate": 0.00016354436311064635,
+      "loss": 1.2998,
+      "step": 127600
+    },
+    {
+      "epoch": 3.8112441405666857,
+      "grad_norm": 4.22800874710083,
+      "learning_rate": 0.0001635300773147275,
+      "loss": 1.2307,
+      "step": 127650
+    },
+    {
+      "epoch": 3.812736989818768,
+      "grad_norm": 3.8682031631469727,
+      "learning_rate": 0.0001635157915188087,
+      "loss": 1.2429,
+      "step": 127700
+    },
+    {
+      "epoch": 3.8142298390708507,
+      "grad_norm": 5.100236415863037,
+      "learning_rate": 0.00016350150572288984,
+      "loss": 1.2725,
+      "step": 127750
+    },
+    {
+      "epoch": 3.815722688322933,
+      "grad_norm": 5.069801330566406,
+      "learning_rate": 0.00016348721992697103,
+      "loss": 1.3199,
+      "step": 127800
+    },
+    {
+      "epoch": 3.8172155375750156,
+      "grad_norm": 4.382961273193359,
+      "learning_rate": 0.00016347293413105217,
+      "loss": 1.2727,
+      "step": 127850
+    },
+    {
+      "epoch": 3.8187083868270983,
+      "grad_norm": 4.622864246368408,
+      "learning_rate": 0.00016345864833513336,
+      "loss": 1.3309,
+      "step": 127900
+    },
+    {
+      "epoch": 3.8202012360791806,
+      "grad_norm": 5.895843982696533,
+      "learning_rate": 0.00016344436253921453,
+      "loss": 1.2453,
+      "step": 127950
+    },
+    {
+      "epoch": 3.8216940853312633,
+      "grad_norm": 4.68233060836792,
+      "learning_rate": 0.0001634300767432957,
+      "loss": 1.2948,
+      "step": 128000
+    },
+    {
+      "epoch": 3.8231869345833456,
+      "grad_norm": 4.286251068115234,
+      "learning_rate": 0.00016341579094737686,
+      "loss": 1.2758,
+      "step": 128050
+    },
+    {
+      "epoch": 3.8246797838354283,
+      "grad_norm": 3.9140446186065674,
+      "learning_rate": 0.00016340150515145802,
+      "loss": 1.2791,
+      "step": 128100
+    },
+    {
+      "epoch": 3.826172633087511,
+      "grad_norm": 5.249776840209961,
+      "learning_rate": 0.00016338721935553918,
+      "loss": 1.3379,
+      "step": 128150
+    },
+    {
+      "epoch": 3.8276654823395933,
+      "grad_norm": 4.369449138641357,
+      "learning_rate": 0.00016337293355962035,
+      "loss": 1.179,
+      "step": 128200
+    },
+    {
+      "epoch": 3.829158331591676,
+      "grad_norm": 5.655234336853027,
+      "learning_rate": 0.00016335864776370151,
+      "loss": 1.2535,
+      "step": 128250
+    },
+    {
+      "epoch": 3.830651180843758,
+      "grad_norm": 5.233351230621338,
+      "learning_rate": 0.00016334436196778268,
+      "loss": 1.2601,
+      "step": 128300
+    },
+    {
+      "epoch": 3.832144030095841,
+      "grad_norm": 4.451437950134277,
+      "learning_rate": 0.00016333007617186384,
+      "loss": 1.2733,
+      "step": 128350
+    },
+    {
+      "epoch": 3.8336368793479236,
+      "grad_norm": 3.807561159133911,
+      "learning_rate": 0.000163315790375945,
+      "loss": 1.2721,
+      "step": 128400
+    },
+    {
+      "epoch": 3.835129728600006,
+      "grad_norm": 4.507883071899414,
+      "learning_rate": 0.00016330150458002617,
+      "loss": 1.2498,
+      "step": 128450
+    },
+    {
+      "epoch": 3.8366225778520886,
+      "grad_norm": 4.246575832366943,
+      "learning_rate": 0.00016328721878410736,
+      "loss": 1.2299,
+      "step": 128500
+    },
+    {
+      "epoch": 3.838115427104171,
+      "grad_norm": 5.045656204223633,
+      "learning_rate": 0.0001632729329881885,
+      "loss": 1.2536,
+      "step": 128550
+    },
+    {
+      "epoch": 3.8396082763562536,
+      "grad_norm": 3.3326852321624756,
+      "learning_rate": 0.0001632586471922697,
+      "loss": 1.2401,
+      "step": 128600
+    },
+    {
+      "epoch": 3.8411011256083363,
+      "grad_norm": 4.201671600341797,
+      "learning_rate": 0.00016324436139635083,
+      "loss": 1.2681,
+      "step": 128650
+    },
+    {
+      "epoch": 3.8425939748604185,
+      "grad_norm": 3.979217052459717,
+      "learning_rate": 0.00016323007560043202,
+      "loss": 1.2901,
+      "step": 128700
+    },
+    {
+      "epoch": 3.8440868241125012,
+      "grad_norm": 4.908186435699463,
+      "learning_rate": 0.0001632157898045132,
+      "loss": 1.2443,
+      "step": 128750
+    },
+    {
+      "epoch": 3.8455796733645835,
+      "grad_norm": 7.0551252365112305,
+      "learning_rate": 0.00016320150400859435,
+      "loss": 1.3312,
+      "step": 128800
+    },
+    {
+      "epoch": 3.847072522616666,
+      "grad_norm": 2.9734935760498047,
+      "learning_rate": 0.00016318721821267552,
+      "loss": 1.1726,
+      "step": 128850
+    },
+    {
+      "epoch": 3.848565371868749,
+      "grad_norm": 8.724651336669922,
+      "learning_rate": 0.00016317293241675668,
+      "loss": 1.3131,
+      "step": 128900
+    },
+    {
+      "epoch": 3.850058221120831,
+      "grad_norm": 4.633723735809326,
+      "learning_rate": 0.00016315864662083785,
+      "loss": 1.2309,
+      "step": 128950
+    },
+    {
+      "epoch": 3.851551070372914,
+      "grad_norm": 5.132857799530029,
+      "learning_rate": 0.000163144360824919,
+      "loss": 1.1988,
+      "step": 129000
+    },
+    {
+      "epoch": 3.853043919624996,
+      "grad_norm": 3.8106892108917236,
+      "learning_rate": 0.00016313007502900018,
+      "loss": 1.2284,
+      "step": 129050
+    },
+    {
+      "epoch": 3.854536768877079,
+      "grad_norm": 3.9938535690307617,
+      "learning_rate": 0.00016311578923308134,
+      "loss": 1.2252,
+      "step": 129100
+    },
+    {
+      "epoch": 3.8560296181291616,
+      "grad_norm": 4.499520301818848,
+      "learning_rate": 0.0001631015034371625,
+      "loss": 1.2834,
+      "step": 129150
+    },
+    {
+      "epoch": 3.857522467381244,
+      "grad_norm": 3.780081033706665,
+      "learning_rate": 0.00016308721764124367,
+      "loss": 1.1696,
+      "step": 129200
+    },
+    {
+      "epoch": 3.8590153166333265,
+      "grad_norm": 5.509545803070068,
+      "learning_rate": 0.00016307293184532483,
+      "loss": 1.2129,
+      "step": 129250
+    },
+    {
+      "epoch": 3.860508165885409,
+      "grad_norm": 3.87497615814209,
+      "learning_rate": 0.000163058646049406,
+      "loss": 1.2971,
+      "step": 129300
+    },
+    {
+      "epoch": 3.8620010151374915,
+      "grad_norm": 4.839779853820801,
+      "learning_rate": 0.00016304436025348716,
+      "loss": 1.2385,
+      "step": 129350
+    },
+    {
+      "epoch": 3.863493864389574,
+      "grad_norm": 7.801334857940674,
+      "learning_rate": 0.00016303007445756833,
+      "loss": 1.2482,
+      "step": 129400
+    },
+    {
+      "epoch": 3.8649867136416565,
+      "grad_norm": 3.884004831314087,
+      "learning_rate": 0.0001630157886616495,
+      "loss": 1.264,
+      "step": 129450
+    },
+    {
+      "epoch": 3.8664795628937387,
+      "grad_norm": 5.332859516143799,
+      "learning_rate": 0.00016300150286573066,
+      "loss": 1.2263,
+      "step": 129500
+    },
+    {
+      "epoch": 3.8679724121458214,
+      "grad_norm": 4.195265293121338,
+      "learning_rate": 0.00016298721706981185,
+      "loss": 1.2579,
+      "step": 129550
+    },
+    {
+      "epoch": 3.869465261397904,
+      "grad_norm": 4.393342018127441,
+      "learning_rate": 0.000162972931273893,
+      "loss": 1.2513,
+      "step": 129600
+    },
+    {
+      "epoch": 3.8709581106499864,
+      "grad_norm": 5.231433391571045,
+      "learning_rate": 0.00016295864547797418,
+      "loss": 1.2508,
+      "step": 129650
+    },
+    {
+      "epoch": 3.872450959902069,
+      "grad_norm": 4.832424640655518,
+      "learning_rate": 0.00016294435968205532,
+      "loss": 1.2902,
+      "step": 129700
+    },
+    {
+      "epoch": 3.8739438091541514,
+      "grad_norm": 3.9749419689178467,
+      "learning_rate": 0.0001629300738861365,
+      "loss": 1.2803,
+      "step": 129750
+    },
+    {
+      "epoch": 3.875436658406234,
+      "grad_norm": 4.696380138397217,
+      "learning_rate": 0.00016291578809021765,
+      "loss": 1.2226,
+      "step": 129800
+    },
+    {
+      "epoch": 3.876929507658317,
+      "grad_norm": 3.757120132446289,
+      "learning_rate": 0.00016290150229429884,
+      "loss": 1.2499,
+      "step": 129850
+    },
+    {
+      "epoch": 3.878422356910399,
+      "grad_norm": 4.010384559631348,
+      "learning_rate": 0.00016288721649838,
+      "loss": 1.3455,
+      "step": 129900
+    },
+    {
+      "epoch": 3.8799152061624818,
+      "grad_norm": 3.7654688358306885,
+      "learning_rate": 0.00016287293070246117,
+      "loss": 1.2689,
+      "step": 129950
+    },
+    {
+      "epoch": 3.881408055414564,
+      "grad_norm": 4.330887317657471,
+      "learning_rate": 0.00016285864490654233,
+      "loss": 1.2988,
+      "step": 130000
     }
   ],
   "logging_steps": 50,
@@ -16833,7 +18233,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.2834368542993285e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
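
The bulk of this commit is the 200 new log_history entries (steps 120050 through 130000) appended to trainer_state.json; each carries epoch, grad_norm, learning_rate, loss, and step. A minimal sketch for inspecting them after downloading the checkpoint; the local path is an assumption:

```python
# Minimal sketch: summarize the log_history entries added in this commit.
# Assumes the checkpoint has been downloaded to last-checkpoint/ locally.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])  # 130000
print(state["epoch"])        # 3.881408055414564

# Keep only training logs past step 120000 (eval entries, if any, lack "loss").
recent = [e for e in state["log_history"]
          if "loss" in e and e.get("step", 0) > 120000]
avg_loss = sum(e["loss"] for e in recent) / len(recent)
print(f"mean loss over steps 120050-130000: {avg_loss:.4f}")
```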