Training in progress, step 120000, checkpoint

- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step120000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step120000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
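Every binary in this commit is stored through Git LFS, so the diffs below only touch three-line pointer files. To materialize just this checkpoint locally, one option is huggingface_hub's snapshot_download with a pattern filter; a minimal sketch (the repo id is a placeholder, since the commit page does not show it):

# Minimal sketch: pull only this checkpoint from the Hub.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="user/repo",                    # placeholder; the actual repo id is not shown in this commit
    allow_patterns=["last-checkpoint/**"],  # fetch only the checkpoint files
)
print("checkpoint downloaded to", local_dir)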
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bbeae5a154910d18d2d84e053daf683d78d822739559bbc7386a2b14ba6399e7
 size 42002584
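The diff above only changes the Git LFS pointer text, not the weights themselves: a pointer records the spec version, the SHA-256 of the real blob, and its byte size (unchanged here at 42,002,584 bytes, as expected for an adapter whose shape is fixed). A minimal sketch of reading such a pointer file, assuming the checkout still holds the pointer rather than the smudged binary (e.g. cloned with GIT_LFS_SKIP_SMUDGE=1):

# Minimal sketch: parse a Git LFS pointer file like the one diffed above.
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value  # keys: "version", "oid", "size"
    return fields

ptr = read_lfs_pointer("last-checkpoint/adapter_model.safetensors")
assert ptr["size"] == "42002584"  # matches the pointer in the diff above
print(ptr["oid"])                 # "sha256:bbeae5a1..."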
last-checkpoint/global_step120000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a05b9ba71f7eaa3898e2fc90e7a6a905550e1f9e8de2b5d5eaa28536267a62db
+size 251710672
last-checkpoint/global_step120000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f63c09da913d91a5a4c11005db421abe1ca9ca95ebdca17e904fca454769096
+size 153747385
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
-
+global_step120000
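The two global_step120000/ files are DeepSpeed ZeRO shards (the bf16 optimizer partition for rank 0 plus the rank-0 model states), and latest is the one-line tag DeepSpeed reads to locate the newest step directory. If you want a plain fp32 state dict rather than resuming training, DeepSpeed ships a consolidation helper; a sketch assuming a recent DeepSpeed install:

# Sketch: fold the ZeRO shards under last-checkpoint/ into one fp32 state dict.
# The tag matches the "latest" file written in this commit.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint", tag="global_step120000"
)
print(sum(p.numel() for p in state_dict.values()), "parameters recovered")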
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:595af005e3ac842b7efedd61141a99be7c1d0dbeb6b85a0412a7eb03337cada6
 size 14244
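rng_state.pth snapshots the random-number-generator states so a resumed run replays the same data order and dropout masks. A quick way to inspect it, assuming a local checkout with the LFS file materialized:

import torch

# Sketch: peek at the RNG snapshot. weights_only=False is needed on newer
# torch because the file stores plain Python/NumPy objects, not just tensors
# (an assumption about this particular file's contents).
rng = torch.load("last-checkpoint/rng_state.pth", weights_only=False)
print(sorted(rng.keys()))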
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.
+  "epoch": 3.582838204998059,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 120000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -15414,6 +15414,1406 @@
       "learning_rate": 0.00016857296327407585,
       "loss": 1.1694,
       "step": 110000
+    },
+    {
+      "epoch": 3.285761203833637,
+      "grad_norm": 4.061399459838867,
+      "learning_rate": 0.00016855867747815704,
+      "loss": 1.2366,
+      "step": 110050
+    },
+    {
+      "epoch": 3.2872540530857193,
+      "grad_norm": 4.011737823486328,
+      "learning_rate": 0.0001685443916822382,
+      "loss": 1.2354,
+      "step": 110100
+    },
+    {
+      "epoch": 3.288746902337802,
+      "grad_norm": 4.765646457672119,
+      "learning_rate": 0.00016853010588631937,
+      "loss": 1.2935,
+      "step": 110150
+    },
+    {
+      "epoch": 3.2902397515898842,
+      "grad_norm": 4.911751747131348,
+      "learning_rate": 0.00016851582009040053,
+      "loss": 1.2079,
+      "step": 110200
+    },
+    {
+      "epoch": 3.291732600841967,
+      "grad_norm": 5.475641250610352,
+      "learning_rate": 0.0001685015342944817,
+      "loss": 1.2861,
+      "step": 110250
+    },
+    {
+      "epoch": 3.2932254500940497,
+      "grad_norm": 4.465792655944824,
+      "learning_rate": 0.00016848724849856286,
+      "loss": 1.239,
+      "step": 110300
+    },
+    {
+      "epoch": 3.294718299346132,
+      "grad_norm": 6.820103645324707,
+      "learning_rate": 0.00016847296270264403,
+      "loss": 1.1682,
+      "step": 110350
+    },
+    {
+      "epoch": 3.2962111485982146,
+      "grad_norm": 6.0468010902404785,
+      "learning_rate": 0.0001684586769067252,
+      "loss": 1.2195,
+      "step": 110400
+    },
+    {
+      "epoch": 3.297703997850297,
+      "grad_norm": 6.527349472045898,
+      "learning_rate": 0.00016844439111080635,
+      "loss": 1.2655,
+      "step": 110450
+    },
+    {
+      "epoch": 3.2991968471023796,
+      "grad_norm": 4.763921737670898,
+      "learning_rate": 0.00016843010531488752,
+      "loss": 1.2427,
+      "step": 110500
+    },
+    {
+      "epoch": 3.3006896963544623,
+      "grad_norm": 5.6205949783325195,
+      "learning_rate": 0.0001684158195189687,
+      "loss": 1.2629,
+      "step": 110550
+    },
+    {
+      "epoch": 3.3021825456065446,
+      "grad_norm": 4.788918972015381,
+      "learning_rate": 0.00016840153372304985,
+      "loss": 1.246,
+      "step": 110600
+    },
+    {
+      "epoch": 3.3036753948586273,
+      "grad_norm": 4.326768398284912,
+      "learning_rate": 0.00016838724792713104,
+      "loss": 1.23,
+      "step": 110650
+    },
+    {
+      "epoch": 3.3051682441107095,
+      "grad_norm": 4.26240873336792,
+      "learning_rate": 0.00016837296213121218,
+      "loss": 1.2487,
+      "step": 110700
+    },
+    {
+      "epoch": 3.3066610933627922,
+      "grad_norm": 5.802476406097412,
+      "learning_rate": 0.00016835867633529334,
+      "loss": 1.2401,
+      "step": 110750
+    },
+    {
+      "epoch": 3.308153942614875,
+      "grad_norm": 3.884272575378418,
+      "learning_rate": 0.0001683443905393745,
+      "loss": 1.2199,
+      "step": 110800
+    },
+    {
+      "epoch": 3.309646791866957,
+      "grad_norm": 5.074997425079346,
+      "learning_rate": 0.00016833010474345567,
+      "loss": 1.2285,
+      "step": 110850
+    },
+    {
+      "epoch": 3.31113964111904,
+      "grad_norm": 3.9242136478424072,
+      "learning_rate": 0.00016831581894753686,
+      "loss": 1.2276,
+      "step": 110900
+    },
+    {
+      "epoch": 3.312632490371122,
+      "grad_norm": 5.645691394805908,
+      "learning_rate": 0.000168301533151618,
+      "loss": 1.2031,
+      "step": 110950
+    },
+    {
+      "epoch": 3.314125339623205,
+      "grad_norm": 6.003003120422363,
+      "learning_rate": 0.0001682872473556992,
+      "loss": 1.2442,
+      "step": 111000
+    },
+    {
+      "epoch": 3.3156181888752876,
+      "grad_norm": 4.822206020355225,
+      "learning_rate": 0.00016827296155978033,
+      "loss": 1.2398,
+      "step": 111050
+    },
+    {
+      "epoch": 3.31711103812737,
+      "grad_norm": 3.79050350189209,
+      "learning_rate": 0.00016825867576386152,
+      "loss": 1.2108,
+      "step": 111100
+    },
+    {
+      "epoch": 3.3186038873794526,
+      "grad_norm": 5.795800685882568,
+      "learning_rate": 0.00016824438996794266,
+      "loss": 1.2537,
+      "step": 111150
+    },
+    {
+      "epoch": 3.320096736631535,
+      "grad_norm": 4.554315090179443,
+      "learning_rate": 0.00016823010417202385,
+      "loss": 1.2485,
+      "step": 111200
+    },
+    {
+      "epoch": 3.3215895858836175,
+      "grad_norm": 3.562445640563965,
+      "learning_rate": 0.00016821581837610502,
+      "loss": 1.2802,
+      "step": 111250
+    },
+    {
+      "epoch": 3.3230824351357002,
+      "grad_norm": 4.7466278076171875,
+      "learning_rate": 0.00016820153258018618,
+      "loss": 1.2108,
+      "step": 111300
+    },
+    {
+      "epoch": 3.3245752843877825,
+      "grad_norm": 4.278189659118652,
+      "learning_rate": 0.00016818724678426735,
+      "loss": 1.2035,
+      "step": 111350
+    },
+    {
+      "epoch": 3.326068133639865,
+      "grad_norm": 3.9288856983184814,
+      "learning_rate": 0.0001681729609883485,
+      "loss": 1.2155,
+      "step": 111400
+    },
+    {
+      "epoch": 3.3275609828919475,
+      "grad_norm": 5.247483253479004,
+      "learning_rate": 0.00016815867519242967,
+      "loss": 1.2523,
+      "step": 111450
+    },
+    {
+      "epoch": 3.32905383214403,
+      "grad_norm": 5.946502685546875,
+      "learning_rate": 0.00016814438939651084,
+      "loss": 1.2254,
+      "step": 111500
+    },
+    {
+      "epoch": 3.3305466813961124,
+      "grad_norm": 4.208354473114014,
+      "learning_rate": 0.000168130103600592,
+      "loss": 1.2168,
+      "step": 111550
+    },
+    {
+      "epoch": 3.332039530648195,
+      "grad_norm": 6.033560752868652,
+      "learning_rate": 0.00016811581780467317,
+      "loss": 1.2876,
+      "step": 111600
+    },
+    {
+      "epoch": 3.333532379900278,
+      "grad_norm": 4.848031520843506,
+      "learning_rate": 0.00016810153200875433,
+      "loss": 1.2404,
+      "step": 111650
+    },
+    {
+      "epoch": 3.33502522915236,
+      "grad_norm": 3.873622179031372,
+      "learning_rate": 0.00016808724621283553,
+      "loss": 1.2489,
+      "step": 111700
+    },
+    {
+      "epoch": 3.336518078404443,
+      "grad_norm": 4.604708671569824,
+      "learning_rate": 0.00016807296041691666,
+      "loss": 1.2255,
+      "step": 111750
+    },
+    {
+      "epoch": 3.338010927656525,
+      "grad_norm": 5.416150093078613,
+      "learning_rate": 0.00016805867462099785,
+      "loss": 1.2108,
+      "step": 111800
+    },
+    {
+      "epoch": 3.339503776908608,
+      "grad_norm": 4.228431224822998,
+      "learning_rate": 0.000168044388825079,
+      "loss": 1.2447,
+      "step": 111850
+    },
+    {
+      "epoch": 3.3409966261606905,
+      "grad_norm": 4.664206027984619,
+      "learning_rate": 0.00016803010302916018,
+      "loss": 1.2296,
+      "step": 111900
+    },
+    {
+      "epoch": 3.3424894754127727,
+      "grad_norm": 4.114374160766602,
+      "learning_rate": 0.00016801581723324132,
+      "loss": 1.1755,
+      "step": 111950
+    },
+    {
+      "epoch": 3.3439823246648555,
+      "grad_norm": 5.543557643890381,
+      "learning_rate": 0.0001680015314373225,
+      "loss": 1.2252,
+      "step": 112000
+    },
+    {
+      "epoch": 3.3454751739169377,
+      "grad_norm": 4.992297649383545,
+      "learning_rate": 0.00016798724564140368,
+      "loss": 1.2215,
+      "step": 112050
+    },
+    {
+      "epoch": 3.3469680231690204,
+      "grad_norm": 5.890657901763916,
+      "learning_rate": 0.00016797295984548484,
+      "loss": 1.258,
+      "step": 112100
+    },
+    {
+      "epoch": 3.3484608724211027,
+      "grad_norm": 8.049378395080566,
+      "learning_rate": 0.000167958674049566,
+      "loss": 1.2354,
+      "step": 112150
+    },
+    {
+      "epoch": 3.3499537216731854,
+      "grad_norm": 4.3732075691223145,
+      "learning_rate": 0.00016794438825364717,
+      "loss": 1.1945,
+      "step": 112200
+    },
+    {
+      "epoch": 3.351446570925268,
+      "grad_norm": 6.537196159362793,
+      "learning_rate": 0.00016793010245772834,
+      "loss": 1.2857,
+      "step": 112250
+    },
+    {
+      "epoch": 3.3529394201773504,
+      "grad_norm": 4.264357089996338,
+      "learning_rate": 0.0001679158166618095,
+      "loss": 1.2425,
+      "step": 112300
+    },
+    {
+      "epoch": 3.354432269429433,
+      "grad_norm": 5.0003662109375,
+      "learning_rate": 0.00016790153086589067,
+      "loss": 1.2781,
+      "step": 112350
+    },
+    {
+      "epoch": 3.3559251186815153,
+      "grad_norm": 3.599029064178467,
+      "learning_rate": 0.00016788724506997183,
+      "loss": 1.1866,
+      "step": 112400
+    },
+    {
+      "epoch": 3.357417967933598,
+      "grad_norm": 5.207790374755859,
+      "learning_rate": 0.000167872959274053,
+      "loss": 1.248,
+      "step": 112450
+    },
+    {
+      "epoch": 3.3589108171856807,
+      "grad_norm": 4.075089931488037,
+      "learning_rate": 0.0001678586734781342,
+      "loss": 1.195,
+      "step": 112500
+    },
15768 |
+
{
|
15769 |
+
"epoch": 3.360403666437763,
|
15770 |
+
"grad_norm": 5.74088716506958,
|
15771 |
+
"learning_rate": 0.00016784438768221532,
|
15772 |
+
"loss": 1.2343,
|
15773 |
+
"step": 112550
|
15774 |
+
},
|
15775 |
+
{
|
15776 |
+
"epoch": 3.3618965156898457,
|
15777 |
+
"grad_norm": 4.6949262619018555,
|
15778 |
+
"learning_rate": 0.00016783010188629652,
|
15779 |
+
"loss": 1.2295,
|
15780 |
+
"step": 112600
|
15781 |
+
},
|
15782 |
+
{
|
15783 |
+
"epoch": 3.363389364941928,
|
15784 |
+
"grad_norm": 5.432741165161133,
|
15785 |
+
"learning_rate": 0.00016781581609037765,
|
15786 |
+
"loss": 1.3118,
|
15787 |
+
"step": 112650
|
15788 |
+
},
|
15789 |
+
{
|
15790 |
+
"epoch": 3.3648822141940107,
|
15791 |
+
"grad_norm": 4.652493000030518,
|
15792 |
+
"learning_rate": 0.00016780153029445885,
|
15793 |
+
"loss": 1.2686,
|
15794 |
+
"step": 112700
|
15795 |
+
},
|
15796 |
+
{
|
15797 |
+
"epoch": 3.3663750634460934,
|
15798 |
+
"grad_norm": 4.528727054595947,
|
15799 |
+
"learning_rate": 0.00016778724449854,
|
15800 |
+
"loss": 1.2235,
|
15801 |
+
"step": 112750
|
15802 |
+
},
|
15803 |
+
{
|
15804 |
+
"epoch": 3.3678679126981756,
|
15805 |
+
"grad_norm": 4.215224266052246,
|
15806 |
+
"learning_rate": 0.00016777295870262117,
|
15807 |
+
"loss": 1.2993,
|
15808 |
+
"step": 112800
|
15809 |
+
},
|
15810 |
+
{
|
15811 |
+
"epoch": 3.3693607619502584,
|
15812 |
+
"grad_norm": 4.306272506713867,
|
15813 |
+
"learning_rate": 0.00016775867290670234,
|
15814 |
+
"loss": 1.2298,
|
15815 |
+
"step": 112850
|
15816 |
+
},
|
15817 |
+
{
|
15818 |
+
"epoch": 3.3708536112023406,
|
15819 |
+
"grad_norm": 3.477867841720581,
|
15820 |
+
"learning_rate": 0.0001677443871107835,
|
15821 |
+
"loss": 1.2557,
|
15822 |
+
"step": 112900
|
15823 |
+
},
|
15824 |
+
{
|
15825 |
+
"epoch": 3.3723464604544233,
|
15826 |
+
"grad_norm": 4.306337833404541,
|
15827 |
+
"learning_rate": 0.00016773010131486467,
|
15828 |
+
"loss": 1.1917,
|
15829 |
+
"step": 112950
|
15830 |
+
},
|
15831 |
+
{
|
15832 |
+
"epoch": 3.373839309706506,
|
15833 |
+
"grad_norm": 4.982789993286133,
|
15834 |
+
"learning_rate": 0.00016771581551894583,
|
15835 |
+
"loss": 1.2572,
|
15836 |
+
"step": 113000
|
15837 |
+
},
|
15838 |
+
{
|
15839 |
+
"epoch": 3.3753321589585883,
|
15840 |
+
"grad_norm": 4.850037097930908,
|
15841 |
+
"learning_rate": 0.000167701529723027,
|
15842 |
+
"loss": 1.2686,
|
15843 |
+
"step": 113050
|
15844 |
+
},
|
15845 |
+
{
|
15846 |
+
"epoch": 3.376825008210671,
|
15847 |
+
"grad_norm": 4.561743259429932,
|
15848 |
+
"learning_rate": 0.00016768724392710816,
|
15849 |
+
"loss": 1.2696,
|
15850 |
+
"step": 113100
|
15851 |
+
},
|
15852 |
+
{
|
15853 |
+
"epoch": 3.3783178574627533,
|
15854 |
+
"grad_norm": 4.380636692047119,
|
15855 |
+
"learning_rate": 0.00016767295813118933,
|
15856 |
+
"loss": 1.2335,
|
15857 |
+
"step": 113150
|
15858 |
+
},
|
15859 |
+
{
|
15860 |
+
"epoch": 3.379810706714836,
|
15861 |
+
"grad_norm": 5.846193313598633,
|
15862 |
+
"learning_rate": 0.00016765867233527052,
|
15863 |
+
"loss": 1.2353,
|
15864 |
+
"step": 113200
|
15865 |
+
},
|
15866 |
+
{
|
15867 |
+
"epoch": 3.3813035559669187,
|
15868 |
+
"grad_norm": 3.9056570529937744,
|
15869 |
+
"learning_rate": 0.00016764438653935166,
|
15870 |
+
"loss": 1.3195,
|
15871 |
+
"step": 113250
|
15872 |
+
},
|
15873 |
+
{
|
15874 |
+
"epoch": 3.382796405219001,
|
15875 |
+
"grad_norm": 5.165102958679199,
|
15876 |
+
"learning_rate": 0.00016763010074343285,
|
15877 |
+
"loss": 1.2193,
|
15878 |
+
"step": 113300
|
15879 |
+
},
|
15880 |
+
{
|
15881 |
+
"epoch": 3.3842892544710836,
|
15882 |
+
"grad_norm": 4.945791244506836,
|
15883 |
+
"learning_rate": 0.00016761581494751399,
|
15884 |
+
"loss": 1.22,
|
15885 |
+
"step": 113350
|
15886 |
+
},
|
15887 |
+
{
|
15888 |
+
"epoch": 3.385782103723166,
|
15889 |
+
"grad_norm": 6.68800687789917,
|
15890 |
+
"learning_rate": 0.00016760152915159515,
|
15891 |
+
"loss": 1.2546,
|
15892 |
+
"step": 113400
|
15893 |
+
},
|
15894 |
+
{
|
15895 |
+
"epoch": 3.3872749529752486,
|
15896 |
+
"grad_norm": 8.491449356079102,
|
15897 |
+
"learning_rate": 0.00016758724335567632,
|
15898 |
+
"loss": 1.2185,
|
15899 |
+
"step": 113450
|
15900 |
+
},
|
15901 |
+
{
|
15902 |
+
"epoch": 3.3887678022273313,
|
15903 |
+
"grad_norm": 5.642709255218506,
|
15904 |
+
"learning_rate": 0.00016757295755975748,
|
15905 |
+
"loss": 1.2071,
|
15906 |
+
"step": 113500
|
15907 |
+
},
|
15908 |
+
{
|
15909 |
+
"epoch": 3.3902606514794136,
|
15910 |
+
"grad_norm": 4.33923864364624,
|
15911 |
+
"learning_rate": 0.00016755867176383867,
|
15912 |
+
"loss": 1.2902,
|
15913 |
+
"step": 113550
|
15914 |
+
},
|
15915 |
+
{
|
15916 |
+
"epoch": 3.3917535007314963,
|
15917 |
+
"grad_norm": 9.632731437683105,
|
15918 |
+
"learning_rate": 0.0001675443859679198,
|
15919 |
+
"loss": 1.2163,
|
15920 |
+
"step": 113600
|
15921 |
+
},
|
15922 |
+
{
|
15923 |
+
"epoch": 3.3932463499835785,
|
15924 |
+
"grad_norm": 4.608974456787109,
|
15925 |
+
"learning_rate": 0.000167530100172001,
|
15926 |
+
"loss": 1.2247,
|
15927 |
+
"step": 113650
|
15928 |
+
},
|
15929 |
+
{
|
15930 |
+
"epoch": 3.3947391992356613,
|
15931 |
+
"grad_norm": 3.9156320095062256,
|
15932 |
+
"learning_rate": 0.00016751581437608214,
|
15933 |
+
"loss": 1.2184,
|
15934 |
+
"step": 113700
|
15935 |
+
},
|
15936 |
+
{
|
15937 |
+
"epoch": 3.3962320484877435,
|
15938 |
+
"grad_norm": 3.8621175289154053,
|
15939 |
+
"learning_rate": 0.00016750152858016333,
|
15940 |
+
"loss": 1.1579,
|
15941 |
+
"step": 113750
|
15942 |
+
},
|
+    {
+      "epoch": 3.397724897739826,
+      "grad_norm": 4.917006969451904,
+      "learning_rate": 0.00016748724278424447,
+      "loss": 1.2958,
+      "step": 113800
+    },
+    {
+      "epoch": 3.399217746991909,
+      "grad_norm": 4.837984561920166,
+      "learning_rate": 0.00016747295698832566,
+      "loss": 1.25,
+      "step": 113850
+    },
+    {
+      "epoch": 3.400710596243991,
+      "grad_norm": 5.688441753387451,
+      "learning_rate": 0.00016745867119240682,
+      "loss": 1.2573,
+      "step": 113900
+    },
+    {
+      "epoch": 3.402203445496074,
+      "grad_norm": 5.892030239105225,
+      "learning_rate": 0.000167444385396488,
+      "loss": 1.2146,
+      "step": 113950
+    },
+    {
+      "epoch": 3.403696294748156,
+      "grad_norm": 5.0287652015686035,
+      "learning_rate": 0.00016743009960056915,
+      "loss": 1.2544,
+      "step": 114000
+    },
+    {
+      "epoch": 3.405189144000239,
+      "grad_norm": 3.9118645191192627,
+      "learning_rate": 0.00016741581380465032,
+      "loss": 1.2881,
+      "step": 114050
+    },
+    {
+      "epoch": 3.4066819932523216,
+      "grad_norm": 4.481536865234375,
+      "learning_rate": 0.00016740152800873148,
+      "loss": 1.2773,
+      "step": 114100
+    },
+    {
+      "epoch": 3.408174842504404,
+      "grad_norm": 4.5398125648498535,
+      "learning_rate": 0.00016738724221281265,
+      "loss": 1.2502,
+      "step": 114150
+    },
+    {
+      "epoch": 3.4096676917564865,
+      "grad_norm": 4.823166847229004,
+      "learning_rate": 0.0001673729564168938,
+      "loss": 1.2654,
+      "step": 114200
+    },
+    {
+      "epoch": 3.411160541008569,
+      "grad_norm": 6.366767406463623,
+      "learning_rate": 0.00016735867062097498,
+      "loss": 1.2552,
+      "step": 114250
+    },
+    {
+      "epoch": 3.4126533902606515,
+      "grad_norm": 4.806399822235107,
+      "learning_rate": 0.00016734438482505614,
+      "loss": 1.1956,
+      "step": 114300
+    },
+    {
+      "epoch": 3.4141462395127338,
+      "grad_norm": 7.901057720184326,
+      "learning_rate": 0.00016733009902913733,
+      "loss": 1.2192,
+      "step": 114350
+    },
+    {
+      "epoch": 3.4156390887648165,
+      "grad_norm": 4.8132500648498535,
+      "learning_rate": 0.00016731581323321847,
+      "loss": 1.2719,
+      "step": 114400
+    },
+    {
+      "epoch": 3.417131938016899,
+      "grad_norm": 4.0474629402160645,
+      "learning_rate": 0.00016730152743729966,
+      "loss": 1.2212,
+      "step": 114450
+    },
+    {
+      "epoch": 3.4186247872689814,
+      "grad_norm": 5.218587875366211,
+      "learning_rate": 0.0001672872416413808,
+      "loss": 1.2162,
+      "step": 114500
+    },
+    {
+      "epoch": 3.420117636521064,
+      "grad_norm": 4.62558650970459,
+      "learning_rate": 0.000167272955845462,
+      "loss": 1.2715,
+      "step": 114550
+    },
+    {
+      "epoch": 3.4216104857731464,
+      "grad_norm": 5.178062915802002,
+      "learning_rate": 0.00016725867004954313,
+      "loss": 1.2799,
+      "step": 114600
+    },
+    {
+      "epoch": 3.423103335025229,
+      "grad_norm": 4.345101833343506,
+      "learning_rate": 0.00016724438425362432,
+      "loss": 1.2453,
+      "step": 114650
+    },
+    {
+      "epoch": 3.424596184277312,
+      "grad_norm": 4.486167907714844,
+      "learning_rate": 0.00016723009845770549,
+      "loss": 1.2591,
+      "step": 114700
+    },
+    {
+      "epoch": 3.426089033529394,
+      "grad_norm": 4.727272033691406,
+      "learning_rate": 0.00016721581266178665,
+      "loss": 1.3124,
+      "step": 114750
+    },
+    {
+      "epoch": 3.427581882781477,
+      "grad_norm": 3.8477623462677,
+      "learning_rate": 0.00016720152686586782,
+      "loss": 1.2389,
+      "step": 114800
+    },
+    {
+      "epoch": 3.429074732033559,
+      "grad_norm": 3.8839187622070312,
+      "learning_rate": 0.00016718724106994898,
+      "loss": 1.2635,
+      "step": 114850
+    },
+    {
+      "epoch": 3.4305675812856418,
+      "grad_norm": 4.389588356018066,
+      "learning_rate": 0.00016717295527403014,
+      "loss": 1.3258,
+      "step": 114900
+    },
+    {
+      "epoch": 3.4320604305377245,
+      "grad_norm": 4.100683689117432,
+      "learning_rate": 0.0001671586694781113,
+      "loss": 1.1715,
+      "step": 114950
+    },
+    {
+      "epoch": 3.4335532797898067,
+      "grad_norm": 4.422749042510986,
+      "learning_rate": 0.00016714438368219247,
+      "loss": 1.2415,
+      "step": 115000
+    },
+    {
+      "epoch": 3.4350461290418894,
+      "grad_norm": 5.368492126464844,
+      "learning_rate": 0.00016713009788627364,
+      "loss": 1.2644,
+      "step": 115050
+    },
+    {
+      "epoch": 3.4365389782939717,
+      "grad_norm": 4.524289608001709,
+      "learning_rate": 0.0001671158120903548,
+      "loss": 1.2413,
+      "step": 115100
+    },
+    {
+      "epoch": 3.4380318275460544,
+      "grad_norm": 6.099424362182617,
+      "learning_rate": 0.000167101526294436,
+      "loss": 1.2915,
+      "step": 115150
+    },
+    {
+      "epoch": 3.439524676798137,
+      "grad_norm": 4.018702507019043,
+      "learning_rate": 0.00016708724049851713,
+      "loss": 1.2868,
+      "step": 115200
+    },
+    {
+      "epoch": 3.4410175260502194,
+      "grad_norm": 4.0083112716674805,
+      "learning_rate": 0.00016707295470259832,
+      "loss": 1.1582,
+      "step": 115250
+    },
+    {
+      "epoch": 3.442510375302302,
+      "grad_norm": 4.636148452758789,
+      "learning_rate": 0.00016705866890667946,
+      "loss": 1.2637,
+      "step": 115300
+    },
+    {
+      "epoch": 3.4440032245543843,
+      "grad_norm": 4.421489238739014,
+      "learning_rate": 0.00016704438311076065,
+      "loss": 1.2649,
+      "step": 115350
+    },
+    {
+      "epoch": 3.445496073806467,
+      "grad_norm": 5.654540538787842,
+      "learning_rate": 0.00016703009731484182,
+      "loss": 1.3091,
+      "step": 115400
+    },
+    {
+      "epoch": 3.4469889230585498,
+      "grad_norm": 4.134968280792236,
+      "learning_rate": 0.00016701581151892298,
+      "loss": 1.2496,
+      "step": 115450
+    },
+    {
+      "epoch": 3.448481772310632,
+      "grad_norm": 4.880650043487549,
+      "learning_rate": 0.00016700152572300415,
+      "loss": 1.2744,
+      "step": 115500
+    },
+    {
+      "epoch": 3.4499746215627147,
+      "grad_norm": 6.422858238220215,
+      "learning_rate": 0.0001669872399270853,
+      "loss": 1.1798,
+      "step": 115550
+    },
+    {
+      "epoch": 3.451467470814797,
+      "grad_norm": 5.5306315422058105,
+      "learning_rate": 0.00016697295413116648,
+      "loss": 1.2321,
+      "step": 115600
+    },
+    {
+      "epoch": 3.4529603200668797,
+      "grad_norm": 3.598203659057617,
+      "learning_rate": 0.00016695866833524764,
+      "loss": 1.2494,
+      "step": 115650
+    },
+    {
+      "epoch": 3.4544531693189624,
+      "grad_norm": 4.61868143081665,
+      "learning_rate": 0.0001669443825393288,
+      "loss": 1.2525,
+      "step": 115700
+    },
+    {
+      "epoch": 3.4559460185710447,
+      "grad_norm": 3.731189727783203,
+      "learning_rate": 0.00016693009674340997,
+      "loss": 1.2407,
+      "step": 115750
+    },
+    {
+      "epoch": 3.4574388678231274,
+      "grad_norm": 5.61367654800415,
+      "learning_rate": 0.00016691581094749114,
+      "loss": 1.176,
+      "step": 115800
+    },
+    {
+      "epoch": 3.4589317170752096,
+      "grad_norm": 4.095551490783691,
+      "learning_rate": 0.0001669015251515723,
+      "loss": 1.2203,
+      "step": 115850
+    },
+    {
+      "epoch": 3.4604245663272923,
+      "grad_norm": 5.298232555389404,
+      "learning_rate": 0.00016688723935565346,
+      "loss": 1.2516,
+      "step": 115900
+    },
+    {
+      "epoch": 3.4619174155793746,
+      "grad_norm": 5.313197135925293,
+      "learning_rate": 0.00016687295355973466,
+      "loss": 1.2375,
+      "step": 115950
+    },
+    {
+      "epoch": 3.4634102648314573,
+      "grad_norm": 5.016303062438965,
+      "learning_rate": 0.0001668586677638158,
+      "loss": 1.2515,
+      "step": 116000
+    },
+    {
+      "epoch": 3.46490311408354,
+      "grad_norm": 4.23463249206543,
+      "learning_rate": 0.00016684438196789696,
+      "loss": 1.2218,
+      "step": 116050
+    },
+    {
+      "epoch": 3.4663959633356223,
+      "grad_norm": 4.841723918914795,
+      "learning_rate": 0.00016683009617197812,
+      "loss": 1.2503,
+      "step": 116100
+    },
+    {
+      "epoch": 3.467888812587705,
+      "grad_norm": 4.741985321044922,
+      "learning_rate": 0.0001668158103760593,
+      "loss": 1.257,
+      "step": 116150
+    },
+    {
+      "epoch": 3.4693816618397872,
+      "grad_norm": 5.977380275726318,
+      "learning_rate": 0.00016680152458014048,
+      "loss": 1.2774,
+      "step": 116200
+    },
+    {
+      "epoch": 3.47087451109187,
+      "grad_norm": 4.758930683135986,
+      "learning_rate": 0.00016678723878422162,
+      "loss": 1.2718,
+      "step": 116250
+    },
+    {
+      "epoch": 3.4723673603439527,
+      "grad_norm": 3.923983097076416,
+      "learning_rate": 0.0001667729529883028,
+      "loss": 1.2274,
+      "step": 116300
+    },
+    {
+      "epoch": 3.473860209596035,
+      "grad_norm": 4.535523891448975,
+      "learning_rate": 0.00016675866719238395,
+      "loss": 1.2749,
+      "step": 116350
+    },
+    {
+      "epoch": 3.4753530588481176,
+      "grad_norm": 3.575572967529297,
+      "learning_rate": 0.00016674438139646514,
+      "loss": 1.2378,
+      "step": 116400
+    },
+    {
+      "epoch": 3.4768459081002,
+      "grad_norm": 4.385488510131836,
+      "learning_rate": 0.00016673009560054628,
+      "loss": 1.278,
+      "step": 116450
+    },
+    {
+      "epoch": 3.4783387573522826,
+      "grad_norm": 3.994697332382202,
+      "learning_rate": 0.00016671580980462747,
+      "loss": 1.2324,
+      "step": 116500
+    },
+    {
+      "epoch": 3.479831606604365,
+      "grad_norm": 4.184852600097656,
+      "learning_rate": 0.00016670152400870863,
+      "loss": 1.2528,
+      "step": 116550
+    },
+    {
+      "epoch": 3.4813244558564476,
+      "grad_norm": 4.509354591369629,
+      "learning_rate": 0.0001666872382127898,
+      "loss": 1.2764,
+      "step": 116600
+    },
+    {
+      "epoch": 3.4828173051085303,
+      "grad_norm": 4.86820125579834,
+      "learning_rate": 0.00016667295241687096,
+      "loss": 1.2332,
+      "step": 116650
+    },
+    {
+      "epoch": 3.4843101543606125,
+      "grad_norm": 5.312245845794678,
+      "learning_rate": 0.00016665866662095213,
+      "loss": 1.2443,
+      "step": 116700
+    },
+    {
+      "epoch": 3.4858030036126952,
+      "grad_norm": 4.096804618835449,
+      "learning_rate": 0.0001666443808250333,
+      "loss": 1.2352,
+      "step": 116750
+    },
+    {
+      "epoch": 3.4872958528647775,
+      "grad_norm": 4.2911200523376465,
+      "learning_rate": 0.00016663009502911446,
+      "loss": 1.2366,
+      "step": 116800
+    },
+    {
+      "epoch": 3.48878870211686,
+      "grad_norm": 5.922074794769287,
+      "learning_rate": 0.00016661580923319562,
+      "loss": 1.28,
+      "step": 116850
+    },
+    {
+      "epoch": 3.490281551368943,
+      "grad_norm": 4.1473612785339355,
+      "learning_rate": 0.00016660152343727678,
+      "loss": 1.1911,
+      "step": 116900
+    },
+    {
+      "epoch": 3.491774400621025,
+      "grad_norm": 4.428061485290527,
+      "learning_rate": 0.00016658723764135795,
+      "loss": 1.2649,
+      "step": 116950
+    },
+    {
+      "epoch": 3.493267249873108,
+      "grad_norm": 4.338399887084961,
+      "learning_rate": 0.00016657295184543914,
+      "loss": 1.2596,
+      "step": 117000
+    },
+    {
+      "epoch": 3.49476009912519,
+      "grad_norm": 5.209356307983398,
+      "learning_rate": 0.00016655866604952028,
+      "loss": 1.2898,
+      "step": 117050
+    },
+    {
+      "epoch": 3.496252948377273,
+      "grad_norm": 4.742622375488281,
+      "learning_rate": 0.00016654438025360147,
+      "loss": 1.2243,
+      "step": 117100
+    },
+    {
+      "epoch": 3.4977457976293556,
+      "grad_norm": 5.037703037261963,
+      "learning_rate": 0.0001665300944576826,
+      "loss": 1.2425,
+      "step": 117150
+    },
+    {
+      "epoch": 3.499238646881438,
+      "grad_norm": 4.114877700805664,
+      "learning_rate": 0.0001665158086617638,
+      "loss": 1.2498,
+      "step": 117200
+    },
+    {
+      "epoch": 3.5007314961335205,
+      "grad_norm": 3.778136968612671,
+      "learning_rate": 0.00016650152286584494,
+      "loss": 1.2793,
+      "step": 117250
+    },
+    {
+      "epoch": 3.502224345385603,
+      "grad_norm": 3.3782308101654053,
+      "learning_rate": 0.00016648723706992613,
+      "loss": 1.234,
+      "step": 117300
+    },
+    {
+      "epoch": 3.5037171946376855,
+      "grad_norm": 3.4435694217681885,
+      "learning_rate": 0.0001664729512740073,
+      "loss": 1.2218,
+      "step": 117350
+    },
+    {
+      "epoch": 3.505210043889768,
+      "grad_norm": 6.202013969421387,
+      "learning_rate": 0.00016645866547808846,
+      "loss": 1.2541,
+      "step": 117400
+    },
+    {
+      "epoch": 3.5067028931418505,
+      "grad_norm": 4.274670600891113,
+      "learning_rate": 0.00016644437968216962,
+      "loss": 1.257,
+      "step": 117450
+    },
+    {
+      "epoch": 3.508195742393933,
+      "grad_norm": 4.702452182769775,
+      "learning_rate": 0.0001664300938862508,
+      "loss": 1.2419,
+      "step": 117500
+    },
+    {
+      "epoch": 3.5096885916460154,
+      "grad_norm": 2.897954225540161,
+      "learning_rate": 0.00016641580809033195,
+      "loss": 1.2716,
+      "step": 117550
+    },
+    {
+      "epoch": 3.511181440898098,
+      "grad_norm": 4.723432540893555,
+      "learning_rate": 0.00016640152229441312,
+      "loss": 1.2357,
+      "step": 117600
+    },
+    {
+      "epoch": 3.512674290150181,
+      "grad_norm": 4.234017848968506,
+      "learning_rate": 0.00016638723649849428,
+      "loss": 1.239,
+      "step": 117650
+    },
+    {
+      "epoch": 3.514167139402263,
+      "grad_norm": 6.463681221008301,
+      "learning_rate": 0.00016637295070257545,
+      "loss": 1.2579,
+      "step": 117700
+    },
+    {
+      "epoch": 3.515659988654346,
+      "grad_norm": 5.568453311920166,
+      "learning_rate": 0.0001663586649066566,
+      "loss": 1.2486,
+      "step": 117750
+    },
+    {
+      "epoch": 3.517152837906428,
+      "grad_norm": 4.636147975921631,
+      "learning_rate": 0.0001663443791107378,
+      "loss": 1.2411,
+      "step": 117800
+    },
+    {
+      "epoch": 3.518645687158511,
+      "grad_norm": 5.019303798675537,
+      "learning_rate": 0.00016633009331481894,
+      "loss": 1.2608,
+      "step": 117850
+    },
+    {
+      "epoch": 3.5201385364105935,
+      "grad_norm": 5.414350986480713,
+      "learning_rate": 0.00016631580751890013,
+      "loss": 1.2642,
+      "step": 117900
+    },
+    {
+      "epoch": 3.5216313856626758,
+      "grad_norm": 3.891533136367798,
+      "learning_rate": 0.00016630152172298127,
+      "loss": 1.2433,
+      "step": 117950
+    },
+    {
+      "epoch": 3.5231242349147585,
+      "grad_norm": 3.9345791339874268,
+      "learning_rate": 0.00016628723592706246,
+      "loss": 1.2654,
+      "step": 118000
+    },
+    {
+      "epoch": 3.5246170841668407,
+      "grad_norm": 3.8173060417175293,
+      "learning_rate": 0.0001662729501311436,
+      "loss": 1.2531,
+      "step": 118050
+    },
+    {
+      "epoch": 3.5261099334189234,
+      "grad_norm": 4.534266471862793,
+      "learning_rate": 0.0001662586643352248,
+      "loss": 1.2924,
+      "step": 118100
+    },
+    {
+      "epoch": 3.527602782671006,
+      "grad_norm": 5.261129856109619,
+      "learning_rate": 0.00016624437853930596,
+      "loss": 1.2433,
+      "step": 118150
+    },
+    {
+      "epoch": 3.5290956319230884,
+      "grad_norm": 4.014307022094727,
+      "learning_rate": 0.00016623009274338712,
+      "loss": 1.2501,
+      "step": 118200
+    },
+    {
+      "epoch": 3.5305884811751707,
+      "grad_norm": 4.939244270324707,
+      "learning_rate": 0.00016621580694746828,
+      "loss": 1.2156,
+      "step": 118250
+    },
+    {
+      "epoch": 3.5320813304272534,
+      "grad_norm": 7.338710308074951,
+      "learning_rate": 0.00016620152115154945,
+      "loss": 1.2571,
+      "step": 118300
+    },
+    {
+      "epoch": 3.533574179679336,
+      "grad_norm": 4.462497711181641,
+      "learning_rate": 0.00016618723535563061,
+      "loss": 1.2532,
+      "step": 118350
+    },
+    {
+      "epoch": 3.5350670289314188,
+      "grad_norm": 3.8068175315856934,
+      "learning_rate": 0.00016617294955971178,
+      "loss": 1.2707,
+      "step": 118400
+    },
+    {
+      "epoch": 3.536559878183501,
+      "grad_norm": 4.011793613433838,
+      "learning_rate": 0.00016615866376379294,
+      "loss": 1.2012,
+      "step": 118450
+    },
+    {
+      "epoch": 3.5380527274355833,
+      "grad_norm": 4.955504894256592,
+      "learning_rate": 0.0001661443779678741,
+      "loss": 1.2312,
+      "step": 118500
+    },
+    {
+      "epoch": 3.539545576687666,
+      "grad_norm": 5.355716228485107,
+      "learning_rate": 0.00016613009217195527,
+      "loss": 1.2677,
+      "step": 118550
+    },
+    {
+      "epoch": 3.5410384259397487,
+      "grad_norm": 3.9078450202941895,
+      "learning_rate": 0.00016611580637603644,
+      "loss": 1.2216,
+      "step": 118600
+    },
+    {
+      "epoch": 3.542531275191831,
+      "grad_norm": 4.30654764175415,
+      "learning_rate": 0.0001661015205801176,
+      "loss": 1.2864,
+      "step": 118650
+    },
+    {
+      "epoch": 3.5440241244439137,
+      "grad_norm": 5.134391784667969,
+      "learning_rate": 0.00016608723478419877,
+      "loss": 1.2577,
+      "step": 118700
+    },
+    {
+      "epoch": 3.545516973695996,
+      "grad_norm": 3.4655020236968994,
+      "learning_rate": 0.00016607294898827993,
+      "loss": 1.2427,
+      "step": 118750
+    },
+    {
+      "epoch": 3.5470098229480787,
+      "grad_norm": 4.221212863922119,
+      "learning_rate": 0.0001660586631923611,
+      "loss": 1.243,
+      "step": 118800
+    },
+    {
+      "epoch": 3.5485026722001614,
+      "grad_norm": 5.412088394165039,
+      "learning_rate": 0.0001660443773964423,
+      "loss": 1.2377,
+      "step": 118850
+    },
+    {
+      "epoch": 3.5499955214522436,
+      "grad_norm": 5.42820930480957,
+      "learning_rate": 0.00016603009160052343,
+      "loss": 1.245,
+      "step": 118900
+    },
+    {
+      "epoch": 3.5514883707043263,
+      "grad_norm": 4.585496425628662,
+      "learning_rate": 0.00016601580580460462,
+      "loss": 1.2885,
+      "step": 118950
+    },
+    {
+      "epoch": 3.5529812199564086,
+      "grad_norm": 4.376836776733398,
+      "learning_rate": 0.00016600152000868575,
+      "loss": 1.2915,
+      "step": 119000
+    },
+    {
+      "epoch": 3.5544740692084913,
+      "grad_norm": 5.338039875030518,
+      "learning_rate": 0.00016598723421276695,
+      "loss": 1.1785,
+      "step": 119050
+    },
+    {
+      "epoch": 3.555966918460574,
+      "grad_norm": 4.422656059265137,
+      "learning_rate": 0.00016597294841684808,
+      "loss": 1.235,
+      "step": 119100
+    },
+    {
+      "epoch": 3.5574597677126563,
+      "grad_norm": 5.377553939819336,
+      "learning_rate": 0.00016595866262092928,
+      "loss": 1.2796,
+      "step": 119150
+    },
+    {
+      "epoch": 3.558952616964739,
+      "grad_norm": 4.3863959312438965,
+      "learning_rate": 0.00016594437682501044,
+      "loss": 1.2497,
+      "step": 119200
+    },
+    {
+      "epoch": 3.5604454662168212,
+      "grad_norm": 5.522170066833496,
+      "learning_rate": 0.0001659300910290916,
+      "loss": 1.2899,
+      "step": 119250
+    },
+    {
+      "epoch": 3.561938315468904,
+      "grad_norm": 3.6835429668426514,
+      "learning_rate": 0.00016591580523317277,
+      "loss": 1.2272,
+      "step": 119300
+    },
+    {
+      "epoch": 3.5634311647209866,
+      "grad_norm": 3.4317328929901123,
+      "learning_rate": 0.00016590151943725393,
+      "loss": 1.2315,
+      "step": 119350
+    },
+    {
+      "epoch": 3.564924013973069,
+      "grad_norm": 4.688202381134033,
+      "learning_rate": 0.0001658872336413351,
+      "loss": 1.2938,
+      "step": 119400
+    },
+    {
+      "epoch": 3.5664168632251516,
+      "grad_norm": 3.5351858139038086,
+      "learning_rate": 0.00016587294784541626,
+      "loss": 1.2247,
+      "step": 119450
+    },
+    {
+      "epoch": 3.567909712477234,
+      "grad_norm": 4.518115043640137,
+      "learning_rate": 0.00016585866204949743,
+      "loss": 1.3105,
+      "step": 119500
+    },
+    {
+      "epoch": 3.5694025617293166,
+      "grad_norm": 3.226865530014038,
+      "learning_rate": 0.0001658443762535786,
+      "loss": 1.2498,
+      "step": 119550
+    },
+    {
+      "epoch": 3.5708954109813993,
+      "grad_norm": 5.850762367248535,
+      "learning_rate": 0.00016583009045765976,
+      "loss": 1.2802,
+      "step": 119600
+    },
+    {
+      "epoch": 3.5723882602334815,
+      "grad_norm": 6.21754789352417,
+      "learning_rate": 0.00016581580466174095,
+      "loss": 1.23,
+      "step": 119650
+    },
+    {
+      "epoch": 3.5738811094855643,
+      "grad_norm": 5.319993019104004,
+      "learning_rate": 0.0001658015188658221,
+      "loss": 1.3102,
+      "step": 119700
+    },
+    {
+      "epoch": 3.5753739587376465,
+      "grad_norm": 4.379292964935303,
+      "learning_rate": 0.00016578723306990328,
+      "loss": 1.203,
+      "step": 119750
+    },
+    {
+      "epoch": 3.5768668079897292,
+      "grad_norm": 3.9974188804626465,
+      "learning_rate": 0.00016577294727398442,
+      "loss": 1.29,
+      "step": 119800
+    },
+    {
+      "epoch": 3.578359657241812,
+      "grad_norm": 4.0016188621521,
+      "learning_rate": 0.0001657586614780656,
+      "loss": 1.2131,
+      "step": 119850
+    },
+    {
+      "epoch": 3.579852506493894,
+      "grad_norm": 5.10989236831665,
+      "learning_rate": 0.00016574437568214675,
+      "loss": 1.2519,
+      "step": 119900
+    },
+    {
+      "epoch": 3.581345355745977,
+      "grad_norm": 3.3042585849761963,
+      "learning_rate": 0.00016573008988622794,
+      "loss": 1.2462,
+      "step": 119950
+    },
+    {
+      "epoch": 3.582838204998059,
+      "grad_norm": 6.814448356628418,
+      "learning_rate": 0.0001657158040903091,
+      "loss": 1.2539,
+      "step": 120000
     }
   ],
   "logging_steps": 50,
@@ -15433,7 +16833,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 3.0274271789208044e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
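The ~1,400 added lines above are simply the next stretch of the training log: one record every 50 steps (matching "logging_steps": 50) from step 110,050 through 120,000, with loss hovering around 1.2 and the learning rate decaying linearly by about 1.43e-8 per record. A small sketch to summarize the new window, assuming the standard HF Trainer schema in which these records live under the "log_history" key:

import json

# Sketch: summarize the newly logged window of trainer_state.json.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

window = [e for e in state["log_history"]
          if 110000 < e.get("step", 0) <= 120000 and "loss" in e]
losses = [e["loss"] for e in window]
best = min(losses)
print(f"{len(window)} records, mean loss {sum(losses) / len(losses):.4f}, "
      f"min {best:.4f} at step {window[losses.index(best)]['step']}")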