Training in progress, step 100000, checkpoint

- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step100000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step100000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b7588cefc151efd1e1cfa17e895b1010d6ed315056d026e97629e18b3972618d
 size 42002584
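Both sides of this diff are Git LFS pointer files, not the adapter weights themselves: the repository tracks only the version/oid/size triple, and the blob lives in LFS storage. A minimal sketch (Python; the local path is assumed to be a fully downloaded copy of the blob) of checking a downloaded file against the pointer above:

import hashlib
from pathlib import Path

def verify_lfs_pointer(blob_path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded blob against the oid/size recorded in an LFS pointer."""
    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with blob.open("rb") as f:
        # Stream in 1 MiB chunks so large checkpoints don't load into memory at once.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the new pointer above; the local path is illustrative.
print(verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "b7588cefc151efd1e1cfa17e895b1010d6ed315056d026e97629e18b3972618d",
    42002584,
))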
last-checkpoint/global_step100000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4b785c42f454a1c51dbed6c5b74a9d6343749197a5e6cde1d445ac4b3ca1a60
+size 251710672
last-checkpoint/global_step100000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fff2710a8442c568e06c9c7d551c43edc3c1860378cc911fce7987b968485f63
+size 153747385
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-
+global_step100000
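`latest` is the tag file DeepSpeed writes next to its checkpoint directories: it names the subfolder (now `global_step100000`) that holds the partitioned optimizer and model states added above. A minimal sketch, assuming DeepSpeed is installed and the checkpoint directory has been downloaded locally, of consolidating those shards into a single fp32 state dict with DeepSpeed's bundled zero_to_fp32 helper:

from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# With tag=None the helper reads the `latest` file to resolve global_step100000/.
state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint", tag=None)
print(f"consolidated {len(state_dict)} parameter tensors")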
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a14f1c8b063c37636f2160e6de4343bf434f663e4b031e782cf421c1c9b0a5d7
 size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.
+  "epoch": 2.9856985041650494,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 100000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12614,6 +12614,1406 @@
       "learning_rate": 0.0001742872816416094,
       "loss": 1.2491,
       "step": 90000
+    },
+    {
+      "epoch": 2.688621503000627,
+      "grad_norm": 3.7262911796569824,
+      "learning_rate": 0.00017427299584569055,
+      "loss": 1.3218,
+      "step": 90050
+    },
+    {
+      "epoch": 2.6901143522527096,
+      "grad_norm": 4.5458855628967285,
+      "learning_rate": 0.00017425871004977172,
+      "loss": 1.3188,
+      "step": 90100
+    },
+    {
+      "epoch": 2.691607201504792,
+      "grad_norm": 4.8145599365234375,
+      "learning_rate": 0.00017424442425385288,
+      "loss": 1.2746,
+      "step": 90150
+    },
+    {
+      "epoch": 2.6931000507568745,
+      "grad_norm": 5.353755474090576,
+      "learning_rate": 0.00017423013845793405,
+      "loss": 1.2153,
+      "step": 90200
+    },
+    {
+      "epoch": 2.6945929000089572,
+      "grad_norm": 5.111592769622803,
+      "learning_rate": 0.0001742158526620152,
+      "loss": 1.2482,
+      "step": 90250
+    },
+    {
+      "epoch": 2.6960857492610395,
+      "grad_norm": 5.903563022613525,
+      "learning_rate": 0.00017420156686609638,
+      "loss": 1.2749,
+      "step": 90300
+    },
+    {
+      "epoch": 2.697578598513122,
+      "grad_norm": 5.421234130859375,
+      "learning_rate": 0.00017418728107017754,
+      "loss": 1.3023,
+      "step": 90350
+    },
+    {
+      "epoch": 2.6990714477652045,
+      "grad_norm": 4.8063154220581055,
+      "learning_rate": 0.00017417299527425873,
+      "loss": 1.3413,
+      "step": 90400
+    },
+    {
+      "epoch": 2.700564297017287,
+      "grad_norm": 4.831643104553223,
+      "learning_rate": 0.00017415870947833987,
+      "loss": 1.3733,
+      "step": 90450
+    },
+    {
+      "epoch": 2.70205714626937,
+      "grad_norm": 5.314348220825195,
+      "learning_rate": 0.00017414442368242106,
+      "loss": 1.2819,
+      "step": 90500
+    },
+    {
+      "epoch": 2.703549995521452,
+      "grad_norm": 5.592355728149414,
+      "learning_rate": 0.0001741301378865022,
+      "loss": 1.2959,
+      "step": 90550
+    },
+    {
+      "epoch": 2.705042844773535,
+      "grad_norm": 4.042795181274414,
+      "learning_rate": 0.0001741158520905834,
+      "loss": 1.28,
+      "step": 90600
+    },
+    {
+      "epoch": 2.706535694025617,
+      "grad_norm": 4.347578525543213,
+      "learning_rate": 0.00017410156629466456,
+      "loss": 1.3491,
+      "step": 90650
+    },
+    {
+      "epoch": 2.7080285432777,
+      "grad_norm": 5.4727678298950195,
+      "learning_rate": 0.00017408728049874572,
+      "loss": 1.3294,
+      "step": 90700
+    },
+    {
+      "epoch": 2.7095213925297825,
+      "grad_norm": 4.353364944458008,
+      "learning_rate": 0.00017407299470282688,
+      "loss": 1.27,
+      "step": 90750
+    },
+    {
+      "epoch": 2.711014241781865,
+      "grad_norm": 3.916985034942627,
+      "learning_rate": 0.00017405870890690805,
+      "loss": 1.297,
+      "step": 90800
+    },
+    {
+      "epoch": 2.7125070910339475,
+      "grad_norm": 4.451666831970215,
+      "learning_rate": 0.00017404442311098921,
+      "loss": 1.3051,
+      "step": 90850
+    },
+    {
+      "epoch": 2.7139999402860298,
+      "grad_norm": 5.726661205291748,
+      "learning_rate": 0.00017403013731507038,
+      "loss": 1.3353,
+      "step": 90900
+    },
+    {
+      "epoch": 2.7154927895381125,
+      "grad_norm": 4.238119125366211,
+      "learning_rate": 0.00017401585151915154,
+      "loss": 1.3279,
+      "step": 90950
+    },
+    {
+      "epoch": 2.716985638790195,
+      "grad_norm": 4.4539642333984375,
+      "learning_rate": 0.0001740015657232327,
+      "loss": 1.2492,
+      "step": 91000
+    },
+    {
+      "epoch": 2.7184784880422774,
+      "grad_norm": 5.241584300994873,
+      "learning_rate": 0.00017398727992731387,
+      "loss": 1.3039,
+      "step": 91050
+    },
+    {
+      "epoch": 2.71997133729436,
+      "grad_norm": 7.063384056091309,
+      "learning_rate": 0.00017397299413139506,
+      "loss": 1.2849,
+      "step": 91100
+    },
+    {
+      "epoch": 2.7214641865464424,
+      "grad_norm": 3.5526702404022217,
+      "learning_rate": 0.0001739587083354762,
+      "loss": 1.3074,
+      "step": 91150
+    },
+    {
+      "epoch": 2.722957035798525,
+      "grad_norm": 4.808590888977051,
+      "learning_rate": 0.0001739444225395574,
+      "loss": 1.3124,
+      "step": 91200
+    },
+    {
+      "epoch": 2.724449885050608,
+      "grad_norm": 6.006200790405273,
+      "learning_rate": 0.00017393013674363853,
+      "loss": 1.2375,
+      "step": 91250
+    },
+    {
+      "epoch": 2.72594273430269,
+      "grad_norm": 3.5086710453033447,
+      "learning_rate": 0.00017391585094771972,
+      "loss": 1.2535,
+      "step": 91300
+    },
+    {
+      "epoch": 2.727435583554773,
+      "grad_norm": 4.1073832511901855,
+      "learning_rate": 0.00017390156515180086,
+      "loss": 1.3369,
+      "step": 91350
+    },
+    {
+      "epoch": 2.728928432806855,
+      "grad_norm": 4.759340286254883,
+      "learning_rate": 0.00017388727935588205,
+      "loss": 1.2975,
+      "step": 91400
+    },
+    {
+      "epoch": 2.7304212820589377,
+      "grad_norm": 4.313277244567871,
+      "learning_rate": 0.00017387299355996322,
+      "loss": 1.2607,
+      "step": 91450
+    },
+    {
+      "epoch": 2.7319141313110205,
+      "grad_norm": 4.681273937225342,
+      "learning_rate": 0.00017385870776404438,
+      "loss": 1.2994,
+      "step": 91500
+    },
+    {
+      "epoch": 2.7334069805631027,
+      "grad_norm": 5.569629192352295,
+      "learning_rate": 0.00017384442196812555,
+      "loss": 1.29,
+      "step": 91550
+    },
+    {
+      "epoch": 2.7348998298151854,
+      "grad_norm": 5.809197425842285,
+      "learning_rate": 0.0001738301361722067,
+      "loss": 1.3245,
+      "step": 91600
+    },
+    {
+      "epoch": 2.7363926790672677,
+      "grad_norm": 3.6838326454162598,
+      "learning_rate": 0.00017381585037628788,
+      "loss": 1.2904,
+      "step": 91650
+    },
+    {
+      "epoch": 2.7378855283193504,
+      "grad_norm": 5.378135681152344,
+      "learning_rate": 0.00017380156458036904,
+      "loss": 1.2837,
+      "step": 91700
+    },
+    {
+      "epoch": 2.739378377571433,
+      "grad_norm": 5.717269420623779,
+      "learning_rate": 0.0001737872787844502,
+      "loss": 1.301,
+      "step": 91750
+    },
+    {
+      "epoch": 2.7408712268235154,
+      "grad_norm": 5.068012237548828,
+      "learning_rate": 0.00017377299298853137,
+      "loss": 1.2899,
+      "step": 91800
+    },
+    {
+      "epoch": 2.7423640760755976,
+      "grad_norm": 5.394426345825195,
+      "learning_rate": 0.00017375870719261253,
+      "loss": 1.2719,
+      "step": 91850
+    },
+    {
+      "epoch": 2.7438569253276803,
+      "grad_norm": 4.564161777496338,
+      "learning_rate": 0.00017374442139669373,
+      "loss": 1.2951,
+      "step": 91900
+    },
+    {
+      "epoch": 2.745349774579763,
+      "grad_norm": 5.253499507904053,
+      "learning_rate": 0.00017373013560077486,
+      "loss": 1.2543,
+      "step": 91950
+    },
+    {
+      "epoch": 2.7468426238318453,
+      "grad_norm": 3.995630979537964,
+      "learning_rate": 0.00017371584980485605,
+      "loss": 1.2854,
+      "step": 92000
+    },
+    {
+      "epoch": 2.748335473083928,
+      "grad_norm": 3.954859733581543,
+      "learning_rate": 0.0001737015640089372,
+      "loss": 1.287,
+      "step": 92050
+    },
+    {
+      "epoch": 2.7498283223360103,
+      "grad_norm": 4.409485340118408,
+      "learning_rate": 0.00017368727821301838,
+      "loss": 1.2889,
+      "step": 92100
+    },
+    {
+      "epoch": 2.751321171588093,
+      "grad_norm": 5.021519660949707,
+      "learning_rate": 0.00017367299241709952,
+      "loss": 1.3849,
+      "step": 92150
+    },
+    {
+      "epoch": 2.7528140208401757,
+      "grad_norm": 4.707982063293457,
+      "learning_rate": 0.00017365870662118071,
+      "loss": 1.3099,
+      "step": 92200
+    },
+    {
+      "epoch": 2.754306870092258,
+      "grad_norm": 3.8836755752563477,
+      "learning_rate": 0.00017364442082526188,
+      "loss": 1.2707,
+      "step": 92250
+    },
+    {
+      "epoch": 2.7557997193443406,
+      "grad_norm": 4.692137718200684,
+      "learning_rate": 0.00017363013502934304,
+      "loss": 1.3051,
+      "step": 92300
+    },
+    {
+      "epoch": 2.757292568596423,
+      "grad_norm": 4.50584602355957,
+      "learning_rate": 0.0001736158492334242,
+      "loss": 1.2969,
+      "step": 92350
+    },
+    {
+      "epoch": 2.7587854178485056,
+      "grad_norm": 5.027088642120361,
+      "learning_rate": 0.00017360156343750535,
+      "loss": 1.2846,
+      "step": 92400
+    },
+    {
+      "epoch": 2.7602782671005883,
+      "grad_norm": 5.339383125305176,
+      "learning_rate": 0.00017358727764158654,
+      "loss": 1.3153,
+      "step": 92450
+    },
+    {
+      "epoch": 2.7617711163526706,
+      "grad_norm": 3.3670814037323,
+      "learning_rate": 0.00017357299184566767,
+      "loss": 1.2268,
+      "step": 92500
+    },
+    {
+      "epoch": 2.7632639656047533,
+      "grad_norm": 4.426408290863037,
+      "learning_rate": 0.00017355870604974887,
+      "loss": 1.2737,
+      "step": 92550
+    },
+    {
+      "epoch": 2.7647568148568356,
+      "grad_norm": 4.831822395324707,
+      "learning_rate": 0.00017354442025383003,
+      "loss": 1.3036,
+      "step": 92600
+    },
+    {
+      "epoch": 2.7662496641089183,
+      "grad_norm": 5.008368015289307,
+      "learning_rate": 0.0001735301344579112,
+      "loss": 1.2846,
+      "step": 92650
+    },
+    {
+      "epoch": 2.767742513361001,
+      "grad_norm": 4.761803150177002,
+      "learning_rate": 0.00017351584866199236,
+      "loss": 1.2715,
+      "step": 92700
+    },
+    {
+      "epoch": 2.7692353626130832,
+      "grad_norm": 4.651097774505615,
+      "learning_rate": 0.00017350156286607352,
+      "loss": 1.2337,
+      "step": 92750
+    },
+    {
+      "epoch": 2.770728211865166,
+      "grad_norm": 4.635573387145996,
+      "learning_rate": 0.0001734872770701547,
+      "loss": 1.3133,
+      "step": 92800
+    },
+    {
+      "epoch": 2.772221061117248,
+      "grad_norm": 4.60062313079834,
+      "learning_rate": 0.00017347299127423585,
+      "loss": 1.2319,
+      "step": 92850
+    },
+    {
+      "epoch": 2.773713910369331,
+      "grad_norm": 5.222136497497559,
+      "learning_rate": 0.00017345870547831702,
+      "loss": 1.3058,
+      "step": 92900
+    },
+    {
+      "epoch": 2.7752067596214136,
+      "grad_norm": 5.110498428344727,
+      "learning_rate": 0.00017344441968239818,
+      "loss": 1.2217,
+      "step": 92950
+    },
+    {
+      "epoch": 2.776699608873496,
+      "grad_norm": 4.382105350494385,
+      "learning_rate": 0.00017343013388647935,
+      "loss": 1.2964,
+      "step": 93000
+    },
+    {
+      "epoch": 2.7781924581255786,
+      "grad_norm": 4.167068958282471,
+      "learning_rate": 0.00017341584809056054,
+      "loss": 1.3275,
+      "step": 93050
+    },
+    {
+      "epoch": 2.779685307377661,
+      "grad_norm": 2.9642536640167236,
+      "learning_rate": 0.00017340156229464168,
+      "loss": 1.3143,
+      "step": 93100
+    },
+    {
+      "epoch": 2.7811781566297435,
+      "grad_norm": 3.9589407444000244,
+      "learning_rate": 0.00017338727649872287,
+      "loss": 1.2388,
+      "step": 93150
+    },
+    {
+      "epoch": 2.7826710058818263,
+      "grad_norm": 4.6660051345825195,
+      "learning_rate": 0.000173372990702804,
+      "loss": 1.3026,
+      "step": 93200
+    },
+    {
+      "epoch": 2.7841638551339085,
+      "grad_norm": 4.213698387145996,
+      "learning_rate": 0.0001733587049068852,
+      "loss": 1.3192,
+      "step": 93250
+    },
+    {
+      "epoch": 2.7856567043859912,
+      "grad_norm": 4.1265645027160645,
+      "learning_rate": 0.00017334441911096636,
+      "loss": 1.2899,
+      "step": 93300
+    },
+    {
+      "epoch": 2.7871495536380735,
+      "grad_norm": 5.5079545974731445,
+      "learning_rate": 0.00017333013331504753,
+      "loss": 1.2968,
+      "step": 93350
+    },
+    {
+      "epoch": 2.788642402890156,
+      "grad_norm": 4.825951099395752,
+      "learning_rate": 0.0001733158475191287,
+      "loss": 1.2432,
+      "step": 93400
+    },
+    {
+      "epoch": 2.790135252142239,
+      "grad_norm": 3.7230324745178223,
+      "learning_rate": 0.00017330156172320986,
+      "loss": 1.385,
+      "step": 93450
+    },
+    {
+      "epoch": 2.791628101394321,
+      "grad_norm": 4.764435291290283,
+      "learning_rate": 0.00017328727592729102,
+      "loss": 1.2362,
+      "step": 93500
+    },
+    {
+      "epoch": 2.793120950646404,
+      "grad_norm": 5.750154972076416,
+      "learning_rate": 0.00017327299013137219,
+      "loss": 1.2868,
+      "step": 93550
+    },
+    {
+      "epoch": 2.794613799898486,
+      "grad_norm": 3.704388380050659,
+      "learning_rate": 0.00017325870433545335,
+      "loss": 1.2839,
+      "step": 93600
+    },
+    {
+      "epoch": 2.796106649150569,
+      "grad_norm": 5.4798407554626465,
+      "learning_rate": 0.00017324441853953452,
+      "loss": 1.3227,
+      "step": 93650
+    },
+    {
+      "epoch": 2.7975994984026515,
+      "grad_norm": 4.121763229370117,
+      "learning_rate": 0.00017323013274361568,
+      "loss": 1.2249,
+      "step": 93700
+    },
+    {
+      "epoch": 2.799092347654734,
+      "grad_norm": 4.939391136169434,
+      "learning_rate": 0.00017321584694769684,
+      "loss": 1.3017,
+      "step": 93750
+    },
+    {
+      "epoch": 2.8005851969068165,
+      "grad_norm": 4.476954936981201,
+      "learning_rate": 0.000173201561151778,
+      "loss": 1.2851,
+      "step": 93800
+    },
+    {
+      "epoch": 2.8020780461588988,
+      "grad_norm": 4.639068603515625,
+      "learning_rate": 0.0001731872753558592,
+      "loss": 1.3026,
+      "step": 93850
+    },
+    {
+      "epoch": 2.8035708954109815,
+      "grad_norm": 4.282186031341553,
+      "learning_rate": 0.00017317298955994034,
+      "loss": 1.2374,
+      "step": 93900
+    },
+    {
+      "epoch": 2.805063744663064,
+      "grad_norm": 5.077556610107422,
+      "learning_rate": 0.00017315870376402153,
+      "loss": 1.3172,
+      "step": 93950
+    },
+    {
+      "epoch": 2.8065565939151464,
+      "grad_norm": 5.154215335845947,
+      "learning_rate": 0.00017314441796810267,
+      "loss": 1.2998,
+      "step": 94000
+    },
+    {
+      "epoch": 2.8080494431672287,
+      "grad_norm": 6.019194602966309,
+      "learning_rate": 0.00017313013217218386,
+      "loss": 1.3121,
+      "step": 94050
+    },
+    {
+      "epoch": 2.8095422924193114,
+      "grad_norm": 4.445097923278809,
+      "learning_rate": 0.00017311584637626502,
+      "loss": 1.2887,
+      "step": 94100
+    },
+    {
+      "epoch": 2.811035141671394,
+      "grad_norm": 3.711930990219116,
+      "learning_rate": 0.0001731015605803462,
+      "loss": 1.3031,
+      "step": 94150
+    },
+    {
+      "epoch": 2.8125279909234764,
+      "grad_norm": 7.101813316345215,
+      "learning_rate": 0.00017308727478442735,
+      "loss": 1.2877,
+      "step": 94200
+    },
+    {
+      "epoch": 2.814020840175559,
+      "grad_norm": 5.068753719329834,
+      "learning_rate": 0.00017307298898850852,
+      "loss": 1.2459,
+      "step": 94250
+    },
+    {
+      "epoch": 2.8155136894276414,
+      "grad_norm": 5.185253143310547,
+      "learning_rate": 0.00017305870319258968,
+      "loss": 1.2292,
+      "step": 94300
+    },
+    {
+      "epoch": 2.817006538679724,
+      "grad_norm": 4.654400825500488,
+      "learning_rate": 0.00017304441739667085,
+      "loss": 1.325,
+      "step": 94350
+    },
+    {
+      "epoch": 2.8184993879318068,
+      "grad_norm": 4.575232028961182,
+      "learning_rate": 0.000173030131600752,
+      "loss": 1.3077,
+      "step": 94400
+    },
+    {
+      "epoch": 2.819992237183889,
+      "grad_norm": 5.062366485595703,
+      "learning_rate": 0.00017301584580483318,
+      "loss": 1.319,
+      "step": 94450
+    },
+    {
+      "epoch": 2.8214850864359717,
+      "grad_norm": 3.9217934608459473,
+      "learning_rate": 0.00017300156000891434,
+      "loss": 1.3042,
+      "step": 94500
+    },
+    {
+      "epoch": 2.822977935688054,
+      "grad_norm": 7.488821506500244,
+      "learning_rate": 0.00017298727421299553,
+      "loss": 1.302,
+      "step": 94550
+    },
+    {
+      "epoch": 2.8244707849401367,
+      "grad_norm": 4.662975311279297,
+      "learning_rate": 0.00017297298841707667,
+      "loss": 1.2965,
+      "step": 94600
+    },
+    {
+      "epoch": 2.8259636341922194,
+      "grad_norm": 4.872460842132568,
+      "learning_rate": 0.00017295870262115786,
+      "loss": 1.2729,
+      "step": 94650
+    },
+    {
+      "epoch": 2.8274564834443017,
+      "grad_norm": 4.52310848236084,
+      "learning_rate": 0.000172944416825239,
+      "loss": 1.2872,
+      "step": 94700
+    },
+    {
+      "epoch": 2.8289493326963844,
+      "grad_norm": 4.012574672698975,
+      "learning_rate": 0.0001729301310293202,
+      "loss": 1.2537,
+      "step": 94750
+    },
+    {
+      "epoch": 2.8304421819484666,
+      "grad_norm": 5.984830379486084,
+      "learning_rate": 0.00017291584523340133,
+      "loss": 1.3413,
+      "step": 94800
+    },
+    {
+      "epoch": 2.8319350312005493,
+      "grad_norm": 4.736281394958496,
+      "learning_rate": 0.00017290155943748252,
+      "loss": 1.3116,
+      "step": 94850
+    },
+    {
+      "epoch": 2.833427880452632,
+      "grad_norm": 4.227638244628906,
+      "learning_rate": 0.00017288727364156369,
+      "loss": 1.2607,
+      "step": 94900
+    },
+    {
+      "epoch": 2.8349207297047143,
+      "grad_norm": 4.373624324798584,
+      "learning_rate": 0.00017287298784564485,
+      "loss": 1.3173,
+      "step": 94950
+    },
+    {
+      "epoch": 2.836413578956797,
+      "grad_norm": 3.9513514041900635,
+      "learning_rate": 0.00017285870204972602,
+      "loss": 1.2344,
+      "step": 95000
+    },
+    {
+      "epoch": 2.8379064282088793,
+      "grad_norm": 4.8411664962768555,
+      "learning_rate": 0.00017284441625380715,
+      "loss": 1.2618,
+      "step": 95050
+    },
+    {
+      "epoch": 2.839399277460962,
+      "grad_norm": 6.098468780517578,
+      "learning_rate": 0.00017283013045788834,
+      "loss": 1.2512,
+      "step": 95100
+    },
+    {
+      "epoch": 2.8408921267130447,
+      "grad_norm": 5.991581439971924,
+      "learning_rate": 0.00017281584466196948,
+      "loss": 1.306,
+      "step": 95150
+    },
+    {
+      "epoch": 2.842384975965127,
+      "grad_norm": 4.810219764709473,
+      "learning_rate": 0.00017280155886605067,
+      "loss": 1.3146,
+      "step": 95200
+    },
+    {
+      "epoch": 2.8438778252172097,
+      "grad_norm": 4.411154747009277,
+      "learning_rate": 0.00017278727307013184,
+      "loss": 1.3104,
+      "step": 95250
+    },
+    {
+      "epoch": 2.845370674469292,
+      "grad_norm": 5.2504472732543945,
+      "learning_rate": 0.000172772987274213,
+      "loss": 1.3055,
+      "step": 95300
+    },
+    {
+      "epoch": 2.8468635237213746,
+      "grad_norm": 3.6604950428009033,
+      "learning_rate": 0.00017275870147829417,
+      "loss": 1.2861,
+      "step": 95350
+    },
+    {
+      "epoch": 2.8483563729734573,
+      "grad_norm": 4.993444442749023,
+      "learning_rate": 0.00017274441568237533,
+      "loss": 1.3199,
+      "step": 95400
+    },
+    {
+      "epoch": 2.8498492222255396,
+      "grad_norm": 4.28807258605957,
+      "learning_rate": 0.0001727301298864565,
+      "loss": 1.2676,
+      "step": 95450
+    },
+    {
+      "epoch": 2.8513420714776223,
+      "grad_norm": 4.000975608825684,
+      "learning_rate": 0.00017271584409053766,
+      "loss": 1.2916,
+      "step": 95500
+    },
+    {
+      "epoch": 2.8528349207297046,
+      "grad_norm": 3.3593053817749023,
+      "learning_rate": 0.00017270155829461883,
+      "loss": 1.2802,
+      "step": 95550
+    },
+    {
+      "epoch": 2.8543277699817873,
+      "grad_norm": 4.179107666015625,
+      "learning_rate": 0.0001726872724987,
+      "loss": 1.3147,
+      "step": 95600
+    },
+    {
+      "epoch": 2.85582061923387,
+      "grad_norm": 3.5861895084381104,
+      "learning_rate": 0.00017267298670278116,
+      "loss": 1.2653,
+      "step": 95650
+    },
+    {
+      "epoch": 2.8573134684859522,
+      "grad_norm": 4.41680383682251,
+      "learning_rate": 0.00017265870090686235,
+      "loss": 1.2961,
+      "step": 95700
+    },
+    {
+      "epoch": 2.858806317738035,
+      "grad_norm": 6.727049827575684,
+      "learning_rate": 0.00017264441511094349,
+      "loss": 1.2936,
+      "step": 95750
+    },
+    {
+      "epoch": 2.860299166990117,
+      "grad_norm": 4.895215034484863,
+      "learning_rate": 0.00017263012931502468,
+      "loss": 1.2528,
+      "step": 95800
+    },
+    {
+      "epoch": 2.8617920162422,
+      "grad_norm": 5.4271955490112305,
+      "learning_rate": 0.00017261584351910581,
+      "loss": 1.3365,
+      "step": 95850
+    },
+    {
+      "epoch": 2.8632848654942826,
+      "grad_norm": 4.273880481719971,
+      "learning_rate": 0.000172601557723187,
+      "loss": 1.2779,
+      "step": 95900
+    },
+    {
+      "epoch": 2.864777714746365,
+      "grad_norm": 4.767480850219727,
+      "learning_rate": 0.00017258727192726814,
+      "loss": 1.2716,
+      "step": 95950
+    },
+    {
+      "epoch": 2.8662705639984476,
+      "grad_norm": 4.384129524230957,
+      "learning_rate": 0.00017257298613134934,
+      "loss": 1.2969,
+      "step": 96000
+    },
+    {
+      "epoch": 2.86776341325053,
+      "grad_norm": 4.527589797973633,
+      "learning_rate": 0.0001725587003354305,
+      "loss": 1.266,
+      "step": 96050
+    },
+    {
+      "epoch": 2.8692562625026126,
+      "grad_norm": 4.149820804595947,
+      "learning_rate": 0.00017254441453951167,
+      "loss": 1.31,
+      "step": 96100
+    },
+    {
+      "epoch": 2.8707491117546953,
+      "grad_norm": 4.574423313140869,
+      "learning_rate": 0.00017253012874359283,
+      "loss": 1.3,
+      "step": 96150
+    },
+    {
+      "epoch": 2.8722419610067775,
+      "grad_norm": 4.80661153793335,
+      "learning_rate": 0.000172515842947674,
+      "loss": 1.304,
+      "step": 96200
+    },
+    {
+      "epoch": 2.87373481025886,
+      "grad_norm": 4.065107822418213,
+      "learning_rate": 0.00017250155715175516,
+      "loss": 1.2581,
+      "step": 96250
+    },
+    {
+      "epoch": 2.8752276595109425,
+      "grad_norm": 4.727302551269531,
+      "learning_rate": 0.00017248727135583632,
+      "loss": 1.321,
+      "step": 96300
+    },
+    {
+      "epoch": 2.876720508763025,
+      "grad_norm": 4.85087251663208,
+      "learning_rate": 0.0001724729855599175,
+      "loss": 1.3013,
+      "step": 96350
+    },
+    {
+      "epoch": 2.8782133580151075,
+      "grad_norm": 5.125801086425781,
+      "learning_rate": 0.00017245869976399865,
+      "loss": 1.2956,
+      "step": 96400
+    },
+    {
+      "epoch": 2.87970620726719,
+      "grad_norm": 4.144227504730225,
+      "learning_rate": 0.00017244441396807982,
+      "loss": 1.2929,
+      "step": 96450
+    },
+    {
+      "epoch": 2.8811990565192724,
+      "grad_norm": 4.702892303466797,
+      "learning_rate": 0.000172430128172161,
+      "loss": 1.361,
+      "step": 96500
+    },
+    {
+      "epoch": 2.882691905771355,
+      "grad_norm": 5.188230991363525,
+      "learning_rate": 0.00017241584237624215,
+      "loss": 1.2474,
+      "step": 96550
+    },
+    {
+      "epoch": 2.884184755023438,
+      "grad_norm": 5.263503551483154,
+      "learning_rate": 0.00017240155658032334,
+      "loss": 1.2728,
+      "step": 96600
+    },
+    {
+      "epoch": 2.88567760427552,
+      "grad_norm": 3.8955039978027344,
+      "learning_rate": 0.00017238727078440448,
+      "loss": 1.2638,
+      "step": 96650
+    },
+    {
+      "epoch": 2.887170453527603,
+      "grad_norm": 4.1187238693237305,
+      "learning_rate": 0.00017237298498848567,
+      "loss": 1.2934,
+      "step": 96700
+    },
+    {
+      "epoch": 2.888663302779685,
+      "grad_norm": 6.000136375427246,
+      "learning_rate": 0.00017235869919256683,
+      "loss": 1.293,
+      "step": 96750
+    },
+    {
+      "epoch": 2.890156152031768,
+      "grad_norm": 5.494427680969238,
+      "learning_rate": 0.000172344413396648,
+      "loss": 1.331,
+      "step": 96800
+    },
+    {
+      "epoch": 2.8916490012838505,
+      "grad_norm": 5.483157157897949,
+      "learning_rate": 0.00017233012760072916,
+      "loss": 1.3299,
+      "step": 96850
+    },
+    {
+      "epoch": 2.8931418505359328,
+      "grad_norm": 4.264294147491455,
+      "learning_rate": 0.00017231584180481033,
+      "loss": 1.2966,
+      "step": 96900
+    },
+    {
+      "epoch": 2.8946346997880155,
+      "grad_norm": 3.7987403869628906,
+      "learning_rate": 0.0001723015560088915,
+      "loss": 1.3028,
+      "step": 96950
+    },
+    {
+      "epoch": 2.8961275490400977,
+      "grad_norm": 4.712928771972656,
+      "learning_rate": 0.00017228727021297266,
+      "loss": 1.2154,
+      "step": 97000
+    },
+    {
+      "epoch": 2.8976203982921804,
+      "grad_norm": 5.0749640464782715,
+      "learning_rate": 0.00017227298441705382,
+      "loss": 1.269,
+      "step": 97050
+    },
+    {
+      "epoch": 2.899113247544263,
+      "grad_norm": 4.288541316986084,
+      "learning_rate": 0.00017225869862113499,
+      "loss": 1.2575,
+      "step": 97100
+    },
+    {
+      "epoch": 2.9006060967963454,
+      "grad_norm": 4.452602386474609,
+      "learning_rate": 0.00017224441282521615,
+      "loss": 1.2613,
+      "step": 97150
+    },
+    {
+      "epoch": 2.902098946048428,
+      "grad_norm": 4.485718250274658,
+      "learning_rate": 0.00017223012702929731,
+      "loss": 1.2678,
+      "step": 97200
+    },
+    {
+      "epoch": 2.9035917953005104,
+      "grad_norm": 3.692293643951416,
+      "learning_rate": 0.00017221584123337848,
+      "loss": 1.2929,
+      "step": 97250
+    },
+    {
+      "epoch": 2.905084644552593,
+      "grad_norm": 4.699619770050049,
+      "learning_rate": 0.00017220155543745967,
+      "loss": 1.2601,
+      "step": 97300
+    },
+    {
+      "epoch": 2.906577493804676,
+      "grad_norm": 4.477214336395264,
+      "learning_rate": 0.0001721872696415408,
+      "loss": 1.3272,
+      "step": 97350
+    },
+    {
+      "epoch": 2.908070343056758,
+      "grad_norm": 4.274310111999512,
+      "learning_rate": 0.000172172983845622,
+      "loss": 1.3221,
+      "step": 97400
+    },
+    {
+      "epoch": 2.9095631923088408,
+      "grad_norm": 4.423600196838379,
+      "learning_rate": 0.00017215869804970314,
+      "loss": 1.3161,
+      "step": 97450
+    },
+    {
+      "epoch": 2.911056041560923,
+      "grad_norm": 4.693662166595459,
+      "learning_rate": 0.00017214441225378433,
+      "loss": 1.3374,
+      "step": 97500
+    },
+    {
+      "epoch": 2.9125488908130057,
+      "grad_norm": 5.746738910675049,
+      "learning_rate": 0.0001721301264578655,
+      "loss": 1.3171,
+      "step": 97550
+    },
+    {
+      "epoch": 2.9140417400650884,
+      "grad_norm": 5.049290657043457,
+      "learning_rate": 0.00017211584066194666,
+      "loss": 1.2713,
+      "step": 97600
+    },
+    {
+      "epoch": 2.9155345893171707,
+      "grad_norm": 4.0549468994140625,
+      "learning_rate": 0.00017210155486602782,
+      "loss": 1.3214,
+      "step": 97650
+    },
+    {
+      "epoch": 2.9170274385692534,
+      "grad_norm": 5.128767013549805,
+      "learning_rate": 0.00017208726907010896,
+      "loss": 1.2717,
+      "step": 97700
+    },
+    {
+      "epoch": 2.9185202878213357,
+      "grad_norm": 5.148744106292725,
+      "learning_rate": 0.00017207298327419015,
+      "loss": 1.2602,
+      "step": 97750
+    },
+    {
+      "epoch": 2.9200131370734184,
+      "grad_norm": 3.682936429977417,
+      "learning_rate": 0.0001720586974782713,
+      "loss": 1.2941,
+      "step": 97800
+    },
+    {
+      "epoch": 2.921505986325501,
+      "grad_norm": 4.665682315826416,
+      "learning_rate": 0.00017204441168235248,
+      "loss": 1.2959,
+      "step": 97850
+    },
+    {
+      "epoch": 2.9229988355775833,
+      "grad_norm": 4.523177146911621,
+      "learning_rate": 0.00017203012588643365,
+      "loss": 1.2755,
+      "step": 97900
+    },
+    {
+      "epoch": 2.924491684829666,
+      "grad_norm": 4.719842433929443,
+      "learning_rate": 0.0001720158400905148,
+      "loss": 1.2523,
+      "step": 97950
+    },
+    {
+      "epoch": 2.9259845340817483,
+      "grad_norm": 3.7076125144958496,
+      "learning_rate": 0.00017200155429459598,
+      "loss": 1.2909,
+      "step": 98000
+    },
+    {
+      "epoch": 2.927477383333831,
+      "grad_norm": 8.15734577178955,
+      "learning_rate": 0.00017198726849867714,
+      "loss": 1.2369,
+      "step": 98050
+    },
+    {
+      "epoch": 2.9289702325859137,
+      "grad_norm": 4.653754711151123,
+      "learning_rate": 0.0001719729827027583,
+      "loss": 1.3744,
+      "step": 98100
+    },
+    {
+      "epoch": 2.930463081837996,
+      "grad_norm": 4.599735736846924,
+      "learning_rate": 0.00017195869690683947,
+      "loss": 1.333,
+      "step": 98150
+    },
+    {
+      "epoch": 2.9319559310900787,
+      "grad_norm": 5.794803619384766,
+      "learning_rate": 0.00017194441111092063,
+      "loss": 1.3269,
+      "step": 98200
+    },
+    {
+      "epoch": 2.933448780342161,
+      "grad_norm": 4.360086441040039,
+      "learning_rate": 0.0001719301253150018,
+      "loss": 1.3025,
+      "step": 98250
+    },
+    {
+      "epoch": 2.9349416295942437,
+      "grad_norm": 4.956981182098389,
+      "learning_rate": 0.00017191583951908296,
+      "loss": 1.2484,
+      "step": 98300
+    },
+    {
+      "epoch": 2.9364344788463264,
+      "grad_norm": 3.210866928100586,
+      "learning_rate": 0.00017190155372316416,
+      "loss": 1.2796,
+      "step": 98350
+    },
+    {
+      "epoch": 2.9379273280984086,
+      "grad_norm": 5.565187931060791,
+      "learning_rate": 0.0001718872679272453,
+      "loss": 1.3243,
+      "step": 98400
+    },
+    {
+      "epoch": 2.939420177350491,
+      "grad_norm": 4.15312385559082,
+      "learning_rate": 0.00017187298213132649,
+      "loss": 1.2922,
+      "step": 98450
+    },
+    {
+      "epoch": 2.9409130266025736,
+      "grad_norm": 4.836455345153809,
+      "learning_rate": 0.00017185869633540762,
+      "loss": 1.2495,
+      "step": 98500
+    },
+    {
+      "epoch": 2.9424058758546563,
+      "grad_norm": 5.803861618041992,
+      "learning_rate": 0.00017184441053948881,
+      "loss": 1.3052,
+      "step": 98550
+    },
+    {
+      "epoch": 2.9438987251067386,
+      "grad_norm": 5.279322147369385,
+      "learning_rate": 0.00017183012474356995,
+      "loss": 1.299,
+      "step": 98600
+    },
+    {
+      "epoch": 2.9453915743588213,
+      "grad_norm": 3.5537776947021484,
+      "learning_rate": 0.00017181583894765114,
+      "loss": 1.3192,
+      "step": 98650
+    },
+    {
+      "epoch": 2.9468844236109035,
+      "grad_norm": 4.724547386169434,
+      "learning_rate": 0.0001718015531517323,
+      "loss": 1.2877,
+      "step": 98700
+    },
+    {
+      "epoch": 2.9483772728629862,
+      "grad_norm": 3.863722801208496,
+      "learning_rate": 0.00017178726735581347,
+      "loss": 1.2713,
+      "step": 98750
+    },
+    {
+      "epoch": 2.949870122115069,
+      "grad_norm": 3.8161838054656982,
+      "learning_rate": 0.00017177298155989464,
+      "loss": 1.2484,
+      "step": 98800
+    },
+    {
+      "epoch": 2.951362971367151,
+      "grad_norm": 4.72680139541626,
+      "learning_rate": 0.0001717586957639758,
+      "loss": 1.3184,
+      "step": 98850
+    },
+    {
+      "epoch": 2.952855820619234,
+      "grad_norm": 4.270802021026611,
+      "learning_rate": 0.00017174440996805697,
+      "loss": 1.2655,
+      "step": 98900
+    },
+    {
+      "epoch": 2.954348669871316,
+      "grad_norm": 4.669827461242676,
+      "learning_rate": 0.00017173012417213813,
+      "loss": 1.3366,
+      "step": 98950
+    },
+    {
+      "epoch": 2.955841519123399,
+      "grad_norm": 4.791524887084961,
+      "learning_rate": 0.0001717158383762193,
+      "loss": 1.2887,
+      "step": 99000
+    },
+    {
+      "epoch": 2.9573343683754816,
+      "grad_norm": 5.6565423011779785,
+      "learning_rate": 0.00017170155258030046,
+      "loss": 1.2693,
+      "step": 99050
+    },
+    {
+      "epoch": 2.958827217627564,
+      "grad_norm": 3.3838722705841064,
+      "learning_rate": 0.00017168726678438163,
+      "loss": 1.3281,
+      "step": 99100
+    },
+    {
+      "epoch": 2.9603200668796466,
+      "grad_norm": 5.198723793029785,
+      "learning_rate": 0.00017167298098846282,
+      "loss": 1.2931,
+      "step": 99150
+    },
+    {
+      "epoch": 2.961812916131729,
+      "grad_norm": 3.6596312522888184,
+      "learning_rate": 0.00017165869519254395,
+      "loss": 1.235,
+      "step": 99200
+    },
+    {
+      "epoch": 2.9633057653838115,
+      "grad_norm": 4.816101551055908,
+      "learning_rate": 0.00017164440939662515,
+      "loss": 1.3069,
+      "step": 99250
+    },
+    {
+      "epoch": 2.9647986146358942,
+      "grad_norm": 4.188026428222656,
+      "learning_rate": 0.00017163012360070628,
+      "loss": 1.3104,
+      "step": 99300
+    },
+    {
+      "epoch": 2.9662914638879765,
+      "grad_norm": 4.76021671295166,
+      "learning_rate": 0.00017161583780478748,
+      "loss": 1.3125,
+      "step": 99350
+    },
+    {
+      "epoch": 2.967784313140059,
+      "grad_norm": 4.8385138511657715,
+      "learning_rate": 0.00017160155200886861,
+      "loss": 1.2871,
+      "step": 99400
+    },
+    {
+      "epoch": 2.9692771623921415,
+      "grad_norm": 10.232873916625977,
+      "learning_rate": 0.0001715872662129498,
+      "loss": 1.2901,
+      "step": 99450
+    },
+    {
+      "epoch": 2.970770011644224,
+      "grad_norm": 4.217905521392822,
+      "learning_rate": 0.00017157298041703097,
+      "loss": 1.3533,
+      "step": 99500
+    },
+    {
+      "epoch": 2.972262860896307,
+      "grad_norm": 4.916757583618164,
+      "learning_rate": 0.00017155869462111213,
+      "loss": 1.3062,
+      "step": 99550
+    },
+    {
+      "epoch": 2.973755710148389,
+      "grad_norm": 4.588007926940918,
+      "learning_rate": 0.0001715444088251933,
+      "loss": 1.2755,
+      "step": 99600
+    },
+    {
+      "epoch": 2.975248559400472,
+      "grad_norm": 7.835936546325684,
+      "learning_rate": 0.00017153012302927446,
+      "loss": 1.2472,
+      "step": 99650
+    },
+    {
+      "epoch": 2.976741408652554,
+      "grad_norm": 3.825573444366455,
+      "learning_rate": 0.00017151583723335563,
+      "loss": 1.252,
+      "step": 99700
+    },
+    {
+      "epoch": 2.978234257904637,
+      "grad_norm": 4.329678535461426,
+      "learning_rate": 0.0001715015514374368,
+      "loss": 1.2689,
+      "step": 99750
+    },
+    {
+      "epoch": 2.9797271071567195,
+      "grad_norm": 5.514796733856201,
+      "learning_rate": 0.00017148726564151796,
+      "loss": 1.2776,
+      "step": 99800
+    },
+    {
+      "epoch": 2.9812199564088018,
+      "grad_norm": 5.769738674163818,
+      "learning_rate": 0.00017147297984559912,
+      "loss": 1.2717,
+      "step": 99850
+    },
+    {
+      "epoch": 2.9827128056608845,
+      "grad_norm": 5.01526403427124,
+      "learning_rate": 0.0001714586940496803,
+      "loss": 1.2571,
+      "step": 99900
+    },
+    {
+      "epoch": 2.9842056549129667,
+      "grad_norm": 4.526731491088867,
+      "learning_rate": 0.00017144440825376148,
+      "loss": 1.3401,
+      "step": 99950
+    },
+    {
+      "epoch": 2.9856985041650494,
+      "grad_norm": 3.969651222229004,
+      "learning_rate": 0.00017143012245784262,
+      "loss": 1.2814,
+      "step": 100000
     }
   ],
   "logging_steps": 50,
@@ -12633,7 +14033,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.527602128637133e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
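The roughly 1,400 added lines are the Trainer's `log_history` records, one every 50 optimizer steps (`logging_steps: 50`), covering steps 90050 through 100000. A minimal sketch, assuming a local copy of the checkpoint, that loads the file and summarizes the loss curve from those records:

# A minimal sketch, assuming last-checkpoint/trainer_state.json is available locally.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Training log entries carry epoch, grad_norm, learning_rate, loss, and step,
# as in the diff above; eval entries (eval_steps: 1000) use different keys.
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
print(f"{len(train_logs)} log entries, steps {steps[0]}..{steps[-1]}, "
      f"final loss {losses[-1]:.4f}")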