Training in progress, step 110000, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step110000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step110000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 42002584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c16f3c52e607a9dac8780d9c330e47c8a3a8f1c5fac748a68403d97a84559cce
|
3 |
size 42002584
|
last-checkpoint/global_step110000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36962fbe070aafbf4ddc22f9c4477d3df82dfa1404cc5363a48ce660b3e0445c
|
3 |
+
size 251710672
|
last-checkpoint/global_step110000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a45b1bf675b9e3456f20ad935d83482efd209b99f4de78dd9d4d03feb5abccc1
|
3 |
+
size 153747385
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step110000
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d043daa1f1795686121f0e0cf1e44e17c0336ef18d3837708f21dc0527cd2ab9
|
3 |
size 14244
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -14014,6 +14014,1406 @@
|
|
14014 |
"learning_rate": 0.00017143012245784262,
|
14015 |
"loss": 1.2814,
|
14016 |
"step": 100000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14017 |
}
|
14018 |
],
|
14019 |
"logging_steps": 50,
|
@@ -14033,7 +15433,7 @@
|
|
14033 |
"attributes": {}
|
14034 |
}
|
14035 |
},
|
14036 |
-
"total_flos": 2.
|
14037 |
"train_batch_size": 2,
|
14038 |
"trial_name": null,
|
14039 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.2842683545815543,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 110000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
14014 |
"learning_rate": 0.00017143012245784262,
|
14015 |
"loss": 1.2814,
|
14016 |
"step": 100000
|
14017 |
+
},
|
14018 |
+
{
|
14019 |
+
"epoch": 2.987191353417132,
|
14020 |
+
"grad_norm": 4.796939849853516,
|
14021 |
+
"learning_rate": 0.0001714158366619238,
|
14022 |
+
"loss": 1.286,
|
14023 |
+
"step": 100050
|
14024 |
+
},
|
14025 |
+
{
|
14026 |
+
"epoch": 2.9886842026692144,
|
14027 |
+
"grad_norm": 5.400031089782715,
|
14028 |
+
"learning_rate": 0.00017140155086600495,
|
14029 |
+
"loss": 1.2939,
|
14030 |
+
"step": 100100
|
14031 |
+
},
|
14032 |
+
{
|
14033 |
+
"epoch": 2.990177051921297,
|
14034 |
+
"grad_norm": 4.253983497619629,
|
14035 |
+
"learning_rate": 0.00017138726507008614,
|
14036 |
+
"loss": 1.2753,
|
14037 |
+
"step": 100150
|
14038 |
+
},
|
14039 |
+
{
|
14040 |
+
"epoch": 2.9916699011733794,
|
14041 |
+
"grad_norm": 4.808609485626221,
|
14042 |
+
"learning_rate": 0.0001713729792741673,
|
14043 |
+
"loss": 1.3019,
|
14044 |
+
"step": 100200
|
14045 |
+
},
|
14046 |
+
{
|
14047 |
+
"epoch": 2.993162750425462,
|
14048 |
+
"grad_norm": 4.482404708862305,
|
14049 |
+
"learning_rate": 0.00017135869347824844,
|
14050 |
+
"loss": 1.2642,
|
14051 |
+
"step": 100250
|
14052 |
+
},
|
14053 |
+
{
|
14054 |
+
"epoch": 2.994655599677545,
|
14055 |
+
"grad_norm": 5.035706043243408,
|
14056 |
+
"learning_rate": 0.00017134440768232963,
|
14057 |
+
"loss": 1.2927,
|
14058 |
+
"step": 100300
|
14059 |
+
},
|
14060 |
+
{
|
14061 |
+
"epoch": 2.996148448929627,
|
14062 |
+
"grad_norm": 4.565020561218262,
|
14063 |
+
"learning_rate": 0.00017133012188641077,
|
14064 |
+
"loss": 1.3068,
|
14065 |
+
"step": 100350
|
14066 |
+
},
|
14067 |
+
{
|
14068 |
+
"epoch": 2.9976412981817098,
|
14069 |
+
"grad_norm": 5.071930408477783,
|
14070 |
+
"learning_rate": 0.00017131583609049196,
|
14071 |
+
"loss": 1.3127,
|
14072 |
+
"step": 100400
|
14073 |
+
},
|
14074 |
+
{
|
14075 |
+
"epoch": 2.999134147433792,
|
14076 |
+
"grad_norm": 5.127132415771484,
|
14077 |
+
"learning_rate": 0.0001713015502945731,
|
14078 |
+
"loss": 1.3057,
|
14079 |
+
"step": 100450
|
14080 |
+
},
|
14081 |
+
{
|
14082 |
+
"epoch": 3.0006269966858747,
|
14083 |
+
"grad_norm": 5.550358772277832,
|
14084 |
+
"learning_rate": 0.0001712872644986543,
|
14085 |
+
"loss": 1.2978,
|
14086 |
+
"step": 100500
|
14087 |
+
},
|
14088 |
+
{
|
14089 |
+
"epoch": 3.002119845937957,
|
14090 |
+
"grad_norm": 5.580805778503418,
|
14091 |
+
"learning_rate": 0.00017127297870273545,
|
14092 |
+
"loss": 1.2057,
|
14093 |
+
"step": 100550
|
14094 |
+
},
|
14095 |
+
{
|
14096 |
+
"epoch": 3.0036126951900397,
|
14097 |
+
"grad_norm": 3.326228618621826,
|
14098 |
+
"learning_rate": 0.00017125869290681662,
|
14099 |
+
"loss": 1.1558,
|
14100 |
+
"step": 100600
|
14101 |
+
},
|
14102 |
+
{
|
14103 |
+
"epoch": 3.0051055444421224,
|
14104 |
+
"grad_norm": 4.807900905609131,
|
14105 |
+
"learning_rate": 0.00017124440711089778,
|
14106 |
+
"loss": 1.1667,
|
14107 |
+
"step": 100650
|
14108 |
+
},
|
14109 |
+
{
|
14110 |
+
"epoch": 3.0065983936942047,
|
14111 |
+
"grad_norm": 4.419902801513672,
|
14112 |
+
"learning_rate": 0.00017123012131497895,
|
14113 |
+
"loss": 1.1765,
|
14114 |
+
"step": 100700
|
14115 |
+
},
|
14116 |
+
{
|
14117 |
+
"epoch": 3.0080912429462874,
|
14118 |
+
"grad_norm": 4.567293643951416,
|
14119 |
+
"learning_rate": 0.0001712158355190601,
|
14120 |
+
"loss": 1.2552,
|
14121 |
+
"step": 100750
|
14122 |
+
},
|
14123 |
+
{
|
14124 |
+
"epoch": 3.0095840921983696,
|
14125 |
+
"grad_norm": 3.129995346069336,
|
14126 |
+
"learning_rate": 0.00017120154972314128,
|
14127 |
+
"loss": 1.2222,
|
14128 |
+
"step": 100800
|
14129 |
+
},
|
14130 |
+
{
|
14131 |
+
"epoch": 3.0110769414504523,
|
14132 |
+
"grad_norm": 4.515715599060059,
|
14133 |
+
"learning_rate": 0.00017118726392722244,
|
14134 |
+
"loss": 1.2794,
|
14135 |
+
"step": 100850
|
14136 |
+
},
|
14137 |
+
{
|
14138 |
+
"epoch": 3.012569790702535,
|
14139 |
+
"grad_norm": 4.2073283195495605,
|
14140 |
+
"learning_rate": 0.0001711729781313036,
|
14141 |
+
"loss": 1.2108,
|
14142 |
+
"step": 100900
|
14143 |
+
},
|
14144 |
+
{
|
14145 |
+
"epoch": 3.0140626399546173,
|
14146 |
+
"grad_norm": 4.649296760559082,
|
14147 |
+
"learning_rate": 0.00017115869233538477,
|
14148 |
+
"loss": 1.2171,
|
14149 |
+
"step": 100950
|
14150 |
+
},
|
14151 |
+
{
|
14152 |
+
"epoch": 3.0155554892067,
|
14153 |
+
"grad_norm": 5.268732070922852,
|
14154 |
+
"learning_rate": 0.00017114440653946596,
|
14155 |
+
"loss": 1.2566,
|
14156 |
+
"step": 101000
|
14157 |
+
},
|
14158 |
+
{
|
14159 |
+
"epoch": 3.0170483384587823,
|
14160 |
+
"grad_norm": 4.506618976593018,
|
14161 |
+
"learning_rate": 0.0001711301207435471,
|
14162 |
+
"loss": 1.1528,
|
14163 |
+
"step": 101050
|
14164 |
+
},
|
14165 |
+
{
|
14166 |
+
"epoch": 3.018541187710865,
|
14167 |
+
"grad_norm": 5.6522016525268555,
|
14168 |
+
"learning_rate": 0.0001711158349476283,
|
14169 |
+
"loss": 1.2399,
|
14170 |
+
"step": 101100
|
14171 |
+
},
|
14172 |
+
{
|
14173 |
+
"epoch": 3.0200340369629477,
|
14174 |
+
"grad_norm": 4.122422695159912,
|
14175 |
+
"learning_rate": 0.00017110154915170943,
|
14176 |
+
"loss": 1.2239,
|
14177 |
+
"step": 101150
|
14178 |
+
},
|
14179 |
+
{
|
14180 |
+
"epoch": 3.02152688621503,
|
14181 |
+
"grad_norm": 3.759425401687622,
|
14182 |
+
"learning_rate": 0.00017108726335579062,
|
14183 |
+
"loss": 1.2463,
|
14184 |
+
"step": 101200
|
14185 |
+
},
|
14186 |
+
{
|
14187 |
+
"epoch": 3.0230197354671127,
|
14188 |
+
"grad_norm": 3.6985554695129395,
|
14189 |
+
"learning_rate": 0.00017107297755987176,
|
14190 |
+
"loss": 1.1758,
|
14191 |
+
"step": 101250
|
14192 |
+
},
|
14193 |
+
{
|
14194 |
+
"epoch": 3.024512584719195,
|
14195 |
+
"grad_norm": 4.267302513122559,
|
14196 |
+
"learning_rate": 0.00017105869176395295,
|
14197 |
+
"loss": 1.1626,
|
14198 |
+
"step": 101300
|
14199 |
+
},
|
14200 |
+
{
|
14201 |
+
"epoch": 3.0260054339712776,
|
14202 |
+
"grad_norm": 4.210064888000488,
|
14203 |
+
"learning_rate": 0.00017104440596803412,
|
14204 |
+
"loss": 1.1989,
|
14205 |
+
"step": 101350
|
14206 |
+
},
|
14207 |
+
{
|
14208 |
+
"epoch": 3.02749828322336,
|
14209 |
+
"grad_norm": 4.68282413482666,
|
14210 |
+
"learning_rate": 0.00017103012017211528,
|
14211 |
+
"loss": 1.1906,
|
14212 |
+
"step": 101400
|
14213 |
+
},
|
14214 |
+
{
|
14215 |
+
"epoch": 3.0289911324754426,
|
14216 |
+
"grad_norm": 4.420328617095947,
|
14217 |
+
"learning_rate": 0.00017101583437619645,
|
14218 |
+
"loss": 1.2236,
|
14219 |
+
"step": 101450
|
14220 |
+
},
|
14221 |
+
{
|
14222 |
+
"epoch": 3.0304839817275253,
|
14223 |
+
"grad_norm": 4.2961931228637695,
|
14224 |
+
"learning_rate": 0.0001710015485802776,
|
14225 |
+
"loss": 1.159,
|
14226 |
+
"step": 101500
|
14227 |
+
},
|
14228 |
+
{
|
14229 |
+
"epoch": 3.0319768309796076,
|
14230 |
+
"grad_norm": 5.494664669036865,
|
14231 |
+
"learning_rate": 0.00017098726278435878,
|
14232 |
+
"loss": 1.2286,
|
14233 |
+
"step": 101550
|
14234 |
+
},
|
14235 |
+
{
|
14236 |
+
"epoch": 3.0334696802316903,
|
14237 |
+
"grad_norm": 4.834953308105469,
|
14238 |
+
"learning_rate": 0.00017097297698843994,
|
14239 |
+
"loss": 1.1724,
|
14240 |
+
"step": 101600
|
14241 |
+
},
|
14242 |
+
{
|
14243 |
+
"epoch": 3.0349625294837725,
|
14244 |
+
"grad_norm": 4.731471538543701,
|
14245 |
+
"learning_rate": 0.0001709586911925211,
|
14246 |
+
"loss": 1.2089,
|
14247 |
+
"step": 101650
|
14248 |
+
},
|
14249 |
+
{
|
14250 |
+
"epoch": 3.0364553787358552,
|
14251 |
+
"grad_norm": 3.613417863845825,
|
14252 |
+
"learning_rate": 0.00017094440539660227,
|
14253 |
+
"loss": 1.2864,
|
14254 |
+
"step": 101700
|
14255 |
+
},
|
14256 |
+
{
|
14257 |
+
"epoch": 3.037948227987938,
|
14258 |
+
"grad_norm": 4.939467430114746,
|
14259 |
+
"learning_rate": 0.00017093011960068343,
|
14260 |
+
"loss": 1.2509,
|
14261 |
+
"step": 101750
|
14262 |
+
},
|
14263 |
+
{
|
14264 |
+
"epoch": 3.03944107724002,
|
14265 |
+
"grad_norm": 4.377845287322998,
|
14266 |
+
"learning_rate": 0.00017091583380476463,
|
14267 |
+
"loss": 1.2377,
|
14268 |
+
"step": 101800
|
14269 |
+
},
|
14270 |
+
{
|
14271 |
+
"epoch": 3.040933926492103,
|
14272 |
+
"grad_norm": 5.249380111694336,
|
14273 |
+
"learning_rate": 0.00017090154800884576,
|
14274 |
+
"loss": 1.1936,
|
14275 |
+
"step": 101850
|
14276 |
+
},
|
14277 |
+
{
|
14278 |
+
"epoch": 3.042426775744185,
|
14279 |
+
"grad_norm": 5.340421676635742,
|
14280 |
+
"learning_rate": 0.00017088726221292695,
|
14281 |
+
"loss": 1.2575,
|
14282 |
+
"step": 101900
|
14283 |
+
},
|
14284 |
+
{
|
14285 |
+
"epoch": 3.043919624996268,
|
14286 |
+
"grad_norm": 4.105010986328125,
|
14287 |
+
"learning_rate": 0.0001708729764170081,
|
14288 |
+
"loss": 1.1913,
|
14289 |
+
"step": 101950
|
14290 |
+
},
|
14291 |
+
{
|
14292 |
+
"epoch": 3.0454124742483506,
|
14293 |
+
"grad_norm": 4.164525032043457,
|
14294 |
+
"learning_rate": 0.00017085869062108928,
|
14295 |
+
"loss": 1.2782,
|
14296 |
+
"step": 102000
|
14297 |
+
},
|
14298 |
+
{
|
14299 |
+
"epoch": 3.046905323500433,
|
14300 |
+
"grad_norm": 5.213939666748047,
|
14301 |
+
"learning_rate": 0.00017084440482517042,
|
14302 |
+
"loss": 1.2888,
|
14303 |
+
"step": 102050
|
14304 |
+
},
|
14305 |
+
{
|
14306 |
+
"epoch": 3.0483981727525156,
|
14307 |
+
"grad_norm": 6.272274494171143,
|
14308 |
+
"learning_rate": 0.0001708301190292516,
|
14309 |
+
"loss": 1.1914,
|
14310 |
+
"step": 102100
|
14311 |
+
},
|
14312 |
+
{
|
14313 |
+
"epoch": 3.049891022004598,
|
14314 |
+
"grad_norm": 4.293304443359375,
|
14315 |
+
"learning_rate": 0.00017081583323333278,
|
14316 |
+
"loss": 1.2037,
|
14317 |
+
"step": 102150
|
14318 |
+
},
|
14319 |
+
{
|
14320 |
+
"epoch": 3.0513838712566805,
|
14321 |
+
"grad_norm": 4.839235305786133,
|
14322 |
+
"learning_rate": 0.00017080154743741394,
|
14323 |
+
"loss": 1.2241,
|
14324 |
+
"step": 102200
|
14325 |
+
},
|
14326 |
+
{
|
14327 |
+
"epoch": 3.0528767205087632,
|
14328 |
+
"grad_norm": 5.0241498947143555,
|
14329 |
+
"learning_rate": 0.0001707872616414951,
|
14330 |
+
"loss": 1.2341,
|
14331 |
+
"step": 102250
|
14332 |
+
},
|
14333 |
+
{
|
14334 |
+
"epoch": 3.0543695697608455,
|
14335 |
+
"grad_norm": 6.890449047088623,
|
14336 |
+
"learning_rate": 0.00017077297584557627,
|
14337 |
+
"loss": 1.2411,
|
14338 |
+
"step": 102300
|
14339 |
+
},
|
14340 |
+
{
|
14341 |
+
"epoch": 3.055862419012928,
|
14342 |
+
"grad_norm": 5.459249496459961,
|
14343 |
+
"learning_rate": 0.00017075869004965744,
|
14344 |
+
"loss": 1.2553,
|
14345 |
+
"step": 102350
|
14346 |
+
},
|
14347 |
+
{
|
14348 |
+
"epoch": 3.0573552682650105,
|
14349 |
+
"grad_norm": 4.772298336029053,
|
14350 |
+
"learning_rate": 0.0001707444042537386,
|
14351 |
+
"loss": 1.1836,
|
14352 |
+
"step": 102400
|
14353 |
+
},
|
14354 |
+
{
|
14355 |
+
"epoch": 3.058848117517093,
|
14356 |
+
"grad_norm": 4.730257511138916,
|
14357 |
+
"learning_rate": 0.00017073011845781977,
|
14358 |
+
"loss": 1.2091,
|
14359 |
+
"step": 102450
|
14360 |
+
},
|
14361 |
+
{
|
14362 |
+
"epoch": 3.0603409667691754,
|
14363 |
+
"grad_norm": 4.203242301940918,
|
14364 |
+
"learning_rate": 0.00017071583266190093,
|
14365 |
+
"loss": 1.2106,
|
14366 |
+
"step": 102500
|
14367 |
+
},
|
14368 |
+
{
|
14369 |
+
"epoch": 3.061833816021258,
|
14370 |
+
"grad_norm": 5.499764442443848,
|
14371 |
+
"learning_rate": 0.0001707015468659821,
|
14372 |
+
"loss": 1.2492,
|
14373 |
+
"step": 102550
|
14374 |
+
},
|
14375 |
+
{
|
14376 |
+
"epoch": 3.063326665273341,
|
14377 |
+
"grad_norm": 4.593760967254639,
|
14378 |
+
"learning_rate": 0.0001706872610700633,
|
14379 |
+
"loss": 1.2849,
|
14380 |
+
"step": 102600
|
14381 |
+
},
|
14382 |
+
{
|
14383 |
+
"epoch": 3.064819514525423,
|
14384 |
+
"grad_norm": 4.561386585235596,
|
14385 |
+
"learning_rate": 0.00017067297527414442,
|
14386 |
+
"loss": 1.2404,
|
14387 |
+
"step": 102650
|
14388 |
+
},
|
14389 |
+
{
|
14390 |
+
"epoch": 3.066312363777506,
|
14391 |
+
"grad_norm": 5.402337551116943,
|
14392 |
+
"learning_rate": 0.00017065868947822562,
|
14393 |
+
"loss": 1.1952,
|
14394 |
+
"step": 102700
|
14395 |
+
},
|
14396 |
+
{
|
14397 |
+
"epoch": 3.067805213029588,
|
14398 |
+
"grad_norm": 5.212750434875488,
|
14399 |
+
"learning_rate": 0.00017064440368230675,
|
14400 |
+
"loss": 1.2505,
|
14401 |
+
"step": 102750
|
14402 |
+
},
|
14403 |
+
{
|
14404 |
+
"epoch": 3.069298062281671,
|
14405 |
+
"grad_norm": 5.780417442321777,
|
14406 |
+
"learning_rate": 0.00017063011788638795,
|
14407 |
+
"loss": 1.2653,
|
14408 |
+
"step": 102800
|
14409 |
+
},
|
14410 |
+
{
|
14411 |
+
"epoch": 3.0707909115337535,
|
14412 |
+
"grad_norm": 5.937560081481934,
|
14413 |
+
"learning_rate": 0.00017061583209046908,
|
14414 |
+
"loss": 1.2068,
|
14415 |
+
"step": 102850
|
14416 |
+
},
|
14417 |
+
{
|
14418 |
+
"epoch": 3.0722837607858358,
|
14419 |
+
"grad_norm": 4.921445369720459,
|
14420 |
+
"learning_rate": 0.00017060154629455025,
|
14421 |
+
"loss": 1.1634,
|
14422 |
+
"step": 102900
|
14423 |
+
},
|
14424 |
+
{
|
14425 |
+
"epoch": 3.0737766100379185,
|
14426 |
+
"grad_norm": 5.346931457519531,
|
14427 |
+
"learning_rate": 0.00017058726049863144,
|
14428 |
+
"loss": 1.2136,
|
14429 |
+
"step": 102950
|
14430 |
+
},
|
14431 |
+
{
|
14432 |
+
"epoch": 3.0752694592900007,
|
14433 |
+
"grad_norm": 4.446613788604736,
|
14434 |
+
"learning_rate": 0.00017057297470271258,
|
14435 |
+
"loss": 1.1925,
|
14436 |
+
"step": 103000
|
14437 |
+
},
|
14438 |
+
{
|
14439 |
+
"epoch": 3.0767623085420834,
|
14440 |
+
"grad_norm": 4.870328903198242,
|
14441 |
+
"learning_rate": 0.00017055868890679377,
|
14442 |
+
"loss": 1.1855,
|
14443 |
+
"step": 103050
|
14444 |
+
},
|
14445 |
+
{
|
14446 |
+
"epoch": 3.078255157794166,
|
14447 |
+
"grad_norm": 5.902019500732422,
|
14448 |
+
"learning_rate": 0.0001705444031108749,
|
14449 |
+
"loss": 1.2081,
|
14450 |
+
"step": 103100
|
14451 |
+
},
|
14452 |
+
{
|
14453 |
+
"epoch": 3.0797480070462484,
|
14454 |
+
"grad_norm": 4.221609592437744,
|
14455 |
+
"learning_rate": 0.0001705301173149561,
|
14456 |
+
"loss": 1.2125,
|
14457 |
+
"step": 103150
|
14458 |
+
},
|
14459 |
+
{
|
14460 |
+
"epoch": 3.081240856298331,
|
14461 |
+
"grad_norm": 4.263118267059326,
|
14462 |
+
"learning_rate": 0.00017051583151903726,
|
14463 |
+
"loss": 1.2164,
|
14464 |
+
"step": 103200
|
14465 |
+
},
|
14466 |
+
{
|
14467 |
+
"epoch": 3.0827337055504134,
|
14468 |
+
"grad_norm": 7.199416160583496,
|
14469 |
+
"learning_rate": 0.00017050154572311843,
|
14470 |
+
"loss": 1.2191,
|
14471 |
+
"step": 103250
|
14472 |
+
},
|
14473 |
+
{
|
14474 |
+
"epoch": 3.084226554802496,
|
14475 |
+
"grad_norm": 5.3002119064331055,
|
14476 |
+
"learning_rate": 0.0001704872599271996,
|
14477 |
+
"loss": 1.2828,
|
14478 |
+
"step": 103300
|
14479 |
+
},
|
14480 |
+
{
|
14481 |
+
"epoch": 3.0857194040545783,
|
14482 |
+
"grad_norm": 4.3020405769348145,
|
14483 |
+
"learning_rate": 0.00017047297413128076,
|
14484 |
+
"loss": 1.2341,
|
14485 |
+
"step": 103350
|
14486 |
+
},
|
14487 |
+
{
|
14488 |
+
"epoch": 3.087212253306661,
|
14489 |
+
"grad_norm": 5.1998443603515625,
|
14490 |
+
"learning_rate": 0.00017045868833536192,
|
14491 |
+
"loss": 1.2271,
|
14492 |
+
"step": 103400
|
14493 |
+
},
|
14494 |
+
{
|
14495 |
+
"epoch": 3.0887051025587438,
|
14496 |
+
"grad_norm": 4.382910251617432,
|
14497 |
+
"learning_rate": 0.00017044440253944309,
|
14498 |
+
"loss": 1.1994,
|
14499 |
+
"step": 103450
|
14500 |
+
},
|
14501 |
+
{
|
14502 |
+
"epoch": 3.090197951810826,
|
14503 |
+
"grad_norm": 5.094580173492432,
|
14504 |
+
"learning_rate": 0.00017043011674352425,
|
14505 |
+
"loss": 1.2557,
|
14506 |
+
"step": 103500
|
14507 |
+
},
|
14508 |
+
{
|
14509 |
+
"epoch": 3.0916908010629087,
|
14510 |
+
"grad_norm": 3.9512152671813965,
|
14511 |
+
"learning_rate": 0.00017041583094760542,
|
14512 |
+
"loss": 1.283,
|
14513 |
+
"step": 103550
|
14514 |
+
},
|
14515 |
+
{
|
14516 |
+
"epoch": 3.093183650314991,
|
14517 |
+
"grad_norm": 6.366419792175293,
|
14518 |
+
"learning_rate": 0.00017040154515168658,
|
14519 |
+
"loss": 1.2327,
|
14520 |
+
"step": 103600
|
14521 |
+
},
|
14522 |
+
{
|
14523 |
+
"epoch": 3.0946764995670737,
|
14524 |
+
"grad_norm": 3.0555920600891113,
|
14525 |
+
"learning_rate": 0.00017038725935576777,
|
14526 |
+
"loss": 1.1865,
|
14527 |
+
"step": 103650
|
14528 |
+
},
|
14529 |
+
{
|
14530 |
+
"epoch": 3.0961693488191564,
|
14531 |
+
"grad_norm": 5.861559867858887,
|
14532 |
+
"learning_rate": 0.0001703729735598489,
|
14533 |
+
"loss": 1.2394,
|
14534 |
+
"step": 103700
|
14535 |
+
},
|
14536 |
+
{
|
14537 |
+
"epoch": 3.0976621980712387,
|
14538 |
+
"grad_norm": 4.790325164794922,
|
14539 |
+
"learning_rate": 0.0001703586877639301,
|
14540 |
+
"loss": 1.2256,
|
14541 |
+
"step": 103750
|
14542 |
+
},
|
14543 |
+
{
|
14544 |
+
"epoch": 3.0991550473233214,
|
14545 |
+
"grad_norm": 4.111683368682861,
|
14546 |
+
"learning_rate": 0.00017034440196801124,
|
14547 |
+
"loss": 1.2228,
|
14548 |
+
"step": 103800
|
14549 |
+
},
|
14550 |
+
{
|
14551 |
+
"epoch": 3.1006478965754036,
|
14552 |
+
"grad_norm": 4.152289390563965,
|
14553 |
+
"learning_rate": 0.00017033011617209243,
|
14554 |
+
"loss": 1.2547,
|
14555 |
+
"step": 103850
|
14556 |
+
},
|
14557 |
+
{
|
14558 |
+
"epoch": 3.1021407458274863,
|
14559 |
+
"grad_norm": 5.741196155548096,
|
14560 |
+
"learning_rate": 0.00017031583037617357,
|
14561 |
+
"loss": 1.2049,
|
14562 |
+
"step": 103900
|
14563 |
+
},
|
14564 |
+
{
|
14565 |
+
"epoch": 3.103633595079569,
|
14566 |
+
"grad_norm": 6.526316165924072,
|
14567 |
+
"learning_rate": 0.00017030154458025476,
|
14568 |
+
"loss": 1.2845,
|
14569 |
+
"step": 103950
|
14570 |
+
},
|
14571 |
+
{
|
14572 |
+
"epoch": 3.1051264443316513,
|
14573 |
+
"grad_norm": 4.736927509307861,
|
14574 |
+
"learning_rate": 0.00017028725878433592,
|
14575 |
+
"loss": 1.2777,
|
14576 |
+
"step": 104000
|
14577 |
+
},
|
14578 |
+
{
|
14579 |
+
"epoch": 3.106619293583734,
|
14580 |
+
"grad_norm": 4.231018543243408,
|
14581 |
+
"learning_rate": 0.0001702729729884171,
|
14582 |
+
"loss": 1.2387,
|
14583 |
+
"step": 104050
|
14584 |
+
},
|
14585 |
+
{
|
14586 |
+
"epoch": 3.1081121428358163,
|
14587 |
+
"grad_norm": 4.244640350341797,
|
14588 |
+
"learning_rate": 0.00017025868719249825,
|
14589 |
+
"loss": 1.1534,
|
14590 |
+
"step": 104100
|
14591 |
+
},
|
14592 |
+
{
|
14593 |
+
"epoch": 3.109604992087899,
|
14594 |
+
"grad_norm": 4.443108558654785,
|
14595 |
+
"learning_rate": 0.00017024440139657942,
|
14596 |
+
"loss": 1.198,
|
14597 |
+
"step": 104150
|
14598 |
+
},
|
14599 |
+
{
|
14600 |
+
"epoch": 3.1110978413399817,
|
14601 |
+
"grad_norm": 4.030060768127441,
|
14602 |
+
"learning_rate": 0.00017023011560066058,
|
14603 |
+
"loss": 1.2005,
|
14604 |
+
"step": 104200
|
14605 |
+
},
|
14606 |
+
{
|
14607 |
+
"epoch": 3.112590690592064,
|
14608 |
+
"grad_norm": 4.423096179962158,
|
14609 |
+
"learning_rate": 0.00017021582980474175,
|
14610 |
+
"loss": 1.2502,
|
14611 |
+
"step": 104250
|
14612 |
+
},
|
14613 |
+
{
|
14614 |
+
"epoch": 3.1140835398441467,
|
14615 |
+
"grad_norm": 4.914687156677246,
|
14616 |
+
"learning_rate": 0.0001702015440088229,
|
14617 |
+
"loss": 1.2291,
|
14618 |
+
"step": 104300
|
14619 |
+
},
|
14620 |
+
{
|
14621 |
+
"epoch": 3.115576389096229,
|
14622 |
+
"grad_norm": 6.267758369445801,
|
14623 |
+
"learning_rate": 0.00017018725821290408,
|
14624 |
+
"loss": 1.2694,
|
14625 |
+
"step": 104350
|
14626 |
+
},
|
14627 |
+
{
|
14628 |
+
"epoch": 3.1170692383483116,
|
14629 |
+
"grad_norm": 4.870142936706543,
|
14630 |
+
"learning_rate": 0.00017017297241698524,
|
14631 |
+
"loss": 1.1742,
|
14632 |
+
"step": 104400
|
14633 |
+
},
|
14634 |
+
{
|
14635 |
+
"epoch": 3.1185620876003943,
|
14636 |
+
"grad_norm": 4.136995792388916,
|
14637 |
+
"learning_rate": 0.00017015868662106643,
|
14638 |
+
"loss": 1.2683,
|
14639 |
+
"step": 104450
|
14640 |
+
},
|
14641 |
+
{
|
14642 |
+
"epoch": 3.1200549368524766,
|
14643 |
+
"grad_norm": 3.877889633178711,
|
14644 |
+
"learning_rate": 0.00017014440082514757,
|
14645 |
+
"loss": 1.2226,
|
14646 |
+
"step": 104500
|
14647 |
+
},
|
14648 |
+
{
|
14649 |
+
"epoch": 3.1215477861045593,
|
14650 |
+
"grad_norm": 5.544692039489746,
|
14651 |
+
"learning_rate": 0.00017013011502922876,
|
14652 |
+
"loss": 1.2163,
|
14653 |
+
"step": 104550
|
14654 |
+
},
|
14655 |
+
{
|
14656 |
+
"epoch": 3.1230406353566416,
|
14657 |
+
"grad_norm": 4.616161346435547,
|
14658 |
+
"learning_rate": 0.0001701158292333099,
|
14659 |
+
"loss": 1.2569,
|
14660 |
+
"step": 104600
|
14661 |
+
},
|
14662 |
+
{
|
14663 |
+
"epoch": 3.1245334846087243,
|
14664 |
+
"grad_norm": 4.393960475921631,
|
14665 |
+
"learning_rate": 0.0001701015434373911,
|
14666 |
+
"loss": 1.277,
|
14667 |
+
"step": 104650
|
14668 |
+
},
|
14669 |
+
{
|
14670 |
+
"epoch": 3.126026333860807,
|
14671 |
+
"grad_norm": 5.330219268798828,
|
14672 |
+
"learning_rate": 0.00017008725764147223,
|
14673 |
+
"loss": 1.1929,
|
14674 |
+
"step": 104700
|
14675 |
+
},
|
14676 |
+
{
|
14677 |
+
"epoch": 3.1275191831128892,
|
14678 |
+
"grad_norm": 5.1908979415893555,
|
14679 |
+
"learning_rate": 0.00017007297184555342,
|
14680 |
+
"loss": 1.2356,
|
14681 |
+
"step": 104750
|
14682 |
+
},
|
14683 |
+
{
|
14684 |
+
"epoch": 3.129012032364972,
|
14685 |
+
"grad_norm": 3.941892623901367,
|
14686 |
+
"learning_rate": 0.00017005868604963459,
|
14687 |
+
"loss": 1.1774,
|
14688 |
+
"step": 104800
|
14689 |
+
},
|
14690 |
+
{
|
14691 |
+
"epoch": 3.130504881617054,
|
14692 |
+
"grad_norm": 5.030502796173096,
|
14693 |
+
"learning_rate": 0.00017004440025371575,
|
14694 |
+
"loss": 1.2153,
|
14695 |
+
"step": 104850
|
14696 |
+
},
|
14697 |
+
{
|
14698 |
+
"epoch": 3.131997730869137,
|
14699 |
+
"grad_norm": 3.437596082687378,
|
14700 |
+
"learning_rate": 0.00017003011445779692,
|
14701 |
+
"loss": 1.2438,
|
14702 |
+
"step": 104900
|
14703 |
+
},
|
14704 |
+
{
|
14705 |
+
"epoch": 3.133490580121219,
|
14706 |
+
"grad_norm": 4.600546836853027,
|
14707 |
+
"learning_rate": 0.00017001582866187808,
|
14708 |
+
"loss": 1.2477,
|
14709 |
+
"step": 104950
|
14710 |
+
},
|
14711 |
+
{
|
14712 |
+
"epoch": 3.134983429373302,
|
14713 |
+
"grad_norm": 7.366837501525879,
|
14714 |
+
"learning_rate": 0.00017000154286595924,
|
14715 |
+
"loss": 1.2411,
|
14716 |
+
"step": 105000
|
14717 |
+
},
|
14718 |
+
{
|
14719 |
+
"epoch": 3.1364762786253846,
|
14720 |
+
"grad_norm": 4.779984474182129,
|
14721 |
+
"learning_rate": 0.0001699872570700404,
|
14722 |
+
"loss": 1.1974,
|
14723 |
+
"step": 105050
|
14724 |
+
},
|
14725 |
+
{
|
14726 |
+
"epoch": 3.137969127877467,
|
14727 |
+
"grad_norm": 3.6350603103637695,
|
14728 |
+
"learning_rate": 0.00016997297127412157,
|
14729 |
+
"loss": 1.2811,
|
14730 |
+
"step": 105100
|
14731 |
+
},
|
14732 |
+
{
|
14733 |
+
"epoch": 3.1394619771295496,
|
14734 |
+
"grad_norm": 3.3783535957336426,
|
14735 |
+
"learning_rate": 0.00016995868547820274,
|
14736 |
+
"loss": 1.2377,
|
14737 |
+
"step": 105150
|
14738 |
+
},
|
14739 |
+
{
|
14740 |
+
"epoch": 3.140954826381632,
|
14741 |
+
"grad_norm": 5.765346527099609,
|
14742 |
+
"learning_rate": 0.0001699443996822839,
|
14743 |
+
"loss": 1.2648,
|
14744 |
+
"step": 105200
|
14745 |
+
},
|
14746 |
+
{
|
14747 |
+
"epoch": 3.1424476756337145,
|
14748 |
+
"grad_norm": 6.171380996704102,
|
14749 |
+
"learning_rate": 0.0001699301138863651,
|
14750 |
+
"loss": 1.2653,
|
14751 |
+
"step": 105250
|
14752 |
+
},
|
14753 |
+
{
|
14754 |
+
"epoch": 3.1439405248857972,
|
14755 |
+
"grad_norm": 4.658510684967041,
|
14756 |
+
"learning_rate": 0.00016991582809044623,
|
14757 |
+
"loss": 1.2838,
|
14758 |
+
"step": 105300
|
14759 |
+
},
|
14760 |
+
{
|
14761 |
+
"epoch": 3.1454333741378795,
|
14762 |
+
"grad_norm": 4.044935703277588,
|
14763 |
+
"learning_rate": 0.00016990154229452742,
|
14764 |
+
"loss": 1.2807,
|
14765 |
+
"step": 105350
|
14766 |
+
},
|
14767 |
+
{
|
14768 |
+
"epoch": 3.146926223389962,
|
14769 |
+
"grad_norm": 3.6364893913269043,
|
14770 |
+
"learning_rate": 0.00016988725649860856,
|
14771 |
+
"loss": 1.2631,
|
14772 |
+
"step": 105400
|
14773 |
+
},
|
14774 |
+
{
|
14775 |
+
"epoch": 3.1484190726420445,
|
14776 |
+
"grad_norm": 4.018857955932617,
|
14777 |
+
"learning_rate": 0.00016987297070268975,
|
14778 |
+
"loss": 1.2301,
|
14779 |
+
"step": 105450
|
14780 |
+
},
|
14781 |
+
{
|
14782 |
+
"epoch": 3.149911921894127,
|
14783 |
+
"grad_norm": 4.4695725440979,
|
14784 |
+
"learning_rate": 0.0001698586849067709,
|
14785 |
+
"loss": 1.2299,
|
14786 |
+
"step": 105500
|
14787 |
+
},
|
14788 |
+
{
|
14789 |
+
"epoch": 3.1514047711462094,
|
14790 |
+
"grad_norm": 3.9617760181427,
|
14791 |
+
"learning_rate": 0.00016984439911085206,
|
14792 |
+
"loss": 1.2364,
|
14793 |
+
"step": 105550
|
14794 |
+
},
|
14795 |
+
{
|
14796 |
+
"epoch": 3.152897620398292,
|
14797 |
+
"grad_norm": 4.522684574127197,
|
14798 |
+
"learning_rate": 0.00016983011331493325,
|
14799 |
+
"loss": 1.2737,
|
14800 |
+
"step": 105600
|
14801 |
+
},
|
14802 |
+
{
|
14803 |
+
"epoch": 3.154390469650375,
|
14804 |
+
"grad_norm": 4.221574783325195,
|
14805 |
+
"learning_rate": 0.00016981582751901439,
|
14806 |
+
"loss": 1.2214,
|
14807 |
+
"step": 105650
|
14808 |
+
},
|
14809 |
+
{
|
14810 |
+
"epoch": 3.155883318902457,
|
14811 |
+
"grad_norm": 4.848055839538574,
|
14812 |
+
"learning_rate": 0.00016980154172309558,
|
14813 |
+
"loss": 1.1756,
|
14814 |
+
"step": 105700
|
14815 |
+
},
|
14816 |
+
{
|
14817 |
+
"epoch": 3.15737616815454,
|
14818 |
+
"grad_norm": 4.364899635314941,
|
14819 |
+
"learning_rate": 0.00016978725592717671,
|
14820 |
+
"loss": 1.2188,
|
14821 |
+
"step": 105750
|
14822 |
+
},
|
14823 |
+
{
|
14824 |
+
"epoch": 3.158869017406622,
|
14825 |
+
"grad_norm": 3.952366352081299,
|
14826 |
+
"learning_rate": 0.0001697729701312579,
|
14827 |
+
"loss": 1.2422,
|
14828 |
+
"step": 105800
|
14829 |
+
},
|
14830 |
+
{
|
14831 |
+
"epoch": 3.1603618666587048,
|
14832 |
+
"grad_norm": 4.575440883636475,
|
14833 |
+
"learning_rate": 0.00016975868433533907,
|
14834 |
+
"loss": 1.2711,
|
14835 |
+
"step": 105850
|
14836 |
+
},
|
14837 |
+
{
|
14838 |
+
"epoch": 3.1618547159107875,
|
14839 |
+
"grad_norm": 5.710911273956299,
|
14840 |
+
"learning_rate": 0.00016974439853942024,
|
14841 |
+
"loss": 1.1996,
|
14842 |
+
"step": 105900
|
14843 |
+
},
|
14844 |
+
{
|
14845 |
+
"epoch": 3.1633475651628697,
|
14846 |
+
"grad_norm": 3.885944128036499,
|
14847 |
+
"learning_rate": 0.0001697301127435014,
|
14848 |
+
"loss": 1.2633,
|
14849 |
+
"step": 105950
|
14850 |
+
},
|
14851 |
+
{
|
14852 |
+
"epoch": 3.1648404144149525,
|
14853 |
+
"grad_norm": 4.504218578338623,
|
14854 |
+
"learning_rate": 0.00016971582694758256,
|
14855 |
+
"loss": 1.2028,
|
14856 |
+
"step": 106000
|
14857 |
+
},
|
14858 |
+
{
|
14859 |
+
"epoch": 3.1663332636670347,
|
14860 |
+
"grad_norm": 5.381597995758057,
|
14861 |
+
"learning_rate": 0.00016970154115166373,
|
14862 |
+
"loss": 1.2445,
|
14863 |
+
"step": 106050
|
14864 |
+
},
|
14865 |
+
{
|
14866 |
+
"epoch": 3.1678261129191174,
|
14867 |
+
"grad_norm": 3.7343199253082275,
|
14868 |
+
"learning_rate": 0.0001696872553557449,
|
14869 |
+
"loss": 1.2553,
|
14870 |
+
"step": 106100
|
14871 |
+
},
|
14872 |
+
{
|
14873 |
+
"epoch": 3.1693189621712,
|
14874 |
+
"grad_norm": 4.142834186553955,
|
14875 |
+
"learning_rate": 0.00016967296955982606,
|
14876 |
+
"loss": 1.2319,
|
14877 |
+
"step": 106150
|
14878 |
+
},
|
14879 |
+
{
|
14880 |
+
"epoch": 3.1708118114232824,
|
14881 |
+
"grad_norm": 3.856889486312866,
|
14882 |
+
"learning_rate": 0.00016965868376390722,
|
14883 |
+
"loss": 1.2911,
|
14884 |
+
"step": 106200
|
14885 |
+
},
|
14886 |
+
{
|
14887 |
+
"epoch": 3.172304660675365,
|
14888 |
+
"grad_norm": 4.8363938331604,
|
14889 |
+
"learning_rate": 0.0001696443979679884,
|
14890 |
+
"loss": 1.2712,
|
14891 |
+
"step": 106250
|
14892 |
+
},
|
14893 |
+
{
|
14894 |
+
"epoch": 3.1737975099274474,
|
14895 |
+
"grad_norm": 5.556827068328857,
|
14896 |
+
"learning_rate": 0.00016963011217206955,
|
14897 |
+
"loss": 1.2295,
|
14898 |
+
"step": 106300
|
14899 |
+
},
|
14900 |
+
{
|
14901 |
+
"epoch": 3.17529035917953,
|
14902 |
+
"grad_norm": 4.371210098266602,
|
14903 |
+
"learning_rate": 0.00016961582637615072,
|
14904 |
+
"loss": 1.2745,
|
14905 |
+
"step": 106350
|
14906 |
+
},
|
14907 |
+
{
|
14908 |
+
"epoch": 3.1767832084316128,
|
14909 |
+
"grad_norm": 4.789484024047852,
|
14910 |
+
"learning_rate": 0.0001696015405802319,
|
14911 |
+
"loss": 1.232,
|
14912 |
+
"step": 106400
|
14913 |
+
},
|
14914 |
+
{
|
14915 |
+
"epoch": 3.178276057683695,
|
14916 |
+
"grad_norm": 3.9675486087799072,
|
14917 |
+
"learning_rate": 0.00016958725478431305,
|
14918 |
+
"loss": 1.2131,
|
14919 |
+
"step": 106450
|
14920 |
+
},
|
14921 |
+
{
|
14922 |
+
"epoch": 3.1797689069357777,
|
14923 |
+
"grad_norm": 6.18342924118042,
|
14924 |
+
"learning_rate": 0.00016957296898839424,
|
14925 |
+
"loss": 1.2337,
|
14926 |
+
"step": 106500
|
14927 |
+
},
|
14928 |
+
{
|
14929 |
+
"epoch": 3.18126175618786,
|
14930 |
+
"grad_norm": 3.3635101318359375,
|
14931 |
+
"learning_rate": 0.00016955868319247538,
|
14932 |
+
"loss": 1.2632,
|
14933 |
+
"step": 106550
|
14934 |
+
},
|
14935 |
+
{
|
14936 |
+
"epoch": 3.1827546054399427,
|
14937 |
+
"grad_norm": 3.658357620239258,
|
14938 |
+
"learning_rate": 0.00016954439739655657,
|
14939 |
+
"loss": 1.2306,
|
14940 |
+
"step": 106600
|
14941 |
+
},
|
14942 |
+
{
|
14943 |
+
"epoch": 3.1842474546920254,
|
14944 |
+
"grad_norm": 5.325497627258301,
|
14945 |
+
"learning_rate": 0.00016953011160063773,
|
14946 |
+
"loss": 1.2381,
|
14947 |
+
"step": 106650
|
14948 |
+
},
|
14949 |
+
{
|
14950 |
+
"epoch": 3.1857403039441077,
|
14951 |
+
"grad_norm": 5.2231268882751465,
|
14952 |
+
"learning_rate": 0.0001695158258047189,
|
14953 |
+
"loss": 1.2236,
|
14954 |
+
"step": 106700
|
14955 |
+
},
|
14956 |
+
{
|
14957 |
+
"epoch": 3.1872331531961904,
|
14958 |
+
"grad_norm": 5.6110334396362305,
|
14959 |
+
"learning_rate": 0.00016950154000880006,
|
14960 |
+
"loss": 1.2479,
|
14961 |
+
"step": 106750
|
14962 |
+
},
|
14963 |
+
{
|
14964 |
+
"epoch": 3.1887260024482726,
|
14965 |
+
"grad_norm": 4.795280933380127,
|
14966 |
+
"learning_rate": 0.00016948725421288123,
|
14967 |
+
"loss": 1.1846,
|
14968 |
+
"step": 106800
|
14969 |
+
},
|
14970 |
+
{
|
14971 |
+
"epoch": 3.1902188517003554,
|
14972 |
+
"grad_norm": 5.203749179840088,
|
14973 |
+
"learning_rate": 0.0001694729684169624,
|
14974 |
+
"loss": 1.2531,
|
14975 |
+
"step": 106850
|
14976 |
+
},
|
14977 |
+
{
|
14978 |
+
"epoch": 3.191711700952438,
|
14979 |
+
"grad_norm": 6.236663818359375,
|
14980 |
+
"learning_rate": 0.00016945868262104356,
|
14981 |
+
"loss": 1.1977,
|
14982 |
+
"step": 106900
|
14983 |
+
},
|
14984 |
+
{
|
14985 |
+
"epoch": 3.1932045502045203,
|
14986 |
+
"grad_norm": 4.13303279876709,
|
14987 |
+
"learning_rate": 0.00016944439682512472,
|
14988 |
+
"loss": 1.2418,
|
14989 |
+
"step": 106950
|
14990 |
+
},
|
14991 |
+
{
|
14992 |
+
"epoch": 3.194697399456603,
|
14993 |
+
"grad_norm": 6.106944561004639,
|
14994 |
+
"learning_rate": 0.00016943011102920589,
|
14995 |
+
"loss": 1.1926,
|
14996 |
+
"step": 107000
|
14997 |
+
},
|
14998 |
+
{
|
14999 |
+
"epoch": 3.1961902487086853,
|
15000 |
+
"grad_norm": 4.1147332191467285,
|
15001 |
+
"learning_rate": 0.00016941582523328705,
|
15002 |
+
"loss": 1.2549,
|
15003 |
+
"step": 107050
|
15004 |
+
},
|
15005 |
+
{
|
15006 |
+
"epoch": 3.197683097960768,
|
15007 |
+
"grad_norm": 4.951442718505859,
|
15008 |
+
"learning_rate": 0.00016940153943736824,
|
15009 |
+
"loss": 1.2964,
|
15010 |
+
"step": 107100
|
15011 |
+
},
|
15012 |
+
{
|
15013 |
+
"epoch": 3.1991759472128503,
|
15014 |
+
"grad_norm": 6.196277141571045,
|
15015 |
+
"learning_rate": 0.00016938725364144938,
|
15016 |
+
"loss": 1.2541,
|
15017 |
+
"step": 107150
|
15018 |
+
},
|
15019 |
+
{
|
15020 |
+
"epoch": 3.200668796464933,
|
15021 |
+
"grad_norm": 5.174891948699951,
|
15022 |
+
"learning_rate": 0.00016937296784553057,
|
15023 |
+
"loss": 1.238,
|
15024 |
+
"step": 107200
|
15025 |
+
},
|
15026 |
+
{
|
15027 |
+
"epoch": 3.2021616457170157,
|
15028 |
+
"grad_norm": 6.037936687469482,
|
15029 |
+
"learning_rate": 0.0001693586820496117,
|
15030 |
+
"loss": 1.2919,
|
15031 |
+
"step": 107250
|
15032 |
+
},
|
15033 |
+
{
|
15034 |
+
"epoch": 3.203654494969098,
|
15035 |
+
"grad_norm": 4.104307651519775,
|
15036 |
+
"learning_rate": 0.0001693443962536929,
|
15037 |
+
"loss": 1.178,
|
15038 |
+
"step": 107300
|
15039 |
+
},
|
15040 |
+
{
|
15041 |
+
"epoch": 3.2051473442211806,
|
15042 |
+
"grad_norm": 4.1462273597717285,
|
15043 |
+
"learning_rate": 0.00016933011045777404,
|
15044 |
+
"loss": 1.2468,
|
15045 |
+
"step": 107350
|
15046 |
+
},
|
15047 |
+
{
|
15048 |
+
"epoch": 3.206640193473263,
|
15049 |
+
"grad_norm": 3.701998233795166,
|
15050 |
+
"learning_rate": 0.00016931582466185523,
|
15051 |
+
"loss": 1.2262,
|
15052 |
+
"step": 107400
|
15053 |
+
},
|
15054 |
+
{
|
15055 |
+
"epoch": 3.2081330427253456,
|
15056 |
+
"grad_norm": 4.66901159286499,
|
15057 |
+
"learning_rate": 0.0001693015388659364,
|
15058 |
+
"loss": 1.2413,
|
15059 |
+
"step": 107450
|
15060 |
+
},
|
15061 |
+
{
|
15062 |
+
"epoch": 3.2096258919774283,
|
15063 |
+
"grad_norm": 5.96376371383667,
|
15064 |
+
"learning_rate": 0.00016928725307001756,
|
15065 |
+
"loss": 1.1554,
|
15066 |
+
"step": 107500
|
15067 |
+
},
|
15068 |
+
{
|
15069 |
+
"epoch": 3.2111187412295106,
|
15070 |
+
"grad_norm": 3.801471710205078,
|
15071 |
+
"learning_rate": 0.00016927296727409872,
|
15072 |
+
"loss": 1.2821,
|
15073 |
+
"step": 107550
|
15074 |
+
},
|
15075 |
+
{
|
15076 |
+
"epoch": 3.2126115904815933,
|
15077 |
+
"grad_norm": 5.026028633117676,
|
15078 |
+
"learning_rate": 0.0001692586814781799,
|
15079 |
+
"loss": 1.2307,
|
15080 |
+
"step": 107600
|
15081 |
+
},
|
15082 |
+
{
|
15083 |
+
"epoch": 3.2141044397336755,
|
15084 |
+
"grad_norm": 5.475889682769775,
|
15085 |
+
"learning_rate": 0.00016924439568226105,
|
15086 |
+
"loss": 1.2,
|
15087 |
+
"step": 107650
|
15088 |
+
},
|
15089 |
+
{
|
15090 |
+
"epoch": 3.2155972889857583,
|
15091 |
+
"grad_norm": 6.249619007110596,
|
15092 |
+
"learning_rate": 0.00016923010988634222,
|
15093 |
+
"loss": 1.2448,
|
15094 |
+
"step": 107700
|
15095 |
+
},
|
15096 |
+
{
|
15097 |
+
"epoch": 3.2170901382378405,
|
15098 |
+
"grad_norm": 4.803418159484863,
|
15099 |
+
"learning_rate": 0.00016921582409042338,
|
15100 |
+
"loss": 1.2669,
|
15101 |
+
"step": 107750
|
15102 |
+
},
|
15103 |
+
{
|
15104 |
+
"epoch": 3.218582987489923,
|
15105 |
+
"grad_norm": 3.9626855850219727,
|
15106 |
+
"learning_rate": 0.00016920153829450455,
|
15107 |
+
"loss": 1.2167,
|
15108 |
+
"step": 107800
|
15109 |
+
},
|
15110 |
+
{
|
15111 |
+
"epoch": 3.220075836742006,
|
15112 |
+
"grad_norm": 3.5219650268554688,
|
15113 |
+
"learning_rate": 0.0001691872524985857,
|
15114 |
+
"loss": 1.2554,
|
15115 |
+
"step": 107850
|
15116 |
+
},
|
15117 |
+
{
|
15118 |
+
"epoch": 3.221568685994088,
|
15119 |
+
"grad_norm": 4.510613441467285,
|
15120 |
+
"learning_rate": 0.0001691729667026669,
|
15121 |
+
"loss": 1.1923,
|
15122 |
+
"step": 107900
|
15123 |
+
},
|
15124 |
+
{
|
15125 |
+
"epoch": 3.223061535246171,
|
15126 |
+
"grad_norm": 5.354753494262695,
|
15127 |
+
"learning_rate": 0.00016915868090674804,
|
15128 |
+
"loss": 1.1883,
|
15129 |
+
"step": 107950
|
15130 |
+
},
|
15131 |
+
{
|
15132 |
+
"epoch": 3.224554384498253,
|
15133 |
+
"grad_norm": 4.148642063140869,
|
15134 |
+
"learning_rate": 0.00016914439511082923,
|
15135 |
+
"loss": 1.2497,
|
15136 |
+
"step": 108000
|
15137 |
+
},
|
15138 |
+
{
|
15139 |
+
"epoch": 3.226047233750336,
|
15140 |
+
"grad_norm": 4.1666646003723145,
|
15141 |
+
"learning_rate": 0.00016913010931491037,
|
15142 |
+
"loss": 1.1872,
|
15143 |
+
"step": 108050
|
15144 |
+
},
|
15145 |
+
{
|
15146 |
+
"epoch": 3.2275400830024186,
|
15147 |
+
"grad_norm": 4.137986183166504,
|
15148 |
+
"learning_rate": 0.00016911582351899156,
|
15149 |
+
"loss": 1.2788,
|
15150 |
+
"step": 108100
|
15151 |
+
},
|
15152 |
+
{
|
15153 |
+
"epoch": 3.229032932254501,
|
15154 |
+
"grad_norm": 3.901366710662842,
|
15155 |
+
"learning_rate": 0.0001691015377230727,
|
15156 |
+
"loss": 1.2651,
|
15157 |
+
"step": 108150
|
15158 |
+
},
|
15159 |
+
{
|
15160 |
+
"epoch": 3.2305257815065835,
|
15161 |
+
"grad_norm": 3.96573543548584,
|
15162 |
+
"learning_rate": 0.00016908725192715386,
|
15163 |
+
"loss": 1.2277,
|
15164 |
+
"step": 108200
|
15165 |
+
},
|
15166 |
+
{
|
15167 |
+
"epoch": 3.232018630758666,
|
15168 |
+
"grad_norm": 4.226304054260254,
|
15169 |
+
"learning_rate": 0.00016907296613123506,
|
15170 |
+
"loss": 1.2792,
|
15171 |
+
"step": 108250
|
15172 |
+
},
|
15173 |
+
{
|
15174 |
+
"epoch": 3.2335114800107485,
|
15175 |
+
"grad_norm": 5.907839775085449,
|
15176 |
+
"learning_rate": 0.0001690586803353162,
|
15177 |
+
"loss": 1.2123,
|
15178 |
+
"step": 108300
|
15179 |
+
},
|
15180 |
+
{
|
15181 |
+
"epoch": 3.235004329262831,
|
15182 |
+
"grad_norm": 3.7594401836395264,
|
15183 |
+
"learning_rate": 0.00016904439453939738,
|
15184 |
+
"loss": 1.1956,
|
15185 |
+
"step": 108350
|
15186 |
+
},
|
15187 |
+
{
|
15188 |
+
"epoch": 3.2364971785149135,
|
15189 |
+
"grad_norm": 3.6791820526123047,
|
15190 |
+
"learning_rate": 0.00016903010874347852,
|
15191 |
+
"loss": 1.2103,
|
15192 |
+
"step": 108400
|
15193 |
+
},
|
15194 |
+
{
|
15195 |
+
"epoch": 3.237990027766996,
|
15196 |
+
"grad_norm": 4.249289512634277,
|
15197 |
+
"learning_rate": 0.00016901582294755971,
|
15198 |
+
"loss": 1.2348,
|
15199 |
+
"step": 108450
|
15200 |
+
},
|
15201 |
+
{
|
15202 |
+
"epoch": 3.2394828770190784,
|
15203 |
+
"grad_norm": 5.203214645385742,
|
15204 |
+
"learning_rate": 0.00016900153715164085,
|
15205 |
+
"loss": 1.2122,
|
15206 |
+
"step": 108500
|
15207 |
+
},
|
15208 |
+
{
|
15209 |
+
"epoch": 3.240975726271161,
|
15210 |
+
"grad_norm": 5.2223591804504395,
|
15211 |
+
"learning_rate": 0.00016898725135572204,
|
15212 |
+
"loss": 1.1984,
|
15213 |
+
"step": 108550
|
15214 |
+
},
|
15215 |
+
{
|
15216 |
+
"epoch": 3.242468575523244,
|
15217 |
+
"grad_norm": 3.9512999057769775,
|
15218 |
+
"learning_rate": 0.0001689729655598032,
|
15219 |
+
"loss": 1.2791,
|
15220 |
+
"step": 108600
|
15221 |
+
},
|
15222 |
+
{
|
15223 |
+
"epoch": 3.243961424775326,
|
15224 |
+
"grad_norm": 5.953644752502441,
|
15225 |
+
"learning_rate": 0.00016895867976388437,
|
15226 |
+
"loss": 1.2582,
|
15227 |
+
"step": 108650
|
15228 |
+
},
|
15229 |
+
{
|
15230 |
+
"epoch": 3.245454274027409,
|
15231 |
+
"grad_norm": 4.990479469299316,
|
15232 |
+
"learning_rate": 0.00016894439396796554,
|
15233 |
+
"loss": 1.2594,
|
15234 |
+
"step": 108700
|
15235 |
+
},
|
15236 |
+
{
|
15237 |
+
"epoch": 3.246947123279491,
|
15238 |
+
"grad_norm": 4.3011651039123535,
|
15239 |
+
"learning_rate": 0.0001689301081720467,
|
15240 |
+
"loss": 1.2351,
|
15241 |
+
"step": 108750
|
15242 |
+
},
|
15243 |
+
{
|
15244 |
+
"epoch": 3.248439972531574,
|
15245 |
+
"grad_norm": 4.554538249969482,
|
15246 |
+
"learning_rate": 0.00016891582237612787,
|
15247 |
+
"loss": 1.2482,
|
15248 |
+
"step": 108800
|
15249 |
+
},
|
15250 |
+
{
|
15251 |
+
"epoch": 3.2499328217836565,
|
15252 |
+
"grad_norm": 4.715415000915527,
|
15253 |
+
"learning_rate": 0.00016890153658020903,
|
15254 |
+
"loss": 1.2691,
|
15255 |
+
"step": 108850
|
15256 |
+
},
|
15257 |
+
{
|
15258 |
+
"epoch": 3.2514256710357388,
|
15259 |
+
"grad_norm": 5.759798049926758,
|
15260 |
+
"learning_rate": 0.0001688872507842902,
|
15261 |
+
"loss": 1.2224,
|
15262 |
+
"step": 108900
|
15263 |
+
},
|
15264 |
+
{
|
15265 |
+
"epoch": 3.2529185202878215,
|
15266 |
+
"grad_norm": 3.7031774520874023,
|
15267 |
+
"learning_rate": 0.00016887296498837136,
|
15268 |
+
"loss": 1.2166,
|
15269 |
+
"step": 108950
|
15270 |
+
},
|
15271 |
+
{
|
15272 |
+
"epoch": 3.2544113695399037,
|
15273 |
+
"grad_norm": 5.22156286239624,
|
15274 |
+
"learning_rate": 0.00016885867919245253,
|
15275 |
+
"loss": 1.2465,
|
15276 |
+
"step": 109000
|
15277 |
+
},
|
15278 |
+
{
|
15279 |
+
"epoch": 3.2559042187919864,
|
15280 |
+
"grad_norm": 5.56991720199585,
|
15281 |
+
"learning_rate": 0.00016884439339653372,
|
15282 |
+
"loss": 1.2274,
|
15283 |
+
"step": 109050
|
15284 |
+
},
|
15285 |
+
{
|
15286 |
+
"epoch": 3.257397068044069,
|
15287 |
+
"grad_norm": 4.750823020935059,
|
15288 |
+
"learning_rate": 0.00016883010760061485,
|
15289 |
+
"loss": 1.1885,
|
15290 |
+
"step": 109100
|
15291 |
+
},
|
15292 |
+
{
|
15293 |
+
"epoch": 3.2588899172961514,
|
15294 |
+
"grad_norm": 4.139292240142822,
|
15295 |
+
"learning_rate": 0.00016881582180469605,
|
15296 |
+
"loss": 1.2308,
|
15297 |
+
"step": 109150
|
15298 |
+
},
|
15299 |
+
{
|
15300 |
+
"epoch": 3.260382766548234,
|
15301 |
+
"grad_norm": 3.652226209640503,
|
15302 |
+
"learning_rate": 0.00016880153600877718,
|
15303 |
+
"loss": 1.212,
|
15304 |
+
"step": 109200
|
15305 |
+
},
|
15306 |
+
{
|
15307 |
+
"epoch": 3.2618756158003164,
|
15308 |
+
"grad_norm": 5.819267272949219,
|
15309 |
+
"learning_rate": 0.00016878725021285838,
|
15310 |
+
"loss": 1.1852,
|
15311 |
+
"step": 109250
|
15312 |
+
},
|
15313 |
+
{
|
15314 |
+
"epoch": 3.263368465052399,
|
15315 |
+
"grad_norm": 4.4434027671813965,
|
15316 |
+
"learning_rate": 0.00016877296441693954,
|
15317 |
+
"loss": 1.2124,
|
15318 |
+
"step": 109300
|
15319 |
+
},
|
15320 |
+
{
|
15321 |
+
"epoch": 3.2648613143044813,
|
15322 |
+
"grad_norm": 3.6519103050231934,
|
15323 |
+
"learning_rate": 0.0001687586786210207,
|
15324 |
+
"loss": 1.2033,
|
15325 |
+
"step": 109350
|
15326 |
+
},
|
15327 |
+
{
|
15328 |
+
"epoch": 3.266354163556564,
|
15329 |
+
"grad_norm": 5.874778747558594,
|
15330 |
+
"learning_rate": 0.00016874439282510187,
|
15331 |
+
"loss": 1.2745,
|
15332 |
+
"step": 109400
|
15333 |
+
},
|
15334 |
+
{
|
15335 |
+
"epoch": 3.2678470128086468,
|
15336 |
+
"grad_norm": 4.622878551483154,
|
15337 |
+
"learning_rate": 0.00016873010702918303,
|
15338 |
+
"loss": 1.2599,
|
15339 |
+
"step": 109450
|
15340 |
+
},
|
15341 |
+
{
|
15342 |
+
"epoch": 3.269339862060729,
|
15343 |
+
"grad_norm": 4.694761276245117,
|
15344 |
+
"learning_rate": 0.0001687158212332642,
|
15345 |
+
"loss": 1.2233,
|
15346 |
+
"step": 109500
|
15347 |
+
},
|
15348 |
+
{
|
15349 |
+
"epoch": 3.2708327113128117,
|
15350 |
+
"grad_norm": 5.996031284332275,
|
15351 |
+
"learning_rate": 0.00016870153543734536,
|
15352 |
+
"loss": 1.2452,
|
15353 |
+
"step": 109550
|
15354 |
+
},
|
15355 |
+
{
|
15356 |
+
"epoch": 3.272325560564894,
|
15357 |
+
"grad_norm": 3.6771366596221924,
|
15358 |
+
"learning_rate": 0.00016868724964142653,
|
15359 |
+
"loss": 1.253,
|
15360 |
+
"step": 109600
|
15361 |
+
},
|
15362 |
+
{
|
15363 |
+
"epoch": 3.2738184098169767,
|
15364 |
+
"grad_norm": 3.622293710708618,
|
15365 |
+
"learning_rate": 0.0001686729638455077,
|
15366 |
+
"loss": 1.2456,
|
15367 |
+
"step": 109650
|
15368 |
+
},
|
15369 |
+
{
|
15370 |
+
"epoch": 3.2753112590690594,
|
15371 |
+
"grad_norm": 6.092465400695801,
|
15372 |
+
"learning_rate": 0.00016865867804958886,
|
15373 |
+
"loss": 1.2742,
|
15374 |
+
"step": 109700
|
15375 |
+
},
|
15376 |
+
{
|
15377 |
+
"epoch": 3.2768041083211417,
|
15378 |
+
"grad_norm": 4.962725639343262,
|
15379 |
+
"learning_rate": 0.00016864439225367005,
|
15380 |
+
"loss": 1.2426,
|
15381 |
+
"step": 109750
|
15382 |
+
},
|
15383 |
+
{
|
15384 |
+
"epoch": 3.2782969575732244,
|
15385 |
+
"grad_norm": 4.702395915985107,
|
15386 |
+
"learning_rate": 0.0001686301064577512,
|
15387 |
+
"loss": 1.2324,
|
15388 |
+
"step": 109800
|
15389 |
+
},
|
15390 |
+
{
|
15391 |
+
"epoch": 3.2797898068253066,
|
15392 |
+
"grad_norm": 4.226596832275391,
|
15393 |
+
"learning_rate": 0.00016861582066183238,
|
15394 |
+
"loss": 1.2791,
|
15395 |
+
"step": 109850
|
15396 |
+
},
|
15397 |
+
{
|
15398 |
+
"epoch": 3.2812826560773893,
|
15399 |
+
"grad_norm": 4.105664253234863,
|
15400 |
+
"learning_rate": 0.00016860153486591352,
|
15401 |
+
"loss": 1.2606,
|
15402 |
+
"step": 109900
|
15403 |
+
},
|
15404 |
+
{
|
15405 |
+
"epoch": 3.2827755053294716,
|
15406 |
+
"grad_norm": 4.2797369956970215,
|
15407 |
+
"learning_rate": 0.0001685872490699947,
|
15408 |
+
"loss": 1.3038,
|
15409 |
+
"step": 109950
|
15410 |
+
},
|
15411 |
+
{
|
15412 |
+
"epoch": 3.2842683545815543,
|
15413 |
+
"grad_norm": 5.301023483276367,
|
15414 |
+
"learning_rate": 0.00016857296327407585,
|
15415 |
+
"loss": 1.1694,
|
15416 |
+
"step": 110000
|
15417 |
}
|
15418 |
],
|
15419 |
"logging_steps": 50,
|
|
|
15433 |
"attributes": {}
|
15434 |
}
|
15435 |
},
|
15436 |
+
"total_flos": 2.778986267917943e+18,
|
15437 |
"train_batch_size": 2,
|
15438 |
"trial_name": null,
|
15439 |
"trial_params": null
|