Training in progress, step 90000, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step90000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step90000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 42002584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38763327006639b1a1a32c6dc1d61d474876e154045bb01eb89f99012d5ea6bf
|
3 |
size 42002584
|
last-checkpoint/global_step90000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05c634fca74f4a731a16247344a71fa8b3d0753112c61a040bd92114f3615a4b
|
3 |
+
size 251710672
|
last-checkpoint/global_step90000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:935bcf04c20d3009fa033ff865d2e0cec6c8f264ef17f98d6d5748e4442e66be
|
3 |
+
size 153747385
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step90000
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61b33653a5c3bc3f5cea737eec066d765bdf2f023daef7df3a3e1e44753fe1c3
|
3 |
size 14244
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -11214,6 +11214,1406 @@
|
|
11214 |
"learning_rate": 0.00017714444082537616,
|
11215 |
"loss": 1.3118,
|
11216 |
"step": 80000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11217 |
}
|
11218 |
],
|
11219 |
"logging_steps": 50,
|
@@ -11233,7 +12633,7 @@
|
|
11233 |
"attributes": {}
|
11234 |
}
|
11235 |
},
|
11236 |
-
"total_flos": 2.
|
11237 |
"train_batch_size": 2,
|
11238 |
"trial_name": null,
|
11239 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.6871286537485446,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 90000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
11214 |
"learning_rate": 0.00017714444082537616,
|
11215 |
"loss": 1.3118,
|
11216 |
"step": 80000
|
11217 |
+
},
|
11218 |
+
{
|
11219 |
+
"epoch": 2.390051652584122,
|
11220 |
+
"grad_norm": 7.3811869621276855,
|
11221 |
+
"learning_rate": 0.00017713015502945732,
|
11222 |
+
"loss": 1.2989,
|
11223 |
+
"step": 80050
|
11224 |
+
},
|
11225 |
+
{
|
11226 |
+
"epoch": 2.3915445018362047,
|
11227 |
+
"grad_norm": 4.3925604820251465,
|
11228 |
+
"learning_rate": 0.0001771158692335385,
|
11229 |
+
"loss": 1.3183,
|
11230 |
+
"step": 80100
|
11231 |
+
},
|
11232 |
+
{
|
11233 |
+
"epoch": 2.393037351088287,
|
11234 |
+
"grad_norm": 3.382279396057129,
|
11235 |
+
"learning_rate": 0.00017710158343761965,
|
11236 |
+
"loss": 1.2749,
|
11237 |
+
"step": 80150
|
11238 |
+
},
|
11239 |
+
{
|
11240 |
+
"epoch": 2.3945302003403697,
|
11241 |
+
"grad_norm": 6.085289478302002,
|
11242 |
+
"learning_rate": 0.00017708729764170082,
|
11243 |
+
"loss": 1.2881,
|
11244 |
+
"step": 80200
|
11245 |
+
},
|
11246 |
+
{
|
11247 |
+
"epoch": 2.3960230495924524,
|
11248 |
+
"grad_norm": 3.96476149559021,
|
11249 |
+
"learning_rate": 0.00017707301184578198,
|
11250 |
+
"loss": 1.2656,
|
11251 |
+
"step": 80250
|
11252 |
+
},
|
11253 |
+
{
|
11254 |
+
"epoch": 2.3975158988445346,
|
11255 |
+
"grad_norm": 4.456584930419922,
|
11256 |
+
"learning_rate": 0.00017705872604986315,
|
11257 |
+
"loss": 1.2969,
|
11258 |
+
"step": 80300
|
11259 |
+
},
|
11260 |
+
{
|
11261 |
+
"epoch": 2.3990087480966173,
|
11262 |
+
"grad_norm": 5.09492301940918,
|
11263 |
+
"learning_rate": 0.0001770444402539443,
|
11264 |
+
"loss": 1.2679,
|
11265 |
+
"step": 80350
|
11266 |
+
},
|
11267 |
+
{
|
11268 |
+
"epoch": 2.4005015973486996,
|
11269 |
+
"grad_norm": 4.211208820343018,
|
11270 |
+
"learning_rate": 0.00017703015445802548,
|
11271 |
+
"loss": 1.2235,
|
11272 |
+
"step": 80400
|
11273 |
+
},
|
11274 |
+
{
|
11275 |
+
"epoch": 2.4019944466007823,
|
11276 |
+
"grad_norm": 4.683793067932129,
|
11277 |
+
"learning_rate": 0.00017701586866210664,
|
11278 |
+
"loss": 1.2791,
|
11279 |
+
"step": 80450
|
11280 |
+
},
|
11281 |
+
{
|
11282 |
+
"epoch": 2.403487295852865,
|
11283 |
+
"grad_norm": 4.003592014312744,
|
11284 |
+
"learning_rate": 0.00017700158286618783,
|
11285 |
+
"loss": 1.2498,
|
11286 |
+
"step": 80500
|
11287 |
+
},
|
11288 |
+
{
|
11289 |
+
"epoch": 2.4049801451049473,
|
11290 |
+
"grad_norm": 5.065978527069092,
|
11291 |
+
"learning_rate": 0.00017698729707026897,
|
11292 |
+
"loss": 1.2681,
|
11293 |
+
"step": 80550
|
11294 |
+
},
|
11295 |
+
{
|
11296 |
+
"epoch": 2.40647299435703,
|
11297 |
+
"grad_norm": 4.453160762786865,
|
11298 |
+
"learning_rate": 0.00017697301127435016,
|
11299 |
+
"loss": 1.3567,
|
11300 |
+
"step": 80600
|
11301 |
+
},
|
11302 |
+
{
|
11303 |
+
"epoch": 2.4079658436091123,
|
11304 |
+
"grad_norm": 4.852217197418213,
|
11305 |
+
"learning_rate": 0.0001769587254784313,
|
11306 |
+
"loss": 1.2808,
|
11307 |
+
"step": 80650
|
11308 |
+
},
|
11309 |
+
{
|
11310 |
+
"epoch": 2.409458692861195,
|
11311 |
+
"grad_norm": 4.629857063293457,
|
11312 |
+
"learning_rate": 0.0001769444396825125,
|
11313 |
+
"loss": 1.2994,
|
11314 |
+
"step": 80700
|
11315 |
+
},
|
11316 |
+
{
|
11317 |
+
"epoch": 2.4109515421132772,
|
11318 |
+
"grad_norm": 4.611933708190918,
|
11319 |
+
"learning_rate": 0.00017693015388659363,
|
11320 |
+
"loss": 1.2477,
|
11321 |
+
"step": 80750
|
11322 |
+
},
|
11323 |
+
{
|
11324 |
+
"epoch": 2.41244439136536,
|
11325 |
+
"grad_norm": 4.3425822257995605,
|
11326 |
+
"learning_rate": 0.00017691586809067482,
|
11327 |
+
"loss": 1.2861,
|
11328 |
+
"step": 80800
|
11329 |
+
},
|
11330 |
+
{
|
11331 |
+
"epoch": 2.4139372406174426,
|
11332 |
+
"grad_norm": 5.061252593994141,
|
11333 |
+
"learning_rate": 0.00017690158229475598,
|
11334 |
+
"loss": 1.345,
|
11335 |
+
"step": 80850
|
11336 |
+
},
|
11337 |
+
{
|
11338 |
+
"epoch": 2.415430089869525,
|
11339 |
+
"grad_norm": 5.230067253112793,
|
11340 |
+
"learning_rate": 0.00017688729649883715,
|
11341 |
+
"loss": 1.3097,
|
11342 |
+
"step": 80900
|
11343 |
+
},
|
11344 |
+
{
|
11345 |
+
"epoch": 2.4169229391216076,
|
11346 |
+
"grad_norm": 4.092568874359131,
|
11347 |
+
"learning_rate": 0.00017687301070291831,
|
11348 |
+
"loss": 1.2609,
|
11349 |
+
"step": 80950
|
11350 |
+
},
|
11351 |
+
{
|
11352 |
+
"epoch": 2.41841578837369,
|
11353 |
+
"grad_norm": 7.115472316741943,
|
11354 |
+
"learning_rate": 0.00017685872490699948,
|
11355 |
+
"loss": 1.3306,
|
11356 |
+
"step": 81000
|
11357 |
+
},
|
11358 |
+
{
|
11359 |
+
"epoch": 2.4199086376257726,
|
11360 |
+
"grad_norm": 4.142023086547852,
|
11361 |
+
"learning_rate": 0.00017684443911108064,
|
11362 |
+
"loss": 1.2715,
|
11363 |
+
"step": 81050
|
11364 |
+
},
|
11365 |
+
{
|
11366 |
+
"epoch": 2.421401486877855,
|
11367 |
+
"grad_norm": 4.5259575843811035,
|
11368 |
+
"learning_rate": 0.0001768301533151618,
|
11369 |
+
"loss": 1.3097,
|
11370 |
+
"step": 81100
|
11371 |
+
},
|
11372 |
+
{
|
11373 |
+
"epoch": 2.4228943361299375,
|
11374 |
+
"grad_norm": 4.749493598937988,
|
11375 |
+
"learning_rate": 0.00017681586751924297,
|
11376 |
+
"loss": 1.2413,
|
11377 |
+
"step": 81150
|
11378 |
+
},
|
11379 |
+
{
|
11380 |
+
"epoch": 2.4243871853820202,
|
11381 |
+
"grad_norm": 3.844097137451172,
|
11382 |
+
"learning_rate": 0.00017680158172332414,
|
11383 |
+
"loss": 1.3365,
|
11384 |
+
"step": 81200
|
11385 |
+
},
|
11386 |
+
{
|
11387 |
+
"epoch": 2.4258800346341025,
|
11388 |
+
"grad_norm": 4.504172325134277,
|
11389 |
+
"learning_rate": 0.0001767872959274053,
|
11390 |
+
"loss": 1.2889,
|
11391 |
+
"step": 81250
|
11392 |
+
},
|
11393 |
+
{
|
11394 |
+
"epoch": 2.427372883886185,
|
11395 |
+
"grad_norm": 4.036706447601318,
|
11396 |
+
"learning_rate": 0.0001767730101314865,
|
11397 |
+
"loss": 1.3041,
|
11398 |
+
"step": 81300
|
11399 |
+
},
|
11400 |
+
{
|
11401 |
+
"epoch": 2.4288657331382675,
|
11402 |
+
"grad_norm": 4.100889205932617,
|
11403 |
+
"learning_rate": 0.00017675872433556763,
|
11404 |
+
"loss": 1.2886,
|
11405 |
+
"step": 81350
|
11406 |
+
},
|
11407 |
+
{
|
11408 |
+
"epoch": 2.43035858239035,
|
11409 |
+
"grad_norm": 5.082368850708008,
|
11410 |
+
"learning_rate": 0.00017674443853964882,
|
11411 |
+
"loss": 1.333,
|
11412 |
+
"step": 81400
|
11413 |
+
},
|
11414 |
+
{
|
11415 |
+
"epoch": 2.431851431642433,
|
11416 |
+
"grad_norm": 4.644407749176025,
|
11417 |
+
"learning_rate": 0.00017673015274372996,
|
11418 |
+
"loss": 1.3191,
|
11419 |
+
"step": 81450
|
11420 |
+
},
|
11421 |
+
{
|
11422 |
+
"epoch": 2.433344280894515,
|
11423 |
+
"grad_norm": 4.445605754852295,
|
11424 |
+
"learning_rate": 0.00017671586694781115,
|
11425 |
+
"loss": 1.2961,
|
11426 |
+
"step": 81500
|
11427 |
+
},
|
11428 |
+
{
|
11429 |
+
"epoch": 2.434837130146598,
|
11430 |
+
"grad_norm": 4.809011936187744,
|
11431 |
+
"learning_rate": 0.00017670158115189232,
|
11432 |
+
"loss": 1.2496,
|
11433 |
+
"step": 81550
|
11434 |
+
},
|
11435 |
+
{
|
11436 |
+
"epoch": 2.43632997939868,
|
11437 |
+
"grad_norm": 5.294357776641846,
|
11438 |
+
"learning_rate": 0.00017668729535597348,
|
11439 |
+
"loss": 1.2885,
|
11440 |
+
"step": 81600
|
11441 |
+
},
|
11442 |
+
{
|
11443 |
+
"epoch": 2.437822828650763,
|
11444 |
+
"grad_norm": 4.103408336639404,
|
11445 |
+
"learning_rate": 0.00017667300956005465,
|
11446 |
+
"loss": 1.2612,
|
11447 |
+
"step": 81650
|
11448 |
+
},
|
11449 |
+
{
|
11450 |
+
"epoch": 2.4393156779028455,
|
11451 |
+
"grad_norm": 4.427671909332275,
|
11452 |
+
"learning_rate": 0.0001766587237641358,
|
11453 |
+
"loss": 1.2849,
|
11454 |
+
"step": 81700
|
11455 |
+
},
|
11456 |
+
{
|
11457 |
+
"epoch": 2.440808527154928,
|
11458 |
+
"grad_norm": 5.336637496948242,
|
11459 |
+
"learning_rate": 0.00017664443796821698,
|
11460 |
+
"loss": 1.2923,
|
11461 |
+
"step": 81750
|
11462 |
+
},
|
11463 |
+
{
|
11464 |
+
"epoch": 2.4423013764070105,
|
11465 |
+
"grad_norm": 5.235950946807861,
|
11466 |
+
"learning_rate": 0.00017663015217229814,
|
11467 |
+
"loss": 1.3086,
|
11468 |
+
"step": 81800
|
11469 |
+
},
|
11470 |
+
{
|
11471 |
+
"epoch": 2.4437942256590928,
|
11472 |
+
"grad_norm": 5.3591766357421875,
|
11473 |
+
"learning_rate": 0.0001766158663763793,
|
11474 |
+
"loss": 1.3326,
|
11475 |
+
"step": 81850
|
11476 |
+
},
|
11477 |
+
{
|
11478 |
+
"epoch": 2.4452870749111755,
|
11479 |
+
"grad_norm": 5.432180881500244,
|
11480 |
+
"learning_rate": 0.00017660158058046047,
|
11481 |
+
"loss": 1.2918,
|
11482 |
+
"step": 81900
|
11483 |
+
},
|
11484 |
+
{
|
11485 |
+
"epoch": 2.446779924163258,
|
11486 |
+
"grad_norm": 5.184196949005127,
|
11487 |
+
"learning_rate": 0.00017658729478454163,
|
11488 |
+
"loss": 1.2652,
|
11489 |
+
"step": 81950
|
11490 |
+
},
|
11491 |
+
{
|
11492 |
+
"epoch": 2.4482727734153404,
|
11493 |
+
"grad_norm": 4.962531566619873,
|
11494 |
+
"learning_rate": 0.0001765730089886228,
|
11495 |
+
"loss": 1.2579,
|
11496 |
+
"step": 82000
|
11497 |
+
},
|
11498 |
+
{
|
11499 |
+
"epoch": 2.449765622667423,
|
11500 |
+
"grad_norm": 3.7370457649230957,
|
11501 |
+
"learning_rate": 0.00017655872319270396,
|
11502 |
+
"loss": 1.3204,
|
11503 |
+
"step": 82050
|
11504 |
+
},
|
11505 |
+
{
|
11506 |
+
"epoch": 2.4512584719195054,
|
11507 |
+
"grad_norm": 4.21932315826416,
|
11508 |
+
"learning_rate": 0.00017654443739678513,
|
11509 |
+
"loss": 1.333,
|
11510 |
+
"step": 82100
|
11511 |
+
},
|
11512 |
+
{
|
11513 |
+
"epoch": 2.452751321171588,
|
11514 |
+
"grad_norm": 4.366473197937012,
|
11515 |
+
"learning_rate": 0.0001765301516008663,
|
11516 |
+
"loss": 1.2594,
|
11517 |
+
"step": 82150
|
11518 |
+
},
|
11519 |
+
{
|
11520 |
+
"epoch": 2.454244170423671,
|
11521 |
+
"grad_norm": 5.760409832000732,
|
11522 |
+
"learning_rate": 0.00017651586580494746,
|
11523 |
+
"loss": 1.3004,
|
11524 |
+
"step": 82200
|
11525 |
+
},
|
11526 |
+
{
|
11527 |
+
"epoch": 2.455737019675753,
|
11528 |
+
"grad_norm": 4.652368545532227,
|
11529 |
+
"learning_rate": 0.00017650158000902862,
|
11530 |
+
"loss": 1.3092,
|
11531 |
+
"step": 82250
|
11532 |
+
},
|
11533 |
+
{
|
11534 |
+
"epoch": 2.457229868927836,
|
11535 |
+
"grad_norm": 3.5619237422943115,
|
11536 |
+
"learning_rate": 0.0001764872942131098,
|
11537 |
+
"loss": 1.3315,
|
11538 |
+
"step": 82300
|
11539 |
+
},
|
11540 |
+
{
|
11541 |
+
"epoch": 2.458722718179918,
|
11542 |
+
"grad_norm": 3.285752534866333,
|
11543 |
+
"learning_rate": 0.00017647300841719098,
|
11544 |
+
"loss": 1.2906,
|
11545 |
+
"step": 82350
|
11546 |
+
},
|
11547 |
+
{
|
11548 |
+
"epoch": 2.4602155674320008,
|
11549 |
+
"grad_norm": 4.153928279876709,
|
11550 |
+
"learning_rate": 0.00017645872262127212,
|
11551 |
+
"loss": 1.3055,
|
11552 |
+
"step": 82400
|
11553 |
+
},
|
11554 |
+
{
|
11555 |
+
"epoch": 2.4617084166840835,
|
11556 |
+
"grad_norm": 3.767779588699341,
|
11557 |
+
"learning_rate": 0.0001764444368253533,
|
11558 |
+
"loss": 1.264,
|
11559 |
+
"step": 82450
|
11560 |
+
},
|
11561 |
+
{
|
11562 |
+
"epoch": 2.4632012659361657,
|
11563 |
+
"grad_norm": 4.4513983726501465,
|
11564 |
+
"learning_rate": 0.00017643015102943445,
|
11565 |
+
"loss": 1.2749,
|
11566 |
+
"step": 82500
|
11567 |
+
},
|
11568 |
+
{
|
11569 |
+
"epoch": 2.4646941151882484,
|
11570 |
+
"grad_norm": 4.600104808807373,
|
11571 |
+
"learning_rate": 0.00017641586523351564,
|
11572 |
+
"loss": 1.3194,
|
11573 |
+
"step": 82550
|
11574 |
+
},
|
11575 |
+
{
|
11576 |
+
"epoch": 2.4661869644403307,
|
11577 |
+
"grad_norm": 5.654660701751709,
|
11578 |
+
"learning_rate": 0.00017640157943759677,
|
11579 |
+
"loss": 1.2525,
|
11580 |
+
"step": 82600
|
11581 |
+
},
|
11582 |
+
{
|
11583 |
+
"epoch": 2.4676798136924134,
|
11584 |
+
"grad_norm": 5.852449893951416,
|
11585 |
+
"learning_rate": 0.00017638729364167797,
|
11586 |
+
"loss": 1.2609,
|
11587 |
+
"step": 82650
|
11588 |
+
},
|
11589 |
+
{
|
11590 |
+
"epoch": 2.469172662944496,
|
11591 |
+
"grad_norm": 4.8410139083862305,
|
11592 |
+
"learning_rate": 0.00017637300784575913,
|
11593 |
+
"loss": 1.2892,
|
11594 |
+
"step": 82700
|
11595 |
+
},
|
11596 |
+
{
|
11597 |
+
"epoch": 2.4706655121965784,
|
11598 |
+
"grad_norm": 4.150029182434082,
|
11599 |
+
"learning_rate": 0.0001763587220498403,
|
11600 |
+
"loss": 1.2893,
|
11601 |
+
"step": 82750
|
11602 |
+
},
|
11603 |
+
{
|
11604 |
+
"epoch": 2.472158361448661,
|
11605 |
+
"grad_norm": 4.355432033538818,
|
11606 |
+
"learning_rate": 0.00017634443625392146,
|
11607 |
+
"loss": 1.294,
|
11608 |
+
"step": 82800
|
11609 |
+
},
|
11610 |
+
{
|
11611 |
+
"epoch": 2.4736512107007433,
|
11612 |
+
"grad_norm": 4.689866542816162,
|
11613 |
+
"learning_rate": 0.00017633015045800262,
|
11614 |
+
"loss": 1.3327,
|
11615 |
+
"step": 82850
|
11616 |
+
},
|
11617 |
+
{
|
11618 |
+
"epoch": 2.475144059952826,
|
11619 |
+
"grad_norm": 5.386442184448242,
|
11620 |
+
"learning_rate": 0.0001763158646620838,
|
11621 |
+
"loss": 1.3027,
|
11622 |
+
"step": 82900
|
11623 |
+
},
|
11624 |
+
{
|
11625 |
+
"epoch": 2.4766369092049083,
|
11626 |
+
"grad_norm": 4.756774425506592,
|
11627 |
+
"learning_rate": 0.00017630157886616495,
|
11628 |
+
"loss": 1.2994,
|
11629 |
+
"step": 82950
|
11630 |
+
},
|
11631 |
+
{
|
11632 |
+
"epoch": 2.478129758456991,
|
11633 |
+
"grad_norm": 5.8331451416015625,
|
11634 |
+
"learning_rate": 0.00017628729307024612,
|
11635 |
+
"loss": 1.298,
|
11636 |
+
"step": 83000
|
11637 |
+
},
|
11638 |
+
{
|
11639 |
+
"epoch": 2.4796226077090737,
|
11640 |
+
"grad_norm": 4.216109275817871,
|
11641 |
+
"learning_rate": 0.00017627300727432728,
|
11642 |
+
"loss": 1.3288,
|
11643 |
+
"step": 83050
|
11644 |
+
},
|
11645 |
+
{
|
11646 |
+
"epoch": 2.481115456961156,
|
11647 |
+
"grad_norm": 5.123735427856445,
|
11648 |
+
"learning_rate": 0.00017625872147840845,
|
11649 |
+
"loss": 1.3103,
|
11650 |
+
"step": 83100
|
11651 |
+
},
|
11652 |
+
{
|
11653 |
+
"epoch": 2.4826083062132387,
|
11654 |
+
"grad_norm": 3.7272541522979736,
|
11655 |
+
"learning_rate": 0.00017624443568248964,
|
11656 |
+
"loss": 1.2753,
|
11657 |
+
"step": 83150
|
11658 |
+
},
|
11659 |
+
{
|
11660 |
+
"epoch": 2.484101155465321,
|
11661 |
+
"grad_norm": 5.656236171722412,
|
11662 |
+
"learning_rate": 0.00017623014988657078,
|
11663 |
+
"loss": 1.3013,
|
11664 |
+
"step": 83200
|
11665 |
+
},
|
11666 |
+
{
|
11667 |
+
"epoch": 2.4855940047174037,
|
11668 |
+
"grad_norm": 4.702991962432861,
|
11669 |
+
"learning_rate": 0.00017621586409065197,
|
11670 |
+
"loss": 1.3073,
|
11671 |
+
"step": 83250
|
11672 |
+
},
|
11673 |
+
{
|
11674 |
+
"epoch": 2.487086853969486,
|
11675 |
+
"grad_norm": 4.512374401092529,
|
11676 |
+
"learning_rate": 0.0001762015782947331,
|
11677 |
+
"loss": 1.2951,
|
11678 |
+
"step": 83300
|
11679 |
+
},
|
11680 |
+
{
|
11681 |
+
"epoch": 2.4885797032215686,
|
11682 |
+
"grad_norm": 3.560082197189331,
|
11683 |
+
"learning_rate": 0.0001761872924988143,
|
11684 |
+
"loss": 1.3083,
|
11685 |
+
"step": 83350
|
11686 |
+
},
|
11687 |
+
{
|
11688 |
+
"epoch": 2.4900725524736513,
|
11689 |
+
"grad_norm": 5.189292907714844,
|
11690 |
+
"learning_rate": 0.00017617300670289544,
|
11691 |
+
"loss": 1.3153,
|
11692 |
+
"step": 83400
|
11693 |
+
},
|
11694 |
+
{
|
11695 |
+
"epoch": 2.4915654017257336,
|
11696 |
+
"grad_norm": 4.624311447143555,
|
11697 |
+
"learning_rate": 0.00017615872090697663,
|
11698 |
+
"loss": 1.2606,
|
11699 |
+
"step": 83450
|
11700 |
+
},
|
11701 |
+
{
|
11702 |
+
"epoch": 2.4930582509778163,
|
11703 |
+
"grad_norm": 4.030190944671631,
|
11704 |
+
"learning_rate": 0.0001761444351110578,
|
11705 |
+
"loss": 1.2552,
|
11706 |
+
"step": 83500
|
11707 |
+
},
|
11708 |
+
{
|
11709 |
+
"epoch": 2.4945511002298986,
|
11710 |
+
"grad_norm": 5.9866042137146,
|
11711 |
+
"learning_rate": 0.00017613014931513896,
|
11712 |
+
"loss": 1.2818,
|
11713 |
+
"step": 83550
|
11714 |
+
},
|
11715 |
+
{
|
11716 |
+
"epoch": 2.4960439494819813,
|
11717 |
+
"grad_norm": 4.025645732879639,
|
11718 |
+
"learning_rate": 0.00017611586351922012,
|
11719 |
+
"loss": 1.325,
|
11720 |
+
"step": 83600
|
11721 |
+
},
|
11722 |
+
{
|
11723 |
+
"epoch": 2.497536798734064,
|
11724 |
+
"grad_norm": 5.12217903137207,
|
11725 |
+
"learning_rate": 0.0001761015777233013,
|
11726 |
+
"loss": 1.2656,
|
11727 |
+
"step": 83650
|
11728 |
+
},
|
11729 |
+
{
|
11730 |
+
"epoch": 2.4990296479861462,
|
11731 |
+
"grad_norm": 3.5725717544555664,
|
11732 |
+
"learning_rate": 0.00017608729192738245,
|
11733 |
+
"loss": 1.2605,
|
11734 |
+
"step": 83700
|
11735 |
+
},
|
11736 |
+
{
|
11737 |
+
"epoch": 2.500522497238229,
|
11738 |
+
"grad_norm": 6.810322284698486,
|
11739 |
+
"learning_rate": 0.00017607300613146362,
|
11740 |
+
"loss": 1.3275,
|
11741 |
+
"step": 83750
|
11742 |
+
},
|
11743 |
+
{
|
11744 |
+
"epoch": 2.502015346490311,
|
11745 |
+
"grad_norm": 6.723217487335205,
|
11746 |
+
"learning_rate": 0.00017605872033554478,
|
11747 |
+
"loss": 1.3038,
|
11748 |
+
"step": 83800
|
11749 |
+
},
|
11750 |
+
{
|
11751 |
+
"epoch": 2.503508195742394,
|
11752 |
+
"grad_norm": 5.044365882873535,
|
11753 |
+
"learning_rate": 0.00017604443453962595,
|
11754 |
+
"loss": 1.2821,
|
11755 |
+
"step": 83850
|
11756 |
+
},
|
11757 |
+
{
|
11758 |
+
"epoch": 2.5050010449944766,
|
11759 |
+
"grad_norm": 4.397944450378418,
|
11760 |
+
"learning_rate": 0.0001760301487437071,
|
11761 |
+
"loss": 1.3281,
|
11762 |
+
"step": 83900
|
11763 |
+
},
|
11764 |
+
{
|
11765 |
+
"epoch": 2.506493894246559,
|
11766 |
+
"grad_norm": 3.9087088108062744,
|
11767 |
+
"learning_rate": 0.0001760158629477883,
|
11768 |
+
"loss": 1.2674,
|
11769 |
+
"step": 83950
|
11770 |
+
},
|
11771 |
+
{
|
11772 |
+
"epoch": 2.5079867434986416,
|
11773 |
+
"grad_norm": 4.3125128746032715,
|
11774 |
+
"learning_rate": 0.00017600157715186944,
|
11775 |
+
"loss": 1.2654,
|
11776 |
+
"step": 84000
|
11777 |
+
},
|
11778 |
+
{
|
11779 |
+
"epoch": 2.509479592750724,
|
11780 |
+
"grad_norm": 5.792150020599365,
|
11781 |
+
"learning_rate": 0.00017598729135595063,
|
11782 |
+
"loss": 1.3846,
|
11783 |
+
"step": 84050
|
11784 |
+
},
|
11785 |
+
{
|
11786 |
+
"epoch": 2.5109724420028066,
|
11787 |
+
"grad_norm": 4.449563026428223,
|
11788 |
+
"learning_rate": 0.00017597300556003177,
|
11789 |
+
"loss": 1.2402,
|
11790 |
+
"step": 84100
|
11791 |
+
},
|
11792 |
+
{
|
11793 |
+
"epoch": 2.5124652912548893,
|
11794 |
+
"grad_norm": 3.286771774291992,
|
11795 |
+
"learning_rate": 0.00017595871976411296,
|
11796 |
+
"loss": 1.2547,
|
11797 |
+
"step": 84150
|
11798 |
+
},
|
11799 |
+
{
|
11800 |
+
"epoch": 2.5139581405069715,
|
11801 |
+
"grad_norm": 4.84338903427124,
|
11802 |
+
"learning_rate": 0.0001759444339681941,
|
11803 |
+
"loss": 1.2127,
|
11804 |
+
"step": 84200
|
11805 |
+
},
|
11806 |
+
{
|
11807 |
+
"epoch": 2.5154509897590542,
|
11808 |
+
"grad_norm": 4.913907051086426,
|
11809 |
+
"learning_rate": 0.0001759301481722753,
|
11810 |
+
"loss": 1.3453,
|
11811 |
+
"step": 84250
|
11812 |
+
},
|
11813 |
+
{
|
11814 |
+
"epoch": 2.5169438390111365,
|
11815 |
+
"grad_norm": 3.828268527984619,
|
11816 |
+
"learning_rate": 0.00017591586237635645,
|
11817 |
+
"loss": 1.3056,
|
11818 |
+
"step": 84300
|
11819 |
+
},
|
11820 |
+
{
|
11821 |
+
"epoch": 2.518436688263219,
|
11822 |
+
"grad_norm": 4.86277961730957,
|
11823 |
+
"learning_rate": 0.00017590157658043762,
|
11824 |
+
"loss": 1.2613,
|
11825 |
+
"step": 84350
|
11826 |
+
},
|
11827 |
+
{
|
11828 |
+
"epoch": 2.519929537515302,
|
11829 |
+
"grad_norm": 3.3477306365966797,
|
11830 |
+
"learning_rate": 0.00017588729078451878,
|
11831 |
+
"loss": 1.2784,
|
11832 |
+
"step": 84400
|
11833 |
+
},
|
11834 |
+
{
|
11835 |
+
"epoch": 2.521422386767384,
|
11836 |
+
"grad_norm": 3.982935905456543,
|
11837 |
+
"learning_rate": 0.00017587300498859995,
|
11838 |
+
"loss": 1.279,
|
11839 |
+
"step": 84450
|
11840 |
+
},
|
11841 |
+
{
|
11842 |
+
"epoch": 2.522915236019467,
|
11843 |
+
"grad_norm": 4.321762561798096,
|
11844 |
+
"learning_rate": 0.0001758587191926811,
|
11845 |
+
"loss": 1.2706,
|
11846 |
+
"step": 84500
|
11847 |
+
},
|
11848 |
+
{
|
11849 |
+
"epoch": 2.524408085271549,
|
11850 |
+
"grad_norm": 3.989497423171997,
|
11851 |
+
"learning_rate": 0.00017584443339676228,
|
11852 |
+
"loss": 1.2431,
|
11853 |
+
"step": 84550
|
11854 |
+
},
|
11855 |
+
{
|
11856 |
+
"epoch": 2.525900934523632,
|
11857 |
+
"grad_norm": 4.457726955413818,
|
11858 |
+
"learning_rate": 0.00017583014760084344,
|
11859 |
+
"loss": 1.2686,
|
11860 |
+
"step": 84600
|
11861 |
+
},
|
11862 |
+
{
|
11863 |
+
"epoch": 2.5273937837757146,
|
11864 |
+
"grad_norm": 4.100541114807129,
|
11865 |
+
"learning_rate": 0.0001758158618049246,
|
11866 |
+
"loss": 1.2379,
|
11867 |
+
"step": 84650
|
11868 |
+
},
|
11869 |
+
{
|
11870 |
+
"epoch": 2.528886633027797,
|
11871 |
+
"grad_norm": 5.386853218078613,
|
11872 |
+
"learning_rate": 0.00017580157600900577,
|
11873 |
+
"loss": 1.2767,
|
11874 |
+
"step": 84700
|
11875 |
+
},
|
11876 |
+
{
|
11877 |
+
"epoch": 2.5303794822798795,
|
11878 |
+
"grad_norm": 4.51826286315918,
|
11879 |
+
"learning_rate": 0.00017578729021308694,
|
11880 |
+
"loss": 1.2072,
|
11881 |
+
"step": 84750
|
11882 |
+
},
|
11883 |
+
{
|
11884 |
+
"epoch": 2.531872331531962,
|
11885 |
+
"grad_norm": 7.255303382873535,
|
11886 |
+
"learning_rate": 0.0001757730044171681,
|
11887 |
+
"loss": 1.2324,
|
11888 |
+
"step": 84800
|
11889 |
+
},
|
11890 |
+
{
|
11891 |
+
"epoch": 2.5333651807840445,
|
11892 |
+
"grad_norm": 4.7532572746276855,
|
11893 |
+
"learning_rate": 0.00017575871862124927,
|
11894 |
+
"loss": 1.2534,
|
11895 |
+
"step": 84850
|
11896 |
+
},
|
11897 |
+
{
|
11898 |
+
"epoch": 2.534858030036127,
|
11899 |
+
"grad_norm": 4.200450897216797,
|
11900 |
+
"learning_rate": 0.00017574443282533043,
|
11901 |
+
"loss": 1.2732,
|
11902 |
+
"step": 84900
|
11903 |
+
},
|
11904 |
+
{
|
11905 |
+
"epoch": 2.5363508792882095,
|
11906 |
+
"grad_norm": 4.062733173370361,
|
11907 |
+
"learning_rate": 0.0001757301470294116,
|
11908 |
+
"loss": 1.2987,
|
11909 |
+
"step": 84950
|
11910 |
+
},
|
11911 |
+
{
|
11912 |
+
"epoch": 2.537843728540292,
|
11913 |
+
"grad_norm": 4.786027908325195,
|
11914 |
+
"learning_rate": 0.00017571586123349279,
|
11915 |
+
"loss": 1.2593,
|
11916 |
+
"step": 85000
|
11917 |
+
},
|
11918 |
+
{
|
11919 |
+
"epoch": 2.5393365777923744,
|
11920 |
+
"grad_norm": 6.346192359924316,
|
11921 |
+
"learning_rate": 0.00017570157543757392,
|
11922 |
+
"loss": 1.2854,
|
11923 |
+
"step": 85050
|
11924 |
+
},
|
11925 |
+
{
|
11926 |
+
"epoch": 2.540829427044457,
|
11927 |
+
"grad_norm": 5.030992031097412,
|
11928 |
+
"learning_rate": 0.00017568728964165512,
|
11929 |
+
"loss": 1.2498,
|
11930 |
+
"step": 85100
|
11931 |
+
},
|
11932 |
+
{
|
11933 |
+
"epoch": 2.54232227629654,
|
11934 |
+
"grad_norm": 3.7949612140655518,
|
11935 |
+
"learning_rate": 0.00017567300384573625,
|
11936 |
+
"loss": 1.275,
|
11937 |
+
"step": 85150
|
11938 |
+
},
|
11939 |
+
{
|
11940 |
+
"epoch": 2.543815125548622,
|
11941 |
+
"grad_norm": 4.442857265472412,
|
11942 |
+
"learning_rate": 0.00017565871804981745,
|
11943 |
+
"loss": 1.2156,
|
11944 |
+
"step": 85200
|
11945 |
+
},
|
11946 |
+
{
|
11947 |
+
"epoch": 2.5453079748007044,
|
11948 |
+
"grad_norm": 5.143223285675049,
|
11949 |
+
"learning_rate": 0.00017564443225389858,
|
11950 |
+
"loss": 1.3554,
|
11951 |
+
"step": 85250
|
11952 |
+
},
|
11953 |
+
{
|
11954 |
+
"epoch": 2.546800824052787,
|
11955 |
+
"grad_norm": 4.240771770477295,
|
11956 |
+
"learning_rate": 0.00017563014645797977,
|
11957 |
+
"loss": 1.3288,
|
11958 |
+
"step": 85300
|
11959 |
+
},
|
11960 |
+
{
|
11961 |
+
"epoch": 2.5482936733048698,
|
11962 |
+
"grad_norm": 5.060192108154297,
|
11963 |
+
"learning_rate": 0.00017561586066206094,
|
11964 |
+
"loss": 1.3284,
|
11965 |
+
"step": 85350
|
11966 |
+
},
|
11967 |
+
{
|
11968 |
+
"epoch": 2.549786522556952,
|
11969 |
+
"grad_norm": 4.155733585357666,
|
11970 |
+
"learning_rate": 0.0001756015748661421,
|
11971 |
+
"loss": 1.2422,
|
11972 |
+
"step": 85400
|
11973 |
+
},
|
11974 |
+
{
|
11975 |
+
"epoch": 2.5512793718090347,
|
11976 |
+
"grad_norm": 5.4249749183654785,
|
11977 |
+
"learning_rate": 0.00017558728907022327,
|
11978 |
+
"loss": 1.262,
|
11979 |
+
"step": 85450
|
11980 |
+
},
|
11981 |
+
{
|
11982 |
+
"epoch": 2.552772221061117,
|
11983 |
+
"grad_norm": 5.704741954803467,
|
11984 |
+
"learning_rate": 0.00017557300327430443,
|
11985 |
+
"loss": 1.308,
|
11986 |
+
"step": 85500
|
11987 |
+
},
|
11988 |
+
{
|
11989 |
+
"epoch": 2.5542650703131997,
|
11990 |
+
"grad_norm": 4.013110637664795,
|
11991 |
+
"learning_rate": 0.0001755587174783856,
|
11992 |
+
"loss": 1.2667,
|
11993 |
+
"step": 85550
|
11994 |
+
},
|
11995 |
+
{
|
11996 |
+
"epoch": 2.5557579195652824,
|
11997 |
+
"grad_norm": 4.5984673500061035,
|
11998 |
+
"learning_rate": 0.00017554443168246676,
|
11999 |
+
"loss": 1.2756,
|
12000 |
+
"step": 85600
|
12001 |
+
},
|
12002 |
+
{
|
12003 |
+
"epoch": 2.5572507688173647,
|
12004 |
+
"grad_norm": 4.716318130493164,
|
12005 |
+
"learning_rate": 0.00017553014588654793,
|
12006 |
+
"loss": 1.2896,
|
12007 |
+
"step": 85650
|
12008 |
+
},
|
12009 |
+
{
|
12010 |
+
"epoch": 2.5587436180694474,
|
12011 |
+
"grad_norm": 3.9019134044647217,
|
12012 |
+
"learning_rate": 0.0001755158600906291,
|
12013 |
+
"loss": 1.3419,
|
12014 |
+
"step": 85700
|
12015 |
+
},
|
12016 |
+
{
|
12017 |
+
"epoch": 2.5602364673215297,
|
12018 |
+
"grad_norm": 4.762269020080566,
|
12019 |
+
"learning_rate": 0.00017550157429471026,
|
12020 |
+
"loss": 1.2856,
|
12021 |
+
"step": 85750
|
12022 |
+
},
|
12023 |
+
{
|
12024 |
+
"epoch": 2.5617293165736124,
|
12025 |
+
"grad_norm": 4.8936567306518555,
|
12026 |
+
"learning_rate": 0.00017548728849879145,
|
12027 |
+
"loss": 1.3033,
|
12028 |
+
"step": 85800
|
12029 |
+
},
|
12030 |
+
{
|
12031 |
+
"epoch": 2.563222165825695,
|
12032 |
+
"grad_norm": 5.605986595153809,
|
12033 |
+
"learning_rate": 0.00017547300270287259,
|
12034 |
+
"loss": 1.2928,
|
12035 |
+
"step": 85850
|
12036 |
+
},
|
12037 |
+
{
|
12038 |
+
"epoch": 2.5647150150777773,
|
12039 |
+
"grad_norm": 5.456994533538818,
|
12040 |
+
"learning_rate": 0.00017545871690695378,
|
12041 |
+
"loss": 1.2646,
|
12042 |
+
"step": 85900
|
12043 |
+
},
|
12044 |
+
{
|
12045 |
+
"epoch": 2.56620786432986,
|
12046 |
+
"grad_norm": 5.125797748565674,
|
12047 |
+
"learning_rate": 0.00017544443111103491,
|
12048 |
+
"loss": 1.3337,
|
12049 |
+
"step": 85950
|
12050 |
+
},
|
12051 |
+
{
|
12052 |
+
"epoch": 2.5677007135819423,
|
12053 |
+
"grad_norm": 6.698281288146973,
|
12054 |
+
"learning_rate": 0.0001754301453151161,
|
12055 |
+
"loss": 1.231,
|
12056 |
+
"step": 86000
|
12057 |
+
},
|
12058 |
+
{
|
12059 |
+
"epoch": 2.569193562834025,
|
12060 |
+
"grad_norm": 4.8110175132751465,
|
12061 |
+
"learning_rate": 0.00017541585951919724,
|
12062 |
+
"loss": 1.2829,
|
12063 |
+
"step": 86050
|
12064 |
+
},
|
12065 |
+
{
|
12066 |
+
"epoch": 2.5706864120861077,
|
12067 |
+
"grad_norm": 4.389436721801758,
|
12068 |
+
"learning_rate": 0.00017540157372327844,
|
12069 |
+
"loss": 1.3394,
|
12070 |
+
"step": 86100
|
12071 |
+
},
|
12072 |
+
{
|
12073 |
+
"epoch": 2.57217926133819,
|
12074 |
+
"grad_norm": 4.567478179931641,
|
12075 |
+
"learning_rate": 0.0001753872879273596,
|
12076 |
+
"loss": 1.3179,
|
12077 |
+
"step": 86150
|
12078 |
+
},
|
12079 |
+
{
|
12080 |
+
"epoch": 2.5736721105902727,
|
12081 |
+
"grad_norm": 6.0036444664001465,
|
12082 |
+
"learning_rate": 0.00017537300213144077,
|
12083 |
+
"loss": 1.2837,
|
12084 |
+
"step": 86200
|
12085 |
+
},
|
12086 |
+
{
|
12087 |
+
"epoch": 2.575164959842355,
|
12088 |
+
"grad_norm": 4.954610824584961,
|
12089 |
+
"learning_rate": 0.00017535871633552193,
|
12090 |
+
"loss": 1.3057,
|
12091 |
+
"step": 86250
|
12092 |
+
},
|
12093 |
+
{
|
12094 |
+
"epoch": 2.5766578090944376,
|
12095 |
+
"grad_norm": 3.623244524002075,
|
12096 |
+
"learning_rate": 0.0001753444305396031,
|
12097 |
+
"loss": 1.3203,
|
12098 |
+
"step": 86300
|
12099 |
+
},
|
12100 |
+
{
|
12101 |
+
"epoch": 2.5781506583465204,
|
12102 |
+
"grad_norm": 3.969407081604004,
|
12103 |
+
"learning_rate": 0.00017533014474368426,
|
12104 |
+
"loss": 1.2465,
|
12105 |
+
"step": 86350
|
12106 |
+
},
|
12107 |
+
{
|
12108 |
+
"epoch": 2.5796435075986026,
|
12109 |
+
"grad_norm": 4.700910568237305,
|
12110 |
+
"learning_rate": 0.00017531585894776542,
|
12111 |
+
"loss": 1.2673,
|
12112 |
+
"step": 86400
|
12113 |
+
},
|
12114 |
+
{
|
12115 |
+
"epoch": 2.5811363568506853,
|
12116 |
+
"grad_norm": 4.05939245223999,
|
12117 |
+
"learning_rate": 0.0001753015731518466,
|
12118 |
+
"loss": 1.2177,
|
12119 |
+
"step": 86450
|
12120 |
+
},
|
12121 |
+
{
|
12122 |
+
"epoch": 2.5826292061027676,
|
12123 |
+
"grad_norm": 4.144320011138916,
|
12124 |
+
"learning_rate": 0.00017528728735592775,
|
12125 |
+
"loss": 1.2198,
|
12126 |
+
"step": 86500
|
12127 |
+
},
|
12128 |
+
{
|
12129 |
+
"epoch": 2.5841220553548503,
|
12130 |
+
"grad_norm": 4.483496189117432,
|
12131 |
+
"learning_rate": 0.00017527300156000892,
|
12132 |
+
"loss": 1.3403,
|
12133 |
+
"step": 86550
|
12134 |
+
},
|
12135 |
+
{
|
12136 |
+
"epoch": 2.585614904606933,
|
12137 |
+
"grad_norm": 6.164941787719727,
|
12138 |
+
"learning_rate": 0.0001752587157640901,
|
12139 |
+
"loss": 1.3121,
|
12140 |
+
"step": 86600
|
12141 |
+
},
|
12142 |
+
{
|
12143 |
+
"epoch": 2.5871077538590153,
|
12144 |
+
"grad_norm": 5.265034198760986,
|
12145 |
+
"learning_rate": 0.00017524442996817125,
|
12146 |
+
"loss": 1.2484,
|
12147 |
+
"step": 86650
|
12148 |
+
},
|
12149 |
+
{
|
12150 |
+
"epoch": 2.588600603111098,
|
12151 |
+
"grad_norm": 4.718167304992676,
|
12152 |
+
"learning_rate": 0.00017523014417225244,
|
12153 |
+
"loss": 1.3256,
|
12154 |
+
"step": 86700
|
12155 |
+
},
|
12156 |
+
{
|
12157 |
+
"epoch": 2.5900934523631802,
|
12158 |
+
"grad_norm": 3.9518258571624756,
|
12159 |
+
"learning_rate": 0.00017521585837633358,
|
12160 |
+
"loss": 1.271,
|
12161 |
+
"step": 86750
|
12162 |
+
},
|
12163 |
+
{
|
12164 |
+
"epoch": 2.591586301615263,
|
12165 |
+
"grad_norm": 4.604781627655029,
|
12166 |
+
"learning_rate": 0.00017520157258041477,
|
12167 |
+
"loss": 1.2821,
|
12168 |
+
"step": 86800
|
12169 |
+
},
|
12170 |
+
{
|
12171 |
+
"epoch": 2.5930791508673456,
|
12172 |
+
"grad_norm": 3.5907840728759766,
|
12173 |
+
"learning_rate": 0.0001751872867844959,
|
12174 |
+
"loss": 1.2958,
|
12175 |
+
"step": 86850
|
12176 |
+
},
|
12177 |
+
{
|
12178 |
+
"epoch": 2.594572000119428,
|
12179 |
+
"grad_norm": 5.262489318847656,
|
12180 |
+
"learning_rate": 0.0001751730009885771,
|
12181 |
+
"loss": 1.3186,
|
12182 |
+
"step": 86900
|
12183 |
+
},
|
12184 |
+
{
|
12185 |
+
"epoch": 2.5960648493715106,
|
12186 |
+
"grad_norm": 4.812371253967285,
|
12187 |
+
"learning_rate": 0.00017515871519265826,
|
12188 |
+
"loss": 1.2586,
|
12189 |
+
"step": 86950
|
12190 |
+
},
|
12191 |
+
{
|
12192 |
+
"epoch": 2.597557698623593,
|
12193 |
+
"grad_norm": 4.645204544067383,
|
12194 |
+
"learning_rate": 0.00017514442939673943,
|
12195 |
+
"loss": 1.2656,
|
12196 |
+
"step": 87000
|
12197 |
+
},
|
12198 |
+
{
|
12199 |
+
"epoch": 2.5990505478756756,
|
12200 |
+
"grad_norm": 5.977488040924072,
|
12201 |
+
"learning_rate": 0.0001751301436008206,
|
12202 |
+
"loss": 1.3167,
|
12203 |
+
"step": 87050
|
12204 |
+
},
|
12205 |
+
{
|
12206 |
+
"epoch": 2.6005433971277583,
|
12207 |
+
"grad_norm": 4.534546375274658,
|
12208 |
+
"learning_rate": 0.00017511585780490176,
|
12209 |
+
"loss": 1.3263,
|
12210 |
+
"step": 87100
|
12211 |
+
},
|
12212 |
+
{
|
12213 |
+
"epoch": 2.6020362463798405,
|
12214 |
+
"grad_norm": 4.562127113342285,
|
12215 |
+
"learning_rate": 0.00017510157200898292,
|
12216 |
+
"loss": 1.3122,
|
12217 |
+
"step": 87150
|
12218 |
+
},
|
12219 |
+
{
|
12220 |
+
"epoch": 2.6035290956319233,
|
12221 |
+
"grad_norm": 3.588327646255493,
|
12222 |
+
"learning_rate": 0.00017508728621306409,
|
12223 |
+
"loss": 1.2953,
|
12224 |
+
"step": 87200
|
12225 |
+
},
|
12226 |
+
{
|
12227 |
+
"epoch": 2.6050219448840055,
|
12228 |
+
"grad_norm": 5.136357307434082,
|
12229 |
+
"learning_rate": 0.00017507300041714525,
|
12230 |
+
"loss": 1.2831,
|
12231 |
+
"step": 87250
|
12232 |
+
},
|
12233 |
+
{
|
12234 |
+
"epoch": 2.606514794136088,
|
12235 |
+
"grad_norm": 4.8900299072265625,
|
12236 |
+
"learning_rate": 0.00017505871462122641,
|
12237 |
+
"loss": 1.2847,
|
12238 |
+
"step": 87300
|
12239 |
+
},
|
12240 |
+
{
|
12241 |
+
"epoch": 2.608007643388171,
|
12242 |
+
"grad_norm": 3.7218592166900635,
|
12243 |
+
"learning_rate": 0.00017504442882530758,
|
12244 |
+
"loss": 1.2828,
|
12245 |
+
"step": 87350
|
12246 |
+
},
|
12247 |
+
{
|
12248 |
+
"epoch": 2.609500492640253,
|
12249 |
+
"grad_norm": 3.4648168087005615,
|
12250 |
+
"learning_rate": 0.00017503014302938874,
|
12251 |
+
"loss": 1.2576,
|
12252 |
+
"step": 87400
|
12253 |
+
},
|
12254 |
+
{
|
12255 |
+
"epoch": 2.6109933418923355,
|
12256 |
+
"grad_norm": 3.6955080032348633,
|
12257 |
+
"learning_rate": 0.0001750158572334699,
|
12258 |
+
"loss": 1.3208,
|
12259 |
+
"step": 87450
|
12260 |
+
},
|
12261 |
+
{
|
12262 |
+
"epoch": 2.612486191144418,
|
12263 |
+
"grad_norm": 4.296437740325928,
|
12264 |
+
"learning_rate": 0.00017500157143755107,
|
12265 |
+
"loss": 1.2455,
|
12266 |
+
"step": 87500
|
12267 |
+
},
|
12268 |
+
{
|
12269 |
+
"epoch": 2.613979040396501,
|
12270 |
+
"grad_norm": 4.3054518699646,
|
12271 |
+
"learning_rate": 0.00017498728564163224,
|
12272 |
+
"loss": 1.2722,
|
12273 |
+
"step": 87550
|
12274 |
+
},
|
12275 |
+
{
|
12276 |
+
"epoch": 2.615471889648583,
|
12277 |
+
"grad_norm": 6.258134841918945,
|
12278 |
+
"learning_rate": 0.0001749729998457134,
|
12279 |
+
"loss": 1.2777,
|
12280 |
+
"step": 87600
|
12281 |
+
},
|
12282 |
+
{
|
12283 |
+
"epoch": 2.616964738900666,
|
12284 |
+
"grad_norm": 5.092291355133057,
|
12285 |
+
"learning_rate": 0.0001749587140497946,
|
12286 |
+
"loss": 1.2906,
|
12287 |
+
"step": 87650
|
12288 |
+
},
|
12289 |
+
{
|
12290 |
+
"epoch": 2.618457588152748,
|
12291 |
+
"grad_norm": 3.986844778060913,
|
12292 |
+
"learning_rate": 0.00017494442825387573,
|
12293 |
+
"loss": 1.3326,
|
12294 |
+
"step": 87700
|
12295 |
+
},
|
12296 |
+
{
|
12297 |
+
"epoch": 2.619950437404831,
|
12298 |
+
"grad_norm": 5.269211292266846,
|
12299 |
+
"learning_rate": 0.00017493014245795692,
|
12300 |
+
"loss": 1.3253,
|
12301 |
+
"step": 87750
|
12302 |
+
},
|
12303 |
+
{
|
12304 |
+
"epoch": 2.6214432866569135,
|
12305 |
+
"grad_norm": 3.6939330101013184,
|
12306 |
+
"learning_rate": 0.00017491585666203806,
|
12307 |
+
"loss": 1.318,
|
12308 |
+
"step": 87800
|
12309 |
+
},
|
12310 |
+
{
|
12311 |
+
"epoch": 2.6229361359089958,
|
12312 |
+
"grad_norm": 5.028196334838867,
|
12313 |
+
"learning_rate": 0.00017490157086611925,
|
12314 |
+
"loss": 1.3089,
|
12315 |
+
"step": 87850
|
12316 |
+
},
|
12317 |
+
{
|
12318 |
+
"epoch": 2.6244289851610785,
|
12319 |
+
"grad_norm": 5.179739475250244,
|
12320 |
+
"learning_rate": 0.0001748872850702004,
|
12321 |
+
"loss": 1.2471,
|
12322 |
+
"step": 87900
|
12323 |
+
},
|
12324 |
+
{
|
12325 |
+
"epoch": 2.6259218344131607,
|
12326 |
+
"grad_norm": 4.581385135650635,
|
12327 |
+
"learning_rate": 0.00017487299927428158,
|
12328 |
+
"loss": 1.2703,
|
12329 |
+
"step": 87950
|
12330 |
+
},
|
12331 |
+
{
|
12332 |
+
"epoch": 2.6274146836652434,
|
12333 |
+
"grad_norm": 4.185626983642578,
|
12334 |
+
"learning_rate": 0.00017485871347836275,
|
12335 |
+
"loss": 1.2967,
|
12336 |
+
"step": 88000
|
12337 |
+
},
|
12338 |
+
{
|
12339 |
+
"epoch": 2.628907532917326,
|
12340 |
+
"grad_norm": 4.166527271270752,
|
12341 |
+
"learning_rate": 0.0001748444276824439,
|
12342 |
+
"loss": 1.282,
|
12343 |
+
"step": 88050
|
12344 |
+
},
|
12345 |
+
{
|
12346 |
+
"epoch": 2.6304003821694084,
|
12347 |
+
"grad_norm": 5.894084930419922,
|
12348 |
+
"learning_rate": 0.00017483014188652508,
|
12349 |
+
"loss": 1.3337,
|
12350 |
+
"step": 88100
|
12351 |
+
},
|
12352 |
+
{
|
12353 |
+
"epoch": 2.631893231421491,
|
12354 |
+
"grad_norm": 5.775843620300293,
|
12355 |
+
"learning_rate": 0.00017481585609060624,
|
12356 |
+
"loss": 1.3186,
|
12357 |
+
"step": 88150
|
12358 |
+
},
|
12359 |
+
{
|
12360 |
+
"epoch": 2.6333860806735734,
|
12361 |
+
"grad_norm": 4.601424217224121,
|
12362 |
+
"learning_rate": 0.0001748015702946874,
|
12363 |
+
"loss": 1.2961,
|
12364 |
+
"step": 88200
|
12365 |
+
},
|
12366 |
+
{
|
12367 |
+
"epoch": 2.634878929925656,
|
12368 |
+
"grad_norm": 4.345365524291992,
|
12369 |
+
"learning_rate": 0.00017478728449876857,
|
12370 |
+
"loss": 1.2776,
|
12371 |
+
"step": 88250
|
12372 |
+
},
|
12373 |
+
{
|
12374 |
+
"epoch": 2.636371779177739,
|
12375 |
+
"grad_norm": 4.751996040344238,
|
12376 |
+
"learning_rate": 0.00017477299870284973,
|
12377 |
+
"loss": 1.2847,
|
12378 |
+
"step": 88300
|
12379 |
+
},
|
12380 |
+
{
|
12381 |
+
"epoch": 2.637864628429821,
|
12382 |
+
"grad_norm": 4.37894344329834,
|
12383 |
+
"learning_rate": 0.0001747587129069309,
|
12384 |
+
"loss": 1.2631,
|
12385 |
+
"step": 88350
|
12386 |
+
},
|
12387 |
+
{
|
12388 |
+
"epoch": 2.6393574776819038,
|
12389 |
+
"grad_norm": 4.729623794555664,
|
12390 |
+
"learning_rate": 0.00017474442711101206,
|
12391 |
+
"loss": 1.3288,
|
12392 |
+
"step": 88400
|
12393 |
+
},
|
12394 |
+
{
|
12395 |
+
"epoch": 2.640850326933986,
|
12396 |
+
"grad_norm": 4.7951788902282715,
|
12397 |
+
"learning_rate": 0.00017473014131509326,
|
12398 |
+
"loss": 1.3207,
|
12399 |
+
"step": 88450
|
12400 |
+
},
|
12401 |
+
{
|
12402 |
+
"epoch": 2.6423431761860687,
|
12403 |
+
"grad_norm": 3.692270040512085,
|
12404 |
+
"learning_rate": 0.0001747158555191744,
|
12405 |
+
"loss": 1.3038,
|
12406 |
+
"step": 88500
|
12407 |
+
},
|
12408 |
+
{
|
12409 |
+
"epoch": 2.6438360254381514,
|
12410 |
+
"grad_norm": 4.1009297370910645,
|
12411 |
+
"learning_rate": 0.00017470156972325559,
|
12412 |
+
"loss": 1.2874,
|
12413 |
+
"step": 88550
|
12414 |
+
},
|
12415 |
+
{
|
12416 |
+
"epoch": 2.6453288746902337,
|
12417 |
+
"grad_norm": 4.919840335845947,
|
12418 |
+
"learning_rate": 0.00017468728392733672,
|
12419 |
+
"loss": 1.294,
|
12420 |
+
"step": 88600
|
12421 |
+
},
|
12422 |
+
{
|
12423 |
+
"epoch": 2.6468217239423164,
|
12424 |
+
"grad_norm": 3.7501938343048096,
|
12425 |
+
"learning_rate": 0.00017467299813141791,
|
12426 |
+
"loss": 1.3196,
|
12427 |
+
"step": 88650
|
12428 |
+
},
|
12429 |
+
{
|
12430 |
+
"epoch": 2.6483145731943987,
|
12431 |
+
"grad_norm": 4.218129634857178,
|
12432 |
+
"learning_rate": 0.00017465871233549905,
|
12433 |
+
"loss": 1.2823,
|
12434 |
+
"step": 88700
|
12435 |
+
},
|
12436 |
+
{
|
12437 |
+
"epoch": 2.6498074224464814,
|
12438 |
+
"grad_norm": 5.627344131469727,
|
12439 |
+
"learning_rate": 0.00017464442653958024,
|
12440 |
+
"loss": 1.2796,
|
12441 |
+
"step": 88750
|
12442 |
+
},
|
12443 |
+
{
|
12444 |
+
"epoch": 2.651300271698564,
|
12445 |
+
"grad_norm": 4.543562412261963,
|
12446 |
+
"learning_rate": 0.0001746301407436614,
|
12447 |
+
"loss": 1.2605,
|
12448 |
+
"step": 88800
|
12449 |
+
},
|
12450 |
+
{
|
12451 |
+
"epoch": 2.6527931209506463,
|
12452 |
+
"grad_norm": 4.705561637878418,
|
12453 |
+
"learning_rate": 0.00017461585494774257,
|
12454 |
+
"loss": 1.2822,
|
12455 |
+
"step": 88850
|
12456 |
+
},
|
12457 |
+
{
|
12458 |
+
"epoch": 2.654285970202729,
|
12459 |
+
"grad_norm": 4.356922149658203,
|
12460 |
+
"learning_rate": 0.00017460156915182374,
|
12461 |
+
"loss": 1.3247,
|
12462 |
+
"step": 88900
|
12463 |
+
},
|
12464 |
+
{
|
12465 |
+
"epoch": 2.6557788194548113,
|
12466 |
+
"grad_norm": 4.276957988739014,
|
12467 |
+
"learning_rate": 0.0001745872833559049,
|
12468 |
+
"loss": 1.3103,
|
12469 |
+
"step": 88950
|
12470 |
+
},
|
12471 |
+
{
|
12472 |
+
"epoch": 2.657271668706894,
|
12473 |
+
"grad_norm": 4.122916221618652,
|
12474 |
+
"learning_rate": 0.00017457299755998607,
|
12475 |
+
"loss": 1.302,
|
12476 |
+
"step": 89000
|
12477 |
+
},
|
12478 |
+
{
|
12479 |
+
"epoch": 2.6587645179589767,
|
12480 |
+
"grad_norm": 4.473240852355957,
|
12481 |
+
"learning_rate": 0.00017455871176406723,
|
12482 |
+
"loss": 1.274,
|
12483 |
+
"step": 89050
|
12484 |
+
},
|
12485 |
+
{
|
12486 |
+
"epoch": 2.660257367211059,
|
12487 |
+
"grad_norm": 5.299698829650879,
|
12488 |
+
"learning_rate": 0.0001745444259681484,
|
12489 |
+
"loss": 1.2879,
|
12490 |
+
"step": 89100
|
12491 |
+
},
|
12492 |
+
{
|
12493 |
+
"epoch": 2.6617502164631417,
|
12494 |
+
"grad_norm": 5.3960347175598145,
|
12495 |
+
"learning_rate": 0.00017453014017222956,
|
12496 |
+
"loss": 1.3083,
|
12497 |
+
"step": 89150
|
12498 |
+
},
|
12499 |
+
{
|
12500 |
+
"epoch": 2.663243065715224,
|
12501 |
+
"grad_norm": 9.475055694580078,
|
12502 |
+
"learning_rate": 0.00017451585437631073,
|
12503 |
+
"loss": 1.3123,
|
12504 |
+
"step": 89200
|
12505 |
+
},
|
12506 |
+
{
|
12507 |
+
"epoch": 2.6647359149673067,
|
12508 |
+
"grad_norm": 4.01664924621582,
|
12509 |
+
"learning_rate": 0.00017450156858039192,
|
12510 |
+
"loss": 1.2673,
|
12511 |
+
"step": 89250
|
12512 |
+
},
|
12513 |
+
{
|
12514 |
+
"epoch": 2.6662287642193894,
|
12515 |
+
"grad_norm": 4.74378776550293,
|
12516 |
+
"learning_rate": 0.00017448728278447306,
|
12517 |
+
"loss": 1.2704,
|
12518 |
+
"step": 89300
|
12519 |
+
},
|
12520 |
+
{
|
12521 |
+
"epoch": 2.6677216134714716,
|
12522 |
+
"grad_norm": 3.8527019023895264,
|
12523 |
+
"learning_rate": 0.00017447299698855425,
|
12524 |
+
"loss": 1.2638,
|
12525 |
+
"step": 89350
|
12526 |
+
},
|
12527 |
+
{
|
12528 |
+
"epoch": 2.6692144627235543,
|
12529 |
+
"grad_norm": 5.14741849899292,
|
12530 |
+
"learning_rate": 0.00017445871119263538,
|
12531 |
+
"loss": 1.2843,
|
12532 |
+
"step": 89400
|
12533 |
+
},
|
12534 |
+
{
|
12535 |
+
"epoch": 2.6707073119756366,
|
12536 |
+
"grad_norm": 4.814597129821777,
|
12537 |
+
"learning_rate": 0.00017444442539671658,
|
12538 |
+
"loss": 1.2955,
|
12539 |
+
"step": 89450
|
12540 |
+
},
|
12541 |
+
{
|
12542 |
+
"epoch": 2.6722001612277193,
|
12543 |
+
"grad_norm": 3.8238747119903564,
|
12544 |
+
"learning_rate": 0.00017443013960079771,
|
12545 |
+
"loss": 1.2921,
|
12546 |
+
"step": 89500
|
12547 |
+
},
|
12548 |
+
{
|
12549 |
+
"epoch": 2.673693010479802,
|
12550 |
+
"grad_norm": 5.118923187255859,
|
12551 |
+
"learning_rate": 0.0001744158538048789,
|
12552 |
+
"loss": 1.3114,
|
12553 |
+
"step": 89550
|
12554 |
+
},
|
12555 |
+
{
|
12556 |
+
"epoch": 2.6751858597318843,
|
12557 |
+
"grad_norm": 4.3184380531311035,
|
12558 |
+
"learning_rate": 0.00017440156800896007,
|
12559 |
+
"loss": 1.2387,
|
12560 |
+
"step": 89600
|
12561 |
+
},
|
12562 |
+
{
|
12563 |
+
"epoch": 2.6766787089839665,
|
12564 |
+
"grad_norm": 4.6351728439331055,
|
12565 |
+
"learning_rate": 0.00017438728221304123,
|
12566 |
+
"loss": 1.2524,
|
12567 |
+
"step": 89650
|
12568 |
+
},
|
12569 |
+
{
|
12570 |
+
"epoch": 2.6781715582360492,
|
12571 |
+
"grad_norm": 4.391300678253174,
|
12572 |
+
"learning_rate": 0.0001743729964171224,
|
12573 |
+
"loss": 1.2591,
|
12574 |
+
"step": 89700
|
12575 |
+
},
|
12576 |
+
{
|
12577 |
+
"epoch": 2.679664407488132,
|
12578 |
+
"grad_norm": 3.749375104904175,
|
12579 |
+
"learning_rate": 0.00017435871062120354,
|
12580 |
+
"loss": 1.247,
|
12581 |
+
"step": 89750
|
12582 |
+
},
|
12583 |
+
{
|
12584 |
+
"epoch": 2.681157256740214,
|
12585 |
+
"grad_norm": 4.260756492614746,
|
12586 |
+
"learning_rate": 0.00017434442482528473,
|
12587 |
+
"loss": 1.2672,
|
12588 |
+
"step": 89800
|
12589 |
+
},
|
12590 |
+
{
|
12591 |
+
"epoch": 2.682650105992297,
|
12592 |
+
"grad_norm": 3.6358819007873535,
|
12593 |
+
"learning_rate": 0.00017433013902936587,
|
12594 |
+
"loss": 1.2941,
|
12595 |
+
"step": 89850
|
12596 |
+
},
|
12597 |
+
{
|
12598 |
+
"epoch": 2.684142955244379,
|
12599 |
+
"grad_norm": 4.653028964996338,
|
12600 |
+
"learning_rate": 0.00017431585323344706,
|
12601 |
+
"loss": 1.3237,
|
12602 |
+
"step": 89900
|
12603 |
+
},
|
12604 |
+
{
|
12605 |
+
"epoch": 2.685635804496462,
|
12606 |
+
"grad_norm": 4.692080497741699,
|
12607 |
+
"learning_rate": 0.00017430156743752822,
|
12608 |
+
"loss": 1.325,
|
12609 |
+
"step": 89950
|
12610 |
+
},
|
12611 |
+
{
|
12612 |
+
"epoch": 2.6871286537485446,
|
12613 |
+
"grad_norm": 4.317993640899658,
|
12614 |
+
"learning_rate": 0.0001742872816416094,
|
12615 |
+
"loss": 1.2491,
|
12616 |
+
"step": 90000
|
12617 |
}
|
12618 |
],
|
12619 |
"logging_steps": 50,
|
|
|
12633 |
"attributes": {}
|
12634 |
}
|
12635 |
},
|
12636 |
+
"total_flos": 2.274296357100454e+18,
|
12637 |
"train_batch_size": 2,
|
12638 |
"trial_name": null,
|
12639 |
"trial_params": null
|