Training in progress, step 882, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83945296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd93781227ba36fdd5e8816122324ee6cd5308ad42b3e7c9e3bc8723f76a5e67
|
3 |
size 83945296
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 43123028
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae5d50a02839400dbbba8933a8b059930d8122a9cdbc6c69822b80fb8bd6a74f
|
3 |
size 43123028
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38bb23a048d322410627de94481126a56e9646b992b5e06fe0e729694f1e6988
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:756a4f06b9404bfe6fe163858e84e0338681bfc283853beea8ae022e9807c28a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 294,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4139,6 +4139,2072 @@
|
|
4139 |
"eval_samples_per_second": 12.295,
|
4140 |
"eval_steps_per_second": 6.148,
|
4141 |
"step": 588
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4142 |
}
|
4143 |
],
|
4144 |
"logging_steps": 1,
|
@@ -4158,7 +6224,7 @@
|
|
4158 |
"attributes": {}
|
4159 |
}
|
4160 |
},
|
4161 |
-
"total_flos":
|
4162 |
"train_batch_size": 2,
|
4163 |
"trial_name": null,
|
4164 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7502392344497608,
|
5 |
"eval_steps": 294,
|
6 |
+
"global_step": 882,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4139 |
"eval_samples_per_second": 12.295,
|
4140 |
"eval_steps_per_second": 6.148,
|
4141 |
"step": 588
|
4142 |
+
},
|
4143 |
+
{
|
4144 |
+
"epoch": 0.501010101010101,
|
4145 |
+
"grad_norm": 0.44425809383392334,
|
4146 |
+
"learning_rate": 0.00010107771246568345,
|
4147 |
+
"loss": 2.5832,
|
4148 |
+
"step": 589
|
4149 |
+
},
|
4150 |
+
{
|
4151 |
+
"epoch": 0.5018607123870282,
|
4152 |
+
"grad_norm": 0.4519883990287781,
|
4153 |
+
"learning_rate": 0.00010080829119496586,
|
4154 |
+
"loss": 2.8142,
|
4155 |
+
"step": 590
|
4156 |
+
},
|
4157 |
+
{
|
4158 |
+
"epoch": 0.5027113237639553,
|
4159 |
+
"grad_norm": 0.4547143280506134,
|
4160 |
+
"learning_rate": 0.0001005388640565168,
|
4161 |
+
"loss": 2.4661,
|
4162 |
+
"step": 591
|
4163 |
+
},
|
4164 |
+
{
|
4165 |
+
"epoch": 0.5035619351408825,
|
4166 |
+
"grad_norm": 0.5073342323303223,
|
4167 |
+
"learning_rate": 0.00010026943300622313,
|
4168 |
+
"loss": 2.7849,
|
4169 |
+
"step": 592
|
4170 |
+
},
|
4171 |
+
{
|
4172 |
+
"epoch": 0.5044125465178096,
|
4173 |
+
"grad_norm": 0.5203079581260681,
|
4174 |
+
"learning_rate": 0.0001,
|
4175 |
+
"loss": 2.6077,
|
4176 |
+
"step": 593
|
4177 |
+
},
|
4178 |
+
{
|
4179 |
+
"epoch": 0.5052631578947369,
|
4180 |
+
"grad_norm": 0.4956808090209961,
|
4181 |
+
"learning_rate": 9.97305669937769e-05,
|
4182 |
+
"loss": 2.7116,
|
4183 |
+
"step": 594
|
4184 |
+
},
|
4185 |
+
{
|
4186 |
+
"epoch": 0.506113769271664,
|
4187 |
+
"grad_norm": 0.5807839035987854,
|
4188 |
+
"learning_rate": 9.946113594348321e-05,
|
4189 |
+
"loss": 2.8024,
|
4190 |
+
"step": 595
|
4191 |
+
},
|
4192 |
+
{
|
4193 |
+
"epoch": 0.5069643806485912,
|
4194 |
+
"grad_norm": 0.5834396481513977,
|
4195 |
+
"learning_rate": 9.919170880503415e-05,
|
4196 |
+
"loss": 2.6428,
|
4197 |
+
"step": 596
|
4198 |
+
},
|
4199 |
+
{
|
4200 |
+
"epoch": 0.5078149920255184,
|
4201 |
+
"grad_norm": 0.5668088793754578,
|
4202 |
+
"learning_rate": 9.892228753431657e-05,
|
4203 |
+
"loss": 2.6711,
|
4204 |
+
"step": 597
|
4205 |
+
},
|
4206 |
+
{
|
4207 |
+
"epoch": 0.5086656034024455,
|
4208 |
+
"grad_norm": 0.6034618616104126,
|
4209 |
+
"learning_rate": 9.865287408717465e-05,
|
4210 |
+
"loss": 2.9129,
|
4211 |
+
"step": 598
|
4212 |
+
},
|
4213 |
+
{
|
4214 |
+
"epoch": 0.5095162147793727,
|
4215 |
+
"grad_norm": 0.5953810811042786,
|
4216 |
+
"learning_rate": 9.838347041939584e-05,
|
4217 |
+
"loss": 2.7463,
|
4218 |
+
"step": 599
|
4219 |
+
},
|
4220 |
+
{
|
4221 |
+
"epoch": 0.5103668261562998,
|
4222 |
+
"grad_norm": 0.6790388822555542,
|
4223 |
+
"learning_rate": 9.811407848669657e-05,
|
4224 |
+
"loss": 2.8535,
|
4225 |
+
"step": 600
|
4226 |
+
},
|
4227 |
+
{
|
4228 |
+
"epoch": 0.511217437533227,
|
4229 |
+
"grad_norm": 0.2997681200504303,
|
4230 |
+
"learning_rate": 9.784470024470812e-05,
|
4231 |
+
"loss": 2.4338,
|
4232 |
+
"step": 601
|
4233 |
+
},
|
4234 |
+
{
|
4235 |
+
"epoch": 0.5120680489101542,
|
4236 |
+
"grad_norm": 0.28502747416496277,
|
4237 |
+
"learning_rate": 9.757533764896235e-05,
|
4238 |
+
"loss": 2.2975,
|
4239 |
+
"step": 602
|
4240 |
+
},
|
4241 |
+
{
|
4242 |
+
"epoch": 0.5129186602870813,
|
4243 |
+
"grad_norm": 0.29402679204940796,
|
4244 |
+
"learning_rate": 9.730599265487745e-05,
|
4245 |
+
"loss": 2.6287,
|
4246 |
+
"step": 603
|
4247 |
+
},
|
4248 |
+
{
|
4249 |
+
"epoch": 0.5137692716640085,
|
4250 |
+
"grad_norm": 0.28246256709098816,
|
4251 |
+
"learning_rate": 9.703666721774402e-05,
|
4252 |
+
"loss": 2.4197,
|
4253 |
+
"step": 604
|
4254 |
+
},
|
4255 |
+
{
|
4256 |
+
"epoch": 0.5146198830409356,
|
4257 |
+
"grad_norm": 0.29511624574661255,
|
4258 |
+
"learning_rate": 9.676736329271059e-05,
|
4259 |
+
"loss": 2.6028,
|
4260 |
+
"step": 605
|
4261 |
+
},
|
4262 |
+
{
|
4263 |
+
"epoch": 0.5154704944178629,
|
4264 |
+
"grad_norm": 0.2746260166168213,
|
4265 |
+
"learning_rate": 9.649808283476941e-05,
|
4266 |
+
"loss": 2.5791,
|
4267 |
+
"step": 606
|
4268 |
+
},
|
4269 |
+
{
|
4270 |
+
"epoch": 0.51632110579479,
|
4271 |
+
"grad_norm": 0.2634756863117218,
|
4272 |
+
"learning_rate": 9.622882779874263e-05,
|
4273 |
+
"loss": 2.4199,
|
4274 |
+
"step": 607
|
4275 |
+
},
|
4276 |
+
{
|
4277 |
+
"epoch": 0.5171717171717172,
|
4278 |
+
"grad_norm": 0.2797803282737732,
|
4279 |
+
"learning_rate": 9.595960013926761e-05,
|
4280 |
+
"loss": 2.5637,
|
4281 |
+
"step": 608
|
4282 |
+
},
|
4283 |
+
{
|
4284 |
+
"epoch": 0.5180223285486444,
|
4285 |
+
"grad_norm": 0.2671017646789551,
|
4286 |
+
"learning_rate": 9.569040181078306e-05,
|
4287 |
+
"loss": 2.6811,
|
4288 |
+
"step": 609
|
4289 |
+
},
|
4290 |
+
{
|
4291 |
+
"epoch": 0.5188729399255715,
|
4292 |
+
"grad_norm": 0.26546764373779297,
|
4293 |
+
"learning_rate": 9.542123476751483e-05,
|
4294 |
+
"loss": 2.5613,
|
4295 |
+
"step": 610
|
4296 |
+
},
|
4297 |
+
{
|
4298 |
+
"epoch": 0.5197235513024987,
|
4299 |
+
"grad_norm": 0.2660638988018036,
|
4300 |
+
"learning_rate": 9.515210096346155e-05,
|
4301 |
+
"loss": 2.4644,
|
4302 |
+
"step": 611
|
4303 |
+
},
|
4304 |
+
{
|
4305 |
+
"epoch": 0.5205741626794258,
|
4306 |
+
"grad_norm": 0.2697810232639313,
|
4307 |
+
"learning_rate": 9.488300235238067e-05,
|
4308 |
+
"loss": 2.5643,
|
4309 |
+
"step": 612
|
4310 |
+
},
|
4311 |
+
{
|
4312 |
+
"epoch": 0.521424774056353,
|
4313 |
+
"grad_norm": 0.26712700724601746,
|
4314 |
+
"learning_rate": 9.461394088777402e-05,
|
4315 |
+
"loss": 2.6993,
|
4316 |
+
"step": 613
|
4317 |
+
},
|
4318 |
+
{
|
4319 |
+
"epoch": 0.5222753854332801,
|
4320 |
+
"grad_norm": 0.2802102565765381,
|
4321 |
+
"learning_rate": 9.434491852287385e-05,
|
4322 |
+
"loss": 2.5723,
|
4323 |
+
"step": 614
|
4324 |
+
},
|
4325 |
+
{
|
4326 |
+
"epoch": 0.5231259968102073,
|
4327 |
+
"grad_norm": 0.2690255641937256,
|
4328 |
+
"learning_rate": 9.407593721062859e-05,
|
4329 |
+
"loss": 2.5136,
|
4330 |
+
"step": 615
|
4331 |
+
},
|
4332 |
+
{
|
4333 |
+
"epoch": 0.5239766081871345,
|
4334 |
+
"grad_norm": 0.2789754271507263,
|
4335 |
+
"learning_rate": 9.38069989036886e-05,
|
4336 |
+
"loss": 2.5901,
|
4337 |
+
"step": 616
|
4338 |
+
},
|
4339 |
+
{
|
4340 |
+
"epoch": 0.5248272195640616,
|
4341 |
+
"grad_norm": 0.2947288751602173,
|
4342 |
+
"learning_rate": 9.353810555439203e-05,
|
4343 |
+
"loss": 2.5661,
|
4344 |
+
"step": 617
|
4345 |
+
},
|
4346 |
+
{
|
4347 |
+
"epoch": 0.5256778309409889,
|
4348 |
+
"grad_norm": 0.268081396818161,
|
4349 |
+
"learning_rate": 9.326925911475075e-05,
|
4350 |
+
"loss": 2.603,
|
4351 |
+
"step": 618
|
4352 |
+
},
|
4353 |
+
{
|
4354 |
+
"epoch": 0.526528442317916,
|
4355 |
+
"grad_norm": 0.2749037444591522,
|
4356 |
+
"learning_rate": 9.300046153643602e-05,
|
4357 |
+
"loss": 2.7176,
|
4358 |
+
"step": 619
|
4359 |
+
},
|
4360 |
+
{
|
4361 |
+
"epoch": 0.5273790536948432,
|
4362 |
+
"grad_norm": 0.267333984375,
|
4363 |
+
"learning_rate": 9.27317147707644e-05,
|
4364 |
+
"loss": 2.5775,
|
4365 |
+
"step": 620
|
4366 |
+
},
|
4367 |
+
{
|
4368 |
+
"epoch": 0.5282296650717704,
|
4369 |
+
"grad_norm": 0.2688322961330414,
|
4370 |
+
"learning_rate": 9.246302076868363e-05,
|
4371 |
+
"loss": 2.4796,
|
4372 |
+
"step": 621
|
4373 |
+
},
|
4374 |
+
{
|
4375 |
+
"epoch": 0.5290802764486975,
|
4376 |
+
"grad_norm": 0.2832041382789612,
|
4377 |
+
"learning_rate": 9.219438148075832e-05,
|
4378 |
+
"loss": 2.5764,
|
4379 |
+
"step": 622
|
4380 |
+
},
|
4381 |
+
{
|
4382 |
+
"epoch": 0.5299308878256247,
|
4383 |
+
"grad_norm": 0.3032709062099457,
|
4384 |
+
"learning_rate": 9.192579885715602e-05,
|
4385 |
+
"loss": 2.7559,
|
4386 |
+
"step": 623
|
4387 |
+
},
|
4388 |
+
{
|
4389 |
+
"epoch": 0.5307814992025518,
|
4390 |
+
"grad_norm": 0.3065055012702942,
|
4391 |
+
"learning_rate": 9.165727484763282e-05,
|
4392 |
+
"loss": 2.7058,
|
4393 |
+
"step": 624
|
4394 |
+
},
|
4395 |
+
{
|
4396 |
+
"epoch": 0.531632110579479,
|
4397 |
+
"grad_norm": 0.314698189496994,
|
4398 |
+
"learning_rate": 9.138881140151931e-05,
|
4399 |
+
"loss": 2.6227,
|
4400 |
+
"step": 625
|
4401 |
+
},
|
4402 |
+
{
|
4403 |
+
"epoch": 0.5324827219564061,
|
4404 |
+
"grad_norm": 0.31485408544540405,
|
4405 |
+
"learning_rate": 9.112041046770653e-05,
|
4406 |
+
"loss": 2.5687,
|
4407 |
+
"step": 626
|
4408 |
+
},
|
4409 |
+
{
|
4410 |
+
"epoch": 0.5333333333333333,
|
4411 |
+
"grad_norm": 0.3281068801879883,
|
4412 |
+
"learning_rate": 9.085207399463162e-05,
|
4413 |
+
"loss": 2.5957,
|
4414 |
+
"step": 627
|
4415 |
+
},
|
4416 |
+
{
|
4417 |
+
"epoch": 0.5341839447102605,
|
4418 |
+
"grad_norm": 0.3461722731590271,
|
4419 |
+
"learning_rate": 9.058380393026369e-05,
|
4420 |
+
"loss": 2.8056,
|
4421 |
+
"step": 628
|
4422 |
+
},
|
4423 |
+
{
|
4424 |
+
"epoch": 0.5350345560871876,
|
4425 |
+
"grad_norm": 0.3617733418941498,
|
4426 |
+
"learning_rate": 9.031560222209002e-05,
|
4427 |
+
"loss": 2.8171,
|
4428 |
+
"step": 629
|
4429 |
+
},
|
4430 |
+
{
|
4431 |
+
"epoch": 0.5358851674641149,
|
4432 |
+
"grad_norm": 0.34867244958877563,
|
4433 |
+
"learning_rate": 9.00474708171014e-05,
|
4434 |
+
"loss": 2.6481,
|
4435 |
+
"step": 630
|
4436 |
+
},
|
4437 |
+
{
|
4438 |
+
"epoch": 0.536735778841042,
|
4439 |
+
"grad_norm": 0.3704431653022766,
|
4440 |
+
"learning_rate": 8.977941166177845e-05,
|
4441 |
+
"loss": 2.7771,
|
4442 |
+
"step": 631
|
4443 |
+
},
|
4444 |
+
{
|
4445 |
+
"epoch": 0.5375863902179692,
|
4446 |
+
"grad_norm": 0.3661860525608063,
|
4447 |
+
"learning_rate": 8.951142670207717e-05,
|
4448 |
+
"loss": 2.5948,
|
4449 |
+
"step": 632
|
4450 |
+
},
|
4451 |
+
{
|
4452 |
+
"epoch": 0.5384370015948964,
|
4453 |
+
"grad_norm": 0.3825220763683319,
|
4454 |
+
"learning_rate": 8.924351788341496e-05,
|
4455 |
+
"loss": 2.5843,
|
4456 |
+
"step": 633
|
4457 |
+
},
|
4458 |
+
{
|
4459 |
+
"epoch": 0.5392876129718235,
|
4460 |
+
"grad_norm": 0.39650052785873413,
|
4461 |
+
"learning_rate": 8.897568715065657e-05,
|
4462 |
+
"loss": 2.7541,
|
4463 |
+
"step": 634
|
4464 |
+
},
|
4465 |
+
{
|
4466 |
+
"epoch": 0.5401382243487507,
|
4467 |
+
"grad_norm": 0.4185117483139038,
|
4468 |
+
"learning_rate": 8.87079364480998e-05,
|
4469 |
+
"loss": 2.791,
|
4470 |
+
"step": 635
|
4471 |
+
},
|
4472 |
+
{
|
4473 |
+
"epoch": 0.5409888357256778,
|
4474 |
+
"grad_norm": 0.41095203161239624,
|
4475 |
+
"learning_rate": 8.844026771946147e-05,
|
4476 |
+
"loss": 2.5707,
|
4477 |
+
"step": 636
|
4478 |
+
},
|
4479 |
+
{
|
4480 |
+
"epoch": 0.541839447102605,
|
4481 |
+
"grad_norm": 0.4548766016960144,
|
4482 |
+
"learning_rate": 8.817268290786343e-05,
|
4483 |
+
"loss": 2.8622,
|
4484 |
+
"step": 637
|
4485 |
+
},
|
4486 |
+
{
|
4487 |
+
"epoch": 0.5426900584795321,
|
4488 |
+
"grad_norm": 0.4231926202774048,
|
4489 |
+
"learning_rate": 8.790518395581822e-05,
|
4490 |
+
"loss": 2.5893,
|
4491 |
+
"step": 638
|
4492 |
+
},
|
4493 |
+
{
|
4494 |
+
"epoch": 0.5435406698564593,
|
4495 |
+
"grad_norm": 0.441785603761673,
|
4496 |
+
"learning_rate": 8.763777280521511e-05,
|
4497 |
+
"loss": 2.7675,
|
4498 |
+
"step": 639
|
4499 |
+
},
|
4500 |
+
{
|
4501 |
+
"epoch": 0.5443912812333865,
|
4502 |
+
"grad_norm": 0.4583280682563782,
|
4503 |
+
"learning_rate": 8.737045139730605e-05,
|
4504 |
+
"loss": 2.7461,
|
4505 |
+
"step": 640
|
4506 |
+
},
|
4507 |
+
{
|
4508 |
+
"epoch": 0.5452418926103136,
|
4509 |
+
"grad_norm": 0.5172987580299377,
|
4510 |
+
"learning_rate": 8.71032216726914e-05,
|
4511 |
+
"loss": 2.9216,
|
4512 |
+
"step": 641
|
4513 |
+
},
|
4514 |
+
{
|
4515 |
+
"epoch": 0.5460925039872409,
|
4516 |
+
"grad_norm": 0.49341312050819397,
|
4517 |
+
"learning_rate": 8.683608557130608e-05,
|
4518 |
+
"loss": 2.803,
|
4519 |
+
"step": 642
|
4520 |
+
},
|
4521 |
+
{
|
4522 |
+
"epoch": 0.546943115364168,
|
4523 |
+
"grad_norm": 0.5035589933395386,
|
4524 |
+
"learning_rate": 8.656904503240527e-05,
|
4525 |
+
"loss": 2.869,
|
4526 |
+
"step": 643
|
4527 |
+
},
|
4528 |
+
{
|
4529 |
+
"epoch": 0.5477937267410952,
|
4530 |
+
"grad_norm": 0.5143235921859741,
|
4531 |
+
"learning_rate": 8.630210199455041e-05,
|
4532 |
+
"loss": 2.7889,
|
4533 |
+
"step": 644
|
4534 |
+
},
|
4535 |
+
{
|
4536 |
+
"epoch": 0.5486443381180224,
|
4537 |
+
"grad_norm": 0.5110295414924622,
|
4538 |
+
"learning_rate": 8.603525839559523e-05,
|
4539 |
+
"loss": 2.8229,
|
4540 |
+
"step": 645
|
4541 |
+
},
|
4542 |
+
{
|
4543 |
+
"epoch": 0.5494949494949495,
|
4544 |
+
"grad_norm": 0.555565595626831,
|
4545 |
+
"learning_rate": 8.57685161726715e-05,
|
4546 |
+
"loss": 2.6461,
|
4547 |
+
"step": 646
|
4548 |
+
},
|
4549 |
+
{
|
4550 |
+
"epoch": 0.5503455608718767,
|
4551 |
+
"grad_norm": 0.56583172082901,
|
4552 |
+
"learning_rate": 8.550187726217507e-05,
|
4553 |
+
"loss": 2.7647,
|
4554 |
+
"step": 647
|
4555 |
+
},
|
4556 |
+
{
|
4557 |
+
"epoch": 0.5511961722488038,
|
4558 |
+
"grad_norm": 0.6618173718452454,
|
4559 |
+
"learning_rate": 8.523534359975189e-05,
|
4560 |
+
"loss": 2.6992,
|
4561 |
+
"step": 648
|
4562 |
+
},
|
4563 |
+
{
|
4564 |
+
"epoch": 0.552046783625731,
|
4565 |
+
"grad_norm": 0.6295120120048523,
|
4566 |
+
"learning_rate": 8.496891712028375e-05,
|
4567 |
+
"loss": 2.6303,
|
4568 |
+
"step": 649
|
4569 |
+
},
|
4570 |
+
{
|
4571 |
+
"epoch": 0.5528973950026581,
|
4572 |
+
"grad_norm": 0.6757770776748657,
|
4573 |
+
"learning_rate": 8.470259975787438e-05,
|
4574 |
+
"loss": 2.8003,
|
4575 |
+
"step": 650
|
4576 |
+
},
|
4577 |
+
{
|
4578 |
+
"epoch": 0.5537480063795853,
|
4579 |
+
"grad_norm": 0.26357659697532654,
|
4580 |
+
"learning_rate": 8.443639344583547e-05,
|
4581 |
+
"loss": 2.4452,
|
4582 |
+
"step": 651
|
4583 |
+
},
|
4584 |
+
{
|
4585 |
+
"epoch": 0.5545986177565125,
|
4586 |
+
"grad_norm": 0.31750619411468506,
|
4587 |
+
"learning_rate": 8.417030011667241e-05,
|
4588 |
+
"loss": 2.374,
|
4589 |
+
"step": 652
|
4590 |
+
},
|
4591 |
+
{
|
4592 |
+
"epoch": 0.5554492291334396,
|
4593 |
+
"grad_norm": 0.27070245146751404,
|
4594 |
+
"learning_rate": 8.390432170207057e-05,
|
4595 |
+
"loss": 2.3384,
|
4596 |
+
"step": 653
|
4597 |
+
},
|
4598 |
+
{
|
4599 |
+
"epoch": 0.5562998405103668,
|
4600 |
+
"grad_norm": 0.28004199266433716,
|
4601 |
+
"learning_rate": 8.363846013288095e-05,
|
4602 |
+
"loss": 2.4357,
|
4603 |
+
"step": 654
|
4604 |
+
},
|
4605 |
+
{
|
4606 |
+
"epoch": 0.557150451887294,
|
4607 |
+
"grad_norm": 0.323215126991272,
|
4608 |
+
"learning_rate": 8.337271733910637e-05,
|
4609 |
+
"loss": 2.3801,
|
4610 |
+
"step": 655
|
4611 |
+
},
|
4612 |
+
{
|
4613 |
+
"epoch": 0.5580010632642212,
|
4614 |
+
"grad_norm": 0.292837530374527,
|
4615 |
+
"learning_rate": 8.310709524988743e-05,
|
4616 |
+
"loss": 2.3924,
|
4617 |
+
"step": 656
|
4618 |
+
},
|
4619 |
+
{
|
4620 |
+
"epoch": 0.5588516746411484,
|
4621 |
+
"grad_norm": 0.2810145914554596,
|
4622 |
+
"learning_rate": 8.284159579348851e-05,
|
4623 |
+
"loss": 2.5257,
|
4624 |
+
"step": 657
|
4625 |
+
},
|
4626 |
+
{
|
4627 |
+
"epoch": 0.5597022860180755,
|
4628 |
+
"grad_norm": 0.2848433554172516,
|
4629 |
+
"learning_rate": 8.257622089728362e-05,
|
4630 |
+
"loss": 2.5743,
|
4631 |
+
"step": 658
|
4632 |
+
},
|
4633 |
+
{
|
4634 |
+
"epoch": 0.5605528973950027,
|
4635 |
+
"grad_norm": 0.28531527519226074,
|
4636 |
+
"learning_rate": 8.231097248774274e-05,
|
4637 |
+
"loss": 2.5492,
|
4638 |
+
"step": 659
|
4639 |
+
},
|
4640 |
+
{
|
4641 |
+
"epoch": 0.5614035087719298,
|
4642 |
+
"grad_norm": 0.2882923185825348,
|
4643 |
+
"learning_rate": 8.20458524904174e-05,
|
4644 |
+
"loss": 2.4982,
|
4645 |
+
"step": 660
|
4646 |
+
},
|
4647 |
+
{
|
4648 |
+
"epoch": 0.562254120148857,
|
4649 |
+
"grad_norm": 0.2807391583919525,
|
4650 |
+
"learning_rate": 8.178086282992705e-05,
|
4651 |
+
"loss": 2.5222,
|
4652 |
+
"step": 661
|
4653 |
+
},
|
4654 |
+
{
|
4655 |
+
"epoch": 0.5631047315257841,
|
4656 |
+
"grad_norm": 0.29856112599372864,
|
4657 |
+
"learning_rate": 8.151600542994506e-05,
|
4658 |
+
"loss": 2.723,
|
4659 |
+
"step": 662
|
4660 |
+
},
|
4661 |
+
{
|
4662 |
+
"epoch": 0.5639553429027113,
|
4663 |
+
"grad_norm": 0.2890356481075287,
|
4664 |
+
"learning_rate": 8.125128221318446e-05,
|
4665 |
+
"loss": 2.6931,
|
4666 |
+
"step": 663
|
4667 |
+
},
|
4668 |
+
{
|
4669 |
+
"epoch": 0.5648059542796385,
|
4670 |
+
"grad_norm": 0.28091031312942505,
|
4671 |
+
"learning_rate": 8.098669510138437e-05,
|
4672 |
+
"loss": 2.6273,
|
4673 |
+
"step": 664
|
4674 |
+
},
|
4675 |
+
{
|
4676 |
+
"epoch": 0.5656565656565656,
|
4677 |
+
"grad_norm": 0.2678775489330292,
|
4678 |
+
"learning_rate": 8.072224601529574e-05,
|
4679 |
+
"loss": 2.5571,
|
4680 |
+
"step": 665
|
4681 |
+
},
|
4682 |
+
{
|
4683 |
+
"epoch": 0.5665071770334928,
|
4684 |
+
"grad_norm": 0.2804068624973297,
|
4685 |
+
"learning_rate": 8.045793687466757e-05,
|
4686 |
+
"loss": 2.4443,
|
4687 |
+
"step": 666
|
4688 |
+
},
|
4689 |
+
{
|
4690 |
+
"epoch": 0.56735778841042,
|
4691 |
+
"grad_norm": 0.279041051864624,
|
4692 |
+
"learning_rate": 8.0193769598233e-05,
|
4693 |
+
"loss": 2.6947,
|
4694 |
+
"step": 667
|
4695 |
+
},
|
4696 |
+
{
|
4697 |
+
"epoch": 0.5682083997873472,
|
4698 |
+
"grad_norm": 0.27611520886421204,
|
4699 |
+
"learning_rate": 7.992974610369521e-05,
|
4700 |
+
"loss": 2.782,
|
4701 |
+
"step": 668
|
4702 |
+
},
|
4703 |
+
{
|
4704 |
+
"epoch": 0.5690590111642744,
|
4705 |
+
"grad_norm": 0.28445136547088623,
|
4706 |
+
"learning_rate": 7.966586830771367e-05,
|
4707 |
+
"loss": 2.5741,
|
4708 |
+
"step": 669
|
4709 |
+
},
|
4710 |
+
{
|
4711 |
+
"epoch": 0.5699096225412015,
|
4712 |
+
"grad_norm": 0.2607346773147583,
|
4713 |
+
"learning_rate": 7.940213812589018e-05,
|
4714 |
+
"loss": 2.4522,
|
4715 |
+
"step": 670
|
4716 |
+
},
|
4717 |
+
{
|
4718 |
+
"epoch": 0.5707602339181287,
|
4719 |
+
"grad_norm": 0.2881016433238983,
|
4720 |
+
"learning_rate": 7.913855747275489e-05,
|
4721 |
+
"loss": 2.6714,
|
4722 |
+
"step": 671
|
4723 |
+
},
|
4724 |
+
{
|
4725 |
+
"epoch": 0.5716108452950558,
|
4726 |
+
"grad_norm": 0.3019566237926483,
|
4727 |
+
"learning_rate": 7.887512826175248e-05,
|
4728 |
+
"loss": 2.7117,
|
4729 |
+
"step": 672
|
4730 |
+
},
|
4731 |
+
{
|
4732 |
+
"epoch": 0.572461456671983,
|
4733 |
+
"grad_norm": 0.3051791489124298,
|
4734 |
+
"learning_rate": 7.861185240522827e-05,
|
4735 |
+
"loss": 2.6867,
|
4736 |
+
"step": 673
|
4737 |
+
},
|
4738 |
+
{
|
4739 |
+
"epoch": 0.5733120680489101,
|
4740 |
+
"grad_norm": 0.32132992148399353,
|
4741 |
+
"learning_rate": 7.834873181441427e-05,
|
4742 |
+
"loss": 2.5278,
|
4743 |
+
"step": 674
|
4744 |
+
},
|
4745 |
+
{
|
4746 |
+
"epoch": 0.5741626794258373,
|
4747 |
+
"grad_norm": 0.32348886132240295,
|
4748 |
+
"learning_rate": 7.808576839941542e-05,
|
4749 |
+
"loss": 2.7507,
|
4750 |
+
"step": 675
|
4751 |
+
},
|
4752 |
+
{
|
4753 |
+
"epoch": 0.5750132908027645,
|
4754 |
+
"grad_norm": 0.32297685742378235,
|
4755 |
+
"learning_rate": 7.782296406919557e-05,
|
4756 |
+
"loss": 2.6857,
|
4757 |
+
"step": 676
|
4758 |
+
},
|
4759 |
+
{
|
4760 |
+
"epoch": 0.5758639021796916,
|
4761 |
+
"grad_norm": 0.34016191959381104,
|
4762 |
+
"learning_rate": 7.756032073156373e-05,
|
4763 |
+
"loss": 2.57,
|
4764 |
+
"step": 677
|
4765 |
+
},
|
4766 |
+
{
|
4767 |
+
"epoch": 0.5767145135566188,
|
4768 |
+
"grad_norm": 0.349483847618103,
|
4769 |
+
"learning_rate": 7.729784029316025e-05,
|
4770 |
+
"loss": 2.6255,
|
4771 |
+
"step": 678
|
4772 |
+
},
|
4773 |
+
{
|
4774 |
+
"epoch": 0.577565124933546,
|
4775 |
+
"grad_norm": 0.3523537218570709,
|
4776 |
+
"learning_rate": 7.703552465944287e-05,
|
4777 |
+
"loss": 2.8317,
|
4778 |
+
"step": 679
|
4779 |
+
},
|
4780 |
+
{
|
4781 |
+
"epoch": 0.5784157363104732,
|
4782 |
+
"grad_norm": 0.38119909167289734,
|
4783 |
+
"learning_rate": 7.677337573467294e-05,
|
4784 |
+
"loss": 2.7303,
|
4785 |
+
"step": 680
|
4786 |
+
},
|
4787 |
+
{
|
4788 |
+
"epoch": 0.5792663476874004,
|
4789 |
+
"grad_norm": 0.4053308367729187,
|
4790 |
+
"learning_rate": 7.651139542190164e-05,
|
4791 |
+
"loss": 2.6776,
|
4792 |
+
"step": 681
|
4793 |
+
},
|
4794 |
+
{
|
4795 |
+
"epoch": 0.5801169590643275,
|
4796 |
+
"grad_norm": 0.38038370013237,
|
4797 |
+
"learning_rate": 7.624958562295606e-05,
|
4798 |
+
"loss": 2.6411,
|
4799 |
+
"step": 682
|
4800 |
+
},
|
4801 |
+
{
|
4802 |
+
"epoch": 0.5809675704412547,
|
4803 |
+
"grad_norm": 0.414034903049469,
|
4804 |
+
"learning_rate": 7.598794823842557e-05,
|
4805 |
+
"loss": 2.8368,
|
4806 |
+
"step": 683
|
4807 |
+
},
|
4808 |
+
{
|
4809 |
+
"epoch": 0.5818181818181818,
|
4810 |
+
"grad_norm": 0.41362208127975464,
|
4811 |
+
"learning_rate": 7.572648516764777e-05,
|
4812 |
+
"loss": 2.7608,
|
4813 |
+
"step": 684
|
4814 |
+
},
|
4815 |
+
{
|
4816 |
+
"epoch": 0.582668793195109,
|
4817 |
+
"grad_norm": 0.41102275252342224,
|
4818 |
+
"learning_rate": 7.54651983086949e-05,
|
4819 |
+
"loss": 2.7317,
|
4820 |
+
"step": 685
|
4821 |
+
},
|
4822 |
+
{
|
4823 |
+
"epoch": 0.5835194045720361,
|
4824 |
+
"grad_norm": 0.45451274514198303,
|
4825 |
+
"learning_rate": 7.520408955836007e-05,
|
4826 |
+
"loss": 2.7164,
|
4827 |
+
"step": 686
|
4828 |
+
},
|
4829 |
+
{
|
4830 |
+
"epoch": 0.5843700159489633,
|
4831 |
+
"grad_norm": 0.42562657594680786,
|
4832 |
+
"learning_rate": 7.494316081214334e-05,
|
4833 |
+
"loss": 2.6641,
|
4834 |
+
"step": 687
|
4835 |
+
},
|
4836 |
+
{
|
4837 |
+
"epoch": 0.5852206273258905,
|
4838 |
+
"grad_norm": 0.46389469504356384,
|
4839 |
+
"learning_rate": 7.468241396423801e-05,
|
4840 |
+
"loss": 2.6883,
|
4841 |
+
"step": 688
|
4842 |
+
},
|
4843 |
+
{
|
4844 |
+
"epoch": 0.5860712387028176,
|
4845 |
+
"grad_norm": 0.42721521854400635,
|
4846 |
+
"learning_rate": 7.442185090751705e-05,
|
4847 |
+
"loss": 2.5853,
|
4848 |
+
"step": 689
|
4849 |
+
},
|
4850 |
+
{
|
4851 |
+
"epoch": 0.5869218500797448,
|
4852 |
+
"grad_norm": 0.5020928978919983,
|
4853 |
+
"learning_rate": 7.416147353351909e-05,
|
4854 |
+
"loss": 2.6468,
|
4855 |
+
"step": 690
|
4856 |
+
},
|
4857 |
+
{
|
4858 |
+
"epoch": 0.587772461456672,
|
4859 |
+
"grad_norm": 0.4806350767612457,
|
4860 |
+
"learning_rate": 7.390128373243479e-05,
|
4861 |
+
"loss": 2.5275,
|
4862 |
+
"step": 691
|
4863 |
+
},
|
4864 |
+
{
|
4865 |
+
"epoch": 0.5886230728335992,
|
4866 |
+
"grad_norm": 0.5264183282852173,
|
4867 |
+
"learning_rate": 7.364128339309326e-05,
|
4868 |
+
"loss": 2.6082,
|
4869 |
+
"step": 692
|
4870 |
+
},
|
4871 |
+
{
|
4872 |
+
"epoch": 0.5894736842105263,
|
4873 |
+
"grad_norm": 0.5251814126968384,
|
4874 |
+
"learning_rate": 7.338147440294809e-05,
|
4875 |
+
"loss": 2.6319,
|
4876 |
+
"step": 693
|
4877 |
+
},
|
4878 |
+
{
|
4879 |
+
"epoch": 0.5903242955874535,
|
4880 |
+
"grad_norm": 0.5503838658332825,
|
4881 |
+
"learning_rate": 7.312185864806391e-05,
|
4882 |
+
"loss": 2.6875,
|
4883 |
+
"step": 694
|
4884 |
+
},
|
4885 |
+
{
|
4886 |
+
"epoch": 0.5911749069643807,
|
4887 |
+
"grad_norm": 0.5717040300369263,
|
4888 |
+
"learning_rate": 7.286243801310248e-05,
|
4889 |
+
"loss": 2.6859,
|
4890 |
+
"step": 695
|
4891 |
+
},
|
4892 |
+
{
|
4893 |
+
"epoch": 0.5920255183413078,
|
4894 |
+
"grad_norm": 0.5679376125335693,
|
4895 |
+
"learning_rate": 7.260321438130913e-05,
|
4896 |
+
"loss": 2.6425,
|
4897 |
+
"step": 696
|
4898 |
+
},
|
4899 |
+
{
|
4900 |
+
"epoch": 0.592876129718235,
|
4901 |
+
"grad_norm": 0.6636802554130554,
|
4902 |
+
"learning_rate": 7.234418963449907e-05,
|
4903 |
+
"loss": 2.7617,
|
4904 |
+
"step": 697
|
4905 |
+
},
|
4906 |
+
{
|
4907 |
+
"epoch": 0.5937267410951621,
|
4908 |
+
"grad_norm": 0.6101743578910828,
|
4909 |
+
"learning_rate": 7.208536565304373e-05,
|
4910 |
+
"loss": 2.6909,
|
4911 |
+
"step": 698
|
4912 |
+
},
|
4913 |
+
{
|
4914 |
+
"epoch": 0.5945773524720893,
|
4915 |
+
"grad_norm": 0.6630806922912598,
|
4916 |
+
"learning_rate": 7.182674431585704e-05,
|
4917 |
+
"loss": 2.7461,
|
4918 |
+
"step": 699
|
4919 |
+
},
|
4920 |
+
{
|
4921 |
+
"epoch": 0.5954279638490165,
|
4922 |
+
"grad_norm": 0.738452136516571,
|
4923 |
+
"learning_rate": 7.156832750038192e-05,
|
4924 |
+
"loss": 2.8512,
|
4925 |
+
"step": 700
|
4926 |
+
},
|
4927 |
+
{
|
4928 |
+
"epoch": 0.5962785752259436,
|
4929 |
+
"grad_norm": 0.2764449715614319,
|
4930 |
+
"learning_rate": 7.131011708257654e-05,
|
4931 |
+
"loss": 2.4965,
|
4932 |
+
"step": 701
|
4933 |
+
},
|
4934 |
+
{
|
4935 |
+
"epoch": 0.5971291866028708,
|
4936 |
+
"grad_norm": 0.27585679292678833,
|
4937 |
+
"learning_rate": 7.105211493690073e-05,
|
4938 |
+
"loss": 2.5628,
|
4939 |
+
"step": 702
|
4940 |
+
},
|
4941 |
+
{
|
4942 |
+
"epoch": 0.597979797979798,
|
4943 |
+
"grad_norm": 0.2890099287033081,
|
4944 |
+
"learning_rate": 7.079432293630244e-05,
|
4945 |
+
"loss": 2.6482,
|
4946 |
+
"step": 703
|
4947 |
+
},
|
4948 |
+
{
|
4949 |
+
"epoch": 0.5988304093567252,
|
4950 |
+
"grad_norm": 0.27924832701683044,
|
4951 |
+
"learning_rate": 7.0536742952204e-05,
|
4952 |
+
"loss": 2.4101,
|
4953 |
+
"step": 704
|
4954 |
+
},
|
4955 |
+
{
|
4956 |
+
"epoch": 0.5996810207336523,
|
4957 |
+
"grad_norm": 0.30849024653434753,
|
4958 |
+
"learning_rate": 7.02793768544887e-05,
|
4959 |
+
"loss": 2.4998,
|
4960 |
+
"step": 705
|
4961 |
+
},
|
4962 |
+
{
|
4963 |
+
"epoch": 0.6005316321105795,
|
4964 |
+
"grad_norm": 0.286748468875885,
|
4965 |
+
"learning_rate": 7.002222651148714e-05,
|
4966 |
+
"loss": 2.3868,
|
4967 |
+
"step": 706
|
4968 |
+
},
|
4969 |
+
{
|
4970 |
+
"epoch": 0.6013822434875067,
|
4971 |
+
"grad_norm": 0.2908494472503662,
|
4972 |
+
"learning_rate": 6.976529378996357e-05,
|
4973 |
+
"loss": 2.4456,
|
4974 |
+
"step": 707
|
4975 |
+
},
|
4976 |
+
{
|
4977 |
+
"epoch": 0.6022328548644338,
|
4978 |
+
"grad_norm": 0.26911187171936035,
|
4979 |
+
"learning_rate": 6.950858055510254e-05,
|
4980 |
+
"loss": 2.633,
|
4981 |
+
"step": 708
|
4982 |
+
},
|
4983 |
+
{
|
4984 |
+
"epoch": 0.603083466241361,
|
4985 |
+
"grad_norm": 0.33310920000076294,
|
4986 |
+
"learning_rate": 6.925208867049522e-05,
|
4987 |
+
"loss": 2.8533,
|
4988 |
+
"step": 709
|
4989 |
+
},
|
4990 |
+
{
|
4991 |
+
"epoch": 0.6039340776182881,
|
4992 |
+
"grad_norm": 0.2783631682395935,
|
4993 |
+
"learning_rate": 6.89958199981258e-05,
|
4994 |
+
"loss": 2.4214,
|
4995 |
+
"step": 710
|
4996 |
+
},
|
4997 |
+
{
|
4998 |
+
"epoch": 0.6047846889952153,
|
4999 |
+
"grad_norm": 0.2714787721633911,
|
5000 |
+
"learning_rate": 6.873977639835829e-05,
|
5001 |
+
"loss": 2.6986,
|
5002 |
+
"step": 711
|
5003 |
+
},
|
5004 |
+
{
|
5005 |
+
"epoch": 0.6056353003721425,
|
5006 |
+
"grad_norm": 0.2905879616737366,
|
5007 |
+
"learning_rate": 6.848395972992261e-05,
|
5008 |
+
"loss": 2.5218,
|
5009 |
+
"step": 712
|
5010 |
+
},
|
5011 |
+
{
|
5012 |
+
"epoch": 0.6064859117490696,
|
5013 |
+
"grad_norm": 0.288647323846817,
|
5014 |
+
"learning_rate": 6.822837184990132e-05,
|
5015 |
+
"loss": 2.6838,
|
5016 |
+
"step": 713
|
5017 |
+
},
|
5018 |
+
{
|
5019 |
+
"epoch": 0.6073365231259968,
|
5020 |
+
"grad_norm": 0.2918678820133209,
|
5021 |
+
"learning_rate": 6.797301461371625e-05,
|
5022 |
+
"loss": 2.6513,
|
5023 |
+
"step": 714
|
5024 |
+
},
|
5025 |
+
{
|
5026 |
+
"epoch": 0.6081871345029239,
|
5027 |
+
"grad_norm": 0.26965370774269104,
|
5028 |
+
"learning_rate": 6.771788987511469e-05,
|
5029 |
+
"loss": 2.5379,
|
5030 |
+
"step": 715
|
5031 |
+
},
|
5032 |
+
{
|
5033 |
+
"epoch": 0.6090377458798512,
|
5034 |
+
"grad_norm": 0.27741289138793945,
|
5035 |
+
"learning_rate": 6.746299948615631e-05,
|
5036 |
+
"loss": 2.6699,
|
5037 |
+
"step": 716
|
5038 |
+
},
|
5039 |
+
{
|
5040 |
+
"epoch": 0.6098883572567783,
|
5041 |
+
"grad_norm": 0.26440632343292236,
|
5042 |
+
"learning_rate": 6.720834529719939e-05,
|
5043 |
+
"loss": 2.6012,
|
5044 |
+
"step": 717
|
5045 |
+
},
|
5046 |
+
{
|
5047 |
+
"epoch": 0.6107389686337055,
|
5048 |
+
"grad_norm": 0.27086710929870605,
|
5049 |
+
"learning_rate": 6.695392915688759e-05,
|
5050 |
+
"loss": 2.5854,
|
5051 |
+
"step": 718
|
5052 |
+
},
|
5053 |
+
{
|
5054 |
+
"epoch": 0.6115895800106327,
|
5055 |
+
"grad_norm": 0.2577430009841919,
|
5056 |
+
"learning_rate": 6.66997529121365e-05,
|
5057 |
+
"loss": 2.4548,
|
5058 |
+
"step": 719
|
5059 |
+
},
|
5060 |
+
{
|
5061 |
+
"epoch": 0.6124401913875598,
|
5062 |
+
"grad_norm": 0.2519771158695221,
|
5063 |
+
"learning_rate": 6.644581840812018e-05,
|
5064 |
+
"loss": 2.4582,
|
5065 |
+
"step": 720
|
5066 |
+
},
|
5067 |
+
{
|
5068 |
+
"epoch": 0.613290802764487,
|
5069 |
+
"grad_norm": 0.31657370924949646,
|
5070 |
+
"learning_rate": 6.619212748825776e-05,
|
5071 |
+
"loss": 2.5846,
|
5072 |
+
"step": 721
|
5073 |
+
},
|
5074 |
+
{
|
5075 |
+
"epoch": 0.6141414141414141,
|
5076 |
+
"grad_norm": 0.28847917914390564,
|
5077 |
+
"learning_rate": 6.593868199420017e-05,
|
5078 |
+
"loss": 2.7865,
|
5079 |
+
"step": 722
|
5080 |
+
},
|
5081 |
+
{
|
5082 |
+
"epoch": 0.6149920255183413,
|
5083 |
+
"grad_norm": 0.29375341534614563,
|
5084 |
+
"learning_rate": 6.568548376581662e-05,
|
5085 |
+
"loss": 2.3419,
|
5086 |
+
"step": 723
|
5087 |
+
},
|
5088 |
+
{
|
5089 |
+
"epoch": 0.6158426368952685,
|
5090 |
+
"grad_norm": 0.320289671421051,
|
5091 |
+
"learning_rate": 6.543253464118131e-05,
|
5092 |
+
"loss": 2.7476,
|
5093 |
+
"step": 724
|
5094 |
+
},
|
5095 |
+
{
|
5096 |
+
"epoch": 0.6166932482721956,
|
5097 |
+
"grad_norm": 0.3045991063117981,
|
5098 |
+
"learning_rate": 6.517983645656014e-05,
|
5099 |
+
"loss": 2.5995,
|
5100 |
+
"step": 725
|
5101 |
+
},
|
5102 |
+
{
|
5103 |
+
"epoch": 0.6175438596491228,
|
5104 |
+
"grad_norm": 0.3231359124183655,
|
5105 |
+
"learning_rate": 6.492739104639727e-05,
|
5106 |
+
"loss": 2.6869,
|
5107 |
+
"step": 726
|
5108 |
+
},
|
5109 |
+
{
|
5110 |
+
"epoch": 0.6183944710260499,
|
5111 |
+
"grad_norm": 0.36181551218032837,
|
5112 |
+
"learning_rate": 6.467520024330193e-05,
|
5113 |
+
"loss": 2.8256,
|
5114 |
+
"step": 727
|
5115 |
+
},
|
5116 |
+
{
|
5117 |
+
"epoch": 0.6192450824029772,
|
5118 |
+
"grad_norm": 0.34118232131004333,
|
5119 |
+
"learning_rate": 6.4423265878035e-05,
|
5120 |
+
"loss": 2.7321,
|
5121 |
+
"step": 728
|
5122 |
+
},
|
5123 |
+
{
|
5124 |
+
"epoch": 0.6200956937799043,
|
5125 |
+
"grad_norm": 0.3405606746673584,
|
5126 |
+
"learning_rate": 6.417158977949575e-05,
|
5127 |
+
"loss": 2.6993,
|
5128 |
+
"step": 729
|
5129 |
+
},
|
5130 |
+
{
|
5131 |
+
"epoch": 0.6209463051568315,
|
5132 |
+
"grad_norm": 0.355055570602417,
|
5133 |
+
"learning_rate": 6.392017377470866e-05,
|
5134 |
+
"loss": 2.7063,
|
5135 |
+
"step": 730
|
5136 |
+
},
|
5137 |
+
{
|
5138 |
+
"epoch": 0.6217969165337587,
|
5139 |
+
"grad_norm": 0.368937611579895,
|
5140 |
+
"learning_rate": 6.366901968881002e-05,
|
5141 |
+
"loss": 2.682,
|
5142 |
+
"step": 731
|
5143 |
+
},
|
5144 |
+
{
|
5145 |
+
"epoch": 0.6226475279106858,
|
5146 |
+
"grad_norm": 0.3755683898925781,
|
5147 |
+
"learning_rate": 6.341812934503469e-05,
|
5148 |
+
"loss": 2.7295,
|
5149 |
+
"step": 732
|
5150 |
+
},
|
5151 |
+
{
|
5152 |
+
"epoch": 0.623498139287613,
|
5153 |
+
"grad_norm": 0.37153956294059753,
|
5154 |
+
"learning_rate": 6.316750456470303e-05,
|
5155 |
+
"loss": 2.7157,
|
5156 |
+
"step": 733
|
5157 |
+
},
|
5158 |
+
{
|
5159 |
+
"epoch": 0.6243487506645401,
|
5160 |
+
"grad_norm": 0.4051001965999603,
|
5161 |
+
"learning_rate": 6.291714716720749e-05,
|
5162 |
+
"loss": 2.8429,
|
5163 |
+
"step": 734
|
5164 |
+
},
|
5165 |
+
{
|
5166 |
+
"epoch": 0.6251993620414673,
|
5167 |
+
"grad_norm": 0.3929100036621094,
|
5168 |
+
"learning_rate": 6.26670589699995e-05,
|
5169 |
+
"loss": 2.7148,
|
5170 |
+
"step": 735
|
5171 |
+
},
|
5172 |
+
{
|
5173 |
+
"epoch": 0.6260499734183945,
|
5174 |
+
"grad_norm": 0.4600156843662262,
|
5175 |
+
"learning_rate": 6.24172417885762e-05,
|
5176 |
+
"loss": 2.765,
|
5177 |
+
"step": 736
|
5178 |
+
},
|
5179 |
+
{
|
5180 |
+
"epoch": 0.6269005847953216,
|
5181 |
+
"grad_norm": 0.42697155475616455,
|
5182 |
+
"learning_rate": 6.216769743646733e-05,
|
5183 |
+
"loss": 2.6477,
|
5184 |
+
"step": 737
|
5185 |
+
},
|
5186 |
+
{
|
5187 |
+
"epoch": 0.6277511961722488,
|
5188 |
+
"grad_norm": 0.4428333640098572,
|
5189 |
+
"learning_rate": 6.191842772522214e-05,
|
5190 |
+
"loss": 2.9439,
|
5191 |
+
"step": 738
|
5192 |
+
},
|
5193 |
+
{
|
5194 |
+
"epoch": 0.6286018075491759,
|
5195 |
+
"grad_norm": 0.45895466208457947,
|
5196 |
+
"learning_rate": 6.166943446439604e-05,
|
5197 |
+
"loss": 2.6916,
|
5198 |
+
"step": 739
|
5199 |
+
},
|
5200 |
+
{
|
5201 |
+
"epoch": 0.6294524189261032,
|
5202 |
+
"grad_norm": 0.4783354103565216,
|
5203 |
+
"learning_rate": 6.142071946153751e-05,
|
5204 |
+
"loss": 2.6747,
|
5205 |
+
"step": 740
|
5206 |
+
},
|
5207 |
+
{
|
5208 |
+
"epoch": 0.6303030303030303,
|
5209 |
+
"grad_norm": 0.4584222733974457,
|
5210 |
+
"learning_rate": 6.117228452217525e-05,
|
5211 |
+
"loss": 2.8631,
|
5212 |
+
"step": 741
|
5213 |
+
},
|
5214 |
+
{
|
5215 |
+
"epoch": 0.6311536416799575,
|
5216 |
+
"grad_norm": 0.5074661374092102,
|
5217 |
+
"learning_rate": 6.092413144980464e-05,
|
5218 |
+
"loss": 2.7687,
|
5219 |
+
"step": 742
|
5220 |
+
},
|
5221 |
+
{
|
5222 |
+
"epoch": 0.6320042530568847,
|
5223 |
+
"grad_norm": 0.500322699546814,
|
5224 |
+
"learning_rate": 6.0676262045874976e-05,
|
5225 |
+
"loss": 2.6794,
|
5226 |
+
"step": 743
|
5227 |
+
},
|
5228 |
+
{
|
5229 |
+
"epoch": 0.6328548644338118,
|
5230 |
+
"grad_norm": 0.4991973042488098,
|
5231 |
+
"learning_rate": 6.04286781097763e-05,
|
5232 |
+
"loss": 2.739,
|
5233 |
+
"step": 744
|
5234 |
+
},
|
5235 |
+
{
|
5236 |
+
"epoch": 0.633705475810739,
|
5237 |
+
"grad_norm": 0.5203631520271301,
|
5238 |
+
"learning_rate": 6.018138143882621e-05,
|
5239 |
+
"loss": 2.5866,
|
5240 |
+
"step": 745
|
5241 |
+
},
|
5242 |
+
{
|
5243 |
+
"epoch": 0.6345560871876661,
|
5244 |
+
"grad_norm": 0.5206765532493591,
|
5245 |
+
"learning_rate": 5.9934373828257106e-05,
|
5246 |
+
"loss": 2.5869,
|
5247 |
+
"step": 746
|
5248 |
+
},
|
5249 |
+
{
|
5250 |
+
"epoch": 0.6354066985645933,
|
5251 |
+
"grad_norm": 0.5892865061759949,
|
5252 |
+
"learning_rate": 5.96876570712028e-05,
|
5253 |
+
"loss": 2.6141,
|
5254 |
+
"step": 747
|
5255 |
+
},
|
5256 |
+
{
|
5257 |
+
"epoch": 0.6362573099415205,
|
5258 |
+
"grad_norm": 0.6528813242912292,
|
5259 |
+
"learning_rate": 5.944123295868573e-05,
|
5260 |
+
"loss": 2.7467,
|
5261 |
+
"step": 748
|
5262 |
+
},
|
5263 |
+
{
|
5264 |
+
"epoch": 0.6371079213184476,
|
5265 |
+
"grad_norm": 0.6801220178604126,
|
5266 |
+
"learning_rate": 5.9195103279603956e-05,
|
5267 |
+
"loss": 2.8362,
|
5268 |
+
"step": 749
|
5269 |
+
},
|
5270 |
+
{
|
5271 |
+
"epoch": 0.6379585326953748,
|
5272 |
+
"grad_norm": 0.7539176940917969,
|
5273 |
+
"learning_rate": 5.894926982071804e-05,
|
5274 |
+
"loss": 2.8246,
|
5275 |
+
"step": 750
|
5276 |
+
},
|
5277 |
+
{
|
5278 |
+
"epoch": 0.6388091440723019,
|
5279 |
+
"grad_norm": 0.2966770827770233,
|
5280 |
+
"learning_rate": 5.870373436663823e-05,
|
5281 |
+
"loss": 2.6906,
|
5282 |
+
"step": 751
|
5283 |
+
},
|
5284 |
+
{
|
5285 |
+
"epoch": 0.6396597554492292,
|
5286 |
+
"grad_norm": 0.27538472414016724,
|
5287 |
+
"learning_rate": 5.845849869981137e-05,
|
5288 |
+
"loss": 2.4334,
|
5289 |
+
"step": 752
|
5290 |
+
},
|
5291 |
+
{
|
5292 |
+
"epoch": 0.6405103668261563,
|
5293 |
+
"grad_norm": 0.2812007665634155,
|
5294 |
+
"learning_rate": 5.821356460050805e-05,
|
5295 |
+
"loss": 2.4665,
|
5296 |
+
"step": 753
|
5297 |
+
},
|
5298 |
+
{
|
5299 |
+
"epoch": 0.6413609782030835,
|
5300 |
+
"grad_norm": 0.28760018944740295,
|
5301 |
+
"learning_rate": 5.796893384680964e-05,
|
5302 |
+
"loss": 2.6608,
|
5303 |
+
"step": 754
|
5304 |
+
},
|
5305 |
+
{
|
5306 |
+
"epoch": 0.6422115895800107,
|
5307 |
+
"grad_norm": 0.2786334455013275,
|
5308 |
+
"learning_rate": 5.772460821459542e-05,
|
5309 |
+
"loss": 2.4717,
|
5310 |
+
"step": 755
|
5311 |
+
},
|
5312 |
+
{
|
5313 |
+
"epoch": 0.6430622009569378,
|
5314 |
+
"grad_norm": 0.27650853991508484,
|
5315 |
+
"learning_rate": 5.7480589477529545e-05,
|
5316 |
+
"loss": 2.5303,
|
5317 |
+
"step": 756
|
5318 |
+
},
|
5319 |
+
{
|
5320 |
+
"epoch": 0.643912812333865,
|
5321 |
+
"grad_norm": 0.29573655128479004,
|
5322 |
+
"learning_rate": 5.723687940704856e-05,
|
5323 |
+
"loss": 2.6558,
|
5324 |
+
"step": 757
|
5325 |
+
},
|
5326 |
+
{
|
5327 |
+
"epoch": 0.6447634237107921,
|
5328 |
+
"grad_norm": 0.2814992070198059,
|
5329 |
+
"learning_rate": 5.699347977234799e-05,
|
5330 |
+
"loss": 2.4268,
|
5331 |
+
"step": 758
|
5332 |
+
},
|
5333 |
+
{
|
5334 |
+
"epoch": 0.6456140350877193,
|
5335 |
+
"grad_norm": 0.2827305495738983,
|
5336 |
+
"learning_rate": 5.675039234036983e-05,
|
5337 |
+
"loss": 2.5284,
|
5338 |
+
"step": 759
|
5339 |
+
},
|
5340 |
+
{
|
5341 |
+
"epoch": 0.6464646464646465,
|
5342 |
+
"grad_norm": 0.2877061665058136,
|
5343 |
+
"learning_rate": 5.650761887578977e-05,
|
5344 |
+
"loss": 2.5597,
|
5345 |
+
"step": 760
|
5346 |
+
},
|
5347 |
+
{
|
5348 |
+
"epoch": 0.6473152578415736,
|
5349 |
+
"grad_norm": 0.2902846038341522,
|
5350 |
+
"learning_rate": 5.6265161141004244e-05,
|
5351 |
+
"loss": 2.719,
|
5352 |
+
"step": 761
|
5353 |
+
},
|
5354 |
+
{
|
5355 |
+
"epoch": 0.6481658692185008,
|
5356 |
+
"grad_norm": 0.3014174997806549,
|
5357 |
+
"learning_rate": 5.602302089611755e-05,
|
5358 |
+
"loss": 2.6498,
|
5359 |
+
"step": 762
|
5360 |
+
},
|
5361 |
+
{
|
5362 |
+
"epoch": 0.6490164805954279,
|
5363 |
+
"grad_norm": 0.2754184603691101,
|
5364 |
+
"learning_rate": 5.578119989892931e-05,
|
5365 |
+
"loss": 2.7288,
|
5366 |
+
"step": 763
|
5367 |
+
},
|
5368 |
+
{
|
5369 |
+
"epoch": 0.6498670919723551,
|
5370 |
+
"grad_norm": 0.27788618206977844,
|
5371 |
+
"learning_rate": 5.5539699904921635e-05,
|
5372 |
+
"loss": 2.5285,
|
5373 |
+
"step": 764
|
5374 |
+
},
|
5375 |
+
{
|
5376 |
+
"epoch": 0.6507177033492823,
|
5377 |
+
"grad_norm": 0.27178868651390076,
|
5378 |
+
"learning_rate": 5.529852266724616e-05,
|
5379 |
+
"loss": 2.6693,
|
5380 |
+
"step": 765
|
5381 |
+
},
|
5382 |
+
{
|
5383 |
+
"epoch": 0.6515683147262095,
|
5384 |
+
"grad_norm": 0.2783076763153076,
|
5385 |
+
"learning_rate": 5.505766993671162e-05,
|
5386 |
+
"loss": 2.6003,
|
5387 |
+
"step": 766
|
5388 |
+
},
|
5389 |
+
{
|
5390 |
+
"epoch": 0.6524189261031367,
|
5391 |
+
"grad_norm": 0.27819153666496277,
|
5392 |
+
"learning_rate": 5.481714346177103e-05,
|
5393 |
+
"loss": 2.5775,
|
5394 |
+
"step": 767
|
5395 |
+
},
|
5396 |
+
{
|
5397 |
+
"epoch": 0.6532695374800638,
|
5398 |
+
"grad_norm": 0.27801868319511414,
|
5399 |
+
"learning_rate": 5.457694498850891e-05,
|
5400 |
+
"loss": 2.6342,
|
5401 |
+
"step": 768
|
5402 |
+
},
|
5403 |
+
{
|
5404 |
+
"epoch": 0.654120148856991,
|
5405 |
+
"grad_norm": 0.27529436349868774,
|
5406 |
+
"learning_rate": 5.43370762606287e-05,
|
5407 |
+
"loss": 2.6376,
|
5408 |
+
"step": 769
|
5409 |
+
},
|
5410 |
+
{
|
5411 |
+
"epoch": 0.6549707602339181,
|
5412 |
+
"grad_norm": 0.2912578582763672,
|
5413 |
+
"learning_rate": 5.409753901944006e-05,
|
5414 |
+
"loss": 2.8169,
|
5415 |
+
"step": 770
|
5416 |
+
},
|
5417 |
+
{
|
5418 |
+
"epoch": 0.6558213716108453,
|
5419 |
+
"grad_norm": 0.28431615233421326,
|
5420 |
+
"learning_rate": 5.385833500384632e-05,
|
5421 |
+
"loss": 2.5608,
|
5422 |
+
"step": 771
|
5423 |
+
},
|
5424 |
+
{
|
5425 |
+
"epoch": 0.6566719829877724,
|
5426 |
+
"grad_norm": 0.3062645196914673,
|
5427 |
+
"learning_rate": 5.3619465950331646e-05,
|
5428 |
+
"loss": 2.6686,
|
5429 |
+
"step": 772
|
5430 |
+
},
|
5431 |
+
{
|
5432 |
+
"epoch": 0.6575225943646996,
|
5433 |
+
"grad_norm": 0.29184243083000183,
|
5434 |
+
"learning_rate": 5.3380933592948704e-05,
|
5435 |
+
"loss": 2.4963,
|
5436 |
+
"step": 773
|
5437 |
+
},
|
5438 |
+
{
|
5439 |
+
"epoch": 0.6583732057416268,
|
5440 |
+
"grad_norm": 0.3256790041923523,
|
5441 |
+
"learning_rate": 5.3142739663305906e-05,
|
5442 |
+
"loss": 2.5206,
|
5443 |
+
"step": 774
|
5444 |
+
},
|
5445 |
+
{
|
5446 |
+
"epoch": 0.6592238171185539,
|
5447 |
+
"grad_norm": 0.3323804438114166,
|
5448 |
+
"learning_rate": 5.2904885890554836e-05,
|
5449 |
+
"loss": 2.7495,
|
5450 |
+
"step": 775
|
5451 |
+
},
|
5452 |
+
{
|
5453 |
+
"epoch": 0.6600744284954811,
|
5454 |
+
"grad_norm": 0.3358718454837799,
|
5455 |
+
"learning_rate": 5.266737400137765e-05,
|
5456 |
+
"loss": 2.646,
|
5457 |
+
"step": 776
|
5458 |
+
},
|
5459 |
+
{
|
5460 |
+
"epoch": 0.6609250398724082,
|
5461 |
+
"grad_norm": 0.34872984886169434,
|
5462 |
+
"learning_rate": 5.24302057199749e-05,
|
5463 |
+
"loss": 2.8041,
|
5464 |
+
"step": 777
|
5465 |
+
},
|
5466 |
+
{
|
5467 |
+
"epoch": 0.6617756512493355,
|
5468 |
+
"grad_norm": 0.3432627320289612,
|
5469 |
+
"learning_rate": 5.2193382768052436e-05,
|
5470 |
+
"loss": 2.6066,
|
5471 |
+
"step": 778
|
5472 |
+
},
|
5473 |
+
{
|
5474 |
+
"epoch": 0.6626262626262627,
|
5475 |
+
"grad_norm": 0.3716273009777069,
|
5476 |
+
"learning_rate": 5.19569068648094e-05,
|
5477 |
+
"loss": 2.6634,
|
5478 |
+
"step": 779
|
5479 |
+
},
|
5480 |
+
{
|
5481 |
+
"epoch": 0.6634768740031898,
|
5482 |
+
"grad_norm": 0.3645707964897156,
|
5483 |
+
"learning_rate": 5.172077972692553e-05,
|
5484 |
+
"loss": 2.5782,
|
5485 |
+
"step": 780
|
5486 |
+
},
|
5487 |
+
{
|
5488 |
+
"epoch": 0.664327485380117,
|
5489 |
+
"grad_norm": 0.43927833437919617,
|
5490 |
+
"learning_rate": 5.148500306854862e-05,
|
5491 |
+
"loss": 2.6775,
|
5492 |
+
"step": 781
|
5493 |
+
},
|
5494 |
+
{
|
5495 |
+
"epoch": 0.6651780967570441,
|
5496 |
+
"grad_norm": 0.4325307607650757,
|
5497 |
+
"learning_rate": 5.124957860128237e-05,
|
5498 |
+
"loss": 2.6963,
|
5499 |
+
"step": 782
|
5500 |
+
},
|
5501 |
+
{
|
5502 |
+
"epoch": 0.6660287081339713,
|
5503 |
+
"grad_norm": 0.39009419083595276,
|
5504 |
+
"learning_rate": 5.101450803417357e-05,
|
5505 |
+
"loss": 2.7308,
|
5506 |
+
"step": 783
|
5507 |
+
},
|
5508 |
+
{
|
5509 |
+
"epoch": 0.6668793195108984,
|
5510 |
+
"grad_norm": 0.42211011052131653,
|
5511 |
+
"learning_rate": 5.0779793073700044e-05,
|
5512 |
+
"loss": 2.8391,
|
5513 |
+
"step": 784
|
5514 |
+
},
|
5515 |
+
{
|
5516 |
+
"epoch": 0.6677299308878256,
|
5517 |
+
"grad_norm": 0.4249938130378723,
|
5518 |
+
"learning_rate": 5.054543542375809e-05,
|
5519 |
+
"loss": 2.6562,
|
5520 |
+
"step": 785
|
5521 |
+
},
|
5522 |
+
{
|
5523 |
+
"epoch": 0.6685805422647528,
|
5524 |
+
"grad_norm": 0.4374600350856781,
|
5525 |
+
"learning_rate": 5.031143678565005e-05,
|
5526 |
+
"loss": 2.6552,
|
5527 |
+
"step": 786
|
5528 |
+
},
|
5529 |
+
{
|
5530 |
+
"epoch": 0.6694311536416799,
|
5531 |
+
"grad_norm": 0.471137672662735,
|
5532 |
+
"learning_rate": 5.0077798858072156e-05,
|
5533 |
+
"loss": 2.6368,
|
5534 |
+
"step": 787
|
5535 |
+
},
|
5536 |
+
{
|
5537 |
+
"epoch": 0.6702817650186071,
|
5538 |
+
"grad_norm": 0.4666772186756134,
|
5539 |
+
"learning_rate": 4.984452333710207e-05,
|
5540 |
+
"loss": 2.6859,
|
5541 |
+
"step": 788
|
5542 |
+
},
|
5543 |
+
{
|
5544 |
+
"epoch": 0.6711323763955342,
|
5545 |
+
"grad_norm": 0.5150439739227295,
|
5546 |
+
"learning_rate": 4.961161191618649e-05,
|
5547 |
+
"loss": 2.9448,
|
5548 |
+
"step": 789
|
5549 |
+
},
|
5550 |
+
{
|
5551 |
+
"epoch": 0.6719829877724615,
|
5552 |
+
"grad_norm": 0.4853833019733429,
|
5553 |
+
"learning_rate": 4.937906628612905e-05,
|
5554 |
+
"loss": 2.7409,
|
5555 |
+
"step": 790
|
5556 |
+
},
|
5557 |
+
{
|
5558 |
+
"epoch": 0.6728335991493887,
|
5559 |
+
"grad_norm": 0.4760463535785675,
|
5560 |
+
"learning_rate": 4.914688813507797e-05,
|
5561 |
+
"loss": 2.5815,
|
5562 |
+
"step": 791
|
5563 |
+
},
|
5564 |
+
{
|
5565 |
+
"epoch": 0.6736842105263158,
|
5566 |
+
"grad_norm": 0.5204352140426636,
|
5567 |
+
"learning_rate": 4.89150791485137e-05,
|
5568 |
+
"loss": 2.7374,
|
5569 |
+
"step": 792
|
5570 |
+
},
|
5571 |
+
{
|
5572 |
+
"epoch": 0.674534821903243,
|
5573 |
+
"grad_norm": 0.5126561522483826,
|
5574 |
+
"learning_rate": 4.86836410092368e-05,
|
5575 |
+
"loss": 2.755,
|
5576 |
+
"step": 793
|
5577 |
+
},
|
5578 |
+
{
|
5579 |
+
"epoch": 0.6753854332801701,
|
5580 |
+
"grad_norm": 0.55113685131073,
|
5581 |
+
"learning_rate": 4.845257539735577e-05,
|
5582 |
+
"loss": 2.7027,
|
5583 |
+
"step": 794
|
5584 |
+
},
|
5585 |
+
{
|
5586 |
+
"epoch": 0.6762360446570973,
|
5587 |
+
"grad_norm": 0.5475419759750366,
|
5588 |
+
"learning_rate": 4.822188399027461e-05,
|
5589 |
+
"loss": 2.667,
|
5590 |
+
"step": 795
|
5591 |
+
},
|
5592 |
+
{
|
5593 |
+
"epoch": 0.6770866560340244,
|
5594 |
+
"grad_norm": 0.6212125420570374,
|
5595 |
+
"learning_rate": 4.799156846268095e-05,
|
5596 |
+
"loss": 2.6865,
|
5597 |
+
"step": 796
|
5598 |
+
},
|
5599 |
+
{
|
5600 |
+
"epoch": 0.6779372674109516,
|
5601 |
+
"grad_norm": 0.6271610260009766,
|
5602 |
+
"learning_rate": 4.7761630486533694e-05,
|
5603 |
+
"loss": 2.9713,
|
5604 |
+
"step": 797
|
5605 |
+
},
|
5606 |
+
{
|
5607 |
+
"epoch": 0.6787878787878788,
|
5608 |
+
"grad_norm": 0.6181414723396301,
|
5609 |
+
"learning_rate": 4.7532071731050975e-05,
|
5610 |
+
"loss": 2.8862,
|
5611 |
+
"step": 798
|
5612 |
+
},
|
5613 |
+
{
|
5614 |
+
"epoch": 0.6796384901648059,
|
5615 |
+
"grad_norm": 0.6996387243270874,
|
5616 |
+
"learning_rate": 4.730289386269792e-05,
|
5617 |
+
"loss": 2.9082,
|
5618 |
+
"step": 799
|
5619 |
+
},
|
5620 |
+
{
|
5621 |
+
"epoch": 0.6804891015417331,
|
5622 |
+
"grad_norm": 0.7603393793106079,
|
5623 |
+
"learning_rate": 4.70740985451747e-05,
|
5624 |
+
"loss": 3.034,
|
5625 |
+
"step": 800
|
5626 |
+
},
|
5627 |
+
{
|
5628 |
+
"epoch": 0.6813397129186602,
|
5629 |
+
"grad_norm": 0.27659621834754944,
|
5630 |
+
"learning_rate": 4.684568743940444e-05,
|
5631 |
+
"loss": 2.5099,
|
5632 |
+
"step": 801
|
5633 |
+
},
|
5634 |
+
{
|
5635 |
+
"epoch": 0.6821903242955875,
|
5636 |
+
"grad_norm": 0.29582586884498596,
|
5637 |
+
"learning_rate": 4.661766220352097e-05,
|
5638 |
+
"loss": 2.5086,
|
5639 |
+
"step": 802
|
5640 |
+
},
|
5641 |
+
{
|
5642 |
+
"epoch": 0.6830409356725147,
|
5643 |
+
"grad_norm": 0.31561413407325745,
|
5644 |
+
"learning_rate": 4.639002449285693e-05,
|
5645 |
+
"loss": 2.5214,
|
5646 |
+
"step": 803
|
5647 |
+
},
|
5648 |
+
{
|
5649 |
+
"epoch": 0.6838915470494418,
|
5650 |
+
"grad_norm": 0.27388331294059753,
|
5651 |
+
"learning_rate": 4.616277595993196e-05,
|
5652 |
+
"loss": 2.4724,
|
5653 |
+
"step": 804
|
5654 |
+
},
|
5655 |
+
{
|
5656 |
+
"epoch": 0.684742158426369,
|
5657 |
+
"grad_norm": 0.2823973000049591,
|
5658 |
+
"learning_rate": 4.593591825444028e-05,
|
5659 |
+
"loss": 2.4987,
|
5660 |
+
"step": 805
|
5661 |
+
},
|
5662 |
+
{
|
5663 |
+
"epoch": 0.6855927698032961,
|
5664 |
+
"grad_norm": 0.3274148106575012,
|
5665 |
+
"learning_rate": 4.57094530232389e-05,
|
5666 |
+
"loss": 2.6108,
|
5667 |
+
"step": 806
|
5668 |
+
},
|
5669 |
+
{
|
5670 |
+
"epoch": 0.6864433811802233,
|
5671 |
+
"grad_norm": 0.27294856309890747,
|
5672 |
+
"learning_rate": 4.5483381910335955e-05,
|
5673 |
+
"loss": 2.4774,
|
5674 |
+
"step": 807
|
5675 |
+
},
|
5676 |
+
{
|
5677 |
+
"epoch": 0.6872939925571504,
|
5678 |
+
"grad_norm": 0.2771831750869751,
|
5679 |
+
"learning_rate": 4.525770655687821e-05,
|
5680 |
+
"loss": 2.4953,
|
5681 |
+
"step": 808
|
5682 |
+
},
|
5683 |
+
{
|
5684 |
+
"epoch": 0.6881446039340776,
|
5685 |
+
"grad_norm": 0.28946414589881897,
|
5686 |
+
"learning_rate": 4.5032428601139644e-05,
|
5687 |
+
"loss": 2.7516,
|
5688 |
+
"step": 809
|
5689 |
+
},
|
5690 |
+
{
|
5691 |
+
"epoch": 0.6889952153110048,
|
5692 |
+
"grad_norm": 0.2817944884300232,
|
5693 |
+
"learning_rate": 4.48075496785092e-05,
|
5694 |
+
"loss": 2.6658,
|
5695 |
+
"step": 810
|
5696 |
+
},
|
5697 |
+
{
|
5698 |
+
"epoch": 0.6898458266879319,
|
5699 |
+
"grad_norm": 0.2726970613002777,
|
5700 |
+
"learning_rate": 4.4583071421479194e-05,
|
5701 |
+
"loss": 2.6478,
|
5702 |
+
"step": 811
|
5703 |
+
},
|
5704 |
+
{
|
5705 |
+
"epoch": 0.6906964380648591,
|
5706 |
+
"grad_norm": 0.2892582416534424,
|
5707 |
+
"learning_rate": 4.435899545963332e-05,
|
5708 |
+
"loss": 2.6592,
|
5709 |
+
"step": 812
|
5710 |
+
},
|
5711 |
+
{
|
5712 |
+
"epoch": 0.6915470494417862,
|
5713 |
+
"grad_norm": 0.2825833559036255,
|
5714 |
+
"learning_rate": 4.4135323419634766e-05,
|
5715 |
+
"loss": 2.4431,
|
5716 |
+
"step": 813
|
5717 |
+
},
|
5718 |
+
{
|
5719 |
+
"epoch": 0.6923976608187135,
|
5720 |
+
"grad_norm": 0.27093660831451416,
|
5721 |
+
"learning_rate": 4.391205692521453e-05,
|
5722 |
+
"loss": 2.4303,
|
5723 |
+
"step": 814
|
5724 |
+
},
|
5725 |
+
{
|
5726 |
+
"epoch": 0.6932482721956407,
|
5727 |
+
"grad_norm": 0.2554977834224701,
|
5728 |
+
"learning_rate": 4.368919759715964e-05,
|
5729 |
+
"loss": 2.3259,
|
5730 |
+
"step": 815
|
5731 |
+
},
|
5732 |
+
{
|
5733 |
+
"epoch": 0.6940988835725678,
|
5734 |
+
"grad_norm": 0.27814123034477234,
|
5735 |
+
"learning_rate": 4.346674705330117e-05,
|
5736 |
+
"loss": 2.5495,
|
5737 |
+
"step": 816
|
5738 |
+
},
|
5739 |
+
{
|
5740 |
+
"epoch": 0.694949494949495,
|
5741 |
+
"grad_norm": 0.303751140832901,
|
5742 |
+
"learning_rate": 4.32447069085028e-05,
|
5743 |
+
"loss": 2.5834,
|
5744 |
+
"step": 817
|
5745 |
+
},
|
5746 |
+
{
|
5747 |
+
"epoch": 0.6958001063264221,
|
5748 |
+
"grad_norm": 0.27305588126182556,
|
5749 |
+
"learning_rate": 4.302307877464893e-05,
|
5750 |
+
"loss": 2.479,
|
5751 |
+
"step": 818
|
5752 |
+
},
|
5753 |
+
{
|
5754 |
+
"epoch": 0.6966507177033493,
|
5755 |
+
"grad_norm": 0.265235036611557,
|
5756 |
+
"learning_rate": 4.280186426063291e-05,
|
5757 |
+
"loss": 2.4847,
|
5758 |
+
"step": 819
|
5759 |
+
},
|
5760 |
+
{
|
5761 |
+
"epoch": 0.6975013290802764,
|
5762 |
+
"grad_norm": 0.2711983025074005,
|
5763 |
+
"learning_rate": 4.258106497234551e-05,
|
5764 |
+
"loss": 2.3499,
|
5765 |
+
"step": 820
|
5766 |
+
},
|
5767 |
+
{
|
5768 |
+
"epoch": 0.6983519404572036,
|
5769 |
+
"grad_norm": 0.30092155933380127,
|
5770 |
+
"learning_rate": 4.236068251266324e-05,
|
5771 |
+
"loss": 2.7319,
|
5772 |
+
"step": 821
|
5773 |
+
},
|
5774 |
+
{
|
5775 |
+
"epoch": 0.6992025518341308,
|
5776 |
+
"grad_norm": 0.3007088601589203,
|
5777 |
+
"learning_rate": 4.214071848143655e-05,
|
5778 |
+
"loss": 2.5465,
|
5779 |
+
"step": 822
|
5780 |
+
},
|
5781 |
+
{
|
5782 |
+
"epoch": 0.7000531632110579,
|
5783 |
+
"grad_norm": 0.3158135712146759,
|
5784 |
+
"learning_rate": 4.192117447547845e-05,
|
5785 |
+
"loss": 2.6333,
|
5786 |
+
"step": 823
|
5787 |
+
},
|
5788 |
+
{
|
5789 |
+
"epoch": 0.7009037745879851,
|
5790 |
+
"grad_norm": 0.3173391819000244,
|
5791 |
+
"learning_rate": 4.170205208855281e-05,
|
5792 |
+
"loss": 2.8266,
|
5793 |
+
"step": 824
|
5794 |
+
},
|
5795 |
+
{
|
5796 |
+
"epoch": 0.7017543859649122,
|
5797 |
+
"grad_norm": 0.3236771523952484,
|
5798 |
+
"learning_rate": 4.148335291136267e-05,
|
5799 |
+
"loss": 2.7447,
|
5800 |
+
"step": 825
|
5801 |
+
},
|
5802 |
+
{
|
5803 |
+
"epoch": 0.7026049973418395,
|
5804 |
+
"grad_norm": 0.320290744304657,
|
5805 |
+
"learning_rate": 4.1265078531538916e-05,
|
5806 |
+
"loss": 2.8345,
|
5807 |
+
"step": 826
|
5808 |
+
},
|
5809 |
+
{
|
5810 |
+
"epoch": 0.7034556087187667,
|
5811 |
+
"grad_norm": 0.33358582854270935,
|
5812 |
+
"learning_rate": 4.104723053362867e-05,
|
5813 |
+
"loss": 2.848,
|
5814 |
+
"step": 827
|
5815 |
+
},
|
5816 |
+
{
|
5817 |
+
"epoch": 0.7043062200956938,
|
5818 |
+
"grad_norm": 0.35341066122055054,
|
5819 |
+
"learning_rate": 4.082981049908362e-05,
|
5820 |
+
"loss": 2.7216,
|
5821 |
+
"step": 828
|
5822 |
+
},
|
5823 |
+
{
|
5824 |
+
"epoch": 0.705156831472621,
|
5825 |
+
"grad_norm": 0.35734865069389343,
|
5826 |
+
"learning_rate": 4.061282000624885e-05,
|
5827 |
+
"loss": 2.702,
|
5828 |
+
"step": 829
|
5829 |
+
},
|
5830 |
+
{
|
5831 |
+
"epoch": 0.7060074428495481,
|
5832 |
+
"grad_norm": 0.3350493013858795,
|
5833 |
+
"learning_rate": 4.0396260630351066e-05,
|
5834 |
+
"loss": 2.4811,
|
5835 |
+
"step": 830
|
5836 |
+
},
|
5837 |
+
{
|
5838 |
+
"epoch": 0.7068580542264753,
|
5839 |
+
"grad_norm": 0.35712873935699463,
|
5840 |
+
"learning_rate": 4.018013394348752e-05,
|
5841 |
+
"loss": 2.7484,
|
5842 |
+
"step": 831
|
5843 |
+
},
|
5844 |
+
{
|
5845 |
+
"epoch": 0.7077086656034024,
|
5846 |
+
"grad_norm": 0.37364643812179565,
|
5847 |
+
"learning_rate": 3.996444151461417e-05,
|
5848 |
+
"loss": 2.6113,
|
5849 |
+
"step": 832
|
5850 |
+
},
|
5851 |
+
{
|
5852 |
+
"epoch": 0.7085592769803296,
|
5853 |
+
"grad_norm": 0.41270262002944946,
|
5854 |
+
"learning_rate": 3.9749184909534565e-05,
|
5855 |
+
"loss": 2.6654,
|
5856 |
+
"step": 833
|
5857 |
+
},
|
5858 |
+
{
|
5859 |
+
"epoch": 0.7094098883572568,
|
5860 |
+
"grad_norm": 0.3864672780036926,
|
5861 |
+
"learning_rate": 3.9534365690888566e-05,
|
5862 |
+
"loss": 2.6718,
|
5863 |
+
"step": 834
|
5864 |
+
},
|
5865 |
+
{
|
5866 |
+
"epoch": 0.7102604997341839,
|
5867 |
+
"grad_norm": 0.4115494191646576,
|
5868 |
+
"learning_rate": 3.931998541814069e-05,
|
5869 |
+
"loss": 2.6621,
|
5870 |
+
"step": 835
|
5871 |
+
},
|
5872 |
+
{
|
5873 |
+
"epoch": 0.7111111111111111,
|
5874 |
+
"grad_norm": 0.4606288969516754,
|
5875 |
+
"learning_rate": 3.9106045647569e-05,
|
5876 |
+
"loss": 2.512,
|
5877 |
+
"step": 836
|
5878 |
+
},
|
5879 |
+
{
|
5880 |
+
"epoch": 0.7119617224880382,
|
5881 |
+
"grad_norm": 0.42326587438583374,
|
5882 |
+
"learning_rate": 3.8892547932253795e-05,
|
5883 |
+
"loss": 2.6212,
|
5884 |
+
"step": 837
|
5885 |
+
},
|
5886 |
+
{
|
5887 |
+
"epoch": 0.7128123338649655,
|
5888 |
+
"grad_norm": 0.43627670407295227,
|
5889 |
+
"learning_rate": 3.8679493822066314e-05,
|
5890 |
+
"loss": 2.596,
|
5891 |
+
"step": 838
|
5892 |
+
},
|
5893 |
+
{
|
5894 |
+
"epoch": 0.7136629452418927,
|
5895 |
+
"grad_norm": 0.4475801885128021,
|
5896 |
+
"learning_rate": 3.846688486365748e-05,
|
5897 |
+
"loss": 2.64,
|
5898 |
+
"step": 839
|
5899 |
+
},
|
5900 |
+
{
|
5901 |
+
"epoch": 0.7145135566188198,
|
5902 |
+
"grad_norm": 0.4370073080062866,
|
5903 |
+
"learning_rate": 3.825472260044658e-05,
|
5904 |
+
"loss": 2.5291,
|
5905 |
+
"step": 840
|
5906 |
+
},
|
5907 |
+
{
|
5908 |
+
"epoch": 0.715364167995747,
|
5909 |
+
"grad_norm": 0.4832041561603546,
|
5910 |
+
"learning_rate": 3.804300857261025e-05,
|
5911 |
+
"loss": 2.659,
|
5912 |
+
"step": 841
|
5913 |
+
},
|
5914 |
+
{
|
5915 |
+
"epoch": 0.7162147793726741,
|
5916 |
+
"grad_norm": 0.4905672073364258,
|
5917 |
+
"learning_rate": 3.783174431707119e-05,
|
5918 |
+
"loss": 2.7363,
|
5919 |
+
"step": 842
|
5920 |
+
},
|
5921 |
+
{
|
5922 |
+
"epoch": 0.7170653907496013,
|
5923 |
+
"grad_norm": 0.5135957598686218,
|
5924 |
+
"learning_rate": 3.762093136748692e-05,
|
5925 |
+
"loss": 2.8243,
|
5926 |
+
"step": 843
|
5927 |
+
},
|
5928 |
+
{
|
5929 |
+
"epoch": 0.7179160021265284,
|
5930 |
+
"grad_norm": 0.5257413387298584,
|
5931 |
+
"learning_rate": 3.7410571254238834e-05,
|
5932 |
+
"loss": 2.8406,
|
5933 |
+
"step": 844
|
5934 |
+
},
|
5935 |
+
{
|
5936 |
+
"epoch": 0.7187666135034556,
|
5937 |
+
"grad_norm": 0.5150455236434937,
|
5938 |
+
"learning_rate": 3.7200665504420983e-05,
|
5939 |
+
"loss": 2.6053,
|
5940 |
+
"step": 845
|
5941 |
+
},
|
5942 |
+
{
|
5943 |
+
"epoch": 0.7196172248803828,
|
5944 |
+
"grad_norm": 0.5135449767112732,
|
5945 |
+
"learning_rate": 3.69912156418289e-05,
|
5946 |
+
"loss": 2.4932,
|
5947 |
+
"step": 846
|
5948 |
+
},
|
5949 |
+
{
|
5950 |
+
"epoch": 0.7204678362573099,
|
5951 |
+
"grad_norm": 0.5775859951972961,
|
5952 |
+
"learning_rate": 3.678222318694875e-05,
|
5953 |
+
"loss": 2.6011,
|
5954 |
+
"step": 847
|
5955 |
+
},
|
5956 |
+
{
|
5957 |
+
"epoch": 0.7213184476342371,
|
5958 |
+
"grad_norm": 0.6324962973594666,
|
5959 |
+
"learning_rate": 3.657368965694617e-05,
|
5960 |
+
"loss": 2.8145,
|
5961 |
+
"step": 848
|
5962 |
+
},
|
5963 |
+
{
|
5964 |
+
"epoch": 0.7221690590111642,
|
5965 |
+
"grad_norm": 0.6755786538124084,
|
5966 |
+
"learning_rate": 3.636561656565519e-05,
|
5967 |
+
"loss": 2.7566,
|
5968 |
+
"step": 849
|
5969 |
+
},
|
5970 |
+
{
|
5971 |
+
"epoch": 0.7230196703880915,
|
5972 |
+
"grad_norm": 0.7153156995773315,
|
5973 |
+
"learning_rate": 3.615800542356738e-05,
|
5974 |
+
"loss": 2.9433,
|
5975 |
+
"step": 850
|
5976 |
+
},
|
5977 |
+
{
|
5978 |
+
"epoch": 0.7238702817650186,
|
5979 |
+
"grad_norm": 0.29322749376296997,
|
5980 |
+
"learning_rate": 3.595085773782083e-05,
|
5981 |
+
"loss": 2.3433,
|
5982 |
+
"step": 851
|
5983 |
+
},
|
5984 |
+
{
|
5985 |
+
"epoch": 0.7247208931419458,
|
5986 |
+
"grad_norm": 0.28010284900665283,
|
5987 |
+
"learning_rate": 3.574417501218913e-05,
|
5988 |
+
"loss": 2.5661,
|
5989 |
+
"step": 852
|
5990 |
+
},
|
5991 |
+
{
|
5992 |
+
"epoch": 0.725571504518873,
|
5993 |
+
"grad_norm": 0.27005431056022644,
|
5994 |
+
"learning_rate": 3.55379587470706e-05,
|
5995 |
+
"loss": 2.4129,
|
5996 |
+
"step": 853
|
5997 |
+
},
|
5998 |
+
{
|
5999 |
+
"epoch": 0.7264221158958001,
|
6000 |
+
"grad_norm": 0.2710054814815521,
|
6001 |
+
"learning_rate": 3.533221043947733e-05,
|
6002 |
+
"loss": 2.4066,
|
6003 |
+
"step": 854
|
6004 |
+
},
|
6005 |
+
{
|
6006 |
+
"epoch": 0.7272727272727273,
|
6007 |
+
"grad_norm": 0.27026283740997314,
|
6008 |
+
"learning_rate": 3.512693158302421e-05,
|
6009 |
+
"loss": 2.4139,
|
6010 |
+
"step": 855
|
6011 |
+
},
|
6012 |
+
{
|
6013 |
+
"epoch": 0.7281233386496544,
|
6014 |
+
"grad_norm": 0.2927492558956146,
|
6015 |
+
"learning_rate": 3.492212366791831e-05,
|
6016 |
+
"loss": 2.5517,
|
6017 |
+
"step": 856
|
6018 |
+
},
|
6019 |
+
{
|
6020 |
+
"epoch": 0.7289739500265816,
|
6021 |
+
"grad_norm": 0.2770216166973114,
|
6022 |
+
"learning_rate": 3.471778818094785e-05,
|
6023 |
+
"loss": 2.6102,
|
6024 |
+
"step": 857
|
6025 |
+
},
|
6026 |
+
{
|
6027 |
+
"epoch": 0.7298245614035088,
|
6028 |
+
"grad_norm": 0.26914748549461365,
|
6029 |
+
"learning_rate": 3.45139266054715e-05,
|
6030 |
+
"loss": 2.6047,
|
6031 |
+
"step": 858
|
6032 |
+
},
|
6033 |
+
{
|
6034 |
+
"epoch": 0.7306751727804359,
|
6035 |
+
"grad_norm": 0.27307602763175964,
|
6036 |
+
"learning_rate": 3.4310540421407665e-05,
|
6037 |
+
"loss": 2.5707,
|
6038 |
+
"step": 859
|
6039 |
+
},
|
6040 |
+
{
|
6041 |
+
"epoch": 0.7315257841573631,
|
6042 |
+
"grad_norm": 0.2949882745742798,
|
6043 |
+
"learning_rate": 3.4107631105223525e-05,
|
6044 |
+
"loss": 2.502,
|
6045 |
+
"step": 860
|
6046 |
+
},
|
6047 |
+
{
|
6048 |
+
"epoch": 0.7323763955342902,
|
6049 |
+
"grad_norm": 0.26683247089385986,
|
6050 |
+
"learning_rate": 3.390520012992474e-05,
|
6051 |
+
"loss": 2.5785,
|
6052 |
+
"step": 861
|
6053 |
+
},
|
6054 |
+
{
|
6055 |
+
"epoch": 0.7332270069112174,
|
6056 |
+
"grad_norm": 0.2797079384326935,
|
6057 |
+
"learning_rate": 3.370324896504425e-05,
|
6058 |
+
"loss": 2.6119,
|
6059 |
+
"step": 862
|
6060 |
+
},
|
6061 |
+
{
|
6062 |
+
"epoch": 0.7340776182881446,
|
6063 |
+
"grad_norm": 0.27322161197662354,
|
6064 |
+
"learning_rate": 3.3501779076631864e-05,
|
6065 |
+
"loss": 2.5449,
|
6066 |
+
"step": 863
|
6067 |
+
},
|
6068 |
+
{
|
6069 |
+
"epoch": 0.7349282296650718,
|
6070 |
+
"grad_norm": 0.2743871212005615,
|
6071 |
+
"learning_rate": 3.330079192724379e-05,
|
6072 |
+
"loss": 2.6722,
|
6073 |
+
"step": 864
|
6074 |
+
},
|
6075 |
+
{
|
6076 |
+
"epoch": 0.735778841041999,
|
6077 |
+
"grad_norm": 0.27956199645996094,
|
6078 |
+
"learning_rate": 3.3100288975931635e-05,
|
6079 |
+
"loss": 2.5033,
|
6080 |
+
"step": 865
|
6081 |
+
},
|
6082 |
+
{
|
6083 |
+
"epoch": 0.7366294524189261,
|
6084 |
+
"grad_norm": 0.2711998522281647,
|
6085 |
+
"learning_rate": 3.290027167823204e-05,
|
6086 |
+
"loss": 2.5824,
|
6087 |
+
"step": 866
|
6088 |
+
},
|
6089 |
+
{
|
6090 |
+
"epoch": 0.7374800637958533,
|
6091 |
+
"grad_norm": 0.277340292930603,
|
6092 |
+
"learning_rate": 3.270074148615615e-05,
|
6093 |
+
"loss": 2.5168,
|
6094 |
+
"step": 867
|
6095 |
+
},
|
6096 |
+
{
|
6097 |
+
"epoch": 0.7383306751727804,
|
6098 |
+
"grad_norm": 0.26151034235954285,
|
6099 |
+
"learning_rate": 3.250169984817897e-05,
|
6100 |
+
"loss": 2.5749,
|
6101 |
+
"step": 868
|
6102 |
+
},
|
6103 |
+
{
|
6104 |
+
"epoch": 0.7391812865497076,
|
6105 |
+
"grad_norm": 0.2847073972225189,
|
6106 |
+
"learning_rate": 3.230314820922883e-05,
|
6107 |
+
"loss": 2.4749,
|
6108 |
+
"step": 869
|
6109 |
+
},
|
6110 |
+
{
|
6111 |
+
"epoch": 0.7400318979266348,
|
6112 |
+
"grad_norm": 0.26492998003959656,
|
6113 |
+
"learning_rate": 3.2105088010677e-05,
|
6114 |
+
"loss": 2.5874,
|
6115 |
+
"step": 870
|
6116 |
+
},
|
6117 |
+
{
|
6118 |
+
"epoch": 0.7408825093035619,
|
6119 |
+
"grad_norm": 0.2851366400718689,
|
6120 |
+
"learning_rate": 3.1907520690327184e-05,
|
6121 |
+
"loss": 2.7108,
|
6122 |
+
"step": 871
|
6123 |
+
},
|
6124 |
+
{
|
6125 |
+
"epoch": 0.7417331206804891,
|
6126 |
+
"grad_norm": 0.29604560136795044,
|
6127 |
+
"learning_rate": 3.1710447682405076e-05,
|
6128 |
+
"loss": 2.8558,
|
6129 |
+
"step": 872
|
6130 |
+
},
|
6131 |
+
{
|
6132 |
+
"epoch": 0.7425837320574162,
|
6133 |
+
"grad_norm": 0.2940882444381714,
|
6134 |
+
"learning_rate": 3.151387041754784e-05,
|
6135 |
+
"loss": 2.5961,
|
6136 |
+
"step": 873
|
6137 |
+
},
|
6138 |
+
{
|
6139 |
+
"epoch": 0.7434343434343434,
|
6140 |
+
"grad_norm": 0.3165973126888275,
|
6141 |
+
"learning_rate": 3.131779032279397e-05,
|
6142 |
+
"loss": 2.5976,
|
6143 |
+
"step": 874
|
6144 |
+
},
|
6145 |
+
{
|
6146 |
+
"epoch": 0.7442849548112705,
|
6147 |
+
"grad_norm": 0.3157351315021515,
|
6148 |
+
"learning_rate": 3.112220882157275e-05,
|
6149 |
+
"loss": 2.5901,
|
6150 |
+
"step": 875
|
6151 |
+
},
|
6152 |
+
{
|
6153 |
+
"epoch": 0.7451355661881978,
|
6154 |
+
"grad_norm": 0.3334716558456421,
|
6155 |
+
"learning_rate": 3.092712733369387e-05,
|
6156 |
+
"loss": 2.6966,
|
6157 |
+
"step": 876
|
6158 |
+
},
|
6159 |
+
{
|
6160 |
+
"epoch": 0.745986177565125,
|
6161 |
+
"grad_norm": 0.3281215727329254,
|
6162 |
+
"learning_rate": 3.073254727533732e-05,
|
6163 |
+
"loss": 2.616,
|
6164 |
+
"step": 877
|
6165 |
+
},
|
6166 |
+
{
|
6167 |
+
"epoch": 0.7468367889420521,
|
6168 |
+
"grad_norm": 0.3478872776031494,
|
6169 |
+
"learning_rate": 3.053847005904298e-05,
|
6170 |
+
"loss": 2.7568,
|
6171 |
+
"step": 878
|
6172 |
+
},
|
6173 |
+
{
|
6174 |
+
"epoch": 0.7476874003189793,
|
6175 |
+
"grad_norm": 0.35839834809303284,
|
6176 |
+
"learning_rate": 3.034489709370033e-05,
|
6177 |
+
"loss": 2.7552,
|
6178 |
+
"step": 879
|
6179 |
+
},
|
6180 |
+
{
|
6181 |
+
"epoch": 0.7485380116959064,
|
6182 |
+
"grad_norm": 0.3570314049720764,
|
6183 |
+
"learning_rate": 3.0151829784538254e-05,
|
6184 |
+
"loss": 2.6189,
|
6185 |
+
"step": 880
|
6186 |
+
},
|
6187 |
+
{
|
6188 |
+
"epoch": 0.7493886230728336,
|
6189 |
+
"grad_norm": 0.3771030008792877,
|
6190 |
+
"learning_rate": 2.995926953311504e-05,
|
6191 |
+
"loss": 3.0162,
|
6192 |
+
"step": 881
|
6193 |
+
},
|
6194 |
+
{
|
6195 |
+
"epoch": 0.7502392344497608,
|
6196 |
+
"grad_norm": 0.38607585430145264,
|
6197 |
+
"learning_rate": 2.9767217737307806e-05,
|
6198 |
+
"loss": 2.8731,
|
6199 |
+
"step": 882
|
6200 |
+
},
|
6201 |
+
{
|
6202 |
+
"epoch": 0.7502392344497608,
|
6203 |
+
"eval_loss": 2.657837152481079,
|
6204 |
+
"eval_runtime": 80.5036,
|
6205 |
+
"eval_samples_per_second": 12.298,
|
6206 |
+
"eval_steps_per_second": 6.149,
|
6207 |
+
"step": 882
|
6208 |
}
|
6209 |
],
|
6210 |
"logging_steps": 1,
|
|
|
6224 |
"attributes": {}
|
6225 |
}
|
6226 |
},
|
6227 |
+
"total_flos": 9.018475273959506e+17,
|
6228 |
"train_batch_size": 2,
|
6229 |
"trial_name": null,
|
6230 |
"trial_params": null
|