Training in progress, step 1431, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:18d3ca7f001daf2cd93414f7f5d6331b61417ecf848fa81a52f5bf8ece10c3a1
 size 327040
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cc9405e534548f42a1f2580de54050510021fc5431f0c1acaf83fd7109ddab90
 size 739578
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:918828a372380e080a2947120d97745c7db9527b29df128b70bc4e79a913c901
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ca22fbfd9b9376878818337136d0c2795e547d8eb74f00ca7f62575ba61450ca
 size 1064
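Note: the four binary files above are tracked with Git LFS, so each diff only swaps the small pointer file; the `oid sha256:` line records the new blob's hash and `size` its byte count. As an illustrative aside (not part of the commit), a minimal Python sketch for checking a locally downloaded blob against its pointer — the blob path is hypothetical:

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Compare a blob on disk against the oid/size recorded in an LFS pointer."""
    # Each pointer line is "key value"; parse into a dict.
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Pointer contents mirror the scheduler.pt diff above; the local path is hypothetical.
pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:ca22fbfd9b9376878818337136d0c2795e547d8eb74f00ca7f62575ba61450ca\n"
    "size 1064\n"
)
print(verify_lfs_pointer(pointer, "last-checkpoint/scheduler.pt"))
```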
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.0556576529272775,
   "eval_steps": 358,
-  "global_step":
+  "global_step": 1431,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7557,6 +7557,2505 @@
       "eval_samples_per_second": 77.816,
       "eval_steps_per_second": 38.908,
       "step": 1074
-    }
+    },
+    {
+      "epoch": 0.04181130460993942,
+      "grad_norm": 0.08997377753257751,
+      "learning_rate": 2.940666611939902e-05,
+      "loss": 11.4518,
+      "step": 1075
+    },
+    {
+      "epoch": 0.041850198846785876,
+      "grad_norm": 0.09690409898757935,
+      "learning_rate": 2.92502503216746e-05,
+      "loss": 11.4226,
+      "step": 1076
+    },
+    {
+      "epoch": 0.04188909308363233,
+      "grad_norm": 0.118629090487957,
+      "learning_rate": 2.909418033297262e-05,
+      "loss": 11.4329,
+      "step": 1077
+    },
+    {
+      "epoch": 0.04192798732047879,
+      "grad_norm": 0.12322626262903214,
+      "learning_rate": 2.893845691612851e-05,
+      "loss": 11.4323,
+      "step": 1078
+    },
+    {
+      "epoch": 0.041966881557325246,
+      "grad_norm": 0.12857896089553833,
+      "learning_rate": 2.878308083228366e-05,
+      "loss": 11.4302,
+      "step": 1079
+    },
+    {
+      "epoch": 0.042005775794171696,
+      "grad_norm": 0.13390401005744934,
+      "learning_rate": 2.8628052840881682e-05,
+      "loss": 11.4698,
+      "step": 1080
+    },
+    {
+      "epoch": 0.04204467003101815,
+      "grad_norm": 0.11897007375955582,
+      "learning_rate": 2.8473373699664997e-05,
+      "loss": 11.4159,
+      "step": 1081
+    },
+    {
+      "epoch": 0.04208356426786461,
+      "grad_norm": 0.13574343919754028,
+      "learning_rate": 2.8319044164670704e-05,
+      "loss": 11.4714,
+      "step": 1082
+    },
+    {
+      "epoch": 0.042122458504711066,
+      "grad_norm": 0.12145520746707916,
+      "learning_rate": 2.8165064990227252e-05,
+      "loss": 11.4508,
+      "step": 1083
+    },
+    {
+      "epoch": 0.04216135274155752,
+      "grad_norm": 0.2545243203639984,
+      "learning_rate": 2.8011436928950553e-05,
+      "loss": 11.5192,
+      "step": 1084
+    },
+    {
+      "epoch": 0.04220024697840397,
+      "grad_norm": 0.1207599937915802,
+      "learning_rate": 2.7858160731740356e-05,
+      "loss": 11.4501,
+      "step": 1085
+    },
+    {
+      "epoch": 0.04223914121525043,
+      "grad_norm": 0.11171094328165054,
+      "learning_rate": 2.77052371477766e-05,
+      "loss": 11.4124,
+      "step": 1086
+    },
+    {
+      "epoch": 0.042278035452096886,
+      "grad_norm": 0.18224753439426422,
+      "learning_rate": 2.755266692451569e-05,
+      "loss": 11.4513,
+      "step": 1087
+    },
+    {
+      "epoch": 0.04231692968894334,
+      "grad_norm": 0.15159229934215546,
+      "learning_rate": 2.7400450807686938e-05,
+      "loss": 11.4748,
+      "step": 1088
+    },
+    {
+      "epoch": 0.04235582392578979,
+      "grad_norm": 0.167379230260849,
+      "learning_rate": 2.724858954128876e-05,
+      "loss": 11.4262,
+      "step": 1089
+    },
+    {
+      "epoch": 0.04239471816263625,
+      "grad_norm": 0.1294448971748352,
+      "learning_rate": 2.7097083867585272e-05,
+      "loss": 11.4893,
+      "step": 1090
+    },
+    {
+      "epoch": 0.042433612399482706,
+      "grad_norm": 0.15913696587085724,
+      "learning_rate": 2.694593452710249e-05,
+      "loss": 11.4446,
+      "step": 1091
+    },
+    {
+      "epoch": 0.04247250663632916,
+      "grad_norm": 0.1298837959766388,
+      "learning_rate": 2.679514225862464e-05,
+      "loss": 11.4545,
+      "step": 1092
+    },
+    {
+      "epoch": 0.04251140087317562,
+      "grad_norm": 0.1965494006872177,
+      "learning_rate": 2.664470779919087e-05,
+      "loss": 11.4959,
+      "step": 1093
+    },
+    {
+      "epoch": 0.04255029511002207,
+      "grad_norm": 0.12095270305871964,
+      "learning_rate": 2.6494631884091235e-05,
+      "loss": 11.4916,
+      "step": 1094
+    },
+    {
+      "epoch": 0.042589189346868526,
+      "grad_norm": 0.13163423538208008,
+      "learning_rate": 2.6344915246863412e-05,
+      "loss": 11.4269,
+      "step": 1095
+    },
+    {
+      "epoch": 0.04262808358371498,
+      "grad_norm": 0.20584173500537872,
+      "learning_rate": 2.6195558619288995e-05,
+      "loss": 11.4665,
+      "step": 1096
+    },
+    {
+      "epoch": 0.04266697782056144,
+      "grad_norm": 0.09251202642917633,
+      "learning_rate": 2.6046562731389912e-05,
+      "loss": 11.4101,
+      "step": 1097
+    },
+    {
+      "epoch": 0.0427058720574079,
+      "grad_norm": 0.1911085844039917,
+      "learning_rate": 2.5897928311424902e-05,
+      "loss": 11.4502,
+      "step": 1098
+    },
+    {
+      "epoch": 0.04274476629425435,
+      "grad_norm": 0.11317852139472961,
+      "learning_rate": 2.5749656085885896e-05,
+      "loss": 11.4431,
+      "step": 1099
+    },
+    {
+      "epoch": 0.0427836605311008,
+      "grad_norm": 0.1697409600019455,
+      "learning_rate": 2.5601746779494563e-05,
+      "loss": 11.4348,
+      "step": 1100
+    },
+    {
+      "epoch": 0.04282255476794726,
+      "grad_norm": 0.25867024064064026,
+      "learning_rate": 2.545420111519855e-05,
+      "loss": 11.4292,
+      "step": 1101
+    },
+    {
+      "epoch": 0.04286144900479372,
+      "grad_norm": 0.15417879819869995,
+      "learning_rate": 2.5307019814168342e-05,
+      "loss": 11.4815,
+      "step": 1102
+    },
+    {
+      "epoch": 0.04290034324164017,
+      "grad_norm": 0.18279899656772614,
+      "learning_rate": 2.5160203595793273e-05,
+      "loss": 11.4492,
+      "step": 1103
+    },
+    {
+      "epoch": 0.042939237478486623,
+      "grad_norm": 0.13540595769882202,
+      "learning_rate": 2.5013753177678323e-05,
+      "loss": 11.4151,
+      "step": 1104
+    },
+    {
+      "epoch": 0.04297813171533308,
+      "grad_norm": 0.14286018908023834,
+      "learning_rate": 2.4867669275640616e-05,
+      "loss": 11.4201,
+      "step": 1105
+    },
+    {
+      "epoch": 0.04301702595217954,
+      "grad_norm": 0.16803769767284393,
+      "learning_rate": 2.4721952603705657e-05,
+      "loss": 11.4276,
+      "step": 1106
+    },
+    {
+      "epoch": 0.043055920189025994,
+      "grad_norm": 0.09364805370569229,
+      "learning_rate": 2.457660387410411e-05,
+      "loss": 11.409,
+      "step": 1107
+    },
+    {
+      "epoch": 0.043094814425872444,
+      "grad_norm": 0.17238271236419678,
+      "learning_rate": 2.44316237972682e-05,
+      "loss": 11.4356,
+      "step": 1108
+    },
+    {
+      "epoch": 0.0431337086627189,
+      "grad_norm": 0.2280428111553192,
+      "learning_rate": 2.4287013081828257e-05,
+      "loss": 11.4807,
+      "step": 1109
+    },
+    {
+      "epoch": 0.04317260289956536,
+      "grad_norm": 0.14022088050842285,
+      "learning_rate": 2.4142772434609273e-05,
+      "loss": 11.4843,
+      "step": 1110
+    },
+    {
+      "epoch": 0.043211497136411814,
+      "grad_norm": 0.1371413767337799,
+      "learning_rate": 2.39989025606274e-05,
+      "loss": 11.4181,
+      "step": 1111
+    },
+    {
+      "epoch": 0.04325039137325827,
+      "grad_norm": 0.309488981962204,
+      "learning_rate": 2.3855404163086558e-05,
+      "loss": 11.4803,
+      "step": 1112
+    },
+    {
+      "epoch": 0.04328928561010472,
+      "grad_norm": 0.28754037618637085,
+      "learning_rate": 2.371227794337495e-05,
+      "loss": 11.6085,
+      "step": 1113
+    },
+    {
+      "epoch": 0.04332817984695118,
+      "grad_norm": 0.21863828599452972,
+      "learning_rate": 2.356952460106169e-05,
+      "loss": 11.4461,
+      "step": 1114
+    },
+    {
+      "epoch": 0.043367074083797634,
+      "grad_norm": 0.28783923387527466,
+      "learning_rate": 2.342714483389329e-05,
+      "loss": 11.4747,
+      "step": 1115
+    },
+    {
+      "epoch": 0.04340596832064409,
+      "grad_norm": 0.17284227907657623,
+      "learning_rate": 2.328513933779034e-05,
+      "loss": 11.4825,
+      "step": 1116
+    },
+    {
+      "epoch": 0.04344486255749054,
+      "grad_norm": 0.14010822772979736,
+      "learning_rate": 2.314350880684416e-05,
+      "loss": 11.4157,
+      "step": 1117
+    },
+    {
+      "epoch": 0.043483756794337,
+      "grad_norm": 0.12766648828983307,
+      "learning_rate": 2.3002253933313177e-05,
+      "loss": 11.4398,
+      "step": 1118
+    },
+    {
+      "epoch": 0.043522651031183565,
+      "grad_norm": 0.16336886584758759,
+      "learning_rate": 2.286137540761979e-05,
+      "loss": 11.4232,
+      "step": 1119
+    },
+    {
+      "epoch": 0.04356154526802991,
+      "grad_norm": 0.14002946019172668,
+      "learning_rate": 2.272087391834684e-05,
+      "loss": 11.4272,
+      "step": 1120
+    },
+    {
+      "epoch": 0.04360043950487637,
+      "grad_norm": 0.1765531450510025,
+      "learning_rate": 2.2580750152234354e-05,
+      "loss": 11.5226,
+      "step": 1121
+    },
+    {
+      "epoch": 0.04363933374172282,
+      "grad_norm": 0.13177621364593506,
+      "learning_rate": 2.2441004794176067e-05,
+      "loss": 11.418,
+      "step": 1122
+    },
+    {
+      "epoch": 0.043678227978569274,
+      "grad_norm": 0.2038862705230713,
+      "learning_rate": 2.2301638527216194e-05,
+      "loss": 11.4719,
+      "step": 1123
+    },
+    {
+      "epoch": 0.04371712221541573,
+      "grad_norm": 0.13038241863250732,
+      "learning_rate": 2.2162652032546007e-05,
+      "loss": 11.4537,
+      "step": 1124
+    },
+    {
+      "epoch": 0.04375601645226219,
+      "grad_norm": 0.2617287337779999,
+      "learning_rate": 2.2024045989500542e-05,
+      "loss": 11.5186,
+      "step": 1125
+    },
+    {
+      "epoch": 0.043794910689108645,
+      "grad_norm": 0.1686462163925171,
+      "learning_rate": 2.1885821075555302e-05,
+      "loss": 11.4127,
+      "step": 1126
+    },
+    {
+      "epoch": 0.043833804925955094,
+      "grad_norm": 0.161709725856781,
+      "learning_rate": 2.174797796632281e-05,
+      "loss": 11.4865,
+      "step": 1127
+    },
+    {
+      "epoch": 0.04387269916280155,
+      "grad_norm": 0.13687454164028168,
+      "learning_rate": 2.1610517335549563e-05,
+      "loss": 11.4135,
+      "step": 1128
+    },
+    {
+      "epoch": 0.04391159339964801,
+      "grad_norm": 0.1115826964378357,
+      "learning_rate": 2.147343985511253e-05,
+      "loss": 11.4259,
+      "step": 1129
+    },
+    {
+      "epoch": 0.043950487636494465,
+      "grad_norm": 0.11887865513563156,
+      "learning_rate": 2.1336746195015846e-05,
+      "loss": 11.4208,
+      "step": 1130
+    },
+    {
+      "epoch": 0.043989381873340914,
+      "grad_norm": 0.1860654354095459,
+      "learning_rate": 2.120043702338772e-05,
+      "loss": 11.5272,
+      "step": 1131
+    },
+    {
+      "epoch": 0.04402827611018737,
+      "grad_norm": 0.1269589364528656,
+      "learning_rate": 2.1064513006477017e-05,
+      "loss": 11.4515,
+      "step": 1132
+    },
+    {
+      "epoch": 0.04406717034703383,
+      "grad_norm": 0.15289169549942017,
+      "learning_rate": 2.092897480865008e-05,
+      "loss": 11.4436,
+      "step": 1133
+    },
+    {
+      "epoch": 0.044106064583880285,
+      "grad_norm": 0.15345978736877441,
+      "learning_rate": 2.0793823092387432e-05,
+      "loss": 11.4424,
+      "step": 1134
+    },
+    {
+      "epoch": 0.04414495882072674,
+      "grad_norm": 0.12720987200737,
+      "learning_rate": 2.065905851828056e-05,
+      "loss": 11.4425,
+      "step": 1135
+    },
+    {
+      "epoch": 0.04418385305757319,
+      "grad_norm": 0.13926883041858673,
+      "learning_rate": 2.0524681745028708e-05,
+      "loss": 11.4444,
+      "step": 1136
+    },
+    {
+      "epoch": 0.04422274729441965,
+      "grad_norm": 0.1361120641231537,
+      "learning_rate": 2.0390693429435627e-05,
+      "loss": 11.4485,
+      "step": 1137
+    },
+    {
+      "epoch": 0.044261641531266105,
+      "grad_norm": 0.17767587304115295,
+      "learning_rate": 2.025709422640637e-05,
+      "loss": 11.4634,
+      "step": 1138
+    },
+    {
+      "epoch": 0.04430053576811256,
+      "grad_norm": 0.20594410598278046,
+      "learning_rate": 2.0123884788944036e-05,
+      "loss": 11.4492,
+      "step": 1139
+    },
+    {
+      "epoch": 0.04433943000495902,
+      "grad_norm": 0.21684671938419342,
+      "learning_rate": 1.9991065768146787e-05,
+      "loss": 11.4779,
+      "step": 1140
+    },
+    {
+      "epoch": 0.04437832424180547,
+      "grad_norm": 0.11622948944568634,
+      "learning_rate": 1.985863781320435e-05,
+      "loss": 11.4141,
+      "step": 1141
+    },
+    {
+      "epoch": 0.044417218478651925,
+      "grad_norm": 0.21400313079357147,
+      "learning_rate": 1.9726601571395075e-05,
+      "loss": 11.4662,
+      "step": 1142
+    },
+    {
+      "epoch": 0.04445611271549838,
+      "grad_norm": 0.19747650623321533,
+      "learning_rate": 1.9594957688082793e-05,
+      "loss": 11.4697,
+      "step": 1143
+    },
+    {
+      "epoch": 0.04449500695234484,
+      "grad_norm": 0.16535556316375732,
+      "learning_rate": 1.946370680671341e-05,
+      "loss": 11.4289,
+      "step": 1144
+    },
+    {
+      "epoch": 0.04453390118919129,
+      "grad_norm": 0.10678819566965103,
+      "learning_rate": 1.933284956881204e-05,
+      "loss": 11.4269,
+      "step": 1145
+    },
+    {
+      "epoch": 0.044572795426037745,
+      "grad_norm": 0.1680564284324646,
+      "learning_rate": 1.920238661397972e-05,
+      "loss": 11.4831,
+      "step": 1146
+    },
+    {
+      "epoch": 0.0446116896628842,
+      "grad_norm": 0.20117631554603577,
+      "learning_rate": 1.9072318579890326e-05,
+      "loss": 11.4743,
+      "step": 1147
+    },
+    {
+      "epoch": 0.04465058389973066,
+      "grad_norm": 0.1598464548587799,
+      "learning_rate": 1.894264610228744e-05,
+      "loss": 11.4333,
+      "step": 1148
+    },
+    {
+      "epoch": 0.044689478136577115,
+      "grad_norm": 0.13115167617797852,
+      "learning_rate": 1.8813369814981275e-05,
+      "loss": 11.4237,
+      "step": 1149
+    },
+    {
+      "epoch": 0.044728372373423565,
+      "grad_norm": 0.13870635628700256,
+      "learning_rate": 1.868449034984554e-05,
+      "loss": 11.4444,
+      "step": 1150
+    },
+    {
+      "epoch": 0.04476726661027002,
+      "grad_norm": 0.16505026817321777,
+      "learning_rate": 1.85560083368143e-05,
+      "loss": 11.4504,
+      "step": 1151
+    },
+    {
+      "epoch": 0.04480616084711648,
+      "grad_norm": 0.12766501307487488,
+      "learning_rate": 1.8427924403879115e-05,
+      "loss": 11.4517,
+      "step": 1152
+    },
+    {
+      "epoch": 0.044845055083962936,
+      "grad_norm": 0.18523377180099487,
+      "learning_rate": 1.8300239177085676e-05,
+      "loss": 11.4428,
+      "step": 1153
+    },
+    {
+      "epoch": 0.04488394932080939,
+      "grad_norm": 0.21062421798706055,
+      "learning_rate": 1.8172953280530914e-05,
+      "loss": 11.4629,
+      "step": 1154
+    },
+    {
+      "epoch": 0.04492284355765584,
+      "grad_norm": 0.16536962985992432,
+      "learning_rate": 1.804606733636004e-05,
+      "loss": 11.4956,
+      "step": 1155
+    },
+    {
+      "epoch": 0.0449617377945023,
+      "grad_norm": 0.15244871377944946,
+      "learning_rate": 1.791958196476321e-05,
+      "loss": 11.4157,
+      "step": 1156
+    },
+    {
+      "epoch": 0.045000632031348756,
+      "grad_norm": 0.1967291533946991,
+      "learning_rate": 1.779349778397279e-05,
+      "loss": 11.4396,
+      "step": 1157
+    },
+    {
+      "epoch": 0.04503952626819521,
+      "grad_norm": 0.2290966659784317,
+      "learning_rate": 1.766781541026018e-05,
+      "loss": 11.4687,
+      "step": 1158
+    },
+    {
+      "epoch": 0.04507842050504166,
+      "grad_norm": 0.1414095014333725,
+      "learning_rate": 1.754253545793285e-05,
+      "loss": 11.4353,
+      "step": 1159
+    },
+    {
+      "epoch": 0.04511731474188812,
+      "grad_norm": 0.14760176837444305,
+      "learning_rate": 1.741765853933125e-05,
+      "loss": 11.4339,
+      "step": 1160
+    },
+    {
+      "epoch": 0.045156208978734576,
+      "grad_norm": 0.12192599475383759,
+      "learning_rate": 1.7293185264826018e-05,
+      "loss": 11.4519,
+      "step": 1161
+    },
+    {
+      "epoch": 0.04519510321558103,
+      "grad_norm": 0.11039043962955475,
+      "learning_rate": 1.7169116242814796e-05,
+      "loss": 11.4252,
+      "step": 1162
+    },
+    {
+      "epoch": 0.04523399745242749,
+      "grad_norm": 0.15859828889369965,
+      "learning_rate": 1.7045452079719282e-05,
+      "loss": 11.4189,
+      "step": 1163
+    },
+    {
+      "epoch": 0.04527289168927394,
+      "grad_norm": 0.16476251184940338,
+      "learning_rate": 1.6922193379982453e-05,
+      "loss": 11.4449,
+      "step": 1164
+    },
+    {
+      "epoch": 0.045311785926120396,
+      "grad_norm": 0.24089524149894714,
+      "learning_rate": 1.679934074606533e-05,
+      "loss": 11.4579,
+      "step": 1165
+    },
+    {
+      "epoch": 0.04535068016296685,
+      "grad_norm": 0.14605370163917542,
+      "learning_rate": 1.6676894778444207e-05,
+      "loss": 11.4767,
+      "step": 1166
+    },
+    {
+      "epoch": 0.04538957439981331,
+      "grad_norm": 0.15800780057907104,
+      "learning_rate": 1.6554856075607793e-05,
+      "loss": 11.4737,
+      "step": 1167
+    },
+    {
+      "epoch": 0.045428468636659766,
+      "grad_norm": 0.1213153675198555,
+      "learning_rate": 1.6433225234054027e-05,
+      "loss": 11.4345,
+      "step": 1168
+    },
+    {
+      "epoch": 0.045467362873506216,
+      "grad_norm": 0.18790219724178314,
+      "learning_rate": 1.63120028482874e-05,
+      "loss": 11.4845,
+      "step": 1169
+    },
+    {
+      "epoch": 0.04550625711035267,
+      "grad_norm": 0.12065570801496506,
+      "learning_rate": 1.619118951081594e-05,
+      "loss": 11.4331,
+      "step": 1170
+    },
+    {
+      "epoch": 0.04554515134719913,
+      "grad_norm": 0.18314050137996674,
+      "learning_rate": 1.607078581214836e-05,
+      "loss": 11.4867,
+      "step": 1171
+    },
+    {
+      "epoch": 0.045584045584045586,
+      "grad_norm": 0.12358561158180237,
+      "learning_rate": 1.5950792340791043e-05,
+      "loss": 11.4415,
+      "step": 1172
+    },
+    {
+      "epoch": 0.045622939820892036,
+      "grad_norm": 0.16861197352409363,
+      "learning_rate": 1.5831209683245462e-05,
+      "loss": 11.4463,
+      "step": 1173
+    },
+    {
+      "epoch": 0.04566183405773849,
+      "grad_norm": 0.117819644510746,
+      "learning_rate": 1.5712038424004993e-05,
+      "loss": 11.4314,
+      "step": 1174
+    },
+    {
+      "epoch": 0.04570072829458495,
+      "grad_norm": 0.1934998482465744,
+      "learning_rate": 1.5593279145552164e-05,
+      "loss": 11.4269,
+      "step": 1175
+    },
+    {
+      "epoch": 0.045739622531431406,
+      "grad_norm": 0.0948733314871788,
+      "learning_rate": 1.547493242835596e-05,
+      "loss": 11.4259,
+      "step": 1176
+    },
+    {
+      "epoch": 0.04577851676827786,
+      "grad_norm": 0.11361514031887054,
+      "learning_rate": 1.535699885086872e-05,
+      "loss": 11.4504,
+      "step": 1177
+    },
+    {
+      "epoch": 0.04581741100512431,
+      "grad_norm": 0.18907108902931213,
+      "learning_rate": 1.5239478989523525e-05,
+      "loss": 11.4305,
+      "step": 1178
+    },
+    {
+      "epoch": 0.04585630524197077,
+      "grad_norm": 0.2364674061536789,
+      "learning_rate": 1.5122373418731306e-05,
+      "loss": 11.4336,
+      "step": 1179
+    },
+    {
+      "epoch": 0.04589519947881723,
+      "grad_norm": 0.1388162076473236,
+      "learning_rate": 1.500568271087801e-05,
+      "loss": 11.4671,
+      "step": 1180
+    },
+    {
+      "epoch": 0.04593409371566368,
+      "grad_norm": 0.17299380898475647,
+      "learning_rate": 1.4889407436321822e-05,
+      "loss": 11.4643,
+      "step": 1181
+    },
+    {
+      "epoch": 0.04597298795251014,
+      "grad_norm": 0.10393727570772171,
+      "learning_rate": 1.4773548163390406e-05,
+      "loss": 11.4679,
+      "step": 1182
+    },
+    {
+      "epoch": 0.04601188218935659,
+      "grad_norm": 0.19474563002586365,
+      "learning_rate": 1.4658105458378113e-05,
+      "loss": 11.4569,
+      "step": 1183
+    },
+    {
+      "epoch": 0.04605077642620305,
+      "grad_norm": 0.15817129611968994,
+      "learning_rate": 1.4543079885543098e-05,
+      "loss": 11.4179,
+      "step": 1184
+    },
+    {
+      "epoch": 0.0460896706630495,
+      "grad_norm": 0.15230096876621246,
+      "learning_rate": 1.4428472007104832e-05,
+      "loss": 11.4267,
+      "step": 1185
+    },
+    {
+      "epoch": 0.04612856489989596,
+      "grad_norm": 0.17485696077346802,
+      "learning_rate": 1.4314282383241096e-05,
+      "loss": 11.4485,
+      "step": 1186
+    },
+    {
+      "epoch": 0.04616745913674241,
+      "grad_norm": 0.12060049921274185,
+      "learning_rate": 1.4200511572085274e-05,
+      "loss": 11.4368,
+      "step": 1187
+    },
+    {
+      "epoch": 0.04620635337358887,
+      "grad_norm": 0.150129035115242,
+      "learning_rate": 1.4087160129723853e-05,
+      "loss": 11.4684,
+      "step": 1188
+    },
+    {
+      "epoch": 0.046245247610435324,
+      "grad_norm": 0.11091771721839905,
+      "learning_rate": 1.3974228610193374e-05,
+      "loss": 11.4294,
+      "step": 1189
+    },
+    {
+      "epoch": 0.04628414184728178,
+      "grad_norm": 0.14679087698459625,
+      "learning_rate": 1.3861717565477994e-05,
+      "loss": 11.4625,
+      "step": 1190
+    },
+    {
+      "epoch": 0.04632303608412824,
+      "grad_norm": 0.13646747171878815,
+      "learning_rate": 1.3749627545506616e-05,
+      "loss": 11.5092,
+      "step": 1191
+    },
+    {
+      "epoch": 0.04636193032097469,
+      "grad_norm": 0.1430552899837494,
+      "learning_rate": 1.363795909815032e-05,
+      "loss": 11.457,
+      "step": 1192
+    },
+    {
+      "epoch": 0.046400824557821144,
+      "grad_norm": 0.1363144963979721,
+      "learning_rate": 1.3526712769219618e-05,
+      "loss": 11.4246,
+      "step": 1193
+    },
+    {
+      "epoch": 0.0464397187946676,
+      "grad_norm": 0.12670528888702393,
+      "learning_rate": 1.3415889102461775e-05,
+      "loss": 11.4318,
+      "step": 1194
+    },
+    {
+      "epoch": 0.04647861303151406,
+      "grad_norm": 0.3385750651359558,
+      "learning_rate": 1.3305488639558206e-05,
+      "loss": 11.5832,
+      "step": 1195
+    },
+    {
+      "epoch": 0.046517507268360514,
+      "grad_norm": 0.2261715829372406,
+      "learning_rate": 1.3195511920121795e-05,
+      "loss": 11.5506,
+      "step": 1196
+    },
+    {
+      "epoch": 0.046556401505206964,
+      "grad_norm": 0.24841472506523132,
+      "learning_rate": 1.3085959481694265e-05,
+      "loss": 11.4581,
+      "step": 1197
+    },
+    {
+      "epoch": 0.04659529574205342,
+      "grad_norm": 0.18506531417369843,
+      "learning_rate": 1.2976831859743521e-05,
+      "loss": 11.4293,
+      "step": 1198
+    },
+    {
+      "epoch": 0.04663418997889988,
+      "grad_norm": 0.15063372254371643,
+      "learning_rate": 1.286812958766106e-05,
+      "loss": 11.4309,
+      "step": 1199
+    },
+    {
+      "epoch": 0.046673084215746334,
+      "grad_norm": 0.24693375825881958,
+      "learning_rate": 1.2759853196759453e-05,
+      "loss": 11.5344,
+      "step": 1200
+    },
+    {
+      "epoch": 0.046711978452592784,
+      "grad_norm": 0.12466446310281754,
+      "learning_rate": 1.2652003216269526e-05,
+      "loss": 11.4251,
+      "step": 1201
+    },
+    {
+      "epoch": 0.04675087268943924,
+      "grad_norm": 0.29291486740112305,
+      "learning_rate": 1.2544580173337983e-05,
+      "loss": 11.6883,
+      "step": 1202
+    },
+    {
+      "epoch": 0.0467897669262857,
+      "grad_norm": 0.1630331128835678,
+      "learning_rate": 1.2437584593024753e-05,
+      "loss": 11.4409,
+      "step": 1203
+    },
+    {
+      "epoch": 0.046828661163132154,
+      "grad_norm": 0.18174372613430023,
+      "learning_rate": 1.2331016998300394e-05,
+      "loss": 11.4636,
+      "step": 1204
+    },
+    {
+      "epoch": 0.04686755539997861,
+      "grad_norm": 0.1426887810230255,
+      "learning_rate": 1.2224877910043587e-05,
+      "loss": 11.4672,
+      "step": 1205
+    },
+    {
+      "epoch": 0.04690644963682506,
+      "grad_norm": 0.10058625787496567,
+      "learning_rate": 1.2119167847038548e-05,
+      "loss": 11.4118,
+      "step": 1206
+    },
+    {
+      "epoch": 0.04694534387367152,
+      "grad_norm": 0.15972256660461426,
+      "learning_rate": 1.201388732597255e-05,
+      "loss": 11.4379,
+      "step": 1207
+    },
+    {
+      "epoch": 0.046984238110517974,
+      "grad_norm": 0.20637661218643188,
+      "learning_rate": 1.190903686143332e-05,
+      "loss": 11.4773,
+      "step": 1208
+    },
+    {
+      "epoch": 0.04702313234736443,
+      "grad_norm": 0.2554698884487152,
+      "learning_rate": 1.18046169659066e-05,
+      "loss": 11.4568,
+      "step": 1209
+    },
+    {
+      "epoch": 0.04706202658421089,
+      "grad_norm": 0.17103084921836853,
+      "learning_rate": 1.170062814977354e-05,
+      "loss": 11.4325,
+      "step": 1210
+    },
+    {
+      "epoch": 0.04710092082105734,
+      "grad_norm": 0.15731778740882874,
+      "learning_rate": 1.1597070921308363e-05,
+      "loss": 11.4215,
+      "step": 1211
+    },
+    {
+      "epoch": 0.047139815057903794,
+      "grad_norm": 0.1296573281288147,
+      "learning_rate": 1.1493945786675753e-05,
+      "loss": 11.4338,
+      "step": 1212
+    },
+    {
+      "epoch": 0.04717870929475025,
+      "grad_norm": 0.1563749611377716,
+      "learning_rate": 1.1391253249928369e-05,
+      "loss": 11.4196,
+      "step": 1213
+    },
+    {
+      "epoch": 0.04721760353159671,
+      "grad_norm": 0.13780969381332397,
+      "learning_rate": 1.1288993813004467e-05,
+      "loss": 11.4687,
+      "step": 1214
+    },
+    {
+      "epoch": 0.04725649776844316,
+      "grad_norm": 0.22211474180221558,
+      "learning_rate": 1.118716797572542e-05,
+      "loss": 11.4907,
+      "step": 1215
+    },
+    {
+      "epoch": 0.047295392005289615,
+      "grad_norm": 0.1490112692117691,
+      "learning_rate": 1.1085776235793243e-05,
+      "loss": 11.4091,
+      "step": 1216
+    },
+    {
+      "epoch": 0.04733428624213607,
+      "grad_norm": 0.11238259822130203,
+      "learning_rate": 1.098481908878819e-05,
+      "loss": 11.4216,
+      "step": 1217
+    },
+    {
+      "epoch": 0.04737318047898253,
+      "grad_norm": 0.22447730600833893,
+      "learning_rate": 1.0884297028166302e-05,
+      "loss": 11.5614,
+      "step": 1218
+    },
+    {
+      "epoch": 0.047412074715828985,
+      "grad_norm": 0.1197625920176506,
+      "learning_rate": 1.0784210545257034e-05,
+      "loss": 11.4529,
+      "step": 1219
+    },
+    {
+      "epoch": 0.047450968952675435,
+      "grad_norm": 0.20383505523204803,
+      "learning_rate": 1.0684560129260822e-05,
+      "loss": 11.4606,
+      "step": 1220
+    },
+    {
+      "epoch": 0.04748986318952189,
+      "grad_norm": 0.1828719973564148,
+      "learning_rate": 1.0585346267246743e-05,
+      "loss": 11.4802,
+      "step": 1221
+    },
+    {
+      "epoch": 0.04752875742636835,
+      "grad_norm": 0.2332971841096878,
+      "learning_rate": 1.0486569444149995e-05,
+      "loss": 11.4597,
+      "step": 1222
+    },
+    {
+      "epoch": 0.047567651663214805,
+      "grad_norm": 0.12918928265571594,
+      "learning_rate": 1.038823014276975e-05,
+      "loss": 11.4668,
+      "step": 1223
+    },
+    {
+      "epoch": 0.04760654590006126,
+      "grad_norm": 0.20176751911640167,
+      "learning_rate": 1.0290328843766628e-05,
+      "loss": 11.4392,
+      "step": 1224
+    },
+    {
+      "epoch": 0.04764544013690771,
+      "grad_norm": 0.1748267114162445,
+      "learning_rate": 1.019286602566033e-05,
+      "loss": 11.4722,
+      "step": 1225
+    },
+    {
+      "epoch": 0.04768433437375417,
+      "grad_norm": 0.13155628740787506,
+      "learning_rate": 1.009584216482743e-05,
+      "loss": 11.4369,
+      "step": 1226
+    },
+    {
+      "epoch": 0.047723228610600625,
+      "grad_norm": 0.15083764493465424,
+      "learning_rate": 9.999257735498957e-06,
+      "loss": 11.4321,
+      "step": 1227
+    },
+    {
+      "epoch": 0.04776212284744708,
+      "grad_norm": 0.09894982725381851,
+      "learning_rate": 9.903113209758096e-06,
+      "loss": 11.4426,
+      "step": 1228
+    },
+    {
+      "epoch": 0.04780101708429353,
+      "grad_norm": 0.1379556804895401,
+      "learning_rate": 9.807409057537876e-06,
+      "loss": 11.4429,
+      "step": 1229
+    },
+    {
+      "epoch": 0.04783991132113999,
+      "grad_norm": 0.16551163792610168,
+      "learning_rate": 9.712145746618873e-06,
+      "loss": 11.4475,
+      "step": 1230
+    },
+    {
+      "epoch": 0.047878805557986445,
+      "grad_norm": 0.15437820553779602,
+      "learning_rate": 9.61732374262696e-06,
+      "loss": 11.4187,
+      "step": 1231
+    },
+    {
+      "epoch": 0.0479176997948329,
+      "grad_norm": 0.12910741567611694,
+      "learning_rate": 9.522943509030968e-06,
+      "loss": 11.4256,
+      "step": 1232
+    },
+    {
+      "epoch": 0.04795659403167936,
+      "grad_norm": 0.09460493922233582,
+      "learning_rate": 9.429005507140487e-06,
+      "loss": 11.439,
+      "step": 1233
+    },
+    {
+      "epoch": 0.04799548826852581,
+      "grad_norm": 0.16753201186656952,
+      "learning_rate": 9.33551019610348e-06,
+      "loss": 11.4761,
+      "step": 1234
+    },
+    {
+      "epoch": 0.048034382505372265,
+      "grad_norm": 0.14802870154380798,
+      "learning_rate": 9.242458032904311e-06,
+      "loss": 11.4195,
+      "step": 1235
+    },
+    {
+      "epoch": 0.04807327674221872,
+      "grad_norm": 0.20524270832538605,
+      "learning_rate": 9.14984947236115e-06,
+      "loss": 11.4556,
+      "step": 1236
+    },
+    {
+      "epoch": 0.04811217097906518,
+      "grad_norm": 0.18146079778671265,
+      "learning_rate": 9.057684967124036e-06,
+      "loss": 11.439,
+      "step": 1237
+    },
+    {
+      "epoch": 0.048151065215911636,
+      "grad_norm": 0.3100352883338928,
+      "learning_rate": 8.96596496767259e-06,
+      "loss": 11.5205,
+      "step": 1238
+    },
+    {
+      "epoch": 0.048189959452758085,
+      "grad_norm": 0.13172343373298645,
+      "learning_rate": 8.874689922313717e-06,
+      "loss": 11.4617,
+      "step": 1239
+    },
+    {
+      "epoch": 0.04822885368960454,
+      "grad_norm": 0.21646977961063385,
+      "learning_rate": 8.7838602771795e-06,
+      "loss": 11.5159,
+      "step": 1240
+    },
+    {
+      "epoch": 0.048267747926451,
+      "grad_norm": 0.12774217128753662,
+      "learning_rate": 8.693476476225037e-06,
+      "loss": 11.4551,
+      "step": 1241
+    },
+    {
+      "epoch": 0.048306642163297456,
+      "grad_norm": 0.17098848521709442,
+      "learning_rate": 8.603538961226232e-06,
+      "loss": 11.442,
+      "step": 1242
+    },
+    {
+      "epoch": 0.048345536400143906,
+      "grad_norm": 0.12627612054347992,
+      "learning_rate": 8.51404817177761e-06,
+      "loss": 11.489,
+      "step": 1243
+    },
+    {
+      "epoch": 0.04838443063699036,
+      "grad_norm": 0.1685693860054016,
+      "learning_rate": 8.425004545290227e-06,
+      "loss": 11.4365,
+      "step": 1244
+    },
+    {
+      "epoch": 0.04842332487383682,
+      "grad_norm": 0.15457966923713684,
+      "learning_rate": 8.336408516989536e-06,
+      "loss": 11.4073,
+      "step": 1245
+    },
+    {
+      "epoch": 0.048462219110683276,
+      "grad_norm": 0.14986367523670197,
+      "learning_rate": 8.24826051991312e-06,
+      "loss": 11.4463,
+      "step": 1246
+    },
+    {
+      "epoch": 0.04850111334752973,
+      "grad_norm": 0.17568694055080414,
+      "learning_rate": 8.160560984908849e-06,
+      "loss": 11.4313,
+      "step": 1247
+    },
+    {
+      "epoch": 0.04854000758437618,
+      "grad_norm": 0.1453072726726532,
+      "learning_rate": 8.073310340632457e-06,
+      "loss": 11.427,
+      "step": 1248
+    },
+    {
+      "epoch": 0.04857890182122264,
+      "grad_norm": 0.12231708317995071,
+      "learning_rate": 7.986509013545673e-06,
+      "loss": 11.4361,
+      "step": 1249
+    },
+    {
+      "epoch": 0.048617796058069096,
+      "grad_norm": 0.18410533666610718,
+      "learning_rate": 7.900157427914101e-06,
+      "loss": 11.4503,
+      "step": 1250
+    },
+    {
+      "epoch": 0.04865669029491555,
+      "grad_norm": 0.10761623829603195,
+      "learning_rate": 7.81425600580502e-06,
+      "loss": 11.4477,
+      "step": 1251
+    },
+    {
+      "epoch": 0.04869558453176201,
+      "grad_norm": 0.15875908732414246,
+      "learning_rate": 7.728805167085462e-06,
+      "loss": 11.4199,
+      "step": 1252
+    },
+    {
+      "epoch": 0.04873447876860846,
+      "grad_norm": 0.10637667030096054,
+      "learning_rate": 7.643805329420117e-06,
+      "loss": 11.4229,
+      "step": 1253
+    },
+    {
+      "epoch": 0.048773373005454916,
+      "grad_norm": 0.1852317601442337,
+      "learning_rate": 7.559256908269252e-06,
+      "loss": 11.4606,
+      "step": 1254
+    },
+    {
+      "epoch": 0.04881226724230137,
+      "grad_norm": 0.15936970710754395,
+      "learning_rate": 7.475160316886698e-06,
+      "loss": 11.4559,
+      "step": 1255
+    },
+    {
+      "epoch": 0.04885116147914783,
+      "grad_norm": 0.15641069412231445,
+      "learning_rate": 7.3915159663179075e-06,
+      "loss": 11.4544,
+      "step": 1256
+    },
+    {
+      "epoch": 0.04889005571599428,
+      "grad_norm": 0.12669280171394348,
+      "learning_rate": 7.308324265397836e-06,
+      "loss": 11.4623,
+      "step": 1257
+    },
+    {
+      "epoch": 0.048928949952840736,
+      "grad_norm": 0.2127736657857895,
+      "learning_rate": 7.225585620748954e-06,
+      "loss": 11.4445,
+      "step": 1258
+    },
+    {
+      "epoch": 0.04896784418968719,
+      "grad_norm": 0.1329868584871292,
+      "learning_rate": 7.143300436779398e-06,
+      "loss": 11.4263,
+      "step": 1259
+    },
+    {
+      "epoch": 0.04900673842653365,
+      "grad_norm": 0.191726416349411,
+      "learning_rate": 7.061469115680764e-06,
+      "loss": 11.4487,
+      "step": 1260
+    },
+    {
+      "epoch": 0.049045632663380107,
+      "grad_norm": 0.0934595912694931,
+      "learning_rate": 6.980092057426346e-06,
+      "loss": 11.4218,
+      "step": 1261
+    },
+    {
+      "epoch": 0.049084526900226556,
+      "grad_norm": 0.17179986834526062,
+      "learning_rate": 6.899169659769111e-06,
+      "loss": 11.4969,
+      "step": 1262
+    },
+    {
+      "epoch": 0.04912342113707301,
+      "grad_norm": 0.1705106943845749,
+      "learning_rate": 6.818702318239689e-06,
+      "loss": 11.4973,
+      "step": 1263
+    },
+    {
+      "epoch": 0.04916231537391947,
+      "grad_norm": 0.11566983908414841,
+      "learning_rate": 6.738690426144545e-06,
+      "loss": 11.4269,
+      "step": 1264
+    },
+    {
+      "epoch": 0.04920120961076593,
+      "grad_norm": 0.1510932445526123,
+      "learning_rate": 6.659134374563969e-06,
+      "loss": 11.454,
+      "step": 1265
+    },
+    {
+      "epoch": 0.04924010384761238,
+      "grad_norm": 0.15906092524528503,
+      "learning_rate": 6.580034552350267e-06,
+      "loss": 11.4606,
+      "step": 1266
+    },
+    {
+      "epoch": 0.04927899808445883,
+      "grad_norm": 0.11045938730239868,
+      "learning_rate": 6.501391346125707e-06,
+      "loss": 11.4598,
+      "step": 1267
+    },
+    {
+      "epoch": 0.04931789232130529,
+      "grad_norm": 0.16918398439884186,
+      "learning_rate": 6.423205140280797e-06,
+      "loss": 11.4941,
+      "step": 1268
+    },
+    {
+      "epoch": 0.04935678655815175,
+      "grad_norm": 0.18566885590553284,
+      "learning_rate": 6.345476316972321e-06,
+      "loss": 11.4397,
+      "step": 1269
+    },
+    {
+      "epoch": 0.049395680794998204,
+      "grad_norm": 0.14385563135147095,
+      "learning_rate": 6.268205256121396e-06,
+      "loss": 11.4464,
+      "step": 1270
+    },
+    {
+      "epoch": 0.04943457503184465,
+      "grad_norm": 0.2474178671836853,
+      "learning_rate": 6.191392335411839e-06,
+      "loss": 11.4548,
+      "step": 1271
+    },
+    {
+      "epoch": 0.04947346926869111,
+      "grad_norm": 0.12321379780769348,
+      "learning_rate": 6.115037930288059e-06,
+      "loss": 11.4204,
+      "step": 1272
+    },
+    {
+      "epoch": 0.04951236350553757,
+      "grad_norm": 0.21031615138053894,
+      "learning_rate": 6.03914241395338e-06,
+      "loss": 11.5541,
+      "step": 1273
+    },
+    {
+      "epoch": 0.049551257742384024,
+      "grad_norm": 0.19632770121097565,
+      "learning_rate": 5.963706157368199e-06,
+      "loss": 11.4914,
+      "step": 1274
+    },
+    {
+      "epoch": 0.04959015197923048,
+      "grad_norm": 0.13494326174259186,
+      "learning_rate": 5.888729529248149e-06,
+      "loss": 11.4478,
+      "step": 1275
+    },
+    {
+      "epoch": 0.04962904621607693,
+      "grad_norm": 0.1579502820968628,
+      "learning_rate": 5.814212896062277e-06,
+      "loss": 11.4493,
+      "step": 1276
+    },
+    {
+      "epoch": 0.04966794045292339,
+      "grad_norm": 0.13213543593883514,
+      "learning_rate": 5.7401566220313005e-06,
+      "loss": 11.4415,
+      "step": 1277
+    },
+    {
+      "epoch": 0.049706834689769844,
+      "grad_norm": 0.15101048350334167,
+      "learning_rate": 5.666561069125797e-06,
+      "loss": 11.4401,
+      "step": 1278
+    },
+    {
+      "epoch": 0.0497457289266163,
+      "grad_norm": 0.20494867861270905,
+      "learning_rate": 5.593426597064444e-06,
+      "loss": 11.4223,
+      "step": 1279
+    },
+    {
+      "epoch": 0.04978462316346276,
+      "grad_norm": 0.13972818851470947,
+      "learning_rate": 5.520753563312253e-06,
+      "loss": 11.4167,
+      "step": 1280
+    },
+    {
+      "epoch": 0.04982351740030921,
+      "grad_norm": 0.15213821828365326,
+      "learning_rate": 5.448542323078843e-06,
+      "loss": 11.466,
+      "step": 1281
+    },
+    {
+      "epoch": 0.049862411637155664,
+      "grad_norm": 0.1816379427909851,
+      "learning_rate": 5.376793229316645e-06,
+      "loss": 11.5445,
+      "step": 1282
+    },
+    {
+      "epoch": 0.04990130587400212,
+      "grad_norm": 0.22997316718101501,
+      "learning_rate": 5.3055066327192925e-06,
+      "loss": 11.4652,
+      "step": 1283
+    },
+    {
+      "epoch": 0.04994020011084858,
+      "grad_norm": 0.1814534068107605,
+      "learning_rate": 5.2346828817197655e-06,
+      "loss": 11.4606,
+      "step": 1284
+    },
+    {
+      "epoch": 0.04997909434769503,
+      "grad_norm": 0.3224797546863556,
+      "learning_rate": 5.164322322488802e-06,
+      "loss": 11.4257,
+      "step": 1285
+    },
+    {
+      "epoch": 0.050017988584541484,
+      "grad_norm": 0.11046472191810608,
+      "learning_rate": 5.094425298933136e-06,
+      "loss": 11.4264,
+      "step": 1286
+    },
+    {
+      "epoch": 0.05005688282138794,
+      "grad_norm": 0.18401353061199188,
+      "learning_rate": 5.024992152693875e-06,
+      "loss": 11.5579,
+      "step": 1287
+    },
+    {
+      "epoch": 0.0500957770582344,
+      "grad_norm": 0.17558960616588593,
+      "learning_rate": 4.956023223144768e-06,
+      "loss": 11.4755,
+      "step": 1288
+    },
+    {
+      "epoch": 0.050134671295080854,
+      "grad_norm": 0.11735843122005463,
+      "learning_rate": 4.887518847390571e-06,
+      "loss": 11.4153,
+      "step": 1289
+    },
+    {
+      "epoch": 0.050173565531927304,
+      "grad_norm": 0.1737535148859024,
+      "learning_rate": 4.819479360265444e-06,
+      "loss": 11.4699,
+      "step": 1290
+    },
+    {
+      "epoch": 0.05021245976877376,
+      "grad_norm": 0.20732508599758148,
+      "learning_rate": 4.7519050943312325e-06,
+      "loss": 11.4195,
+      "step": 1291
+    },
+    {
+      "epoch": 0.05025135400562022,
+      "grad_norm": 0.1525745838880539,
+      "learning_rate": 4.684796379875922e-06,
+      "loss": 11.4366,
+      "step": 1292
+    },
+    {
+      "epoch": 0.050290248242466674,
+      "grad_norm": 0.15391410887241364,
+      "learning_rate": 4.618153544911929e-06,
+      "loss": 11.5117,
+      "step": 1293
+    },
+    {
+      "epoch": 0.05032914247931313,
+      "grad_norm": 0.10862606763839722,
+      "learning_rate": 4.551976915174605e-06,
+      "loss": 11.4329,
+      "step": 1294
+    },
+    {
+      "epoch": 0.05036803671615958,
+      "grad_norm": 0.10060062259435654,
+      "learning_rate": 4.48626681412061e-06,
+      "loss": 11.4304,
+      "step": 1295
+    },
+    {
+      "epoch": 0.05040693095300604,
+      "grad_norm": 0.12805354595184326,
+      "learning_rate": 4.421023562926252e-06,
+      "loss": 11.4189,
   ],
   "logging_steps": 1,
@@ -7571,12 +10070,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
       },
       "attributes": {}
     }
   },
-  "total_flos":
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
|
9113 |
+
"step": 1296
|
9114 |
+
},
|
9115 |
+
{
|
9116 |
+
"epoch": 0.050445825189852495,
|
9117 |
+
"grad_norm": 0.17536696791648865,
|
9118 |
+
"learning_rate": 4.356247480486031e-06,
|
9119 |
+
"loss": 11.4282,
|
9120 |
+
"step": 1297
|
9121 |
+
},
|
9122 |
+
{
|
9123 |
+
"epoch": 0.05048471942669895,
|
9124 |
+
"grad_norm": 0.17301538586616516,
|
9125 |
+
"learning_rate": 4.291938883411007e-06,
|
9126 |
+
"loss": 11.419,
|
9127 |
+
"step": 1298
|
9128 |
+
},
|
9129 |
+
{
|
9130 |
+
"epoch": 0.0505236136635454,
|
9131 |
+
"grad_norm": 0.17384307086467743,
|
9132 |
+
"learning_rate": 4.2280980860272874e-06,
|
9133 |
+
"loss": 11.4603,
|
9134 |
+
"step": 1299
|
9135 |
+
},
|
9136 |
+
{
|
9137 |
+
"epoch": 0.05056250790039186,
|
9138 |
+
"grad_norm": 0.14885058999061584,
|
9139 |
+
"learning_rate": 4.16472540037447e-06,
|
9140 |
+
"loss": 11.4561,
|
9141 |
+
"step": 1300
|
9142 |
+
},
|
9143 |
+
{
|
9144 |
+
"epoch": 0.050601402137238315,
|
9145 |
+
"grad_norm": 0.18874341249465942,
|
9146 |
+
"learning_rate": 4.101821136204142e-06,
|
9147 |
+
"loss": 11.437,
|
9148 |
+
"step": 1301
|
9149 |
+
},
|
9150 |
+
{
|
9151 |
+
"epoch": 0.05064029637408477,
|
9152 |
+
"grad_norm": 0.12578798830509186,
|
9153 |
+
"learning_rate": 4.039385600978318e-06,
|
9154 |
+
"loss": 11.4308,
|
9155 |
+
"step": 1302
|
9156 |
+
},
|
9157 |
+
{
|
9158 |
+
"epoch": 0.05067919061093123,
|
9159 |
+
"grad_norm": 0.14595210552215576,
|
9160 |
+
"learning_rate": 3.977419099868018e-06,
|
9161 |
+
"loss": 11.4412,
|
9162 |
+
"step": 1303
|
9163 |
+
},
|
9164 |
+
{
|
9165 |
+
"epoch": 0.05071808484777768,
|
9166 |
+
"grad_norm": 0.19140197336673737,
|
9167 |
+
"learning_rate": 3.915921935751687e-06,
|
9168 |
+
"loss": 11.4171,
|
9169 |
+
"step": 1304
|
9170 |
+
},
|
9171 |
+
{
|
9172 |
+
"epoch": 0.050756979084624135,
|
9173 |
+
"grad_norm": 0.13179464638233185,
|
9174 |
+
"learning_rate": 3.85489440921376e-06,
|
9175 |
+
"loss": 11.4658,
|
9176 |
+
"step": 1305
|
9177 |
+
},
|
9178 |
+
{
|
9179 |
+
"epoch": 0.05079587332147059,
|
9180 |
+
"grad_norm": 0.14757223427295685,
|
9181 |
+
"learning_rate": 3.794336818543209e-06,
|
9182 |
+
"loss": 11.4411,
|
9183 |
+
"step": 1306
|
9184 |
+
},
|
9185 |
+
{
|
9186 |
+
"epoch": 0.05083476755831705,
|
9187 |
+
"grad_norm": 0.16024403274059296,
|
9188 |
+
"learning_rate": 3.7342494597320755e-06,
|
9189 |
+
"loss": 11.4195,
|
9190 |
+
"step": 1307
|
9191 |
+
},
|
9192 |
+
{
|
9193 |
+
"epoch": 0.050873661795163505,
|
9194 |
+
"grad_norm": 0.18657468259334564,
|
9195 |
+
"learning_rate": 3.6746326264739504e-06,
|
9196 |
+
"loss": 11.4491,
|
9197 |
+
"step": 1308
|
9198 |
+
},
|
9199 |
+
{
|
9200 |
+
"epoch": 0.050912556032009955,
|
9201 |
+
"grad_norm": 0.23776055872440338,
|
9202 |
+
"learning_rate": 3.615486610162655e-06,
|
9203 |
+
"loss": 11.4958,
|
9204 |
+
"step": 1309
|
9205 |
+
},
|
9206 |
+
{
|
9207 |
+
"epoch": 0.05095145026885641,
|
9208 |
+
"grad_norm": 0.14701801538467407,
|
9209 |
+
"learning_rate": 3.5568116998907498e-06,
|
9210 |
+
"loss": 11.433,
|
9211 |
+
"step": 1310
|
9212 |
+
},
|
9213 |
+
{
|
9214 |
+
"epoch": 0.05099034450570287,
|
9215 |
+
"grad_norm": 0.1552416831254959,
|
9216 |
+
"learning_rate": 3.4986081824481152e-06,
|
9217 |
+
"loss": 11.4309,
|
9218 |
+
"step": 1311
|
9219 |
+
},
|
9220 |
+
{
|
9221 |
+
"epoch": 0.051029238742549325,
|
9222 |
+
"grad_norm": 0.17288999259471893,
|
9223 |
+
"learning_rate": 3.440876342320609e-06,
|
9224 |
+
"loss": 11.4981,
|
9225 |
+
"step": 1312
|
9226 |
+
},
|
9227 |
+
{
|
9228 |
+
"epoch": 0.051068132979395775,
|
9229 |
+
"grad_norm": 0.1504266858100891,
|
9230 |
+
"learning_rate": 3.3836164616885992e-06,
|
9231 |
+
"loss": 11.4175,
|
9232 |
+
"step": 1313
|
9233 |
+
},
|
9234 |
+
{
|
9235 |
+
"epoch": 0.05110702721624223,
|
9236 |
+
"grad_norm": 0.11923205107450485,
|
9237 |
+
"learning_rate": 3.3268288204256315e-06,
|
9238 |
+
"loss": 11.4175,
|
9239 |
+
"step": 1314
|
9240 |
+
},
|
9241 |
+
{
|
9242 |
+
"epoch": 0.05114592145308869,
|
9243 |
+
"grad_norm": 0.15316557884216309,
|
9244 |
+
"learning_rate": 3.270513696097055e-06,
|
9245 |
+
"loss": 11.4449,
|
9246 |
+
"step": 1315
|
9247 |
+
},
|
9248 |
+
{
|
9249 |
+
"epoch": 0.051184815689935145,
|
9250 |
+
"grad_norm": 0.15415562689304352,
|
9251 |
+
"learning_rate": 3.214671363958666e-06,
|
9252 |
+
"loss": 11.4771,
|
9253 |
+
"step": 1316
|
9254 |
+
},
|
9255 |
+
{
|
9256 |
+
"epoch": 0.0512237099267816,
|
9257 |
+
"grad_norm": 0.21763721108436584,
|
9258 |
+
"learning_rate": 3.159302096955319e-06,
|
9259 |
+
"loss": 11.4575,
|
9260 |
+
"step": 1317
|
9261 |
+
},
|
9262 |
+
{
|
9263 |
+
"epoch": 0.05126260416362805,
|
9264 |
+
"grad_norm": 0.1797555685043335,
|
9265 |
+
"learning_rate": 3.1044061657196867e-06,
|
9266 |
+
"loss": 11.4424,
|
9267 |
+
"step": 1318
|
9268 |
+
},
|
9269 |
+
{
|
9270 |
+
"epoch": 0.05130149840047451,
|
9271 |
+
"grad_norm": 0.1600632220506668,
|
9272 |
+
"learning_rate": 3.049983838570858e-06,
|
9273 |
+
"loss": 11.4647,
|
9274 |
+
"step": 1319
|
9275 |
+
},
|
9276 |
+
{
|
9277 |
+
"epoch": 0.051340392637320965,
|
9278 |
+
"grad_norm": 0.21375976502895355,
|
9279 |
+
"learning_rate": 2.9960353815130293e-06,
|
9280 |
+
"loss": 11.4618,
|
9281 |
+
"step": 1320
|
9282 |
+
},
|
9283 |
+
{
|
9284 |
+
"epoch": 0.05137928687416742,
|
9285 |
+
"grad_norm": 0.1874701827764511,
|
9286 |
+
"learning_rate": 2.9425610582342834e-06,
|
9287 |
+
"loss": 11.4144,
|
9288 |
+
"step": 1321
|
9289 |
+
},
|
9290 |
+
{
|
9291 |
+
"epoch": 0.05141818111101388,
|
9292 |
+
"grad_norm": 0.17211183905601501,
|
9293 |
+
"learning_rate": 2.8895611301051673e-06,
|
9294 |
+
"loss": 11.4916,
|
9295 |
+
"step": 1322
|
9296 |
+
},
|
9297 |
+
{
|
9298 |
+
"epoch": 0.05145707534786033,
|
9299 |
+
"grad_norm": 0.1562688797712326,
|
9300 |
+
"learning_rate": 2.837035856177539e-06,
|
9301 |
+
"loss": 11.4537,
|
9302 |
+
"step": 1323
|
9303 |
+
},
|
9304 |
+
{
|
9305 |
+
"epoch": 0.051495969584706786,
|
9306 |
+
"grad_norm": 0.1409672051668167,
|
9307 |
+
"learning_rate": 2.7849854931832562e-06,
|
9308 |
+
"loss": 11.4108,
|
9309 |
+
"step": 1324
|
9310 |
+
},
|
9311 |
+
{
|
9312 |
+
"epoch": 0.05153486382155324,
|
9313 |
+
"grad_norm": 0.16954468190670013,
|
9314 |
+
"learning_rate": 2.73341029553289e-06,
|
9315 |
+
"loss": 11.4287,
|
9316 |
+
"step": 1325
|
9317 |
+
},
|
9318 |
+
{
|
9319 |
+
"epoch": 0.0515737580583997,
|
9320 |
+
"grad_norm": 0.14109128713607788,
|
9321 |
+
"learning_rate": 2.682310515314512e-06,
|
9322 |
+
"loss": 11.498,
|
9323 |
+
"step": 1326
|
9324 |
+
},
|
9325 |
+
{
|
9326 |
+
"epoch": 0.05161265229524615,
|
9327 |
+
"grad_norm": 0.17244742810726166,
|
9328 |
+
"learning_rate": 2.6316864022924993e-06,
|
9329 |
+
"loss": 11.4485,
|
9330 |
+
"step": 1327
|
9331 |
+
},
|
9332 |
+
{
|
9333 |
+
"epoch": 0.051651546532092606,
|
9334 |
+
"grad_norm": 0.14715106785297394,
|
9335 |
+
"learning_rate": 2.5815382039062308e-06,
|
9336 |
+
"loss": 11.4552,
|
9337 |
+
"step": 1328
|
9338 |
+
},
|
9339 |
+
{
|
9340 |
+
"epoch": 0.05169044076893906,
|
9341 |
+
"grad_norm": 0.11490415036678314,
|
9342 |
+
"learning_rate": 2.5318661652689036e-06,
|
9343 |
+
"loss": 11.4516,
|
9344 |
+
"step": 1329
|
9345 |
+
},
|
9346 |
+
{
|
9347 |
+
"epoch": 0.05172933500578552,
|
9348 |
+
"grad_norm": 0.216889426112175,
|
9349 |
+
"learning_rate": 2.48267052916642e-06,
|
9350 |
+
"loss": 11.4809,
|
9351 |
+
"step": 1330
|
9352 |
+
},
|
9353 |
+
{
|
9354 |
+
"epoch": 0.051768229242631976,
|
9355 |
+
"grad_norm": 0.12310255318880081,
|
9356 |
+
"learning_rate": 2.4339515360561005e-06,
|
9357 |
+
"loss": 11.4264,
|
9358 |
+
"step": 1331
|
9359 |
+
},
|
9360 |
+
{
|
9361 |
+
"epoch": 0.051807123479478426,
|
9362 |
+
"grad_norm": 0.1695736199617386,
|
9363 |
+
"learning_rate": 2.3857094240654856e-06,
|
9364 |
+
"loss": 11.4448,
|
9365 |
+
"step": 1332
|
9366 |
+
},
|
9367 |
+
{
|
9368 |
+
"epoch": 0.05184601771632488,
|
9369 |
+
"grad_norm": 0.20610633492469788,
|
9370 |
+
"learning_rate": 2.3379444289913342e-06,
|
9371 |
+
"loss": 11.4313,
|
9372 |
+
"step": 1333
|
9373 |
+
},
|
9374 |
+
{
|
9375 |
+
"epoch": 0.05188491195317134,
|
9376 |
+
"grad_norm": 0.1628742516040802,
|
9377 |
+
"learning_rate": 2.2906567842982728e-06,
|
9378 |
+
"loss": 11.4884,
|
9379 |
+
"step": 1334
|
9380 |
+
},
|
9381 |
+
{
|
9382 |
+
"epoch": 0.051923806190017796,
|
9383 |
+
"grad_norm": 0.19907735288143158,
|
9384 |
+
"learning_rate": 2.2438467211177816e-06,
|
9385 |
+
"loss": 11.4273,
|
9386 |
+
"step": 1335
|
9387 |
+
},
|
9388 |
+
{
|
9389 |
+
"epoch": 0.05196270042686425,
|
9390 |
+
"grad_norm": 0.2636147141456604,
|
9391 |
+
"learning_rate": 2.1975144682470415e-06,
|
9392 |
+
"loss": 11.4372,
|
9393 |
+
"step": 1336
|
9394 |
+
},
|
9395 |
+
{
|
9396 |
+
"epoch": 0.0520015946637107,
|
9397 |
+
"grad_norm": 0.32304054498672485,
|
9398 |
+
"learning_rate": 2.151660252147769e-06,
|
9399 |
+
"loss": 11.5135,
|
9400 |
+
"step": 1337
|
9401 |
+
},
|
9402 |
+
{
|
9403 |
+
"epoch": 0.05204048890055716,
|
9404 |
+
"grad_norm": 0.16421319544315338,
|
9405 |
+
"learning_rate": 2.1062842969451713e-06,
|
9406 |
+
"loss": 11.4225,
|
9407 |
+
"step": 1338
|
9408 |
+
},
|
9409 |
+
{
|
9410 |
+
"epoch": 0.052079383137403616,
|
9411 |
+
"grad_norm": 0.21596230566501617,
|
9412 |
+
"learning_rate": 2.0613868244268143e-06,
|
9413 |
+
"loss": 11.4815,
|
9414 |
+
"step": 1339
|
9415 |
+
},
|
9416 |
+
{
|
9417 |
+
"epoch": 0.05211827737425007,
|
9418 |
+
"grad_norm": 0.16720180213451385,
|
9419 |
+
"learning_rate": 2.016968054041546e-06,
|
9420 |
+
"loss": 11.4213,
|
9421 |
+
"step": 1340
|
9422 |
+
},
|
9423 |
+
{
|
9424 |
+
"epoch": 0.05215717161109652,
|
9425 |
+
"grad_norm": 0.12049616873264313,
|
9426 |
+
"learning_rate": 1.973028202898419e-06,
|
9427 |
+
"loss": 11.4324,
|
9428 |
+
"step": 1341
|
9429 |
+
},
|
9430 |
+
{
|
9431 |
+
"epoch": 0.05219606584794298,
|
9432 |
+
"grad_norm": 0.11477518826723099,
|
9433 |
+
"learning_rate": 1.9295674857656486e-06,
|
9434 |
+
"loss": 11.4659,
|
9435 |
+
"step": 1342
|
9436 |
+
},
|
9437 |
+
{
|
9438 |
+
"epoch": 0.052234960084789436,
|
9439 |
+
"grad_norm": 0.16870322823524475,
|
9440 |
+
"learning_rate": 1.8865861150695442e-06,
|
9441 |
+
"loss": 11.4344,
|
9442 |
+
"step": 1343
|
9443 |
+
},
|
9444 |
+
{
|
9445 |
+
"epoch": 0.05227385432163589,
|
9446 |
+
"grad_norm": 0.09433110058307648,
|
9447 |
+
"learning_rate": 1.8440843008934561e-06,
|
9448 |
+
"loss": 11.424,
|
9449 |
+
"step": 1344
|
9450 |
+
},
|
9451 |
+
{
|
9452 |
+
"epoch": 0.05231274855848235,
|
9453 |
+
"grad_norm": 0.12650403380393982,
|
9454 |
+
"learning_rate": 1.8020622509768326e-06,
|
9455 |
+
"loss": 11.4307,
|
9456 |
+
"step": 1345
|
9457 |
+
},
|
9458 |
+
{
|
9459 |
+
"epoch": 0.0523516427953288,
|
9460 |
+
"grad_norm": 0.255119651556015,
|
9461 |
+
"learning_rate": 1.7605201707140418e-06,
|
9462 |
+
"loss": 11.4822,
|
9463 |
+
"step": 1346
|
9464 |
+
},
|
9465 |
+
{
|
9466 |
+
"epoch": 0.052390537032175256,
|
9467 |
+
"grad_norm": 0.16543054580688477,
|
9468 |
+
"learning_rate": 1.7194582631535617e-06,
|
9469 |
+
"loss": 11.4568,
|
9470 |
+
"step": 1347
|
9471 |
+
},
|
9472 |
+
{
|
9473 |
+
"epoch": 0.05242943126902171,
|
9474 |
+
"grad_norm": 0.20184849202632904,
|
9475 |
+
"learning_rate": 1.6788767289968254e-06,
|
9476 |
+
"loss": 11.4388,
|
9477 |
+
"step": 1348
|
9478 |
+
},
|
9479 |
+
{
|
9480 |
+
"epoch": 0.05246832550586817,
|
9481 |
+
"grad_norm": 0.17886343598365784,
|
9482 |
+
"learning_rate": 1.6387757665973559e-06,
|
9483 |
+
"loss": 11.4182,
|
9484 |
+
"step": 1349
|
9485 |
+
},
|
9486 |
+
{
|
9487 |
+
"epoch": 0.05250721974271463,
|
9488 |
+
"grad_norm": 0.1549103856086731,
|
9489 |
+
"learning_rate": 1.5991555719597207e-06,
|
9490 |
+
"loss": 11.4222,
|
9491 |
+
"step": 1350
|
9492 |
+
},
|
9493 |
+
{
|
9494 |
+
"epoch": 0.05254611397956108,
|
9495 |
+
"grad_norm": 0.17105679214000702,
|
9496 |
+
"learning_rate": 1.5600163387386124e-06,
|
9497 |
+
"loss": 11.45,
|
9498 |
+
"step": 1351
|
9499 |
+
},
|
9500 |
+
{
|
9501 |
+
"epoch": 0.05258500821640753,
|
9502 |
+
"grad_norm": 0.13427035510540009,
|
9503 |
+
"learning_rate": 1.5213582582378927e-06,
|
9504 |
+
"loss": 11.438,
|
9505 |
+
"step": 1352
|
9506 |
+
},
|
9507 |
+
{
|
9508 |
+
"epoch": 0.05262390245325399,
|
9509 |
+
"grad_norm": 0.16201744973659515,
|
9510 |
+
"learning_rate": 1.4831815194096266e-06,
|
9511 |
+
"loss": 11.4172,
|
9512 |
+
"step": 1353
|
9513 |
+
},
|
9514 |
+
{
|
9515 |
+
"epoch": 0.05266279669010045,
|
9516 |
+
"grad_norm": 0.1943223476409912,
|
9517 |
+
"learning_rate": 1.4454863088532388e-06,
|
9518 |
+
"loss": 11.4298,
|
9519 |
+
"step": 1354
|
9520 |
+
},
|
9521 |
+
{
|
9522 |
+
"epoch": 0.0527016909269469,
|
9523 |
+
"grad_norm": 0.18249273300170898,
|
9524 |
+
"learning_rate": 1.408272810814515e-06,
|
9525 |
+
"loss": 11.4374,
|
9526 |
+
"step": 1355
|
9527 |
+
},
|
9528 |
+
{
|
9529 |
+
"epoch": 0.05274058516379335,
|
9530 |
+
"grad_norm": 0.15066294372081757,
|
9531 |
+
"learning_rate": 1.3715412071847345e-06,
|
9532 |
+
"loss": 11.4284,
|
9533 |
+
"step": 1356
|
9534 |
+
},
|
9535 |
+
{
|
9536 |
+
"epoch": 0.05277947940063981,
|
9537 |
+
"grad_norm": 0.1240072026848793,
|
9538 |
+
"learning_rate": 1.3352916774998281e-06,
|
9539 |
+
"loss": 11.453,
|
9540 |
+
"step": 1357
|
9541 |
+
},
|
9542 |
+
{
|
9543 |
+
"epoch": 0.05281837363748627,
|
9544 |
+
"grad_norm": 0.21831557154655457,
|
9545 |
+
"learning_rate": 1.299524398939389e-06,
|
9546 |
+
"loss": 11.4345,
|
9547 |
+
"step": 1358
|
9548 |
+
},
|
9549 |
+
{
|
9550 |
+
"epoch": 0.052857267874332724,
|
9551 |
+
"grad_norm": 0.16932734847068787,
|
9552 |
+
"learning_rate": 1.2642395463259404e-06,
|
9553 |
+
"loss": 11.5236,
|
9554 |
+
"step": 1359
|
9555 |
+
},
|
9556 |
+
{
|
9557 |
+
"epoch": 0.052896162111179174,
|
9558 |
+
"grad_norm": 0.22352683544158936,
|
9559 |
+
"learning_rate": 1.2294372921239694e-06,
|
9560 |
+
"loss": 11.4624,
|
9561 |
+
"step": 1360
|
9562 |
+
},
|
9563 |
+
{
|
9564 |
+
"epoch": 0.05293505634802563,
|
9565 |
+
"grad_norm": 0.1469883918762207,
|
9566 |
+
"learning_rate": 1.19511780643915e-06,
|
9567 |
+
"loss": 11.4261,
|
9568 |
+
"step": 1361
|
9569 |
+
},
|
9570 |
+
{
|
9571 |
+
"epoch": 0.05297395058487209,
|
9572 |
+
"grad_norm": 0.17142179608345032,
|
9573 |
+
"learning_rate": 1.161281257017477e-06,
|
9574 |
+
"loss": 11.4227,
|
9575 |
+
"step": 1362
|
9576 |
+
},
|
9577 |
+
{
|
9578 |
+
"epoch": 0.053012844821718544,
|
9579 |
+
"grad_norm": 0.15057890117168427,
|
9580 |
+
"learning_rate": 1.1279278092444889e-06,
|
9581 |
+
"loss": 11.4453,
|
9582 |
+
"step": 1363
|
9583 |
+
},
|
9584 |
+
{
|
9585 |
+
"epoch": 0.053051739058564994,
|
9586 |
+
"grad_norm": 0.13775911927223206,
|
9587 |
+
"learning_rate": 1.0950576261444023e-06,
|
9588 |
+
"loss": 11.4371,
|
9589 |
+
"step": 1364
|
9590 |
+
},
|
9591 |
+
{
|
9592 |
+
"epoch": 0.05309063329541145,
|
9593 |
+
"grad_norm": 0.19454172253608704,
|
9594 |
+
"learning_rate": 1.062670868379334e-06,
|
9595 |
+
"loss": 11.471,
|
9596 |
+
"step": 1365
|
9597 |
+
},
|
9598 |
+
{
|
9599 |
+
"epoch": 0.05312952753225791,
|
9600 |
+
"grad_norm": 0.16760843992233276,
|
9601 |
+
"learning_rate": 1.0307676942485689e-06,
|
9602 |
+
"loss": 11.4212,
|
9603 |
+
"step": 1366
|
9604 |
+
},
|
9605 |
+
{
|
9606 |
+
"epoch": 0.053168421769104364,
|
9607 |
+
"grad_norm": 0.12229340523481369,
|
9608 |
+
"learning_rate": 9.993482596877157e-07,
|
9609 |
+
"loss": 11.4386,
|
9610 |
+
"step": 1367
|
9611 |
+
},
|
9612 |
+
{
|
9613 |
+
"epoch": 0.05320731600595082,
|
9614 |
+
"grad_norm": 0.1881038248538971,
|
9615 |
+
"learning_rate": 9.684127182679526e-07,
|
9616 |
+
"loss": 11.4969,
|
9617 |
+
"step": 1368
|
9618 |
+
},
|
9619 |
+
{
|
9620 |
+
"epoch": 0.05324621024279727,
|
9621 |
+
"grad_norm": 0.2813434898853302,
|
9622 |
+
"learning_rate": 9.379612211953492e-07,
|
9623 |
+
"loss": 11.5781,
|
9624 |
+
"step": 1369
|
9625 |
+
},
|
9626 |
+
{
|
9627 |
+
"epoch": 0.05328510447964373,
|
9628 |
+
"grad_norm": 0.1275080293416977,
|
9629 |
+
"learning_rate": 9.079939173100238e-07,
|
9630 |
+
"loss": 11.4293,
|
9631 |
+
"step": 1370
|
9632 |
+
},
|
9633 |
+
{
|
9634 |
+
"epoch": 0.053323998716490184,
|
9635 |
+
"grad_norm": 0.15989451110363007,
|
9636 |
+
"learning_rate": 8.785109530854874e-07,
|
9637 |
+
"loss": 11.4262,
|
9638 |
+
"step": 1371
|
9639 |
+
},
|
9640 |
+
{
|
9641 |
+
"epoch": 0.05336289295333664,
|
9642 |
+
"grad_norm": 0.18090415000915527,
|
9643 |
+
"learning_rate": 8.495124726279002e-07,
|
9644 |
+
"loss": 11.4283,
|
9645 |
+
"step": 1372
|
9646 |
+
},
|
9647 |
+
{
|
9648 |
+
"epoch": 0.0534017871901831,
|
9649 |
+
"grad_norm": 0.19085204601287842,
|
9650 |
+
"learning_rate": 8.209986176753948e-07,
|
9651 |
+
"loss": 11.4415,
|
9652 |
+
"step": 1373
|
9653 |
+
},
|
9654 |
+
{
|
9655 |
+
"epoch": 0.05344068142702955,
|
9656 |
+
"grad_norm": 0.19983118772506714,
|
9657 |
+
"learning_rate": 7.929695275973204e-07,
|
9658 |
+
"loss": 11.4394,
|
9659 |
+
"step": 1374
|
9660 |
+
},
|
9661 |
+
{
|
9662 |
+
"epoch": 0.053479575663876004,
|
9663 |
+
"grad_norm": 0.3193620443344116,
|
9664 |
+
"learning_rate": 7.654253393936439e-07,
|
9665 |
+
"loss": 11.4513,
|
9666 |
+
"step": 1375
|
9667 |
+
},
|
9668 |
+
{
|
9669 |
+
"epoch": 0.05351846990072246,
|
9670 |
+
"grad_norm": 0.10505767166614532,
|
9671 |
+
"learning_rate": 7.383661876942283e-07,
|
9672 |
+
"loss": 11.4344,
|
9673 |
+
"step": 1376
|
9674 |
+
},
|
9675 |
+
{
|
9676 |
+
"epoch": 0.05355736413756892,
|
9677 |
+
"grad_norm": 0.12139089405536652,
|
9678 |
+
"learning_rate": 7.117922047581549e-07,
|
9679 |
+
"loss": 11.4365,
|
9680 |
+
"step": 1377
|
9681 |
+
},
|
9682 |
+
{
|
9683 |
+
"epoch": 0.05359625837441537,
|
9684 |
+
"grad_norm": 0.22217227518558502,
|
9685 |
+
"learning_rate": 6.857035204731688e-07,
|
9686 |
+
"loss": 11.4517,
|
9687 |
+
"step": 1378
|
9688 |
+
},
|
9689 |
+
{
|
9690 |
+
"epoch": 0.053635152611261824,
|
9691 |
+
"grad_norm": 0.12010081112384796,
|
9692 |
+
"learning_rate": 6.601002623549346e-07,
|
9693 |
+
"loss": 11.4011,
|
9694 |
+
"step": 1379
|
9695 |
+
},
|
9696 |
+
{
|
9697 |
+
"epoch": 0.05367404684810828,
|
9698 |
+
"grad_norm": 0.1507730633020401,
|
9699 |
+
"learning_rate": 6.349825555464706e-07,
|
9700 |
+
"loss": 11.4185,
|
9701 |
+
"step": 1380
|
9702 |
+
},
|
9703 |
+
{
|
9704 |
+
"epoch": 0.05371294108495474,
|
9705 |
+
"grad_norm": 0.19225631654262543,
|
9706 |
+
"learning_rate": 6.103505228175377e-07,
|
9707 |
+
"loss": 11.4336,
|
9708 |
+
"step": 1381
|
9709 |
+
},
|
9710 |
+
{
|
9711 |
+
"epoch": 0.053751835321801195,
|
9712 |
+
"grad_norm": 0.1865839958190918,
|
9713 |
+
"learning_rate": 5.862042845640403e-07,
|
9714 |
+
"loss": 11.4278,
|
9715 |
+
"step": 1382
|
9716 |
+
},
|
9717 |
+
{
|
9718 |
+
"epoch": 0.053790729558647644,
|
9719 |
+
"grad_norm": 0.16153313219547272,
|
9720 |
+
"learning_rate": 5.625439588074044e-07,
|
9721 |
+
"loss": 11.4257,
|
9722 |
+
"step": 1383
|
9723 |
+
},
|
9724 |
+
{
|
9725 |
+
"epoch": 0.0538296237954941,
|
9726 |
+
"grad_norm": 0.14634691178798676,
|
9727 |
+
"learning_rate": 5.393696611940225e-07,
|
9728 |
+
"loss": 11.4201,
|
9729 |
+
"step": 1384
|
9730 |
+
},
|
9731 |
+
{
|
9732 |
+
"epoch": 0.05386851803234056,
|
9733 |
+
"grad_norm": 0.2850719690322876,
|
9734 |
+
"learning_rate": 5.166815049947204e-07,
|
9735 |
+
"loss": 11.4702,
|
9736 |
+
"step": 1385
|
9737 |
+
},
|
9738 |
+
{
|
9739 |
+
"epoch": 0.053907412269187015,
|
9740 |
+
"grad_norm": 0.15424256026744843,
|
9741 |
+
"learning_rate": 4.944796011041475e-07,
|
9742 |
+
"loss": 11.4015,
|
9743 |
+
"step": 1386
|
9744 |
+
},
|
9745 |
+
{
|
9746 |
+
"epoch": 0.05394630650603347,
|
9747 |
+
"grad_norm": 0.13952219486236572,
|
9748 |
+
"learning_rate": 4.727640580402537e-07,
|
9749 |
+
"loss": 11.411,
|
9750 |
+
"step": 1387
|
9751 |
+
},
|
9752 |
+
{
|
9753 |
+
"epoch": 0.05398520074287992,
|
9754 |
+
"grad_norm": 0.1746031492948532,
|
9755 |
+
"learning_rate": 4.5153498194380195e-07,
|
9756 |
+
"loss": 11.4428,
|
9757 |
+
"step": 1388
|
9758 |
+
},
|
9759 |
+
{
|
9760 |
+
"epoch": 0.05402409497972638,
|
9761 |
+
"grad_norm": 0.20047025382518768,
|
9762 |
+
"learning_rate": 4.307924765777682e-07,
|
9763 |
+
"loss": 11.4867,
|
9764 |
+
"step": 1389
|
9765 |
+
},
|
9766 |
+
{
|
9767 |
+
"epoch": 0.054062989216572835,
|
9768 |
+
"grad_norm": 0.1938544064760208,
|
9769 |
+
"learning_rate": 4.105366433269087e-07,
|
9770 |
+
"loss": 11.4689,
|
9771 |
+
"step": 1390
|
9772 |
+
},
|
9773 |
+
{
|
9774 |
+
"epoch": 0.05410188345341929,
|
9775 |
+
"grad_norm": 0.20457126200199127,
|
9776 |
+
"learning_rate": 3.9076758119722666e-07,
|
9777 |
+
"loss": 11.4573,
|
9778 |
+
"step": 1391
|
9779 |
+
},
|
9780 |
+
{
|
9781 |
+
"epoch": 0.05414077769026574,
|
9782 |
+
"grad_norm": 0.12991569936275482,
|
9783 |
+
"learning_rate": 3.714853868154955e-07,
|
9784 |
+
"loss": 11.4399,
|
9785 |
+
"step": 1392
|
9786 |
+
},
|
9787 |
+
{
|
9788 |
+
"epoch": 0.0541796719271122,
|
9789 |
+
"grad_norm": 0.2232292741537094,
|
9790 |
+
"learning_rate": 3.5269015442878083e-07,
|
9791 |
+
"loss": 11.4935,
|
9792 |
+
"step": 1393
|
9793 |
+
},
|
9794 |
+
{
|
9795 |
+
"epoch": 0.054218566163958655,
|
9796 |
+
"grad_norm": 0.15414465963840485,
|
9797 |
+
"learning_rate": 3.343819759040079e-07,
|
9798 |
+
"loss": 11.4398,
|
9799 |
+
"step": 1394
|
9800 |
+
},
|
9801 |
+
{
|
9802 |
+
"epoch": 0.05425746040080511,
|
9803 |
+
"grad_norm": 0.17479385435581207,
|
9804 |
+
"learning_rate": 3.165609407274617e-07,
|
9805 |
+
"loss": 11.4745,
|
9806 |
+
"step": 1395
|
9807 |
+
},
|
9808 |
+
{
|
9809 |
+
"epoch": 0.05429635463765157,
|
9810 |
+
"grad_norm": 0.15992434322834015,
|
9811 |
+
"learning_rate": 2.9922713600439854e-07,
|
9812 |
+
"loss": 11.4093,
|
9813 |
+
"step": 1396
|
9814 |
+
},
|
9815 |
+
{
|
9816 |
+
"epoch": 0.05433524887449802,
|
9817 |
+
"grad_norm": 0.1427340805530548,
|
9818 |
+
"learning_rate": 2.82380646458591e-07,
|
9819 |
+
"loss": 11.4472,
|
9820 |
+
"step": 1397
|
9821 |
+
},
|
9822 |
+
{
|
9823 |
+
"epoch": 0.054374143111344475,
|
9824 |
+
"grad_norm": 0.15816694498062134,
|
9825 |
+
"learning_rate": 2.6602155443195e-07,
|
9826 |
+
"loss": 11.4177,
|
9827 |
+
"step": 1398
|
9828 |
+
},
|
9829 |
+
{
|
9830 |
+
"epoch": 0.05441303734819093,
|
9831 |
+
"grad_norm": 0.14333859086036682,
|
9832 |
+
"learning_rate": 2.501499398840479e-07,
|
9833 |
+
"loss": 11.427,
|
9834 |
+
"step": 1399
|
9835 |
+
},
|
9836 |
+
{
|
9837 |
+
"epoch": 0.05445193158503739,
|
9838 |
+
"grad_norm": 0.12002553045749664,
|
9839 |
+
"learning_rate": 2.3476588039181845e-07,
|
9840 |
+
"loss": 11.4343,
|
9841 |
+
"step": 1400
|
9842 |
+
},
|
9843 |
+
{
|
9844 |
+
"epoch": 0.054490825821883845,
|
9845 |
+
"grad_norm": 0.15301531553268433,
|
9846 |
+
"learning_rate": 2.1986945114911283e-07,
|
9847 |
+
"loss": 11.4611,
|
9848 |
+
"step": 1401
|
9849 |
+
},
|
9850 |
+
{
|
9851 |
+
"epoch": 0.054529720058730295,
|
9852 |
+
"grad_norm": 0.20533597469329834,
|
9853 |
+
"learning_rate": 2.054607249663665e-07,
|
9854 |
+
"loss": 11.4376,
|
9855 |
+
"step": 1402
|
9856 |
+
},
|
9857 |
+
{
|
9858 |
+
"epoch": 0.05456861429557675,
|
9859 |
+
"grad_norm": 0.1993454545736313,
|
9860 |
+
"learning_rate": 1.915397722702217e-07,
|
9861 |
+
"loss": 11.4586,
|
9862 |
+
"step": 1403
|
9863 |
+
},
|
9864 |
+
{
|
9865 |
+
"epoch": 0.05460750853242321,
|
9866 |
+
"grad_norm": 0.12486769258975983,
|
9867 |
+
"learning_rate": 1.7810666110318342e-07,
|
9868 |
+
"loss": 11.4282,
|
9869 |
+
"step": 1404
|
9870 |
+
},
|
9871 |
+
{
|
9872 |
+
"epoch": 0.054646402769269666,
|
9873 |
+
"grad_norm": 0.13480772078037262,
|
9874 |
+
"learning_rate": 1.6516145712333064e-07,
|
9875 |
+
"loss": 11.4318,
|
9876 |
+
"step": 1405
|
9877 |
+
},
|
9878 |
+
{
|
9879 |
+
"epoch": 0.054685297006116115,
|
9880 |
+
"grad_norm": 0.16604341566562653,
|
9881 |
+
"learning_rate": 1.5270422360391668e-07,
|
9882 |
+
"loss": 11.4172,
|
9883 |
+
"step": 1406
|
9884 |
+
},
|
9885 |
+
{
|
9886 |
+
"epoch": 0.05472419124296257,
|
9887 |
+
"grad_norm": 0.17657136917114258,
|
9888 |
+
"learning_rate": 1.4073502143313598e-07,
|
9889 |
+
"loss": 11.4706,
|
9890 |
+
"step": 1407
|
9891 |
+
},
|
9892 |
+
{
|
9893 |
+
"epoch": 0.05476308547980903,
|
9894 |
+
"grad_norm": 0.19776944816112518,
|
9895 |
+
"learning_rate": 1.2925390911379121e-07,
|
9896 |
+
"loss": 11.4629,
|
9897 |
+
"step": 1408
|
9898 |
+
},
|
9899 |
+
{
|
9900 |
+
"epoch": 0.054801979716655486,
|
9901 |
+
"grad_norm": 0.18015769124031067,
|
9902 |
+
"learning_rate": 1.1826094276298216e-07,
|
9903 |
+
"loss": 11.4426,
|
9904 |
+
"step": 1409
|
9905 |
+
},
|
9906 |
+
{
|
9907 |
+
"epoch": 0.05484087395350194,
|
9908 |
+
"grad_norm": 0.15199075639247894,
|
9909 |
+
"learning_rate": 1.0775617611189503e-07,
|
9910 |
+
"loss": 11.4482,
|
9911 |
+
"step": 1410
|
9912 |
+
},
|
9913 |
+
{
|
9914 |
+
"epoch": 0.05487976819034839,
|
9915 |
+
"grad_norm": 0.14073902368545532,
|
9916 |
+
"learning_rate": 9.773966050549143e-08,
|
9917 |
+
"loss": 11.4773,
|
9918 |
+
"step": 1411
|
9919 |
+
},
|
9920 |
+
{
|
9921 |
+
"epoch": 0.05491866242719485,
|
9922 |
+
"grad_norm": 0.10297466069459915,
|
9923 |
+
"learning_rate": 8.821144490225308e-08,
|
9924 |
+
"loss": 11.463,
|
9925 |
+
"step": 1412
|
9926 |
+
},
|
9927 |
+
{
|
9928 |
+
"epoch": 0.054957556664041306,
|
9929 |
+
"grad_norm": 0.19450917840003967,
|
9930 |
+
"learning_rate": 7.917157587399304e-08,
|
9931 |
+
"loss": 11.4102,
|
9932 |
+
"step": 1413
|
9933 |
+
},
|
9934 |
+
{
|
9935 |
+
"epoch": 0.05499645090088776,
|
9936 |
+
"grad_norm": 0.16901682317256927,
|
9937 |
+
"learning_rate": 7.06200976055782e-08,
|
9938 |
+
"loss": 11.4469,
|
9939 |
+
"step": 1414
|
9940 |
+
},
|
9941 |
+
{
|
9942 |
+
"epoch": 0.05503534513773422,
|
9943 |
+
"grad_norm": 0.15647615492343903,
|
9944 |
+
"learning_rate": 6.255705189471828e-08,
|
9945 |
+
"loss": 11.4383,
|
9946 |
+
"step": 1415
|
9947 |
+
},
|
9948 |
+
{
|
9949 |
+
"epoch": 0.05507423937458067,
|
9950 |
+
"grad_norm": 0.11146261543035507,
|
9951 |
+
"learning_rate": 5.498247815179936e-08,
|
9952 |
+
"loss": 11.4292,
|
9953 |
+
"step": 1416
|
9954 |
+
},
|
9955 |
+
{
|
9956 |
+
"epoch": 0.055113133611427126,
|
9957 |
+
"grad_norm": 0.250475138425827,
|
9958 |
+
"learning_rate": 4.789641339963957e-08,
|
9959 |
+
"loss": 11.5614,
|
9960 |
+
"step": 1417
|
9961 |
+
},
|
9962 |
+
{
|
9963 |
+
"epoch": 0.05515202784827358,
|
9964 |
+
"grad_norm": 0.14612603187561035,
|
9965 |
+
"learning_rate": 4.129889227334483e-08,
|
9966 |
+
"loss": 11.4791,
|
9967 |
+
"step": 1418
|
9968 |
+
},
|
9969 |
+
{
|
9970 |
+
"epoch": 0.05519092208512004,
|
9971 |
+
"grad_norm": 0.1520339548587799,
|
9972 |
+
"learning_rate": 3.5189947020142224e-08,
|
9973 |
+
"loss": 11.41,
|
9974 |
+
"step": 1419
|
9975 |
+
},
|
9976 |
+
{
|
9977 |
+
"epoch": 0.05522981632196649,
|
9978 |
+
"grad_norm": 0.16020874679088593,
|
9979 |
+
"learning_rate": 2.9569607499180252e-08,
|
9980 |
+
"loss": 11.4201,
|
9981 |
+
"step": 1420
|
9982 |
+
},
|
9983 |
+
{
|
9984 |
+
"epoch": 0.055268710558812946,
|
9985 |
+
"grad_norm": 0.1421998292207718,
|
9986 |
+
"learning_rate": 2.4437901181439958e-08,
|
9987 |
+
"loss": 11.4428,
|
9988 |
+
"step": 1421
|
9989 |
+
},
|
9990 |
+
{
|
9991 |
+
"epoch": 0.0553076047956594,
|
9992 |
+
"grad_norm": 0.13697375357151031,
|
9993 |
+
"learning_rate": 1.9794853149557314e-08,
|
9994 |
+
"loss": 11.4406,
|
9995 |
+
"step": 1422
|
9996 |
+
},
|
9997 |
+
{
|
9998 |
+
"epoch": 0.05534649903250586,
|
9999 |
+
"grad_norm": 0.12090425193309784,
|
10000 |
+
"learning_rate": 1.564048609771218e-08,
|
10001 |
+
"loss": 11.4115,
|
10002 |
+
"step": 1423
|
10003 |
+
},
|
10004 |
+
{
|
10005 |
+
"epoch": 0.055385393269352316,
|
10006 |
+
"grad_norm": 0.21023690700531006,
|
10007 |
+
"learning_rate": 1.1974820331517312e-08,
|
10008 |
+
"loss": 11.4664,
|
10009 |
+
"step": 1424
|
10010 |
+
},
|
10011 |
+
{
|
10012 |
+
"epoch": 0.055424287506198766,
|
10013 |
+
"grad_norm": 0.1580401510000229,
|
10014 |
+
"learning_rate": 8.797873767951714e-09,
|
10015 |
+
"loss": 11.44,
|
10016 |
+
"step": 1425
|
10017 |
+
},
|
10018 |
+
{
|
10019 |
+
"epoch": 0.05546318174304522,
|
10020 |
+
"grad_norm": 0.147861510515213,
|
10021 |
+
"learning_rate": 6.109661935205236e-09,
|
10022 |
+
"loss": 11.4288,
|
10023 |
+
"step": 1426
|
10024 |
+
},
|
10025 |
+
{
|
10026 |
+
"epoch": 0.05550207597989168,
|
10027 |
+
"grad_norm": 0.14827421307563782,
|
10028 |
+
"learning_rate": 3.9101979726674505e-09,
|
10029 |
+
"loss": 11.4429,
|
10030 |
+
"step": 1427
|
10031 |
+
},
|
10032 |
+
{
|
10033 |
+
"epoch": 0.055540970216738136,
|
10034 |
+
"grad_norm": 0.170853391289711,
|
10035 |
+
"learning_rate": 2.1994926308277486e-09,
|
10036 |
+
"loss": 11.4324,
|
10037 |
+
"step": 1428
|
10038 |
+
},
|
10039 |
+
{
|
10040 |
+
"epoch": 0.05557986445358459,
|
10041 |
+
"grad_norm": 0.1428116112947464,
|
10042 |
+
"learning_rate": 9.775542712309182e-10,
|
10043 |
+
"loss": 11.423,
|
10044 |
+
"step": 1429
|
10045 |
+
},
|
10046 |
+
{
|
10047 |
+
"epoch": 0.05561875869043104,
|
10048 |
+
"grad_norm": 0.1336054503917694,
|
10049 |
+
"learning_rate": 2.443888664327432e-10,
|
10050 |
+
"loss": 11.4468,
|
10051 |
+
"step": 1430
|
10052 |
+
},
|
10053 |
+
{
|
10054 |
+
"epoch": 0.0556576529272775,
|
10055 |
+
"grad_norm": 0.17747704684734344,
|
10056 |
+
"learning_rate": 0.0,
|
10057 |
+
"loss": 11.4313,
|
10058 |
+
"step": 1431
|
10059 |
}
|
10060 |
],
|
10061 |
"logging_steps": 1,
...
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
+       "should_training_stop": true
      },
      "attributes": {}
    }
  },
+ "total_flos": 179098141851648.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null