checkpoint-1650
- adapter_config.json +5 -5
- adapter_model.safetensors +1 -1
- optimizer.pt +2 -2
- scheduler.pt +1 -1
- trainer_state.json +1404 -4
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -25,13 +25,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
-    "o_proj",
+    "v_proj",
     "q_proj",
-    "
+    "down_proj",
     "k_proj",
-    "
-    "
+    "up_proj",
+    "gate_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
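The change above rewrites the LoRA `target_modules` list so that it covers all seven attention and MLP projections (q/k/v/o plus gate/up/down). As a hedged sketch, the updated list corresponds to a PEFT `LoraConfig` along these lines; only `task_type`, `use_dora`, and the module list come from this diff, while `r` and `lora_alpha` are illustrative assumptions:

```python
# Minimal sketch of the updated adapter settings as a PEFT LoraConfig.
# task_type, use_dora, and target_modules are taken from the diff above;
# r and lora_alpha are illustrative assumptions not recorded in this hunk.
from peft import LoraConfig

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    use_dora=False,
    r=16,           # assumed rank
    lora_alpha=32,  # assumed scaling factor
    target_modules=[
        "v_proj", "q_proj", "down_proj",
        "k_proj", "up_proj", "gate_proj", "o_proj",
    ],
)
```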
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:03502629d8a008110406c83cfc613af924f21269ad0a07227ba5992d280cf618
 size 615543928
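adapter_model.safetensors is tracked with Git LFS, so the repository stores only a pointer: the payload's SHA-256 (`oid`) and its byte size. The size stays 615543928 bytes because only the weight values changed, not the tensor shapes. A minimal verification sketch, assuming the real payload has been downloaded to the working directory:

```python
# Sketch: check a downloaded LFS payload against the oid in its pointer file.
# The local filename is an assumption.
import hashlib

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()

expected = "03502629d8a008110406c83cfc613af924f21269ad0a07227ba5992d280cf618"
assert sha256_of("adapter_model.safetensors") == expected
```

The same check applies to the other LFS pointer files in this commit.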
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:4aad24ca154b919754f5875f6e03fdcc48606af2cb23571c522f04af9b8d0489
+size 580187480
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:35d5c1063271641cefdfb58d59cf96764ab7ab44485463fd2632f10adb9b15e6
 size 1064
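optimizer.pt and scheduler.pt carry the training-resumption state: the optimizer's per-parameter moments (hence its ~580 MB size) and the LR scheduler's tiny state dict (1064 bytes). A sketch of peeking at the scheduler state; the path is an assumption and the exact keys depend on which scheduler class the run used:

```python
# Sketch: inspect the saved LR scheduler state. The path is an assumption;
# typical keys (e.g. last_epoch, _step_count, base_lrs) vary by scheduler class.
import torch

state = torch.load("scheduler.pt", weights_only=True)
print(state)
```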
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.14679062319291847,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -10157,10 +10157,1410 @@
       "learning_rate": 1.9919911810531874e-05,
       "loss": 2.0905,
       "step": 1450
+    },
+    {
+      "epoch": 0.12908678439571195,
+      "grad_norm": 0.674976646900177,
+      "learning_rate": 1.918883128019115e-05,
+      "loss": 2.0356,
+      "step": 1451
+    },
+    {
+      "epoch": 0.12917574840976825,
+      "grad_norm": 0.7310806512832642,
+      "learning_rate": 1.9187728205932974e-05,
+      "loss": 2.1057,
+      "step": 1452
+    },
+    {
+      "epoch": 0.12926471242382456,
+      "grad_norm": 0.7335729598999023,
+      "learning_rate": 1.91866244139214e-05,
+      "loss": 2.024,
+      "step": 1453
+    },
+    {
+      "epoch": 0.12935367643788087,
+      "grad_norm": 0.7030975818634033,
+      "learning_rate": 1.9185519904242647e-05,
+      "loss": 2.1634,
+      "step": 1454
+    },
+    {
+      "epoch": 0.12944264045193718,
+      "grad_norm": 0.7368497848510742,
+      "learning_rate": 1.9184414676983006e-05,
+      "loss": 2.0831,
+      "step": 1455
+    },
+    {
+      "epoch": 0.12953160446599352,
+      "grad_norm": 0.7524637579917908,
+      "learning_rate": 1.9183308732228827e-05,
+      "loss": 2.2064,
+      "step": 1456
+    },
+    {
+      "epoch": 0.12962056848004982,
+      "grad_norm": 0.7159258127212524,
+      "learning_rate": 1.9182202070066494e-05,
+      "loss": 1.9866,
+      "step": 1457
+    },
+    {
+      "epoch": 0.12970953249410613,
+      "grad_norm": 0.7139582633972168,
+      "learning_rate": 1.918109469058247e-05,
+      "loss": 2.1594,
+      "step": 1458
+    },
+    {
+      "epoch": 0.12979849650816244,
+      "grad_norm": 0.6831613183021545,
+      "learning_rate": 1.9179986593863257e-05,
+      "loss": 2.1885,
+      "step": 1459
+    },
+    {
+      "epoch": 0.12988746052221875,
+      "grad_norm": 0.7433372735977173,
+      "learning_rate": 1.9178877779995423e-05,
+      "loss": 2.1902,
+      "step": 1460
+    },
+    {
+      "epoch": 0.1299764245362751,
+      "grad_norm": 0.7060792446136475,
+      "learning_rate": 1.917776824906559e-05,
+      "loss": 2.0515,
+      "step": 1461
+    },
+    {
+      "epoch": 0.1300653885503314,
+      "grad_norm": 0.7091501951217651,
+      "learning_rate": 1.9176658001160443e-05,
+      "loss": 2.1377,
+      "step": 1462
+    },
+    {
+      "epoch": 0.1301543525643877,
+      "grad_norm": 0.7096594572067261,
+      "learning_rate": 1.91755470363667e-05,
+      "loss": 2.1208,
+      "step": 1463
+    },
+    {
+      "epoch": 0.130243316578444,
+      "grad_norm": 0.6679684519767761,
+      "learning_rate": 1.9174435354771167e-05,
+      "loss": 1.9931,
+      "step": 1464
+    },
+    {
+      "epoch": 0.13033228059250032,
+      "grad_norm": 0.7237833738327026,
+      "learning_rate": 1.9173322956460675e-05,
+      "loss": 2.19,
+      "step": 1465
+    },
+    {
+      "epoch": 0.13042124460655666,
+      "grad_norm": 0.716782808303833,
+      "learning_rate": 1.9172209841522134e-05,
+      "loss": 2.2026,
+      "step": 1466
+    },
+    {
+      "epoch": 0.13051020862061297,
+      "grad_norm": 0.7138167023658752,
+      "learning_rate": 1.91710960100425e-05,
+      "loss": 2.1479,
+      "step": 1467
+    },
+    {
+      "epoch": 0.13059917263466927,
+      "grad_norm": 0.6813187599182129,
+      "learning_rate": 1.9169981462108788e-05,
+      "loss": 2.1735,
+      "step": 1468
+    },
+    {
+      "epoch": 0.13068813664872558,
+      "grad_norm": 0.730536937713623,
+      "learning_rate": 1.9168866197808064e-05,
+      "loss": 2.1064,
+      "step": 1469
+    },
+    {
+      "epoch": 0.1307771006627819,
+      "grad_norm": 0.6633700728416443,
+      "learning_rate": 1.9167750217227454e-05,
+      "loss": 2.2471,
+      "step": 1470
+    },
+    {
+      "epoch": 0.13086606467683823,
+      "grad_norm": 0.7200865745544434,
+      "learning_rate": 1.9166633520454144e-05,
+      "loss": 2.1037,
+      "step": 1471
+    },
+    {
+      "epoch": 0.13095502869089454,
+      "grad_norm": 0.6877714991569519,
+      "learning_rate": 1.9165516107575365e-05,
+      "loss": 2.2933,
+      "step": 1472
+    },
+    {
+      "epoch": 0.13104399270495085,
+      "grad_norm": 0.7162935137748718,
+      "learning_rate": 1.9164397978678412e-05,
+      "loss": 2.3149,
+      "step": 1473
+    },
+    {
+      "epoch": 0.13113295671900715,
+      "grad_norm": 0.6733060479164124,
+      "learning_rate": 1.916327913385064e-05,
+      "loss": 2.1126,
+      "step": 1474
+    },
+    {
+      "epoch": 0.13122192073306346,
+      "grad_norm": 0.7055361866950989,
+      "learning_rate": 1.9162159573179446e-05,
+      "loss": 2.2286,
+      "step": 1475
+    },
+    {
+      "epoch": 0.1313108847471198,
+      "grad_norm": 0.7420381307601929,
+      "learning_rate": 1.9161039296752296e-05,
+      "loss": 2.2274,
+      "step": 1476
+    },
+    {
+      "epoch": 0.1313998487611761,
+      "grad_norm": 0.7491070032119751,
+      "learning_rate": 1.9159918304656703e-05,
+      "loss": 2.135,
+      "step": 1477
+    },
+    {
+      "epoch": 0.13148881277523242,
+      "grad_norm": 0.6901069283485413,
+      "learning_rate": 1.9158796596980242e-05,
+      "loss": 1.8872,
+      "step": 1478
+    },
+    {
+      "epoch": 0.13157777678928873,
+      "grad_norm": 0.7616465091705322,
+      "learning_rate": 1.9157674173810544e-05,
+      "loss": 2.2045,
+      "step": 1479
+    },
+    {
+      "epoch": 0.13166674080334503,
+      "grad_norm": 0.6756492257118225,
+      "learning_rate": 1.915655103523529e-05,
+      "loss": 2.1401,
+      "step": 1480
+    },
+    {
+      "epoch": 0.13175570481740137,
+      "grad_norm": 0.7493202686309814,
+      "learning_rate": 1.915542718134223e-05,
+      "loss": 2.1485,
+      "step": 1481
+    },
+    {
+      "epoch": 0.13184466883145768,
+      "grad_norm": 0.7584684491157532,
+      "learning_rate": 1.9154302612219144e-05,
+      "loss": 2.1721,
+      "step": 1482
+    },
+    {
+      "epoch": 0.131933632845514,
+      "grad_norm": 0.6957008838653564,
+      "learning_rate": 1.9153177327953897e-05,
+      "loss": 2.139,
+      "step": 1483
+    },
+    {
+      "epoch": 0.1320225968595703,
+      "grad_norm": 0.672055184841156,
+      "learning_rate": 1.9152051328634393e-05,
+      "loss": 2.2069,
+      "step": 1484
+    },
+    {
+      "epoch": 0.1321115608736266,
+      "grad_norm": 0.6958116292953491,
+      "learning_rate": 1.9150924614348594e-05,
+      "loss": 2.1101,
+      "step": 1485
+    },
+    {
+      "epoch": 0.13220052488768294,
+      "grad_norm": 0.7473313808441162,
+      "learning_rate": 1.9149797185184526e-05,
+      "loss": 2.0489,
+      "step": 1486
+    },
+    {
+      "epoch": 0.13228948890173925,
+      "grad_norm": 0.6686244010925293,
+      "learning_rate": 1.9148669041230257e-05,
+      "loss": 2.1462,
+      "step": 1487
+    },
+    {
+      "epoch": 0.13237845291579556,
+      "grad_norm": 0.6933692097663879,
+      "learning_rate": 1.9147540182573925e-05,
+      "loss": 2.1164,
+      "step": 1488
+    },
+    {
+      "epoch": 0.13246741692985187,
+      "grad_norm": 0.6905208230018616,
+      "learning_rate": 1.9146410609303716e-05,
+      "loss": 2.1732,
+      "step": 1489
+    },
+    {
+      "epoch": 0.13255638094390818,
+      "grad_norm": 0.7162343859672546,
+      "learning_rate": 1.9145280321507872e-05,
+      "loss": 2.0226,
+      "step": 1490
+    },
+    {
+      "epoch": 0.1326453449579645,
+      "grad_norm": 0.6874192357063293,
+      "learning_rate": 1.914414931927469e-05,
+      "loss": 2.107,
+      "step": 1491
+    },
+    {
+      "epoch": 0.13273430897202082,
+      "grad_norm": 0.7397066354751587,
+      "learning_rate": 1.914301760269253e-05,
+      "loss": 2.037,
+      "step": 1492
+    },
+    {
+      "epoch": 0.13282327298607713,
+      "grad_norm": 0.7085984349250793,
+      "learning_rate": 1.91418851718498e-05,
+      "loss": 2.0926,
+      "step": 1493
+    },
+    {
+      "epoch": 0.13291223700013344,
+      "grad_norm": 0.6834755539894104,
+      "learning_rate": 1.9140752026834966e-05,
+      "loss": 2.1119,
+      "step": 1494
+    },
+    {
+      "epoch": 0.13300120101418977,
+      "grad_norm": 0.7041957974433899,
+      "learning_rate": 1.913961816773655e-05,
+      "loss": 2.1888,
+      "step": 1495
+    },
+    {
+      "epoch": 0.13309016502824608,
+      "grad_norm": 0.7494297027587891,
+      "learning_rate": 1.9138483594643133e-05,
+      "loss": 2.1941,
+      "step": 1496
+    },
+    {
+      "epoch": 0.1331791290423024,
+      "grad_norm": 0.6943486928939819,
+      "learning_rate": 1.9137348307643344e-05,
+      "loss": 2.1824,
+      "step": 1497
+    },
+    {
+      "epoch": 0.1332680930563587,
+      "grad_norm": 0.690528154373169,
+      "learning_rate": 1.913621230682588e-05,
+      "loss": 2.1335,
+      "step": 1498
+    },
+    {
+      "epoch": 0.133357057070415,
+      "grad_norm": 0.7519603967666626,
+      "learning_rate": 1.9135075592279475e-05,
+      "loss": 2.0838,
+      "step": 1499
+    },
+    {
+      "epoch": 0.13344602108447134,
+      "grad_norm": 0.7231424450874329,
+      "learning_rate": 1.9133938164092942e-05,
+      "loss": 2.0468,
+      "step": 1500
+    },
+    {
+      "epoch": 0.13353498509852765,
+      "grad_norm": 0.6963239908218384,
+      "learning_rate": 1.913280002235513e-05,
+      "loss": 2.2255,
+      "step": 1501
+    },
+    {
+      "epoch": 0.13362394911258396,
+      "grad_norm": 0.7087194919586182,
+      "learning_rate": 1.9131661167154954e-05,
+      "loss": 2.0769,
+      "step": 1502
+    },
+    {
+      "epoch": 0.13371291312664027,
+      "grad_norm": 0.7032302618026733,
+      "learning_rate": 1.9130521598581385e-05,
+      "loss": 2.1591,
+      "step": 1503
+    },
+    {
+      "epoch": 0.13380187714069658,
+      "grad_norm": 0.7066627144813538,
+      "learning_rate": 1.9129381316723442e-05,
+      "loss": 2.0189,
+      "step": 1504
+    },
+    {
+      "epoch": 0.13389084115475292,
+      "grad_norm": 0.6774100661277771,
+      "learning_rate": 1.912824032167021e-05,
+      "loss": 1.9907,
+      "step": 1505
+    },
+    {
+      "epoch": 0.13397980516880922,
+      "grad_norm": 0.8225818276405334,
+      "learning_rate": 1.9127098613510825e-05,
+      "loss": 2.3418,
+      "step": 1506
+    },
+    {
+      "epoch": 0.13406876918286553,
+      "grad_norm": 0.70097815990448,
+      "learning_rate": 1.9125956192334473e-05,
+      "loss": 2.2569,
+      "step": 1507
+    },
+    {
+      "epoch": 0.13415773319692184,
+      "grad_norm": 0.6995531916618347,
+      "learning_rate": 1.91248130582304e-05,
+      "loss": 2.2324,
+      "step": 1508
+    },
+    {
+      "epoch": 0.13424669721097815,
+      "grad_norm": 0.6896299123764038,
+      "learning_rate": 1.9123669211287916e-05,
+      "loss": 2.0234,
+      "step": 1509
+    },
+    {
+      "epoch": 0.1343356612250345,
+      "grad_norm": 0.6727869510650635,
+      "learning_rate": 1.9122524651596376e-05,
+      "loss": 2.1753,
+      "step": 1510
+    },
+    {
+      "epoch": 0.1344246252390908,
+      "grad_norm": 0.7001076936721802,
+      "learning_rate": 1.912137937924519e-05,
+      "loss": 1.9737,
+      "step": 1511
+    },
+    {
+      "epoch": 0.1345135892531471,
+      "grad_norm": 0.7247818112373352,
+      "learning_rate": 1.9120233394323833e-05,
+      "loss": 1.9521,
+      "step": 1512
+    },
+    {
+      "epoch": 0.1346025532672034,
+      "grad_norm": 0.7155790328979492,
+      "learning_rate": 1.9119086696921826e-05,
+      "loss": 2.0935,
+      "step": 1513
+    },
+    {
+      "epoch": 0.13469151728125972,
+      "grad_norm": 0.8023723363876343,
+      "learning_rate": 1.911793928712876e-05,
+      "loss": 2.2189,
+      "step": 1514
+    },
+    {
+      "epoch": 0.13478048129531606,
+      "grad_norm": 0.6678867340087891,
+      "learning_rate": 1.9116791165034258e-05,
+      "loss": 2.1952,
+      "step": 1515
+    },
+    {
+      "epoch": 0.13486944530937237,
+      "grad_norm": 0.6956883072853088,
+      "learning_rate": 1.9115642330728018e-05,
+      "loss": 2.1945,
+      "step": 1516
+    },
+    {
+      "epoch": 0.13495840932342867,
+      "grad_norm": 0.7054136991500854,
+      "learning_rate": 1.911449278429979e-05,
+      "loss": 2.1408,
+      "step": 1517
+    },
+    {
+      "epoch": 0.13504737333748498,
+      "grad_norm": 0.6842026710510254,
+      "learning_rate": 1.9113342525839372e-05,
+      "loss": 2.2023,
+      "step": 1518
+    },
+    {
+      "epoch": 0.1351363373515413,
+      "grad_norm": 0.7804849147796631,
+      "learning_rate": 1.9112191555436632e-05,
+      "loss": 2.0387,
+      "step": 1519
+    },
+    {
+      "epoch": 0.13522530136559763,
+      "grad_norm": 0.7178310751914978,
+      "learning_rate": 1.9111039873181478e-05,
+      "loss": 2.1217,
+      "step": 1520
+    },
+    {
+      "epoch": 0.13531426537965394,
+      "grad_norm": 0.7458540201187134,
+      "learning_rate": 1.910988747916388e-05,
+      "loss": 1.9931,
+      "step": 1521
+    },
+    {
+      "epoch": 0.13540322939371024,
+      "grad_norm": 0.7119858860969543,
+      "learning_rate": 1.9108734373473874e-05,
+      "loss": 1.9836,
+      "step": 1522
+    },
+    {
+      "epoch": 0.13549219340776655,
+      "grad_norm": 0.721994936466217,
+      "learning_rate": 1.9107580556201527e-05,
+      "loss": 2.152,
+      "step": 1523
+    },
+    {
+      "epoch": 0.13558115742182286,
+      "grad_norm": 0.7333192825317383,
+      "learning_rate": 1.9106426027436985e-05,
+      "loss": 2.2056,
+      "step": 1524
+    },
+    {
+      "epoch": 0.1356701214358792,
+      "grad_norm": 0.7366089820861816,
+      "learning_rate": 1.9105270787270442e-05,
+      "loss": 2.1027,
+      "step": 1525
+    },
+    {
+      "epoch": 0.1357590854499355,
+      "grad_norm": 0.7128590941429138,
+      "learning_rate": 1.910411483579214e-05,
+      "loss": 2.0367,
+      "step": 1526
+    },
+    {
+      "epoch": 0.13584804946399182,
+      "grad_norm": 0.7528856992721558,
+      "learning_rate": 1.9102958173092387e-05,
+      "loss": 2.0628,
+      "step": 1527
+    },
+    {
+      "epoch": 0.13593701347804812,
+      "grad_norm": 0.724431037902832,
+      "learning_rate": 1.9101800799261543e-05,
+      "loss": 2.2369,
+      "step": 1528
+    },
+    {
+      "epoch": 0.13602597749210443,
+      "grad_norm": 0.7336472272872925,
+      "learning_rate": 1.910064271439002e-05,
+      "loss": 2.1532,
+      "step": 1529
+    },
+    {
+      "epoch": 0.13611494150616077,
+      "grad_norm": 0.7338613867759705,
+      "learning_rate": 1.9099483918568294e-05,
+      "loss": 1.9967,
+      "step": 1530
+    },
+    {
+      "epoch": 0.13620390552021708,
+      "grad_norm": 0.6688590049743652,
+      "learning_rate": 1.9098324411886883e-05,
+      "loss": 2.0811,
+      "step": 1531
+    },
+    {
+      "epoch": 0.1362928695342734,
+      "grad_norm": 0.831134021282196,
+      "learning_rate": 1.9097164194436378e-05,
+      "loss": 2.1027,
+      "step": 1532
+    },
+    {
+      "epoch": 0.1363818335483297,
+      "grad_norm": 0.8285362124443054,
+      "learning_rate": 1.909600326630741e-05,
+      "loss": 2.0172,
+      "step": 1533
+    },
+    {
+      "epoch": 0.136470797562386,
+      "grad_norm": 0.7233934998512268,
+      "learning_rate": 1.9094841627590673e-05,
+      "loss": 2.2165,
+      "step": 1534
+    },
+    {
+      "epoch": 0.13655976157644234,
+      "grad_norm": 0.6699313521385193,
+      "learning_rate": 1.9093679278376913e-05,
+      "loss": 2.0704,
+      "step": 1535
+    },
+    {
+      "epoch": 0.13664872559049865,
+      "grad_norm": 0.7230476140975952,
+      "learning_rate": 1.909251621875694e-05,
+      "loss": 2.1482,
+      "step": 1536
+    },
+    {
+      "epoch": 0.13673768960455496,
+      "grad_norm": 0.767360508441925,
+      "learning_rate": 1.9091352448821607e-05,
+      "loss": 2.1803,
+      "step": 1537
+    },
+    {
+      "epoch": 0.13682665361861127,
+      "grad_norm": 0.6728501915931702,
+      "learning_rate": 1.9090187968661834e-05,
+      "loss": 2.1466,
+      "step": 1538
+    },
+    {
+      "epoch": 0.13691561763266757,
+      "grad_norm": 0.6831271648406982,
+      "learning_rate": 1.9089022778368584e-05,
+      "loss": 2.1824,
+      "step": 1539
+    },
+    {
+      "epoch": 0.1370045816467239,
+      "grad_norm": 0.7068498730659485,
+      "learning_rate": 1.908785687803289e-05,
+      "loss": 2.2738,
+      "step": 1540
+    },
+    {
+      "epoch": 0.13709354566078022,
+      "grad_norm": 0.6927396655082703,
+      "learning_rate": 1.9086690267745835e-05,
+      "loss": 2.0958,
+      "step": 1541
+    },
+    {
+      "epoch": 0.13718250967483653,
+      "grad_norm": 0.6951919198036194,
+      "learning_rate": 1.9085522947598542e-05,
+      "loss": 2.0689,
+      "step": 1542
+    },
+    {
+      "epoch": 0.13727147368889284,
+      "grad_norm": 0.7033616900444031,
+      "learning_rate": 1.9084354917682218e-05,
+      "loss": 2.1778,
+      "step": 1543
+    },
+    {
+      "epoch": 0.13736043770294915,
+      "grad_norm": 0.7442080974578857,
+      "learning_rate": 1.9083186178088103e-05,
+      "loss": 2.2193,
+      "step": 1544
+    },
+    {
+      "epoch": 0.13744940171700548,
+      "grad_norm": 0.7784643173217773,
+      "learning_rate": 1.90820167289075e-05,
+      "loss": 2.1083,
+      "step": 1545
+    },
+    {
+      "epoch": 0.1375383657310618,
+      "grad_norm": 0.6760720014572144,
+      "learning_rate": 1.908084657023177e-05,
+      "loss": 2.2061,
+      "step": 1546
+    },
+    {
+      "epoch": 0.1376273297451181,
+      "grad_norm": 0.7411883473396301,
+      "learning_rate": 1.9079675702152327e-05,
+      "loss": 2.1173,
+      "step": 1547
+    },
+    {
+      "epoch": 0.1377162937591744,
+      "grad_norm": 0.730983555316925,
+      "learning_rate": 1.907850412476064e-05,
+      "loss": 2.1532,
+      "step": 1548
+    },
+    {
+      "epoch": 0.13780525777323072,
+      "grad_norm": 0.715723991394043,
+      "learning_rate": 1.9077331838148228e-05,
+      "loss": 2.0931,
+      "step": 1549
+    },
+    {
+      "epoch": 0.13789422178728705,
+      "grad_norm": 0.6817996501922607,
+      "learning_rate": 1.9076158842406677e-05,
+      "loss": 2.1363,
+      "step": 1550
+    },
+    {
+      "epoch": 0.13798318580134336,
+      "grad_norm": 0.7068785429000854,
+      "learning_rate": 1.9074985137627623e-05,
+      "loss": 2.1468,
+      "step": 1551
+    },
+    {
+      "epoch": 0.13807214981539967,
+      "grad_norm": 0.7507544755935669,
+      "learning_rate": 1.9073810723902757e-05,
+      "loss": 2.1402,
+      "step": 1552
+    },
+    {
+      "epoch": 0.13816111382945598,
+      "grad_norm": 0.742301881313324,
+      "learning_rate": 1.9072635601323817e-05,
+      "loss": 2.0598,
+      "step": 1553
+    },
+    {
+      "epoch": 0.1382500778435123,
+      "grad_norm": 0.7125744819641113,
+      "learning_rate": 1.9071459769982615e-05,
+      "loss": 2.1225,
+      "step": 1554
+    },
+    {
+      "epoch": 0.13833904185756862,
+      "grad_norm": 0.6999467611312866,
+      "learning_rate": 1.9070283229971007e-05,
+      "loss": 1.9213,
+      "step": 1555
+    },
+    {
+      "epoch": 0.13842800587162493,
+      "grad_norm": 0.6858512163162231,
+      "learning_rate": 1.9069105981380898e-05,
+      "loss": 2.0182,
+      "step": 1556
+    },
+    {
+      "epoch": 0.13851696988568124,
+      "grad_norm": 0.7477670907974243,
+      "learning_rate": 1.906792802430426e-05,
+      "loss": 2.2359,
+      "step": 1557
+    },
+    {
+      "epoch": 0.13860593389973755,
+      "grad_norm": 0.7466025352478027,
+      "learning_rate": 1.9066749358833117e-05,
+      "loss": 2.1252,
+      "step": 1558
+    },
+    {
+      "epoch": 0.13869489791379386,
+      "grad_norm": 0.7181077599525452,
+      "learning_rate": 1.9065569985059542e-05,
+      "loss": 2.1922,
+      "step": 1559
+    },
+    {
+      "epoch": 0.1387838619278502,
+      "grad_norm": 0.7508081793785095,
+      "learning_rate": 1.9064389903075676e-05,
+      "loss": 2.2926,
+      "step": 1560
+    },
+    {
+      "epoch": 0.1388728259419065,
+      "grad_norm": 0.7642948031425476,
+      "learning_rate": 1.906320911297371e-05,
+      "loss": 2.2114,
+      "step": 1561
+    },
+    {
+      "epoch": 0.1389617899559628,
+      "grad_norm": 0.7403438687324524,
+      "learning_rate": 1.9062027614845877e-05,
+      "loss": 2.1091,
+      "step": 1562
+    },
+    {
+      "epoch": 0.13905075397001912,
+      "grad_norm": 0.6996174454689026,
+      "learning_rate": 1.9060845408784486e-05,
+      "loss": 2.1427,
+      "step": 1563
+    },
+    {
+      "epoch": 0.13913971798407543,
+      "grad_norm": 0.7214592695236206,
+      "learning_rate": 1.905966249488189e-05,
+      "loss": 2.2872,
+      "step": 1564
+    },
+    {
+      "epoch": 0.13922868199813176,
+      "grad_norm": 0.7074094414710999,
+      "learning_rate": 1.9058478873230494e-05,
+      "loss": 2.1028,
+      "step": 1565
+    },
+    {
+      "epoch": 0.13931764601218807,
+      "grad_norm": 0.7104836702346802,
+      "learning_rate": 1.9057294543922768e-05,
+      "loss": 1.9901,
+      "step": 1566
+    },
+    {
+      "epoch": 0.13940661002624438,
+      "grad_norm": 0.7215651273727417,
+      "learning_rate": 1.9056109507051236e-05,
+      "loss": 2.089,
+      "step": 1567
+    },
+    {
+      "epoch": 0.1394955740403007,
+      "grad_norm": 0.6993376612663269,
+      "learning_rate": 1.9054923762708472e-05,
+      "loss": 2.1071,
+      "step": 1568
+    },
+    {
+      "epoch": 0.139584538054357,
+      "grad_norm": 0.7807263731956482,
+      "learning_rate": 1.90537373109871e-05,
+      "loss": 2.127,
+      "step": 1569
+    },
+    {
+      "epoch": 0.13967350206841334,
+      "grad_norm": 0.816089928150177,
+      "learning_rate": 1.905255015197982e-05,
+      "loss": 2.2639,
+      "step": 1570
+    },
+    {
+      "epoch": 0.13976246608246964,
+      "grad_norm": 0.7122036218643188,
+      "learning_rate": 1.9051362285779363e-05,
+      "loss": 2.2795,
+      "step": 1571
+    },
+    {
+      "epoch": 0.13985143009652595,
+      "grad_norm": 0.7390177249908447,
+      "learning_rate": 1.905017371247853e-05,
+      "loss": 2.0848,
+      "step": 1572
+    },
+    {
+      "epoch": 0.13994039411058226,
+      "grad_norm": 0.699128270149231,
+      "learning_rate": 1.9048984432170175e-05,
+      "loss": 2.1444,
+      "step": 1573
+    },
+    {
+      "epoch": 0.14002935812463857,
+      "grad_norm": 0.7561851739883423,
+      "learning_rate": 1.9047794444947204e-05,
+      "loss": 2.0741,
+      "step": 1574
+    },
+    {
+      "epoch": 0.1401183221386949,
+      "grad_norm": 0.7206645011901855,
+      "learning_rate": 1.9046603750902578e-05,
+      "loss": 2.0631,
+      "step": 1575
+    },
+    {
+      "epoch": 0.14020728615275121,
+      "grad_norm": 0.7066382765769958,
+      "learning_rate": 1.9045412350129314e-05,
+      "loss": 2.0786,
+      "step": 1576
+    },
+    {
+      "epoch": 0.14029625016680752,
+      "grad_norm": 0.7629712820053101,
+      "learning_rate": 1.9044220242720494e-05,
+      "loss": 2.2756,
+      "step": 1577
+    },
+    {
+      "epoch": 0.14038521418086383,
+      "grad_norm": 0.730331301689148,
+      "learning_rate": 1.904302742876924e-05,
+      "loss": 2.1321,
+      "step": 1578
+    },
+    {
+      "epoch": 0.14047417819492017,
+      "grad_norm": 0.7184867262840271,
+      "learning_rate": 1.9041833908368736e-05,
+      "loss": 2.1348,
+      "step": 1579
+    },
+    {
+      "epoch": 0.14056314220897648,
+      "grad_norm": 0.7042089104652405,
+      "learning_rate": 1.904063968161222e-05,
+      "loss": 2.2112,
+      "step": 1580
+    },
+    {
+      "epoch": 0.14065210622303279,
+      "grad_norm": 0.7240262627601624,
+      "learning_rate": 1.9039444748592984e-05,
+      "loss": 2.0821,
+      "step": 1581
+    },
+    {
+      "epoch": 0.1407410702370891,
+      "grad_norm": 0.717921793460846,
+      "learning_rate": 1.9038249109404386e-05,
+      "loss": 2.2045,
+      "step": 1582
+    },
+    {
+      "epoch": 0.1408300342511454,
+      "grad_norm": 0.692244291305542,
+      "learning_rate": 1.903705276413982e-05,
+      "loss": 2.1747,
+      "step": 1583
+    },
+    {
+      "epoch": 0.14091899826520174,
+      "grad_norm": 0.7028403282165527,
+      "learning_rate": 1.9035855712892753e-05,
+      "loss": 1.9697,
+      "step": 1584
+    },
+    {
+      "epoch": 0.14100796227925805,
+      "grad_norm": 0.6866932511329651,
+      "learning_rate": 1.9034657955756695e-05,
+      "loss": 2.1732,
+      "step": 1585
+    },
+    {
+      "epoch": 0.14109692629331436,
+      "grad_norm": 0.7175632119178772,
+      "learning_rate": 1.903345949282522e-05,
+      "loss": 2.3072,
+      "step": 1586
+    },
+    {
+      "epoch": 0.14118589030737067,
+      "grad_norm": 0.7163522839546204,
+      "learning_rate": 1.903226032419195e-05,
+      "loss": 2.2625,
+      "step": 1587
+    },
+    {
+      "epoch": 0.14127485432142697,
+      "grad_norm": 0.7228026986122131,
+      "learning_rate": 1.9031060449950568e-05,
+      "loss": 2.1505,
+      "step": 1588
+    },
+    {
+      "epoch": 0.1413638183354833,
+      "grad_norm": 0.7363160252571106,
+      "learning_rate": 1.9029859870194806e-05,
+      "loss": 2.2136,
+      "step": 1589
+    },
+    {
+      "epoch": 0.14145278234953962,
+      "grad_norm": 0.7044832706451416,
+      "learning_rate": 1.9028658585018455e-05,
+      "loss": 2.1166,
+      "step": 1590
+    },
+    {
+      "epoch": 0.14154174636359593,
+      "grad_norm": 0.7056241035461426,
+      "learning_rate": 1.902745659451536e-05,
+      "loss": 1.944,
+      "step": 1591
+    },
+    {
+      "epoch": 0.14163071037765224,
+      "grad_norm": 0.687555730342865,
+      "learning_rate": 1.9026253898779426e-05,
+      "loss": 2.1491,
+      "step": 1592
+    },
+    {
+      "epoch": 0.14171967439170854,
+      "grad_norm": 0.727637767791748,
+      "learning_rate": 1.90250504979046e-05,
+      "loss": 2.0347,
+      "step": 1593
+    },
+    {
+      "epoch": 0.14180863840576488,
+      "grad_norm": 0.71175217628479,
+      "learning_rate": 1.9023846391984905e-05,
+      "loss": 2.1626,
+      "step": 1594
+    },
+    {
+      "epoch": 0.1418976024198212,
+      "grad_norm": 0.7223698496818542,
+      "learning_rate": 1.9022641581114392e-05,
+      "loss": 1.9723,
+      "step": 1595
+    },
+    {
+      "epoch": 0.1419865664338775,
+      "grad_norm": 0.7376037240028381,
+      "learning_rate": 1.9021436065387195e-05,
+      "loss": 2.0388,
+      "step": 1596
+    },
+    {
+      "epoch": 0.1420755304479338,
+      "grad_norm": 0.6674297451972961,
+      "learning_rate": 1.9020229844897483e-05,
+      "loss": 1.9178,
+      "step": 1597
+    },
+    {
+      "epoch": 0.14216449446199012,
+      "grad_norm": 0.6752891540527344,
+      "learning_rate": 1.9019022919739486e-05,
+      "loss": 1.9869,
+      "step": 1598
+    },
+    {
+      "epoch": 0.14225345847604645,
+      "grad_norm": 0.8238239288330078,
+      "learning_rate": 1.9017815290007497e-05,
+      "loss": 2.2705,
+      "step": 1599
+    },
+    {
+      "epoch": 0.14234242249010276,
+      "grad_norm": 0.7054691910743713,
+      "learning_rate": 1.901660695579585e-05,
+      "loss": 2.127,
+      "step": 1600
+    },
+    {
+      "epoch": 0.14243138650415907,
+      "grad_norm": 0.723223090171814,
+      "learning_rate": 1.9015397917198947e-05,
+      "loss": 2.1819,
+      "step": 1601
+    },
+    {
+      "epoch": 0.14252035051821538,
+      "grad_norm": 0.7419965267181396,
+      "learning_rate": 1.901418817431123e-05,
+      "loss": 2.0055,
+      "step": 1602
+    },
+    {
+      "epoch": 0.14260931453227169,
+      "grad_norm": 0.7081524133682251,
+      "learning_rate": 1.9012977727227214e-05,
+      "loss": 2.0034,
+      "step": 1603
+    },
+    {
+      "epoch": 0.14269827854632802,
+      "grad_norm": 0.7017990350723267,
+      "learning_rate": 1.901176657604146e-05,
+      "loss": 2.2058,
+      "step": 1604
+    },
+    {
+      "epoch": 0.14278724256038433,
+      "grad_norm": 0.7065199613571167,
+      "learning_rate": 1.901055472084858e-05,
+      "loss": 2.0947,
+      "step": 1605
+    },
+    {
+      "epoch": 0.14287620657444064,
+      "grad_norm": 0.7354803681373596,
+      "learning_rate": 1.9009342161743248e-05,
+      "loss": 2.0925,
+      "step": 1606
+    },
+    {
+      "epoch": 0.14296517058849695,
+      "grad_norm": 0.7831696271896362,
+      "learning_rate": 1.9008128898820188e-05,
+      "loss": 1.9525,
+      "step": 1607
+    },
+    {
+      "epoch": 0.14305413460255326,
+      "grad_norm": 0.6968514919281006,
+      "learning_rate": 1.900691493217418e-05,
+      "loss": 1.9783,
+      "step": 1608
+    },
+    {
+      "epoch": 0.1431430986166096,
+      "grad_norm": 0.741797924041748,
+      "learning_rate": 1.9005700261900063e-05,
+      "loss": 2.2641,
+      "step": 1609
+    },
+    {
+      "epoch": 0.1432320626306659,
+      "grad_norm": 0.7444104552268982,
+      "learning_rate": 1.900448488809273e-05,
+      "loss": 2.0563,
+      "step": 1610
+    },
+    {
+      "epoch": 0.1433210266447222,
+      "grad_norm": 0.6823083758354187,
+      "learning_rate": 1.900326881084712e-05,
+      "loss": 2.05,
+      "step": 1611
+    },
+    {
+      "epoch": 0.14340999065877852,
+      "grad_norm": 0.7570714950561523,
+      "learning_rate": 1.9002052030258244e-05,
+      "loss": 1.9717,
+      "step": 1612
+    },
+    {
+      "epoch": 0.14349895467283483,
+      "grad_norm": 0.9097091555595398,
+      "learning_rate": 1.900083454642115e-05,
+      "loss": 2.0523,
+      "step": 1613
+    },
+    {
+      "epoch": 0.14358791868689116,
+      "grad_norm": 0.7547870874404907,
+      "learning_rate": 1.899961635943095e-05,
+      "loss": 2.1613,
+      "step": 1614
+    },
+    {
+      "epoch": 0.14367688270094747,
+      "grad_norm": 0.7002084851264954,
+      "learning_rate": 1.8998397469382812e-05,
+      "loss": 2.093,
+      "step": 1615
+    },
+    {
+      "epoch": 0.14376584671500378,
+      "grad_norm": 0.7612960934638977,
+      "learning_rate": 1.8997177876371958e-05,
+      "loss": 2.1898,
+      "step": 1616
+    },
+    {
+      "epoch": 0.1438548107290601,
+      "grad_norm": 0.7901351451873779,
+      "learning_rate": 1.899595758049366e-05,
+      "loss": 2.1686,
+      "step": 1617
+    },
+    {
+      "epoch": 0.1439437747431164,
+      "grad_norm": 0.7260928153991699,
+      "learning_rate": 1.899473658184325e-05,
+      "loss": 2.08,
+      "step": 1618
+    },
+    {
+      "epoch": 0.14403273875717273,
+      "grad_norm": 0.7595137357711792,
+      "learning_rate": 1.8993514880516114e-05,
+      "loss": 2.1567,
+      "step": 1619
+    },
+    {
+      "epoch": 0.14412170277122904,
+      "grad_norm": 0.6946915984153748,
+      "learning_rate": 1.899229247660769e-05,
+      "loss": 2.1557,
+      "step": 1620
+    },
+    {
+      "epoch": 0.14421066678528535,
+      "grad_norm": 0.8131952285766602,
+      "learning_rate": 1.8991069370213477e-05,
+      "loss": 2.042,
+      "step": 1621
+    },
+    {
+      "epoch": 0.14429963079934166,
+      "grad_norm": 0.7136657238006592,
+      "learning_rate": 1.8989845561429025e-05,
+      "loss": 2.1521,
+      "step": 1622
+    },
+    {
+      "epoch": 0.14438859481339797,
+      "grad_norm": 0.694391667842865,
+      "learning_rate": 1.8988621050349936e-05,
+      "loss": 2.1343,
+      "step": 1623
+    },
+    {
+      "epoch": 0.1444775588274543,
+      "grad_norm": 0.8052846789360046,
+      "learning_rate": 1.898739583707187e-05,
+      "loss": 2.0542,
+      "step": 1624
+    },
+    {
+      "epoch": 0.14456652284151061,
+      "grad_norm": 0.7174726128578186,
+      "learning_rate": 1.8986169921690546e-05,
+      "loss": 2.0926,
+      "step": 1625
+    },
+    {
+      "epoch": 0.14465548685556692,
+      "grad_norm": 0.7309580445289612,
+      "learning_rate": 1.898494330430173e-05,
+      "loss": 2.1391,
+      "step": 1626
+    },
+    {
+      "epoch": 0.14474445086962323,
+      "grad_norm": 0.7136374711990356,
+      "learning_rate": 1.8983715985001245e-05,
+      "loss": 1.9662,
+      "step": 1627
+    },
+    {
+      "epoch": 0.14483341488367954,
+      "grad_norm": 0.7026463150978088,
+      "learning_rate": 1.8982487963884975e-05,
+      "loss": 2.082,
+      "step": 1628
+    },
+    {
+      "epoch": 0.14492237889773588,
+      "grad_norm": 0.7408065795898438,
+      "learning_rate": 1.898125924104885e-05,
+      "loss": 2.0902,
+      "step": 1629
+    },
+    {
+      "epoch": 0.14501134291179218,
+      "grad_norm": 0.6933066248893738,
+      "learning_rate": 1.898002981658886e-05,
+      "loss": 2.1383,
+      "step": 1630
+    },
+    {
+      "epoch": 0.1451003069258485,
+      "grad_norm": 0.7678527235984802,
+      "learning_rate": 1.897879969060105e-05,
+      "loss": 2.0613,
+      "step": 1631
+    },
+    {
+      "epoch": 0.1451892709399048,
+      "grad_norm": 0.7678179740905762,
+      "learning_rate": 1.8977568863181517e-05,
+      "loss": 2.2704,
+      "step": 1632
+    },
+    {
+      "epoch": 0.1452782349539611,
+      "grad_norm": 0.7525485157966614,
+      "learning_rate": 1.8976337334426417e-05,
+      "loss": 2.1422,
+      "step": 1633
+    },
+    {
+      "epoch": 0.14536719896801745,
+      "grad_norm": 0.8016729354858398,
+      "learning_rate": 1.8975105104431953e-05,
+      "loss": 2.1039,
+      "step": 1634
+    },
+    {
+      "epoch": 0.14545616298207376,
+      "grad_norm": 0.7251212000846863,
+      "learning_rate": 1.8973872173294394e-05,
+      "loss": 2.2837,
+      "step": 1635
+    },
+    {
+      "epoch": 0.14554512699613006,
+      "grad_norm": 0.7730448246002197,
+      "learning_rate": 1.8972638541110053e-05,
+      "loss": 2.0775,
+      "step": 1636
+    },
+    {
+      "epoch": 0.14563409101018637,
+      "grad_norm": 0.6937607526779175,
+      "learning_rate": 1.8971404207975303e-05,
+      "loss": 2.0895,
+      "step": 1637
+    },
+    {
+      "epoch": 0.14572305502424268,
+      "grad_norm": 0.7883414030075073,
+      "learning_rate": 1.8970169173986573e-05,
+      "loss": 2.1418,
+      "step": 1638
+    },
+    {
+      "epoch": 0.14581201903829902,
+      "grad_norm": 0.734897255897522,
+      "learning_rate": 1.8968933439240347e-05,
+      "loss": 2.1223,
+      "step": 1639
+    },
+    {
+      "epoch": 0.14590098305235533,
+      "grad_norm": 0.7466630935668945,
+      "learning_rate": 1.8967697003833156e-05,
+      "loss": 1.9625,
+      "step": 1640
+    },
+    {
+      "epoch": 0.14598994706641163,
+      "grad_norm": 0.7184388637542725,
+      "learning_rate": 1.8966459867861596e-05,
+      "loss": 2.2619,
+      "step": 1641
+    },
+    {
+      "epoch": 0.14607891108046794,
+      "grad_norm": 0.8266288042068481,
+      "learning_rate": 1.896522203142231e-05,
+      "loss": 2.2181,
+      "step": 1642
+    },
+    {
+      "epoch": 0.14616787509452425,
+      "grad_norm": 0.6852174401283264,
+      "learning_rate": 1.8963983494611998e-05,
+      "loss": 2.1574,
+      "step": 1643
+    },
+    {
+      "epoch": 0.1462568391085806,
+      "grad_norm": 0.7667473554611206,
+      "learning_rate": 1.8962744257527423e-05,
+      "loss": 2.1507,
+      "step": 1644
+    },
+    {
+      "epoch": 0.1463458031226369,
+      "grad_norm": 0.7464703917503357,
+      "learning_rate": 1.8961504320265386e-05,
+      "loss": 2.0421,
+      "step": 1645
+    },
+    {
+      "epoch": 0.1464347671366932,
+      "grad_norm": 0.772483766078949,
+      "learning_rate": 1.896026368292276e-05,
+      "loss": 2.2768,
+      "step": 1646
+    },
+    {
+      "epoch": 0.14652373115074951,
+      "grad_norm": 0.7036101222038269,
+      "learning_rate": 1.8959022345596455e-05,
+      "loss": 1.9817,
+      "step": 1647
+    },
+    {
+      "epoch": 0.14661269516480582,
+      "grad_norm": 0.6918807625770569,
+      "learning_rate": 1.8957780308383458e-05,
+      "loss": 2.1934,
+      "step": 1648
+    },
+    {
+      "epoch": 0.14670165917886216,
+      "grad_norm": 0.732772707939148,
+      "learning_rate": 1.8956537571380788e-05,
+      "loss": 2.0385,
+      "step": 1649
+    },
+    {
+      "epoch": 0.14679062319291847,
+      "grad_norm": 0.7011646628379822,
+      "learning_rate": 1.895529413468553e-05,
+      "loss": 2.1243,
+      "step": 1650
     }
   ],
   "logging_steps": 1,
-  "max_steps":
+  "max_steps": 11240,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 50,
@@ -10176,7 +11576,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 3.906289558093824e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
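The new trainer_state.json is internally consistent with the checkpoint name: save_steps is 50, global_step is 1650, and the recorded epoch is roughly global_step divided by the number of steps in one epoch (slightly more than max_steps here, so the ratio is close to but not exactly the stored value). A quick check with values copied from the diff:

```python
# Consistency check on values recorded in the trainer_state.json diff above.
global_step = 1650
max_steps = 11240
epoch = 0.14679062319291847
save_steps = 50

assert global_step % save_steps == 0   # checkpoint-1650 lands on a save boundary
print(global_step / max_steps)         # 0.14679715..., close to the stored epoch
print(epoch)                           # 0.14679062319291847
```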
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:46d940da433d13bc5e65c376f5b517905aff731612ae89e89ae9dff2651f6f88
 size 5624
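training_args.bin is a pickled TrainingArguments object saved next to the checkpoint. A minimal inspection sketch, assuming the payload is local; weights_only=False is needed because it is a full Python object, so only load files you trust:

```python
# Sketch: load the pickled TrainingArguments from this checkpoint.
# Requires transformers to be installed; the path is an assumption.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.per_device_train_batch_size)  # should match "train_batch_size": 1 above
print(args.save_steps)                   # should match "save_steps": 50 above
```

With a Trainer rebuilt from these arguments, `trainer.train(resume_from_checkpoint="checkpoint-1650")` restores the optimizer, scheduler, and trainer state saved in this commit.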