Training in progress, step 1344, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7cfd8a262e60cb5b83c188d6510fc245b060e39d1b7b22ccedb2a7cf3f5ff2ce
 size 20652232
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f5e951f81ca2dd3bb4c5fe725fab1e06ccce1f821f897ec5ff0487276f71010d
 size 41455802
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2bfbb2ca33cc9b3557235510ae3199cb13a13731b28befe7890768a81a0291b3
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:38b7f1c361dec5189a47d902296a5c2bdaac9aa1cf564418b0600c3c3f362490
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9998140226892319,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1344,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8372,6 +8372,1049 @@
       "learning_rate": 3.498172085070084e-06,
       "loss": 9.5849,
       "step": 1195
+    },
+    {
+      "epoch": 0.8897154547145248,
+      "grad_norm": 3.594160795211792,
+      "learning_rate": 3.4519204341786902e-06,
+      "loss": 9.5811,
+      "step": 1196
+    },
+    {
+      "epoch": 0.8904593639575972,
+      "grad_norm": 3.5941364765167236,
+      "learning_rate": 3.405965649329046e-06,
+      "loss": 9.5818,
+      "step": 1197
+    },
+    {
+      "epoch": 0.8912032732006695,
+      "grad_norm": 3.5020856857299805,
+      "learning_rate": 3.3603080236033867e-06,
+      "loss": 9.5884,
+      "step": 1198
+    },
+    {
+      "epoch": 0.8919471824437418,
+      "grad_norm": 3.4808876514434814,
+      "learning_rate": 3.3149478481887598e-06,
+      "loss": 9.5994,
+      "step": 1199
+    },
+    {
+      "epoch": 0.8926910916868142,
+      "grad_norm": 3.767507553100586,
+      "learning_rate": 3.269885412375223e-06,
+      "loss": 9.5746,
+      "step": 1200
+    },
+    {
+      "epoch": 0.8934350009298866,
+      "grad_norm": 3.547511339187622,
+      "learning_rate": 3.2251210035539323e-06,
+      "loss": 9.5839,
+      "step": 1201
+    },
+    {
+      "epoch": 0.8941789101729589,
+      "grad_norm": 3.767230272293091,
+      "learning_rate": 3.1806549072153635e-06,
+      "loss": 9.574,
+      "step": 1202
+    },
+    {
+      "epoch": 0.8949228194160312,
+      "grad_norm": 3.5477194786071777,
+      "learning_rate": 3.1364874069474527e-06,
+      "loss": 9.5837,
+      "step": 1203
+    },
+    {
+      "epoch": 0.8956667286591036,
+      "grad_norm": 3.76715350151062,
+      "learning_rate": 3.0926187844337984e-06,
+      "loss": 9.5745,
+      "step": 1204
+    },
+    {
+      "epoch": 0.8964106379021759,
+      "grad_norm": 3.7672533988952637,
+      "learning_rate": 3.0490493194518855e-06,
+      "loss": 9.5739,
+      "step": 1205
+    },
+    {
+      "epoch": 0.8971545471452482,
+      "grad_norm": 3.87873911857605,
+      "learning_rate": 3.00577928987128e-06,
+      "loss": 9.5715,
+      "step": 1206
+    },
+    {
+      "epoch": 0.8978984563883207,
+      "grad_norm": 3.672361373901367,
+      "learning_rate": 2.962808971651859e-06,
+      "loss": 9.5768,
+      "step": 1207
+    },
+    {
+      "epoch": 0.898642365631393,
+      "grad_norm": 3.688178539276123,
+      "learning_rate": 2.920138638842068e-06,
+      "loss": 9.5783,
+      "step": 1208
+    },
+    {
+      "epoch": 0.8993862748744653,
+      "grad_norm": 3.767151355743408,
+      "learning_rate": 2.8777685635771155e-06,
+      "loss": 9.5734,
+      "step": 1209
+    },
+    {
+      "epoch": 0.9001301841175376,
+      "grad_norm": 3.5939481258392334,
+      "learning_rate": 2.835699016077353e-06,
+      "loss": 9.5818,
+      "step": 1210
+    },
+    {
+      "epoch": 0.90087409336061,
+      "grad_norm": 3.6881825923919678,
+      "learning_rate": 2.793930264646405e-06,
+      "loss": 9.5778,
+      "step": 1211
+    },
+    {
+      "epoch": 0.9016180026036823,
+      "grad_norm": 3.60945463180542,
+      "learning_rate": 2.7524625756695954e-06,
+      "loss": 9.5813,
+      "step": 1212
+    },
+    {
+      "epoch": 0.9023619118467547,
+      "grad_norm": 3.473451852798462,
+      "learning_rate": 2.711296213612119e-06,
+      "loss": 9.5915,
+      "step": 1213
+    },
+    {
+      "epoch": 0.9031058210898271,
+      "grad_norm": 3.5476932525634766,
+      "learning_rate": 2.6704314410174958e-06,
+      "loss": 9.5854,
+      "step": 1214
+    },
+    {
+      "epoch": 0.9038497303328994,
+      "grad_norm": 3.878340005874634,
+      "learning_rate": 2.6298685185057735e-06,
+      "loss": 9.5707,
+      "step": 1215
+    },
+    {
+      "epoch": 0.9045936395759717,
+      "grad_norm": 3.878525972366333,
+      "learning_rate": 2.5896077047719237e-06,
+      "loss": 9.5708,
+      "step": 1216
+    },
+    {
+      "epoch": 0.905337548819044,
+      "grad_norm": 3.5938403606414795,
+      "learning_rate": 2.549649256584191e-06,
+      "loss": 9.5824,
+      "step": 1217
+    },
+    {
+      "epoch": 0.9060814580621164,
+      "grad_norm": 3.593989372253418,
+      "learning_rate": 2.50999342878242e-06,
+      "loss": 9.5813,
+      "step": 1218
+    },
+    {
+      "epoch": 0.9068253673051888,
+      "grad_norm": 3.6723949909210205,
+      "learning_rate": 2.470640474276509e-06,
+      "loss": 9.5778,
+      "step": 1219
+    },
+    {
+      "epoch": 0.9075692765482611,
+      "grad_norm": 3.5473990440368652,
+      "learning_rate": 2.4315906440446955e-06,
+      "loss": 9.5852,
+      "step": 1220
+    },
+    {
+      "epoch": 0.9083131857913335,
+      "grad_norm": 3.5474307537078857,
+      "learning_rate": 2.3928441871320263e-06,
+      "loss": 9.5848,
+      "step": 1221
+    },
+    {
+      "epoch": 0.9090570950344058,
+      "grad_norm": 3.767143964767456,
+      "learning_rate": 2.3544013506487496e-06,
+      "loss": 9.5743,
+      "step": 1222
+    },
+    {
+      "epoch": 0.9098010042774781,
+      "grad_norm": 3.5938961505889893,
+      "learning_rate": 2.3162623797687245e-06,
+      "loss": 9.5823,
+      "step": 1223
+    },
+    {
+      "epoch": 0.9105449135205504,
+      "grad_norm": 3.6094627380371094,
+      "learning_rate": 2.2784275177278934e-06,
+      "loss": 9.5804,
+      "step": 1224
+    },
+    {
+      "epoch": 0.9112888227636229,
+      "grad_norm": 3.593992233276367,
+      "learning_rate": 2.240897005822684e-06,
+      "loss": 9.581,
+      "step": 1225
+    },
+    {
+      "epoch": 0.9120327320066952,
+      "grad_norm": 3.5026135444641113,
+      "learning_rate": 2.2036710834084986e-06,
+      "loss": 9.5884,
+      "step": 1226
+    },
+    {
+      "epoch": 0.9127766412497675,
+      "grad_norm": 3.4764721393585205,
+      "learning_rate": 2.1667499878981866e-06,
+      "loss": 9.595,
+      "step": 1227
+    },
+    {
+      "epoch": 0.9135205504928399,
+      "grad_norm": 3.4730682373046875,
+      "learning_rate": 2.130133954760538e-06,
+      "loss": 9.5912,
+      "step": 1228
+    },
+    {
+      "epoch": 0.9142644597359122,
+      "grad_norm": 3.5019729137420654,
+      "learning_rate": 2.0938232175187645e-06,
+      "loss": 9.5887,
+      "step": 1229
+    },
+    {
+      "epoch": 0.9150083689789845,
+      "grad_norm": 3.6096534729003906,
+      "learning_rate": 2.0578180077489904e-06,
+      "loss": 9.5819,
+      "step": 1230
+    },
+    {
+      "epoch": 0.915752278222057,
+      "grad_norm": 3.767604351043701,
+      "learning_rate": 2.0221185550788335e-06,
+      "loss": 9.5749,
+      "step": 1231
+    },
+    {
+      "epoch": 0.9164961874651293,
+      "grad_norm": 3.5474653244018555,
+      "learning_rate": 1.986725087185898e-06,
+      "loss": 9.584,
+      "step": 1232
+    },
+    {
+      "epoch": 0.9172400967082016,
+      "grad_norm": 3.4607582092285156,
+      "learning_rate": 1.951637829796338e-06,
+      "loss": 9.595,
+      "step": 1233
+    },
+    {
+      "epoch": 0.9179840059512739,
+      "grad_norm": 3.7671072483062744,
+      "learning_rate": 1.916857006683398e-06,
+      "loss": 9.5752,
+      "step": 1234
+    },
+    {
+      "epoch": 0.9187279151943463,
+      "grad_norm": 3.6726598739624023,
+      "learning_rate": 1.8823828396660081e-06,
+      "loss": 9.5787,
+      "step": 1235
+    },
+    {
+      "epoch": 0.9194718244374186,
+      "grad_norm": 3.6729495525360107,
+      "learning_rate": 1.8482155486073739e-06,
+      "loss": 9.5783,
+      "step": 1236
+    },
+    {
+      "epoch": 0.920215733680491,
+      "grad_norm": 3.879298448562622,
+      "learning_rate": 1.814355351413538e-06,
+      "loss": 9.5706,
+      "step": 1237
+    },
+    {
+      "epoch": 0.9209596429235634,
+      "grad_norm": 3.6096079349517822,
+      "learning_rate": 1.7808024640320498e-06,
+      "loss": 9.5803,
+      "step": 1238
+    },
+    {
+      "epoch": 0.9217035521666357,
+      "grad_norm": 3.5938854217529297,
+      "learning_rate": 1.7475571004505087e-06,
+      "loss": 9.5813,
+      "step": 1239
+    },
+    {
+      "epoch": 0.922447461409708,
+      "grad_norm": 3.60953688621521,
+      "learning_rate": 1.714619472695278e-06,
+      "loss": 9.5811,
+      "step": 1240
+    },
+    {
+      "epoch": 0.9231913706527803,
+      "grad_norm": 3.609628200531006,
+      "learning_rate": 1.6819897908300853e-06,
+      "loss": 9.5814,
+      "step": 1241
+    },
+    {
+      "epoch": 0.9239352798958527,
+      "grad_norm": 3.5939552783966064,
+      "learning_rate": 1.6496682629546956e-06,
+      "loss": 9.5807,
+      "step": 1242
+    },
+    {
+      "epoch": 0.9246791891389251,
+      "grad_norm": 3.501974582672119,
+      "learning_rate": 1.6176550952035908e-06,
+      "loss": 9.5882,
+      "step": 1243
+    },
+    {
+      "epoch": 0.9254230983819974,
+      "grad_norm": 3.5018725395202637,
+      "learning_rate": 1.5859504917446366e-06,
+      "loss": 9.5878,
+      "step": 1244
+    },
+    {
+      "epoch": 0.9261670076250698,
+      "grad_norm": 3.5475661754608154,
+      "learning_rate": 1.554554654777801e-06,
+      "loss": 9.5856,
+      "step": 1245
+    },
+    {
+      "epoch": 0.9269109168681421,
+      "grad_norm": 3.53214430809021,
+      "learning_rate": 1.5234677845338607e-06,
+      "loss": 9.5855,
+      "step": 1246
+    },
+    {
+      "epoch": 0.9276548261112144,
+      "grad_norm": 3.67262601852417,
+      "learning_rate": 1.492690079273118e-06,
+      "loss": 9.578,
+      "step": 1247
+    },
+    {
+      "epoch": 0.9283987353542867,
+      "grad_norm": 3.5474252700805664,
+      "learning_rate": 1.4622217352841138e-06,
+      "loss": 9.5852,
+      "step": 1248
+    },
+    {
+      "epoch": 0.9291426445973591,
+      "grad_norm": 3.6728267669677734,
+      "learning_rate": 1.4320629468824286e-06,
+      "loss": 9.5787,
+      "step": 1249
+    },
+    {
+      "epoch": 0.9298865538404315,
+      "grad_norm": 3.53242564201355,
+      "learning_rate": 1.4022139064094165e-06,
+      "loss": 9.5855,
+      "step": 1250
+    },
+    {
+      "epoch": 0.9306304630835038,
+      "grad_norm": 3.7670955657958984,
+      "learning_rate": 1.3726748042309557e-06,
+      "loss": 9.5742,
+      "step": 1251
+    },
+    {
+      "epoch": 0.9313743723265762,
+      "grad_norm": 3.672420024871826,
+      "learning_rate": 1.3434458287362672e-06,
+      "loss": 9.5771,
+      "step": 1252
+    },
+    {
+      "epoch": 0.9321182815696485,
+      "grad_norm": 3.8784019947052,
+      "learning_rate": 1.3145271663366932e-06,
+      "loss": 9.5716,
+      "step": 1253
+    },
+    {
+      "epoch": 0.9328621908127208,
+      "grad_norm": 3.6880033016204834,
+      "learning_rate": 1.2859190014645305e-06,
+      "loss": 9.5779,
+      "step": 1254
+    },
+    {
+      "epoch": 0.9336061000557931,
+      "grad_norm": 3.767427682876587,
+      "learning_rate": 1.257621516571822e-06,
+      "loss": 9.5742,
+      "step": 1255
+    },
+    {
+      "epoch": 0.9343500092988656,
+      "grad_norm": 3.878462314605713,
+      "learning_rate": 1.2296348921292333e-06,
+      "loss": 9.57,
+      "step": 1256
+    },
+    {
+      "epoch": 0.9350939185419379,
+      "grad_norm": 3.6724817752838135,
+      "learning_rate": 1.2019593066248614e-06,
+      "loss": 9.5777,
+      "step": 1257
+    },
+    {
+      "epoch": 0.9358378277850102,
+      "grad_norm": 3.5474514961242676,
+      "learning_rate": 1.1745949365631115e-06,
+      "loss": 9.5851,
+      "step": 1258
+    },
+    {
+      "epoch": 0.9365817370280826,
+      "grad_norm": 4.006692409515381,
+      "learning_rate": 1.1475419564636048e-06,
+      "loss": 9.568,
+      "step": 1259
+    },
+    {
+      "epoch": 0.9373256462711549,
+      "grad_norm": 3.672508478164673,
+      "learning_rate": 1.120800538859995e-06,
+      "loss": 9.5789,
+      "step": 1260
+    },
+    {
+      "epoch": 0.9380695555142272,
+      "grad_norm": 3.5939109325408936,
+      "learning_rate": 1.0943708542989372e-06,
+      "loss": 9.5801,
+      "step": 1261
+    },
+    {
+      "epoch": 0.9388134647572997,
+      "grad_norm": 3.7673163414001465,
+      "learning_rate": 1.0682530713389482e-06,
+      "loss": 9.5732,
+      "step": 1262
+    },
+    {
+      "epoch": 0.939557374000372,
+      "grad_norm": 3.593905210494995,
+      "learning_rate": 1.042447356549381e-06,
+      "loss": 9.5818,
+      "step": 1263
+    },
+    {
+      "epoch": 0.9403012832434443,
+      "grad_norm": 3.6882522106170654,
+      "learning_rate": 1.0169538745093242e-06,
+      "loss": 9.578,
+      "step": 1264
+    },
+    {
+      "epoch": 0.9410451924865166,
+      "grad_norm": 3.8786656856536865,
+      "learning_rate": 9.917727878065497e-07,
+      "loss": 9.5709,
+      "step": 1265
+    },
+    {
+      "epoch": 0.941789101729589,
+      "grad_norm": 3.5021424293518066,
+      "learning_rate": 9.66904257036505e-07,
+      "loss": 9.588,
+      "step": 1266
+    },
+    {
+      "epoch": 0.9425330109726613,
+      "grad_norm": 3.609473466873169,
+      "learning_rate": 9.423484408012717e-07,
+      "loss": 9.581,
+      "step": 1267
+    },
+    {
+      "epoch": 0.9432769202157337,
+      "grad_norm": 3.672401189804077,
+      "learning_rate": 9.1810549570856e-07,
+      "loss": 9.5778,
+      "step": 1268
+    },
+    {
+      "epoch": 0.944020829458806,
+      "grad_norm": 3.672403573989868,
+      "learning_rate": 8.941755763707038e-07,
+      "loss": 9.5776,
+      "step": 1269
+    },
+    {
+      "epoch": 0.9447647387018784,
+      "grad_norm": 3.5481207370758057,
+      "learning_rate": 8.705588354036676e-07,
+      "loss": 9.5856,
+      "step": 1270
+    },
+    {
+      "epoch": 0.9455086479449507,
+      "grad_norm": 3.531853199005127,
+      "learning_rate": 8.472554234260965e-07,
+      "loss": 9.5838,
+      "step": 1271
+    },
+    {
+      "epoch": 0.946252557188023,
+      "grad_norm": 3.502507209777832,
+      "learning_rate": 8.24265489058329e-07,
+      "loss": 9.5878,
+      "step": 1272
+    },
+    {
+      "epoch": 0.9469964664310954,
+      "grad_norm": 3.8787922859191895,
+      "learning_rate": 8.015891789214803e-07,
+      "loss": 9.5708,
+      "step": 1273
+    },
+    {
+      "epoch": 0.9477403756741678,
+      "grad_norm": 3.5023539066314697,
+      "learning_rate": 7.79226637636471e-07,
+      "loss": 9.5884,
+      "step": 1274
+    },
+    {
+      "epoch": 0.9484842849172401,
+      "grad_norm": 3.7672739028930664,
+      "learning_rate": 7.571780078231283e-07,
+      "loss": 9.574,
+      "step": 1275
+    },
+    {
+      "epoch": 0.9492281941603125,
+      "grad_norm": 3.879286289215088,
+      "learning_rate": 7.354434300992752e-07,
+      "loss": 9.5714,
+      "step": 1276
+    },
+    {
+      "epoch": 0.9499721034033848,
+      "grad_norm": 3.501861810684204,
+      "learning_rate": 7.140230430798256e-07,
+      "loss": 9.5876,
+      "step": 1277
+    },
+    {
+      "epoch": 0.9507160126464571,
+      "grad_norm": 3.7678983211517334,
+      "learning_rate": 6.929169833759075e-07,
+      "loss": 9.5754,
+      "step": 1278
+    },
+    {
+      "epoch": 0.9514599218895294,
+      "grad_norm": 3.502337694168091,
+      "learning_rate": 6.721253855939746e-07,
+      "loss": 9.5883,
+      "step": 1279
+    },
+    {
+      "epoch": 0.9522038311326019,
+      "grad_norm": 3.501981258392334,
+      "learning_rate": 6.516483823349795e-07,
+      "loss": 9.5874,
+      "step": 1280
+    },
+    {
+      "epoch": 0.9529477403756742,
+      "grad_norm": 3.472780704498291,
+      "learning_rate": 6.314861041935016e-07,
+      "loss": 9.5914,
+      "step": 1281
+    },
+    {
+      "epoch": 0.9536916496187465,
+      "grad_norm": 3.76802921295166,
+      "learning_rate": 6.116386797569207e-07,
+      "loss": 9.5749,
+      "step": 1282
+    },
+    {
+      "epoch": 0.9544355588618189,
+      "grad_norm": 3.547377824783325,
+      "learning_rate": 5.921062356046058e-07,
+      "loss": 9.5844,
+      "step": 1283
+    },
+    {
+      "epoch": 0.9551794681048912,
+      "grad_norm": 3.672638177871704,
+      "learning_rate": 5.728888963070945e-07,
+      "loss": 9.578,
+      "step": 1284
+    },
+    {
+      "epoch": 0.9559233773479635,
+      "grad_norm": 3.672529458999634,
+      "learning_rate": 5.539867844253033e-07,
+      "loss": 9.5785,
+      "step": 1285
+    },
+    {
+      "epoch": 0.956667286591036,
+      "grad_norm": 3.531773328781128,
+      "learning_rate": 5.35400020509752e-07,
+      "loss": 9.5858,
+      "step": 1286
+    },
+    {
+      "epoch": 0.9574111958341083,
+      "grad_norm": 3.609574556350708,
+      "learning_rate": 5.171287230997968e-07,
+      "loss": 9.5813,
+      "step": 1287
+    },
+    {
+      "epoch": 0.9581551050771806,
+      "grad_norm": 3.6094818115234375,
+      "learning_rate": 4.99173008722853e-07,
+      "loss": 9.5812,
+      "step": 1288
+    },
+    {
+      "epoch": 0.9588990143202529,
+      "grad_norm": 3.7675580978393555,
+      "learning_rate": 4.815329918936684e-07,
+      "loss": 9.5752,
+      "step": 1289
+    },
+    {
+      "epoch": 0.9596429235633253,
+      "grad_norm": 3.5020837783813477,
+      "learning_rate": 4.642087851136123e-07,
+      "loss": 9.5884,
+      "step": 1290
+    },
+    {
+      "epoch": 0.9603868328063976,
+      "grad_norm": 3.593885660171509,
+      "learning_rate": 4.472004988699041e-07,
+      "loss": 9.5817,
+      "step": 1291
+    },
+    {
+      "epoch": 0.9611307420494699,
+      "grad_norm": 3.547576665878296,
+      "learning_rate": 4.305082416349804e-07,
+      "loss": 9.5845,
+      "step": 1292
+    },
+    {
+      "epoch": 0.9618746512925423,
+      "grad_norm": 3.6724321842193604,
+      "learning_rate": 4.141321198657178e-07,
+      "loss": 9.5779,
+      "step": 1293
+    },
+    {
+      "epoch": 0.9626185605356147,
+      "grad_norm": 3.5321435928344727,
+      "learning_rate": 3.9807223800284453e-07,
+      "loss": 9.5845,
+      "step": 1294
+    },
+    {
+      "epoch": 0.963362469778687,
+      "grad_norm": 3.594072103500366,
+      "learning_rate": 3.823286984702079e-07,
+      "loss": 9.5809,
+      "step": 1295
+    },
+    {
+      "epoch": 0.9641063790217593,
+      "grad_norm": 3.4732275009155273,
+      "learning_rate": 3.6690160167413554e-07,
+      "loss": 9.5921,
+      "step": 1296
+    },
+    {
+      "epoch": 0.9648502882648317,
+      "grad_norm": 3.473226547241211,
+      "learning_rate": 3.5179104600283617e-07,
+      "loss": 9.5916,
+      "step": 1297
+    },
+    {
+      "epoch": 0.965594197507904,
+      "grad_norm": 3.5020735263824463,
+      "learning_rate": 3.3699712782569446e-07,
+      "loss": 9.5888,
+      "step": 1298
+    },
+    {
+      "epoch": 0.9663381067509764,
+      "grad_norm": 3.6096696853637695,
+      "learning_rate": 3.2251994149273823e-07,
+      "loss": 9.5813,
+      "step": 1299
+    },
+    {
+      "epoch": 0.9670820159940487,
+      "grad_norm": 3.5941600799560547,
+      "learning_rate": 3.0835957933397773e-07,
+      "loss": 9.5805,
+      "step": 1300
+    },
+    {
+      "epoch": 0.9678259252371211,
+      "grad_norm": 3.672405481338501,
+      "learning_rate": 2.945161316588563e-07,
+      "loss": 9.5779,
+      "step": 1301
+    },
+    {
+      "epoch": 0.9685698344801934,
+      "grad_norm": 4.005847930908203,
+      "learning_rate": 2.809896867556283e-07,
+      "loss": 9.567,
+      "step": 1302
+    },
+    {
+      "epoch": 0.9693137437232657,
+      "grad_norm": 3.501986503601074,
+      "learning_rate": 2.677803308908544e-07,
+      "loss": 9.5874,
+      "step": 1303
+    },
+    {
+      "epoch": 0.9700576529663381,
+      "grad_norm": 3.6726467609405518,
+      "learning_rate": 2.548881483088128e-07,
+      "loss": 9.5771,
+      "step": 1304
+    },
+    {
+      "epoch": 0.9708015622094105,
+      "grad_norm": 4.005867958068848,
+      "learning_rate": 2.423132212309609e-07,
+      "loss": 9.5669,
+      "step": 1305
+    },
+    {
+      "epoch": 0.9715454714524828,
+      "grad_norm": 3.609499931335449,
+      "learning_rate": 2.3005562985542462e-07,
+      "loss": 9.5804,
+      "step": 1306
+    },
+    {
+      "epoch": 0.9722893806955551,
+      "grad_norm": 3.609724521636963,
+      "learning_rate": 2.1811545235648212e-07,
+      "loss": 9.581,
+      "step": 1307
+    },
+    {
+      "epoch": 0.9730332899386275,
+      "grad_norm": 3.7671802043914795,
+      "learning_rate": 2.0649276488408086e-07,
+      "loss": 9.5743,
+      "step": 1308
+    },
+    {
+      "epoch": 0.9737771991816998,
+      "grad_norm": 3.547499179840088,
+      "learning_rate": 1.9518764156331027e-07,
+      "loss": 9.5849,
+      "step": 1309
+    },
+    {
+      "epoch": 0.9745211084247721,
+      "grad_norm": 3.6095187664031982,
+      "learning_rate": 1.842001544939742e-07,
+      "loss": 9.5813,
+      "step": 1310
+    },
+    {
+      "epoch": 0.9752650176678446,
+      "grad_norm": 3.6724700927734375,
+      "learning_rate": 1.735303737501137e-07,
+      "loss": 9.5773,
+      "step": 1311
+    },
+    {
+      "epoch": 0.9760089269109169,
+      "grad_norm": 3.609522581100464,
+      "learning_rate": 1.6317836737955172e-07,
+      "loss": 9.581,
+      "step": 1312
+    },
+    {
+      "epoch": 0.9767528361539892,
+      "grad_norm": 3.767378330230713,
+      "learning_rate": 1.5314420140346564e-07,
+      "loss": 9.5747,
+      "step": 1313
+    },
+    {
+      "epoch": 0.9774967453970616,
+      "grad_norm": 3.502347469329834,
+      "learning_rate": 1.4342793981597103e-07,
+      "loss": 9.5884,
+      "step": 1314
+    },
+    {
+      "epoch": 0.9782406546401339,
+      "grad_norm": 3.6725053787231445,
+      "learning_rate": 1.340296445837108e-07,
+      "loss": 9.5769,
+      "step": 1315
+    },
+    {
+      "epoch": 0.9789845638832062,
+      "grad_norm": 3.672431707382202,
+      "learning_rate": 1.2494937564545562e-07,
+      "loss": 9.5781,
+      "step": 1316
+    },
+    {
+      "epoch": 0.9797284731262786,
+      "grad_norm": 3.5474746227264404,
+      "learning_rate": 1.161871909117207e-07,
+      "loss": 9.5839,
+      "step": 1317
+    },
+    {
+      "epoch": 0.980472382369351,
+      "grad_norm": 3.6726107597351074,
+      "learning_rate": 1.0774314626441628e-07,
+      "loss": 9.5783,
+      "step": 1318
+    },
+    {
+      "epoch": 0.9812162916124233,
+      "grad_norm": 3.4735405445098877,
+      "learning_rate": 9.961729555645338e-08,
+      "loss": 9.5917,
+      "step": 1319
+    },
+    {
+      "epoch": 0.9819602008554956,
+      "grad_norm": 3.5478289127349854,
+      "learning_rate": 9.180969061143852e-08,
+      "loss": 9.5844,
+      "step": 1320
+    },
+    {
+      "epoch": 0.982704110098568,
+      "grad_norm": 3.547591209411621,
+      "learning_rate": 8.432038122331842e-08,
+      "loss": 9.5842,
+      "step": 1321
+    },
+    {
+      "epoch": 0.9834480193416403,
+      "grad_norm": 3.6724138259887695,
+      "learning_rate": 7.714941515608587e-08,
+      "loss": 9.5777,
+      "step": 1322
+    },
+    {
+      "epoch": 0.9841919285847127,
+      "grad_norm": 3.7674906253814697,
+      "learning_rate": 7.029683814343547e-08,
+      "loss": 9.5748,
+      "step": 1323
+    },
+    {
+      "epoch": 0.984935837827785,
+      "grad_norm": 3.767176866531372,
+      "learning_rate": 6.376269388852496e-08,
+      "loss": 9.5745,
+      "step": 1324
+    },
+    {
+      "epoch": 0.9856797470708574,
+      "grad_norm": 3.593855857849121,
+      "learning_rate": 5.7547024063642204e-08,
+      "loss": 9.5817,
+      "step": 1325
+    },
+    {
+      "epoch": 0.9864236563139297,
+      "grad_norm": 3.547398567199707,
+      "learning_rate": 5.164986830998308e-08,
+      "loss": 9.5846,
+      "step": 1326
+    },
+    {
+      "epoch": 0.987167565557002,
+      "grad_norm": 3.4740869998931885,
+      "learning_rate": 4.607126423737951e-08,
+      "loss": 9.591,
+      "step": 1327
+    },
+    {
+      "epoch": 0.9879114748000744,
+      "grad_norm": 3.5474886894226074,
+      "learning_rate": 4.0811247424049625e-08,
+      "loss": 9.5845,
+      "step": 1328
+    },
+    {
+      "epoch": 0.9886553840431468,
+      "grad_norm": 3.67246150970459,
+      "learning_rate": 3.586985141639798e-08,
+      "loss": 9.5775,
+      "step": 1329
+    },
+    {
+      "epoch": 0.9893992932862191,
+      "grad_norm": 3.4746315479278564,
+      "learning_rate": 3.124710772877682e-08,
+      "loss": 9.5916,
+      "step": 1330
+    },
+    {
+      "epoch": 0.9901432025292914,
+      "grad_norm": 3.5024232864379883,
+      "learning_rate": 2.694304584329732e-08,
+      "loss": 9.5883,
+      "step": 1331
+    },
+    {
+      "epoch": 0.9908871117723638,
+      "grad_norm": 3.8790781497955322,
+      "learning_rate": 2.2957693209635368e-08,
+      "loss": 9.5715,
+      "step": 1332
+    },
+    {
+      "epoch": 0.9916310210154361,
+      "grad_norm": 3.6882379055023193,
+      "learning_rate": 1.9291075244864954e-08,
+      "loss": 9.5784,
+      "step": 1333
+    },
+    {
+      "epoch": 0.9923749302585084,
+      "grad_norm": 3.7677388191223145,
+      "learning_rate": 1.594321533328058e-08,
+      "loss": 9.5748,
+      "step": 1334
+    },
+    {
+      "epoch": 0.9931188395015808,
+      "grad_norm": 3.5019233226776123,
+      "learning_rate": 1.2914134826280677e-08,
+      "loss": 9.5891,
+      "step": 1335
+    },
+    {
+      "epoch": 0.9938627487446532,
+      "grad_norm": 3.5015294551849365,
+      "learning_rate": 1.0203853042184407e-08,
+      "loss": 9.588,
+      "step": 1336
+    },
+    {
+      "epoch": 0.9946066579877255,
+      "grad_norm": 3.5472798347473145,
+      "learning_rate": 7.812387266142862e-09,
+      "loss": 9.5846,
+      "step": 1337
+    },
+    {
+      "epoch": 0.9953505672307978,
+      "grad_norm": 3.5023794174194336,
+      "learning_rate": 5.7397527500224755e-09,
+      "loss": 9.5886,
+      "step": 1338
+    },
+    {
+      "epoch": 0.9960944764738702,
+      "grad_norm": 3.5475242137908936,
+      "learning_rate": 3.985962712310665e-09,
+      "loss": 9.5851,
+      "step": 1339
+    },
+    {
+      "epoch": 0.9968383857169425,
+      "grad_norm": 3.5319745540618896,
+      "learning_rate": 2.5510283379992504e-09,
+      "loss": 9.5847,
+      "step": 1340
+    },
+    {
+      "epoch": 0.9975822949600148,
+      "grad_norm": 3.4726641178131104,
+      "learning_rate": 1.4349587785733586e-09,
+      "loss": 9.5916,
+      "step": 1341
+    },
+    {
+      "epoch": 0.9983262042030873,
+      "grad_norm": 3.768148422241211,
+      "learning_rate": 6.377611518948446e-10,
+      "loss": 9.5752,
+      "step": 1342
+    },
+    {
+      "epoch": 0.9990701134461596,
+      "grad_norm": 3.688249111175537,
+      "learning_rate": 1.594405421856404e-10,
+      "loss": 9.578,
+      "step": 1343
+    },
+    {
+      "epoch": 0.9998140226892319,
+      "grad_norm": 3.5478217601776123,
+      "learning_rate": 0.0,
+      "loss": 9.5859,
+      "step": 1344
     }
   ],
   "logging_steps": 1,
@@ -8386,12 +9429,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 672079401713664.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null